In [1]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

from process_dataset import speech_features, text_features
import numpy as np

from ensemble import StackEnsemble, VoteEnsemble, BlendEnsemble

from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost

import warnings
warnings.filterwarnings('ignore') 

# Methods

In [2]:
def get_speech_models():
    """Build the named base models for the speech modality.

    Each entry pairs a display name with the object returned by the matching
    ``speech_*`` module factory. The factories are called in list order:
    SVM, random forest, naive Bayes, logistic regression, MLP, XGBoost.

    Returns:
        list: ``(name, model)`` tuples, one per base model.
    """
    # TODO lstm
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Random Forest Classifier', speech_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', speech_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', speech_mlp.get_mlp()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Build the named base models for the text modality.

    Each entry pairs a display name with the object returned by the matching
    ``text_*`` module factory. The factories are called in list order:
    SVM, random forest, naive Bayes, logistic regression, MLP, XGBoost.

    Returns:
        list: ``(name, model)`` tuples, one per base model.
    """
    # TODO lstm
    return [
        ('Support Vector Machine', text_svm.get_svm()),
        ('Random Forest Classifier', text_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', text_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', text_mlp.get_mlp()),
        ('XGBoost', text_xgboost.get_xgb()),
    ]

def print_scores(scores):
    """Print the mean of each cross-validation metric in ``scores``.

    Args:
        scores: Mapping with the keys ``test_accuracy``, ``test_f1_macro``,
            ``test_precision_macro`` and ``test_recall_macro``; each value is
            a sequence of per-fold scores that ``np.mean`` can average.

    Raises:
        KeyError: If one of the four metric keys is missing (lines for the
            metrics before it are still printed).
    """
    # (label, key) pairs in the order they are reported.
    metric_table = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, metric_key in metric_table:
        fold_mean = np.mean(scores[metric_key])
        print(label, fold_mean)


# Train Test Splits 

In [3]:
# Hold-out split for each modality: *_s = speech features, *_t = text features.
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()
# NOTE(review): the cells below fit and evaluate with the speech labels only
# (y_train_s / y_test_s) for both modalities; y_train_t / y_test_t are never used.
# This assumes the speech and text splits are row-aligned — TODO confirm.
# Full datasets for the k-fold runs; indexed below as [0] (features) and
# [1] (presumably labels — verify against get_data()).
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# Vote Ensemble (Soft)

In [7]:
# Voting ensemble over fresh speech and text base models, in 'soft' mode
# (presumably combines predicted class probabilities — confirm in ensemble.VoteEnsemble).
voter = VoteEnsemble(get_speech_models(), get_text_models(), type='soft')


In [None]:
# Fit on both modalities' training features; the speech labels serve as the shared target.
voter.fit(x_train_s, x_train_t, y_train_s)

In [14]:
# Predict on the hold-out features of both modalities and report per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
voter_result = voter.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8901    0.7788    0.8308       208
         hap     0.7934    0.7634    0.7781       317
         neu     0.7381    0.7561    0.7470       369
         sad     0.7231    0.8216    0.7692       213

    accuracy                         0.7751      1107
   macro avg     0.7862    0.7800    0.7813      1107
weighted avg     0.7796    0.7751    0.7759      1107



In [13]:
# Persist the fitted soft voter to a path relative to the working directory
# (the .pkl name suggests pickle — confirm in VoteEnsemble.save).
voter.save('soft_voter.pkl')

## K-Fold Test (Soft Vote Ensemble, 5 folds)

In [None]:
# 5-fold cross-validation on the full speech/text features, using the speech
# dataset's second element as the target. The result is read by print_scores below.
voter_k_fold = voter.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [9]:
# Mean accuracy / macro F1 / macro precision / macro recall for the soft voter.
print_scores(voter_k_fold)

Accuracy:  0.7834003897603774
F1 Macro:  0.790329865958687
Precision Macro:  0.8017810603749211
Recall Macro:  0.783909660536235


# Vote Ensemble (Hard)

In [10]:
# Voting ensemble over fresh speech and text base models, in 'hard' mode
# (presumably a majority vote over predicted labels — confirm in ensemble.VoteEnsemble).
voter2 = VoteEnsemble(get_speech_models(), get_text_models(), type='hard')


In [None]:
# Fit on both modalities' training features; the speech labels serve as the shared target.
voter2.fit(x_train_s, x_train_t, y_train_s)

In [5]:
# Predict on the hold-out features of both modalities and report per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
voter_result2 = voter2.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result2, digits=4))

              precision    recall  f1-score   support

         ang     0.8619    0.7500    0.8021       208
         hap     0.7667    0.7256    0.7455       317
         neu     0.7005    0.7290    0.7145       369
         sad     0.6901    0.7840    0.7341       213

    accuracy                         0.7425      1107
   macro avg     0.7548    0.7471    0.7490      1107
weighted avg     0.7478    0.7425    0.7436      1107



In [14]:
# Persist the fitted hard voter to a path relative to the working directory
# (the .pkl name suggests pickle — confirm in VoteEnsemble.save).
voter2.save('hard_voter.pkl')

## K-Fold Test (Hard Vote Ensemble, 5 folds)

In [None]:
# 5-fold cross-validation on the full speech/text features, using the speech
# dataset's second element as the target. The result is read by print_scores below.
voter_k_fold2 = voter2.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [12]:
# Mean accuracy / macro F1 / macro precision / macro recall for the hard voter.
print_scores(voter_k_fold2)

Accuracy:  0.7504940612998655
F1 Macro:  0.7566017627868237
Precision Macro:  0.7680135477221839
Recall Macro:  0.7511451247774269


# Blend Ensemble

In [4]:
# Meta-classifier handed to the blend ensemble; random_state is fixed for repeatable fits.
meta_cls = LogisticRegression(solver='liblinear', random_state=42)
# Blend ensemble over fresh speech and text base models plus the meta-classifier
# (how the base outputs feed meta_cls is defined in ensemble.BlendEnsemble).
blender = BlendEnsemble(get_speech_models(), get_text_models(), meta_cls)

In [None]:
# Fit on both modalities' training features; the speech labels serve as the shared target.
blender.fit(x_train_s, x_train_t, y_train_s)

In [19]:
# Predict on the hold-out features of both modalities and report per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
blender_result = blender.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, blender_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8382    0.8221    0.8301       208
         hap     0.7830    0.7855    0.7843       317
         neu     0.7699    0.7073    0.7373       369
         sad     0.7195    0.8310    0.7712       213

    accuracy                         0.7751      1107
   macro avg     0.7777    0.7865    0.7807      1107
weighted avg     0.7768    0.7751    0.7747      1107



In [15]:
# Persist the fitted blender to a path relative to the working directory
# (the .pkl name suggests pickle — confirm in BlendEnsemble.save).
blender.save('blender.pkl')

## K-Fold Test (Blend Ensemble, 5 folds)

In [None]:
# 5-fold cross-validation on the full speech/text features, using the speech
# dataset's second element as the target. The result is read by print_scores below.
blender_k_fold = blender.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [6]:
# Mean accuracy / macro F1 / macro precision / macro recall for the blender.
print_scores(blender_k_fold)

Accuracy:  0.7808707044273577
F1 Macro:  0.7868704397749395
Precision Macro:  0.7888474845289276
Recall Macro:  0.7857326611990129


# Stack Ensemble

In [None]:
# Separate meta-classifier instance for the stack ensemble (same settings as the blender's).
meta_cls2 = LogisticRegression(solver='liblinear', random_state=42)
# Stack ensemble over fresh speech and text base models, built with cv=5.
# n_jobs=-1 presumably means "use all cores" as in scikit-learn — confirm in ensemble.StackEnsemble.
stacker = StackEnsemble(get_speech_models(), get_text_models(), meta_cls2, cv=5, n_jobs=-1)

In [None]:

# Fit on both modalities' training features; the speech labels serve as the shared target.
stacker.fit(x_train_s, x_train_t, y_train_s)

In [5]:
# Predict on the hold-out features of both modalities and report per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
stacker_result = stacker.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, stacker_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8227    0.8702    0.8458       208
         hap     0.8058    0.7855    0.7955       317
         neu     0.7589    0.6911    0.7234       369
         sad     0.7438    0.8451    0.7912       213

    accuracy                         0.7814      1107
   macro avg     0.7828    0.7980    0.7890      1107
weighted avg     0.7814    0.7814    0.7801      1107



In [6]:
# Persist the fitted stacker to a path relative to the working directory
# (the .pkl name suggests pickle — confirm in StackEnsemble.save).
stacker.save('stacker.pkl')

## K-Fold Test (Stack Ensemble, 5 folds)

In [None]:
# 5-fold cross-validation on the full speech/text features, using the speech
# dataset's second element as the target. The result is read by print_scores below.
stacker_k_fold = stacker.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Mean accuracy / macro F1 / macro precision / macro recall for the stacker.
print_scores(stacker_k_fold)