In [19]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

from process_dataset import speech_features, text_features
import numpy as np

from ensemble import StackEnsemble, VoteEnsemble, BlendEnsemble

from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost

import warnings
warnings.filterwarnings('ignore') 

# Methods

In [21]:
def get_speech_models():
    """Return the speech base models as a list of (label, model) tuples.

    Each model is whatever the matching ``speech_*`` module's factory
    function returns; the factories are called in the order listed here.
    """
    # TODO lstm
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Random Forest Classifier', speech_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', speech_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', speech_mlp.get_mlp()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Return the text base models as a list of (label, model) tuples.

    Each model is whatever the matching ``text_*`` module's factory
    function returns; the factories are called in the order listed here.
    """
    # TODO lstm
    return [
        ('Support Vector Machine', text_svm.get_svm()),
        ('Random Forest Classifier', text_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', text_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', text_mlp.get_mlp()),
        ('XGBoost', text_xgboost.get_xgb()),
    ]

def print_scores(scores):
    """Print the mean of four test metrics taken from a scores mapping.

    ``scores`` is indexed by 'test_accuracy', 'test_f1_macro',
    'test_precision_macro' and 'test_recall_macro'. Each entry is averaged
    with ``np.mean`` and printed on its own line after its label.
    """
    labelled_keys = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, key in labelled_keys:
        print(label, np.mean(scores[key]))


# Train Test Splits 

In [22]:
# Train/test partitions for each modality, as returned by the project's feature modules.
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()
# NOTE(review): every ensemble in this notebook is fit and scored with the speech labels
# only (y_train_s / y_test_s); y_train_t and y_test_t are never used. This presumes both
# modalities are split with identical row order -- confirm in process_dataset.
# Full data for each modality; the cross-validation cells index these as [0] and [1],
# presumably an (X, y) pair -- verify against get_data().
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# Vote Ensemble (Soft)

In [28]:
# Build a voting ensemble over freshly constructed speech and text model lists, with type='soft'.
voter = VoteEnsemble(get_speech_models(), get_text_models(), type='soft')


In [None]:
# Fit the soft voter on the speech and text training features, with the speech-split labels as target.
voter.fit(x_train_s, x_train_t, y_train_s)

In [30]:
# Predict on the speech/text test features, then report per-class metrics
# against the speech-split test labels, formatted to 4 decimal places.
voter_result = voter.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8983    0.7644    0.8260       208
         hap     0.8019    0.7792    0.7904       317
         neu     0.7342    0.7561    0.7450       369
         sad     0.7273    0.8263    0.7736       213

    accuracy                         0.7778      1107
   macro avg     0.7904    0.7815    0.7837      1107
weighted avg     0.7831    0.7778    0.7787      1107



In [31]:
# Save the soft voter through the ensemble's own save() method to 'soft_voter.pkl'.
voter.save('soft_voter.pkl')

## K-Fold Cross-Validation (Soft Voter)

In [None]:
# Cross-validate the soft voter (cv=5) on the full speech and text data, using element [1]
# of the speech data as the target.
voter_k_fold = voter.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the mean accuracy, macro F1, macro precision and macro recall from the soft voter's CV scores.
print_scores(voter_k_fold)

# Vote Ensemble (Hard)

In [32]:
# Build a second voting ensemble over freshly constructed model lists, this time with type='hard'.
voter2 = VoteEnsemble(get_speech_models(), get_text_models(), type='hard')


In [33]:
# Fit the hard voter on the speech and text training features, with the speech-split labels as target.
voter2.fit(x_train_s, x_train_t, y_train_s)

Training Support Vector Machine (Speech) ...
Training Random Forest Classifier (Speech) ...
Training Multinomial Naive Bayes (Speech) ...
Training Logistic Regression (Speech) ...
Training MLP Classifier (Speech) ...
Training XGBoost (Speech) ...
Training Support Vector Machine (Text) ...
Training Random Forest Classifier (Text) ...
Training Multinomial Naive Bayes (Text) ...
Training Logistic Regression (Text) ...
Training MLP Classifier (Text) ...
Training XGBoost (Text) ...
Parameters: { "tree_methods" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




In [34]:
# Predict with the hard voter on the speech/text test features, then report per-class
# metrics against the speech-split test labels, formatted to 4 decimal places.
voter_result2 = voter2.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result2, digits=4))

              precision    recall  f1-score   support

         ang     0.8736    0.7308    0.7958       208
         hap     0.7869    0.7224    0.7533       317
         neu     0.6830    0.7534    0.7165       369
         sad     0.7064    0.7793    0.7411       213

    accuracy                         0.7453      1107
   macro avg     0.7625    0.7465    0.7517      1107
weighted avg     0.7531    0.7453    0.7467      1107



In [35]:
# Save the hard voter through the ensemble's own save() method to 'hard_voter.pkl'.
voter2.save('hard_voter.pkl')

## K-Fold Cross-Validation (Hard Voter)

In [None]:
# Cross-validate the hard voter (cv=5) on the full speech and text data, using element [1]
# of the speech data as the target.
voter_k_fold2 = voter2.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the mean accuracy, macro F1, macro precision and macro recall from the hard voter's CV scores.
print_scores(voter_k_fold2)

# Blend Ensemble

In [36]:
# Meta-classifier for the blend: logistic regression with the liblinear solver and a fixed seed.
meta_cls = LogisticRegression(solver='liblinear', random_state=42)
# Blend ensemble over freshly constructed speech and text model lists plus the meta-classifier.
blender = BlendEnsemble(get_speech_models(), get_text_models(), meta_cls)

In [37]:
# Fit the blender on the speech and text training features, with the speech-split labels as target.
blender.fit(x_train_s, x_train_t, y_train_s)

Training Support Vector Machine (Speech) ...
Training Random Forest Classifier (Speech) ...
Training Multinomial Naive Bayes (Speech) ...
Training Logistic Regression (Speech) ...
Training MLP Classifier (Speech) ...
Training XGBoost (Speech) ...
Training Support Vector Machine (Text) ...
Training Random Forest Classifier (Text) ...
Training Multinomial Naive Bayes (Text) ...
Training Logistic Regression (Text) ...
Training MLP Classifier (Text) ...
Training XGBoost (Text) ...
Parameters: { "tree_methods" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Training Meta Classifier ...


In [40]:
# Predict with the blender on the speech/text test features, then report per-class
# metrics against the speech-split test labels, formatted to 4 decimal places.
blender_result = blender.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, blender_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8333    0.8173    0.8252       208
         hap     0.7699    0.7918    0.7807       317
         neu     0.7774    0.6911    0.7317       369
         sad     0.7269    0.8498    0.7835       213

    accuracy                         0.7742      1107
   macro avg     0.7769    0.7875    0.7803      1107
weighted avg     0.7761    0.7742    0.7733      1107



In [39]:
# Save the blender through the ensemble's own save() method to 'blender.pkl'.
blender.save('blender.pkl')

## K-Fold Cross-Validation (Blend Ensemble)

In [None]:
# Cross-validate the blender (cv=5) on the full speech and text data, using element [1]
# of the speech data as the target.
blender_k_fold = blender.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the mean accuracy, macro F1, macro precision and macro recall from the blender's CV scores.
print_scores(blender_k_fold)

# Stack Ensemble

In [24]:
# Meta-classifier for the stack: logistic regression with a fixed seed and n_jobs=-1.
meta_cls2 = LogisticRegression(random_state=42, n_jobs=-1)
# Stack ensemble over freshly constructed speech and text model lists plus the meta-classifier.
# cv=5 and n_jobs=-1 are passed through; how they are used is defined in the ensemble module.
stacker = StackEnsemble(get_speech_models(), get_text_models(), meta_cls2, cv=5, n_jobs=-1)

In [None]:

# Fit the stacker on the speech and text training features, with the speech-split labels as target.
stacker.fit(x_train_s, x_train_t, y_train_s)

In [26]:
# Predict with the stacker on the speech/text test features, then report per-class
# metrics against the speech-split test labels, formatted to 4 decimal places.
stacker_result = stacker.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, stacker_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8122    0.8317    0.8219       208
         hap     0.8072    0.7792    0.7929       317
         neu     0.7695    0.7236    0.7458       369
         sad     0.7386    0.8357    0.7841       213

    accuracy                         0.7814      1107
   macro avg     0.7819    0.7925    0.7862      1107
weighted avg     0.7824    0.7814    0.7810      1107



In [27]:
# Save the stacker through the ensemble's own save() method to 'stacker.pkl'.
stacker.save('stacker.pkl')

## K-Fold Cross-Validation (Stack Ensemble)

In [None]:
# Cross-validate the stacker (cv=5) on the full speech and text data, using element [1]
# of the speech data as the target.
stacker_k_fold = stacker.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the mean accuracy, macro F1, macro precision and macro recall from the stacker's CV scores.
print_scores(stacker_k_fold)