In [1]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

from process_dataset import speech_features, text_features
import numpy as np

from ensemble import StackEnsemble, VoteEnsemble, BlendEnsemble

from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost

import warnings
warnings.filterwarnings('ignore') 

  from pandas import MultiIndex, Int64Index


# Helper Functions

In [2]:
def get_speech_models():
    """Return the base models for the speech modality.

    Returns:
        list: ``(display_name, model)`` tuples, one per ``speech_*`` module
        imported at the top of the notebook. Each model is whatever that
        module's factory function returns; a fresh set is built on every call.
    """
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Random Forest Classifier', speech_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', speech_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', speech_mlp.get_mlp()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Return the base models for the text modality.

    Returns:
        list: ``(display_name, model)`` tuples, one per ``text_*`` module
        imported at the top of the notebook. Each model is whatever that
        module's factory function returns; a fresh set is built on every call.
    """
    return [
        ('Support Vector Machine', text_svm.get_svm()),
        ('Random Forest Classifier', text_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', text_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', text_mlp.get_mlp()),
        ('XGBoost', text_xgboost.get_xgb()),
    ]

def print_scores(scores):
    """Print the mean of each metric array held in ``scores``.

    Args:
        scores: Mapping that contains the keys ``test_accuracy``,
            ``test_f1_macro``, ``test_precision_macro`` and
            ``test_recall_macro``. Each value is reduced with ``np.mean``.

    Raises:
        KeyError: If one of the expected keys is missing (lines for the
            preceding metrics have already been printed at that point).
    """
    # (label, key) pairs in the order they are reported.
    report_rows = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, key in report_rows:
        print(label, np.mean(scores[key]))


# Train/Test Splits

In [3]:
# Train/test splits for each modality (suffix _s = speech, _t = text).
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()

# Full datasets, used by the K-fold cells below. Index 0 is passed as the
# features and index 1 as the labels.
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# Every ensemble in this notebook pairs speech and text features row by row
# but is fitted and scored with the *speech* labels only. That is valid only
# if both modalities describe the same samples in the same order, so fail
# fast here instead of silently training on mismatched rows.
assert np.array_equal(np.asarray(y_train_s), np.asarray(y_train_t)), \
    'speech and text training labels are not aligned'
assert np.array_equal(np.asarray(y_test_s), np.asarray(y_test_t)), \
    'speech and text test labels are not aligned'
assert np.array_equal(np.asarray(speech_x_y[1]), np.asarray(text_x_y[1])), \
    'speech and text full-dataset labels are not aligned'

# Vote Ensemble (Soft)

In [4]:
# Voting ensemble over fresh speech and text base models, with type='soft'.
# NOTE(review): presumably 'soft' combines predicted probabilities rather than
# class votes — confirm in ensemble.VoteEnsemble.
voter = VoteEnsemble(get_speech_models(), get_text_models(), type='soft')


In [None]:
# Fit on the training split of both modalities. Only the speech labels are
# passed — assumes speech and text rows are aligned; TODO confirm.
voter.fit(x_train_s, x_train_t, y_train_s)

In [6]:
# Predict on the held-out test features of both modalities, then print the
# per-class precision/recall/F1 report (4 decimals) against the speech labels.
voter_result = voter.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result, digits=4))

              precision    recall  f1-score   support

         ang     0.9012    0.7452    0.8158       208
         hap     0.8092    0.7760    0.7923       317
         neu     0.7280    0.7615    0.7444       369
         sad     0.7184    0.8263    0.7686       213

    accuracy                         0.7751      1107
   macro avg     0.7892    0.7773    0.7802      1107
weighted avg     0.7819    0.7751    0.7762      1107



In [7]:
# Save the soft-voting ensemble; the path is relative to the working directory.
voter.save('soft_voter_all.pkl')

## K-Fold Test (5-fold cross-validation)

In [None]:
# Cross-validate (cv=5) on the full datasets: speech features, text features,
# and the speech labels as the target.
# NOTE(review): `voter` was already fitted above; whether cross_validate refits
# copies or this instance is not visible here — confirm in ensemble.VoteEnsemble.
voter_k_fold = voter.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [6]:
# Mean accuracy / macro F1 / macro precision / macro recall of the soft voter.
print_scores(voter_k_fold)

Accuracy:  0.7875606652389611
F1 Macro:  0.7945552350556448
Precision Macro:  0.8078514215650892
Recall Macro:  0.7870366953657099


# Vote Ensemble (Hard)

In [7]:
# Same base models as the soft voter, but built with type='hard'.
# NOTE(review): presumably majority vote on predicted classes — confirm in
# ensemble.VoteEnsemble.
voter2 = VoteEnsemble(get_speech_models(), get_text_models(), type='hard')


In [None]:
# Fit on the training split of both modalities. Only the speech labels are
# passed — assumes speech and text rows are aligned; TODO confirm.
voter2.fit(x_train_s, x_train_t, y_train_s)

In [10]:
# Predict on the held-out test features of both modalities, then print the
# per-class precision/recall/F1 report (4 decimals) against the speech labels.
voter_result2 = voter2.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result2, digits=4))

              precision    recall  f1-score   support

         ang     0.8678    0.7260    0.7906       208
         hap     0.7799    0.7603    0.7700       317
         neu     0.7102    0.7371    0.7234       369
         sad     0.7095    0.8028    0.7533       213

    accuracy                         0.7543      1107
   macro avg     0.7669    0.7565    0.7593      1107
weighted avg     0.7597    0.7543    0.7551      1107



In [11]:
# Save the hard-voting ensemble; the path is relative to the working directory.
voter2.save('hard_voter_all.pkl')

## K-Fold Test (5-fold cross-validation)

In [None]:
# Cross-validate (cv=5) on the full datasets: speech features, text features,
# and the speech labels as the target.
# NOTE(review): `voter2` was already fitted above; whether cross_validate refits
# copies or this instance is not visible here — confirm in ensemble.VoteEnsemble.
voter_k_fold2 = voter2.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [9]:
# Mean accuracy / macro F1 / macro precision / macro recall of the hard voter.
print_scores(voter_k_fold2)

Accuracy:  0.748685743035851
F1 Macro:  0.754981688567011
Precision Macro:  0.7690791659637851
Recall Macro:  0.748525893557902


# Blend Ensemble

In [10]:
# Meta-classifier for the blend; random_state is fixed for reproducibility.
meta_cls = XGBClassifier(random_state=42)
# Blend ensemble over fresh speech and text base models plus the meta-classifier.
# NOTE(review): how BlendEnsemble holds out data for the meta-classifier is not
# visible here — confirm in ensemble.BlendEnsemble.
blender = BlendEnsemble(get_speech_models(), get_text_models(), meta_cls)

In [None]:
# Fit on the training split of both modalities. Only the speech labels are
# passed — assumes speech and text rows are aligned; TODO confirm.
blender.fit(x_train_s, x_train_t, y_train_s)

In [16]:
# Predict on the held-out test features of both modalities, then print the
# per-class precision/recall/F1 report (4 decimals) against the speech labels.
blender_result = blender.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, blender_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8150    0.7837    0.7990       208
         hap     0.7600    0.7792    0.7695       317
         neu     0.7515    0.6965    0.7229       369
         sad     0.7333    0.8263    0.7770       213

    accuracy                         0.7615      1107
   macro avg     0.7649    0.7714    0.7671      1107
weighted avg     0.7624    0.7615    0.7610      1107



In [15]:
# Save the blend ensemble; the path is relative to the working directory.
blender.save('blender_all.pkl')

## K-Fold Test (5-fold cross-validation)

In [None]:
# Cross-validate (cv=5) on the full datasets: speech features, text features,
# and the speech labels as the target.
# NOTE(review): `blender` was already fitted above; whether cross_validate refits
# copies or this instance is not visible here — confirm in ensemble.BlendEnsemble.
blender_k_fold = blender.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [12]:
# Mean accuracy / macro F1 / macro precision / macro recall of the blender.
print_scores(blender_k_fold)

Accuracy:  0.7832193945809259
F1 Macro:  0.7898163081537071
Precision Macro:  0.7915814696765354
Recall Macro:  0.7891792937452807


# Stack Ensemble

In [13]:
# Separate meta-classifier instance for the stack, with the same fixed seed.
meta_cls2 = XGBClassifier(random_state=42)
# Stack ensemble over fresh speech and text base models plus the meta-classifier.
# NOTE(review): cv=5 and n_jobs=-1 are passed straight to StackEnsemble —
# presumably 5 internal folds run on all cores; confirm in ensemble.StackEnsemble.
stacker = StackEnsemble(get_speech_models(), get_text_models(), meta_cls2, cv=5, n_jobs=-1)

In [None]:

# Fit on the training split of both modalities. Only the speech labels are
# passed — assumes speech and text rows are aligned; TODO confirm.
stacker.fit(x_train_s, x_train_t, y_train_s)

In [7]:
# Predict on the held-out test features of both modalities, then print the
# per-class precision/recall/F1 report (4 decimals) against the speech labels.
stacker_result = stacker.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, stacker_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8700    0.8365    0.8529       208
         hap     0.7981    0.8107    0.8044       317
         neu     0.7736    0.7317    0.7521       369
         sad     0.7542    0.8357    0.7929       213

    accuracy                         0.7940      1107
   macro avg     0.7990    0.8037    0.8006      1107
weighted avg     0.7950    0.7940    0.7939      1107



In [None]:
# Save the stack ensemble; the path is relative to the working directory.
stacker.save('stacker_all.pkl')

## K-Fold Test (5-fold cross-validation)

In [None]:
# Cross-validate (cv=5) on the full datasets: speech features, text features,
# and the speech labels as the target.
# NOTE(review): `stacker` was already fitted above; whether cross_validate refits
# copies or this instance is not visible here — confirm in ensemble.StackEnsemble.
stacker_k_fold = stacker.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [15]:
# Mean accuracy / macro F1 / macro precision / macro recall of the stacker.
print_scores(stacker_k_fold)

Accuracy:  0.808171736328575
F1 Macro:  0.8145182932816365
Precision Macro:  0.8176545705817213
Recall Macro:  0.8122184556912864
