In [1]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

from process_dataset import speech_features, text_features
import numpy as np

from ensemble import StackEnsemble, VoteEnsemble, BlendEnsemble

from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost

import warnings
# Silence every warning for the rest of the session to keep cell output compact.
# NOTE(review): this is a blanket filter, so library notices (e.g. deprecations)
# are hidden as well — comment it out when debugging.
warnings.filterwarnings('ignore') 

# Methods

In [2]:
def get_speech_models():
    """Return the base learners used on the speech features.

    Returns
    -------
    list of (str, estimator)
        ``(display name, model)`` pairs, in the order SVM, logistic
        regression, XGBoost. Each model is obtained from the matching
        ``speech_*`` module's factory function on every call.
    """
    # Currently disabled candidates (not part of the returned list):
    # Random Forest Classifier, Multinomial Naive Bayes, MLP Classifier.
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Return the base learners used on the text features.

    Returns
    -------
    list of (str, estimator)
        ``(display name, model)`` pairs, in the order SVM, random forest,
        logistic regression, MLP. Each model is obtained from the matching
        ``text_*`` module's factory function on every call.
    """
    # Currently disabled candidates (not part of the returned list):
    # Multinomial Naive Bayes, XGBoost.
    return [
        ('Support Vector Machine', text_svm.get_svm()),
        ('Random Forest Classifier', text_random_forest.get_random_forest()),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', text_mlp.get_mlp()),
    ]

def print_scores(scores):
    """Print the mean of each evaluation metric stored in ``scores``.

    Parameters
    ----------
    scores : mapping
        Must provide the keys ``'test_accuracy'``, ``'test_f1_macro'``,
        ``'test_precision_macro'`` and ``'test_recall_macro'``; each value
        is averaged with ``np.mean``.

    Returns
    -------
    None
        The four averages are written to stdout, one per line.
    """
    # (label, key) pairs in the order they are reported.
    report_rows = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, key in report_rows:
        print(label, np.mean(scores[key]))


# Train Test Splits 

In [3]:
# Hold-out splits per modality, unpacked as (x_train, x_test, y_train, y_test).
# NOTE(review): later cells pair the text features with the speech labels
# (y_train_s / y_test_s), so both splits are assumed to be row-aligned with
# identical labels — confirm in process_dataset.
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()
# Full datasets for the cross-validation cells, which index them as
# [0] for features and [1] for labels.
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# Vote Ensemble (Soft)

In [7]:
# Vote ensemble in 'soft' mode over the speech and text base models.
voter = VoteEnsemble(get_speech_models(), get_text_models(), type='soft')


In [None]:
# Fit on both modalities' training features; the speech labels are the only
# target passed (assumes text rows share them — TODO confirm).
voter.fit(x_train_s, x_train_t, y_train_s)

In [9]:
# Predict on the held-out split and report per-class metrics to 4 decimals,
# scored against the speech test labels.
voter_result = voter.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8777    0.7933    0.8333       208
         hap     0.8205    0.8076    0.8140       317
         neu     0.7587    0.7669    0.7628       369
         sad     0.7564    0.8310    0.7919       213

    accuracy                         0.7958      1107
   macro avg     0.8033    0.7997    0.8005      1107
weighted avg     0.7983    0.7958    0.7963      1107



In [9]:
# Save the soft-vote ensemble; the path has no directory part, so it is
# relative to the current working directory.
voter.save('soft_voter.pkl')

## K-Fold Cross-Validation (Soft Vote)

In [None]:
# Cross-validate with cv=5 on the full data: features from index 0 of each
# dataset, labels from index 1 of the speech dataset.
# NOTE(review): whether cross_validate refits fresh copies of the already
# fitted `voter` is not visible here — confirm in ensemble.py.
voter_k_fold = voter.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the averaged accuracy / F1 / precision / recall of the soft-vote run.
print_scores(voter_k_fold)

# Vote Ensemble (Hard)

In [10]:
# Vote ensemble in 'hard' mode over the same speech and text base models.
voter2 = VoteEnsemble(get_speech_models(), get_text_models(), type='hard')


In [None]:
# Fit on both modalities' training features; the speech labels are the only
# target passed (assumes text rows share them — TODO confirm).
voter2.fit(x_train_s, x_train_t, y_train_s)

In [12]:
# Predict on the held-out split and report per-class metrics to 4 decimals,
# scored against the speech test labels.
voter_result2 = voter2.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result2, digits=4))

              precision    recall  f1-score   support

         ang     0.8424    0.7452    0.7908       208
         hap     0.7492    0.7445    0.7468       317
         neu     0.6888    0.7317    0.7096       369
         sad     0.7222    0.7324    0.7273       213

    accuracy                         0.7380      1107
   macro avg     0.7506    0.7384    0.7436      1107
weighted avg     0.7414    0.7380    0.7389      1107



In [16]:
# Save the hard-vote ensemble; the path has no directory part, so it is
# relative to the current working directory.
voter2.save('hard_voter.pkl')

## K-Fold Cross-Validation (Hard Vote)

In [None]:
# Cross-validate with cv=5 on the full data: features from index 0 of each
# dataset, labels from index 1 of the speech dataset.
# NOTE(review): whether cross_validate refits fresh copies of the already
# fitted `voter2` is not visible here — confirm in ensemble.py.
voter_k_fold2 = voter2.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the averaged accuracy / F1 / precision / recall of the hard-vote run.
print_scores(voter_k_fold2)

# Blend Ensemble

In [13]:
# Meta-classifier handed to the blend ensemble; random_state is fixed at 42
# for repeatable fits.
meta_cls = LogisticRegression(solver='liblinear', random_state=42)
# Blend ensemble over the speech and text base models with meta_cls on top.
blender = BlendEnsemble(get_speech_models(), get_text_models(), meta_cls)

In [None]:
# Fit on both modalities' training features; the speech labels are the only
# target passed (assumes text rows share them — TODO confirm).
blender.fit(x_train_s, x_train_t, y_train_s)

In [15]:
# Predict on the held-out split and report per-class metrics to 4 decimals,
# scored against the speech test labels.
blender_result = blender.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, blender_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8143    0.8221    0.8182       208
         hap     0.7829    0.8076    0.7950       317
         neu     0.7957    0.7073    0.7489       369
         sad     0.7479    0.8498    0.7956       213

    accuracy                         0.7850      1107
   macro avg     0.7852    0.7967    0.7894      1107
weighted avg     0.7863    0.7850    0.7841      1107



In [20]:
# Save the blend ensemble; the path has no directory part, so it is
# relative to the current working directory.
blender.save('blender.pkl')

## K-Fold Cross-Validation (Blend)

In [None]:
# Cross-validate with cv=5 on the full data: features from index 0 of each
# dataset, labels from index 1 of the speech dataset.
# NOTE(review): whether cross_validate refits fresh copies of the already
# fitted `blender` is not visible here — confirm in ensemble.py.
blender_k_fold = blender.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the averaged accuracy / F1 / precision / recall of the blend run.
print_scores(blender_k_fold)

# Stack Ensemble

In [4]:
# Meta-classifier handed to the stack ensemble. n_jobs is deliberately not
# passed: scikit-learn ignores it for the 'liblinear' solver and emits a
# warning, so the estimator is configured like the blend meta-classifier
# (solver='liblinear', random_state=42).
meta_cls2 = LogisticRegression(solver='liblinear', random_state=42)
# Stack ensemble over the speech and text base models with cv=5 and n_jobs=-1;
# how StackEnsemble uses those two arguments is defined in ensemble.py.
stacker = StackEnsemble(get_speech_models(), get_text_models(), meta_cls2, cv=5, n_jobs=-1)

In [None]:

# Fit on both modalities' training features; the speech labels are the only
# target passed (assumes text rows share them — TODO confirm).
stacker.fit(x_train_s, x_train_t, y_train_s)

In [6]:
# Predict on the held-out split and report per-class metrics to 4 decimals,
# scored against the speech test labels.
stacker_result = stacker.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, stacker_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8227    0.8702    0.8458       208
         hap     0.8275    0.8170    0.8222       317
         neu     0.7946    0.7127    0.7514       369
         sad     0.7531    0.8592    0.8026       213

    accuracy                         0.8004      1107
   macro avg     0.7995    0.8148    0.8055      1107
weighted avg     0.8013    0.8004    0.7993      1107



In [7]:
# Save the stack ensemble; the path has no directory part, so it is
# relative to the current working directory.
stacker.save('stacker.pkl')

## K-Fold Cross-Validation (Stack)

In [None]:
# Cross-validate with cv=5 on the full data: features from index 0 of each
# dataset, labels from index 1 of the speech dataset.
# NOTE(review): whether cross_validate refits fresh copies of the already
# fitted `stacker` is not visible here — confirm in ensemble.py.
stacker_k_fold = stacker.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Print the averaged accuracy / F1 / precision / recall of the stack run.
print_scores(stacker_k_fold)