In [1]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

from process_dataset import speech_features, text_features
import numpy as np

from ensemble import StackEnsemble, VoteEnsemble, BlendEnsemble

from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost

import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides scikit-learn convergence,
# deprecation and misconfiguration warnings — consider narrowing it to
# specific categories or modules.
warnings.filterwarnings('ignore') 

# Methods

In [3]:
def get_speech_models():
    """Build the list of base models for the speech modality.

    Returns:
        list: ``(display_name, estimator)`` tuples, one per enabled model, in
        the order SVM, Logistic Regression, MLP, XGBoost. Each estimator is
        obtained from the corresponding ``speech_*`` factory module on every
        call.
    """
    # Currently disabled candidates (kept for reference):
    #   ('Random Forest Classifier', speech_random_forest.get_random_forest())
    #   ('Multinomial Naive Bayes', speech_naive_bayes.get_naive_bayes())
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', speech_mlp.get_mlp()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Build the list of base models for the text modality.

    Returns:
        list: ``(display_name, estimator)`` tuples, one per enabled model, in
        the order SVM, Random Forest, Logistic Regression, MLP, XGBoost. Each
        estimator is obtained from the corresponding ``text_*`` factory module
        on every call.
    """
    # Currently disabled candidate (kept for reference):
    #   ('Multinomial Naive Bayes', text_naive_bayes.get_naive_bayes())
    return [
        ('Support Vector Machine', text_svm.get_svm()),
        ('Random Forest Classifier', text_random_forest.get_random_forest()),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', text_mlp.get_mlp()),
        ('XGBoost', text_xgboost.get_xgb()),
    ]

def print_scores(scores):
    """Print the mean of each cross-validation metric found in ``scores``.

    Args:
        scores: Mapping that provides the keys ``test_accuracy``,
            ``test_f1_macro``, ``test_precision_macro`` and
            ``test_recall_macro``; each value must be something ``np.mean``
            can average (e.g. a list or array of per-fold scores).

    Raises:
        KeyError: If one of the expected keys is missing (metrics listed
            before the missing one are still printed).
    """
    # (label, key) pairs in the order they are reported.
    report_rows = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, metric_key in report_rows:
        print(label, np.mean(scores[metric_key]))


# Train/Test Splits

In [4]:
# Per-modality train/test splits: `_s` = speech features, `_t` = text features.
# NOTE(review): later cells fit and score the text features against the
# speech-side labels (y_train_s / y_test_s), so the two splits are assumed to
# be row-aligned with identical labels — TODO confirm in process_dataset.
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()
# Unsplit data for the cross-validation cells; index 0 is passed as the
# features and index 1 as the labels there.
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# Vote Ensemble (Soft)

In [9]:
# Build a voting ensemble over fresh speech and text base-model lists.
# NOTE(review): type='soft' presumably combines predicted probabilities rather
# than class votes — confirm in ensemble.VoteEnsemble.
voter = VoteEnsemble(get_speech_models(), get_text_models(), type='soft')


In [None]:
# Fit on the paired speech/text training features. Only the speech-side labels
# are passed, so both modalities are assumed to share row order — TODO confirm.
voter.fit(x_train_s, x_train_t, y_train_s)

In [11]:
# Predict on the test split of both modalities and print per-class
# precision/recall/F1 (4 decimal places) against the speech-side test labels.
voter_result = voter.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8950    0.7788    0.8329       208
         hap     0.8203    0.7918    0.8058       317
         neu     0.7455    0.7859    0.7652       369
         sad     0.7619    0.8263    0.7928       213

    accuracy                         0.7940      1107
   macro avg     0.8057    0.7957    0.7992      1107
weighted avg     0.7982    0.7940    0.7948      1107



In [12]:
# Save the soft-voting ensemble to 'soft_voter.pkl' in the working directory
# (presumably a pickle, judging by the extension — confirm in ensemble.py).
voter.save('soft_voter.pkl')

K-Fold Cross-Validation Test

In [None]:
# 5-fold cross-validation of the soft voter: speech and text features come from
# index 0 of each get_data() result, labels from the speech side (index 1).
voter_k_fold = voter.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Report mean accuracy and macro F1/precision/recall for the soft voter's cross-validation.
print_scores(voter_k_fold)

# Vote Ensemble (Hard)

In [13]:
# Build a second voting ensemble over fresh base-model lists, this time with type='hard'.
# NOTE(review): 'hard' presumably means majority vote over predicted classes —
# confirm in ensemble.VoteEnsemble.
voter2 = VoteEnsemble(get_speech_models(), get_text_models(), type='hard')


In [None]:
# Fit the hard voter on the paired speech/text training features with the
# speech-side labels (assumes both modalities share row order — TODO confirm).
voter2.fit(x_train_s, x_train_t, y_train_s)

In [15]:
# Predict on the test split with the hard voter and print the per-class report
# (4 decimal places) against the speech-side test labels.
voter_result2 = voter2.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result2, digits=4))

              precision    recall  f1-score   support

         ang     0.8508    0.7404    0.7918       208
         hap     0.7540    0.7350    0.7444       317
         neu     0.6942    0.7507    0.7214       369
         sad     0.7248    0.7418    0.7332       213

    accuracy                         0.7425      1107
   macro avg     0.7560    0.7420    0.7477      1107
weighted avg     0.7467    0.7425    0.7435      1107



In [16]:
# Save the hard-voting ensemble to 'hard_voter.pkl' in the working directory
# (presumably a pickle, judging by the extension — confirm in ensemble.py).
voter2.save('hard_voter.pkl')

K-Fold Cross-Validation Test

In [None]:
# 5-fold cross-validation of the hard voter: speech and text features come from
# index 0 of each get_data() result, labels from the speech side (index 1).
voter_k_fold2 = voter2.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Report mean accuracy and macro F1/precision/recall for the hard voter's cross-validation.
print_scores(voter_k_fold2)

# Blend Ensemble

In [17]:
# Meta-classifier passed to the blend ensemble; random_state is fixed so the
# liblinear fit is repeatable.
meta_cls = LogisticRegression(solver='liblinear', random_state=42)
# Blend ensemble over fresh speech and text base-model lists.
# NOTE(review): how BlendEnsemble carves out its hold-out set for the
# meta-classifier is not visible here — see ensemble.py.
blender = BlendEnsemble(get_speech_models(), get_text_models(), meta_cls)

In [18]:
# Fit the blend ensemble on the paired training features with the speech-side
# labels. The recorded output lists each speech model, then each text model,
# then the meta-classifier.
blender.fit(x_train_s, x_train_t, y_train_s)

Training Support Vector Machine (Speech) ...
Training Logistic Regression (Speech) ...
Training MLP Classifier (Speech) ...
Training XGBoost (Speech) ...
Training Support Vector Machine (Text) ...
Training Random Forest Classifier (Text) ...
Training Logistic Regression (Text) ...
Training MLP Classifier (Text) ...
Training XGBoost (Text) ...
Training Meta Classifier ...


In [19]:
# Predict on the test split with the blend ensemble and print the per-class
# report (4 decimal places) against the speech-side test labels.
blender_result = blender.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, blender_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8095    0.8173    0.8134       208
         hap     0.7809    0.7981    0.7894       317
         neu     0.7957    0.7073    0.7489       369
         sad     0.7388    0.8498    0.7904       213

    accuracy                         0.7814      1107
   macro avg     0.7812    0.7931    0.7855      1107
weighted avg     0.7831    0.7814    0.7806      1107



In [20]:
# Save the blend ensemble to 'blender.pkl' in the working directory
# (presumably a pickle, judging by the extension — confirm in ensemble.py).
blender.save('blender.pkl')

K-Fold Cross-Validation Test

In [None]:
# 5-fold cross-validation of the blend ensemble: speech and text features come
# from index 0 of each get_data() result, labels from the speech side (index 1).
blender_k_fold = blender.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Report mean accuracy and macro F1/precision/recall for the blend ensemble's cross-validation.
print_scores(blender_k_fold)

# Stack Ensemble

In [5]:
# Meta-classifier for the stacking ensemble. `n_jobs` is deliberately not set:
# scikit-learn ignores it when solver='liblinear' and only emits a warning
# (hidden here by the global warnings filter), so passing n_jobs=-1 had no
# effect. The configuration now matches the blend ensemble's meta-classifier.
meta_cls2 = LogisticRegression(solver='liblinear', random_state=42)
# Stacking ensemble over fresh speech and text base-model lists with cv=5.
# NOTE(review): n_jobs=-1 is forwarded to StackEnsemble and presumably
# parallelises its internal cross-validation — confirm in ensemble.py.
stacker = StackEnsemble(get_speech_models(), get_text_models(), meta_cls2, cv=5, n_jobs=-1)

In [None]:

# Fit the stacking ensemble on the paired training features with the speech-side
# labels (assumes both modalities share row order — TODO confirm).
stacker.fit(x_train_s, x_train_t, y_train_s)

In [7]:
# Predict on the test split with the stacking ensemble and print the per-class
# report (4 decimal places) against the speech-side test labels.
stacker_result = stacker.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, stacker_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8227    0.8702    0.8458       208
         hap     0.8328    0.8013    0.8167       317
         neu     0.7851    0.7127    0.7472       369
         sad     0.7409    0.8592    0.7957       213

    accuracy                         0.7958      1107
   macro avg     0.7954    0.8108    0.8013      1107
weighted avg     0.7973    0.7958    0.7949      1107



In [8]:
# Save the stacking ensemble to 'stacker.pkl' in the working directory
# (presumably a pickle, judging by the extension — confirm in ensemble.py).
stacker.save('stacker.pkl')

K-Fold Cross-Validation Test

In [None]:
# 5-fold cross-validation of the stacking ensemble: speech and text features
# come from index 0 of each get_data() result, labels from the speech side (index 1).
stacker_k_fold = stacker.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Report mean accuracy and macro F1/precision/recall for the stacking ensemble's cross-validation.
print_scores(stacker_k_fold)