In [1]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

from process_dataset import speech_features, text_features
import numpy as np

from ensemble import StackEnsemble, VoteEnsemble, BlendEnsemble

from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost

import warnings
# Install a catch-all 'ignore' filter: every warning category raised in this
# kernel process is hidden from here on.
# NOTE(review): this also hides convergence/deprecation warnings from the
# estimators trained below; consider narrowing it with category=/module=.
warnings.filterwarnings('ignore') 

# Methods

In [2]:
def get_speech_models():
    """Build the speech-side base learners as ``(name, model)`` pairs.

    Returns:
        list: Tuples of a display name and the object returned by the
        matching ``speech_*`` factory, in the order SVM, Logistic
        Regression, MLP, XGBoost.
    """
    # Random Forest (speech_random_forest.get_random_forest()) and
    # Multinomial Naive Bayes (speech_naive_bayes.get_naive_bayes()) are
    # currently excluded from the speech line-up.
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', speech_mlp.get_mlp()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Build the text-side base learners as ``(name, model)`` pairs.

    Returns:
        list: Tuples of a display name and the object returned by the
        matching ``text_*`` factory, in the order SVM, Random Forest,
        Logistic Regression, MLP, XGBoost.
    """
    # Multinomial Naive Bayes (text_naive_bayes.get_naive_bayes()) is
    # currently excluded from the text line-up.
    factories = (
        ('Support Vector Machine', text_svm.get_svm),
        ('Random Forest Classifier', text_random_forest.get_random_forest),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression),
        ('MLP Classifier', text_mlp.get_mlp),
        ('XGBoost', text_xgboost.get_xgb),
    )
    # Instantiate each model in listing order.
    return [(label, build()) for label, build in factories]

def print_scores(scores):
    """Print the mean of each cross-validation metric, one per line.

    Args:
        scores: Mapping that provides the keys ``'test_accuracy'``,
            ``'test_f1_macro'``, ``'test_precision_macro'`` and
            ``'test_recall_macro'``; each value is averaged with ``np.mean``.
    """
    metric_rows = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for caption, key in metric_rows:
        print(caption, np.mean(scores[key]))


# Train Test Splits 

In [3]:
# Train/test partitions for each modality (speech = *_s, text = *_t).
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()

# Every ensemble below is fitted and scored with the speech labels only, so the
# text split has to describe the same samples. Fail fast if the two modalities
# were split into different numbers of rows.
assert len(y_train_s) == len(y_train_t), 'speech/text training splits differ in size'
assert len(y_test_s) == len(y_test_t), 'speech/text test splits differ in size'

# Data handed to the K-fold cells: index 0 is passed as features and
# index 1 (speech only) as labels.
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# Vote Ensemble (Soft)

In [None]:
# Voting ensemble in 'soft' mode over freshly built speech and text base models.
# NOTE(review): 'soft' presumably averages predicted probabilities - confirm in ensemble.VoteEnsemble.
voter = VoteEnsemble(get_speech_models(), get_text_models(), type='soft')


In [None]:
# Fit on the paired speech/text training features; the speech split's labels
# are passed as the single target vector.
voter.fit(x_train_s, x_train_t, y_train_s)

In [None]:
# Predict on the held-out speech/text test features, then print per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
voter_result = voter.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result, digits=4))

In [None]:
# Save the soft voter to 'soft_voter.pkl' (relative path).
# NOTE(review): the on-disk format is whatever VoteEnsemble.save writes - confirm before loading elsewhere.
voter.save('soft_voter.pkl')

## K-Fold Test (Soft Vote, 5-fold cross-validation)

In [None]:
# 5-fold cross-validation of the soft voter on the get_data() output:
# speech features, text features, and the speech labels as target.
voter_k_fold = voter.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Mean accuracy and macro F1/precision/recall of the soft voter's K-fold scores.
print_scores(voter_k_fold)

# Vote Ensemble (Hard)

In [None]:
# Voting ensemble in 'hard' mode over freshly built speech and text base models.
# NOTE(review): 'hard' presumably takes a majority vote over predicted labels - confirm in ensemble.VoteEnsemble.
voter2 = VoteEnsemble(get_speech_models(), get_text_models(), type='hard')


In [None]:
# Fit the hard voter on the paired speech/text training features with the
# speech split's labels as the single target vector.
voter2.fit(x_train_s, x_train_t, y_train_s)

In [None]:
# Predict on the held-out speech/text test features, then print per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
voter_result2 = voter2.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, voter_result2, digits=4))

In [None]:
# Save the hard voter to 'hard_voter.pkl' (relative path).
# NOTE(review): the on-disk format is whatever VoteEnsemble.save writes - confirm before loading elsewhere.
voter2.save('hard_voter.pkl')

## K-Fold Test (Hard Vote, 5-fold cross-validation)

In [None]:
# 5-fold cross-validation of the hard voter on the get_data() output:
# speech features, text features, and the speech labels as target.
voter_k_fold2 = voter2.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Mean accuracy and macro F1/precision/recall of the hard voter's K-fold scores.
print_scores(voter_k_fold2)

# Blend Ensemble

In [None]:
# Meta-classifier for blending: logistic regression with the liblinear solver
# and a fixed random_state so repeated runs use the same seed.
meta_cls = LogisticRegression(solver='liblinear', random_state=42)
# Blend ensemble over freshly built speech and text base models plus the meta-classifier.
blender = BlendEnsemble(get_speech_models(), get_text_models(), meta_cls)

In [None]:
# Fit the blender on the paired speech/text training features with the
# speech split's labels as the single target vector.
blender.fit(x_train_s, x_train_t, y_train_s)

In [None]:
# Predict on the held-out speech/text test features, then print per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
blender_result = blender.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, blender_result, digits=4))

In [None]:
# Save the blender to 'blender.pkl' (relative path).
# NOTE(review): the on-disk format is whatever BlendEnsemble.save writes - confirm before loading elsewhere.
blender.save('blender.pkl')

## K-Fold Test (Blend Ensemble, 5-fold cross-validation)

In [None]:
# 5-fold cross-validation of the blender on the get_data() output:
# speech features, text features, and the speech labels as target.
blender_k_fold = blender.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Mean accuracy and macro F1/precision/recall of the blender's K-fold scores.
print_scores(blender_k_fold)

# Stack Ensemble

In [4]:
# Meta-classifier for stacking: logistic regression with the liblinear solver
# and a fixed random_state. n_jobs is deliberately not set here: scikit-learn
# ignores it for solver='liblinear' (and warns at fit time), so parallelism is
# requested only on the ensemble itself.
meta_cls2 = LogisticRegression(solver='liblinear', random_state=42)
# Stack ensemble over freshly built speech and text base models, configured
# with cv=5 and n_jobs=-1.
# NOTE(review): cv/n_jobs semantics are defined by ensemble.StackEnsemble - confirm there.
stacker = StackEnsemble(get_speech_models(), get_text_models(), meta_cls2, cv=5, n_jobs=-1)

In [5]:

# Fit the stacker on the paired speech/text training features with the speech
# split's labels as the single target vector. Per-model training progress is
# printed in the cell output.
stacker.fit(x_train_s, x_train_t, y_train_s)

Training Support Vector Machine (Speech) ...
Training Logistic Regression (Speech) ...
Training MLP Classifier (Speech) ...




Training XGBoost (Speech) ...


  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index



Training Support Vector Machine (Text) ...
Training Random Forest Classifier (Text) ...
Training Logistic Regression (Text) ...
Training MLP Classifier (Text) ...
Training XGBoost (Text) ...


  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index


Training Meta Classifier ...
Re-Training Base Support Vector Machine (Speech) ...
Re-Training Base Logistic Regression (Speech) ...
Re-Training Base MLP Classifier (Speech) ...
Re-Training Base XGBoost (Speech) ...
Re-Training Base Support Vector Machine (Text) ...
Re-Training Base Random Forest Classifier (Text) ...
Re-Training Base Logistic Regression (Text) ...
Re-Training Base MLP Classifier (Text) ...
Re-Training Base XGBoost (Text) ...


In [6]:
# Predict on the held-out speech/text test features, then print per-class
# precision/recall/F1 (4 decimal places) against the speech test labels.
stacker_result = stacker.predict(x_test_s, x_test_t)
print(classification_report(y_test_s, stacker_result, digits=4))

              precision    recall  f1-score   support

         ang     0.8165    0.8558    0.8357       208
         hap     0.8147    0.8044    0.8095       317
         neu     0.7798    0.7100    0.7433       369
         sad     0.7500    0.8451    0.7947       213

    accuracy                         0.7904      1107
   macro avg     0.7902    0.8038    0.7958      1107
weighted avg     0.7909    0.7904    0.7895      1107



In [7]:
# Save the stacker to 'stacker.pkl' (relative path).
# NOTE(review): the on-disk format is whatever StackEnsemble.save writes - confirm before loading elsewhere.
stacker.save('stacker.pkl')

## K-Fold Test (Stack Ensemble, 5-fold cross-validation)

In [None]:
# 5-fold cross-validation of the stacker on the get_data() output:
# speech features, text features, and the speech labels as target.
stacker_k_fold = stacker.cross_validate(speech_x_y[0], text_x_y[0], speech_x_y[1], cv=5)

In [None]:
# Mean accuracy and macro F1/precision/recall of the stacker's K-fold scores.
print_scores(stacker_k_fold)