In [1]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from custom_stack import StackEnsembleCustom
from ensemble import SpeechTextEnsemble, StackEnsemble, VoteEnsemble, BlendEnsemble
from process_dataset import speech_features, text_features
from speech_models import speech_logistic_regression
from speech_models import speech_logistic_regression, speech_mlp, speech_naive_bayes, speech_random_forest, speech_svm, speech_xgboost
from text_models import text_logistic_regression, text_mlp, text_naive_bayes, text_random_forest, text_svm, text_xgboost



  from pandas import MultiIndex, Int64Index


In [2]:
def get_speech_models():
    """Return the base models used for the speech modality.

    Every entry is a ``(display_name, model)`` tuple, where ``model`` is
    whatever the corresponding ``speech_*`` module's factory function returns.
    The factories are called in the order the entries are listed.

    Returns:
        list: Six ``(str, model)`` tuples.
    """
    # TODO: add an LSTM model
    return [
        ('Support Vector Machine', speech_svm.get_svm()),
        ('Random Forest Classifier', speech_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', speech_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', speech_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', speech_mlp.get_mlp()),
        ('XGBoost', speech_xgboost.get_xgb()),
    ]

def get_text_models():
    """Return the base models used for the text modality.

    Every entry is a ``(display_name, model)`` tuple, where ``model`` is
    whatever the corresponding ``text_*`` module's factory function returns.
    The factories are called in the order the entries are listed.

    Returns:
        list: Six ``(str, model)`` tuples.
    """
    # TODO: add an LSTM model
    return [
        ('Support Vector Machine', text_svm.get_svm()),
        ('Random Forest Classifier', text_random_forest.get_random_forest()),
        ('Multinomial Naive Bayes', text_naive_bayes.get_naive_bayes()),
        ('Logistic Regression', text_logistic_regression.get_logistic_regression()),
        ('MLP Classifier', text_mlp.get_mlp()),
        ('XGBoost', text_xgboost.get_xgb()),
    ]

In [3]:
# Load the train/test splits for each modality (suffix _s = speech, _t = text).
# NOTE(review): later cells use only the speech labels (y_*_s) for the combined
# model, so the two splits are presumably row-aligned — confirm in process_dataset.
(x_train_s, x_test_s,
 y_train_s, y_test_s) = speech_features.get_train_test()
(x_train_t, x_test_t,
 y_train_t, y_test_t) = text_features.get_train_test()

In [4]:
# Meta-classifier that is handed to the custom stacking ensemble.
meta_cls = LogisticRegression(random_state=42, solver='liblinear')

# Combine the speech and text base models under the meta-classifier.
# NOTE(review): the trailing 5 is passed positionally; presumably a fold
# count — confirm against custom_stack.StackEnsembleCustom.
combined_model = StackEnsembleCustom(
    get_speech_models(),
    get_text_models(),
    meta_cls,
    5,
)

# The text features are converted with .toarray() before fitting; the speech
# labels are used as the shared target.
combined_model.fit(
    x_train_s,
    x_train_t.toarray(),
    y_train_s,
)



In [5]:
# Predict on the held-out split; text features are converted with .toarray()
# exactly as they were for fitting.
result = combined_model.predict(
    x_test_s,
    x_test_t.toarray(),
)

In [6]:
# Per-class precision/recall/F1 of the ensemble, scored against the speech labels.
print(
    classification_report(
        y_true=y_test_s,
        y_pred=result,
        digits=4,
    )
)

              precision    recall  f1-score   support

         ang     0.8072    0.8654    0.8353       208
         hap     0.8006    0.7855    0.7930       317
         neu     0.7704    0.6911    0.7286       369
         sad     0.7397    0.8404    0.7868       213

    accuracy                         0.7796      1107
   macro avg     0.7795    0.7956    0.7859      1107
weighted avg     0.7801    0.7796    0.7783      1107



In [None]:
# Persist the fitted ensemble so it can be reloaded later without retraining.
# NOTE: only unpickle this file from a trusted location — pickle.load can
# execute arbitrary code.
model_path = Path("trained_models") / "stack_custom.pkl"

# open(..., 'wb') raises FileNotFoundError when the directory is missing
# (e.g. on a fresh checkout), so create it first.
model_path.parent.mkdir(parents=True, exist_ok=True)

with model_path.open('wb') as f:
    pickle.dump(combined_model, f)