# Imports

In [1]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from speech_models import speech_logistic_regression
from process_dataset import speech_features, text_features
from ensemble import SpeechTextEnsemble, StackEnsemble, VoteEnsemble, BlendEnsemble
import numpy as np
import pandas as pd
import pickle

  from pandas import MultiIndex, Int64Index


# Methods

In [3]:
def check_accuracy(model, data_type):
    """Fit ``model`` on one modality's train split and report on its test split.

    Args:
        model: object exposing ``fit(x, y)`` and ``predict(x)``.
        data_type: ``'text'`` selects ``text_features``; any other value
            selects ``speech_features``.

    Returns:
        Whatever ``classification_report(y_test, predictions, digits=4)``
        returns for the selected test split.
    """
    # Only the exact string 'text' picks the text features; everything else
    # falls through to speech.
    feature_source = text_features if data_type == 'text' else speech_features
    x_train, x_test, y_train, y_test = feature_source.get_train_test()

    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    return classification_report(y_test, predictions, digits=4)

def print_scores(scores):
    """Print the mean of four entries of ``scores``, one per line.

    Args:
        scores: mapping indexed by ``'test_accuracy'``, ``'test_f1_macro'``,
            ``'test_precision_macro'`` and ``'test_recall_macro'``; each value
            is passed to ``np.mean``.
    """
    # (label, key) pairs in the order they are printed.
    labelled_keys = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, key in labelled_keys:
        print(label, np.mean(scores[key]))


# Main

In [4]:
# Meta-classifier handed to the ensemble constructors below through their
# `meta_cls` argument (StackEnsemble, and the commented-out BlendEnsemble).
# random_state is fixed at 42 so the same seed is used on every run.
meta_cls = LogisticRegression(solver='liblinear', random_state=42)

### Speech Ensemble Models

In [None]:

# Choose exactly one speech ensemble variant; the commented-out lines are the
# alternatives (blending, soft voting, hard voting).
# speech_model = BlendEnsemble(meta_cls=meta_cls, data_type='speech')
speech_model = StackEnsemble(meta_cls=meta_cls, data_type='speech')
# speech_model = VoteEnsemble(type='soft', data_type='speech')
# speech_model = VoteEnsemble(type='hard', data_type='speech')

# check_accuracy fits speech_model on the speech train split and returns the
# classification report computed on the speech test split.
speech_report = check_accuracy(speech_model, data_type='speech')

In [8]:
# Show the report produced by check_accuracy for the speech ensemble.
print(speech_report)
# Presumably serializes the fitted ensemble to this path — confirm in ensemble.py.
# NOTE(review): the file name is hard-coded with a 'stack_' prefix; rename it if
# a different ensemble variant is selected where speech_model is built.
speech_model.save('stack_speech.pkl')

              precision    recall  f1-score   support

         ang       0.70      0.71      0.70       208
         hap       0.66      0.62      0.64       317
         neu       0.69      0.67      0.68       369
         sad       0.67      0.76      0.71       213

    accuracy                           0.68      1107
   macro avg       0.68      0.69      0.68      1107
weighted avg       0.68      0.68      0.68      1107



### Text Ensemble Models

In [None]:
# Choose exactly one text ensemble variant; the commented-out lines are the
# alternatives (blending, soft voting, hard voting).
# text_model = BlendEnsemble(meta_cls=meta_cls, data_type='text')
text_model = StackEnsemble(meta_cls=meta_cls, data_type='text')
# text_model = VoteEnsemble(type='soft', data_type='text')
# text_model = VoteEnsemble(type='hard', data_type='text')

# check_accuracy fits text_model on the text train split and returns the
# classification report computed on the text test split.
text_report = check_accuracy(text_model, data_type='text')

In [7]:
# Show the report produced by check_accuracy for the text ensemble.
print(text_report)
# Presumably serializes the fitted ensemble to this path — confirm in ensemble.py.
# NOTE(review): the file name is hard-coded with a 'stack_' prefix; rename it if
# a different ensemble variant is selected where text_model is built.
text_model.save('stack_text.pkl')

              precision    recall  f1-score   support

         ang       0.80      0.68      0.74       208
         hap       0.70      0.68      0.69       317
         neu       0.61      0.67      0.63       369
         sad       0.66      0.66      0.66       213

    accuracy                           0.67      1107
   macro avg       0.69      0.67      0.68      1107
weighted avg       0.68      0.67      0.67      1107



### Speech + Text Ensemble Model

In [12]:
# Alternative: hand over the speech/text ensembles built above and let the
# combined model refit them (fit_bases=True).
# combined_model = SpeechTextEnsemble(speech_model, text_model, fit_bases=True)
# NOTE(review): no base models are passed here and fit_bases=False, so this
# presumably relies on SpeechTextEnsemble's own defaults for its base models
# (e.g. previously saved ones) — confirm in ensemble.py.
combined_model = SpeechTextEnsemble(fit_bases=False)

##### Accuracy Test

In [13]:
# Train/test splits for each modality (suffix _s = speech, _t = text).
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()
# Only the speech labels are passed as targets; y_train_t is not used.
# NOTE(review): this assumes both get_train_test() calls return the same
# samples in the same order — TODO confirm the two splits are row-aligned.
combined_model.fit(x_train_s, x_train_t, y_train_s)
# Predictions for the test split, consumed by the classification report cell.
result = combined_model.predict(x_test_s, x_test_t)

##### K Fold Test

In [None]:
# Full (unsplit) data per modality; index 0 and index 1 are used below as the
# features and the labels respectively — presumably an (X, y) pair, confirm
# against process_dataset.
speech_x_y = speech_features.get_data()
text_x_y = text_features.get_data()

# 5-fold cross-validation of the combined model, using the speech labels as
# targets. The text features are densified with .toarray() first, which
# suggests a sparse matrix — TODO confirm.
# NOTE(review): the accuracy-test cell passes x_train_t to fit() without
# .toarray(); verify both paths accept the format they are given.
combined_k_fold = combined_model.cross_validate(speech_x_y[0], text_x_y[0].toarray(), speech_x_y[1], cv=5)

# Result

In [14]:
# Report for the combined model's test predictions (`result`), scored against
# the speech split's labels y_test_s; both come from the accuracy-test cell.
print(classification_report(y_test_s, result, digits=4))

              precision    recall  f1-score   support

         ang     0.8028    0.8221    0.8124       208
         hap     0.7974    0.7697    0.7833       317
         neu     0.7543    0.7073    0.7301       369
         sad     0.7355    0.8357    0.7824       213

    accuracy                         0.7715      1107
   macro avg     0.7725    0.7837    0.7770      1107
weighted avg     0.7722    0.7715    0.7708      1107



In [None]:
# Print the mean of each test_* entry (accuracy, macro F1, macro precision,
# macro recall) in the cross-validation result from the k-fold cell.
print_scores(combined_k_fold)

# Save Model

In [11]:
# Persist the combined ensemble through its own save() method.
# NOTE(review): the recorded execution counts show this cell ran before the
# cell that constructs combined_model; rerun the notebook top to bottom so the
# saved file matches the model whose results are reported above.
combined_model.save('final_model.pkl')