# Imports

In [2]:
from sklearn.metrics import classification_report
from speech_models import speech_logistic_regression
from process_dataset import speech_features, text_features
from ensemble import SpeechTextEnsemble, StackEnsemble, VoteEnsemble, BlendEnsemble
import numpy as np
import pandas as pd
import pickle

  from pandas import MultiIndex, Int64Index


# Methods

In [4]:
def check_accuracy(model, data_type):
    """Fit ``model`` on a train split and report how it does on the test split.

    Args:
        model: Object exposing ``fit(x, y)`` and ``predict(x)``.
        data_type: ``'text'`` selects ``text_features``; every other value
            falls through to ``speech_features``.

    Returns:
        The result of ``classification_report`` for the test labels versus
        the model's predictions on the test features.
    """
    features = text_features if data_type == 'text' else speech_features
    train_x, test_x, train_y, test_y = features.get_train_test()

    # fit() is called on the caller's object itself, not on a copy.
    model.fit(train_x, train_y)
    predictions = model.predict(test_x)

    return classification_report(test_y, predictions)

def cross_validate(model, data_type, cv=5):
    """Run the model's own ``cross_validate`` on the full feature set.

    Args:
        model: Object exposing ``cross_validate(x, y, cv=..., scoring=...)``.
        data_type: ``'text'`` selects ``text_features``; every other value
            falls through to ``speech_features``.
        cv: Forwarded unchanged to ``model.cross_validate`` (default 5).

    Returns:
        Whatever ``model.cross_validate`` returns.
    """
    features = text_features if data_type == 'text' else speech_features
    x, y = features.get_data()

    # Each metric is reported under a key equal to its scorer name.
    metric_names = ('accuracy', 'f1_macro', 'precision_macro', 'recall_macro')
    scoring = {name: name for name in metric_names}

    return model.cross_validate(x, y, cv=cv, scoring=scoring)

def print_scores(scores):
    """Print the mean of each cross-validation metric, one per line.

    Args:
        scores: Mapping with the keys ``'test_accuracy'``, ``'test_f1_macro'``,
            ``'test_precision_macro'`` and ``'test_recall_macro'``, each
            holding values that ``np.mean`` can average.
    """
    labelled_keys = (
        ('Accuracy: ', 'test_accuracy'),
        ('F1 Macro: ', 'test_f1_macro'),
        ('Precision Macro: ', 'test_precision_macro'),
        ('Recall Macro: ', 'test_recall_macro'),
    )
    for label, key in labelled_keys:
        print(label, np.mean(scores[key]))


# Main

In [5]:
# Meta-classifier handed to the stacking ensembles built in the cells below.
# NOTE(review): the same object is passed to both the speech and the text
# ensemble — confirm the ensembles clone it rather than fitting it in place.
meta_cls = speech_logistic_regression.get_logistic_regression()

### Speech Ensemble Models

In [6]:
# Choose ONE speech ensemble; the alternatives stay here for quick comparison.
# speech_model = BlendEnsemble(meta_cls=meta_cls, data_type='speech')
speech_model = StackEnsemble(meta_cls=meta_cls, data_type='speech')
# speech_model = VoteEnsemble(type='soft', data_type='speech')
# speech_model = VoteEnsemble(type='hard', data_type='speech')

# Fit on the speech train split and score on the held-out split.
# The next cell prints `speech_report`, so this assignment has to run;
# with it commented out a fresh kernel fails there with NameError.
speech_report = check_accuracy(speech_model, data_type='speech')

In [7]:
# Show the per-class precision / recall / F1 table for the speech ensemble.
print(speech_report)

              precision    recall  f1-score   support

         ang       0.80      0.64      0.71       212
         hap       0.68      0.69      0.69       333
         neu       0.64      0.71      0.68       333
         sad       0.73      0.74      0.73       229

    accuracy                           0.70      1107
   macro avg       0.71      0.70      0.70      1107
weighted avg       0.70      0.70      0.70      1107



In [None]:
# Cross-validate the speech ensemble with the helper's default cv=5.
speech_k_fold = cross_validate(speech_model, data_type='speech')

In [None]:
# Print the mean accuracy / F1 / precision / recall of the speech CV run.
print_scores(speech_k_fold)

### Text Ensemble Models

In [7]:
# Choose ONE text ensemble; the alternatives stay here for quick comparison.
# text_model = BlendEnsemble(meta_cls=meta_cls, data_type='text')
text_model = StackEnsemble(meta_cls=meta_cls, data_type='text')
# text_model = VoteEnsemble(type='soft', data_type='text')
# text_model = VoteEnsemble(type='hard', data_type='text')

# Fit on the text train split and score on the held-out split.
# The next cell reads `text_report`, so this assignment has to run;
# with it commented out a fresh kernel fails there with NameError.
text_report = check_accuracy(text_model, data_type='text')

In [19]:
# print() renders the report's line breaks; a bare expression would show the
# string's repr with literal "\n" instead of a table (matches the speech cell).
print(text_report)

In [None]:
# Cross-validate the text ensemble with the helper's default cv=5.
text_k_fold = cross_validate(text_model, data_type='text')

In [None]:
# Print the mean accuracy / F1 / precision / recall of the text CV run.
print_scores(text_k_fold)

### Speech + Text Ensemble Model

In [8]:
# Fuse the two single-modality ensembles into one speech + text classifier.
# NOTE(review): fit_bases=True presumably makes the wrapper (re)fit
# speech_model and text_model itself — confirm in ensemble.SpeechTextEnsemble.
combined_model = SpeechTextEnsemble(speech_model, text_model, fit_bases=True, type='soft')
# NOTE(review): the two splits are fetched independently, yet only the speech
# labels (y_train_s / y_test_s) are used by the cells below. That assumes both
# get_train_test() calls return the same samples in the same order — TODO confirm.
x_train_s, x_test_s, y_train_s, y_test_s = speech_features.get_train_test()
x_train_t, x_test_t, y_train_t, y_test_t = text_features.get_train_test()


In [None]:
# Train on the paired speech/text features, using the speech split's labels
# as the single target, then predict on the paired test features.
combined_model.fit(x_train_s, x_train_t, y_train_s)
result = combined_model.predict(x_test_s, x_test_t)


# Result

In [10]:
# Score the fused predictions against the speech split's test labels.
print(classification_report(y_test_s, result))

              precision    recall  f1-score   support

         ang       0.82      0.81      0.82       208
         hap       0.80      0.77      0.78       317
         neu       0.75      0.73      0.74       369
         sad       0.75      0.84      0.79       213

    accuracy                           0.78      1107
   macro avg       0.78      0.79      0.78      1107
weighted avg       0.78      0.78      0.78      1107



In [11]:
# Persist the fused model and both base ensembles under relative file names
# (so they land in the current working directory).
# NOTE(review): presumably pickled, judging by the .pkl names — confirm in
# the ensemble module's save() implementation.
combined_model.save('final_model.pkl')
text_model.save('stack_text.pkl')
speech_model.save('stack_speech.pkl')