In [1]:
# !pip install mlxtend

In [2]:
import numpy as np
import pandas as pd 
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,plot_confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit,LeaveOneOut,KFold
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import sys
%matplotlib inline
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier

In [3]:
arousal_dataset = pickle.load(open("../../Dataset/RelativeEnergyData/fourier_realtive_energy_data.pkl","rb"))
arousal_label = pickle.load(open("../../Dataset/UserLabels.pkl","rb"))[:,:1][:396]

valence_dataset = pickle.load(open("../../Dataset/RelativeEnergyData/fourier_realtive_energy_data.pkl","rb"))
valence_label = pickle.load(open("../../Dataset/UserLabels.pkl","rb"))[:, 1:2][:396]

dominance_dataset = pickle.load(open("../../Dataset/RelativeEnergyData/fourier_realtive_energy_data.pkl","rb"))
dominance_label = pickle.load(open("../../Dataset/UserLabels.pkl","rb"))[:,2:3][:396]

liking_dataset = pickle.load(open("../../Dataset/RelativeEnergyData/fourier_realtive_energy_data.pkl","rb"))
liking_label = pickle.load(open("../../Dataset/UserLabels.pkl","rb"))[:,3:4][:396]

In [4]:
arousal_dataset = arousal_dataset.astype(float)
arousal_label = arousal_label.astype(float)

valence_dataset = valence_dataset.astype(float)
valence_label = valence_label.astype(float)

dominance_dataset = dominance_dataset.astype(float)
dominance_label = dominance_label.astype(float)

liking_dataset = liking_dataset.astype(float)
liking_label = liking_label.astype(float)

In [5]:
def getLabel(label):
    new_label=[]
    for i in range(len(label)):
        if(label[i][0]>=4.5):
            new_label.append(1)
        else:
            new_label.append(0)
    return new_label

In [6]:
# clean data
def clean_data(dataset, target):
    del_rows = []
    for i in range(len(dataset)):
        if(np.isnan(dataset[i]).sum() > 0):
            del_rows.append(i)
            
    dataset = np.delete(dataset, del_rows, axis=0)
    target = np.delete(target, del_rows, axis=0)
    
    return dataset,target

In [7]:
arousal_dataset,arousal_label = clean_data(arousal_dataset,arousal_label)
valence_dataset,valence_label = clean_data(valence_dataset,valence_label)
dominance_dataset ,dominance_label = clean_data(dominance_dataset,dominance_label)
liking_dataset,liking_label = clean_data(liking_dataset,liking_label)

In [8]:
arousal_label = getLabel(arousal_label)
valence_label = getLabel(valence_label)
dominance_label = getLabel(dominance_label)
liking_label = getLabel(liking_label)

In [9]:
print(arousal_dataset.shape)

(393, 70)


In [10]:
sm1 = SMOTE()
arousal_dataset,arousal_label = sm1.fit_resample(arousal_dataset,arousal_label)

sm2 = SMOTE()
valence_dataset,valence_label = sm2.fit_resample(valence_dataset,valence_label)

sm3 = SMOTE()
dominance_dataset,dominance_label = sm3.fit_resample(dominance_dataset,dominance_label)

sm4 = SMOTE()
liking_dataset,liking_label = sm4.fit_resample(liking_dataset,liking_label)

In [11]:
sc = StandardScaler()

arousal_dataset = sc.fit_transform(arousal_dataset)
valence_dataset = sc.fit_transform(valence_dataset)
dominance_dataset = sc.fit_transform(dominance_dataset)
liking_dataset = sc.fit_transform(liking_dataset)

In [12]:
clf1 = RandomForestClassifier(random_state=10)
clf2 = GaussianNB()
clf3= SVC(kernel="rbf",C = 10)
lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[ clf1, clf2,clf3], 
                          meta_classifier=lr)

In [13]:
ar_model = sclf
val_model =sclf
dom_model = sclf
lik_model = sclf

In [14]:
x_a_train,x_a_test,y_a_train,y_a_test = train_test_split(arousal_dataset,arousal_label,test_size=0.2,random_state=42)
x_v_train,x_v_test,y_v_train,y_v_test = train_test_split(valence_dataset,valence_label,test_size=0.2,random_state=42)
x_d_train,x_d_test,y_d_train,y_d_test = train_test_split(dominance_dataset,dominance_label,test_size = 0.3,random_state = 42)
x_l_train,x_l_test,y_l_train,y_l_test = train_test_split(liking_dataset,liking_label,test_size = 0.3,random_state = 42)

## Arousal

In [15]:
ar_model.fit(x_a_train,y_a_train)

StackingClassifier(classifiers=[RandomForestClassifier(random_state=10),
                                GaussianNB(), SVC(C=10)],
                   meta_classifier=LogisticRegression())

In [16]:
pred_a = ar_model.predict(x_a_test)
print(classification_report(y_a_test,pred_a))

              precision    recall  f1-score   support

           0       0.79      0.76      0.77        54
           1       0.75      0.78      0.76        50

    accuracy                           0.77       104
   macro avg       0.77      0.77      0.77       104
weighted avg       0.77      0.77      0.77       104



In [17]:
print(accuracy_score(pred_a,y_a_test))

0.7692307692307693


In [18]:
cvkfold = ShuffleSplit(n_splits=5,test_size=0.3,random_state=42)
cross_val_score(ar_model,arousal_dataset,arousal_label,scoring='accuracy',cv=cvkfold).mean()*100

77.93548387096776

In [19]:
loo = LeaveOneOut()
no_splits=loo.get_n_splits(arousal_dataset)
no_splits

516

In [20]:
cvloocv=ShuffleSplit(n_splits=no_splits,test_size=0.3,random_state=42)
cross_val_score(ar_model,arousal_dataset,arousal_label,scoring='accuracy',cv=cvloocv).mean()*100

77.6981745436359

## Valence

In [21]:
val_model.fit(x_v_train,y_v_train)

StackingClassifier(classifiers=[RandomForestClassifier(random_state=10),
                                GaussianNB(), SVC(C=10)],
                   meta_classifier=LogisticRegression())

In [22]:
pred_v = val_model.predict(x_v_test)
print(classification_report(y_v_test,pred_v))

              precision    recall  f1-score   support

           0       0.63      0.69      0.66        49
           1       0.62      0.55      0.58        44

    accuracy                           0.62        93
   macro avg       0.62      0.62      0.62        93
weighted avg       0.62      0.62      0.62        93



In [23]:
print(accuracy_score(pred_v,y_v_test))

0.6236559139784946


In [24]:
cvkfold = ShuffleSplit(n_splits=5,test_size=0.3,random_state=42)
cross_val_score(val_model,valence_dataset,valence_label,scoring='accuracy',cv=cvkfold).mean()*100

63.309352517985616

In [25]:
loo = LeaveOneOut()
no_splits=loo.get_n_splits(valence_dataset)
no_splits

462

In [26]:
cvloocv=ShuffleSplit(n_splits=no_splits,test_size=0.3,random_state=42)
cross_val_score(val_model,valence_dataset,valence_label,scoring='accuracy',cv=cvloocv).mean()*100

62.89669563050858

## Dominance

In [27]:
dom_model.fit(x_d_train,np.array(y_d_train))

StackingClassifier(classifiers=[RandomForestClassifier(random_state=10),
                                GaussianNB(), SVC(C=10)],
                   meta_classifier=LogisticRegression())

In [28]:
pred_d = dom_model.predict(x_d_test)
print(classification_report(y_d_test,pred_d))

              precision    recall  f1-score   support

           0       0.74      0.71      0.73        69
           1       0.69      0.73      0.71        62

    accuracy                           0.72       131
   macro avg       0.72      0.72      0.72       131
weighted avg       0.72      0.72      0.72       131



In [29]:
print(accuracy_score(pred_d,y_d_test))

0.7175572519083969


In [30]:
cvkfold = ShuffleSplit(n_splits=5,test_size=0.3,random_state=42)
cross_val_score(dom_model,dominance_dataset,dominance_label,scoring='accuracy',cv=cvkfold).mean()*100

66.56488549618321

In [31]:
loo = LeaveOneOut()
no_splits=loo.get_n_splits(dominance_dataset)
no_splits

434

In [32]:
cvloocv=ShuffleSplit(n_splits=no_splits,test_size=0.3,random_state=42)
cross_val_score(dom_model,dominance_dataset,dominance_label,scoring='accuracy',cv=cvloocv).mean()*100

67.54669856122699

## Liking

In [33]:
lik_model.fit(x_l_train,np.array(y_l_train))

StackingClassifier(classifiers=[RandomForestClassifier(random_state=10),
                                GaussianNB(), SVC(C=10)],
                   meta_classifier=LogisticRegression())

In [34]:
pred_l = lik_model.predict(x_l_test)
print(classification_report(y_l_test,pred_l))

              precision    recall  f1-score   support

           0       0.68      0.91      0.78        86
           1       0.87      0.60      0.71        90

    accuracy                           0.75       176
   macro avg       0.78      0.75      0.75       176
weighted avg       0.78      0.75      0.74       176



In [35]:
print(accuracy_score(pred_l,y_l_test))

0.75


In [36]:
cvkfold = ShuffleSplit(n_splits=5,test_size=0.3,random_state=42)
cross_val_score(lik_model,liking_dataset,liking_label,scoring='accuracy',cv=cvkfold).mean()*100

79.77272727272727

In [37]:
loo = LeaveOneOut()
no_splits=loo.get_n_splits(liking_dataset)
no_splits

584

In [38]:
cvloocv=ShuffleSplit(n_splits=no_splits,test_size=0.3,random_state=42)
cross_val_score(lik_model,liking_dataset,liking_label,scoring='accuracy',cv=cvloocv).mean()*100

79.57658779576589