In [1]:
import pandas as pd 
import numpy as np 
import os
import pickle
import re 
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,plot_confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit,LeaveOneOut,KFold
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score

Using TensorFlow backend.


In [2]:
sys.path.append('.')
sys.path.append('..')

In [3]:
from Fourier import fourier_feature_fusion as eff
from Wavelet import wavelet_feature_fusion as wff 

In [4]:
destination_folder = "FusedDataSlidingWindowNew"

In [5]:
user_no=1
user_folder='./User1/'
last_index_even=41 # For even user numbers
last_index_odd=40  # For odd user numbers
increment=2        # Number for the increment of the user numbers

In [6]:
def getLabel(label):
    new_label=[]
    for i in range(len(label)):
        if(label[i]>4.5):
            new_label.append(1)
        else:
            new_label.append(0)
    return new_label

In [7]:
# clean data
def clean_data(dataset, target):
    del_rows = []
    for i in range(len(dataset)):
        if(np.isnan(dataset[i]).sum() > 0):
            del_rows.append(i)
            
    dataset = np.delete(dataset, del_rows, axis=0)
    target = np.delete(target, del_rows, axis=0)
    
    return dataset,target

In [8]:
window_size = 512
step_size = 512
User_nums = ['26']

In [9]:
arousal=[]
valence=[]
dominance=[]
liking=[]

In [10]:
def get_full_features(user_no, window_size,step_size):
    
    ar_dict = {}
    val_dict = {}
    dom_dict = {}
    lik_dict = {}
    
    user_folder='./User'+ str(user_no) + '/'
    
    user_csv= os.listdir(user_folder)
    
    user_csv = user_csv[1:]
    
    user_labels_folder = user_folder + 'Label/'
    
    user_labels=os.listdir(user_labels_folder)
    
    all_data=[]
    all_labels=[]
    
    for csv in user_csv:
        all_data.append(csv)
    
    for lab in user_labels:
        all_labels.append(lab)
        
    final_features=[]
    final_labels=[]
        
    for i in range(0,len(all_data)):
        
        data = pd.read_csv(user_folder + all_data[i])
        cols = data.columns
        cols = cols[:14]
        labels=pd.read_csv(user_labels_folder + all_labels[i])
        labels=labels.values[0][:4]
        
        start = 0;
        while start + window_size < data.shape[0]:
            
            temp_array = []
            temp_data = [] 
            
            for i in cols:
                X = data[i][start : start + window_size]
                features_fourier = eff.power_spectrum(X)
                features_wavelet = wff.wavelet_energy(X)
                features_fourier = np.array(features_fourier).ravel()
                features_wavelet = np.array(features_wavelet).ravel()
                temp_array.append(np.concatenate((features_fourier,features_wavelet),axis=0).ravel())
                  
            temp_array=np.array(temp_array).ravel()
            final_features.append(temp_array) 
            final_labels.append(labels)
            start = start + step_size
                
    final_features = np.array(final_features)
    final_labels = np.array(final_labels)
#     print(final_features.shape)
#     print(final_labels.shape)
    
    arousal_dataset=final_features
    valence_dataset=final_features
    dominance_dataset=final_features
    liking_dataset=final_features
    
    arousal_labels=final_labels[:,0]
    valence_labels=final_labels[:,1]
    dominance_labels=final_labels[:,2]
    liking_labels=final_labels[:,3]
    
    
    arousal_dataset,arousal_labels = clean_data(arousal_dataset,arousal_labels)
    valence_dataset,valence_labels = clean_data(valence_dataset,valence_labels)
    dominance_dataset ,dominance_labels = clean_data(dominance_dataset,dominance_labels)
    liking_dataset,liking_labels = clean_data(liking_dataset,liking_labels)
    
    
    arousal_labels = getLabel(arousal_labels)
    valence_labels = getLabel(valence_labels)
    dominance_labels = getLabel(dominance_labels)
    liking_labels = getLabel(liking_labels)
    
    ar_count=len(set(arousal_labels))
    if ar_count==1:
        ar_dict[user_no]=['Accuracy: '+str(100),'F1-Score: '+str(1)]
        arousal.append(ar_dict)
    
    else:
        sm1 = SMOTE()
        arousal_dataset,arousal_labels = sm1.fit_resample(arousal_dataset,arousal_labels)
        x_a_train,x_a_test,y_a_train,y_a_test = train_test_split(arousal_dataset,arousal_labels,test_size=0.3,random_state=42)
        ar_model = SVC(kernel="rbf",C = 10)
        ar_model.fit(x_a_train,y_a_train)
        pred_a = ar_model.predict(x_a_test)
        ar_acc=accuracy_score(pred_a,y_a_test)
        ar_f1=f1_score(y_a_test, pred_a, average='macro')
        
        ar_dict[user_no]=['Accuracy: '+str(ar_acc),'F1-Score: '+str(ar_f1)]
        arousal.append(ar_dict)
        
        
    val_count=len(set(valence_labels))
    if val_count==1:
        val_dict[user_no]=['Accuracy: '+str(100),'F1-Score: '+str(1)]
        valence.append(val_dict)
        
    else:
        sm2 = SMOTE()
        valence_dataset,valence_labels = sm2.fit_resample(valence_dataset,valence_labels)
        x_v_train,x_v_test,y_v_train,y_v_test = train_test_split(valence_dataset,valence_labels,test_size=0.3,random_state=42)
        val_model = SVC(kernel="rbf",C = 10)
        val_model.fit(x_v_train,y_v_train)
        pred_v = val_model.predict(x_v_test)
        val_acc=accuracy_score(pred_v,y_v_test)
        val_f1=f1_score(y_v_test, pred_v, average='macro')
        
        val_dict[user_no]=['Accuracy: '+str(val_acc),'F1-Score: '+str(val_f1)]
        valence.append(val_dict)
        
        
    dom_count=len(set(dominance_labels))
    if dom_count==1:
        dom_dict[user_no]=['Accuracy: '+str(100),'F1-Score: '+str(1)]
        dominance.append(dom_dict)
    
    else:
        sm3 = SMOTE()
        dominance_dataset,dominance_labels = sm3.fit_resample(dominance_dataset,dominance_labels)
        x_d_train,x_d_test,y_d_train,y_d_test = train_test_split(dominance_dataset,dominance_labels,test_size = 0.3,random_state = 42)
        dom_model = SVC(kernel="rbf",C = 10)
        dom_model.fit(x_d_train,y_d_train)
        pred_d = dom_model.predict(x_d_test)
        dom_acc=accuracy_score(pred_d,y_d_test)
        dom_f1=f1_score(y_d_test, pred_d, average='macro')
        
        dom_dict[user_no]=['Accuracy: '+str(dom_acc),'F1-Score: '+str(dom_f1)]
        dominance.append(dom_dict)
        
    
    lik_count=len(set(liking_labels))
    if lik_count==1:
        lik_dict[user_no]=['Accuracy: '+str(100),'F1-Score: '+str(1)]
        liking.append(lik_dict)

    else:
        sm4 = SMOTE()
        liking_dataset,liking_labels = sm4.fit_resample(liking_dataset,liking_labels)
        x_l_train,x_l_test,y_l_train,y_l_test = train_test_split(liking_dataset,liking_labels,test_size = 0.3,random_state = 42)
        lik_model = SVC(kernel="rbf",C = 10)
        lik_model.fit(x_l_train,y_l_train)
        pred_l = lik_model.predict(x_l_test)
        lik_acc=accuracy_score(pred_l,y_l_test)
        lik_f1=f1_score(y_l_test, pred_l, average='macro')
 
        lik_dict[user_no]=['Accuracy: '+str(lik_acc),'F1-Score: '+str(lik_f1)]
        liking.append(lik_dict)
    
    
    print('User:' + str(user_no)+ " "+"Done")

In [11]:
for i in range(user_no,last_index_odd,2):
    get_full_features(i,window_size,step_size)

User:1 Done
User:3 Done
User:5 Done
User:7 Done
User:9 Done
User:11 Done
User:13 Done
User:15 Done
User:17 Done
User:19 Done
User:21 Done
User:23 Done
User:25 Done
User:27 Done
User:29 Done
User:31 Done
User:33 Done
User:35 Done
User:37 Done
User:39 Done


In [16]:
def get_dict(res,idx):
    user_no = []
    accuracy = []
    f1_score = []

    for i in range(len(res)):
        user_no.append(idx)
        acc = res[i][idx][0]
        acc = acc.split(' ')
        acc = float(acc[1])
        accuracy.append(acc)
    
        f1 = res[i][idx][1]
        f1 = f1.split(' ')
        f1 = float(f1[1])
        f1_score.append(f1)
        idx += 2
    f_dict = {'user_no':user_no, 'accuracy':accuracy,'f1_score':f1_score}
    return f_dict

In [17]:
f_dict = get_dict(arousal,1)    
df = pd.DataFrame(data=f_dict)
df.to_csv('./odd_arousal.csv',index=False)
df

Unnamed: 0,user_no,accuracy,f1_score
0,1,0.935915,0.935912
1,3,0.905697,0.905082
2,5,100.0,1.0
3,7,0.954545,0.954535
4,9,0.87269,0.872237
5,11,100.0,1.0
6,13,0.917105,0.916908
7,15,0.962704,0.962702
8,17,0.845972,0.845825
9,19,0.958382,0.958377


In [18]:
f_dict = get_dict(valence,1)
df = pd.DataFrame(data=f_dict)
df.to_csv('./odd_valence.csv',index=False)
df

Unnamed: 0,user_no,accuracy,f1_score
0,1,0.934659,0.934616
1,3,0.978934,0.978926
2,5,0.874126,0.874101
3,7,0.86255,0.862506
4,9,0.914369,0.914107
5,11,100.0,1.0
6,13,0.860269,0.859664
7,15,0.965922,0.965886
8,17,0.85101,0.850091
9,19,0.930976,0.930613


In [19]:
f_dict = get_dict(dominance,1)
df = pd.DataFrame(data=f_dict)
df.to_csv('./odd_dominance.csv',index=False)
df

Unnamed: 0,user_no,accuracy,f1_score
0,1,0.906442,0.906378
1,3,0.948549,0.948493
2,5,0.908789,0.908463
3,7,0.958763,0.95876
4,9,0.948571,0.948386
5,11,100.0,1.0
6,13,0.891775,0.891449
7,15,0.919476,0.919473
8,17,0.899209,0.899206
9,19,0.90593,0.905921


In [20]:
f_dict = get_dict(liking,1)
df = pd.DataFrame(data=f_dict)
df.to_csv('./odd_liking.csv',index=False)
df

Unnamed: 0,user_no,accuracy,f1_score
0,1,0.964706,0.964686
1,3,0.915228,0.914941
2,5,0.886282,0.886281
3,7,0.995662,0.995659
4,9,0.965636,0.965626
5,11,0.951807,0.95176
6,13,0.861842,0.860195
7,15,0.986471,0.986471
8,17,0.922551,0.922551
9,19,0.910828,0.90995
