In [60]:
import math
import statistics
from collections import Counter

import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize

# Emotion labels, in the column order used for the binarized targets and scores.
EMOTION_CLASSES = ['Happy', 'Sad', 'Stressed', 'Relaxed']

# Users singled out by the original analysis: their rarest class has a single
# sample, so neither a stratified split nor SMOTE is attempted for them.
NON_STRATIFIED_USERS = frozenset({8, 10, 16})

# Seed shared by both train/test splits and the random forest so that the
# results file is reproducible from one run to the next.
RANDOM_SEED = 42

# Console label printed in front of each defined per-class score
# (same order as EMOTION_CLASSES).
SCORE_LABELS = ("happy score=", "sad_score=", "stressed_score=", "relax_score=")


def count_labels(labels):
    """Return ``{label: number of occurrences}`` for a 1-D sequence of labels.

    Every distinct label gets its own entry (in order of first appearance);
    unexpected labels are never folded into one of the known classes.
    """
    return dict(Counter(labels))


def can_stratify(labels):
    """Return True when every class in *labels* has at least two samples.

    A stratified ``train_test_split`` rejects any class with a single member,
    so this is the precondition for passing ``stratify=labels``.
    """
    counts = count_labels(labels)
    return bool(counts) and min(counts.values()) >= 2


def needs_plain_split(user, labels):
    """Return True when *user* must be split without stratification.

    That is the case for the hand-picked users in NON_STRATIFIED_USERS and for
    any other user whose labels contain a single-sample class.
    """
    return user in NON_STRATIFIED_USERS or not can_stratify(labels)


def smote_balance(X_train, Y_train, k_neighbors):
    """Oversample every class with SMOTE and return ``(X_res, Y_res)``.

    *Y_train* is passed as a 1-D label array.  The sampler is built with
    keyword arguments and falls back to the legacy parameter/method names so
    the call works on both old and current imbalanced-learn releases.
    """
    try:
        sampler = SMOTE(sampling_strategy='all', random_state=2,
                        k_neighbors=k_neighbors)
    except TypeError:
        # imbalanced-learn < 0.4 names this parameter `ratio`.
        sampler = SMOTE(ratio='all', random_state=2, k_neighbors=k_neighbors)
    # `fit_sample` was the pre-0.4 name of `fit_resample`.
    resample = getattr(sampler, "fit_resample", None) or sampler.fit_sample
    return resample(X_train, Y_train)


def per_class_auc(Y_true, Y_prob):
    """Return the one-vs-rest ROC AUC of each column, as a list.

    *Y_true* is the binarized ground truth and *Y_prob* the predicted
    probabilities, both with one column per entry of EMOTION_CLASSES.
    An entry may be NaN; the caller checks for that with ``math.isnan``.
    """
    values = []
    for column in range(len(EMOTION_CLASSES)):
        fpr, tpr, _ = roc_curve(Y_true[:, column], Y_prob[:, column])
        values.append(auc(fpr, tpr))
    return values


def weighted_auc(class_scores, class_support, n_samples):
    """Return ``sum(score * support) / n_samples`` over the classes."""
    weighted_sum = sum(
        score * support for score, support in zip(class_scores, class_support)
    )
    return weighted_sum / n_samples


if __name__ == "__main__":
    with open("resultfile.csv", 'w') as resultfile:
        header = 'user_no,accu,happy_accu,sad_accu,stress_accu,relax_accu\n'
        resultfile.write(header)

        for user in range(1, 23):
            # read file to build personalized model for each user
            user_data = "featured_files/user_" + str(user) + "_v4.csv"
            print(user_data)
            dataset = pd.read_csv(user_data)
            array = dataset.values
            X_set = array[:, 3:6]
            Y_set = array[:, 8]

            if needs_plain_split(user, Y_set):
                # A single-sample class rules out stratification; these users
                # are trained on the raw (not oversampled) training split.
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X_set, Y_set, test_size=0.4, random_state=RANDOM_SEED)
                X_res = X_train
                Y_res = label_binarize(Y_train, classes=EMOTION_CLASSES)
            else:
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X_set, Y_set, test_size=0.4, stratify=Y_set,
                    random_state=RANDOM_SEED)

                # number of training samples per class
                sample_count = count_labels(Y_train)
                print(sample_count)

                # find the class with minimum number of sample
                min_key = min(sample_count, key=sample_count.get)
                print("min key=", min_key)
                min_key_value = sample_count[min_key]
                print("min key value=", min_key_value)

                # SMOTE uses one neighbour fewer than the minority class size,
                # so it is only attempted when that class has 2+ samples.
                if min_key_value > 1:
                    # upsample all classes to the size of the major class;
                    # X_res and Y_res become the training data
                    X_res, Y_res = smote_balance(
                        X_train, Y_train, min_key_value - 1)

                    resampled = count_labels(Y_res)
                    print("Happy=", resampled.get('Happy', 0),
                          "sad=", resampled.get('Sad', 0),
                          "stressed=", resampled.get('Stressed', 0),
                          "relaxed=", resampled.get('Relaxed', 0))
                    Y_res = label_binarize(Y_res, classes=EMOTION_CLASSES)
                    print(Y_res.shape)
                else:
                    X_res = X_train
                    Y_res = label_binarize(Y_train, classes=EMOTION_CLASSES)

            Y_test = label_binarize(Y_test, classes=EMOTION_CLASSES)

            # one model for each emotion class as it is multiclass classification problem
            clf = OneVsRestClassifier(
                RandomForestClassifier(n_estimators=100,
                                       random_state=RANDOM_SEED))
            Y_prob = clf.fit(X_res, Y_res).predict_proba(X_test)

            # Per-emotion AUC; a NaN result is recorded as 0 and not printed.
            class_scores = []
            for label, value in zip(SCORE_LABELS, per_class_auc(Y_test, Y_prob)):
                if math.isnan(value):
                    class_scores.append(0)
                else:
                    print(label, value)
                    class_scores.append(value)
            happy_score, sad_score, stress_score, relax_score = class_scores

            # Per-user score: per-class AUCs weighted by the number of test
            # samples of each class, divided by the test-set size.
            class_support = Y_test.sum(axis=0)
            roc_auc_weight = weighted_auc(
                class_scores, class_support, X_test.shape[0])
            print("roc_auc_weight=", roc_auc_weight)

            line = (f"{user},{roc_auc_weight},{happy_score},{sad_score},"
                    f"{stress_score},{relax_score}\n")
            resultfile.write(line)
        

Time_data/U1_LIHF_itd_file.csv
{'Sad': 14, 'Relaxed': 30, 'Happy': 19, 'Stressed': 5}
min key= Stressed
min key value= 5




Happy= 30 sad= 30 stressed= 30 relaxed= 30
(120, 4)
happy score= 0.875
sad_score= 0.9264264264264265
stressed_score= 0.5654761904761905
relax_score= 0.6866666666666666
roc_auc_weight= 0.7721683173857087
Time_data/U2_LIHF_itd_file.csv
{'Relaxed': 12, 'Stressed': 2, 'Happy': 8, 'Sad': 5}
min key= Stressed
min key value= 2
Happy= 12 sad= 12 stressed= 12 relaxed= 12
(48, 4)




happy score= 0.9692307692307693
sad_score= 0.9777777777777777
stressed_score= 0.640625
relax_score= 0.8125
roc_auc_weight= 0.8644853988603989
Time_data/U3_LIHF_itd_file.csv
{'Relaxed': 19, 'Happy': 3, 'Sad': 4, 'Stressed': 1}
min key= Stressed
min key value= 1
happy score= 0.6875
sad_score= 0.7444444444444445
stressed_score= 0.7058823529411764
relax_score= 0.5555555555555556
roc_auc_weight= 0.6100490196078431
Time_data/U4_LIHF_itd_file.csv
{'Relaxed': 40, 'Sad': 9, 'Happy': 3, 'Stressed': 18}
min key= Happy
min key value= 3
Happy= 40 sad= 40 stressed= 40 relaxed= 40
(160, 4)




happy score= 0.9347826086956521
sad_score= 0.8849206349206349
stressed_score= 0.6681318681318681
relax_score= 0.7477954144620811
roc_auc_weight= 0.7511516563146997
Time_data/U5_LIHF_itd_file.csv
{'Relaxed': 20, 'Happy': 12, 'Sad': 5, 'Stressed': 2}
min key= Stressed
min key value= 2
Happy= 20 sad= 20 stressed= 20 relaxed= 20
(80, 4)




happy score= 0.9144736842105263
sad_score= 0.8913043478260869
stressed_score= 0.12
relax_score= 0.7445054945054945
roc_auc_weight= 0.7703547516133329
Time_data/U6_LIHF_itd_file.csv
{'Stressed': 1, 'Relaxed': 17, 'Happy': 7, 'Sad': 4}
min key= Stressed
min key value= 1
happy score= 0.84
sad_score= 1.0
stressed_score= 0.39473684210526316
relax_score= 0.7083333333333334
roc_auc_weight= 0.7547368421052632
Time_data/U7_LIHF_itd_file.csv
{'Happy': 22, 'Relaxed': 31, 'Sad': 8, 'Stressed': 5}
min key= Stressed
min key value= 5
Happy= 31 sad= 31 stressed= 31 relaxed= 31
(124, 4)




happy score= 0.9377777777777777
sad_score= 0.975
stressed_score= 0.7865853658536586
relax_score= 0.9196428571428572
roc_auc_weight= 0.9200112917795844
Time_data/U8_LIHF_itd_file.csv




happy score= 0.7166666666666667
sad_score= 0.5
relax_score= 0.75
roc_auc_weight= 0.7090909090909091
Time_data/U9_LIHF_itd_file.csv
{'Sad': 7, 'Relaxed': 15, 'Happy': 3, 'Stressed': 2}
min key= Stressed
min key value= 2
Happy= 15 sad= 15 stressed= 15 relaxed= 15
(60, 4)
happy score= 0.671875
sad_score= 0.7857142857142857
stressed_score= 0.671875
relax_score= 0.5
roc_auc_weight= 0.6016865079365079
Time_data/U10_LIHF_itd_file.csv




happy score= 0.2857142857142857
sad_score= 0.5699588477366255
relax_score= 0.5134615384615384
roc_auc_weight= 0.5212595532039975
Time_data/U11_LIHF_itd_file.csv
{'Relaxed': 13, 'Sad': 11, 'Stressed': 8}
min key= Stressed
min key value= 8
Happy= 0 sad= 13 stressed= 13 relaxed= 13
(39, 4)




sad_score= 0.4107142857142857
stressed_score= 0.6588235294117648
relax_score= 0.48717948717948717
roc_auc_weight= 0.49838396897220427
Time_data/U12_LIHF_itd_file.csv
{'Relaxed': 179, 'Stressed': 19, 'Happy': 6}
min key= Happy
min key value= 6
Happy= 179 sad= 0 stressed= 179 relaxed= 179
(537, 4)




happy score= 0.9043560606060606
stressed_score= 0.6504065040650406
relax_score= 0.7172516065249628
roc_auc_weight= 0.7163650733216201
Time_data/U13_LIHF_itd_file.csv
{'Happy': 8, 'Sad': 2, 'Stressed': 16, 'Relaxed': 14}
min key= Sad
min key value= 2
Happy= 16 sad= 16 stressed= 16 relaxed= 16
(64, 4)
happy score= 0.3492063492063492
sad_score= 0.3653846153846154
stressed_score= 0.5235294117647059
relax_score= 0.5441176470588235
roc_auc_weight= 0.486559011068815
Time_data/U14_LIHF_itd_file.csv
{'Relaxed': 18, 'Happy': 1, 'Sad': 4, 'Stressed': 4}
min key= Happy
min key value= 1
happy score= 0.4722222222222222
sad_score= 0.6176470588235294
stressed_score= 0.49999999999999994
relax_score= 0.48717948717948717
roc_auc_weight= 0.5021499828001377
Time_data/U15_LIHF_itd_file.csv
{'Relaxed': 68, 'Happy': 7, 'Sad': 13}
min key= Happy
min key value= 7
Happy= 68 sad= 68 stressed= 0 relaxed= 68
(204, 4)




happy score= 0.7381818181818182
sad_score= 0.5997596153846154
relax_score= 0.6734860883797054
roc_auc_weight= 0.6690472027972028
Time_data/U16_LIHF_itd_file.csv




sad_score= 0.6648550724637681
stressed_score= 0.5273333333333333
relax_score= 0.44645550527903466
roc_auc_weight= 0.5172032506014199
Time_data/U17_LIHF_itd_file.csv
{'Relaxed': 27, 'Stressed': 25, 'Sad': 1}
min key= Sad
min key value= 1




sad_score= 0.37142857142857144
stressed_score= 0.46406249999999993
relax_score= 0.3668730650154799
roc_auc_weight= 0.41019491129785246
Time_data/U18_LIHF_itd_file.csv
{'Happy': 15, 'Stressed': 2, 'Relaxed': 5}
min key= Stressed
min key value= 2
Happy= 15 sad= 0 stressed= 15 relaxed= 15
(45, 4)




happy score= 0.9181818181818182
stressed_score= 1.0
relax_score= 0.875
roc_auc_weight= 0.9125
Time_data/U19_LIHF_itd_file.csv
{'Happy': 32, 'Relaxed': 25, 'Stressed': 4, 'Sad': 6}
min key= Stressed
min key value= 4
Happy= 32 sad= 32 stressed= 32 relaxed= 32
(128, 4)
happy score= 0.691699604743083
sad_score= 0.32926829268292684
stressed_score= 0.6825396825396826
relax_score= 0.6616379310344828
roc_auc_weight= 0.6481842315388957
Time_data/U20_LIHF_itd_file.csv
{'Happy': 51, 'Stressed': 33, 'Relaxed': 11, 'Sad': 14}
min key= Relaxed
min key value= 11
Happy= 51 sad= 51 stressed= 51 relaxed= 51
(204, 4)




happy score= 0.6546003016591252
sad_score= 0.6953125
stressed_score= 0.5739750445632799
relax_score= 0.7692307692307693
roc_auc_weight= 0.6478838341184735
Time_data/U21_LIHF_itd_file.csv
{'Relaxed': 35, 'Stressed': 5, 'Happy': 22, 'Sad': 3}
min key= Sad
min key value= 3
Happy= 35 sad= 35 stressed= 35 relaxed= 35
(140, 4)




happy score= 0.5345238095238095
sad_score= 0.761904761904762
stressed_score= 0.415625
relax_score= 0.6104166666666666
roc_auc_weight= 0.5754464285714286
Time_data/U22_LIHF_itd_file.csv
{'Happy': 9, 'Relaxed': 15, 'Stressed': 3}
min key= Stressed
min key value= 3
Happy= 15 sad= 0 stressed= 15 relaxed= 15
(45, 4)




happy score= 0.6858974358974359
stressed_score= 0.7708333333333334
relax_score= 0.7000000000000001
roc_auc_weight= 0.7067307692307693


