## Real experiments for head movements

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [18]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score,f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC,LinearSVC
from sklearn.dummy import DummyClassifier
from seqlearn.perceptron import StructuredPerceptron
import sklearn_crfsuite
from sklearn_crfsuite import metrics as skcrfmetrics
from scipy.stats import pearsonr,spearmanr

In [19]:
import glob

In [20]:
windowsize="9-11-13"
memory="middle"

PATH = "C:\\Users\\zgk261\\nomco\\"+windowsize+"\\"+memory+"\\"
PATH

'C:\\Users\\zgk261\\nomco\\9-11-13\\middle\\'

In [21]:
!dir /B C:\Users\zgk261\nomco\9-11-13\middle\

F2_M4-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F2_M4-all-final2_primary_M4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F3.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F6.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F1.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F5_F2-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBac

### SKELETON TO READ FILES, SPEAKERS and PRIMARY SPEAKERS


In [22]:
speaker_set = set()

for each_file in glob.glob(PATH+"*.tab"):
    filename=each_file.split("\\")[-1]
    fileinfo=filename.split("-")
    
    primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
    speakers = fileinfo[0]
    l_speaker,r_speaker = speakers.split("_")

    speaker_set.add(primary_speaker)

In [23]:
speaker_set

{'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6'}

### SKELETON TO READ FILES AND COSS_VALIDATE

In the following way: In each iteration, we will leave one speaker out (two files), train a model and test on those two files.

In [24]:

for speaker_on_focus in speaker_set:
    train_files = []
    test_files  = []
    
    for each_file in glob.glob(PATH+"*.tab"):
        filename=each_file.split("\\")[-1]
        fileinfo=filename.split("-")

        primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
        speakers = fileinfo[0]
        l_speaker,r_speaker = speakers.split("_")
        if primary_speaker == l_speaker:
            secondary_speaker = r_speaker
        else:
            secondary_speaker = l_speaker

        #print (primary_speaker,l_speaker,r_speaker,speaker_on_focus)

        file_df = pd.read_csv(each_file,sep="\t")
        file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON") for el in file_df.columns]

        if primary_speaker != speaker_on_focus:
            #FILES TO TRAIN!
            train_files.append(file_df)
        else:
            #FILES TO TEST!
            test_files.append(file_df)
            
        
        
    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()


Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Speaker on focus: M6
Training data size: (181528, 28)


### Experiments:

It seems that in the 3-6-9 experiments considering both the past and future of frames, the MLP was not the best model, but the Logistic Regression. I think, though, that the MLP will show good performance when cross-validating with the whole data.

I will, then, try 3 classifiers:

  * SVM, because it is the default classifier that is commonly used
  * Logistic Regression, because it seems to perform the best in our initial experiments with two dialogue data
  * MLP, because when including more data, I expect to estimate a more general (and thus, better) model

In [25]:
accuracy_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
import math

def map_movements(mov_class):
    if mov_class == "Nod" or mov_class == "Jerk":
        return 1
    elif mov_class == "Shake":
        return 2
    elif mov_class!=mov_class: #This returns True if the value is NAN, and it works. It might be source of errors.
        return 0
    else:
        return 3



In [27]:
accuracy_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
f1score_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
f1score_resultsw = pd.DataFrame(0,columns=speaker_set,index=["MF"])
#f1score_resultsb = pd.DataFrame(0,columns=speaker_set,index=["MF"])
#f1_score(y_true, y_pred, average='micro')

for speaker_on_focus in speaker_set:
    train_files = []
    test_files  = []
    
    for each_file in glob.glob(PATH+"*.tab"):
        filename=each_file.split("\\")[-1]
        fileinfo=filename.split("-")

        primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
        speakers = fileinfo[0]
        l_speaker,r_speaker = speakers.split("_")
        if primary_speaker == l_speaker:
            secondary_speaker = r_speaker
        else:
            secondary_speaker = l_speaker

        #print (primary_speaker,l_speaker,r_speaker,speaker_on_focus)

        file_df = pd.read_csv(each_file,sep="\t")
        file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON") for el in file_df.columns]

        if primary_speaker != speaker_on_focus:
            #FILES TO TRAIN!
            train_files.append(file_df)
        else:
            #FILES TO TEST!
            test_files.append(file_df)
            
        
        
    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()
    
    
    
    
    
    
    
    #Train data
    features=['PRIMA:velocity-r', 'PRIMA:velocity-clock', 'PRIMA:velocity-x', 'PRIMA:velocity-y',
              'PRIMA:acceleration-r', 'PRIMA:acceleration-clock', 'PRIMA:acceleration-x',
              'PRIMA:acceleration-y', 'PRIMA:jerk-r', 'PRIMA:jerk-clock', 'PRIMA:jerk-x', 'PRIMA:jerk-y'
             ]

    feats_learning = training_data[features]
    X = feats_learning.dropna() #REMEMBER THIS!!!! We're ignoring any row that has a NaN value.
    scaler = MinMaxScaler((0,1))
    scaler.fit(X)
    X_norm = scaler.transform(X)
    #X_norm = X
    #We're ignoring anyrow that has a NaN value.
    #Y = (training_data[(feats_learning.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].equals('nod')*1).values #0: no movement; 1: movement
    Y = training_data[(feats_learning.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].map(map_movements)
    
    #Test data
    feats_testing = testing_data[features]

    X_test = feats_testing.dropna() #REMEMBER THIS!!!! We're ignoring any row that has a NaN value.
    X_test_norm = scaler.transform(X_test)
    #X_test_norm = X_test
#    Y_test = (testing_data[(feats_testing.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].equals('nod')*1).values #0: no movement; 1: movement
    Y_test =  testing_data[(feats_testing.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].map(map_movements)
    
    classifiers_to_test = [(DummyClassifier(strategy="most_frequent"),"MF"),
                           (LogisticRegression(solver="liblinear"),"LR"),
                          (LinearSVC(),"LINEARSVC"),
                          (MLPClassifier(hidden_layer_sizes=(30,30,30,30), random_state=442), "MLP")
                          ]
                        #I think that now we have to fine-tune the n_hidden_layers and their sizes (MLP)
    
    
    for clf, clfname in classifiers_to_test:
        print ("Trying classifier ",clfname,speaker_on_focus)
        clf.fit(X_norm, Y)
        y_pred = clf.predict(X_test_norm)
        accuracy_results.loc[clfname,speaker_on_focus] = accuracy_score(Y_test,y_pred)
        #f1score_resultsb.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred)
        f1score_results.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred, average='macro')
        f1score_resultsw.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred, average='weighted')
    
        print (confusion_matrix(Y_test,y_pred))
        print (accuracy_score(Y_test,y_pred)) #same as f1-score (micro-averagaed)
        print (f1_score(Y_test,y_pred, average='macro'))
        print (f1_score(Y_test,y_pred, average='weighted'))
        y_pred=None
        print ()

        
accuracy_results

Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Trying classifier  MF F5


  'precision', 'predicted', average, warn_for)


[[12154     0     0     0]
 [ 1330     0     0     0]
 [  914     0     0     0]
 [ 2015     0     0     0]]
0.7405105708889295
0.21272797283578954
0.6301092504347008

Trying classifier  LR F5


  'precision', 'predicted', average, warn_for)


[[11829    22     0   303]
 [ 1186     9     0   135]
 [  783     9     0   122]
 [ 1678    17     0   320]]
0.7407542801437884
0.27257291881144613
0.6622493134316133

Trying classifier  LINEARSVC F5


  'precision', 'predicted', average, warn_for)


[[11882    14     0   258]
 [ 1196     4     0   130]
 [  800     7     0   107]
 [ 1723    13     0   279]]
0.7411807713397917
0.2655314381249438
0.6590660500599278

Trying classifier  MLP F5
[[11288   101    11   754]
 [ 1059    69     0   202]
 [  704     2    30   178]
 [ 1289    50    12   664]]
0.7342350575763115
0.3378411871582909
0.6844206434884557

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Trying classifier  MF F6


  'precision', 'predicted', average, warn_for)


[[7718    0    0    0]
 [3393    0    0    0]
 [ 435    0    0    0]
 [3692    0    0    0]]
0.5064969156057225
0.16810419933786372
0.34057703385999005

Trying classifier  LR F6




[[6849   51    0  818]
 [1753  243    0 1397]
 [ 232   21    7  175]
 [2085   51    1 1555]]
0.567922299514372
0.3257774798881913
0.5006275137444952

Trying classifier  LINEARSVC F6


  'precision', 'predicted', average, warn_for)


[[6975   19    0  724]
 [1860  114    0 1419]
 [ 256   10    0  169]
 [2252   24    0 1416]]
0.5581441134007088
0.2943942426475717
0.4774206976466722

Trying classifier  MLP F6
[[5825  471   16 1406]
 [1163 1352   12  866]
 [ 131   16   68  220]
 [1289  463   60 1880]]
0.5988318676991731
0.47340664020419426
0.5911756241386416

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Trying classifier  MF M2


  'precision', 'predicted', average, warn_for)


[[10965     0     0     0]
 [ 1608     0     0     0]
 [  156     0     0     0]
 [ 2616     0     0     0]]
0.7145650048875856
0.20838084378563285
0.5956066346326397

Trying classifier  LR M2


  'precision', 'predicted', average, warn_for)


[[10824    13     0   128]
 [ 1463    24     0   121]
 [  142     1     0    13]
 [ 2253     8     0   355]]
0.7300749429781688
0.2731765389438101
0.6436266873080014

Trying classifier  LINEARSVC M2


  'precision', 'predicted', average, warn_for)


[[10845    10     0   110]
 [ 1481    11     0   116]
 [  145     1     0    10]
 [ 2308     6     0   302]]
0.7271423916585207
0.2618691750509668
0.6360962571091932

Trying classifier  MLP M2
[[10447    80    12   426]
 [ 1285   213     0   110]
 [  113     1     5    37]
 [ 1773    74    18   751]]
0.7439556858911698
0.37477457197152514
0.6954484946261141

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Trying classifier  MF M4


  'precision', 'predicted', average, warn_for)


[[10705     0     0     0]
 [ 1702     0     0     0]
 [  539     0     0     0]
 [ 3566     0     0     0]]
0.6483163759689923
0.19666017562552818
0.509992049435872

Trying classifier  LR M4


  'precision', 'predicted', average, warn_for)


[[10257    20     0   428]
 [ 1367    69     0   266]
 [  457     5     0    77]
 [ 3007    20     0   539]]
0.6580062984496124
0.2731015277665422
0.571205683147957

Trying classifier  LINEARSVC M4


  'precision', 'predicted', average, warn_for)


[[10307    15     0   383]
 [ 1419    34     0   249]
 [  469     4     0    66]
 [ 3081    14     0   471]]
0.654796511627907
0.2577024488536192
0.5613180665475027

Trying classifier  MLP M4




[[10017   104    13   571]
 [ 1190   315     3   194]
 [  403    10     6   120]
 [ 2654   151    19   742]]
0.6710271317829457
0.3462218749346676
0.6110278455346319

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Trying classifier  MF F1


  'precision', 'predicted', average, warn_for)


[[10841     0     0     0]
 [  829     0     0     0]
 [  725     0     0     0]
 [ 3746     0     0     0]]
0.6716436404188092
0.20089318805129347
0.5397145286324446

Trying classifier  LR F1


  'precision', 'predicted', average, warn_for)


[[10554    39     0   248]
 [  594    42     0   193]
 [  547    22     0   156]
 [ 2948    35     0   763]]
0.7037358280156124
0.30350377323687583
0.63013373323664

Trying classifier  LINEARSVC F1


  'precision', 'predicted', average, warn_for)


[[10603    26     0   212]
 [  624    28     0   177]
 [  569    20     0   136]
 [ 3049    25     0   672]]
0.7002664023294716
0.2894576014894526
0.6207012550221706

Trying classifier  MLP F1




[[10159   117    22   543]
 [  377   213     1   238]
 [  365     0    80   280]
 [ 2187    85    57  1417]]
0.7353323833715383
0.4569160301680323
0.7016716853223852

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Trying classifier  MF F3


  'precision', 'predicted', average, warn_for)


[[9660    0    0    0]
 [2223    0    0    0]
 [ 847    0    0    0]
 [3679    0    0    0]]
0.5887013224449997
0.18527753270167632
0.4362925140832941

Trying classifier  LR F3


  'precision', 'predicted', average, warn_for)


[[9433   21    0  206]
 [2044   34    0  145]
 [ 755   15    0   77]
 [3282   32    0  365]]
0.5991833749771467
0.2354773113979646
0.4817481275964589

Trying classifier  LINEARSVC F3


  'precision', 'predicted', average, warn_for)


[[9467   14    0  179]
 [2076   18    0  129]
 [ 766   10    0   71]
 [3348   18    0  313]]
0.5971113413370711
0.22671557530880238
0.4745224574625599

Trying classifier  MLP F3
[[9101   97   23  439]
 [1815  243    3  162]
 [ 628   15   45  159]
 [2654  156   80  789]]
0.6202693643732098
0.33317720542740514
0.5455489398618383

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Trying classifier  MF M3


  'precision', 'predicted', average, warn_for)


[[11972     0     0     0]
 [  778     0     0     0]
 [  616     0     0     0]
 [ 3268     0     0     0]]
0.71973067211735
0.20925679927287982
0.6024341471431808

Trying classifier  LR M3


  'precision', 'predicted', average, warn_for)


[[11563    13     0   396]
 [  602    55     0   121]
 [  513     2     0   101]
 [ 2745    23     0   500]]
0.7285078754358543
0.29961460968916176
0.6582746136583204

Trying classifier  LINEARSVC M3


  'precision', 'predicted', average, warn_for)


[[11638     9     0   325]
 [  626    38     0   114]
 [  524     1     0    91]
 [ 2830    13     0   425]]
0.727485872309727
0.28387543501481927
0.6509738672677228

Trying classifier  MLP M3
[[10794   124    15  1039]
 [  435   180     0   163]
 [  446    10    10   150]
 [ 2101   148    25   994]]
0.720091379103042
0.37822534425562226
0.687708839311644

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Trying classifier  MF M5


  'precision', 'predicted', average, warn_for)


[[9595    0    0    0]
 [2325    0    0    0]
 [1014    0    0    0]
 [4477    0    0    0]]
0.5510883923956119
0.1776457083611049
0.39159395134680414

Trying classifier  LR M5


  'precision', 'predicted', average, warn_for)


[[9474   14    0  107]
 [2167   59    0   99]
 [ 859   31    0  124]
 [3684   73    0  720]]
0.5888805927287347
0.26067957893111854
0.47835121797817826

Trying classifier  LINEARSVC M5


  'precision', 'predicted', average, warn_for)


[[9495    9    0   91]
 [2188   35    0  102]
 [ 875   23    0  116]
 [3788   48    0  641]]
0.5841709264258228
0.2492401492118353
0.46799524853668223

Trying classifier  MLP M5
[[9341   66   14  174]
 [1976  242    4  103]
 [ 749   10   87  168]
 [3076  216   72 1113]]
0.6193211188329217
0.35986475762062026
0.5421548440954482

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Trying classifier  MF M1


  'precision', 'predicted', average, warn_for)


[[12498     0     0     0]
 [  805     0     0     0]
 [  489     0     0     0]
 [ 4243     0     0     0]]
0.6929858608261713
0.204663806373432
0.5673164961586146

Trying classifier  LR M1




[[11587    28     0   883]
 [  679    27     1    98]
 [  388    21     0    80]
 [ 2818    38     0  1387]]
0.7208760742999722
0.32546921200425444
0.674320381196277

Trying classifier  LINEARSVC M1


  'precision', 'predicted', average, warn_for)


[[11709    22     0   767]
 [  695    15     0    95]
 [  397    18     0    74]
 [ 2943    25     0  1275]]
0.7207651788189632
0.31454817097868454
0.6690844090677789

Trying classifier  MLP M1
[[11452    40    54   952]
 [  636    61     2   106]
 [  354     0    52    83]
 [ 2513    56    52  1622]]
0.7311893540338231
0.3960989916327621
0.6970977292074658

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Trying classifier  MF F2


  'precision', 'predicted', average, warn_for)


[[10315     0     0     0]
 [ 1779     0     0     0]
 [ 1060     0     0     0]
 [ 3313     0     0     0]]
0.6264043237991134
0.19257337017399748
0.48251516690223695

Trying classifier  LR F2


  'precision', 'predicted', average, warn_for)


[[9684   60    0  571]
 [1066  137    0  576]
 [ 647   38    0  375]
 [2073   83    0 1157]]
0.6666666666666666
0.33278478767301967
0.6018897702161894

Trying classifier  LINEARSVC F2


  'precision', 'predicted', average, warn_for)


[[9795   37    0  483]
 [1131   75    0  573]
 [ 689   30    0  341]
 [2183   54    0 1076]]
0.6647233861662719
0.3150766126063618
0.5919407070112884

Trying classifier  MLP F2




[[8351  309   64 1591]
 [ 693  587   10  489]
 [ 369   37   95  559]
 [1221  293   74 1725]]
0.6533066132264529
0.4457908487453416
0.6414203446888133

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Trying classifier  MF F4


  'precision', 'predicted', average, warn_for)


[[10008     0     0     0]
 [ 3183     0     0     0]
 [ 1039     0     0     0]
 [ 2693     0     0     0]]
0.5913845062932104
0.18580817645093015
0.43953630678270017

Trying classifier  LR F4




[[9552   13    0  443]
 [2360  119    0  704]
 [ 565   37    1  436]
 [1521   32    1 1139]]
0.638834721976009
0.3221841339392305
0.5509151232540206

Trying classifier  LINEARSVC F4


  'precision', 'predicted', average, warn_for)


[[9606    6    0  396]
 [2459   57    0  667]
 [ 587   16    0  436]
 [1623   18    0 1052]]
0.6331619689180406
0.30678682947045544
0.5382705463142309

Trying classifier  MLP F4
[[9025  142   21  820]
 [1662  906    5  610]
 [ 433    1  153  452]
 [1002  189   75 1427]]
0.6801985463570289
0.48441600089967557
0.6496423128256729

Speaker on focus: M6
Training data size: (181528, 28)
Testing data size: (16532, 28)

Trying classifier  MF M6


  'precision', 'predicted', average, warn_for)


[[9316    0    0    0]
 [1800    0    0    0]
 [1671    0    0    0]
 [3745    0    0    0]]
0.5635131865473022
0.18020736614051377
0.4061969085325493

Trying classifier  LR M6




[[8746   29    0  541]
 [1628   36    0  136]
 [1523   33    0  115]
 [2923   59    1  762]]
0.5773046213404307
0.2622797476070576
0.4775492208317573

Trying classifier  LINEARSVC M6


  'precision', 'predicted', average, warn_for)


[[8817   22    0  477]
 [1654   23    0  123]
 [1539   28    0  104]
 [3026   43    0  676]]
0.5756109363658359
0.2529856659333693
0.4704303239559315

Trying classifier  MLP M6




[[8352   99   24  841]
 [1443  158    5  194]
 [1312   28   99  232]
 [2097   83   52 1513]]
0.6122671183159932
0.3645552819067213
0.5497172139049159



Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.740754,0.567922,0.730075,0.658006,0.703736,0.599183,0.728508,0.588881,0.720876,0.666667,0.638835,0.577305
LINEARSVC,0.741181,0.558144,0.727142,0.654797,0.700266,0.597111,0.727486,0.584171,0.720765,0.664723,0.633162,0.575611
MLP,0.734235,0.598832,0.743956,0.671027,0.735332,0.620269,0.720091,0.619321,0.731189,0.653307,0.680199,0.612267


In [28]:
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.740754,0.567922,0.730075,0.658006,0.703736,0.599183,0.728508,0.588881,0.720876,0.666667,0.638835,0.577305
LINEARSVC,0.741181,0.558144,0.727142,0.654797,0.700266,0.597111,0.727486,0.584171,0.720765,0.664723,0.633162,0.575611
MLP,0.734235,0.598832,0.743956,0.671027,0.735332,0.620269,0.720091,0.619321,0.731189,0.653307,0.680199,0.612267


In [29]:
f1score_resultsw

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.630109,0.340577,0.595607,0.509992,0.539715,0.436293,0.602434,0.391594,0.567316,0.482515,0.439536,0.406197
LR,0.662249,0.500628,0.643627,0.571206,0.630134,0.481748,0.658275,0.478351,0.67432,0.60189,0.550915,0.477549
LINEARSVC,0.659066,0.477421,0.636096,0.561318,0.620701,0.474522,0.650974,0.467995,0.669084,0.591941,0.538271,0.47043
MLP,0.684421,0.591176,0.695448,0.611028,0.701672,0.545549,0.687709,0.542155,0.697098,0.64142,0.649642,0.549717


In [30]:
f1score_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.212728,0.168104,0.208381,0.19666,0.200893,0.185278,0.209257,0.177646,0.204664,0.192573,0.185808,0.180207
LR,0.272573,0.325777,0.273177,0.273102,0.303504,0.235477,0.299615,0.26068,0.325469,0.332785,0.322184,0.26228
LINEARSVC,0.265531,0.294394,0.261869,0.257702,0.289458,0.226716,0.283875,0.24924,0.314548,0.315077,0.306787,0.252986
MLP,0.337841,0.473407,0.374775,0.346222,0.456916,0.333177,0.378225,0.359865,0.396099,0.445791,0.484416,0.364555


In [31]:
accuracy_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.634612,0.660062,0.657047,0.676669
std,0.075262,0.064887,0.066628,0.055109
min,0.506497,0.567922,0.558144,0.598832
25%,0.582404,0.596608,0.593876,0.620032
50%,0.63736,0.662336,0.65976,0.675613
75%,0.698381,0.722784,0.722359,0.731951
max,0.740511,0.740754,0.741181,0.743956


In [32]:
f1score_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.193517,0.290552,0.276515,0.395941
std,0.014236,0.031836,0.028377,0.054662
min,0.168104,0.235477,0.226716,0.333177
25%,0.18401,0.27,0.256523,0.356454
50%,0.194617,0.286396,0.274703,0.3765
75%,0.205593,0.323005,0.297492,0.448572
max,0.212728,0.332785,0.315077,0.484416
