## Real experiments for head movements

In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [34]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score,f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC,LinearSVC
from sklearn.dummy import DummyClassifier
from seqlearn.perceptron import StructuredPerceptron
import sklearn_crfsuite
from sklearn_crfsuite import metrics as skcrfmetrics
from scipy.stats import pearsonr,spearmanr

In [35]:
import glob

In [36]:
# Experiment configuration. Both tags are directory levels under the data root
# and also appear in the data file names (e.g. "...svm.9-11-13-middle.tab").
windowsize = "9-11-13"
memory = "middle"

# Directory holding the .tab files for this configuration; the trailing
# separator is kept because later cells append a glob pattern directly.
PATH = "C:\\Users\\zgk261\\nomco\\{}\\{}\\".format(windowsize, memory)
PATH

'C:\\Users\\zgk261\\nomco\\9-11-13\\middle\\'

In [37]:
# IPython shell escape (Windows `dir`, bare format): list the files in the data
# directory. The path is the same directory as PATH, written out literally.
!dir /B C:\Users\zgk261\nomco\9-11-13\middle\

F2_M4-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F2_M4-all-final2_primary_M4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F3.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F6.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F1.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F5_F2-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBac

### SKELETON TO READ FILES, SPEAKERS and PRIMARY SPEAKERS


In [38]:
# Collect the id of every primary speaker present in the data directory.
# File names look like
#   F2_M4-all-final2_primary_F2.T-Tilt-...--rdXY.svm.9-11-13-middle.tab
# so after splitting on "-", field 2 is "final2_primary_<ID>.T".
speaker_set = set()

for each_file in glob.glob(PATH+"*.tab"):
    # glob returns full paths; keep only the file name (Windows separator).
    filename = each_file.split("\\")[-1]
    fileinfo = filename.split("-")

    # Last "_"-separated token is "<ID>.T"; strip the trailing ".T".
    primary_speaker = fileinfo[2].split("_")[-1][:-2]
    speaker_set.add(primary_speaker)

In [39]:
speaker_set

{'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6'}

### SKELETON TO READ FILES AND CROSS-VALIDATE

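We use leave-one-speaker-out cross-validation: in each iteration we hold out one speaker (the two files in which that speaker is the primary speaker), train a model on the remaining files, and test it on the two held-out files.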

In [40]:

# Leave-one-speaker-out split skeleton: for each speaker, the files in which
# that speaker is the primary speaker form the test set and every other file
# goes to the training set. Only the resulting sizes are reported here.

# Read and relabel every file once. This work does not depend on which speaker
# is held out, so it is done outside the per-speaker loop.
loaded_files = []  # list of (primary_speaker, DataFrame)
for each_file in glob.glob(PATH+"*.tab"):
    filename = each_file.split("\\")[-1]
    fileinfo = filename.split("-")

    primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
    l_speaker, r_speaker = fileinfo[0].split("_")     #fileinfo[0] = F2_M4
    # The secondary speaker is whichever of the two participants is not primary.
    secondary_speaker = r_speaker if primary_speaker == l_speaker else l_speaker

    file_df = pd.read_csv(each_file, sep="\t")
    # Replace speaker ids in the column names with role tags so that columns
    # line up across files regardless of who is speaking.
    file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON")
                       for el in file_df.columns]
    loaded_files.append((primary_speaker, file_df))

# sorted() gives a reproducible iteration order (set order varies between runs).
for speaker_on_focus in sorted(speaker_set):
    train_files = [df for spk, df in loaded_files if spk != speaker_on_focus]
    test_files = [df for spk, df in loaded_files if spk == speaker_on_focus]

    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()


Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Speaker on focus: M6
Training data size: (181528, 28)


### Experiments:

It seems that in the 3-6-9 experiments, which considered both the past and the future frames, the best model was not the MLP but Logistic Regression. I think, though, that the MLP will perform well when cross-validating on the whole data set.

I will, then, try 3 classifiers:

  * SVM, because it is the default classifier that is commonly used
  * Logistic Regression, because it seemed to perform best in our initial experiments with data from two dialogues
  * MLP, because when including more data, I expect to estimate a more general (and thus, better) model

In [41]:
# Placeholder results table: one column per speaker, with a single zero-filled
# "MF" row. The set is converted with sorted() because recent pandas versions
# raise when `columns` is a set, and a sorted list keeps the column order
# stable across runs.
accuracy_results = pd.DataFrame(0,columns=sorted(speaker_set),index=["MF"])
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
import math

def map_movements(mov_class):
    """Map a head-movement annotation to an integer class id.

    Parameters
    ----------
    mov_class : str or missing value
        A single value of the 'PRIMA:HeadMovement' column; missing
        annotations arrive as NaN when the column is read by pandas.

    Returns
    -------
    int
        0 for a missing annotation (NaN, None or pd.NA),
        1 for "Nod" or "Jerk",
        2 for "Shake",
        3 for any other annotation.
    """
    # Explicit missing-value test instead of the `x != x` trick: it also
    # covers None and pd.NA, which the self-comparison does not.
    if pd.isna(mov_class):
        return 0
    if mov_class in ("Nod", "Jerk"):
        return 1
    if mov_class == "Shake":
        return 2
    return 3



In [43]:
# Leave-one-speaker-out evaluation. For each speaker, every classifier is
# trained on the files where someone else is the primary speaker and tested on
# the files where this speaker is primary. Accuracy, macro-F1 and weighted-F1
# are stored per (classifier, speaker).

# sorted() turns the set into a list: recent pandas versions raise when
# `columns` is a set, and a sorted list gives a reproducible column/loop order.
speaker_columns = sorted(speaker_set)

# Float-initialised so that writing scores into the pre-existing "MF" row does
# not require an int -> float dtype change.
accuracy_results = pd.DataFrame(0.0,columns=speaker_columns,index=["MF"])
f1score_results = pd.DataFrame(0.0,columns=speaker_columns,index=["MF"])   # macro-averaged F1
f1score_resultsw = pd.DataFrame(0.0,columns=speaker_columns,index=["MF"])  # weighted-average F1

# Input features: velocity and acceleration of the primary speaker only.
features=['PRIMA:velocity-r', 'PRIMA:velocity-clock', 'PRIMA:velocity-x', 'PRIMA:velocity-y',
          'PRIMA:acceleration-r', 'PRIMA:acceleration-clock', 'PRIMA:acceleration-x', 'PRIMA:acceleration-y'
         ]
label_column = 'PRIMA:HeadMovement'

# Read and relabel every file once; this does not depend on the held-out
# speaker, so it is hoisted out of the cross-validation loop.
loaded_files = []  # list of (primary_speaker, DataFrame)
for each_file in glob.glob(PATH+"*.tab"):
    filename = each_file.split("\\")[-1]
    fileinfo = filename.split("-")

    primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
    l_speaker, r_speaker = fileinfo[0].split("_")     #fileinfo[0] = F2_M4
    secondary_speaker = r_speaker if primary_speaker == l_speaker else l_speaker

    file_df = pd.read_csv(each_file, sep="\t")
    # Speaker ids in column names become role tags so columns align across files.
    file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON")
                       for el in file_df.columns]
    loaded_files.append((primary_speaker, file_df))

for speaker_on_focus in speaker_columns:
    train_files = [df for spk, df in loaded_files if spk != speaker_on_focus]
    test_files = [df for spk, df in loaded_files if spk == speaker_on_focus]

    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()

    #Train data
    # REMEMBER: rows with a NaN in any feature are dropped (train and test alike).
    X = training_data[features].dropna()
    # The scaler is fitted on the training rows only and reused for the test rows.
    scaler = MinMaxScaler((0,1))
    scaler.fit(X)
    X_norm = scaler.transform(X)
    # Labels for exactly the rows that survived dropna(); the index is unique
    # because the frames were concatenated with ignore_index=True.
    Y = training_data.loc[X.index, label_column].map(map_movements)

    #Test data
    X_test = testing_data[features].dropna()
    X_test_norm = scaler.transform(X_test)
    Y_test = testing_data.loc[X_test.index, label_column].map(map_movements)

    # Fresh estimator instances for every held-out speaker.
    # TODO: fine-tune the number of hidden layers and their sizes (MLP).
    classifiers_to_test = [(DummyClassifier(strategy="most_frequent"),"MF"),
                           (LogisticRegression(solver="liblinear"),"LR"),
                           (LinearSVC(),"LINEARSVC"),
                           (MLPClassifier(hidden_layer_sizes=(30,30,30,30), random_state=442), "MLP")
                          ]

    for clf, clfname in classifiers_to_test:
        print ("Trying classifier ",clfname,speaker_on_focus)
        clf.fit(X_norm, Y)
        y_pred = clf.predict(X_test_norm)

        # Compute each metric once and reuse it for both storage and printing.
        accuracy = accuracy_score(Y_test,y_pred) #same as f1-score (micro-averaged)
        f1_macro = f1_score(Y_test,y_pred, average='macro')
        f1_weighted = f1_score(Y_test,y_pred, average='weighted')

        accuracy_results.loc[clfname,speaker_on_focus] = accuracy
        f1score_results.loc[clfname,speaker_on_focus] = f1_macro
        f1score_resultsw.loc[clfname,speaker_on_focus] = f1_weighted

        print (confusion_matrix(Y_test,y_pred))
        print (accuracy)
        print (f1_macro)
        print (f1_weighted)
        print ()


accuracy_results

Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Trying classifier  MF F5


  'precision', 'predicted', average, warn_for)


[[12154     0     0     0]
 [ 1330     0     0     0]
 [  914     0     0     0]
 [ 2015     0     0     0]]
0.7405105708889295
0.21272797283578954
0.6301092504347008

Trying classifier  LR F5


  'precision', 'predicted', average, warn_for)


[[11842     0     0   312]
 [ 1201     1     0   128]
 [  807     3     0   104]
 [ 1692     1     0   322]]
0.7411807713397917
0.2700432898347372
0.6608056259841936

Trying classifier  LINEARSVC F5


  'precision', 'predicted', average, warn_for)


[[11898     0     0   256]
 [ 1216     0     0   114]
 [  817     0     0    97]
 [ 1745     0     0   270]]
0.7413635532809358
0.2628173597590019
0.6572622159517291

Trying classifier  MLP F5
[[11164   111     7   872]
 [ 1056    81     1   192]
 [  731     5    11   167]
 [ 1253    85     1   676]]
0.7269847072442576
0.3289695513827708
0.6790666590892508

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Trying classifier  MF F6


  'precision', 'predicted', average, warn_for)


[[7718    0    0    0]
 [3393    0    0    0]
 [ 435    0    0    0]
 [3692    0    0    0]]
0.5064969156057225
0.16810419933786372
0.34057703385999005

Trying classifier  LR F6


  'precision', 'predicted', average, warn_for)


[[7054    0    0  664]
 [2158    0    0 1235]
 [ 283    0    0  152]
 [2343    0    0 1349]]
0.5514503215645098
0.275461018594755
0.45756836941174667

Trying classifier  LINEARSVC F6


  'precision', 'predicted', average, warn_for)


[[7133    0    0  585]
 [2265    0    0 1128]
 [ 304    0    0  131]
 [2478    0    0 1214]]
0.5477752985956162
0.26916504543542435
0.45028850056249564

Trying classifier  MLP F6
[[6068  314   13 1323]
 [1356  798    4 1235]
 [ 187   15    9  224]
 [1507  289   24 1872]]
0.5740254626591417
0.3846063913301287
0.5487494652921369

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Trying classifier  MF M2


  'precision', 'predicted', average, warn_for)


[[10965     0     0     0]
 [ 1608     0     0     0]
 [  156     0     0     0]
 [ 2616     0     0     0]]
0.7145650048875856
0.20838084378563285
0.5956066346326397

Trying classifier  LR M2


  'precision', 'predicted', average, warn_for)


[[10838     1     0   126]
 [ 1504     2     0   102]
 [  148     0     0     8]
 [ 2298     0     0   318]]
0.7271423916585207
0.26120054791810754
0.6359045779815996

Trying classifier  LINEARSVC M2


  'precision', 'predicted', average, warn_for)


[[10861     0     0   104]
 [ 1519     0     0    89]
 [  149     0     0     7]
 [ 2345     0     0   271]]
0.7254480286738352
0.25406055009620576
0.6306432140654138

Trying classifier  MLP M2
[[10542    30     1   392]
 [ 1344   111     0   153]
 [  125     0     0    31]
 [ 1848    65     0   703]]
0.7400456174649723
0.3331741090427983
0.6812712657380414

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Trying classifier  MF M4


  'precision', 'predicted', average, warn_for)


[[10705     0     0     0]
 [ 1702     0     0     0]
 [  539     0     0     0]
 [ 3566     0     0     0]]
0.6483163759689923
0.19666017562552818
0.509992049435872

Trying classifier  LR M4


  'precision', 'predicted', average, warn_for)


[[10249     0     0   456]
 [ 1454     3     0   245]
 [  471     0     0    68]
 [ 3051     2     0   513]]
0.6519500968992248
0.25141538031332333
0.5585700624413501

Trying classifier  LINEARSVC M4


  'precision', 'predicted', average, warn_for)


[[10298     0     0   407]
 [ 1485     0     0   217]
 [  485     0     0    54]
 [ 3105     0     0   461]]
0.6515867248062015
0.2464365588756497
0.5543509307785209

Trying classifier  MLP M4
[[9636  118    4  947]
 [1176  234    0  292]
 [ 363    6    0  170]
 [2364  185    6 1011]]
0.6589752906976745
0.3352924385952952
0.6097963785781432

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Trying classifier  MF F1


  'precision', 'predicted', average, warn_for)


[[10841     0     0     0]
 [  829     0     0     0]
 [  725     0     0     0]
 [ 3746     0     0     0]]
0.6716436404188092
0.20089318805129347
0.5397145286324446

Trying classifier  LR F1


  'precision', 'predicted', average, warn_for)


[[10560     3     0   278]
 [  672     4     0   153]
 [  617     4     0   104]
 [ 2979     2     0   765]]
0.7018772071123227
0.2838734989610746
0.6234735318766771

Trying classifier  LINEARSVC F1


  'precision', 'predicted', average, warn_for)


[[10602     1     0   238]
 [  687     1     0   141]
 [  635     1     0    89]
 [ 3079     0     0   667]]
0.6982219193358528
0.2740424314380285
0.6146095156785091

Trying classifier  MLP F1
[[10082   102     0   657]
 [  372   179     0   278]
 [  444     1     0   280]
 [ 2084   134     2  1526]]
0.7302521529025463
0.4011096384316532
0.6924424919718654

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Trying classifier  MF F3


  'precision', 'predicted', average, warn_for)


[[9660    0    0    0]
 [2223    0    0    0]
 [ 847    0    0    0]
 [3679    0    0    0]]
0.5887013224449997
0.18527753270167632
0.4362925140832941

Trying classifier  LR F3


  'precision', 'predicted', average, warn_for)


[[9439    2    0  219]
 [2113    2    0  108]
 [ 779    0    0   68]
 [3345    0    0  334]]
0.5957096715217259
0.22461114159238482
0.47286489381139424

Trying classifier  LINEARSVC F3


  'precision', 'predicted', average, warn_for)


[[9487    0    0  173]
 [2134    0    0   89]
 [ 788    0    0   59]
 [3401    0    0  278]]
0.5951002498628801
0.21873053081726931
0.46769537707536046

Trying classifier  MLP F3
[[8991   71    1  597]
 [1815  126    0  282]
 [ 621   12    0  214]
 [2478  131    0 1070]]
0.620817843866171
0.3069290063369498
0.5446764900391825

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Trying classifier  MF M3


  'precision', 'predicted', average, warn_for)


[[11972     0     0     0]
 [  778     0     0     0]
 [  616     0     0     0]
 [ 3268     0     0     0]]
0.71973067211735
0.20925679927287982
0.6024341471431808

Trying classifier  LR M3


  'precision', 'predicted', average, warn_for)


[[11489     1     0   482]
 [  649     5     0   124]
 [  534     0     0    82]
 [ 2787     1     0   480]]
0.7198509077792473
0.2667038624472314
0.646006547866263

Trying classifier  LINEARSVC M3


  'precision', 'predicted', average, warn_for)


[[11579     0     0   393]
 [  671     1     0   106]
 [  549     0     0    67]
 [ 2849     0     0   419]]
0.7213538535529638
0.25951379443303796
0.6422895140077978

Trying classifier  MLP M3


  'precision', 'predicted', average, warn_for)


[[10616   104     0  1252]
 [  459   125     0   194]
 [  440     8     0   168]
 [ 1986   146     0  1136]]
0.7140194781772273
0.35659391653113165
0.6841463825724875

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Trying classifier  MF M5


  'precision', 'predicted', average, warn_for)


[[9595    0    0    0]
 [2325    0    0    0]
 [1014    0    0    0]
 [4477    0    0    0]]
0.5510883923956119
0.1776457083611049
0.39159395134680414

Trying classifier  LR M5


  'precision', 'predicted', average, warn_for)


[[9463    2    0  130]
 [2220    6    0   99]
 [ 912    1    0  101]
 [3779    5    0  693]]
0.5836540118316007
0.24648060471548186
0.46711226341886863

Trying classifier  LINEARSVC M5


  'precision', 'predicted', average, warn_for)


[[9470    1    0  124]
 [2240    1    0   84]
 [ 928    0    0   86]
 [3859    0    0  618]]
0.5794612601229108
0.2390271413728925
0.45912175058049526

Trying classifier  MLP M5
[[9360   41    5  189]
 [2047  117    0  161]
 [ 828    5    7  174]
 [3156  151   11 1159]]
0.6112802251450232
0.3069221742882374
0.5222731673452562

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Trying classifier  MF M1


  'precision', 'predicted', average, warn_for)


[[12498     0     0     0]
 [  805     0     0     0]
 [  489     0     0     0]
 [ 4243     0     0     0]]
0.6929858608261713
0.204663806373432
0.5673164961586146

Trying classifier  LR M1


  'precision', 'predicted', average, warn_for)


[[11407     1     0  1090]
 [  706     0     0    99]
 [  403     1     0    85]
 [ 2719     4     0  1520]]
0.7167729415026338
0.31365808834615877
0.6717059264341129

Trying classifier  LINEARSVC M1


  'precision', 'predicted', average, warn_for)


[[11496     0     0  1002]
 [  710     0     0    95]
 [  411     0     0    78]
 [ 2825     1     0  1417]]
0.7159966731355697
0.3093842013846161
0.6678103451616642

Trying classifier  MLP M1
[[11232    30    31  1205]
 [  639    41     3   122]
 [  370     1    18   100]
 [ 2420    59    28  1736]]
0.7223177155530912
0.3617034831477308
0.6891088849584287

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Trying classifier  MF F2


  'precision', 'predicted', average, warn_for)


[[10315     0     0     0]
 [ 1779     0     0     0]
 [ 1060     0     0     0]
 [ 3313     0     0     0]]
0.6264043237991134
0.19257337017399748
0.48251516690223695

Trying classifier  LR F2


  'precision', 'predicted', average, warn_for)


[[9789    2    0  524]
 [1226   10    0  543]
 [ 759    0    0  301]
 [2246    2    0 1065]]
0.6597437298840104
0.2965918459456165
0.5797397743459362

Trying classifier  LINEARSVC F2


  'precision', 'predicted', average, warn_for)


[[9884    1    0  430]
 [1274    0    0  505]
 [ 797    0    0  263]
 [2353    0    0  960]]
0.6585291795712638
0.2884419881274062
0.573500046062096

Trying classifier  MLP F2
[[8750  284    0 1281]
 [ 790  434    1  554]
 [ 497   39    7  517]
 [1444  260   13 1596]]
0.6550677111799357
0.39147573648344614
0.6257570114920401

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Trying classifier  MF F4


  'precision', 'predicted', average, warn_for)


[[10008     0     0     0]
 [ 3183     0     0     0]
 [ 1039     0     0     0]
 [ 2693     0     0     0]]
0.5913845062932104
0.18580817645093015
0.43953630678270017

Trying classifier  LR F4


  'precision', 'predicted', average, warn_for)


[[9568    0    0  440]
 [2552    3    0  628]
 [ 636    7    0  396]
 [1615    0    0 1078]]
0.6292619511906873
0.2996650822231861
0.5300912042246081

Trying classifier  LINEARSVC F4


  'precision', 'predicted', average, warn_for)


[[9605    0    0  403]
 [2643    0    0  540]
 [ 663    0    0  376]
 [1710    0    0  983]]
0.6256573893517697
0.29339210500443197
0.5238985765171827

Trying classifier  MLP F4
[[9066   70    1  871]
 [1843  502    0  838]
 [ 516    2    2  519]
 [1009  146    1 1537]]
0.6563257105714116
0.3862553137424361
0.6021742805090549

Speaker on focus: M6
Training data size: (181528, 28)
Testing data size: (16532, 28)

Trying classifier  MF M6


  'precision', 'predicted', average, warn_for)


[[9316    0    0    0]
 [1800    0    0    0]
 [1671    0    0    0]
 [3745    0    0    0]]
0.5635131865473022
0.18020736614051377
0.4061969085325493

Trying classifier  LR M6


  'precision', 'predicted', average, warn_for)


[[8682    1    0  633]
 [1655    5    0  140]
 [1581    1    0   89]
 [2944    7    0  794]]
0.5734938301475926
0.25442646283907067
0.47190485592994247

Trying classifier  LINEARSVC M6


  'precision', 'predicted', average, warn_for)


[[8743    0    0  573]
 [1673    1    0  126]
 [1589    0    0   82]
 [3056    1    0  688]]
0.5705298814420517
0.24558256293274908
0.4641200546578269

Trying classifier  MLP M6
[[8272  111    8  925]
 [1463  127    0  210]
 [1369   32    5  265]
 [2020  144    5 1576]]
0.6036777159448342
0.33171946908328764
0.5347801920921669



Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.741181,0.55145,0.727142,0.65195,0.701877,0.59571,0.719851,0.583654,0.716773,0.659744,0.629262,0.573494
LINEARSVC,0.741364,0.547775,0.725448,0.651587,0.698222,0.5951,0.721354,0.579461,0.715997,0.658529,0.625657,0.57053
MLP,0.726985,0.574025,0.740046,0.658975,0.730252,0.620818,0.714019,0.61128,0.722318,0.655068,0.656326,0.603678


In [44]:
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.741181,0.55145,0.727142,0.65195,0.701877,0.59571,0.719851,0.583654,0.716773,0.659744,0.629262,0.573494
LINEARSVC,0.741364,0.547775,0.725448,0.651587,0.698222,0.5951,0.721354,0.579461,0.715997,0.658529,0.625657,0.57053
MLP,0.726985,0.574025,0.740046,0.658975,0.730252,0.620818,0.714019,0.61128,0.722318,0.655068,0.656326,0.603678


In [45]:
f1score_resultsw

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.630109,0.340577,0.595607,0.509992,0.539715,0.436293,0.602434,0.391594,0.567316,0.482515,0.439536,0.406197
LR,0.660806,0.457568,0.635905,0.55857,0.623474,0.472865,0.646007,0.467112,0.671706,0.57974,0.530091,0.471905
LINEARSVC,0.657262,0.450289,0.630643,0.554351,0.61461,0.467695,0.64229,0.459122,0.66781,0.5735,0.523899,0.46412
MLP,0.679067,0.548749,0.681271,0.609796,0.692442,0.544676,0.684146,0.522273,0.689109,0.625757,0.602174,0.53478


In [46]:
f1score_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.212728,0.168104,0.208381,0.19666,0.200893,0.185278,0.209257,0.177646,0.204664,0.192573,0.185808,0.180207
LR,0.270043,0.275461,0.261201,0.251415,0.283873,0.224611,0.266704,0.246481,0.313658,0.296592,0.299665,0.254426
LINEARSVC,0.262817,0.269165,0.254061,0.246437,0.274042,0.218731,0.259514,0.239027,0.309384,0.288442,0.293392,0.245583
MLP,0.32897,0.384606,0.333174,0.335292,0.40111,0.306929,0.356594,0.306922,0.361703,0.391476,0.386255,0.331719


In [47]:
accuracy_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.634612,0.654341,0.652585,0.667816
std,0.075262,0.06709,0.068194,0.057532
min,0.506497,0.55145,0.547775,0.574025
25%,0.582404,0.592696,0.591191,0.618433
50%,0.63736,0.655847,0.655058,0.657651
75%,0.698381,0.717542,0.717336,0.723484
max,0.740511,0.741181,0.741364,0.740046


In [48]:
f1score_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.193517,0.270344,0.263383,0.352063
std,0.014236,0.025233,0.025379,0.032975
min,0.168104,0.224611,0.218731,0.306922
25%,0.18401,0.253674,0.246223,0.331032
50%,0.194617,0.268374,0.261166,0.345943
75%,0.205593,0.287053,0.277642,0.385019
max,0.212728,0.313658,0.309384,0.40111
