## Real experiments for head movements

In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [50]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score,f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC,LinearSVC
from sklearn.dummy import DummyClassifier
from seqlearn.perceptron import StructuredPerceptron
import sklearn_crfsuite
from sklearn_crfsuite import metrics as skcrfmetrics
from scipy.stats import pearsonr,spearmanr

In [51]:
import glob

In [52]:
windowsize="9-11-13"
memory="middle"

PATH = "C:\\Users\\zgk261\\nomco\\"+windowsize+"\\"+memory+"\\"
PATH

'C:\\Users\\zgk261\\nomco\\9-11-13\\middle\\'

In [53]:
!dir /B C:\Users\zgk261\nomco\9-11-13\middle\

F2_M4-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F2_M4-all-final2_primary_M4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F3.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F6.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F1.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F5_F2-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBac

### SKELETON TO READ FILES, SPEAKERS and PRIMARY SPEAKERS


In [54]:
speaker_set = set()

for each_file in glob.glob(PATH+"*.tab"):
    filename=each_file.split("\\")[-1]
    fileinfo=filename.split("-")
    
    primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
    speakers = fileinfo[0]
    l_speaker,r_speaker = speakers.split("_")

    speaker_set.add(primary_speaker)

In [55]:
speaker_set

{'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6'}

### SKELETON TO READ FILES AND COSS_VALIDATE

In the following way: In each iteration, we will leave one speaker out (two files), train a model and test on those two files.

In [56]:

for speaker_on_focus in speaker_set:
    train_files = []
    test_files  = []
    
    for each_file in glob.glob(PATH+"*.tab"):
        filename=each_file.split("\\")[-1]
        fileinfo=filename.split("-")

        primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
        speakers = fileinfo[0]
        l_speaker,r_speaker = speakers.split("_")
        if primary_speaker == l_speaker:
            secondary_speaker = r_speaker
        else:
            secondary_speaker = l_speaker

        #print (primary_speaker,l_speaker,r_speaker,speaker_on_focus)

        file_df = pd.read_csv(each_file,sep="\t")
        file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON") for el in file_df.columns]

        if primary_speaker != speaker_on_focus:
            #FILES TO TRAIN!
            train_files.append(file_df)
        else:
            #FILES TO TEST!
            test_files.append(file_df)
            
        
        
    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()


Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Speaker on focus: M6
Training data size: (181528, 28)


### Experiments:

It seems that in the 3-6-9 experiments considering both the past and future of frames, the MLP was not the best model, but the Logistic Regression. I think, though, that the MLP will show good performance when cross-validating with the whole data.

I will, then, try 3 classifiers:

  * SVM, because it is the default classifier that is commonly used
  * Logistic Regression, because it seems to perform the best in our initial experiments with two dialogue data
  * MLP, because when including more data, I expect to estimate a more general (and thus, better) model

In [57]:
accuracy_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0,0,0,0,0,0,0,0,0,0,0,0


In [58]:
import math

def map_movements(mov_class):
    if mov_class == "Nod" or mov_class == "Jerk":
        return 1
    elif mov_class == "Shake":
        return 2
    elif mov_class!=mov_class: #This returns True if the value is NAN, and it works. It might be source of errors.
        return 0
    else:
        return 3



In [59]:
accuracy_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
f1score_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
f1score_resultsw = pd.DataFrame(0,columns=speaker_set,index=["MF"])
#f1score_resultsb = pd.DataFrame(0,columns=speaker_set,index=["MF"])
#f1_score(y_true, y_pred, average='micro')

for speaker_on_focus in speaker_set:
    train_files = []
    test_files  = []
    
    for each_file in glob.glob(PATH+"*.tab"):
        filename=each_file.split("\\")[-1]
        fileinfo=filename.split("-")

        primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
        speakers = fileinfo[0]
        l_speaker,r_speaker = speakers.split("_")
        if primary_speaker == l_speaker:
            secondary_speaker = r_speaker
        else:
            secondary_speaker = l_speaker

        #print (primary_speaker,l_speaker,r_speaker,speaker_on_focus)

        file_df = pd.read_csv(each_file,sep="\t")
        file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON") for el in file_df.columns]

        if primary_speaker != speaker_on_focus:
            #FILES TO TRAIN!
            train_files.append(file_df)
        else:
            #FILES TO TEST!
            test_files.append(file_df)
            
        
        
    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()
    
    
    
    
    
    
    
    #Train data
    features=['PRIMA:velocity-r', 'PRIMA:velocity-clock', 'PRIMA:velocity-x', 'PRIMA:velocity-y'
             ]

    feats_learning = training_data[features]
    X = feats_learning.dropna() #REMEMBER THIS!!!! We're ignoring any row that has a NaN value.
    scaler = MinMaxScaler((0,1))
    scaler.fit(X)
    X_norm = scaler.transform(X)
    #X_norm = X
    #We're ignoring anyrow that has a NaN value.
    #Y = (training_data[(feats_learning.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].equals('nod')*1).values #0: no movement; 1: movement
    Y = training_data[(feats_learning.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].map(map_movements)
    
    #Test data
    feats_testing = testing_data[features]

    X_test = feats_testing.dropna() #REMEMBER THIS!!!! We're ignoring any row that has a NaN value.
    X_test_norm = scaler.transform(X_test)
    #X_test_norm = X_test
#    Y_test = (testing_data[(feats_testing.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].equals('nod')*1).values #0: no movement; 1: movement
    Y_test =  testing_data[(feats_testing.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].map(map_movements)
    
    classifiers_to_test = [(DummyClassifier(strategy="most_frequent"),"MF"),
                           (LogisticRegression(solver="liblinear"),"LR"),
                          (LinearSVC(),"LINEARSVC"),
                          (MLPClassifier(hidden_layer_sizes=(30,30,30,30), random_state=442), "MLP")
                          ]
                        #I think that now we have to fine-tune the n_hidden_layers and their sizes (MLP)
    
    
    for clf, clfname in classifiers_to_test:
        print ("Trying classifier ",clfname,speaker_on_focus)
        clf.fit(X_norm, Y)
        y_pred = clf.predict(X_test_norm)
        accuracy_results.loc[clfname,speaker_on_focus] = accuracy_score(Y_test,y_pred)
        #f1score_resultsb.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred)
        f1score_results.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred, average='macro')
        f1score_resultsw.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred, average='weighted')
    
        print (confusion_matrix(Y_test,y_pred))
        print (accuracy_score(Y_test,y_pred)) #same as f1-score (micro-averagaed)
        print (f1_score(Y_test,y_pred, average='macro'))
        print (f1_score(Y_test,y_pred, average='weighted'))
        y_pred=None
        print ()

        
accuracy_results

Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Trying classifier  MF F5


  'precision', 'predicted', average, warn_for)


[[12154     0     0     0]
 [ 1330     0     0     0]
 [  914     0     0     0]
 [ 2015     0     0     0]]
0.7405105708889295
0.21272797283578954
0.6301092504347008

Trying classifier  LR F5


  'precision', 'predicted', average, warn_for)


[[11891     0     0   263]
 [ 1230     0     0   100]
 [  850     0     0    64]
 [ 1770     0     0   245]]
0.7394138792420643
0.2587284324831014
0.6537134231660671

Trying classifier  LINEARSVC F5


  'precision', 'predicted', average, warn_for)


[[11944     0     0   210]
 [ 1243     0     0    87]
 [  858     0     0    56]
 [ 1827     0     0   188]]
0.7391701699872053
0.24986405983176585
0.6492350603268261

Trying classifier  MLP F5


  'precision', 'predicted', average, warn_for)


[[11239    27     0   888]
 [ 1097    26     0   207]
 [  798     1     0   115]
 [ 1354    38     0   623]]
0.7243039054408091
0.3010195537771085
0.6674890456730501

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Trying classifier  MF F6


  'precision', 'predicted', average, warn_for)


[[7718    0    0    0]
 [3393    0    0    0]
 [ 435    0    0    0]
 [3692    0    0    0]]
0.5064969156057225
0.16810419933786372
0.34057703385999005

Trying classifier  LR F6


  'precision', 'predicted', average, warn_for)


[[7334    0    0  384]
 [2782    0    0  611]
 [ 381    0    0   54]
 [2826    0    0  866]]
0.5381283633022707
0.2515036864805561
0.4279296535305882

Trying classifier  LINEARSVC F6


  'precision', 'predicted', average, warn_for)


[[7413    0    0  305]
 [2906    0    0  487]
 [ 391    0    0   44]
 [2958    0    0  734]]
0.5346502165638535
0.24305966175366023
0.41872679211673014

Trying classifier  MLP F6


  'precision', 'predicted', average, warn_for)


[[6076   11    0 1631]
 [1721   99    0 1573]
 [ 244    0    0  191]
 [1676   47    0 1969]]
0.5344533403333771
0.2969033217024204
0.470801194828184

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Trying classifier  MF M2


  'precision', 'predicted', average, warn_for)


[[10965     0     0     0]
 [ 1608     0     0     0]
 [  156     0     0     0]
 [ 2616     0     0     0]]
0.7145650048875856
0.20838084378563285
0.5956066346326397

Trying classifier  LR M2


  'precision', 'predicted', average, warn_for)


[[10877     0     0    88]
 [ 1573     0     0    35]
 [  150     0     0     6]
 [ 2354     0     0   262]]
0.7259042033235582
0.2533917829731065
0.629447139877914

Trying classifier  LINEARSVC M2


  'precision', 'predicted', average, warn_for)


[[10890     0     0    75]
 [ 1591     0     0    17]
 [  152     0     0     4]
 [ 2403     0     0   213]]
0.7235581622678396
0.24582527890939107
0.6233913202925364

Trying classifier  MLP M2


  'precision', 'predicted', average, warn_for)


[[10656     5     0   304]
 [ 1409    19     0   180]
 [  139     0     0    17]
 [ 1980    19     0   617]]
0.7358748778103616
0.3002305938898933
0.6642945489075717

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Trying classifier  MF M4


  'precision', 'predicted', average, warn_for)


[[10705     0     0     0]
 [ 1702     0     0     0]
 [  539     0     0     0]
 [ 3566     0     0     0]]
0.6483163759689923
0.19666017562552818
0.509992049435872

Trying classifier  LR M4


  'precision', 'predicted', average, warn_for)


[[10332     0     0   373]
 [ 1563     0     0   139]
 [  492     0     0    47]
 [ 3154     0     0   412]]
0.6506782945736435
0.24223444542305522
0.5496553666225803

Trying classifier  LINEARSVC M4


  'precision', 'predicted', average, warn_for)


[[10370     0     0   335]
 [ 1591     0     0   111]
 [  499     0     0    40]
 [ 3212     0     0   354]]
0.6494670542635659
0.236745263789236
0.5444686599016326

Trying classifier  MLP M4


  'precision', 'predicted', average, warn_for)


[[10016    26     0   663]
 [ 1398    74     0   230]
 [  447     0     0    92]
 [ 2749    56     0   761]]
0.6571584302325582
0.28937153205722005
0.5831078078135699

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Trying classifier  MF F1


  'precision', 'predicted', average, warn_for)


[[10841     0     0     0]
 [  829     0     0     0]
 [  725     0     0     0]
 [ 3746     0     0     0]]
0.6716436404188092
0.20089318805129347
0.5397145286324446

Trying classifier  LR F1


  'precision', 'predicted', average, warn_for)


[[10605     0     0   236]
 [  762     0     0    67]
 [  667     0     0    58]
 [ 3162     0     0   584]]
0.6932036428969705
0.26589935020811073
0.6049125616873265

Trying classifier  LINEARSVC F1


  'precision', 'predicted', average, warn_for)


[[10644     0     0   197]
 [  775     0     0    54]
 [  678     0     0    47]
 [ 3229     0     0   517]]
0.6914689300539
0.2600621116352259
0.5990250774485912

Trying classifier  MLP F1


  'precision', 'predicted', average, warn_for)


[[10318    62     0   461]
 [  522   143     0   164]
 [  596     7     0   122]
 [ 2532   180     0  1034]]
0.712161576110526
0.360048086553161
0.6575357274241863

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Trying classifier  MF F3


  'precision', 'predicted', average, warn_for)


[[9660    0    0    0]
 [2223    0    0    0]
 [ 847    0    0    0]
 [3679    0    0    0]]
0.5887013224449997
0.18527753270167632
0.4362925140832941

Trying classifier  LR F3


  'precision', 'predicted', average, warn_for)


[[9476    0    0  184]
 [2171    0    0   52]
 [ 812    0    0   35]
 [3445    0    0  234]]
0.5917484307392284
0.21330242875602132
0.4615152037944927

Trying classifier  LINEARSVC F3


  'precision', 'predicted', average, warn_for)


[[9500    0    0  160]
 [2187    0    0   36]
 [ 816    0    0   31]
 [3495    0    0  184]]
0.5901639344262295
0.20762133316155462
0.45611216698648893

Trying classifier  MLP F3


  'precision', 'predicted', average, warn_for)


[[9068   49    0  543]
 [1941   37    0  245]
 [ 698   11    0  138]
 [2685  113    0  881]]
0.6085684685233713
0.27640731062349666
0.5200314770488633

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Trying classifier  MF M3


  'precision', 'predicted', average, warn_for)


[[11972     0     0     0]
 [  778     0     0     0]
 [  616     0     0     0]
 [ 3268     0     0     0]]
0.71973067211735
0.20925679927287982
0.6024341471431808

Trying classifier  LR M3


  'precision', 'predicted', average, warn_for)


[[11478     0     0   494]
 [  705     0     0    73]
 [  572     0     0    44]
 [ 2855     0     0   413]]
0.7148611278105086
0.25618324868866826
0.6368287414086554

Trying classifier  LINEARSVC M3


  'precision', 'predicted', average, warn_for)


[[11551     0     0   421]
 [  721     0     0    57]
 [  576     0     0    40]
 [ 2935     0     0   333]]
0.714440302993868
0.24851106530807557
0.6308374703049264

Trying classifier  MLP M3


  'precision', 'predicted', average, warn_for)


[[10759    38     0  1175]
 [  540    33     0   205]
 [  495     3     0   118]
 [ 2203    63     0  1002]]
0.7090296982084886
0.31204215039125904
0.6680035985427646

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Trying classifier  MF M5


  'precision', 'predicted', average, warn_for)


[[9595    0    0    0]
 [2325    0    0    0]
 [1014    0    0    0]
 [4477    0    0    0]]
0.5510883923956119
0.1776457083611049
0.39159395134680414

Trying classifier  LR M5


  'precision', 'predicted', average, warn_for)


[[9464    0    0  131]
 [2285    0    0   40]
 [ 970    0    0   44]
 [4018    0    0  459]]
0.5699270576072598
0.22425975697979944
0.44196029297590694

Trying classifier  LINEARSVC M5


  'precision', 'predicted', average, warn_for)


[[9474    0    0  121]
 [2299    0    0   26]
 [ 977    0    0   37]
 [4093    0    0  384]]
0.5661937855378784
0.2172313989886468
0.434106502110659

Trying classifier  MLP M5


  'precision', 'predicted', average, warn_for)


[[9342    0    0  253]
 [2111    1    0  213]
 [ 923    0    0   91]
 [3312    9    0 1156]]
0.6030095916374706
0.27833919166739723
0.5034075731925789

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Trying classifier  MF M1


  'precision', 'predicted', average, warn_for)


[[12498     0     0     0]
 [  805     0     0     0]
 [  489     0     0     0]
 [ 4243     0     0     0]]
0.6929858608261713
0.204663806373432
0.5673164961586146

Trying classifier  LR M1


  'precision', 'predicted', average, warn_for)


[[11381     0     0  1117]
 [  738     0     0    67]
 [  433     0     0    56]
 [ 2807     0     0  1436]]
0.7106736900471305
0.3080476279598266
0.6638956620850025

Trying classifier  LINEARSVC M1


  'precision', 'predicted', average, warn_for)


[[11463     0     0  1035]
 [  747     0     0    58]
 [  435     0     0    54]
 [ 2935     0     0  1308]]
0.7081230939839201
0.3017688737710168
0.6577167090878696

Trying classifier  MLP M1


  'precision', 'predicted', average, warn_for)


[[11141    36     0  1321]
 [  665    27     0   113]
 [  385     6     0    98]
 [ 2333    87     0  1823]]
0.7203215968949266
0.3401604905868115
0.6868304046643673

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Trying classifier  MF F2


  'precision', 'predicted', average, warn_for)


[[10315     0     0     0]
 [ 1779     0     0     0]
 [ 1060     0     0     0]
 [ 3313     0     0     0]]
0.6264043237991134
0.19257337017399748
0.48251516690223695

Trying classifier  LR F2


  'precision', 'predicted', average, warn_for)


[[10020     0     0   295]
 [ 1484     0     0   295]
 [  913     0     0   147]
 [ 2650     0     0   663]]
0.6487520495536527
0.26772133763001404
0.5511734668787912

Trying classifier  LINEARSVC F2


  'precision', 'predicted', average, warn_for)


[[10095     0     0   220]
 [ 1523     0     0   256]
 [  940     0     0   120]
 [ 2762     0     0   551]]
0.6465051314750714
0.2586700716596651
0.5430641023694748

Trying classifier  MLP F2


  'precision', 'predicted', average, warn_for)


[[9215   86    0 1014]
 [1056  161    0  562]
 [ 682   19    0  359]
 [1918  128    0 1267]]
0.6463229489281593
0.33300175878663646
0.592175322637978

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Trying classifier  MF F4


  'precision', 'predicted', average, warn_for)


[[10008     0     0     0]
 [ 3183     0     0     0]
 [ 1039     0     0     0]
 [ 2693     0     0     0]]
0.5913845062932104
0.18580817645093015
0.43953630678270017

Trying classifier  LR F4


  'precision', 'predicted', average, warn_for)


[[9662    0    0  346]
 [2889    0    0  294]
 [ 813    0    0  226]
 [1896    0    0  797]]
0.6180346274301247
0.282673450436845
0.5104999490985552

Trying classifier  LINEARSVC F4


  'precision', 'predicted', average, warn_for)


[[9709    0    0  299]
 [2957    0    0  226]
 [ 833    0    0  206]
 [1985    0    0  708]]
0.6155527979672635
0.27610509016058643
0.5050081492223149

Trying classifier  MLP F4


  'precision', 'predicted', average, warn_for)


[[9230    1    0  777]
 [2223    8    0  952]
 [ 694    1    0  344]
 [1257    3    0 1433]]
0.630561957099805
0.31395585749049315
0.5408113193611181

Speaker on focus: M6
Training data size: (181528, 28)
Testing data size: (16532, 28)

Trying classifier  MF M6


  'precision', 'predicted', average, warn_for)


[[9316    0    0    0]
 [1800    0    0    0]
 [1671    0    0    0]
 [3745    0    0    0]]
0.5635131865473022
0.18020736614051377
0.4061969085325493

Trying classifier  LR M6


  'precision', 'predicted', average, warn_for)


[[8748    0    0  568]
 [1710    0    0   90]
 [1639    0    0   32]
 [3051    0    0  694]]
0.5711347689329784
0.24644784252358476
0.46431280086146604

Trying classifier  LINEARSVC M6


  'precision', 'predicted', average, warn_for)


[[8794    0    0  522]
 [1719    0    0   81]
 [1645    0    0   26]
 [3151    0    0  594]]
0.5678683764819743
0.23834098433016992
0.4566503017121667

Trying classifier  MLP M6


  'precision', 'predicted', average, warn_for)


[[8450   26    0  840]
 [1542   27    0  231]
 [1508    0    0  163]
 [2280   81    0 1384]]
0.5964795548028067
0.2986661935751872
0.5139227904311249



Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.739414,0.538128,0.725904,0.650678,0.693204,0.591748,0.714861,0.569927,0.710674,0.648752,0.618035,0.571135
LINEARSVC,0.73917,0.53465,0.723558,0.649467,0.691469,0.590164,0.71444,0.566194,0.708123,0.646505,0.615553,0.567868
MLP,0.724304,0.534453,0.735875,0.657158,0.712162,0.608568,0.70903,0.60301,0.720322,0.646323,0.630562,0.59648


In [60]:
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.739414,0.538128,0.725904,0.650678,0.693204,0.591748,0.714861,0.569927,0.710674,0.648752,0.618035,0.571135
LINEARSVC,0.73917,0.53465,0.723558,0.649467,0.691469,0.590164,0.71444,0.566194,0.708123,0.646505,0.615553,0.567868
MLP,0.724304,0.534453,0.735875,0.657158,0.712162,0.608568,0.70903,0.60301,0.720322,0.646323,0.630562,0.59648


In [61]:
f1score_resultsw

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.630109,0.340577,0.595607,0.509992,0.539715,0.436293,0.602434,0.391594,0.567316,0.482515,0.439536,0.406197
LR,0.653713,0.42793,0.629447,0.549655,0.604913,0.461515,0.636829,0.44196,0.663896,0.551173,0.5105,0.464313
LINEARSVC,0.649235,0.418727,0.623391,0.544469,0.599025,0.456112,0.630837,0.434107,0.657717,0.543064,0.505008,0.45665
MLP,0.667489,0.470801,0.664295,0.583108,0.657536,0.520031,0.668004,0.503408,0.68683,0.592175,0.540811,0.513923


In [62]:
f1score_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.212728,0.168104,0.208381,0.19666,0.200893,0.185278,0.209257,0.177646,0.204664,0.192573,0.185808,0.180207
LR,0.258728,0.251504,0.253392,0.242234,0.265899,0.213302,0.256183,0.22426,0.308048,0.267721,0.282673,0.246448
LINEARSVC,0.249864,0.24306,0.245825,0.236745,0.260062,0.207621,0.248511,0.217231,0.301769,0.25867,0.276105,0.238341
MLP,0.30102,0.296903,0.300231,0.289372,0.360048,0.276407,0.312042,0.278339,0.34016,0.333002,0.313956,0.298666


In [63]:
accuracy_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.634612,0.647705,0.645597,0.65652
std,0.075262,0.069421,0.070217,0.064146
min,0.506497,0.538128,0.53465,0.534453
25%,0.582404,0.586595,0.58459,0.607179
50%,0.63736,0.649715,0.647986,0.651741
75%,0.698381,0.711721,0.709702,0.714202
max,0.740511,0.739414,0.73917,0.735875


In [64]:
f1score_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.193517,0.255866,0.24865,0.308346
std,0.014236,0.024866,0.0248,0.025159
min,0.168104,0.213302,0.207621,0.276407
25%,0.18401,0.245394,0.237942,0.29502
50%,0.194617,0.254788,0.247168,0.300625
75%,0.205593,0.266355,0.259018,0.318717
max,0.212728,0.308048,0.301769,0.360048
