## Real experiments for head movements

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [2]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score,f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC,LinearSVC
from sklearn.dummy import DummyClassifier
from seqlearn.perceptron import StructuredPerceptron
import sklearn_crfsuite
from sklearn_crfsuite import metrics as skcrfmetrics
from scipy.stats import pearsonr,spearmanr



In [3]:
import glob

In [4]:
windowsize="9-11-13"
memory="middle"

PATH = "C:\\Users\\zgk261\\nomco\\"+windowsize+"\\"+memory+"\\"
PATH

'C:\\Users\\zgk261\\nomco\\9-11-13\\middle\\'

In [5]:
!dir /B C:\Users\zgk261\nomco\9-11-13\middle\

F2_M4-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F2_M4-all-final2_primary_M4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F3.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F3_F6-all-final2_primary_F6.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F1.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F4_F1-all-final2_primary_F4.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBackward-HF-HeadForward--rdXY.svm.9-11-13-middle.tab
F5_F2-all-final2_primary_F2.T-Tilt-ST-SideTurn-HO-HeadOther-S-Shake-W-Waggle-N-Nod-J-Jerk-HB-HeadBac

### SKELETON TO READ FILES, SPEAKERS and PRIMARY SPEAKERS


In [6]:
speaker_set = set()

for each_file in glob.glob(PATH+"*.tab"):
    filename=each_file.split("\\")[-1]
    fileinfo=filename.split("-")
    
    primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
    speakers = fileinfo[0]
    l_speaker,r_speaker = speakers.split("_")

    speaker_set.add(primary_speaker)

In [7]:
speaker_set

{'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6'}

### SKELETON TO READ FILES AND COSS_VALIDATE

In the following way: In each iteration, we will leave one speaker out (two files), train a model and test on those two files.

In [8]:

for speaker_on_focus in speaker_set:
    train_files = []
    test_files  = []
    
    for each_file in glob.glob(PATH+"*.tab"):
        filename=each_file.split("\\")[-1]
        fileinfo=filename.split("-")

        primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
        speakers = fileinfo[0]
        l_speaker,r_speaker = speakers.split("_")
        if primary_speaker == l_speaker:
            secondary_speaker = r_speaker
        else:
            secondary_speaker = l_speaker

        #print (primary_speaker,l_speaker,r_speaker,speaker_on_focus)

        file_df = pd.read_csv(each_file,sep="\t")
        file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON") for el in file_df.columns]

        if primary_speaker != speaker_on_focus:
            #FILES TO TRAIN!
            train_files.append(file_df)
        else:
            #FILES TO TEST!
            test_files.append(file_df)
            
        
        
    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()


Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Speaker on focus: M6
Training data size: (181528, 28)


### Experiments:

It seems that in the 3-6-9 experiments considering both the past and future of frames, the MLP was not the best model, but the Logistic Regression. I think, though, that the MLP will show good performance when cross-validating with the whole data.

I will, then, try 3 classifiers:

  * SVM, because it is the default classifier that is commonly used
  * Logistic Regression, because it seems to perform the best in our initial experiments with two dialogue data
  * MLP, because when including more data, I expect to estimate a more general (and thus, better) model

In [9]:
accuracy_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
import math

def map_movements(mov_class):
    if mov_class == "Nod" or mov_class == "Jerk":
        return 1
    elif mov_class == "Shake":
        return 2
    elif mov_class!=mov_class: #This returns True if the value is NAN, and it works. It might be source of errors.
        return 0
    else:
        return 3



In [11]:
accuracy_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
f1score_results = pd.DataFrame(0,columns=speaker_set,index=["MF"])
f1score_resultsw = pd.DataFrame(0,columns=speaker_set,index=["MF"])
#f1score_resultsb = pd.DataFrame(0,columns=speaker_set,index=["MF"])
#f1_score(y_true, y_pred, average='micro')

for speaker_on_focus in speaker_set:
    train_files = []
    test_files  = []
    
    for each_file in glob.glob(PATH+"*.tab"):
        filename=each_file.split("\\")[-1]
        fileinfo=filename.split("-")

        primary_speaker = fileinfo[2].split("_")[-1][:-2] #fileinfo[2] = final2_primary_M1.T
        speakers = fileinfo[0]
        l_speaker,r_speaker = speakers.split("_")
        if primary_speaker == l_speaker:
            secondary_speaker = r_speaker
        else:
            secondary_speaker = l_speaker

        #print (primary_speaker,l_speaker,r_speaker,speaker_on_focus)

        file_df = pd.read_csv(each_file,sep="\t")
        file_df.columns = [el.replace(primary_speaker, "PRIMA").replace(secondary_speaker, "SECON") for el in file_df.columns]

        if primary_speaker != speaker_on_focus:
            #FILES TO TRAIN!
            train_files.append(file_df)
        else:
            #FILES TO TEST!
            test_files.append(file_df)
            
        
        
    training_data = pd.concat(train_files, ignore_index=True)
    testing_data = pd.concat(test_files, ignore_index=True)
    print ("Speaker on focus:",speaker_on_focus)
    print ("Training data size:",training_data.shape)
    print ("Testing data size:",testing_data.shape)
    print ()
    
    
    
    
    
    
    
    #Train data
    features=['PRIMA:velocity-r', 'PRIMA:velocity-clock', 'PRIMA:velocity-x', 'PRIMA:velocity-y',
              'PRIMA:acceleration-r', 'PRIMA:acceleration-clock', 'PRIMA:acceleration-x',
              'PRIMA:acceleration-y', 'PRIMA:jerk-r', 'PRIMA:jerk-clock', 'PRIMA:jerk-x', 'PRIMA:jerk-y',
              'PRIMA:pitch', 'PRIMA:intensity'
             ]

    feats_learning = training_data[features]
    X = feats_learning.dropna() #REMEMBER THIS!!!! We're ignoring any row that has a NaN value.
    scaler = MinMaxScaler((0,1))
    scaler.fit(X)
    X_norm = scaler.transform(X)
    #X_norm = X
    #We're ignoring anyrow that has a NaN value.
    #Y = (training_data[(feats_learning.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].equals('nod')*1).values #0: no movement; 1: movement
    Y = training_data[(feats_learning.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].map(map_movements)
    
    #Test data
    feats_testing = testing_data[features]

    X_test = feats_testing.dropna() #REMEMBER THIS!!!! We're ignoring any row that has a NaN value.
    X_test_norm = scaler.transform(X_test)
    #X_test_norm = X_test
#    Y_test = (testing_data[(feats_testing.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].equals('nod')*1).values #0: no movement; 1: movement
    Y_test =  testing_data[(feats_testing.isnull().sum(axis=1)==0)]['PRIMA:HeadMovement'].map(map_movements)
    
    classifiers_to_test = [(DummyClassifier(strategy="most_frequent"),"MF"),
                           (LogisticRegression(solver="liblinear"),"LR"),
                          (LinearSVC(),"LINEARSVC"),
                          (MLPClassifier(hidden_layer_sizes=(30,30,30,30), random_state=442), "MLP")
                          ]
                        #I think that now we have to fine-tune the n_hidden_layers and their sizes (MLP)
    
    
    for clf, clfname in classifiers_to_test:
        print ("Trying classifier ",clfname,speaker_on_focus)
        clf.fit(X_norm, Y)
        y_pred = clf.predict(X_test_norm)
        accuracy_results.loc[clfname,speaker_on_focus] = accuracy_score(Y_test,y_pred)
        #f1score_resultsb.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred)
        f1score_results.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred, average='macro')
        f1score_resultsw.loc[clfname,speaker_on_focus] = f1_score(Y_test,y_pred, average='weighted')
    
        print (confusion_matrix(Y_test,y_pred))
        print (accuracy_score(Y_test,y_pred)) #same as f1-score (micro-averagaed)
        print (f1_score(Y_test,y_pred, average='macro'))
        print (f1_score(Y_test,y_pred, average='weighted'))
        y_pred=None
        print ()

        
accuracy_results

Speaker on focus: F5
Training data size: (181647, 28)
Testing data size: (16413, 28)

Trying classifier  MF F5


  'precision', 'predicted', average, warn_for)


[[12154     0     0     0]
 [ 1330     0     0     0]
 [  914     0     0     0]
 [ 2015     0     0     0]]
0.7405105708889295
0.21272797283578954
0.6301092504347008

Trying classifier  LR F5


  'precision', 'predicted', average, warn_for)


[[11829    20     0   305]
 [ 1193    13     0   124]
 [  784     8     0   122]
 [ 1678    14     0   323]]
0.7411807713397917
0.27459367905665444
0.6628467002145769

Trying classifier  LINEARSVC F5


  'precision', 'predicted', average, warn_for)


[[11883    13     0   258]
 [ 1203     8     0   119]
 [  794     7     0   113]
 [ 1721    15     0   279]]
0.7414854079083654
0.2670906241188322
0.6596339519750344

Trying classifier  MLP F5




[[11439   105    14   596]
 [ 1082    85     4   159]
 [  726     4    53   131]
 [ 1386    48    19   562]]
0.7395966611832084
0.3480913140500055
0.6869386012794754

Speaker on focus: F6
Training data size: (182822, 28)
Testing data size: (15238, 28)

Trying classifier  MF F6


  'precision', 'predicted', average, warn_for)


[[7718    0    0    0]
 [3393    0    0    0]
 [ 435    0    0    0]
 [3692    0    0    0]]
0.5064969156057225
0.16810419933786372
0.34057703385999005

Trying classifier  LR F6




[[6948   74    0  696]
 [1863  398    0 1132]
 [ 263   36    3  133]
 [2250  108    0 1334]]
0.5698254364089775
0.3309647934992843
0.50673966118877

Trying classifier  LINEARSVC F6


  'precision', 'predicted', average, warn_for)


[[7037   26    0  655]
 [1954  180    0 1259]
 [ 275   18    0  142]
 [2381   37    0 1274]]
0.5572253576584854
0.29703921534644917
0.4779638795082593

Trying classifier  MLP F6




[[6022  428   48 1220]
 [1297 1245   31  820]
 [ 147   10   84  194]
 [1475  407   97 1713]]
0.5948287176794855
0.4668295395720445
0.5828676627799465

Speaker on focus: M2
Training data size: (182715, 28)
Testing data size: (15345, 28)

Trying classifier  MF M2


  'precision', 'predicted', average, warn_for)


[[10965     0     0     0]
 [ 1608     0     0     0]
 [  156     0     0     0]
 [ 2616     0     0     0]]
0.7145650048875856
0.20838084378563285
0.5956066346326397

Trying classifier  LR M2


  'precision', 'predicted', average, warn_for)


[[10831    15     0   119]
 [ 1463    36     0   109]
 [  141     2     0    13]
 [ 2264     7     0   345]]
0.7306614532420984
0.2756793767187648
0.6443968401908097

Trying classifier  LINEARSVC M2


  'precision', 'predicted', average, warn_for)


[[10851    12     0   102]
 [ 1487    18     0   103]
 [  145     1     0    10]
 [ 2311     5     0   300]]
0.7278592375366569
0.2640078816400494
0.6369861033476094

Trying classifier  MLP M2




[[10378    96    16   475]
 [ 1241   216     1   150]
 [  112     3     5    36]
 [ 1740    67    22   787]]
0.742000651678071
0.37594777440320604
0.6962370184133646

Speaker on focus: M4
Training data size: (181548, 28)
Testing data size: (16512, 28)

Trying classifier  MF M4


  'precision', 'predicted', average, warn_for)


[[10705     0     0     0]
 [ 1702     0     0     0]
 [  539     0     0     0]
 [ 3566     0     0     0]]
0.6483163759689923
0.19666017562552818
0.509992049435872

Trying classifier  LR M4


  'precision', 'predicted', average, warn_for)


[[10238    22     0   445]
 [ 1365    82     0   255]
 [  449     4     0    86]
 [ 2958     7     0   601]]
0.6613977713178295
0.28230166045322547
0.5779467781586729

Trying classifier  LINEARSVC M4


  'precision', 'predicted', average, warn_for)


[[10297    15     0   393]
 [ 1406    44     0   252]
 [  466     3     0    70]
 [ 3045     5     0   516]]
0.6575218023255814
0.264858084247822
0.5667101778480078

Trying classifier  MLP M4




[[9541  165   46  953]
 [1017  358   11  316]
 [ 328   13   22  176]
 [2194  187   41 1144]]
0.6701187015503876
0.384006550255383
0.6330190615981359

Speaker on focus: F1
Training data size: (181919, 28)
Testing data size: (16141, 28)

Trying classifier  MF F1


  'precision', 'predicted', average, warn_for)


[[10841     0     0     0]
 [  829     0     0     0]
 [  725     0     0     0]
 [ 3746     0     0     0]]
0.6716436404188092
0.20089318805129347
0.5397145286324446

Trying classifier  LR F1




[[10533    27     2   279]
 [  586    40     0   203]
 [  525    15     0   185]
 [ 2870    18     1   857]]
0.7081345641533981
0.3105483339561178
0.6379335665606458

Trying classifier  LINEARSVC F1


  'precision', 'predicted', average, warn_for)


[[10585    21     0   235]
 [  623    23     0   183]
 [  553    13     0   159]
 [ 3001    16     0   729]]
0.7023728393531999
0.29161314632818974
0.6249498660907697

Trying classifier  MLP F1




[[9955  110   15  761]
 [ 371  198    0  260]
 [ 371    4   69  281]
 [2013   76   53 1604]]
0.732668360076823
0.4532858579234891
0.7036329699293927

Speaker on focus: F3
Training data size: (181651, 28)
Testing data size: (16409, 28)

Trying classifier  MF F3


  'precision', 'predicted', average, warn_for)


[[9660    0    0    0]
 [2223    0    0    0]
 [ 847    0    0    0]
 [3679    0    0    0]]
0.5887013224449997
0.18527753270167632
0.4362925140832941

Trying classifier  LR F3


  'precision', 'predicted', average, warn_for)


[[9450   27    0  183]
 [2049   41    0  133]
 [ 748   18    0   81]
 [3299   33    0  347]]
0.5995490279724541
0.2354353393219002
0.4813437760530567

Trying classifier  LINEARSVC F3


  'precision', 'predicted', average, warn_for)


[[9477   14    0  169]
 [2082   25    0  116]
 [ 765   14    0   68]
 [3361   19    0  299]]
0.5972941678347249
0.22692516267848983
0.4741629269521967

Trying classifier  MLP F3




[[9126  122   32  380]
 [1760  286    2  175]
 [ 626   21   49  151]
 [2692  188   85  714]]
0.6200865378755561
0.3357122868104035
0.5453183823885062

Speaker on focus: M3
Training data size: (181426, 28)
Testing data size: (16634, 28)

Trying classifier  MF M3


  'precision', 'predicted', average, warn_for)


[[11972     0     0     0]
 [  778     0     0     0]
 [  616     0     0     0]
 [ 3268     0     0     0]]
0.71973067211735
0.20925679927287982
0.6024341471431808

Trying classifier  LR M3


  'precision', 'predicted', average, warn_for)


[[11536    12     0   424]
 [  597    63     0   118]
 [  505     5     0   106]
 [ 2713    13     0   542]]
0.7298905855476734
0.3080591168837657
0.6622908423692495

Trying classifier  LINEARSVC M3


  'precision', 'predicted', average, warn_for)


[[11623     9     0   340]
 [  618    51     0   109]
 [  516     1     0    99]
 [ 2796     7     0   465]]
0.7297703498857762
0.29558691620322036
0.6561613024371803

Trying classifier  MLP M3




[[10690   172    27  1083]
 [  393   228     0   157]
 [  427    17    15   157]
 [ 1954   202    32  1080]]
0.722195503186245
0.39660302060957664
0.6957069063314825

Speaker on focus: M5
Training data size: (180649, 28)
Testing data size: (17411, 28)

Trying classifier  MF M5


  'precision', 'predicted', average, warn_for)


[[9595    0    0    0]
 [2325    0    0    0]
 [1014    0    0    0]
 [4477    0    0    0]]
0.5510883923956119
0.1776457083611049
0.39159395134680414

Trying classifier  LR M5


  'precision', 'predicted', average, warn_for)


[[9473   15    0  107]
 [2171   65    0   89]
 [ 851   36    0  127]
 [3683   70    0  724]]
0.5893975073229567
0.26225287031398636
0.47942711975497065

Trying classifier  LINEARSVC M5


  'precision', 'predicted', average, warn_for)


[[9488   10    0   97]
 [2196   41    0   88]
 [ 874   29    0  111]
 [3784   45    0  648]]
0.5845155361553042
0.2510396481913167
0.4691297193797355

Trying classifier  MLP M5




[[9294   74   31  196]
 [1918  284   10  113]
 [ 739    8  114  153]
 [2967  247  121 1142]]
0.6222503015335134
0.3759901547476334
0.550574866523211

Speaker on focus: M1
Training data size: (180025, 28)
Testing data size: (18035, 28)

Trying classifier  MF M1


  'precision', 'predicted', average, warn_for)


[[12498     0     0     0]
 [  805     0     0     0]
 [  489     0     0     0]
 [ 4243     0     0     0]]
0.6929858608261713
0.204663806373432
0.5673164961586146

Trying classifier  LR M1




[[11523    29     0   946]
 [  669    26     1   109]
 [  384    19     1    85]
 [ 2772    32     0  1439]]
0.7202107014139174
0.32766180693650004
0.6754407687509676

Trying classifier  LINEARSVC M1


  'precision', 'predicted', average, warn_for)


[[11674    19     0   805]
 [  691    17     0    97]
 [  392    17     0    80]
 [ 2903    22     0  1318]]
0.7213196562240088
0.3176719680063927
0.6711152168268768

Trying classifier  MLP M1




[[11280    45    28  1145]
 [  571    79     2   153]
 [  344     0    37   108]
 [ 2322    51    18  1852]]
0.7345716662046021
0.4047591981871431
0.7055724672632459

Speaker on focus: F2
Training data size: (181593, 28)
Testing data size: (16467, 28)

Trying classifier  MF F2


  'precision', 'predicted', average, warn_for)


[[10315     0     0     0]
 [ 1779     0     0     0]
 [ 1060     0     0     0]
 [ 3313     0     0     0]]
0.6264043237991134
0.19257337017399748
0.48251516690223695

Trying classifier  LR F2


  'precision', 'predicted', average, warn_for)


[[9720   77    0  518]
 [1071  144    0  564]
 [ 646   44    0  370]
 [2109  114    0 1090]]
0.6652092062913706
0.33012670592727095
0.5996705958324396

Trying classifier  LINEARSVC F2


  'precision', 'predicted', average, warn_for)


[[9826   40    0  449]
 [1133   76    0  570]
 [ 695   33    0  332]
 [2224   72    0 1017]]
0.6630837432440639
0.31158415898616965
0.5890727776045146

Trying classifier  MLP F2
[[8717  478   35 1085]
 [ 713  698    8  360]
 [ 458   72   53  477]
 [1436  415   38 1424]]
0.6614441003218559
0.4319305390144971
0.6402567233475954

Speaker on focus: F4
Training data size: (181137, 28)
Testing data size: (16923, 28)

Trying classifier  MF F4


  'precision', 'predicted', average, warn_for)


[[10008     0     0     0]
 [ 3183     0     0     0]
 [ 1039     0     0     0]
 [ 2693     0     0     0]]
0.5913845062932104
0.18580817645093015
0.43953630678270017

Trying classifier  LR F4




[[9462    7    0  539]
 [2274   91    0  818]
 [ 547   17    1  474]
 [1405   14    2 1272]]
0.6397210896413166
0.3236025680494949
0.5525895589087003

Trying classifier  LINEARSVC F4


  'precision', 'predicted', average, warn_for)


[[9547    5    0  456]
 [2381   44    0  758]
 [ 574    9    0  456]
 [1525   12    0 1156]]
0.63505288660403
0.3100983048975318
0.5415618854908852

Trying classifier  MLP F4
[[8721  102   12 1173]
 [1631  703    3  846]
 [ 423    3   57  556]
 [ 853  146   26 1668]]
0.6588075400342729
0.4318122295382461
0.6233652625225989

Speaker on focus: M6
Training data size: (181528, 28)
Testing data size: (16532, 28)

Trying classifier  MF M6


  'precision', 'predicted', average, warn_for)


[[9316    0    0    0]
 [1800    0    0    0]
 [1671    0    0    0]
 [3745    0    0    0]]
0.5635131865473022
0.18020736614051377
0.4061969085325493

Trying classifier  LR M6




[[8727   27    0  562]
 [1606   26    0  168]
 [1519   31    0  121]
 [2892   63    1  789]]
0.5771836438422453
0.26129509404950885
0.47808251625856824

Trying classifier  LINEARSVC M6


  'precision', 'predicted', average, warn_for)


[[8812   20    0  484]
 [1642   12    0  146]
 [1538   24    0  109]
 [3002   43    0  700]]
0.5760948463585773
0.2519172768853328
0.47108290439257006

Trying classifier  MLP M6
[[8403   87   23  803]
 [1435  148    2  215]
 [1276   29   60  306]
 [2116   55   32 1542]]
0.614142269537866
0.35467681168928655
0.5477162774088756



Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.741181,0.569825,0.730661,0.661398,0.708135,0.599549,0.729891,0.589398,0.720211,0.665209,0.639721,0.577184
LINEARSVC,0.741485,0.557225,0.727859,0.657522,0.702373,0.597294,0.72977,0.584516,0.72132,0.663084,0.635053,0.576095
MLP,0.739597,0.594829,0.742001,0.670119,0.732668,0.620087,0.722196,0.62225,0.734572,0.661444,0.658808,0.614142


In [12]:
accuracy_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.740511,0.506497,0.714565,0.648316,0.671644,0.588701,0.719731,0.551088,0.692986,0.626404,0.591385,0.563513
LR,0.741181,0.569825,0.730661,0.661398,0.708135,0.599549,0.729891,0.589398,0.720211,0.665209,0.639721,0.577184
LINEARSVC,0.741485,0.557225,0.727859,0.657522,0.702373,0.597294,0.72977,0.584516,0.72132,0.663084,0.635053,0.576095
MLP,0.739597,0.594829,0.742001,0.670119,0.732668,0.620087,0.722196,0.62225,0.734572,0.661444,0.658808,0.614142


In [13]:
f1score_resultsw

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.630109,0.340577,0.595607,0.509992,0.539715,0.436293,0.602434,0.391594,0.567316,0.482515,0.439536,0.406197
LR,0.662847,0.50674,0.644397,0.577947,0.637934,0.481344,0.662291,0.479427,0.675441,0.599671,0.55259,0.478083
LINEARSVC,0.659634,0.477964,0.636986,0.56671,0.62495,0.474163,0.656161,0.46913,0.671115,0.589073,0.541562,0.471083
MLP,0.686939,0.582868,0.696237,0.633019,0.703633,0.545318,0.695707,0.550575,0.705572,0.640257,0.623365,0.547716


In [14]:
f1score_results

Unnamed: 0,F5,F6,M2,M4,F1,F3,M3,M5,M1,F2,F4,M6
MF,0.212728,0.168104,0.208381,0.19666,0.200893,0.185278,0.209257,0.177646,0.204664,0.192573,0.185808,0.180207
LR,0.274594,0.330965,0.275679,0.282302,0.310548,0.235435,0.308059,0.262253,0.327662,0.330127,0.323603,0.261295
LINEARSVC,0.267091,0.297039,0.264008,0.264858,0.291613,0.226925,0.295587,0.25104,0.317672,0.311584,0.310098,0.251917
MLP,0.348091,0.46683,0.375948,0.384007,0.453286,0.335712,0.396603,0.37599,0.404759,0.431931,0.431812,0.354677


In [15]:
accuracy_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.634612,0.66103,0.6578,0.676059
std,0.075262,0.064994,0.067069,0.055718
min,0.506497,0.569825,0.557225,0.594829
25%,0.582404,0.597011,0.5941,0.621709
50%,0.63736,0.663303,0.660303,0.665781
75%,0.698381,0.722631,0.722955,0.733144
max,0.740511,0.741181,0.741485,0.742001


In [16]:
f1score_results.T.describe()

Unnamed: 0,MF,LR,LINEARSVC,MLP
count,12.0,12.0,12.0,12.0
mean,0.193517,0.293543,0.279119,0.396637
std,0.014236,0.032342,0.028727,0.042134
min,0.168104,0.235435,0.226925,0.335712
25%,0.18401,0.271508,0.260985,0.37063
50%,0.194617,0.29518,0.279352,0.390305
75%,0.205593,0.324617,0.300304,0.431842
max,0.212728,0.330965,0.317672,0.46683
