In [1]:
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
from scipy.io import loadmat

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis 
from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import StandardScaler  

In [2]:
train_ratio = 0.9
L = 3000;
ind_start = 1000
n_train = round(L*train_ratio)
n_test = round(L*(1-train_ratio) )
trainInd = np.arange(0,n_train, dtype=np.uint16)
testInd = np.arange(n_train,L, dtype=np.uint16)

In [3]:
names = ["LDA",
         "Linear SVM",
         "sigmoid kernel SVM", 
         "quadratic kernel  SVM", 
         "rbf kernel SVM", 
         "Decision Tree",
         "Neural Net logistic",
         "Neural Net LRU"]
classifiers = [LinearDiscriminantAnalysis(),
              SVC(kernel="linear"), 
              SVC(kernel="sigmoid"), 
              SVC(kernel="poly"), 
              SVC(kernel="rbf"), 
              DecisionTreeClassifier(max_depth=5),
              MLPClassifier( activation='tanh'),
              MLPClassifier( activation='relu')]

n_iter = 10
df = pd.DataFrame(index = np.arange(1,n_iter+1),columns = names)

scaler = StandardScaler()  

for name, clf in zip(names, classifiers):
    for j in np.arange(1,n_iter+1):
        # load data
        strainData = loadmat("eulerLagrangeData/strainSet_th0.1ph0.312it"+str(j)+"harm0.2")
        
        # split data into training and testing 
        Xtrain = np.concatenate((  strainData['strain_0'][:,trainInd],  strainData['strain_10'][:,trainInd]   ) , axis=1).transpose()
        Ytrain = np.concatenate(( np.zeros(n_train), np.ones(n_train)), axis = 0)
        Xtest = np.concatenate((  strainData['strain_0'][:,testInd],  strainData['strain_10'][:,testInd]   ) , axis=1).transpose()
        Ytest = np.concatenate(( np.zeros(n_test), np.ones(n_test)), axis = 0)
        
        
        # Don't cheat - fit only on training data
        scaler.fit(Xtrain)  
        Xtrain = scaler.transform(Xtrain)  
        Xtest = scaler.transform(Xtest)  

        clf.fit(Xtrain,Ytrain)
        score = clf.score(Xtest,Ytest)
        df.loc[j][name] = score
        
print( ) 
print('Average accuracy over ', n_iter, 'trials') 
print( df.loc[:].mean() ,'\n' )
print( df.loc[:].std()   )




Average accuracy over  10 trials
LDA                      0.497000
Linear SVM               0.610000
sigmoid kernel SVM       0.499167
quadratic kernel  SVM    0.558333
rbf kernel SVM           0.590167
Decision Tree            0.631333
Neural Net logistic      0.501167
Neural Net LRU           0.709500
dtype: float64 

LDA                      4.499657e-03
Linear SVM               1.170278e-16
sigmoid kernel SVM       3.868997e-03
quadratic kernel  SVM    1.170278e-16
rbf kernel SVM           2.382355e-02
Decision Tree            1.570956e-02
Neural Net logistic      6.238570e-03
Neural Net LRU           1.523297e-01
dtype: float64


In [4]:
df

Unnamed: 0,LDA,Linear SVM,sigmoid kernel SVM,quadratic kernel SVM,rbf kernel SVM,Decision Tree,Neural Net logistic,Neural Net LRU
1,0.498333,0.61,0.501667,0.558333,0.57,0.655,0.511667,0.686667
2,0.503333,0.61,0.49,0.558333,0.628333,0.656667,0.49,0.886667
3,0.495,0.61,0.498333,0.558333,0.59,0.623333,0.5,0.938333
4,0.486667,0.61,0.498333,0.558333,0.61,0.621667,0.51,0.55
5,0.498333,0.61,0.498333,0.558333,0.626667,0.648333,0.498333,0.698333
6,0.501667,0.61,0.501667,0.558333,0.593333,0.625,0.501667,0.535
7,0.496667,0.61,0.498333,0.558333,0.568333,0.62,0.496667,0.895
8,0.498333,0.61,0.5,0.558333,0.57,0.613333,0.5,0.725
9,0.495,0.61,0.505,0.558333,0.573333,0.625,0.503333,0.641667
10,0.496667,0.61,0.5,0.558333,0.571667,0.625,0.5,0.538333
