In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn import metrics

import warnings
warnings.filterwarnings("ignore")

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline  

In [9]:
df = pd.read_csv("FeatureSetFastBrian.csv")
df = df[df.name != "name"]
df.columns

Index(['name', 'SDx', 'SDy', 'SDz', 'SR', 'PFD_t', 'PFD_i', 'PFD_m', 'PFD_r',
       'PFD_p', 'FA_ti', 'FA_im', 'FA_mr', 'FA_rp', 'FA_pt', 'FD_ti', 'FD_tm',
       'FD_tr', 'FD_tp', 'FD_im', 'FD_ir', 'FD_ip', 'FD_mr', 'FD_mp', 'FD_rp'],
      dtype='object')

In [10]:
createLabels = pd.DataFrame(df,columns=['name'])
createLabels.rename(columns={'name':'0'},inplace = True)
for index,row in createLabels.iterrows():
    createLabels.set_value(index,'0',row['0'][0])
createLabels.to_csv("labelsForTestAndTrainFastBrian.csv")

In [11]:
le = preprocessing.LabelEncoder()
le.fit(createLabels['0'])
LabelEncoder()
list(le.classes_)

letter_mappings = dict()
# create csv of letter to label mappings
for i in range(0, len(le.classes_)):
    letter_mappings[le.classes_[i]] = i
letter_mappings

{'A': 0,
 'B': 1,
 'C': 2,
 'D': 3,
 'E': 4,
 'F': 5,
 'G': 6,
 'H': 7,
 'I': 8,
 'K': 9,
 'L': 10,
 'M': 11,
 'N': 12,
 'O': 13,
 'P': 14,
 'R': 15,
 'S': 16,
 'T': 17,
 'U': 18,
 'V': 19,
 'W': 20,
 'X': 21,
 'Y': 22}

In [12]:
transformedLabels = le.transform(createLabels['0']) 
np.unique(transformedLabels)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22])

In [13]:
df['labels'] = transformedLabels
df

Unnamed: 0,name,SDx,SDy,SDz,SR,PFD_t,PFD_i,PFD_m,PFD_r,PFD_p,...,FD_tm,FD_tr,FD_tp,FD_im,FD_ir,FD_ip,FD_mr,FD_mp,FD_rp,labels
0,A,0,0,0,36.474487,62.1613121,41.63286972,40.62351227,38.32725143,34.05849838,...,76.09812927,82.78836823,84.68005371,9.649793625,20.76934242,31.06712914,11.45111752,22.76948357,12.58252048,0
1,A,0,0,0,36.524467,62.20118332,41.63124084,40.668293,38.36265945,34.05198288,...,76.25331879,82.90651703,84.7429657,9.631640434,20.72311592,30.99757195,11.43206882,22.7405262,12.57679939,0
2,A,0,0,0,36.88997,62.54512787,41.51000977,40.53670883,38.2361908,33.83018875,...,77.44051361,83.93241882,85.5488739,9.693142891,20.72189522,30.87499237,11.37094879,22.5844574,12.48222828,0
3,A,0,0,0,36.866932,62.5332756,41.52760315,40.55209732,38.25095749,33.83273697,...,77.42211151,83.89690399,85.51763153,9.683080673,20.68358612,30.84685707,11.34147263,22.5647831,12.48937893,0
4,A,0,0,0,36.866932,62.5332756,41.52760315,40.55209732,38.25095749,33.83273697,...,77.42211151,83.89690399,85.51763153,9.683080673,20.68358612,30.84685707,11.34147263,22.5647831,12.48937893,0
5,A,0,0,0,36.83379,62.51315689,41.54090118,40.56211853,38.26262283,33.83771515,...,77.36565399,83.82987976,85.46598816,9.672813416,20.64648819,30.82357025,11.31281757,22.54933739,12.49890423,0
6,A,0,0,0,36.797794,62.48514557,41.54735565,40.56980515,38.27283478,33.8427887,...,77.28864288,83.74704742,85.39992523,9.661387444,20.61198044,30.80189514,11.28860569,22.53855705,12.50930119,0
7,A,0,0,0,36.750824,62.43977737,41.54794693,40.57364273,38.28125763,33.85076523,...,77.16963196,83.62656403,85.30492401,9.651597023,20.57816315,30.78262901,11.2636528,22.52862549,12.52046013,0
8,A,0,0,0,36.750824,62.43977737,41.54794693,40.57364273,38.28125763,33.85076523,...,77.16963196,83.62656403,85.30492401,9.651597023,20.57816315,30.78262901,11.2636528,22.52862549,12.52046013,0
9,A,0,0,0,36.51352,62.32382965,41.55205154,40.60037231,38.33174896,33.91937637,...,76.46431732,82.96372223,84.8398056,9.599715233,20.41735077,30.70539474,11.15098953,22.5007,12.58296108,0


# SVM

In [14]:
df = df.drop(columns=['name'])

In [15]:
df = df.apply(pd.to_numeric, errors='coerce')

In [16]:
X = df.drop('labels', axis=1)  
y = df['labels']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30)  

In [17]:
svclassifier = SVC(kernel='poly',degree=10)  

In [18]:
svclassifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=10, gamma='auto_deprecated',
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [19]:
y_pred = svclassifier.predict(X_test)  
y_pred

array([18, 19,  3, ..., 18, 11,  3])

In [20]:
metrics.accuracy_score(y_pred,y_test)

1.0

In [27]:
###Testing on new data (entire alphabet)
dfTest4 = pd.read_csv("TestBrian.csv")
dfTest4 = dfTest4[dfTest4.name != "name"]


labelsForTest4 = pd.DataFrame(dfTest4,columns=['name'])
labelsForTest4.rename(columns={'name':'0'},inplace = True)
for index,row in labelsForTest4.iterrows():
    labelsForTest4.set_value(index,'0',row['0'][0])
labelsForTest4.to_csv("labelsForTest4.csv")

dfTest4 = dfTest4.drop(columns=['name'])
dfTest4 = dfTest4.apply(pd.to_numeric, errors='coerce')

dfTest4

Unnamed: 0,SDx,SDy,SDz,SR,PFD_t,PFD_i,PFD_m,PFD_r,PFD_p,FA_ti,...,FD_ti,FD_tm,FD_tr,FD_tp,FD_im,FD_ir,FD_ip,FD_mr,FD_mp,FD_rp
0,0,0,0,45.224316,96.631470,103.438042,96.316101,79.048698,46.547424,0.709144,...,19.436155,33.191711,54.087036,78.290024,17.966825,44.452396,79.071930,27.680656,70.247734,53.553837
1,0,0,0,45.457638,96.848206,103.449722,96.474457,78.860344,46.165657,0.709556,...,19.230637,33.109707,54.228710,77.942398,17.929771,44.624504,78.811104,27.987446,70.294411,53.218765
2,0,0,0,46.089264,97.027649,103.527069,96.430580,78.446442,45.699604,0.707650,...,19.057110,32.499916,54.339451,77.415413,17.486053,44.895393,78.388832,28.605953,69.748428,51.981644
3,0,0,0,49.056805,97.216454,103.493927,96.482117,78.127327,45.236561,0.703081,...,19.244513,32.970921,54.712002,77.019852,17.597183,44.995022,77.874718,28.723331,69.273399,50.983334
4,0,0,0,46.440716,97.119835,103.475807,96.402885,78.117363,45.556690,0.704342,...,19.160950,32.789032,54.441795,77.234421,17.580109,44.873825,78.116272,28.595106,69.502625,51.326134
5,0,0,0,45.590725,96.979126,103.434761,96.354782,78.282036,45.834232,0.706387,...,19.167027,32.728474,54.214272,77.465599,17.579535,44.690498,78.356453,28.375080,69.767105,51.996941
6,0,0,0,46.460160,96.968384,103.369598,96.381058,78.315819,46.194942,0.706643,...,19.572828,33.723854,54.939388,77.960777,18.052364,44.970371,78.697105,28.305468,70.090240,52.475117
7,0,0,0,46.460160,96.968384,103.369598,96.381058,78.315819,46.194942,0.706643,...,19.572828,33.723854,54.939388,77.960777,18.052364,44.970371,78.697105,28.305468,70.090240,52.475117
8,0,0,0,45.726420,96.858650,103.356056,96.346504,78.459709,46.278423,0.707925,...,19.529922,33.576347,54.746895,78.091393,18.009987,44.855282,78.873398,28.185652,70.239838,52.816467
9,0,0,0,46.997830,97.068718,103.413895,96.428658,78.285179,45.976028,0.705056,...,19.545033,33.786419,55.100121,77.837578,18.103325,45.105446,78.610191,28.415699,69.998833,52.158127


In [28]:
dfTest_pred4 = svclassifier.predict(dfTest4)  
transformedLabelsForTest4 = le.transform(labelsForTest4['0']) 
transformedLabelsForTest4

for i in range(len(dfTest_pred4)):
    if(i % 50 == 0 and le.classes_[i//50]  != le.classes_[dfTest_pred4[i]]):
        print(le.classes_[i//50] + " was recognized as " + le.classes_[dfTest_pred4[i]])

A was recognized as B
C was recognized as L
D was recognized as L


In [30]:
metrics.accuracy_score(dfTest_pred4,transformedLabelsForTest4)
print(confusion_matrix(dfTest_pred4,transformedLabelsForTest4))  
print(classification_report(dfTest_pred4,transformedLabelsForTest4)) 

[[100   0]
 [  0 100]]
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       100
          10       1.00      1.00      1.00       100

   micro avg       1.00      1.00      1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [None]:
[i for i, j in zip(dfTest_pred4, transformedLabelsForTest4) if i != j]

In [23]:
from sklearn_porter import Porter
porter = Porter(svclassifier, language='java')
output = porter.export(embed_data=True)
print(output)

class SVC {

    private enum Kernel { LINEAR, POLY, RBF, SIGMOID }

    private int nClasses;
    private int nRows;
    private int[] classes;
    private double[][] vectors;
    private double[][] coefficients;
    private double[] intercepts;
    private int[] weights;
    private Kernel kernel;
    private double gamma;
    private double coef0;
    private double degree;

    public SVC (int nClasses, int nRows, double[][] vectors, double[][] coefficients, double[] intercepts, int[] weights, String kernel, double gamma, double coef0, double degree) {
        this.nClasses = nClasses;
        this.classes = new int[nClasses];
        for (int i = 0; i < nClasses; i++) {
            this.classes[i] = i;
        }
        this.nRows = nRows;

        this.vectors = vectors;
        this.coefficients = coefficients;
        this.intercepts = intercepts;
        this.weights = weights;

        this.kernel = Kernel.valueOf(kernel.toUpperCase());
        this.gamma = gamma;
        thi