In [21]:
#importing libraries
from sklearn import svm
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd
import warnings 
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

#prevent warnings
warnings.filterwarnings('ignore')

#importing data from excel file
df = pd.read_excel('Assignment1 data.xls', sheet_name='Training data')
df_test = pd.read_excel('Assignment1 data.xls', sheet_name='Testing data')


#Replacing labels N and S to binary output
df = df.replace('N', 0)
df = df.replace('S', 1)

df_test = df_test.replace('N', 0)
df_test = df_test.replace('S', 1)

df

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,Label
0,-78.0,86.0,-164.0,98.640105,-374,605,9.122040,-1,0
1,-81.0,83.0,-164.0,98.641311,-365,623,7.267395,-3,0
2,-78.0,83.0,-161.0,97.195540,-365,623,7.115118,-3,0
3,-81.0,85.0,-166.0,99.204429,-374,623,7.793443,-2,0
4,-81.0,82.0,-163.0,97.979990,-365,623,5.627322,-6,0
...,...,...,...,...,...,...,...,...,...
5945,-113.0,26.0,-139.0,84.612895,-441,343,-44.429144,-48,1
5946,-113.0,24.0,-137.0,84.589724,-441,343,-45.017486,-49,1
5947,-116.0,26.0,-142.0,85.254887,-441,343,-45.196357,-49,1
5948,-113.0,27.0,-140.0,85.393428,-441,343,-44.161384,-49,1


In [22]:
#Reshaping the data
X_dataTrain = df.drop('Label', axis=1)
y_labelTrain = df['Label']

X_dataTest = df_test.drop('Label', axis=1)
y_labelTest = df_test['Label']


print(y_labelTest)

0       0
1       0
2       0
3       0
4       0
       ..
2945    1
2946    1
2947    1
2948    1
2949    1
Name: Label, Length: 2950, dtype: int64


In [23]:
#setting dictionaries
classifiers_parameters ={
    'MLPClassifier' : {
        'model' : MLPClassifier(),
        'parameters' : {
            'hidden_layer_sizes' : [50, 100, 150],
            'activation' : ['logistic', 'relu'],
            'solver' : ['lbfgs', 'sgd', 'adam'],
            'max_iter' : [50, 100, 150, 200]
        }
    },
    'svm' : {
        'model' : svm.SVC(),
        'parameters' : {
            'kernel' : ['linear', 'rbf', 'sigmoid'],
            'C' : [1, 10, 20],
            'gamma' : ['scale', 'auto']
        }
    }
}

#setting empty array
modelScores = []


#loop to append parameters to the suggested models 
for model_name, model_param in classifiers_parameters.items():
    EGGclassifier = GridSearchCV(model_param['model'], model_param['parameters'], cv=5, return_train_score=False)
    EGGclassifier.fit(X_dataTrain, y_labelTrain)
    modelScores.append({
        'model' : model_name,
        'best_score' : EGGclassifier.best_score_,
        'best_parameters' : EGGclassifier.best_params_
    })

#display the results to a Pandas Dataframe    
bestModels = pd.DataFrame(modelScores, columns=['model', 'best_score', 'best_parameters'])    
bestModels
    

Unnamed: 0,model,best_score,best_parameters
0,MLPClassifier,0.974286,"{'activation': 'logistic', 'hidden_layer_sizes': 150, 'max_iter': 150, 'solver': 'adam'}"
1,svm,0.970588,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}"


In [26]:
#print extended version of the dataframe

pd.set_option('display.max_colwidth', None)
bestModels

Unnamed: 0,model,best_score,best_parameters
0,MLPClassifier,0.974286,"{'activation': 'logistic', 'hidden_layer_sizes': 150, 'max_iter': 150, 'solver': 'adam'}"
1,svm,0.970588,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}"


In [16]:
#training and testing the MLPClassifier 


mlp = MLPClassifier(hidden_layer_sizes=(100),  #number of neurons                            
                                activation='logistic', # sigmoid function
                                solver='adam', #for large datasets 
                                max_iter=100)  #epoch numbers


mlp.fit(X_dataTrain, y_labelTrain)


#label prediction
data_predic = mlp.predict(X_dataTest)

score = accuracy_score(y_labelTest, data_predic)
match = accuracy_score(y_labelTest, data_predic, normalize=False)

#print results
print(score , 'out of 1.0')
print(match, 'out of 2950')
                    

0.9952542372881356 out of 1.0
2936 out of 2950


In [20]:
#training and testing the SVM 

svm_model = svm.SVC(kernel='rbf', #type of kernel
                    C=1, # cost or penalty on incorrect datapoints
                    gamma='scale') # kernel coeficient to calculate the classification 

svm_model.fit(X_dataTrain, y_labelTrain)

#test label prediction

svm_pred = svm_model.predict(X_dataTest)

scoreSVM = accuracy_score(y_labelTest, svm_pred)
matchSVM = accuracy_score(y_labelTest, svm_pred, normalize=False)


#print results

print(scoreSVM, 'out of 1.0')
print(matchSVM, 'out of 2950')

0.9498305084745763 out of 1.0
2802 out of 2950
