In [6]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [29]:
def svm(X_train, X_test, y_train, y_test, c, kernel):
    """Fit an SVC with the given penalty and kernel and print its accuracies.

    Args:
        X_train, y_train: data the classifier is fitted on.
        X_test, y_test: held-out data used only for scoring.
        c: value passed to ``SVC`` as ``C``.
        kernel: kernel name passed to ``SVC`` as ``kernel``.

    Returns:
        None. A single summary line is printed containing the kernel, C,
        the train accuracy and the test accuracy.
    """
    classifier = SVC(kernel=kernel, C=c)
    classifier.fit(X_train, y_train)

    test_accuracy = accuracy_score(y_test, classifier.predict(X_test))
    train_accuracy = accuracy_score(y_train, classifier.predict(X_train))

    # The template stops at "test accuracy:"; print() appends the test score
    # as a second argument, separated from the text by a single space.
    summary = 'kernel: {}, C: {}, train accuracy: {}, test accuracy:'.format(
        kernel, c, train_accuracy)
    print(summary, test_accuracy)

def param_list(kernel):
    """Build the GridSearchCV parameter grid for one SVC kernel.

    Args:
        kernel: kernel name. ``'linear'`` and ``'rbf'`` get dedicated grids;
            every other value receives the gamma/degree grid.

    Returns:
        dict mapping SVC parameter names to lists of candidate values:
          * ``'linear'``: ``kernel`` and ``C`` (C tops out at 1000).
          * ``'rbf'``: ``kernel``, ``C`` (tops out at 500) and a coarse
            ``gamma`` range.
          * anything else: ``kernel``, ``C`` (tops out at 500), a fine
            ``gamma`` range and ``degree``.
        Fresh lists are created on every call.
    """
    shared_penalties = [1, 10, 100]

    # The linear kernel has no gamma/degree to tune and searches a larger C.
    if kernel == 'linear':
        return {'kernel': [kernel], 'C': shared_penalties + [1000]}

    grid = {'kernel': [kernel], 'C': shared_penalties + [500]}
    if kernel == 'rbf':
        grid['gamma'] = [0.1, 0.3, 0.5, 0.7, 0.9]
    else:
        grid['gamma'] = [0.01, 0.03, 0.05]
        grid['degree'] = [2, 3, 4]
    return grid

In [30]:
from sklearn import preprocessing
import numpy as np
# Load the dataset and separate the feature columns from the label column.
data = pd.read_csv("q5.csv")
X = np.array(data.drop(columns=['target_class']))

# Labels are class identifiers, not features: keep them unscaled and 1-D.
# (Row-normalizing a single-column label array only maps each value to its
# sign, turns the labels into floats and leaves a column vector, which
# scikit-learn estimators warn about.)
y = np.array(data['target_class']).ravel()

# normalize() rescales each sample (row) to unit L2 norm; it keeps no fitted
# state, so applying it before the split does not leak test information.
X = preprocessing.normalize(X)

# Fixed seed so the 70/30 split, and every accuracy reported from it, is
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

In [32]:
import warnings

# Silence every warning so the accuracy listing stays readable.
# NOTE: this filter is installed for the whole kernel session, not only
# for this cell.
warnings.filterwarnings('ignore')

kernels = ['rbf', 'linear', 'poly', 'sigmoid']
C = [1, 100, 1000]

# Sweep all (C, kernel) combinations, C-major: every kernel is fitted at one
# C value before moving on to the next C. One accuracy line is printed per fit.
for c, kernel in [(c_value, name) for c_value in C for name in kernels]:
    svm(X_train, X_test, y_train, y_test, c, kernel)

kernel: rbf, C: 1, train accuracy: 0.9723818646232439, test accuracy: 0.9741154562383613
kernel: linear, C: 1, train accuracy: 0.971823116219668, test accuracy: 0.9722532588454376
kernel: poly, C: 1, train accuracy: 0.9733397190293742, test accuracy: 0.9746741154562384
kernel: sigmoid, C: 1, train accuracy: 0.9009418901660281, test accuracy: 0.8955307262569833
kernel: rbf, C: 100, train accuracy: 0.9769316730523627, test accuracy: 0.9757914338919925
kernel: linear, C: 100, train accuracy: 0.9753352490421456, test accuracy: 0.9757914338919925
kernel: poly, C: 100, train accuracy: 0.9786079182630907, test accuracy: 0.9774674115456239
kernel: sigmoid, C: 100, train accuracy: 0.8996647509578544, test accuracy: 0.8942271880819367
kernel: rbf, C: 1000, train accuracy: 0.9796455938697318, test accuracy: 0.9787709497206704
kernel: linear, C: 1000, train accuracy: 0.9774904214559387, test accuracy: 0.976536312849162
kernel: poly, C: 1000, train accuracy: 0.979565772669221, test accuracy: 0.9795

In [35]:
# Cross-validated grid search, run once per kernel with that kernel's own grid.
kernels = ['rbf', 'linear', 'poly']
for kernel in kernels:
    parameters = param_list(kernel)
    # error_score=0.5: a parameter combination whose fit raises is scored 0.5
    # instead of aborting the whole search.
    model = GridSearchCV(estimator=SVC(), param_grid=parameters, error_score=0.5)
    model.fit(X_train, y_train)
    # Predictions come from the estimator refit with the best parameters.
    y_pred_test = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    print(model.best_params_, "accuracy: ", test_accuracy)

{'C': 500, 'gamma': 0.9, 'kernel': 'rbf'} accuracy:  0.9763500931098696
{'C': 1000, 'kernel': 'linear'} accuracy:  0.976536312849162
{'C': 500, 'degree': 2, 'gamma': 0.05, 'kernel': 'poly'} accuracy:  0.9737430167597766


In [36]:
# Best model for part (b): polynomial-kernel SVC with C = 1000.
# Refit it on the training split, then report its test accuracy and the
# test-set confusion matrix.
model = SVC(kernel='poly', C=1000)
model.fit(X_train, y_train)
y_pred_test = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred_test)
test_confusion = confusion_matrix(y_test, y_pred_test)
print("accuracy: ", test_accuracy)
print(test_confusion)

accuracy:  0.9795158286778398
[[4844   29]
 [  81  416]]


In [37]:
# Best model for part (c): linear-kernel SVC with C = 1000.
# Refit it on the training split, then report its test accuracy and the
# test-set confusion matrix.
model = SVC(kernel='linear', C=1000)
model.fit(X_train, y_train)
y_pred_test = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred_test)
test_confusion = confusion_matrix(y_test, y_pred_test)
print("accuracy: ", test_accuracy)
print(test_confusion)

accuracy:  0.976536312849162
[[4846   27]
 [  99  398]]
