In [1]:
import numpy as np
from scipy.optimize import minimize, Bounds, LinearConstraint
import matplotlib.pyplot as plt
import pandas as pd


data=pd.read_csv('../data/heart_disease.csv')
labels=data.values[:,-1]
labels[labels>1]=1
labels=labels.astype(int)

data=data.values[:,:-1]

data=np.hstack((np.ones((len(data),1)),data))
data=(data-np.mean(data))/np.std(data)

In [2]:
from numpy import linalg as LA

def polynomial_kernel(A, B):
        degree = 3 #Default, so we can compare with the built in function
        gamma = 1 / (data.shape[1]*np.var(data))
        coef0 = 0 
        return (gamma*np.dot(A,np.transpose(B))+coef0)**degree 
def rbf_kernel(A, B):
        gamma =  1 / (data.shape[1]*np.var(data))
        #norm = LA.norm(A)**2 + LA.norm(B)**2 - (2 * np.transpose(A) @ B)#LA.norm(A-B)
        #norm = np.abs(A-np.transpose(B))
        norm = np.abs(A-B)
        return np.exp(-gamma * (norm**2))
def linear_kernel(A, B):
        return np.dot(A,np.transpose(B))


In [3]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [20]:
svm = SVC(kernel=polynomial_kernel)
score = cross_val_score(svm, data, labels)
print("The score of my polynomial kernel:")
print(score.mean())

svm = SVC(kernel='poly')
score = cross_val_score(svm, data, labels)
print("The score of the built in polynomial kernel:")
print(score.mean())

The score of my polynomial kernel:
0.6598360655737705
The score of the built in polynomial kernel:
0.6598907103825137


In [4]:
svm = SVC(kernel=rbf_kernel)
score = cross_val_score(svm, data, labels)
print("The score of my rbf kernel:")
print(score.mean())

svm = SVC(kernel='rbf')
score = cross_val_score(svm, data, labels)
print("The score of the built in rbf kernel:")
print(score.mean())

Traceback (most recent call last):
  File "c:\Users\Virag\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Virag\anaconda3\lib\site-packages\sklearn\svm\_base.py", line 226, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "c:\Users\Virag\anaconda3\lib\site-packages\sklearn\svm\_base.py", line 269, in _dense_fit
    raise ValueError("X.shape[0] should be equal to X.shape[1]")
ValueError: X.shape[0] should be equal to X.shape[1]

Traceback (most recent call last):
  File "c:\Users\Virag\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Virag\anaconda3\lib\site-packages\sklearn\svm\_base.py", line 226, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "c:\Users\Virag\anaconda3\lib\site-packages\skle

The score of my rbf kernel:
nan
The score of the built in rbf kernel:
0.6533333333333334


In [112]:
svm = SVC(kernel=linear_kernel)
score = cross_val_score(svm, data, labels)
print("The score of my polynomial kernel:")
print(score.mean())

svm = SVC(kernel='linear')
score = cross_val_score(svm, data, labels)
print("The score of the built in polynomial kernel:")
print(score.mean())

The score of my polynomial kernel:
0.7128415300546449
The score of the built in polynomial kernel:
0.7128415300546449


In [125]:
C = range(1,11)

num = int(data.shape[0] * 0.8)
train_set, test_set = data[:num], data[num:]
train_labels, test_labels = labels[:num], labels[num:]

for c in C:
    svm = SVC(C=c, kernel=polynomial_kernel).fit(train_set, train_labels)
    score1 = cross_val_score(svm, train_set, train_labels, cv=5)
    score2 = cross_val_score(svm, test_set, test_labels, cv=5)
    print("C" + str(c) + " score on train: " + str(score1.mean()) + " score on test: " + str(score2.mean()))

C1 score on train: 0.6657312925170069 score on test: 0.5076923076923077
C2 score on train: 0.6698979591836733 score on test: 0.5076923076923078
C3 score on train: 0.6779761904761904 score on test: 0.5410256410256411
C4 score on train: 0.6779761904761904 score on test: 0.573076923076923
C5 score on train: 0.6780612244897959 score on test: 0.5743589743589743
C6 score on train: 0.6738945578231293 score on test: 0.5410256410256411
C7 score on train: 0.6863095238095238 score on test: 0.5410256410256411
C8 score on train: 0.6821428571428572 score on test: 0.5410256410256411
C9 score on train: 0.6863095238095237 score on test: 0.5243589743589744
C10 score on train: 0.6863945578231292 score on test: 0.5410256410256411
