In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV

In [2]:
# Read the breast-cancer table from disk and index its rows by the 'Code' column.
cancer = pd.read_csv(filepath_or_buffer='BreastCancer.csv').set_index(keys='Code')

In [3]:
# The 'Class' column is the target; every other column is used as a feature.
y = cancer['Class']
X = cancer.drop(columns='Class')

In [4]:
# Hold out 30% of the rows for testing; stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [20]:
# Linear-kernel SVM; probability=True enables predict_proba, which the ROC AUC step uses.
svm = SVC(
    random_state=24,
    probability=True,
    kernel='linear',
)

In [21]:
# Train on the training split only; the fitted estimator is the cell's displayed value.
svm.fit(X=X_train, y=y_train)

In [22]:
# Hard class labels for the held-out rows.
y_pred = svm.predict(X=X_test)

In [23]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9714285714285714

In [24]:
# predict_proba returns one column per class; keep column 1 as the score.
# NOTE(review): column 1 is presumably 'Malignant' (the label that sorts second)
# -- verify against svm.classes_.
class_probabilities = svm.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]

In [25]:
# Area under the ROC curve on the held-out rows, scored with the class-1 probabilities.
roc_auc_score(y_true=y_test, y_score=y_pred_prob)

0.9960748792270531

In [26]:
# List the distinct target labels, in order of first appearance.
pd.unique(y)

array(['Benign', 'Malignant'], dtype=object)

In [27]:
# Grid-search cross-validation (GridSearchCV)

In [52]:
# Linear kernel: search grid, CV splitter and estimator for the tuning run.

# Ten evenly spaced candidate values of C between 0.001 and 5.
# NOTE(review): the recorded search output picks C=0.001, the smallest value in
# this grid -- consider extending the grid below 0.001 or using a log scale; confirm.
params = dict(C=np.linspace(start=0.001, stop=5, num=10))

# Five shuffled, class-stratified folds with a fixed seed.
kfold = StratifiedKFold(random_state=24, shuffle=True, n_splits=5)

svm = SVC(random_state=24, probability=True, kernel='linear')

In [53]:
# Exhaustive search over `params`, ranking candidates by cross-validated ROC AUC.
gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)

In [54]:
# Run the search on the full dataset; the folds come from `kfold`.
gcv.fit(X=X, y=y)

In [55]:
# Parameter combination that achieved the best mean cross-validated score.
gcv.best_params_

{'C': 0.001}

In [56]:
# Mean cross-validated score (ROC AUC, per the search's `scoring`) of the best combination.
gcv.best_score_

0.9948458438559371

In [57]:
# Polynomial kernel

In [72]:
# Polynomial-kernel SVM (degree left at the library default), trained on the
# training split. fit() returns the estimator itself, so the call can be chained.
svm = SVC(random_state=24, probability=True, kernel='poly').fit(X_train, y_train)

# Hard class labels for the held-out rows.
y_pred = svm.predict(X=X_test)

In [73]:
# Hold-out metrics for the polynomial-kernel SVM.

# Class-1 probabilities; these feed the ROC AUC computed in the following cell.
y_pred_prob = svm.predict_proba(X_test)[:,1]

# The accuracy must be the cell's last expression: a bare expression anywhere
# else in a cell is evaluated but its value is never displayed.
accuracy_score(y_test,y_pred)

In [74]:
# Area under the ROC curve for the polynomial-kernel model on the held-out rows.
roc_auc_score(y_true=y_test, y_score=y_pred_prob)

0.9974838969404187

In [77]:

# Polynomial kernel: tune C and the polynomial degree by cross-validated ROC AUC.

# Ten evenly spaced C values in [0.001, 5], crossed with degrees 2, 3 and 4.
params = dict(
    C=np.linspace(start=0.001, stop=5, num=10),
    degree=[2, 3, 4],
)

# Five shuffled, class-stratified folds with a fixed seed.
kfold = StratifiedKFold(random_state=24, shuffle=True, n_splits=5)

svm = SVC(random_state=24, probability=True, kernel='poly')

gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
# Search over the full dataset; the fitted search object is the cell's displayed value.
gcv.fit(X=X, y=y)

In [78]:
# Report the winning parameter combination and its mean CV score, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'C': 0.001, 'degree': 4}
0.9946664923733468


### Radial Kernel

In [80]:
# RBF kernel: tune C and gamma by cross-validated ROC AUC.

# Both hyperparameters are searched over ten evenly spaced values in [0.001, 5].
params = dict(
    C=np.linspace(start=0.001, stop=5, num=10),
    gamma=np.linspace(start=0.001, stop=5, num=10),
)

# Five shuffled, class-stratified folds with a fixed seed.
kfold = StratifiedKFold(random_state=24, shuffle=True, n_splits=5)

svm = SVC(random_state=24, probability=True, kernel='rbf')

gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
# Search over the full dataset; the fitted search object is the cell's displayed value.
gcv.fit(X=X, y=y)

In [81]:
# Report the winning parameter combination and its mean CV score, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'C': 1.6673333333333331, 'gamma': 0.001}
0.9947580674649219
