In [9]:
import os

import numpy as np
import pandas as pd
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, r2_score, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC


In [3]:
# Location of the Wisconsin breast-cancer CSV. The original workstation path
# remains the default; set the BREAST_CANCER_CSV environment variable to run
# the notebook on another machine without editing this cell.
csv_path = os.environ.get(
    'BREAST_CANCER_CSV',
    r'C:\Users\DAI.STUDENTSDC\Desktop\Machine Learning\Data Sets\Cases\Wisconsin\BreastCancer.csv',
)

# index_col=0 turns the first CSV column into the row index.
df = pd.read_csv(csv_path, index_col=0)
df.head()

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,Benign
63375,9,1,2,6,4,10,7,7,2,Malignant
76389,10,4,7,2,2,8,6,1,1,Malignant
95719,6,10,10,10,8,10,7,10,7,Malignant
128059,1,1,1,1,2,5,5,1,1,Benign


In [4]:
# Target is the 'Class' column; every other column is used as a feature.
y = df['Class']
X = df.drop(columns='Class')

In [60]:
# Stratified hold-out split: 30% of the rows go to the test set, and the
# fixed seed keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [61]:
# Peek at the first five training labels.
y_train.head(n=5)

Code
1266124    Benign
128059     Benign
837082     Benign
1197510    Benign
1180831    Benign
Name: Class, dtype: object

In [62]:
# Baseline model: linear-kernel SVM fitted on the training split.
# probability=True is required because predict_proba is called on this
# estimator when it is evaluated.
svm = SVC(kernel='linear', probability=True, random_state=24)
svm.fit(X_train, y_train)


In [63]:
# Hard-label accuracy of the baseline SVM on the held-out split.
y_pred = svm.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

# Per-class probability estimates for the same rows.
y_pred_proba = svm.predict_proba(X_test)
print(y_pred_proba.shape)

# ROC AUC is scored from the second probability column.
# NOTE(review): presumably that column is the 'Malignant' class - confirm
# against svm.classes_.
positive_scores = y_pred_proba[:, 1]
test_auc = roc_auc_score(y_test, positive_scores)
print(test_auc)


0.9714285714285714
(210, 2)
0.9960748792270531


GridSearchCV

In [None]:
# Joint grid search over the kernel type and the regularisation strength C.
# No Pipeline is involved here: the bare SVC is tuned directly.
# The kernel given to the constructor is only a placeholder; the grid below
# supplies the kernel for every candidate.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
svm = SVC(kernel='linear', probability=True, random_state=24)
params = dict(
    C=np.linspace(0.001, 5, 10),
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
)
gcv = GridSearchCV(svm, params, scoring='roc_auc', cv=kfold)
gcv.fit(X_train, y_train)

best_params, best_score = gcv.best_params_, gcv.best_score_
print(f'Best params: {best_params}')
print(f'Best score: {best_score}')

Best params: {'C': 0.001, 'kernel': 'poly'}
Best score: 0.9953041443850268


In [92]:
# Accuracy of the tuned search object on the held-out split.
# NOTE(review): `gcv` is whichever grid search was fitted most recently in
# the kernel - run the notebook top to bottom so this scores the intended one.
y_pred = gcv.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.9523809523809523


Polynomial SVM

In [93]:
# Polynomial-kernel SVM: tune C together with the polynomial degree (2..10).
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
svm = SVC(kernel='poly', probability=True, random_state=24)
params = dict(
    C=np.linspace(0.001, 5, 10),
    degree=list(range(2, 11)),
)
gcv = GridSearchCV(svm, params, scoring='roc_auc', cv=kfold)
gcv.fit(X_train, y_train)

In [80]:
# Report the best parameter combination found by the current `gcv` and its
# cross-validated ROC AUC score.
# NOTE(review): the saved output of this cell lists a 'kernel' key that the
# polynomial grid (C, degree) does not search, and the execution counts are
# out of order - re-run after a kernel restart to refresh the output.
best_params, best_score = gcv.best_params_, gcv.best_score_
print(f'Best params: {best_params}')
print(f'Best score: {best_score}')

Best params: {'C': 0.001, 'degree': 4, 'kernel': 'poly'}
Best score: 0.9953960561497326


Radial SVM

In [89]:
# RBF (radial) kernel SVM: tune C and gamma, each over ten evenly spaced
# values between 0.001 and 5.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
svm = SVC(kernel='rbf', probability=True, random_state=24)
params = dict(
    C=np.linspace(0.001, 5, 10),
    gamma=np.linspace(0.001, 5, 10),
)
gcv = GridSearchCV(
    svm,
    params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X_train, y_train)

In [90]:
# Report the best parameter combination found by the current `gcv` and its
# cross-validated ROC AUC score.
winning_params = gcv.best_params_
winning_score = gcv.best_score_
print(f'Best params: {winning_params}')
print(f'Best score: {winning_score}')

Best params: {'C': 5.0, 'gamma': 0.001}
Best score: 0.9948418003565063
