In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from sklearn.metrics import accuracy_score,log_loss
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the breast-cancer table; the first CSV column becomes the row index.
cancer = pd.read_csv("BreastCancer.csv", index_col=0)

# Swap the 'Class' labels for integer codes. The fitted encoder stays in `lbl`
# so the codes can be mapped back to the original labels if needed.
lbl = LabelEncoder()
cancer['Class'] = lbl.fit_transform(cancer['Class'])

# Preview the first rows of the encoded frame.
cancer.head()

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,0
63375,9,1,2,6,4,10,7,7,2,1
76389,10,4,7,2,2,8,6,1,1,1
95719,6,10,10,10,8,10,7,10,7,1
128059,1,1,1,1,2,5,5,1,1,0


In [3]:
# Target vector: the encoded 'Class' column.
y = cancer['Class']
# Feature matrix: every column except the target.
X = cancer.drop(columns='Class')

In [4]:
# Hold out 30% of the rows for testing. Stratifying on y keeps the class
# proportions aligned across both splits; the fixed seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=23,
)

In [5]:
# Linear-kernel SVM. probability=True is required for the predict_proba call
# used later to compute log loss; random_state fixes the seed it relies on.
svm = SVC(
    kernel='linear',
    probability=True,
    random_state=23,
)
# Fit on the training split only; the test split stays unseen.
svm.fit(X_train, y_train)

In [6]:
# Hard class predictions (encoded labels) for the held-out rows.
y_pred = svm.predict(X_test)
y_pred

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0])

In [7]:
# Share of held-out rows whose predicted class equals the true class.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9476190476190476


In [8]:
# Per-class probability estimates for the held-out rows: one row per sample,
# one column per class.
y_pred_proba = svm.predict_proba(X_test)
y_pred_proba

array([[6.77214202e-06, 9.99993228e-01],
       [9.81441790e-01, 1.85582104e-02],
       [9.89211706e-01, 1.07882938e-02],
       [9.81473481e-01, 1.85265186e-02],
       [9.94007923e-01, 5.99207654e-03],
       [9.81441790e-01, 1.85582104e-02],
       [9.64731363e-01, 3.52686367e-02],
       [9.97270788e-01, 2.72921191e-03],
       [9.98442761e-01, 1.55723856e-03],
       [9.87477632e-01, 1.25223677e-02],
       [9.97484366e-01, 2.51563396e-03],
       [9.95170389e-01, 4.82961103e-03],
       [9.97938432e-01, 2.06156807e-03],
       [5.58595006e-01, 4.41404994e-01],
       [9.48006127e-01, 5.19938726e-02],
       [9.94620447e-01, 5.37955291e-03],
       [9.89399542e-01, 1.06004583e-02],
       [3.10064174e-01, 6.89935826e-01],
       [8.68881807e-02, 9.13111819e-01],
       [9.95900377e-01, 4.09962317e-03],
       [9.97624451e-01, 2.37554881e-03],
       [9.93435974e-01, 6.56402601e-03],
       [9.81664669e-01, 1.83353309e-02],
       [9.94620447e-01, 5.37955291e-03],
       [9.984427

In [9]:
# Log loss of the probability estimates on the held-out rows (lower is better).
print(log_loss(y_true=y_test, y_pred=y_pred_proba))

0.1452891776134698


### Using GridSearchCV (GCV)

In [10]:
# Shuffled, seeded 5-fold stratified cross-validation.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Ten evenly spaced values of the regularisation parameter C in [0.001, 5].
params = {'C': np.linspace(0.001, 5, 10)}

# Score each candidate by negative log loss. The search is fitted on the full
# X, y rather than on the earlier train split.
# NOTE(review): the recorded result picks C=0.001, the smallest value in the
# grid; consider extending the grid below 0.001.
gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [12]:
# Winning hyper-parameters from the grid search.
print("best parameter :", gcv.best_params_)
# Mean cross-validated score of the winner; it is a negative log loss,
# so values closer to zero are better.
print("best score :", gcv.best_score_)

best parameter : {'C': 0.001}
best score : -0.09742584975054852


### Polynomial SVM

In [13]:
# Unfitted polynomial-kernel SVM used as the base estimator for the next grid
# search. This rebinds `svm`, so the linear model fitted earlier is no longer
# reachable under that name.
svm = SVC(
    kernel='poly',
    probability=True,
    random_state=23,
)

In [14]:
# Same shuffled, seeded 5-fold stratified splitter as the linear search.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# 10 values of C x 3 polynomial degrees x 2 values of coef0 = 60 candidates.
params = {
    'C': np.linspace(0.001, 5, 10),
    'degree': [1, 2, 3],
    'coef0': [0.0, 0.3],
}

# Score each candidate by negative log loss, fitting on the full X, y.
gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [15]:
# Winning C / coef0 / degree combination from the polynomial grid search.
print("best parameter :", gcv.best_params_)
# Mean cross-validated score of the winner (negative log loss; closer to zero
# is better).
print("best score :", gcv.best_score_)

best parameter : {'C': 0.5564444444444444, 'coef0': 0.0, 'degree': 1}
best score : -0.09791435969482494
