In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
data = pd.read_csv('BreastCancerData.csv')

In [4]:
data.head()

Unnamed: 0,Age,BMI,BreastFeeding,Marital Status,Alcohol,Smoking,BreastCancerHistory,Age at firstPeriod,MenstrualCycle,Cancer Positive
0,48,8.543723,1,1,0,0,0,15,1,0
1,31,10.204207,1,1,0,0,0,12,1,0
2,31,13.807133,1,1,0,0,0,14,1,0
3,33,14.088867,1,1,1,0,0,12,1,0
4,49,14.494061,1,1,0,0,0,15,1,0


In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X = data[['Age', 'BMI', 'BreastFeeding', 'Marital Status', 'Alcohol', 'Smoking',
       'BreastCancerHistory', 'Age at firstPeriod', 'MenstrualCycle']]
y = data['Cancer Positive']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [7]:
from sklearn.svm import SVC

In [8]:
svm = SVC(probability=True)

In [10]:
svm.fit(X_train,y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [11]:
svm_predictions = svm.predict(X_test)

In [12]:
from sklearn.metrics import classification_report,confusion_matrix

In [13]:
print(classification_report(y_test,svm_predictions))

              precision    recall  f1-score   support

           0       0.95      0.96      0.96       428
           1       0.94      0.93      0.93       289

    accuracy                           0.95       717
   macro avg       0.95      0.94      0.94       717
weighted avg       0.95      0.95      0.95       717



In [14]:
print(confusion_matrix(y_test,svm_predictions))

[[410  18]
 [ 20 269]]


In [15]:
from sklearn.model_selection import GridSearchCV

In [16]:
param_grid = {'C':[0.1,1,10,100,1000],'gamma':[1,0.1,0.01,0.001,0.0001]}

In [17]:
grid = GridSearchCV(SVC(),param_grid,verbose=3)

In [18]:
grid.fit(X_train,y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ...................... C=0.1, gamma=1, score=0.584, total=   0.1s
[CV] C=0.1, gamma=1 ..................................................
[CV] ...................... C=0.1, gamma=1, score=0.585, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ...................... C=0.1, gamma=1, score=0.585, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] .................... C=0.1, gamma=0.1, score=0.876, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] .................... C=0.1, gamma=0.1, score=0.892, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] .................... C=0.1, gamma=0.1, score=0.897, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...........

[CV] ..................... C=1000, gamma=1, score=0.916, total=   0.1s
[CV] C=1000, gamma=1 .................................................
[CV] ..................... C=1000, gamma=1, score=0.921, total=   0.1s
[CV] C=1000, gamma=1 .................................................
[CV] ..................... C=1000, gamma=1, score=0.941, total=   0.1s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ................... C=1000, gamma=0.1, score=0.943, total=   0.0s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ................... C=1000, gamma=0.1, score=0.943, total=   0.0s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ................... C=1000, gamma=0.1, score=0.946, total=   0.0s
[CV] C=1000, gamma=0.01 ..............................................
[CV] .................. C=1000, gamma=0.01, score=0.946, total=   0.0s
[CV] C=1000, gamma=0.01 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    2.3s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [19]:
grid.best_params_

{'C': 1000, 'gamma': 0.01}

In [20]:
grid.best_estimator_

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [21]:
grid_predictions = grid.predict(X_test)

In [22]:
print(classification_report(y_test,grid_predictions))

              precision    recall  f1-score   support

           0       0.96      0.95      0.96       428
           1       0.93      0.94      0.94       289

    accuracy                           0.95       717
   macro avg       0.95      0.95      0.95       717
weighted avg       0.95      0.95      0.95       717



In [23]:
print(confusion_matrix(y_test,grid_predictions))

[[408  20]
 [ 16 273]]


In [24]:
import joblib as jb

In [25]:
jb.dump(svm,'model_final.pkl')

['model_final.pkl']

In [26]:
model = jb.load('model_final.pkl')

In [29]:
print(model.predict([[23,19.5,0,3,0,0,0,13,1]]))

[0]


In [34]:
print(model.predict_proba([[23,19.5,0,3,0,0,0,13,1]]))

[[0.76047207 0.23952793]]


In [37]:
print(model.decision_function([[23,19.5,0,3,0,0,0,13,1]]))

[-0.43367477]


In [33]:
print(model.predict([[21,30.1,0,3,0,0,0,13,1]]))

[1]


In [31]:
print(model.predict_proba([[21,30.1,0,3,0,0,0,13,1]]))

[[0.09860082 0.90139918]]


In [36]:
print(model.decision_function([[21,30.1,0,3,0,0,0,13,1]]))

[0.4583154]


In [39]:
print(model.predict([[21,21.5,1,1,0,0,0,15,3]]))
print(model.predict_proba([[21,21.5,1,1,0,0,0,15,3]])[:,1])

[1]
[0.65793436]


In [41]:
print(model.predict([[21,21.5,0,3,0,0,0,13,1]]))
print(model.predict_proba([[21,21.5,0,3,0,0,0,13,1]])[:,1])

[0]
[0.57721474]
