In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
data = pd.read_csv('BreastCancer_final1.csv')

In [4]:
data.head()

Unnamed: 0,Age,BMI,BreastFeeding,Marital Status,Alcohol,Smoking,BreastCancerHistory,Age at firstPeriod,MenstrualCycle,Cancer Positive
0,21,26.453292,0,1,0,0,0,11,1,0
1,52,31.557711,0,1,1,1,1,14,3,1
2,52,14.604623,0,1,1,0,0,12,3,1
3,50,31.726225,0,1,1,0,0,13,3,1
4,22,29.022006,0,2,0,0,0,9,1,0


In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X = data[['Age', 'BMI', 'BreastFeeding', 'Marital Status', 'Alcohol', 'Smoking',
       'BreastCancerHistory', 'Age at firstPeriod', 'MenstrualCycle']]
y = data['Cancer Positive']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [7]:
from sklearn.svm import SVC

In [8]:
svm = SVC(probability=True)

In [9]:
svm.fit(X_train,y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [10]:
svm_predictions = svm.predict(X_test)

In [11]:
from sklearn.metrics import classification_report,confusion_matrix

In [12]:
print(classification_report(y_test,svm_predictions))

              precision    recall  f1-score   support

           0       0.89      0.98      0.94       474
           1       0.97      0.83      0.89       315

    accuracy                           0.92       789
   macro avg       0.93      0.90      0.91       789
weighted avg       0.92      0.92      0.92       789



In [13]:
print(confusion_matrix(y_test,svm_predictions))

[[466   8]
 [ 55 260]]


In [14]:
from sklearn.model_selection import GridSearchCV

In [15]:
param_grid = {'C':[0.1,1,10,100,1000],'gamma':[1,0.1,0.01,0.001,0.0001]}

In [16]:
grid = GridSearchCV(SVC(),param_grid,verbose=3)

In [17]:
grid.fit(X_train,y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ...................... C=0.1, gamma=1, score=0.582, total=   0.1s
[CV] C=0.1, gamma=1 ..................................................
[CV] ...................... C=0.1, gamma=1, score=0.582, total=   0.1s
[CV] C=0.1, gamma=1 ..................................................
[CV] ...................... C=0.1, gamma=1, score=0.583, total=   0.1s
[CV] C=0.1, gamma=0.1 ................................................
[CV] .................... C=0.1, gamma=0.1, score=0.890, total=   0.1s
[CV] C=0.1, gamma=0.1 ................................................
[CV] .................... C=0.1, gamma=0.1, score=0.893, total=   0.1s
[CV] C=0.1, gamma=0.1 ................................................
[CV] .................... C=0.1, gamma=0.1, score=0.880, total=   0.1s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...........

[CV] ................. C=100, gamma=0.0001, score=0.900, total=   0.0s
[CV] C=1000, gamma=1 .................................................
[CV] ..................... C=1000, gamma=1, score=0.831, total=   0.1s
[CV] C=1000, gamma=1 .................................................
[CV] ..................... C=1000, gamma=1, score=0.818, total=   0.1s
[CV] C=1000, gamma=1 .................................................
[CV] ..................... C=1000, gamma=1, score=0.823, total=   0.1s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ................... C=1000, gamma=0.1, score=0.938, total=   0.1s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ................... C=1000, gamma=0.1, score=0.927, total=   0.0s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ................... C=1000, gamma=0.1, score=0.929, total=   0.0s
[CV] C=1000, gamma=0.01 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    4.5s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [18]:
grid.best_params_

{'C': 100, 'gamma': 0.01}

In [19]:
grid.best_estimator_

SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [20]:
grid_predictions = grid.predict(X_test)

In [21]:
print(classification_report(y_test,grid_predictions))

              precision    recall  f1-score   support

           0       0.97      0.98      0.98       474
           1       0.97      0.96      0.97       315

    accuracy                           0.97       789
   macro avg       0.97      0.97      0.97       789
weighted avg       0.97      0.97      0.97       789



In [22]:
print(confusion_matrix(y_test,grid_predictions))

[[465   9]
 [ 12 303]]


In [23]:
import joblib as jb

In [24]:
jb.dump(svm,'model4.pkl')

['model4.pkl']

In [25]:
model = jb.load('model4.pkl')

In [26]:
print(model.predict([[23,19.5,0,3,0,0,0,13,1]]))

[0]


In [27]:
print(model.predict([[21,30.1,0,3,0,0,0,13,1]]))

[0]


In [28]:
model.predict_proba([[23,19.5,0,3,0,0,0,13,1]])

array([[0.74266678, 0.25733322]])

In [29]:
print(model.predict_proba([[23,19.5,0,3,0,0,0,13,1]]))

[[0.74266678 0.25733322]]


In [30]:
print(model.predict_proba([[21,30.1,0,3,0,0,0,13,1]]))

[[0.38078122 0.61921878]]


In [35]:
data.head()

Unnamed: 0,Age,BMI,BreastFeeding,Marital Status,Alcohol,Smoking,BreastCancerHistory,Age at firstPeriod,MenstrualCycle,Cancer Positive
0,21,26.453292,0,1,0,0,0,11,1,0
1,52,31.557711,0,1,1,1,1,14,3,1
2,52,14.604623,0,1,1,0,0,12,3,1
3,50,31.726225,0,1,1,0,0,13,3,1
4,22,29.022006,0,2,0,0,0,9,1,0


In [36]:
print(model.predict([[52,31.6,0,1,1,1,1,14,3]]))

[1]


In [37]:
print(model.predict_proba([[52,31.6,0,1,1,1,1,14,3]]))

[[0.00979166 0.99020834]]


In [38]:
print(model.predict([[23,19.5,0,3,0,0,0,13,1]]))

[0]


In [71]:
print(model.predict_proba([[23,19.5,0,3,0,0,0,13,1]]))

[[0.74266678 0.25733322]]


In [42]:
print(model.predict([[21,30.1,0,3,0,0,0,13,1]]))

[0]


In [72]:
print(model.predict_proba([[21,30.1,0,3,0,0,0,13,1]]))

[[0.38078122 0.61921878]]


In [44]:
from sklearn.metrics import accuracy_score

In [45]:
accuracy_score(y_test,grid_predictions)

0.973384030418251

In [47]:
print(model.decision_function([[21,30.1,0,3,0,0,0,13,1]]))

[-0.0821693]


In [48]:
print(model.predict([[23,19.5,0,3,0,0,0,13,1]]))

[0]


In [49]:
print(model.predict_proba([[23,19.5,0,3,0,0,0,13,1]]))

[[0.74266678 0.25733322]]


In [57]:
from sklearn.calibration import CalibratedClassifierCV,calibration_curve

In [58]:
svm1 = SVC()

In [59]:
calibrated = CalibratedClassifierCV(svm1)

In [60]:
calibrated.fit(X_train,y_train)



CalibratedClassifierCV(base_estimator=SVC(C=1.0, cache_size=200,
                                          class_weight=None, coef0=0.0,
                                          decision_function_shape='ovr',
                                          degree=3, gamma='auto_deprecated',
                                          kernel='rbf', max_iter=-1,
                                          probability=False, random_state=None,
                                          shrinking=True, tol=0.001,
                                          verbose=False),
                       cv='warn', method='sigmoid')

In [70]:
calibrated.predict_proba([[21,30.1,0,3,0,0,0,13,1]])

array([[0.35086483, 0.64913517]])

In [68]:
pred_cal = calibrated.predict(X_test)

In [69]:
print(classification_report(y_test,pred_cal))

              precision    recall  f1-score   support

           0       0.90      0.98      0.94       474
           1       0.96      0.83      0.89       315

    accuracy                           0.92       789
   macro avg       0.93      0.91      0.91       789
weighted avg       0.92      0.92      0.92       789



In [73]:
calibrated.predict_proba([[52,31.6,0,1,1,1,1,14,3]])

array([[0.0141091, 0.9858909]])