In [1]:
import pandas as pd 
import seaborn as sns 

# Visualisation libraries
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()

In [7]:
# To see the description of the loaded data
print(cancer_data['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [10]:
cancer_data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [47]:
features = pd.DataFrame(cancer_data['data'] , columns = cancer_data['feature_names'])
target= cancer_data['target']

In [48]:
from sklearn.model_selection import train_test_split

X = features
y= target

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=42)

In [49]:
from sklearn.svm import SVC
model = SVC()
model.fit(X_train,y_train)

SVC()

In [50]:
predictions = model.predict(X_test)

In [51]:
from sklearn.metrics import classification_report , confusion_matrix 
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test , predictions))
print(confusion_matrix(y_test , predictions))

Accuracy: 0.935672514619883
              precision    recall  f1-score   support

           0       1.00      0.83      0.90        63
           1       0.91      1.00      0.95       108

    accuracy                           0.94       171
   macro avg       0.95      0.91      0.93       171
weighted avg       0.94      0.94      0.93       171

[[ 52  11]
 [  0 108]]


In [57]:
# Tuning using grid search to find best parameters by tetsing for all possible combinations
# Parameter C: high c value less bias ( penalize the cause of missclassification) , more variance
# large gama less variance

from sklearn.model_selection import GridSearchCV
parameters_grid = {'C':[0.1 , 1 , 10 , 100, 1000] , 'gamma':[1,0.1,0.01,0.001,0.0001]}

In [58]:
grid_model = GridSearchCV(SVC(),parameters_grid, verbose = 3)
grid_model.fit(X_train , y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .................................C=0.1, gamma=1; total time=   0.2s
[CV 2/5] END .................................C=0.1, gamma=1; total time=   0.1s
[CV 3/5] END .................................C=0.1, gamma=1; total time=   0.1s
[CV 4/5] END .................................C=0.1, gamma=1; total time=   0.1s
[CV 5/5] END .................................C=0.1, gamma=1; total time=   0.1s
[CV 1/5] END ...............................C=0.1, gamma=0.1; total time=   0.1s
[CV 2/5] END ...............................C=0.1, gamma=0.1; total time=   0.1s
[CV 3/5] END ...............................C=0.1, gamma=0.1; total time=   0.1s
[CV 4/5] END ...............................C=0.1, gamma=0.1; total time=   0.1s
[CV 5/5] END ...............................C=0.1, gamma=0.1; total time=   0.1s
[CV 1/5] END ..............................C=0.1, gamma=0.01; total time=   0.1s
[CV 2/5] END ..............................C=0.

[CV 2/5] END ................................C=1000, gamma=1; total time=   0.1s
[CV 3/5] END ................................C=1000, gamma=1; total time=   0.1s
[CV 4/5] END ................................C=1000, gamma=1; total time=   0.1s
[CV 5/5] END ................................C=1000, gamma=1; total time=   0.1s
[CV 1/5] END ..............................C=1000, gamma=0.1; total time=   0.1s
[CV 2/5] END ..............................C=1000, gamma=0.1; total time=   0.1s
[CV 3/5] END ..............................C=1000, gamma=0.1; total time=   0.1s
[CV 4/5] END ..............................C=1000, gamma=0.1; total time=   0.1s
[CV 5/5] END ..............................C=1000, gamma=0.1; total time=   0.1s
[CV 1/5] END .............................C=1000, gamma=0.01; total time=   0.1s
[CV 2/5] END .............................C=1000, gamma=0.01; total time=   0.1s
[CV 3/5] END .............................C=1000, gamma=0.01; total time=   0.1s
[CV 4/5] END ...............

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
             verbose=3)

In [59]:
grid_model.best_params_

{'C': 100, 'gamma': 0.0001}

In [60]:
predictions = grid_model.predict(X_test)

In [61]:
from sklearn.metrics import classification_report , confusion_matrix 
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test , predictions))
print(confusion_matrix(y_test , predictions))

Accuracy: 0.9532163742690059
              precision    recall  f1-score   support

           0       0.97      0.90      0.93        63
           1       0.95      0.98      0.96       108

    accuracy                           0.95       171
   macro avg       0.96      0.94      0.95       171
weighted avg       0.95      0.95      0.95       171

[[ 57   6]
 [  2 106]]
