# Importing essential libraries

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

# Load the dataset

In [2]:
# Load scikit-learn's bundled breast-cancer dataset; the cells below read its
# feature_names, target_names, data and target attributes.
cancer = datasets.load_breast_cancer()

# Exploring Data

In [3]:
# print the names of the 30 features
print("Features: ", cancer.feature_names)

# print the class labels ('malignant', 'benign')
print("Labels: ", cancer.target_names)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


# Split the data into training/testing sets

In [4]:
# Hold out 30% of the samples for testing and train on the remaining 70%;
# the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=109,
)

# Generating Model

In [5]:
# No kernel argument is passed, so SVC falls back to its default kernel
# (RBF per the scikit-learn documentation) rather than a linear one.
clf = svm.SVC() # default kernel, not linear

# Train the model using the training sets

In [6]:
# Fit the classifier on the training split.
clf.fit(X_train, y_train)

SVC()

# Predict the response for test dataset

In [7]:
# Predict class labels for the held-out test samples.
y_pred = clf.predict(X_test)

# Evaluating the Model

In [8]:
# NOTE(review): precision/recall are called with default arguments, so the
# "positive" class is presumably label 1, which appears to be 'benign' given
# the target_names order printed earlier — confirm before reading these as
# malignancy-detection metrics.

# Model Accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

# Model Precision: of the samples predicted positive, what fraction are truly positive?
print("Precision:",metrics.precision_score(y_test, y_pred))

# Model Recall: of the truly positive samples, what fraction were predicted positive?
print("Recall:",metrics.recall_score(y_test, y_pred))

Accuracy: 0.9239766081871345
Precision: 0.8925619834710744
Recall: 1.0


In [9]:
# Candidate SVC hyperparameters. The keys must match SVC's constructor
# argument names exactly ('C', 'kernel', 'gamma'); misspelled keys would be
# rejected as invalid parameters when handed to a search object.
# NOTE: the following cell rebinds `grid` to a GridSearchCV instance, so this
# dict is not the search space that cell actually uses.
grid={
    'C':[0.1, 1, 10, 100, 1000],
    'kernel':['rbf','linear'],
    'gamma':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
}

In [10]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# Search space for the exhaustive search: 4 values of C x 5 values of gamma,
# linear kernel only.
# NOTE(review): gamma is documented as a coefficient for the rbf/poly/sigmoid
# kernels, so with only 'linear' listed the gamma axis likely just repeats
# identical fits — confirm and consider dropping it or adding 'rbf'.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['linear'],
)

# Cross-validated search over every combination, parallelised with n_jobs=-1.
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, n_jobs=-1)
grid.fit(X_train, y_train)

# Show the winning combination and its cross-validation score.
print(grid.best_params_)
print(grid.best_score_)

# Score the model selected by the search on the held-out test set.
grid_predictions = grid.predict(X_test)
print(classification_report(y_test, grid_predictions))

{'C': 100, 'gamma': 1, 'kernel': 'linear'}
0.9497784810126582
              precision    recall  f1-score   support

           0       0.94      0.98      0.96        63
           1       0.99      0.96      0.98       108

    accuracy                           0.97       171
   macro avg       0.96      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171



In [11]:
# Re-display the best cross-validation score found by the grid search above.
print(grid.best_score_) 

0.9497784810126582


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# A randomized search samples only some of the combinations in param_grid, so
# it is seeded to make the chosen candidates (and therefore the reported best
# parameters and score) reproducible across runs. The seed matches the one
# used for the train/test split.
rgrid = RandomizedSearchCV(SVC(), param_grid, n_jobs=-1, random_state=109)

In [None]:
# Run the randomized search on the training split, then show the best
# parameter combination followed by its cross-validation score.
rgrid.fit(X_train, y_train)
print(rgrid.best_params_, rgrid.best_score_, sep="\n")

In [None]:
# Score the model selected by the randomized search on the held-out test set
# and print the per-class classification report.
rgrid_predictions = rgrid.predict(X_test)
print(classification_report(y_true=y_test, y_pred=rgrid_predictions))