In [18]:
# Import Cancer dataset
from sklearn.datasets import load_breast_cancer

# Import Min Max scaler scalar to normalize the data
from sklearn.preprocessing import MinMaxScaler

#Import train test to split the data into training and testing datasets
from sklearn.model_selection import train_test_split

#Import Support Vector Classifier
from sklearn.svm import SVC

# Ignore feature warnings

from warnings import filterwarnings
filterwarnings('ignore')

In [6]:
# Load the data

cancer = load_breast_cancer()

# Split the data into training and testing datasets

x_train, x_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size = 0.2, random_state = 0)

In [9]:
# Compute minimum and maximum of training data

scaler = MinMaxScaler().fit(x_train)

In [10]:
# rescale the training data

x_train_scaled = scaler.transform(x_train)

In [19]:
# Learn SVM on the scaled training data

svm = SVC()

# Learn the model with scaled trained data

svm.fit(x_train_scaled, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [20]:
# Scale the testing data
x_testing_scaled = scaler.transform(x_test)

In [21]:
# Score the scaled data

print('Test Score: {:0.2f}'.format(svm.score(x_testing_scaled, y_test)))

Test Score: 0.96


In [28]:
# Parameter selectin with grid search CV
from sklearn.model_selection import GridSearchCV

param_grid = { 'C' : [0.001, 0.01, 1, 10, 100],
             'gamma' : [0.001, 0.01, 0.1, 1, 10, 100]}

grid = GridSearchCV(SVC(), param_grid= param_grid, cv = 5 )

grid.fit(x_train_scaled, y_train)

print('Best cross validation accuracy: {:0.2f}'.format(grid.best_score_))
print('Best Parameters: {}'.format(grid.best_params_))

Best cross validation accuracy: 0.98
Best Parameters: {'C': 1, 'gamma': 1}


<h1> Example of a simple pipeline </h1>

In [30]:
# Import pipeline method from sklearn

from sklearn.pipeline import Pipeline

# Create a simple pipeline

pipe = Pipeline([('scaler', MinMaxScaler()),('svm', SVC())])

In [31]:
# Fit the pipe processes to training data

pipe.fit(x_train, y_train)

Pipeline(memory=None,
     steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))), ('svm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [36]:
# Print the scores

print('Test Score: {:0.2f}'.format(pipe.score(x_test, y_test)))

Test Score: 0.96


<h1> Using pipelines for Grid Searches </h1>

In [37]:
# Define parameter grid for grid search CV

# The parameter name should be estimator name followed by double underscore and paramter. eg. svm__C, svm__gamma etc

param_grid_pipe = { 'svm__C' : [0.001, 0.01, 1, 10, 100],
                     'svm__gamma' : [0.001, 0.01, 0.1, 1, 10, 100]}

grid = GridSearchCV(pipe, param_grid = param_grid_pipe, cv=5)

In [38]:
# Fit the model to training data

grid.fit(x_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))), ('svm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))]),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'svm__C': [0.001, 0.01, 1, 10, 100], 'svm__gamma': [0.001, 0.01, 0.1, 1, 10, 100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [39]:
# Print Best Score

print('Best Score: {:0.2f}'.format(grid.best_score_))

Best Score: 0.98


In [41]:
# Print Best Parameters

print('Best Parameters: {}'.format(grid.best_params_))

Best Parameters: {'svm__C': 1, 'svm__gamma': 1}
