In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [2]:
# Read the Social Network Ads CSV into a DataFrame and preview its first rows.
csv_path = 'data/Social_Network_Ads.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [4]:
# Predictor columns and label column used by the rest of the notebook.
features = ['Age', 'EstimatedSalary']
target = ['Purchased']

# X keeps the predictor columns as a DataFrame.
X = dataset.loc[:, features]
# Selecting with a list yields a one-column frame; flatten it to a 1-D array.
label_frame = dataset[target]
Y = label_frame.values.ravel()

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only ...
sc_X = StandardScaler().fit(X_train)
# ... then apply that same fitted transform to both splits.
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

In [7]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier; every other hyperparameter is left
# at the library default.
svc_options = {'kernel': 'rbf', 'random_state': 0}
model = SVC(**svc_options)
# Kept as the cell's final expression so the fitted estimator is displayed.
model.fit(X=X_train, y=Y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
# Classify the held-out test rows with the fitted classifier.
y_pred = model.predict(X=X_test)

In [9]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_true=Y_test, y_pred=y_pred)
cm

array([[64,  4],
       [ 3, 29]])

## Apply 10-Fold Cross Validation

In [11]:
from sklearn.model_selection import cross_val_score

# Score the classifier with 10-fold cross-validation on the training split.
# NOTE(review): X_train was standardized before this call, so each fold's
# held-out part already influenced the scaling statistics. Wrapping the
# scaler and the model in a Pipeline would avoid that -- confirm whether it
# matters for this analysis.
accuracies = cross_val_score(model, X=X_train, y=Y_train, cv=10)

# Iterate over the scores that were actually returned rather than assuming
# a fixed count, so the report stays correct if `cv` is changed.
for fold, accuracy in enumerate(accuracies, start=1):
    print('Cross Val Score {0}: {1:.2f}%'.format(fold, accuracy * 100))
print('\nCross Val Mean: {0:.2f}%'.format(accuracies.mean()*100))
print('Cross Val Std: {0:.2f}%'.format(accuracies.std()*100))

Cross Val Score 1: 80.65%
Cross Val Score 2: 96.67%
Cross Val Score 3: 80.00%
Cross Val Score 4: 93.33%
Cross Val Score 5: 86.67%
Cross Val Score 6: 83.33%
Cross Val Score 7: 93.33%
Cross Val Score 8: 93.33%
Cross Val Score 9: 96.67%
Cross Val Score 10: 96.55%

Cross Val Mean: 90.05%
Cross Val Std: 6.39%


## Apply Grid Search

In [22]:
from sklearn.model_selection import GridSearchCV

# Regularization strengths shared by both kernel sub-grids.
c_values = [1, 10, 100, 1000]
# Gamma values searched for the RBF kernel; a coarser alternative grid is
# [0.5, 0.1, 0.01, 0.001].
gamma_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# One sub-grid per kernel: gamma is only searched for the RBF kernel.
params = [
    dict(C=c_values, kernel=['linear']),
    dict(C=c_values, kernel=['rbf'], gamma=gamma_values),
]

# Exhaustive search over `params`, scored by accuracy with 10-fold CV.
search_options = dict(
    estimator=model,
    param_grid=params,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search = GridSearchCV(**search_options)

### Now fit the grid_search object to the training set

In [23]:
# Run the search on the training split; the result of fit() is bound back
# to the same name so later cells read the fitted search object from it.
grid_search = grid_search.fit(X=X_train, y=Y_train)

In [24]:
# Report the best cross-validated accuracy found by the search and the
# parameter combination that produced it.
best_accuracy_pct = grid_search.best_score_ * 100
print('Best Accuracy: {score:.2f}%'.format(score=best_accuracy_pct))
print('Best Parameters: {found}'.format(found=grid_search.best_params_))

Best Accuracy: 90.33%
Best Parameters: {'C': 1, 'gamma': 0.7, 'kernel': 'rbf'}
