Grid Search

Importing the libraries

In [21]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Importing the dataset

In [22]:
# Read the raw table, then slice it positionally: the columns at positions 2 and 3
# form the feature matrix and the final column forms the target vector.
# NOTE(review): the meaning of those columns is not visible here — confirm against the CSV header.
dataset = pd.read_csv('social_network_ads.csv')
X, y = dataset.iloc[:, [2, 3]].values, dataset.iloc[:, -1].values

Splitting the dataset into the Training set & Test set

In [23]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows as the test split; the fixed random_state makes
# the partition repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

Feature Scaling

In [24]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that one
# fitted scaler to both splits so the test rows never influence the fit.
sc_X = StandardScaler().fit(X_train)
X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)

Fitting Kernel SVM to the Training set

In [25]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel and a fixed seed; every other
# setting is left at the library default.
classifier = SVC(
    kernel='rbf',
    random_state=0,
)
classifier.fit(X_train, y_train)

Predicting the Test set results

In [26]:
# Predict labels for the held-out rows and show the raw predictions.
y_pred = classifier.predict(X_test)
print(y_pred)

# Score the predictions against the true test labels.
print("accuracy: {}".format(accuracy_score(y_test, y_pred)))
# classification_report returns a pre-aligned multi-line table; starting it on its
# own line keeps the header row lined up with the columns beneath it (with the
# label on the same line, the header was pushed right by the label's width).
print("classification:\n{}".format(classification_report(y_test, y_pred)))

[0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0
 0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 0 1 1 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1
 0 0 0 0 1 1 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 1 1 1]
accuracy: 0.93
classification:               precision    recall  f1-score   support

           0       0.96      0.94      0.95        68
           1       0.88      0.91      0.89        32

    accuracy                           0.93       100
   macro avg       0.92      0.92      0.92       100
weighted avg       0.93      0.93      0.93       100



Making the Confusion Matrix

In [27]:
from sklearn.metrics import confusion_matrix

# Tabulate test-set outcomes: rows are the true labels, columns the predicted labels.
cm = confusion_matrix(y_test, y_pred)
# End the cell on the matrix itself so the notebook renders it; an assignment
# alone produces no output, which left this section without a visible result.
cm

Applying K-Fold Cross Validation

In [28]:
from sklearn.model_selection import cross_val_score

# Score the classifier's configuration with 10-fold cross-validation on the training split.
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)

# A notebook only echoes the final expression of a cell, so a bare
# `accuracies.mean()` above the last line is computed and thrown away.
# Print the mean explicitly; the standard deviation remains the cell's value.
print("mean accuracy: {}".format(accuracies.mean()))
accuracies.std()

0.06574360974438671

Applying Grid Search to find the best model and the best parameters

In [29]:
from sklearn.model_selection import GridSearchCV
# Two independent search spaces, tried one after the other by the grid search:
#   1. a linear kernel, varying only the regularisation strength C;
#   2. an RBF kernel, varying C together with the kernel coefficient gamma.
parameters = [
    dict(
        C=[1, 10, 100, 1000],
        kernel=['linear'],
    ),
    dict(
        C=[1, 10, 100, 1000],
        kernel=['rbf'],
        gamma=[0.5, 0.1, 0.01, 0.001, 0.0001],
    ),
]

In [30]:
# Exhaustively evaluate every combination in `parameters` with 10-fold
# cross-validated accuracy, using all available cores (n_jobs=-1).
# fit() hands back the search object itself, so construction and fitting can be chained.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
).fit(X_train, y_train)

In [32]:
# Pull the winning cross-validated score and its hyper-parameter combination
# off the fitted search object.
best_accuracy, best_parameters = grid_search.best_score_, grid_search.best_params_

# One value per line: the score first, then the parameter dict.
print(best_accuracy, best_parameters, sep='\n')

0.9066666666666666
{'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
