# Use GridSearchCV to select the best hyperparameters for a Support Vector Machine

In [23]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

warnings.filterwarnings('ignore')

In [2]:
# Location of the Social Network Ads CSV. Set the SOCIAL_NETWORK_ADS_CSV
# environment variable to load a copy stored elsewhere; when it is unset the
# original machine-specific path is used, so existing setups keep working.
DATA_PATH = os.environ.get(
    'SOCIAL_NETWORK_ADS_CSV',
    '/home/admin1/Downloads/machine_learning/Flask/Problem1/Social_Network_Ads.csv',
)
data = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first rows of the freshly loaded dataset.
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [4]:
# Standardize the two numeric feature columns (zero mean, unit variance).
# The columns are addressed by label and replaced as whole columns: writing the
# float results in place into the integer columns through positional iloc is a
# dtype-incompatible assignment that recent pandas releases deprecate.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split, so test-set statistics leak into training. Consider fitting it on the
# training rows only (or wrapping scaler + SVC in a Pipeline).
feature_columns = ['Age', 'EstimatedSalary']
standard_x = StandardScaler()
data[feature_columns] = standard_x.fit_transform(data[feature_columns])

In [5]:
# Preview the frame again now that Age and EstimatedSalary have been standardized.
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,-1.781797,-1.490046,0
1,15810944,Male,-0.253587,-1.460681,0
2,15668575,Female,-1.113206,-0.78529,0
3,15603246,Female,-1.017692,-0.374182,0
4,15804002,Male,-1.781797,0.183751,0


In [31]:
# Count how many rows fall into each class of the Purchased target.
data['Purchased'].value_counts()

0    257
1    143
Name: Purchased, dtype: int64

In [6]:
# Feature matrix: the columns at positions 2 and 3 (Age and EstimatedSalary).
x = data.iloc[:, 2:4].values
# Target as a 1-D array of shape (n_samples,). Slicing with `-1:` would yield an
# (n_samples, 1) column vector, which scikit-learn only accepts after raising a
# DataConversionWarning and flattening it internally.
y = data.iloc[:, -1].values

In [7]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

# Checking the accuracy of SVC with a linear kernel and default hyperparameters

In [8]:
# Baseline model: a linear-kernel SVC with all other hyperparameters left at
# their defaults, trained on the training split and scored on the test split.
classifier = SVC(kernel='linear').fit(x_train, y_train)
y_pred = classifier.predict(x_test)

In [9]:
# Confusion matrix for the current predictions on the held-out test set
# (rows are the true classes, columns the predicted classes).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[75,  4],
       [12, 29]])

# Accuracy before applying GridSearchCV

In [10]:
# Fraction of held-out samples that the current predictions label correctly.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
score

0.8666666666666667

In [11]:
# (rows, columns) of the training feature matrix.
x_train.shape

(280, 2)

# Applying GridSearchCV

In [20]:
# Candidate hyperparameters: C alone for the linear kernel, and every
# combination of C and gamma for the RBF kernel.
parameters = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['rbf'],
        'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    },
]

# Search from a fresh SVC rather than the `classifier` variable: that name is
# reassigned elsewhere in the notebook, so reusing it would make the search
# depend on which cell happened to run last. Every candidate sets kernel and C
# (and gamma for RBF) explicitly, so the candidates evaluated are unchanged.
# Chaining .fit() keeps the cell free of an echoed estimator repr.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
).fit(x_train, y_train)

In [21]:
# Best mean cross-validated accuracy found by the grid search. It is computed
# on folds of the training split, not on the held-out test set.
accuracy = grid_search.best_score_

In [22]:
# Display the value taken from grid_search.best_score_.
accuracy

0.9142857142857143

# Getting the tuned hyperparameters from GridSearchCV

In [16]:
# Hyperparameter combination that achieved the best cross-validated score.
grid_search.best_params_

{'C': 1, 'gamma': 0.7, 'kernel': 'rbf'}

# Applying the tuned hyperparameters from GridSearchCV to our model

In [18]:
# Build the tuned classifier directly from the grid-search result instead of
# hand-copying values from the printed output. This also passes C explicitly
# and keeps the model in sync if the parameter grid or the data changes.
classifier = SVC(**grid_search.best_params_)
classifier.fit(x_train, y_train)
# Predictions of the tuned model on the held-out test set.
y_pred = classifier.predict(x_test)

# Accuracy after applying GridSearchCV

In [19]:
# Fraction of held-out samples that the current predictions label correctly.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
score

0.9083333333333333

In [29]:
# Per-class precision, recall and F1 on the held-out test set. The report is a
# plain string, so it is printed to keep its line breaks.
cr = classification_report(y_true=y_test, y_pred=y_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.95      0.91      0.93        79
           1       0.84      0.90      0.87        41

    accuracy                           0.91       120
   macro avg       0.89      0.91      0.90       120
weighted avg       0.91      0.91      0.91       120

