# Social Network ads

## KNN & SVM with GRID search cv

In [1]:
import warnings
warnings.filterwarnings(action="ignore")

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_csv('./Social_Network_Ads.csv')
df.head(2)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [5]:
df.describe()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased
count,400.0,400.0,400.0,400.0
mean,15691540.0,37.655,69742.5,0.3575
std,71658.32,10.482877,34096.960282,0.479864
min,15566690.0,18.0,15000.0,0.0
25%,15626760.0,29.75,43000.0,0.0
50%,15694340.0,37.0,70000.0,0.0
75%,15750360.0,46.0,88000.0,1.0
max,15815240.0,60.0,150000.0,1.0


In [7]:
# remove the User ID as it will not add any value to the model
df.drop('User ID',axis = 1, inplace=True)

In [8]:
df.head(2)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0


In [9]:
from sklearn.preprocessing import LabelEncoder

df['Gender'] = LabelEncoder().fit_transform(df['Gender'])

In [10]:
df.corr()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
Gender,1.0,-0.073741,-0.060435,-0.042469
Age,-0.073741,1.0,0.155238,0.622454
EstimatedSalary,-0.060435,0.155238,1.0,0.362083
Purchased,-0.042469,0.622454,0.362083,1.0


In [11]:
# create x
x = df.drop(['Gender', 'Purchased'], axis=1)

# create y
y = df['Purchased']

In [12]:
from sklearn.model_selection import train_test_split

# split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=123456)

## SVC model

In [23]:
from sklearn.svm import SVC

svc_model = SVC(C=2, kernel='rbf')

svc_model.fit(x_train,y_train)

In [24]:
y_pred_svc = svc_model.predict(x_test)

In [20]:
from sklearn.model_selection import GridSearchCV

parameters = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': [1, 2]
}

grid_search_svc = GridSearchCV(svc_model,parameters)
grid_search_svc.fit(x_test,y_test)

In [21]:
grid_search_svc.cv_results_

{'mean_fit_time': array([7.85391202e+00, 4.02698517e-03, 3.51276398e-03, 2.81348228e-03,
        1.31557985e+01, 3.14493179e-03, 6.36434555e-03, 3.13897133e-03]),
 'std_fit_time': array([3.92297662e+00, 1.27961776e-03, 7.59610253e-04, 1.60770435e-03,
        1.11755622e+01, 6.28986359e-03, 7.79473327e-03, 6.27794266e-03]),
 'mean_score_time': array([0.00398722, 0.00351686, 0.00241203, 0.00230713, 0.00594573,
        0.00320053, 0.        , 0.00631719]),
 'std_score_time': array([0.00616458, 0.00148745, 0.00080445, 0.00139375, 0.00730123,
        0.00640106, 0.        , 0.00773707]),
 'param_C': masked_array(data=[1, 1, 1, 1, 2, 2, 2, 2],
              mask=[False, False, False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['linear', 'poly', 'rbf', 'sigmoid', 'linear', 'poly',
                    'rbf', 'sigmoid'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value=np.str_('?'),
          

In [22]:
grid_search_svc.best_params_

{'C': 2, 'kernel': 'rbf'}

In [25]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_test, y_pred_svc)
precision = precision_score(y_test, y_pred_svc)
recall = recall_score(y_test, y_pred_svc)
f1 = f1_score(y_test, y_pred_svc)
print(f"accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1 = {f1:.2f}")

accuracy = 0.85, precision = 0.93, recall = 0.54, f1 = 0.68


## KNN model

In [32]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier(n_neighbors= 7, weights='uniform')

knn_model.fit(x_train,y_train)

In [33]:
y_pred_knn = knn_model.predict(x_test)

In [34]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_test, y_pred_svc)
precision = precision_score(y_test, y_pred_svc)
recall = recall_score(y_test, y_pred_svc)
f1 = f1_score(y_test, y_pred_svc)
print(f"accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1 = {f1:.2f}")

accuracy = 0.85, precision = 0.93, recall = 0.54, f1 = 0.68


In [29]:
from sklearn.model_selection import GridSearchCV

parameters = {
    'n_neighbors': range(3, 11),
    'weights': ['uniform', 'distance']
}

grid_search_knn = GridSearchCV(knn_model,parameters)
grid_search_knn.fit(x_test,y_test)

In [30]:
grid_search_knn.cv_results_

{'mean_fit_time': array([0.00598454, 0.00931091, 0.00654831, 0.00394993, 0.00323539,
        0.00677505, 0.00392323, 0.        , 0.00362768, 0.00021243,
        0.00300694, 0.00950642, 0.00313711, 0.00951123, 0.        ,
        0.        ]),
 'std_fit_time': array([0.00228681, 0.00232117, 0.00674786, 0.00317598, 0.00647078,
        0.00705534, 0.00623582, 0.        , 0.00629968, 0.00042486,
        0.00601387, 0.00776376, 0.00627422, 0.00776752, 0.        ,
        0.        ]),
 'mean_score_time': array([0.01138296, 0.0080893 , 0.01562052, 0.00719781, 0.00797343,
        0.00211792, 0.00597897, 0.00636749, 0.00912261, 0.0079411 ,
        0.00495982, 0.        , 0.00637283, 0.        , 0.00313754,
        0.00969081]),
 'std_score_time': array([0.00363962, 0.00689365, 0.0052118 , 0.00568683, 0.00679659,
        0.00240159, 0.00669122, 0.00787886, 0.0087564 , 0.00501746,
        0.00649731, 0.        , 0.00780635, 0.        , 0.00627508,
        0.00791251]),
 'param_n_neighbors': mask

In [31]:
grid_search_knn.best_params_

{'n_neighbors': 7, 'weights': 'uniform'}