# K-Nearest Neighbours (K-NN)

## Importing the Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Importing the Dataset

In [2]:
dataset = pd.read_csv("Social_Network_Ads.csv")
dataset.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


## Taking care of Missing Data

In [3]:
dataset.isna().sum()

Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

In [4]:
x = dataset.drop("Purchased",axis=1)
y = dataset['Purchased']

In [5]:
x

Unnamed: 0,Age,EstimatedSalary
0,19,19000
1,35,20000
2,26,43000
3,27,57000
4,19,76000
...,...,...
395,46,41000
396,51,23000
397,50,20000
398,36,33000


In [6]:
y

0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64

## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=0)

In [8]:
x_train

Unnamed: 0,Age,EstimatedSalary
250,44,39000
63,32,120000
312,38,50000
159,32,135000
283,52,21000
...,...,...
323,48,30000
192,29,43000
117,36,52000
47,27,54000


In [9]:
y_train

250    0
63     1
312    0
159    1
283    1
      ..
323    1
192    0
117    0
47     0
172    0
Name: Purchased, Length: 300, dtype: int64

## Before Applying Feature Scaling 

### Training the K-NN model on the Training set

In [10]:
from sklearn.neighbors import KNeighborsClassifier
cls = KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
cls.fit(x_train,y_train)

KNeighborsClassifier()

### Model Score on Training data

In [11]:
cls.score(x_train,y_train)

0.8766666666666667

### Model Score on Testing data

In [12]:
cls.score(x_test,y_test)

0.83

### Predicting a New Result

In [13]:
cls.predict([[30,87000]])

array([0], dtype=int64)

### Predicting the Test set Results

In [14]:
y_predict = cls.predict(x_test)

### Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_predict)

array([[59,  9],
       [ 8, 24]], dtype=int64)

### Classification Report

In [16]:
from sklearn.metrics import accuracy_score,recall_score,r2_score,precision_score,f1_score
print("Accuracy Score =" ,accuracy_score(y_predict,y_test))
print("R2 Score =", r2_score(y_predict,y_test))
print("Recall Score =",recall_score(y_predict,y_test))
print("Precision Score =",precision_score(y_predict,y_test))
print("f1 Score =", f1_score(y_predict,y_test))

Accuracy Score = 0.83
R2 Score = 0.2311171415649026
Recall Score = 0.7272727272727273
Precision Score = 0.75
f1 Score = 0.7384615384615384


## After Applying Feature Scaling 

### Feature Scaling

In [17]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

In [18]:
x_train

array([[ 0.58164944, -0.88670699],
       [-0.60673761,  1.46173768],
       [-0.01254409, -0.5677824 ],
       [-0.60673761,  1.89663484],
       [ 1.37390747, -1.40858358],
       [ 1.47293972,  0.99784738],
       [ 0.08648817, -0.79972756],
       [-0.01254409, -0.24885782],
       [-0.21060859, -0.5677824 ],
       [-0.21060859, -0.19087153],
       [-0.30964085, -1.29261101],
       [-0.30964085, -0.5677824 ],
       [ 0.38358493,  0.09905991],
       [ 0.8787462 , -0.59677555],
       [ 2.06713324, -1.17663843],
       [ 1.07681071, -0.13288524],
       [ 0.68068169,  1.78066227],
       [-0.70576986,  0.56295021],
       [ 0.77971394,  0.35999821],
       [ 0.8787462 , -0.53878926],
       [-1.20093113, -1.58254245],
       [ 2.1661655 ,  0.93986109],
       [-0.01254409,  1.22979253],
       [ 0.18552042,  1.08482681],
       [ 0.38358493, -0.48080297],
       [-0.30964085, -0.30684411],
       [ 0.97777845, -0.8287207 ],
       [ 0.97777845,  1.8676417 ],
       [-0.01254409,

### Training the K-NN model on the Training set

In [19]:
from sklearn.neighbors import KNeighborsClassifier
cls = KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
cls.fit(x_train,y_train)

KNeighborsClassifier()

### Model Score on Training data

In [20]:
cls.score(x_train,y_train)

0.91

### Model Score on Testing data

In [21]:
cls.score(x_test,y_test)

0.93

### Predicting a New Result

In [22]:
cls.predict(sc.fit_transform([[30,87000]]))

array([0], dtype=int64)

### Predicting the Test set Results

In [23]:
y_predict = cls.predict(x_test)

### Confusion Matrix

In [24]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_predict)

array([[64,  4],
       [ 3, 29]], dtype=int64)

### Classification Report

In [25]:
from sklearn.metrics import accuracy_score,recall_score,r2_score,precision_score,f1_score
print("Accuracy Score =" ,accuracy_score(y_predict,y_test))
print("R2 Score =", r2_score(y_predict,y_test))
print("Recall Score =",recall_score(y_predict,y_test))
print("Precision Score =",precision_score(y_predict,y_test))
print("f1 Score =", f1_score(y_predict,y_test))

Accuracy Score = 0.93
R2 Score = 0.6834011759384893
Recall Score = 0.8787878787878788
Precision Score = 0.90625
f1 Score = 0.8923076923076922
