# K Nearest Neighbors

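kNN is one of the simplest classification algorithms available for supervised learning. The idea is to search the feature space for the training samples closest to each test point; the test point is then assigned the class held by the majority of its k nearest neighbors.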

<img src='images/knn_theory.png' />

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data loading & Understanding

In [2]:
# Read the Social Network Ads CSV into a DataFrame.
# The path is relative, so it is resolved against the notebook's working directory.
dataset = pd.read_csv(filepath_or_buffer='data/Social_Network_Ads.csv')

In [3]:
# Positional column indices used to build the model inputs.
# NOTE(review): these depend on the column order of the CSV — confirm against the file.
FEATURE_COLS = [2, 3]  # two feature columns
TARGET_COL = 4         # label column

# Pull features and labels out of the DataFrame as plain NumPy arrays.
X = dataset.iloc[:, FEATURE_COLS].values
y = dataset.iloc[:, TARGET_COL].values

## Splitting the dataset into training and testing sets

In [4]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

In [5]:
# Hold out 10% of the samples as the test set; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=0,
)

In [6]:
# Preview the first five training rows, still in their raw (unscaled) units.
X_train[:5, :]

array([[    27,  88000],
       [    41,  52000],
       [    27,  84000],
       [    35,  20000],
       [    43, 112000]])

In [7]:
# Labels belonging to the first five training rows.
y_train[:5]

array([0, 0, 0, 0, 1])

## Feature Scaling

In [8]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

In [9]:
# Scaler created with default settings; it is fitted on the training features in the next cell.
sc = StandardScaler()

In [10]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both splits so the test set never influences the fit.
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)



In [11]:
# Same five training rows as before, now after scaling.
X_train[:5, :]

array([[-1.05714987,  0.53420426],
       [ 0.2798728 , -0.51764734],
       [-1.05714987,  0.41733186],
       [-0.29313691, -1.45262654],
       [ 0.47087604,  1.23543867]])

In [12]:
# Labels are untouched by feature scaling — only X was transformed.
y_train[:5]

array([0, 0, 0, 0, 1])

## K Nearest Neighbors

In [13]:
# Fitting K-NN to the Training set
from sklearn.neighbors import KNeighborsClassifier

In [14]:
# Number of neighbours consulted for each prediction.
N_NEIGHBORS = 5

# k-NN classifier; every other setting is left at its library default.
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

In [15]:
# Fit the classifier on the scaled training data. fit() returns the estimator,
# so the cell output is the model's repr with its effective parameters.
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [16]:
# Predict class labels for the scaled test features; the predictions are
# reused later when building the confusion matrix.
y_test_pred = knn.predict(X_test)

## Accuracy

In [17]:
# Training accuracy: share of training samples the fitted model labels correctly.
knn.score(X_train, y_train)

0.9222222222222223

In [18]:
# Testing accuracy: share of held-out samples the fitted model labels correctly.
knn.score(X_test, y_test)

0.925

## Confusion Matrix
<img src='images/confusion_matrix_simple2.png'/>

In [19]:
# Confusion matrix for the held-out set: rows are the true classes,
# columns are the classes predicted by the model.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_test_pred)

In [20]:
# Display the confusion matrix computed from the test-set predictions.
cm

array([[30,  2],
       [ 1,  7]])