### <b> K - Nearest Neighbor Classifier
![image.png](attachment:image.png)
![image-2.png](attachment:image-2.png)
![image-3.png](attachment:image-3.png)
![image-4.png](attachment:image-4.png)
![image-5.png](attachment:image-5.png)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [3]:
dataset = pd.read_csv("Social_Network_Ads.csv")

description = dataset.describe()

print(f"Description: \n{description}\n")

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

print(f"X: \n{X}\n")
print(f"y: \n{y}\n")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

Description: 
              Age  EstimatedSalary   Purchased
count  400.000000       400.000000  400.000000
mean    37.655000     69742.500000    0.357500
std     10.482877     34096.960282    0.479864
min     18.000000     15000.000000    0.000000
25%     29.750000     43000.000000    0.000000
50%     37.000000     70000.000000    0.000000
75%     46.000000     88000.000000    1.000000
max     60.000000    150000.000000    1.000000

X: 
[[    19  19000]
 [    35  20000]
 [    26  43000]
 [    27  57000]
 [    19  76000]
 [    27  58000]
 [    27  84000]
 [    32 150000]
 [    25  33000]
 [    35  65000]
 [    26  80000]
 [    26  52000]
 [    20  86000]
 [    32  18000]
 [    18  82000]
 [    29  80000]
 [    47  25000]
 [    45  26000]
 [    46  28000]
 [    48  29000]
 [    45  22000]
 [    47  49000]
 [    48  41000]
 [    45  22000]
 [    46  23000]
 [    47  20000]
 [    49  28000]
 [    47  30000]
 [    29  43000]
 [    31  18000]
 [    31  74000]
 [    27 137000]
 [    21  1600

In [4]:
standard_scaler = StandardScaler()

X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.fit_transform(X_test)

print(f"X_train: \n{X_train}\n")
print(f"X_test: \n{X_test}\n")

X_train: 
[[ 1.70122803 -0.91280141]
 [-1.78383698  0.17960782]
 [ 0.73315442 -0.73565397]
 [-0.62214864 -1.53281746]
 [-0.33172656  1.24249248]
 [-0.81576337  0.15008325]
 [-1.00937809 -1.148998  ]
 [ 1.0235765   0.12055868]
 [ 0.05550289 -0.26326078]
 [ 0.82996178  0.26818155]
 [-1.20299281  0.50437814]
 [-1.10618545 -0.52898195]
 [ 1.41080595  2.36442629]
 [-1.10618545 -1.17852257]
 [ 1.21719123 -1.00137513]
 [-1.00937809 -0.35183451]
 [-0.62214864  0.12055868]
 [-1.10618545 -1.62139118]
 [ 2.08845748  0.17960782]
 [ 0.34592497 -1.17852257]
 [-1.59022226 -1.59186661]
 [ 0.92676914 -1.32614544]
 [ 1.60442067  1.12439419]
 [ 0.82996178 -0.32230993]
 [-0.04130447 -0.23373621]
 [ 0.24911761 -0.38135908]
 [ 1.89484276 -1.29662087]
 [-1.68702962 -0.61755567]
 [-0.04130447  0.68152558]
 [ 2.18526484  0.38627984]
 [ 1.12038386  0.53390271]
 [-1.10618545  0.06150953]
 [ 0.44273233 -0.49945737]
 [ 0.34592497  0.29770612]
 [ 0.5395397   1.86250854]
 [-0.13811184 -0.4699328 ]
 [-0.2349192  -0.7

In [5]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

y_predicted = classifier.predict(X_test)

print(f"y_predicted: \n{y_predicted}\n")

print(np.concatenate((y_predicted.reshape(len(y_predicted), 1), y_test.reshape(len(y_test), 1)), 1))

confusion_matrix_result = confusion_matrix(y_true = y_test, y_pred = y_predicted)
accuracy_score_result = accuracy_score(y_true = y_test, y_pred = y_predicted)

print(f"\nConfusion matrix: \n{confusion_matrix_result}")
print(f"\nAccuracy score: \n{accuracy_score_result}\n")

y_predicted: 
[1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 1 0 0
 1 0 0 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 1
 0 1 0 1 0 0 0 1 0 1 0 1 0 1 1 0 1 0 0 0 0 1 0 0 1 0]

[[1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]]

Confusion matrix: 
[[63  1]
 [ 6 30]]

Accuracy score: 
0.93



In [5]:
X_set, y_set = standard_scaler.inverse_transform(X_train), y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() -10, stop = X_set[:, 0].max()+10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(standard_scaler.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape), alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title("K - Nearest Neighbor Classifier (Training set)")
plt.xlabel('Age')
plt.ylabel('Estimeted Salary')
plt.legend()
plt.show()

![image.png](attachment:image.png)

In [6]:
X_set, y_set = standard_scaler.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() -10, stop = X_set[:, 0].max()+10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(standard_scaler.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape), alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title("K - Nearest Neighbor Classifier (Testing set)")
plt.xlabel('Age')
plt.ylabel('Estimeted Salary')
plt.legend()
plt.show()