### Import Libraries.

In [149]:
import pandas as pd 
import numpy as np 

In [150]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score

In [151]:
from collections import Counter

In [152]:
# Read the Social Network Ads CSV (path is relative to this notebook) into a DataFrame.
social_media_df = pd.read_csv(filepath_or_buffer='../Datasets/Social_Network_Ads.csv')

In [153]:
### Data Preprocessing.

In [154]:
# Keep only the modelling columns. They are selected by name rather than by
# position (the original `iloc[:, 1:]`), so re-running this cell is idempotent
# instead of stripping one more leading column on every execution.
social_media_df = social_media_df[['Gender', 'Age', 'EstimatedSalary', 'Purchased']]
social_media_df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [155]:
# Split the frame into the feature matrix (X) and the target vector (y).
feature_columns = ['Gender', 'Age', 'EstimatedSalary']
target_column = 'Purchased'

X = social_media_df[feature_columns]
y = social_media_df[target_column]

In [156]:
# Preview the first five feature rows.
X.head(n = 5)

Unnamed: 0,Gender,Age,EstimatedSalary
0,Male,19,19000
1,Male,35,20000
2,Female,26,43000
3,Female,27,57000
4,Male,19,76000


In [157]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable.
splits = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 0,
)
X_train, X_test, y_train, y_test = splits

In [158]:
# Apply label encoding to the categorical 'Gender' column.
# The encoder is fitted on the training split only and then reused on the test split.
# `assign` returns new DataFrames instead of writing a column into X_train / X_test
# in place; in-place column assignment on frames derived from another DataFrame can
# raise pandas' SettingWithCopyWarning.
encoder = LabelEncoder()
X_train = X_train.assign(Gender = encoder.fit_transform(X_train['Gender']))
X_test = X_test.assign(Gender = encoder.transform(X_test['Gender']))

In [159]:
# Scale the feature values. The scaler is fitted on the training split only,
# then the same fitted scaler transforms both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [160]:
# Print the shape of each split: train/test features first, then train/test labels.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(320, 3)
(80, 3)
(320,)
(80,)


### First, apply the built-in KNeighborsClassifier to get a baseline for comparison.

In [196]:
# Reference model: scikit-learn's k-NN classifier with 5 neighbours.
sklearn_knn_params = {'n_neighbors': 5}
knn_model = KNeighborsClassifier(**sklearn_knn_params)

In [197]:
# Fit the reference model on the training split.
knn_model.fit(X = X_train, y = y_train)

In [198]:
# Predict labels for the held-out test split with the reference model.
y_pred = knn_model.predict(X = X_test)

In [199]:
# Accuracy of the scikit-learn model on the test split.
sklearn_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy Score : ', sklearn_accuracy)

Accuracy Score :  0.95


### Build KNN from scratch and compare with sklearn's KNeighborsClassifier.

In [200]:
class MyKNeighborsClassifier:
    """Minimal k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 5
        Number of nearest training samples that vote on each prediction.
        If it exceeds the number of training samples, every sample votes.

    Attributes
    ----------
    n_neighbors : int
        The value of ``k``.
    X_train : numpy.ndarray or None
        Training features as a 2-D float array; ``None`` until ``fit`` is called.
    y_train : numpy.ndarray or None
        Training labels as a 1-D array; ``None`` until ``fit`` is called.
    """

    def __init__(self, k = 5):
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.n_neighbors = k
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Store the training data; k-NN has no other training step.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Numeric training features (ndarray, DataFrame, nested list, ...).
        y_train : array-like of shape (n_samples,)
            Training labels (ndarray, Series, list, ...).

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or its row count differs from
            the number of labels.
        """
        # Normalise both inputs to plain arrays so later lookups are positional
        # and do not depend on the container type or on a pandas index.
        features = np.asarray(X_train, dtype=float)
        labels = np.asarray(y_train)

        if features.ndim != 2:
            raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
        if features.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if features.shape[0] != labels.shape[0]:
            raise ValueError("X_train and y_train must have the same number of samples")

        self.X_train = features
        self.y_train = labels

    def calculate_distances(self, point_A, point_B):
        """Return the Euclidean distance between two points."""
        return np.linalg.norm(point_A - point_B)

    def _distances_to_train(self, point):
        """Return the Euclidean distance from ``point`` to every training row."""
        train = np.asarray(self.X_train, dtype=float)
        # One vectorised norm per query instead of a Python loop over training rows.
        return np.linalg.norm(train - point, axis=1)

    def majority_votes(self, n_neigbhors):
        """Return the most frequent label among the given neighbours.

        Parameters
        ----------
        n_neigbhors : iterable of (index, distance) pairs
            ``index`` is the position of a neighbour in the training set; the
            distance is not used for voting.

        Returns
        -------
        The label with the most votes. When several labels share the top
        count, the one encountered first in ``n_neigbhors`` is returned.
        """
        labels = np.asarray(self.y_train)
        votes = Counter(labels[index] for index, _ in n_neigbhors)
        return votes.most_common(1)[0][0]

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like of shape (n_queries, n_features)
            Numeric query points (ndarray, DataFrame, nested list, ...).

        Returns
        -------
        numpy.ndarray
            One predicted label per query row.

        Raises
        ------
        ValueError
            If called before ``fit``, or if ``X_test`` is not 2-D with the same
            number of features as the training data.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError("fit must be called before predict")

        # Converting up front makes the loop iterate over rows even when a
        # DataFrame is passed (iterating a DataFrame yields column names).
        queries = np.asarray(X_test, dtype=float)
        if queries.size == 0:
            return np.array([])
        n_features = np.asarray(self.X_train).shape[1]
        if queries.ndim != 2 or queries.shape[1] != n_features:
            raise ValueError(
                f"X_test must be 2-dimensional with {n_features} features per row"
            )

        y_pred = []
        for point in queries:
            distances = self._distances_to_train(point)
            # A stable sort keeps equidistant neighbours in training-set order.
            nearest = np.argsort(distances, kind="stable")[: self.n_neighbors]
            n_neigbhors = list(zip(nearest.tolist(), distances[nearest].tolist()))
            y_pred.append(self.majority_votes(n_neigbhors))

        return np.array(y_pred)
        

In [201]:
# Instantiate the from-scratch classifier with the same neighbour count as the sklearn model.
my_knn_model = MyKNeighborsClassifier(5)

In [202]:
# Hand the training split to the from-scratch classifier.
my_knn_model.fit(X_train = X_train, y_train = y_train)

In [203]:
# Predict labels for the test split with the from-scratch classifier.
new_y_pred = my_knn_model.predict(X_test = X_test)

In [204]:
# Accuracy of the from-scratch KNN model on the test split.
my_knn_accuracy = accuracy_score(y_test, new_y_pred)
print('Accuracy Score using my own KNN model : ', my_knn_accuracy)

Accuracy Score using my own KNN model :  0.95


### Results of both MyKNeighborsClassifier class and built-in sklearn class.

`Comparison : ` Here we can clearly see that the results are the same for both the MyKNeighborsClassifier class and the built-in sklearn class for the same value of k.

For k = 2, Accuracy Score is 0.925

For k = 5, Accuracy Score is 0.95

The results are also the same for other values of k.
