In [49]:
import numpy as np
import pandas as pd
import scipy.spatial
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Load the social-network ads dataset and preview its first rows.
df = pd.read_csv("Social_Network_Ads.csv")
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [50]:
# Count purchases (columns) per gender (rows).
pd.crosstab(df["Gender"], df["Purchased"])

Purchased,0,1
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,127,77
Male,130,66


In [51]:
# #creating dummies
# df1=pd.get_dummies(data=df,columns=['Gender'])
def converter(gender):
    """Encode a gender label as an integer code.

    Parameters
    ----------
    gender : str or int
        'Male', 'Female', any other label, or a value that has already
        been encoded by this function.

    Returns
    -------
    int
        0 for 'Male', 1 for 'Female', 2 for anything else. Values that are
        already one of the codes 0, 1 or 2 are returned unchanged.
    """
    if isinstance(gender, str):
        if gender == 'Male':
            return 0
        if gender == 'Female':
            return 1
        return 2
    # Pass already-encoded values through so that applying the converter a
    # second time (e.g. re-running the notebook cell) does not collapse every
    # row into the "other" code 2.
    if gender in (0, 1, 2):
        return int(gender)
    return 2

# Replace the Gender labels with the integer codes returned by converter().
df["Gender"] = df["Gender"].apply(converter)

# Preview the frame after encoding.
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,0,19,19000,0
1,15810944,0,35,20000,0
2,15668575,1,26,43000,0
3,15603246,1,27,57000,0
4,15804002,0,19,76000,0


In [52]:
# Define the feature matrix and the target.
# The "User ID" column is dropped because it does not influence the target.
x = df.drop(columns=["User ID", "Purchased"])
y = df["Purchased"]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=101
)

class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Parameters
    ----------
    k : int
        Number of nearest training samples that vote on each prediction.
    """

    def __init__(self, k):
        self.k = k

    def fit(self, X, y):
        """Store the training data; all work happens at prediction time.

        Parameters
        ----------
        X : 2-D array-like or DataFrame of numeric features, one row per sample.
        y : 1-D array-like or Series of labels, positionally aligned with X.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X_test):
        """Predict a label for every row of X_test.

        Parameters
        ----------
        X_test : 2-D array-like or DataFrame with the same columns as the
            training features.

        Returns
        -------
        list
            One label per row of X_test: the most common label among the k
            nearest training rows (ties go to the label seen first when
            walking the neighbours from nearest to farthest).

        Raises
        ------
        ValueError
            If k is smaller than 1, the training set is empty, or X_test is
            not two-dimensional.
        """
        if self.k < 1:
            raise ValueError("k must be a positive integer")

        # Use the data stored by fit() rather than same-named notebook globals,
        # and convert once so the inner work is vectorised.
        train_features = np.asarray(self.X_train, dtype=float)
        train_labels = np.asarray(self.y_train)
        queries = np.asarray(X_test, dtype=float)

        if len(train_features) == 0:
            raise ValueError("KNN.predict requires a non-empty training set")
        if queries.ndim != 2:
            raise ValueError("X_test must be two-dimensional (rows of samples)")

        final_output = []
        for query in queries:
            # Euclidean distance from this query row to every training row.
            distances = np.linalg.norm(train_features - query, axis=1)
            # A stable sort keeps equal distances ordered by training index,
            # matching a sort on (distance, index) pairs.
            nearest = np.argsort(distances, kind="stable")[: self.k]
            votes = train_labels[nearest].tolist()
            final_output.append(Counter(votes).most_common(1)[0][0])

        return final_output

    def score(self, X_test, y_test):
        """Return the fraction of rows in X_test that are predicted correctly.

        The comparison is positional, so y_test may be a list, array or
        Series (its index labels are ignored).
        """
        predictions = np.asarray(self.predict(X_test))
        expected = np.asarray(y_test)
        return (predictions == expected).sum() / len(expected)

# Fit a 10-neighbour classifier on the training split and predict the test rows.
clf = KNN(k=10)
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)

# Wrap the predicted labels in a one-column frame.
y_prediction = pd.DataFrame({"prediction": prediction})

In [53]:
# Horizontal rule printed between the sections of the report.
separator = "--------------------------------------------------------------"

# Confusion matrix of true vs. predicted labels.
print(separator)
matrix = confusion_matrix(y_test, y_prediction)
print('Confusion matrix : \n', matrix)
print(separator)

# Overall accuracy.
print("Accuracy : \t", accuracy_score(y_test, y_prediction))
print(separator)

# Classification report: precision, recall, f1-score and accuracy.
matrix = classification_report(y_test, y_prediction)
print('Classification report : \n', matrix)
print(separator)


--------------------------------------------------------------
Confusion matrix : 
 [[75  5]
 [11 29]]
--------------------------------------------------------------
Accuracy : 	 0.8666666666666667
--------------------------------------------------------------
Classification report : 
               precision    recall  f1-score   support

           0       0.87      0.94      0.90        80
           1       0.85      0.72      0.78        40

    accuracy                           0.87       120
   macro avg       0.86      0.83      0.84       120
weighted avg       0.87      0.87      0.86       120

--------------------------------------------------------------
