In [None]:
# Dataset: Social_Network_Ads.csv

**Importing the libraries**

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets, neighbors
from mlxtend.plotting import plot_decision_regions 
from sklearn.model_selection import cross_val_score 

**Importing the dataset**

In [None]:
def knn_comparison(data, k):
    """Fit a k-NN classifier on ``data`` and plot its decision regions.

    Parameters
    ----------
    data : DataFrame-like
        Must provide the columns ``'X'`` and ``'Y'`` (the two features) and
        ``'class'`` (the label, cast to ``int``).
    k : int
        Passed to ``KNeighborsClassifier`` as ``n_neighbors``.

    Prints the score on the data the model was fitted on and the mean of a
    5-fold cross-validation score, then shows the decision-region plot.
    Returns ``None``.
    """
    points = data[['X', 'Y']].values
    labels = data['class'].astype(int).values

    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(points, labels)

    # Score on the very rows used for fitting (an optimistic figure).
    train_accuracy = model.score(points, labels)
    print("Train Accuracy : ", train_accuracy)

    # Cross-validated estimate over 5 folds.
    fold_scores = cross_val_score(model, points, labels, cv=5)
    print("Val Accuracy : ", np.mean(fold_scores))

    # Decision regions of the fitted model, with labelled axes and title.
    plot_decision_regions(points, labels, clf=model, legend=2)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Knn with K=' + str(k))
    plt.show()

In [None]:
# Load the raw table and discard the 'User ID' column in one step.
ds = pd.read_csv('Social_Network_Ads.csv').drop(columns='User ID')
ds

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [None]:
# Number of rows per value of the target column 'Purchased' (class balance).
ds['Purchased'].value_counts()

0    257
1    143
Name: Purchased, dtype: int64

In [None]:
# Count of missing values in each column.
ds.isnull().sum()

Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

**Splitting the dataset into the Training set and Test set**

In [None]:
from sklearn.model_selection import train_test_split

# Separate the 'Purchased' label column from the remaining columns.
target = ds['Purchased']
feature = ds.drop(columns='Purchased')

# Hold out 20% of the rows; the fixed random_state keeps the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    target,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

# Report the size of each piece of the split.
print('Shape of training feature:', X_train.shape)
print('Shape of testing feature:', X_test.shape)
print('Shape of training label:', y_train.shape)
print('Shape of testing label:', y_test.shape)

Shape of training feature: (320, 3)
Shape of testing feature: (80, 3)
Shape of training label: (320,)
Shape of testing label: (80,)


**Encoding categorical data and feature scaling**

In [None]:
from sklearn.preprocessing import OneHotEncoder

cat_cols = ['Gender']

# Use the default (sparse) output and densify with .toarray() instead of the
# `sparse=False` keyword, which newer scikit-learn releases renamed to
# `sparse_output` and later removed.
encoder = OneHotEncoder()

# `ds` is replaced by its encoded version below, so only encode while the raw
# categorical columns are still present; this keeps the cell safe to re-run.
if set(cat_cols).issubset(ds.columns):
    # Encode categorical data
    encoded = encoder.fit_transform(ds[cat_cols]).toarray()

    # Name the columns '<column>_<category>' from the fitted categories rather
    # than via get_feature_names(), which current scikit-learn no longer has.
    encoded_names = [
        '{}_{}'.format(col, cat)
        for col, cats in zip(cat_cols, encoder.categories_)
        for cat in cats
    ]

    # Reuse ds's index so the concat below aligns row-for-row.
    df_encoded = pd.DataFrame(encoded, columns=encoded_names, index=ds.index)

    # Replace categorical data with encoded data (encoded columns first)
    ds = pd.concat([df_encoded, ds.drop(cat_cols, axis=1)], axis=1)


In [None]:
from sklearn.preprocessing import StandardScaler

# Columns to standardise; every other column is left as it is.
num_cols = ['Age', 'EstimatedSalary']
scaler = StandardScaler()

# Work on a copy so the unscaled frame `ds` remains available.
ds_new = ds.copy()
ds_new[num_cols] = scaler.fit_transform(ds[num_cols])

# NOTE(review): the scaler is fitted on every row of `ds`, i.e. before any
# train/test split — fine for inspection, confirm before using it for modelling.
ds_new.head()

Unnamed: 0,Gender_Female,Gender_Male,Age,EstimatedSalary,Purchased
0,0.0,1.0,-1.781797,-1.490046,0
1,0.0,1.0,-0.253587,-1.460681,0
2,1.0,0.0,-1.113206,-0.78529,0
3,1.0,0.0,-1.017692,-0.374182,0
4,0.0,1.0,-1.781797,0.183751,0


**Fitting K-NN to the Training set**

In [None]:
def evaluate_model(model, x_test, y_test):
    """Score a fitted classifier on held-out data.

    Parameters
    ----------
    model : object with a ``predict`` method
        The classifier to evaluate.
    x_test : array-like
        Features handed to ``model.predict``.
    y_test : array-like
        True labels corresponding to ``x_test``.

    Returns
    -------
    dict
        ``'acc'``, ``'prec'``, ``'rec'`` and ``'f1'`` hold the accuracy,
        precision, recall and F1 scores; ``'cm'`` holds the confusion matrix.
    """
    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
    )

    # Predict once and reuse the predictions for every metric.
    predictions = model.predict(x_test)

    # Result key -> metric function, in the order the keys appear in the result.
    scorers = (
        ('acc', accuracy_score),
        ('prec', precision_score),
        ('rec', recall_score),
        ('f1', f1_score),
    )
    report = {key: scorer(y_test, predictions) for key, scorer in scorers}

    # Confusion matrix goes last, under 'cm'.
    report['cm'] = confusion_matrix(y_test, predictions)
    return report

In [None]:
# Select features and target from the encoded (still unscaled) frame
feature = ds.drop('Purchased', axis=1)
target = ds['Purchased']

# Set Training and Testing Data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(feature, target,
                                                    shuffle=True,
                                                    test_size=0.2,
                                                    random_state=1)

# k-NN is distance based, so the unscaled EstimatedSalary values would swamp
# Age and the 0/1 gender columns. Standardise the numeric columns using
# statistics from the training rows only, so nothing leaks from the test rows.
from sklearn.preprocessing import StandardScaler
knn_num_cols = ['Age', 'EstimatedSalary']
knn_scaler = StandardScaler()
# Copy first so the assignments below do not write into slices of `feature`.
X_train, X_test = X_train.copy(), X_test.copy()
X_train[knn_num_cols] = knn_scaler.fit_transform(X_train[knn_num_cols])
X_test[knn_num_cols] = knn_scaler.transform(X_test[knn_num_cols])

# Show the Training and Testing Data
print('Shape of training feature:', X_train.shape)
print('Shape of testing feature:', X_test.shape)
print('Shape of training label:', y_train.shape)
print('Shape of testing label:', y_test.shape)

from sklearn.neighbors import KNeighborsClassifier

# Build the k-NN model with default settings. The name `dtc` is kept only
# because the evaluation cells refer to it; this is not a decision tree.
dtc = KNeighborsClassifier()
dtc.fit(X_train, y_train)

Shape of training feature: (320, 4)
Shape of testing feature: (80, 4)
Shape of training label: (320,)
Shape of testing label: (80,)


KNeighborsClassifier()

**Predicting the Test set results**

In [None]:
# Evaluate Model
dtc_eval = evaluate_model(dtc, X_test, y_test)

# Print result
print('Accuracy:', dtc_eval['acc'])
print('Precision:', dtc_eval['prec'])
print('Recall:', dtc_eval['rec'])
print('F1 Score:', dtc_eval['f1'])


Accuracy: 0.75
Precision: 0.7
Recall: 0.65625
F1 Score: 0.6774193548387096


**Making the Confusion Matrix**

In [None]:
# Show the confusion matrix stored under 'cm' by evaluate_model.
# Presumably rows are true labels and columns predicted labels
# (scikit-learn's convention) — confirm before interpreting the counts.
print('Confusion Matrix:\n', dtc_eval['cm'])

Confusion Matrix:
 [[39  9]
 [11 21]]
