In [1]:
# Import libiraries
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
 
# Read Dataset    
df = pd.read_csv("BankNote_Authentication.csv")


# Split dataset into features and target
X = df.drop("class",axis=1)
Y = df["class"]

# Split dataset 70% train and 30% test 
X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=0.70) #random_state=42

# normalization of training set  
X_train["variance"] = (X_train["variance"] - X_train["variance"].mean())/(X_train["variance"].std())
X_train["skewness"] = (X_train["skewness"] - X_train["skewness"].mean())/(X_train["skewness"].std())
X_train["curtosis"] = (X_train["curtosis"] - X_train["curtosis"].mean())/(X_train["curtosis"].std())
X_train["entropy"] = (X_train["entropy"] - X_train["entropy"].mean())/(X_train["entropy"].std())

# normalization of testing set
X_test["variance"] = (X_test["variance"] - X_train["variance"].mean())/(X_train["variance"].std())
X_test["skewness"] = (X_test["skewness"] - X_train["skewness"].mean())/(X_train["skewness"].std())
X_test["curtosis"] = (X_test["curtosis"] - X_train["curtosis"].mean())/(X_train["curtosis"].std())
X_test["entropy"] = (X_test["entropy"] - X_train["entropy"].mean())/(X_train["entropy"].std())

# convert dataframes into numpy arrays
X_train = X_train.values
X_test = X_test.values
y_train = y_train.values
y_test = y_test.values

# euclidean distance function
def eucledian(x1,x2):
    distance = np.sqrt(np.sum((x1-x2)**2))
    return distance
 
# KNN
def classify(x_train, y_train , X_test, k):
    
    test_labels = []
    for item in X_test:
        point_distances = []
        for j in range(len(x_train)):
            distances = eucledian(np.array(x_train[j,:]) , item) 
            point_distances.append(distances) 
        point_distances = np.array(point_distances) 
        nearest_dist = np.argsort(point_distances)[:k]
        labels = y_train[nearest_dist]
        zeros =np.count_nonzero(labels == 0)
        ones =np.count_nonzero(labels == 1)
        if zeros>ones:
            test_labels.append(0) 
        elif zeros<ones:
            test_labels.append(1) 
        else:
            test_labels.append(labels[0]) 
    return test_labels


# list for storing the summary
ks = []
The_correct = []
The_Test_all = []
All_Accuracy = []

# try diffrent values for k from 1 to 9
for i in range(1,10):
    y_pred = classify(X_train,y_train,X_test , i)
    
    correct = np.sum(y_test==y_pred)
    all_tests = y_test.shape[0]
    acc = correct/all_tests*100
    
    ks.append(i)
    The_correct.append(correct)
    The_Test_all.append(all_tests)
    All_Accuracy.append(str(round(acc,2))+"%")


report =  pd.DataFrame(list(zip(ks,The_Test_all,The_correct,All_Accuracy)))
report.columns = ["K",'Test Cases',"The correct","Accuracy"]
report.to_csv("KNN_SUMMARY.csv")
print("Done Successfully.")
report

# Time to run 25 seconds

Done Successfully.


Unnamed: 0,K,Test Cases,The correct,Accuracy
0,1,412,334,81.07%
1,2,412,334,81.07%
2,3,412,335,81.31%
3,4,412,336,81.55%
4,5,412,334,81.07%
5,6,412,336,81.55%
6,7,412,335,81.31%
7,8,412,336,81.55%
8,9,412,339,82.28%


# End