In [8]:
#--------------------First Phase---------------------

#importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib qt

# Importing the dataset
dataset = pd.read_csv('iris.csv')

X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values
y_mark = y
#PCA
from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=3)
X_sklearn = sklearn_pca.fit_transform(X)

In [142]:
#--------------------Second Phase---------------------

# Encoding the Dependent Variable
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)

#plotting the dataset
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
k = X_sklearn[:,0]
l = X_sklearn[:,1]
m = X_sklearn[:,2]

labl = { 0 : 'Setosa', 1 : 'Versicolor', 2 : 'Virginica'}
cdict = {0: 'red', 1: 'blue', 2: 'green'}

for g in np.unique(y):
    ix = np.where(y == g)
    ax.scatter(k[ix], l[ix], m[ix], c = cdict[g], marker = 'o', label = labl[g])

ax.set_xlabel('First component')
ax.set_ylabel('Second component')
ax.set_zlabel('Third Component')

ax.legend()
plt.show()


In [154]:
#--------------------Third Phase---------------------

# Euclidean distance
def distance(instance1, instance2):
    # just in case, if the instances are lists or tuples:
    instance1 = np.array(instance1) 
    instance2 = np.array(instance2)
    
    return np.linalg.norm(instance1 - instance2)


# Get Neighbours
def get_neighbors(training_set, 
                  labels, 
                  test_instance, 
                  k, 
                  distance=distance):
    distances = []
    for index in range(len(training_set)):
        dist = distance(test_instance, training_set[index])
        distances.append((training_set[index], dist, labels[index]))
    distances.sort(key=lambda x: x[1])
    neighbors = distances[:k]
    return neighbors  

# Counter
from collections import Counter
def vote(neighbors):
    class_counter = Counter()
    for neighbor in neighbors:
        class_counter[neighbor[2]] += 1
    return class_counter.most_common(1)[0][0]

accu1 = 0
accu3 = 0
accu5 = 0

# Splitting the dataset into the Training set and Test set
import random
from sklearn.model_selection import train_test_split
for acc in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X_sklearn, y, test_size = 1/3, random_state = random.randrange(1, 100, 2))
    #Prediction for k = 1
    y_pred1 = []
    for i in range(50):
        neighbors = get_neighbors(X_train, y_train, X_test[i], 1, distance=distance)
        y_pred1.append(vote(neighbors))
    y_pred1 = np.array(y_pred1)

    #Prediction for k = 3
    y_pred3 = []
    for i in range(50):
        neighbors = get_neighbors(X_train, y_train, X_test[i], 3, distance=distance)
        y_pred3.append(vote(neighbors))
    y_pred3 = np.array(y_pred3)
    
    #Prediction for k = 5
    y_pred5 = []
    for i in range(50):
        neighbors = get_neighbors(X_train, y_train, X_test[i], 5, distance=distance)
        y_pred5.append(vote(neighbors))
    y_pred5 = np.array(y_pred5)
    accu1 += accuracy_score(y_test, y_pred1)
    accu3 += accuracy_score(y_test, y_pred3)
    accu5 += accuracy_score(y_test, y_pred5)

#Acuuracy Comparison for k = 1, k = 2 and k = 3
from sklearn.metrics import accuracy_score
print('Accuracy for k = 1 :', round(accu1/50 * 100,2))
print('Accuracy for k = 3 :', round(accu3/50 * 100,2))
print('Accuracy for k = 5 :', round(accu5/50 * 100,2))

'''#Displaying the result
print("index\tObserved\tfor k = 1\tfor k = 3\tfor k = 5")
for i in range(50):
    print(i,'\t',y_test[i],'\t\t', y_pred1[i],'\t\t' ,y_pred3[i],'\t\t', y_pred5[i])'''

Accuracy for k = 1 : 95.32
Accuracy for k = 3 : 95.36
Accuracy for k = 5 : 96.08


'#Displaying the result\nprint("index\tObserved\tfor k = 1\tfor k = 3\tfor k = 5")\nfor i in range(50):\n    print(i,\'\t\',y_test[i],\'\t\t\', y_pred1[i],\'\t\t\' ,y_pred3[i],\'\t\t\', y_pred5[i])'