### Handle/Get the data

In [69]:
import pandas as pd
from nest_asyncio import apply

url = 'iris.data'
df = pd.read_csv(url, header=None)
print(df.head())

     0    1    2    3            4
0  5.1  3.5  1.4  0.2  Iris-setosa
1  4.9  3.0  1.4  0.2  Iris-setosa
2  4.7  3.2  1.3  0.2  Iris-setosa
3  4.6  3.1  1.5  0.2  Iris-setosa
4  5.0  3.6  1.4  0.2  Iris-setosa


### Preprocess the data by first splitting it up between training and testing data

In [70]:
import random


def loadDataset(filename, split):
	trainingSet=[]
	testSet=[]
	df = pd.read_csv(url, header=None)
	array = df.to_numpy()
	random.shuffle(array)
	training_len = int(len(array)*split)
	trainingSet = array[:training_len]
	testSet = array[training_len:]
	return trainingSet, testSet

### Test the function by printing the output

In [71]:
trainingSet=[]
testSet=[]
url = 'iris.data'
trainingSet, testSet = loadDataset(url, 0.66)

# repr() Return a string containing a printable representation
print('Train: ' + repr(len(trainingSet)))
print('Test: ' + repr(len(testSet)))

Train: 99
Test: 51


### Perform the KNN algorithm

In [72]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


def train_data_knn(trainingSet, testSet, k=4, mode='brute'):
    # Split the data again to get the X and y values. 
    # X is the data and y is the target/label.
    X_train = [data[:4] for data in trainingSet]
    y_train = [data[4] for data in trainingSet]
    
    X_test = [data[:4] for data in testSet]
    y_test = [data[4] for data in testSet]
    
    # Train the KNN model
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.algorithm = mode
    knn.fit(X_train, y_train)
    
    # Make predictions
    predictions = knn.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, predictions)
    
    return accuracy


# def train_data_knn_kdtree(trainingSet, testSet, k=4):
#     # Split the data again to get the X and y values. 
#     # X is the data and y is the target/label.
#     X_train = [data[:4] for data in trainingSet]
#     y_train = [data[4] for data in trainingSet]
#     
#     X_test = [data[:4] for data in testSet]
#     y_test = [data[4] for data in testSet]
#     
#     # Train the KNN model
#     knn = KNeighborsClassifier(n_neighbors=k)
#     knn.algorithm = 'kd_tree'
#     knn.fit(X_train, y_train)
#     
#     # Make predictions
#     predictions = knn.predict(X_test)
#     
#     # Evaluate the model
#     accuracy = accuracy_score(y_test, predictions)
#     
#     return accuracy
    


### Get the average accuracy of the model

In [73]:
count = 0
avg = 0
accuracy_scores = []

def get_avg_accuracy(trainingSet, testSet, epochs):
    for i in range(epochs):
        accuracy_scores.append(train_data_knn(trainingSet, testSet))
        print(f'Epoch {i+1}: {accuracy_scores[i]}')
        print('Average: ', sum(accuracy_scores)/len(accuracy_scores))
    avg = sum(accuracy_scores)/len(accuracy_scores)
    return avg
	
print(get_avg_accuracy(trainingSet, testSet, epochs=5))

Epoch 1: 0.7843137254901961
Average:  0.7843137254901961
Epoch 2: 0.7843137254901961
Average:  0.7843137254901961
Epoch 3: 0.7843137254901961
Average:  0.7843137254901961
Epoch 4: 0.7843137254901961
Average:  0.7843137254901961
Epoch 5: 0.7843137254901961
Average:  0.7843137254901961
0.7843137254901961
