In [1]:
import pandas as pd
import numpy as np

# Iris Dataset

In [2]:
def read_features_and_target(csv_path):
    """Read a headerless CSV and split off its last column as the target.

    Returns a ``(features, target)`` pair: the DataFrame with its last
    column removed, and that column as a Series renamed to ``'target'``.
    """
    features = pd.read_csv(csv_path, header=None)
    target = features.pop(features.columns[-1]).rename('target')
    return features, target


X_train, y_train = read_features_and_target('../datasets/iris.data')
X_test, y_test = read_features_and_target('../datasets/iris.test.data')

In [3]:
# Preview the first rows of the training split, features and label side by side.
pd.concat([X_train, y_train], axis='columns').head()

Unnamed: 0,0,1,2,3,target
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Preview the first rows of the test split, features and label side by side.
pd.concat([X_test, y_test], axis='columns').head()

Unnamed: 0,0,1,2,3,target
0,5.0,3.2,1.2,0.2,Iris-setosa
1,5.5,3.5,1.3,0.2,Iris-setosa
2,4.9,3.1,1.5,0.1,Iris-setosa
3,4.4,3.0,1.3,0.2,Iris-setosa
4,5.1,3.4,1.5,0.2,Iris-setosa


In [5]:
from _classes import KNN, Metrics

# Build a KNN model with its constructor defaults and fit it on the Iris
# training features and labels loaded above.
model = KNN()
model.fit(X_train, y_train)

In [6]:
# Sanity check: distances reported by the fitted model for the first training
# row. Presumably one entry per fitted row, with entry 0 being the row compared
# against itself -- confirm against KNN.distance.
model.distance(X_train.iloc[0])

0      0.000000
1      0.538516
2      0.509902
3      0.648074
4      0.141421
         ...   
100    5.546170
101    6.014150
102    4.880574
103    4.160529
104    4.570558
Name: distance, Length: 105, dtype: float64

In [7]:
# Report accuracy on both splits, one per line (train first, then test).
print(
    f'train accuracy: {Metrics.accuracy(y_train, model.predict(X_train))}',
    f'test accuracy: {Metrics.accuracy(y_test, model.predict(X_test))}',
    sep='\n',
)

train accuracy: 0.9619047619047619
test accuracy: 1.0


In [8]:
# Candidate k values: every integer from 1 up to the number of training rows.
k_space = np.arange(len(X_train)) + 1

def knn_predict(k: int) -> float:
    """Return the test-split accuracy of a KNN constructed with ``k``.

    The model is fit on the notebook-level ``X_train``/``y_train`` and
    scored with ``Metrics.accuracy`` against ``y_test`` using its
    predictions for ``X_test``.
    """
    classifier = KNN(k)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return Metrics.accuracy(y_test, predictions)

# Evaluate knn_predict once per candidate k. Declaring otypes stops np.vectorize
# from making an extra probing call on the first k just to infer the output
# dtype, which would otherwise fit and score one model twice.
accuracies = np.vectorize(knn_predict, otypes=[float])(k_space)

In [11]:
import plotly.express as px
# Label the axes and title the figure so the chart is readable on its own;
# without `labels`, plotly names the axes just "x" and "y".
px.line(
    x=k_space,
    y=accuracies,
    labels={'x': 'k', 'y': 'test accuracy'},
    title='KNN test accuracy vs. k (Iris)',
)

# WDBC Dataset

In [10]:
# load dataset
# Read the headerless WDBC training file and peel off its last column as the label.
X_train = pd.read_csv('../datasets/wdbc.data', header=None)
y_train = X_train.pop(X_train.columns[-1]).rename('target')

# Same treatment for the held-out WDBC file.
X_test = pd.read_csv('../datasets/wdbc.test.data', header=None)
y_test = X_test.pop(X_test.columns[-1]).rename('target')

# Fit a KNN with its constructor defaults, then report accuracy on both
# splits, one per line (train first, then test).
model = KNN()
model.fit(X_train, y_train)
print(
    f'train accuracy: {Metrics.accuracy(y_train, model.predict(X_train))}',
    f'test accuracy: {Metrics.accuracy(y_test, model.predict(X_test))}',
    sep='\n',
)

train accuracy: 0.9212598425196851
test accuracy: 0.9521276595744681
