In [223]:
from knn import KNearestNeighbor
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from scaler import ScaleData
from accuracy import accuracy
from train_test_split import train_test_split

### **Test: Compare the from-scratch scaler with sklearn's StandardScaler**

In [177]:
# Synthetic data: row i holds the four consecutive integers i .. i+3.
X_train = [list(range(i, i + 4)) for i in range(100)]
X_test = [list(range(i, i + 4)) for i in range(50, 60)]

# Reference implementation: scikit-learn's StandardScaler, fitted on the
# training rows only and then applied to both splits.
sklearn_scaler = StandardScaler().fit(X_train)
X_train_sklearn_scaled = sklearn_scaler.transform(X_train)
X_test_sklearn_scaled = sklearn_scaler.transform(X_test)

# Implementation under test: the project's ScaleData in "standard" mode,
# driven through the same fit / transform sequence.
custom_scaler = ScaleData(method="standard")
custom_scaler.fit(X_train)
X_train_custom_scaled = custom_scaler.transform(X_train)
X_test_custom_scaled = custom_scaler.transform(X_test)

In [178]:
# Element-wise absolute difference between the sklearn and the custom results.
train_diff = np.abs(np.asarray(X_train_sklearn_scaled) - np.asarray(X_train_custom_scaled))
test_diff = np.abs(np.asarray(X_test_sklearn_scaled) - np.asarray(X_test_custom_scaled))

# Report the worst-case deviation per split.
print("Vergleich der Trainingsdaten:")
print("Maximale Abweichung (Training):", train_diff.max())

print("\nVergleich der Testdaten:")
print("Maximale Abweichung (Test):", test_diff.max())

Vergleich der Trainingsdaten:
Maximale Abweichung (Training): 5.940581360164288e-11

Vergleich der Testdaten:
Maximale Abweichung (Test): 1.1401157795631889e-11


### Test against hand-computed values

In [None]:
print("=== Test für Z-Score-Scaler ===")

# Hand-checkable fixture: four samples, two features, evenly spaced by 10.
# It is defined in this cell because the expected values printed below only
# hold for this data; without it the cell would pick up whatever X_train an
# earlier cell left in the kernel.
#   feature 1: 160, 170, 180, 190 -> mean 175, population std sqrt(125) ~ 11.18
#   feature 2:  55,  65,  75,  85 -> mean  70, population std sqrt(125) ~ 11.18
X_train = [[160, 55], [170, 65], [180, 75], [190, 85]]

scaler = ScaleData(method="standard")
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# `means` and `std_devs` are the attributes this notebook reads from the
# fitted ScaleData instance.
print("Erwartete Mittelwerte:", [175.0, 70.0])
print("Berechnete Mittelwerte:", scaler.means)

print("Erwartete Standardabweichungen:", [11.18, 11.18])
print("Berechnete Standardabweichungen:", scaler.std_devs)


# Expected z-scores: deviations of -15, -5, +5, +15 divided by 11.18.
print("Erwartete Skalierte Daten (Z-Score):")
print([
    [-1.34, -1.34],
    [-0.45, -0.45],
    [ 0.45,  0.45],
    [ 1.34,  1.34]
])
print("Berechnete Skalierte Daten (Z-Score):", X_train_scaled)

In [None]:
print("\n=== Test für Min-Max-Scaler ===")

# Hand-checkable fixture: four samples, two features, evenly spaced by 10.
# It is defined in this cell because the expected min/max and scaled values
# printed below only hold for this data; without it the cell would pick up
# whatever X_train an earlier cell left in the kernel.
X_train = [[160, 55], [170, 65], [180, 75], [190, 85]]

scaler = ScaleData(method="minmax")
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# `min` and `max` are the attributes this notebook reads from the fitted
# ScaleData instance in "minmax" mode.
print("Erwartete Min-Werte:", [160, 55])
print("Berechnete Min-Werte:", scaler.min)

print("Erwartete Max-Werte:", [190, 85])
print("Berechnete Max-Werte:", scaler.max)

# Expected scaling: (x - min) / (max - min) with a range of 30 per feature.
print("Erwartete Skalierte Daten (Min-Max):")
print([
    [0.0, 0.0],
    [0.33, 0.33],
    [0.67, 0.67],
    [1.0, 1.0]
])
print("Berechnete Skalierte Daten (Min-Max):", X_train_scaled)

## **Test: Simple dataset classification**

In [181]:
# Five labelled 2-D training points: the two smallest belong to class 0,
# the remaining three to class 1.
X_train = [
    [1, 1],
    [2, 2],
    [3, 3],
    [6, 5],
    [7, 7],
]
y_train = [0, 0, 1, 1, 1]

# Three unlabelled query points to classify.
X_test = [
    [2.5, 2.5],
    [6.5, 6.5],
    [4.0, 4.0],
]

In [182]:
# Scratch classifier voting over the 3 nearest training points.
knn = KNearestNeighbor(k=3)
knn.fit(X_train, y_train)

In [None]:
# Classify the query points and print the labels. An explicit print matches
# the other reporting cells and still produces output when the notebook is
# executed outside an interactive front end.
predictions = knn.predict(X_test)
print("Vorhersagen:", predictions)

### Prediction with scaling & larger dataset

In [184]:
# 100 training rows; row i holds the consecutive integers i .. i+3.
X_train = [list(range(i, i + 4)) for i in range(100)]
# Labels alternate 0, 1, 0, 1, ... with the row index.
y_train = [i % 2 for i in range(100)]

# Ten query rows built the same way, for i = 50 .. 59.
X_test = [list(range(i, i + 4)) for i in range(50, 60)]

In [None]:
# Show the first five raw training rows and all raw test rows, each under
# its own heading line.
print("Unskalierte Trainingsdaten:", X_train[:5], sep="\n")
print("Unskalierte Testdaten:", X_test, sep="\n")

In [186]:
# Fit the z-score scaler on the training rows only, then apply the same
# fitted parameters to both splits (train first, then test).
scaler = ScaleData(method="standard")
scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(rows) for rows in (X_train, X_test)
)

In [None]:
# Train the scratch classifier (5 neighbours) on the scaled features and
# label the scaled query rows.
knn = KNearestNeighbor(k=5)
knn.fit(X_train_scaled, y_train)

predictions = knn.predict(X_test_scaled)

In [188]:
# Bare tuple as the last expression: the notebook renders its repr as the
# cell result (this is what the recorded output below shows).
("Vorhersagen für Testdaten:", predictions)

('Vorhersagen für Testdaten:', [0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

## **Test: train_test_split and accuracy**

In [201]:
# Five samples of the form [n, n + 1] for n = 1, 3, 5, 7, 9.
X = [[n, n + 1] for n in range(1, 10, 2)]
# Labels alternate, starting with class 0.
y = [i % 2 for i in range(5)]

In [202]:
# Unshuffled split with 40 % held out, so the parts keep the original row
# order (3 training rows, 2 test rows in the recorded output).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    shuffle=False,
)

In [203]:
# Print each part of the split under its variable name.
for name, part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{name}:", part)

X_train: [[1, 2], [3, 4], [5, 6]]
X_test: [[7, 8], [9, 10]]
y_train: [0, 1, 0]
y_test: [1, 0]


In [204]:
# k equals the number of training rows here (3), so every training sample
# takes part in each vote.
knn = KNearestNeighbor(k=3)
knn.fit(X_train, y_train)

In [None]:
# Predict a label for every held-out row with the fitted scratch classifier.
y_pred = knn.predict(X_test)

In [206]:
# Score the predictions against the held-out labels
# (argument order: ground truth first, predictions second).
acc = accuracy(y_test, y_pred)

In [199]:
# Debug view of the vote: for every test row, list the k closest training
# samples as (distance, label) pairs.
for test_point in X_test:
    # knn.data is iterated as (training point, label) pairs; the classifier's
    # own distance function is used so the numbers match what predict() sees.
    distances = sorted(
        (
            (knn.distance_function(test_point, train_point), label)
            for train_point, label in knn.data
        ),
        key=lambda pair: pair[0],  # order by distance only, nearest first
    )
    print(f"Test point: {test_point}")
    print(f"Distances to neighbors: {distances[:knn.k]}")

Test point: [7, 8]
Distances to neighbors: [(2.8284271247461903, 0), (5.656854249492381, 1), (8.48528137423857, 0)]
Test point: [9, 10]
Distances to neighbors: [(5.656854249492381, 0), (8.48528137423857, 1), (11.313708498984761, 0)]


In [207]:
# Show ground truth, predictions and the resulting accuracy.
print(f"y_test (Ground Truth): {y_test}")
print(f"y_pred (Predictions): {y_pred}")
print(f"Accuracy: {acc}")

y_test (Ground Truth): [1, 0]
y_pred (Predictions): [0, 0]
Accuracy: 0.5


### Test all correct accuracy

In [196]:
# Every prediction equals its label, so the score must be exactly 1.0.
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 1, 0, 1]

acc = accuracy(y_true, y_pred)
print("Accuracy (perfect):", acc)

# Fail loudly instead of relying on someone reading the printed number.
assert acc == 1.0, f"expected 1.0, got {acc}"

Accuracy (perfect): 1.0


### Test partially correct accuracy

In [197]:
# Three of the five predictions (indices 0, 1 and 3) match their label,
# so the score must be 3 / 5 = 0.6.
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 0]

acc = accuracy(y_true, y_pred)
print("Accuracy (partial):", acc)

# Compare with a tolerance because the score is a float ratio.
assert abs(acc - 0.6) < 1e-9, f"expected 0.6, got {acc}"

Accuracy (partial): 0.6


### Test accuracy when all predictions are wrong

In [198]:
# Every prediction is the opposite of its label, so the score must be 0.0.
y_true = [0, 1, 1, 0, 1]
y_pred = [1, 0, 0, 1, 0]

acc = accuracy(y_true, y_pred)
print("Accuracy (none correct):", acc)

# Fail loudly instead of relying on someone reading the printed number.
assert acc == 0.0, f"expected 0.0, got {acc}"

Accuracy (none correct): 0.0


## **KNN-from-scratch vs. KNN from sklearn**

In [228]:
from sklearn.datasets import load_iris
import numpy as np

In [229]:
# Load the iris dataset bundled with scikit-learn and split the returned
# container into the feature matrix and the target vector.
iris = load_iris()
X, y = iris.data, iris.target

scratch:

In [None]:
# Split iris with the from-scratch helper. The positional arguments are
# presumably test_size=0.3 and shuffle=True, mirroring the keyword call used
# earlier in this notebook -- verify against the helper's signature.
train_test_split_scratch = train_test_split(X, y, 0.3, True)

# Unpack the result so the scaling and KNN cells below work on the iris split
# rather than on X_train / X_test / y_train left over from an earlier section.
# The order matches the earlier call: X_train, X_test, y_train, y_test.
X_train, X_test, y_train, y_test = train_test_split_scratch

In [None]:
# From-scratch z-score scaling: parameters come from the training split only
# and are then applied to both splits (train first, then test).
scaler = ScaleData(method="standard")
scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(rows) for rows in (X_train, X_test)
)

In [None]:
# From-scratch KNN with 3 neighbours, fed plain nested Python lists.
# np.asarray(...).tolist() is used instead of a bare .tolist() so the cell
# works whether the custom split / scaler handed back ndarrays or ordinary
# lists (a list has no .tolist() method). For ndarray inputs the result is
# the same as before.
knn_scratch = KNearestNeighbor(k=3)
knn_scratch.fit(
    np.asarray(X_train_scaled).tolist(),
    np.asarray(y_train).tolist(),
)
y_pred_scratch = knn_scratch.predict(np.asarray(X_test_scaled).tolist())

sklearn:

In [None]:
# Import scikit-learn's splitter under an alias. A plain
# `from sklearn.model_selection import train_test_split` would rebind the
# name and shadow the from-scratch train_test_split imported at the top, so
# any earlier cell re-run afterwards would silently call sklearn instead.
from sklearn.model_selection import train_test_split as sklearn_train_test_split

# 70/30 shuffled split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = sklearn_train_test_split(
    X, y, test_size=0.3, random_state=42, shuffle=True
)

In [None]:
# scikit-learn z-score scaling: fit on the training split, then transform
# both splits with the fitted parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# scikit-learn reference classifier with the same neighbour count (3) as the
# from-scratch model; fit() returns the estimator itself.
knn_sklearn = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
y_pred_sklearn = knn_sklearn.predict(X_test_scaled)