In [1]:
from knn import KNearestNeighbor
from sklearn.preprocessing import StandardScaler
import numpy as np
from scaler import ScaleData
from accuracy import accuracy
from train_test_split import train_test_split

### **Test: Compare scaler from scratch with sklearn Standardscaler**

In [2]:
X_train = [[i + j for j in range(4)] for i in range(100)]
X_test = [[i + j for j in range(4)] for i in range(50, 60)]

sklearn_scaler = StandardScaler()
sklearn_scaler.fit(X_train)
X_train_sklearn_scaled = sklearn_scaler.transform(X_train)
X_test_sklearn_scaled = sklearn_scaler.transform(X_test)

custom_scaler = ScaleData(method="standard")
custom_scaler.fit(X_train)
X_train_custom_scaled = custom_scaler.transform(X_train)
X_test_custom_scaled = custom_scaler.transform(X_test)

In [3]:
print("Vergleich der Trainingsdaten:")
print("Maximale Abweichung (Training):", np.max(np.abs(np.array(X_train_sklearn_scaled) - np.array(X_train_custom_scaled))))

print("\nVergleich der Testdaten:")
print("Maximale Abweichung (Test):", np.max(np.abs(np.array(X_test_sklearn_scaled) - np.array(X_test_custom_scaled))))

Vergleich der Trainingsdaten:
Maximale Abweichung (Training): 5.940581360164288e-11

Vergleich der Testdaten:
Maximale Abweichung (Test): 1.1401157795631889e-11


### By hand test

In [4]:
print("=== Test für Z-Score-Scaler ===")
scaler = ScaleData(method="standard")
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

print("Erwartete Mittelwerte:", [175.0, 70.0])
print("Berechnete Mittelwerte:", scaler.means)

print("Erwartete Standardabweichungen:", [11.18, 11.18])
print("Berechnete Standardabweichungen:", scaler.std_devs)


print("Erwartete Skalierte Daten (Z-Score):")
print([
    [-1.34, -1.34],
    [-0.45, -0.45],
    [ 0.45,  0.45],
    [ 1.34,  1.34]
])
print("Berechnete Skalierte Daten (Z-Score):", X_train_scaled)

=== Test für Z-Score-Scaler ===
Erwartete Mittelwerte: [175.0, 70.0]
Berechnete Mittelwerte: [49.5, 50.5, 51.5, 52.5]
Erwartete Standardabweichungen: [11.18, 11.18]
Berechnete Standardabweichungen: [28.86607004772212, 28.86607004772212, 28.86607004772212, 28.86607004772212]
Erwartete Skalierte Daten (Z-Score):
[[-1.34, -1.34], [-0.45, -0.45], [0.45, 0.45], [1.34, 1.34]]
Berechnete Skalierte Daten (Z-Score): [[-1.7148160423795318, -1.7148160423795318, -1.7148160423795318, -1.7148160423795318], [-1.680173294048632, -1.680173294048632, -1.680173294048632, -1.680173294048632], [-1.6455305457177325, -1.6455305457177325, -1.6455305457177325, -1.6455305457177325], [-1.6108877973868327, -1.6108877973868327, -1.6108877973868327, -1.6108877973868327], [-1.5762450490559332, -1.5762450490559332, -1.5762450490559332, -1.5762450490559332], [-1.5416023007250337, -1.5416023007250337, -1.5416023007250337, -1.5416023007250337], [-1.506959552394134, -1.506959552394134, -1.506959552394134, -1.506959552394

In [5]:
print("\n=== Test für Min-Max-Scaler ===")
scaler = ScaleData(method="minmax")
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

print("Erwartete Min-Werte:", [160, 55])
print("Berechnete Min-Werte:", scaler.min)

print("Erwartete Max-Werte:", [190, 85])
print("Berechnete Max-Werte:", scaler.max)

print("Erwartete Skalierte Daten (Min-Max):")
print([
    [0.0, 0.0],
    [0.33, 0.33],
    [0.67, 0.67],
    [1.0, 1.0]
])
print("Berechnete Skalierte Daten (Min-Max):", X_train_scaled)


=== Test für Min-Max-Scaler ===
Erwartete Min-Werte: [160, 55]
Berechnete Min-Werte: [0, 1, 2, 3]
Erwartete Max-Werte: [190, 85]
Berechnete Max-Werte: [99, 100, 101, 102]
Erwartete Skalierte Daten (Min-Max):
[[0.0, 0.0], [0.33, 0.33], [0.67, 0.67], [1.0, 1.0]]
Berechnete Skalierte Daten (Min-Max): [[0.0, 0.0, 0.0, 0.0], [0.01010101010090807, 0.01010101010090807, 0.01010101010090807, 0.01010101010090807], [0.02020202020181614, 0.02020202020181614, 0.02020202020181614, 0.02020202020181614], [0.030303030302724212, 0.030303030302724212, 0.030303030302724212, 0.030303030302724212], [0.04040404040363228, 0.04040404040363228, 0.04040404040363228, 0.04040404040363228], [0.05050505050454035, 0.05050505050454035, 0.05050505050454035, 0.05050505050454035], [0.060606060605448424, 0.060606060605448424, 0.060606060605448424, 0.060606060605448424], [0.0707070707063565, 0.0707070707063565, 0.0707070707063565, 0.0707070707063565], [0.08080808080726457, 0.08080808080726457, 0.08080808080726457, 0.08080

## **Test: Simple dataset classification**

In [6]:
X_train = [[1, 1], [2, 2], [3, 3], [6, 5], [7, 7]]
y_train = [0, 0, 1, 1, 1]

X_test = [[2.5, 2.5], [6.5, 6.5], [4.0, 4.0]]

In [7]:
knn = KNearestNeighbor(k = 3)
knn.fit(X_train, y_train)

In [None]:
predictions = knn.predict(X_test)
("Vorhersagen:", predictions)

### Prediction with scaling & larger dataframe

In [9]:
X_train = [[i + j for j in range(4)] for i in range(100)]
y_train = [i % 2 for i in range(100)]

X_test = [[i + j for j in range(4)] for i in range(50, 60)]

In [10]:
print("Unskalierte Trainingsdaten:")
print(X_train[:5])
print("Unskalierte Testdaten:")
print(X_test)

Unskalierte Trainingsdaten:
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7]]
Unskalierte Testdaten:
[[50, 51, 52, 53], [51, 52, 53, 54], [52, 53, 54, 55], [53, 54, 55, 56], [54, 55, 56, 57], [55, 56, 57, 58], [56, 57, 58, 59], [57, 58, 59, 60], [58, 59, 60, 61], [59, 60, 61, 62]]


In [11]:
scaler = ScaleData(method="standard")
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
knn = KNearestNeighbor(k=5)
knn.fit(X_train_scaled, y_train)
predictions = knn.predict(X_test_scaled)

In [13]:
("Vorhersagen für Testdaten:", predictions)

('Vorhersagen für Testdaten:', [0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

## **Test: train_test_split and accuracy**

In [14]:
X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
y = [0, 1, 0, 1, 0]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, shuffle=False)

In [16]:
print("X_train:", X_train)
print("X_test:", X_test)
print("y_train:", y_train)
print("y_test:", y_test)

X_train: [[1, 2], [3, 4], [5, 6]]
X_test: [[7, 8], [9, 10]]
y_train: [0, 1, 0]
y_test: [1, 0]


### Test all correct accuracy

In [17]:
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 1, 0, 1]

acc = accuracy(y_true, y_pred)
print("Accuracy (perfect):", acc)

Accuracy (perfect): 1.0


### Test partially correct accuracy

In [18]:
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 0]

acc = accuracy(y_true, y_pred)
print("Accuracy (partial):", acc)

Accuracy (partial): 0.6


### Test all false accuracy

In [19]:
y_true = [0, 1, 1, 0, 1]
y_pred = [1, 0, 0, 1, 0]

acc = accuracy(y_true, y_pred)
print("Accuracy (none correct):", acc)

Accuracy (none correct): 0.0
