In [1]:
import numpy as np
# 1. Create data
# Seed NumPy's global RNG so the three draws below are reproducible.
np.random.seed(42)
n_samples = 200

# Integer features; `high` is exclusive, so each range includes its stated maximum.
age = np.random.randint(low=18, high=71, size=n_samples)              # 18–70
income = np.random.randint(low=20000, high=200001, size=n_samples)    # 20k–200k
spend_score = np.random.randint(low=0, high=101, size=n_samples)      # 0–100

# Target: 1 when income exceeds 100k AND spend_score exceeds 50, otherwise 0
y = np.logical_and(income > 100000, spend_score > 50).astype(int)

# Feature matrix of shape (n_samples, 3); columns are age, income, spend_score
X = np.stack((age, income, spend_score), axis=1)

In [2]:
from sklearn.model_selection import train_test_split
# 2. Train-test split
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# 3. KNN without scaling
# Baseline: a 5-nearest-neighbours classifier fitted directly on the raw
# feature values. fit() returns the estimator itself, so construction and
# fitting are chained into one expression.
knn_no_scale = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the held-out rows, then measure the fraction classified correctly.
y_pred_no_scale = knn_no_scale.predict(X_test)
acc_no_scale = accuracy_score(y_test, y_pred_no_scale)

In [4]:
from sklearn.preprocessing import StandardScaler
# 4. KNN with scaling
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same classifier settings as the unscaled run; only the inputs differ.
knn_scaled = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict the scaled held-out rows and score them against the true labels.
y_pred_scaled = knn_scaled.predict(X_test_scaled)
acc_scaled = accuracy_score(y_test, y_pred_scaled)

In [5]:
# 5. Report both test accuracies, one per line, rounded to two decimals.
print(
    f"Accuracy without scaling: {acc_no_scale:.2f}",
    f"Accuracy with scaling: {acc_scaled:.2f}",
    sep="\n",
)

Accuracy without scaling: 0.82
Accuracy with scaling: 0.97
