In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


X, y = make_classification(
    n_samples=500,      
    n_classes=2,        
    n_clusters_per_class=1,
    n_informative=2, 
    n_redundant=0,
    random_state=42
)

np.save("X.npy", X)
np.save("y.npy", y)
# Load arrays
X_loaded = np.load("X.npy")
y_loaded = np.load("y.npy")
print(X_loaded)
print(y_loaded)


[[-0.77909274 -0.79252345 -0.20846821 ... -0.43030932  0.20323595
   1.20660815]
 [ 0.49663461  0.08129143  0.33078381 ...  0.76730736  0.63510001
   0.08486546]
 [ 0.31745326  1.81806192  0.69272276 ...  0.01840163 -0.3328604
  -1.2693305 ]
 ...
 [-0.28370673 -1.1356359  -0.2885329  ... -0.6844404   1.19047499
   1.51541382]
 [ 0.47558844 -0.561338   -0.80152759 ... -0.49796185 -1.74689569
  -0.3471865 ]
 [ 1.99566749 -1.10015423  0.49774309 ...  1.79521136  3.10991856
   0.66592425]]
[0 1 0 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1 0 1 0 1 0 1 0 0 0 1 0 0 1
 0 1 1 0 0 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 1 1 1 1 1 0 1 0
 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 1 1 1 1 0 0 1 0 1 0 1 0 1 0 0 1
 0 0 1 1 1 1 0 0 1 0 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0 0 1 0 0 0 0 0 1 1 0 1 1
 0 1 0 1 0 0 1 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 0 0 0 1 1 1 1
 0 0 1 1 1 1 1 0 1 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 1 1
 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 1 1 0 1 1 0 0 0 0 1 1 1 1 

In [5]:
# Step 3: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Step 4: KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)  # k=5
knn.fit(X_train, y_train)

# Step 5: Predictions
y_pred = knn.predict(X_test)

# Step 6: Evaluation
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

# Step 7: Visualization of decision boundary
h = 0.02  # step size in mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.3)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolor='k', label="Train")
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, marker='x', label="Test")
plt.legend()
plt.title("KNN Decision Boundary (k=5)")
plt.show()

Confusion Matrix:
 [[55 11]
 [ 9 50]]

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.83      0.85        66
           1       0.82      0.85      0.83        59

    accuracy                           0.84       125
   macro avg       0.84      0.84      0.84       125
weighted avg       0.84      0.84      0.84       125

Accuracy: 0.84


ValueError: X has 2 features, but KNeighborsClassifier is expecting 20 features as input.

In [6]:
import pandas as pd

# Convert to DataFrame
df = pd.DataFrame(X, columns=['Feature1', 'Feature2'])
df['Target'] = y

# Save to CSV
df.to_csv("classification_dataset.csv", index=False)

print("Dataset saved as classification_dataset.csv")
# Load dataset
df = pd.read_csv("classification_dataset.csv")
X = df[['Feature1', 'Feature2']].values
y = df['Target'].values


ValueError: Shape of passed values is (500, 20), indices imply (500, 2)

In [None]:
import numpy as np

# Save arrays
np.save("X.npy", X)
np.save("y.npy", y)

# Load arrays
X_loaded = np.load("X.npy")
y_loaded = np.load("y.npy")
