**IMAGE CLASSIFICATION USING THE ANIMALS (DOG, CAT, PANDA) DATASET: A COMPARISON OF MANHATTAN (L1) AND EUCLIDEAN
(L2) DISTANCES WITH 5-FOLD CROSS-VALIDATION**



# Data Download
[Animal Image Dataset (Dog, Cat and Panda) on Kaggle](https://www.kaggle.com/datasets/ashishsaxena2209/animal-image-datasetdog-cat-and-panda)



In [None]:
import kagglehub

# Kaggle handle of the dog/cat/panda image dataset requested below.
DATASET_HANDLE = "ashishsaxena2209/animal-image-datasetdog-cat-and-panda"

# Ask kagglehub for the dataset and report the location it hands back.
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)

**=================== STEP 1: SETUP ===================**

In [None]:
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle, sample
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

**=================== STEP 2: COPY IMAGES ===================**

In [None]:
# Class sub-folder names, one per animal class.
categories = ['dogs', 'cats', 'panda']

# Both folders live under the same dataset root; build them from one prefix
# so the two paths cannot drift apart.
_dataset_root = '/input/animal-image-datasetdog-cat-and-panda'
source_base = _dataset_root + '/animals'  # where the original images are read from
target_base = _dataset_root + '/images'   # where the sampled copies are written

def copy_images(source_dir, target_dir, num_images=1000):
    """Copy a random selection of files from ``source_dir`` into ``target_dir``.

    Only regular files directly inside ``source_dir`` are candidates;
    sub-directories are ignored so they neither consume a selection slot nor
    make ``shutil.copy`` fail.

    Args:
        source_dir: Directory whose files are sampled.
        target_dir: Destination directory; created (including parents) when
            it does not exist yet.
        num_images: Upper bound on the number of files copied. When the
            source holds fewer files, all of them are copied.
    """
    # exist_ok avoids the separate "check, then create" step.
    os.makedirs(target_dir, exist_ok=True)

    candidates = [
        name
        for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    ]
    # Shuffle in place, then take a prefix: a uniform random subset.
    shuffle(candidates)

    for name in candidates[:num_images]:
        shutil.copy(os.path.join(source_dir, name),
                    os.path.join(target_dir, name))

# Fill one sub-folder per class under target_base, leaving num_images at
# copy_images' default.
for category in categories:
    class_source = os.path.join(source_base, category)
    class_target = os.path.join(target_base, category)
    copy_images(class_source, class_target)

**=================== STEP 3: LOAD & PREPROCESS ===================**

In [None]:
IMG_SIZE = 32  # images are resized to IMG_SIZE x IMG_SIZE before flattening


def _load_feature_vector(img_path):
    """Return the flattened, equalized grayscale thumbnail of an image, or None.

    The file is read with OpenCV, converted to grayscale, histogram-equalized,
    resized to IMG_SIZE x IMG_SIZE and flattened to a 1-D array. None is
    returned when OpenCV cannot decode the file or rejects it while
    processing, so the caller can skip it.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        return None
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.equalizeHist(gray)
        resized = cv2.resize(gray, (IMG_SIZE, IMG_SIZE))
    except cv2.error:
        return None
    return resized.flatten()


X_list, y_list = [], []
skipped = []

# The class label is the position of the class name in `categories`.
for label, cls in enumerate(categories):
    cls_path = os.path.join(target_base, cls)
    # os.listdir order is unspecified; sorting keeps the sample order (and
    # therefore the seeded cross-validation folds) reproducible.
    for img_name in sorted(os.listdir(cls_path)):
        img_path = os.path.join(cls_path, img_name)
        features = _load_feature_vector(img_path)
        if features is None:
            skipped.append(img_path)
            continue
        X_list.append(features)
        y_list.append(label)

if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")
if not X_list:
    raise RuntimeError(f"No readable images found under {target_base}")

X_np = np.array(X_list)
y_np = np.array(y_list)

# NOTE(review): the scaler and PCA are fitted on the full dataset, and the
# cross-validation step later splits this already-transformed X_np, so test
# folds influence the preprocessing. Fitting them per fold would avoid that
# leakage; they are kept global here because later cells call
# scaler.inverse_transform / pca.inverse_transform on X_np rows.
scaler = StandardScaler()
X_np = scaler.fit_transform(X_np)

# random_state pins the result when PCA selects a randomized SVD solver.
pca = PCA(n_components=100, random_state=42)
X_np = pca.fit_transform(X_np)

print(f"Processed dataset shape: {X_np.shape}, Labels shape: {y_np.shape}")

**=================== STEP 4: CROSS-VALIDATION ===================**

In [None]:
k_values = range(1, 16)
# Seeded splitter: every call to kf.split yields the same five stratified folds.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


def _mean_cv_accuracy(metric, k):
    """Return the mean 5-fold accuracy of a distance-weighted k-NN.

    A fresh KNeighborsClassifier with the given distance ``metric`` and ``k``
    neighbours is fitted on each training fold of X_np / y_np and scored on
    the matching test fold.
    """
    fold_scores = []
    for train_idx, test_idx in kf.split(X_np, y_np):
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
        knn.fit(X_np[train_idx], y_np[train_idx])
        fold_preds = knn.predict(X_np[test_idx])
        fold_scores.append(accuracy_score(y_np[test_idx], fold_preds))
    return np.mean(fold_scores)


# One mean accuracy per value of k, for Manhattan (L1) and Euclidean (L2).
acc_l1 = [_mean_cv_accuracy('manhattan', k) for k in k_values]
acc_l2 = [_mean_cv_accuracy('euclidean', k) for k in k_values]

**=================== STEP 5: PLOT ACCURACY ===================**

In [None]:
# Draw both accuracy curves on one set of axes via the object-oriented API.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, acc_l1, 'o-', label='Manhattan (L1)')
ax.plot(k_values, acc_l2, 's-', label='Euclidean (L2)')
ax.set_xlabel('Number of Neighbors (K)')
ax.set_ylabel('Cross-Validated Accuracy')
ax.set_title('k-NN Accuracy with Distance Weighting (L1 vs L2)')
ax.legend()
ax.grid(True)
plt.show()

**=================== STEP 6: PREDICTIONS FOR 5 RANDOM IMAGES ===================**

In [None]:
final_knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean', weights='distance')

# Pick 5 random images to display.
n_show = 5
indices = sample(range(len(X_np)), n_show)
samples = X_np[indices]
true_labels = y_np[indices]

# Keep the displayed images out of the training set. With weights='distance'
# a query that is also a training point is its own zero-distance neighbour,
# so fitting on all of X_np would make every prediction trivially correct.
train_mask = np.ones(len(X_np), dtype=bool)
train_mask[indices] = False
final_knn.fit(X_np[train_mask], y_np[train_mask])
predicted_labels = final_knn.predict(samples)

plt.figure(figsize=(15, 4))
for pos, (features, true_idx, pred_idx) in enumerate(
        zip(samples, true_labels, predicted_labels)):
    # Undo PCA, then standardization, to get back to pixel space. The picture
    # is a reconstruction from the retained PCA components, not the original
    # image.
    restored = pca.inverse_transform(features.reshape(1, -1))
    restored = scaler.inverse_transform(restored)
    img = restored.reshape(IMG_SIZE, IMG_SIZE)
    true_cls = categories[true_idx]
    pred_cls = categories[pred_idx]
    plt.subplot(1, n_show, pos + 1)
    plt.imshow(img, cmap='gray')
    plt.title(f"True: {true_cls}\nPred: {pred_cls}")
    plt.axis('off')

plt.tight_layout()
plt.show()