Importing necessary libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import cifar10

Loading the CIFAR-10 dataset

In [2]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


Converting images to grayscale

In [3]:
def rgb_to_grayscale(rgb_images):
    return np.dot(rgb_images[...,:3], [0.2989, 0.5870, 0.1140])

x_train_gray = rgb_to_grayscale(x_train).astype(np.float32)
x_test_gray = rgb_to_grayscale(x_test).astype(np.float32)


Normalizing the data

In [4]:
x_train_gray /= 255.0
x_test_gray /= 255.0

Flatten the images for k-NN

In [16]:
x_train_gray_flattened = x_train_gray.reshape(x_train_gray.shape[0], -1)
x_test_gray_flattened = x_test_gray.reshape(x_test_gray.shape[0], -1)

Checking the shape of y_train and x_train_gray_flattened

In [17]:
print("x_train_gray_flattened shape:", x_train_gray_flattened.shape)
print("y_train shape:", y_train.shape)

x_train_gray_flattened shape: (50000, 1024)
y_train shape: (40000, 1)


Adjust x_train_gray_flattened to match y_train in size

In [19]:
x_train_gray_flattened_adjusted = x_train_gray_flattened[:y_train.shape[0]]

In [20]:
print("Adjusted x_train_gray_flattened shape:", x_train_gray_flattened_adjusted.shape)
print("y_train shape:", y_train.shape)

Adjusted x_train_gray_flattened shape: (40000, 1024)
y_train shape: (40000, 1)


#Model Building with k-NN

In [18]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

Initialize variables for storing results

In [12]:
k_values = range(1, 6)  # Example range, adjust based on assignment requirements
accuracy_l1 = {k: [] for k in k_values}  # For Manhattan distance
accuracy_l2 = {k: [] for k in k_values}  # For Euclidean distance

5-Fold Cross-Validation

In [13]:
kf = KFold(n_splits=5, shuffle=True)

In [14]:
for train_idx, val_idx in kf.split(x_train_gray_flattened):
    X_train_fold, X_val_fold = x_train_gray_flattened[train_idx], x_train_gray_flattened[val_idx]
    y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

    for k in k_values:
        # Manhattan distance
        knn_l1 = KNeighborsClassifier(n_neighbors=k, p=1)
        knn_l1.fit(X_train_fold, y_train_fold.ravel())  # Flatten y_train for fitting
        predictions_l1 = knn_l1.predict(X_val_fold)
        accuracy_l1[k].append(accuracy_score(y_val_fold, predictions_l1))

        # Euclidean distance
        knn_l2 = KNeighborsClassifier(n_neighbors=k, p=2)
        knn_l2.fit(X_train_fold, y_train_fold.ravel())  # Flatten y_train for fitting
        predictions_l2 = knn_l2.predict(X_val_fold)
        accuracy_l2[k].append(accuracy_score(y_val_fold, predictions_l2))


IndexError: index 40000 is out of bounds for axis 0 with size 40000