<a href="https://colab.research.google.com/github/szandian/Computer_Vision/blob/main/HW1_szandian_Computer_Vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.datasets import mnist, cifar10, fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt


#########################################
# Q1: Distance Analysis on MNIST & CIFAR-10
#########################################

In [None]:

# Fix the NumPy and TensorFlow global seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# ---------------------
# 1. Load MNIST dataset and divide the pixel arrays by 255.0
(x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = mnist.load_data()
x_train_mnist, x_test_mnist = x_train_mnist / 255.0, x_test_mnist / 255.0

# ---------------------
# 3. Load CIFAR-10 dataset, scaled the same way; the label arrays are
#    collapsed to one dimension with flatten()
(x_train_cifar, y_train_cifar), (x_test_cifar, y_test_cifar) = cifar10.load_data()
x_train_cifar, x_test_cifar = x_train_cifar / 255.0, x_test_cifar / 255.0
y_train_cifar, y_test_cifar = y_train_cifar.flatten(), y_test_cifar.flatten()

# ---------------------
# Functions to compute distances
def l1_distance(img1, img2):
    """Return the L1 (Manhattan) distance between two images.

    Both inputs are converted to float64 before subtracting, so unsigned
    integer images (e.g. raw uint8 pixels) do not wrap around when a
    difference would be negative.

    Args:
        img1: Array-like of numbers.
        img2: Array-like of numbers, broadcastable against ``img1``.

    Returns:
        The sum of the absolute element-wise differences (NumPy float64).
    """
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)
    return np.sum(np.abs(diff))

def l2_distance(img1, img2):
    """Return the L2 (Euclidean) distance between two images.

    Both inputs are converted to float64 before subtracting, so unsigned
    integer images (e.g. raw uint8 pixels) neither wrap around on negative
    differences nor overflow when the differences are squared.

    Args:
        img1: Array-like of numbers.
        img2: Array-like of numbers, broadcastable against ``img1``.

    Returns:
        The square root of the summed squared element-wise differences
        (NumPy float64).
    """
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

# ---------------------
# 2. & 4. Select one random image per class and compute L1 distance matrices

def select_random_samples(x_data, y_data, num_classes=10):
    """Pick one randomly chosen example for every class label.

    Labels ``0 .. num_classes - 1`` are visited in increasing order. For each
    one, the positions where ``y_data`` equals the label are collected and a
    single position is drawn with ``np.random.choice`` (NumPy's global random
    state).

    Returns:
        dict mapping each label to the selected entry of ``x_data``.
    """
    return {
        label: x_data[np.random.choice(np.where(y_data == label)[0])]
        for label in range(num_classes)
    }

# Draw one image per class; MNIST is sampled before CIFAR-10
mnist_class_samples = select_random_samples(x_train_mnist, y_train_mnist)
cifar_class_samples = select_random_samples(x_train_cifar, y_train_cifar)

# Pairwise L1 distances between the MNIST class samples, filled one row at a time
l1_dist_mnist = np.zeros((10, 10))
for i in range(10):
    l1_dist_mnist[i] = [
        l1_distance(mnist_class_samples[i], mnist_class_samples[j])
        for j in range(10)
    ]
print("L1 Distance Matrix for MNIST:")
print(l1_dist_mnist)

# Pairwise L1 distances between the CIFAR-10 class samples
l1_dist_cifar = np.zeros((10, 10))
for i in range(10):
    l1_dist_cifar[i] = [
        l1_distance(cifar_class_samples[i], cifar_class_samples[j])
        for j in range(10)
    ]
print("\nL1 Distance Matrix for CIFAR-10:")
print(l1_dist_cifar)




Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step
L1 Distance Matrix for MNIST:
[[  0.         195.00784314 182.6        130.49411765 222.72941176
  174.24705882 188.06666667 199.89019608 154.74509804 197.90196078]
 [195.00784314   0.         103.78823529 137.83921569 139.58039216
  139.1372549  145.08235294 111.76078431 159.72941176  80.19607843]
 [182.6        103.78823529   0.         137.36078431 141.6745098
  175.74117647 140.61960784 143.77647059 143.4627451  111.21568627]
 [130.49411765 137.83921569 137.36078431   0.         174.94901961
  132.6        165.90196078 177.77254902 107.63137255 134.36470588]
 [222.72941176 139.58039216 141.6745098  174.94901961   0.
  152.38039216 145.66666667 1

In [None]:
# ---------------------
# Q1.5: Find the closest neighboring class (K=1) for each class (using L1 distance)
print("\nClosest Neighbor (based on L1 distance):")
for i, row in enumerate(l1_dist_mnist):
    # Work on a copy so the matrix keeps its zero diagonal, then rule out the
    # class itself by setting its own distance to infinity.
    distances = row.copy()
    distances[i] = np.inf
    closest_class = np.argmin(distances)
    print(f"MNIST class {i} is closest to class {closest_class} (L1 distance: {distances[closest_class]:.2f})")

# (Commentary: only one randomly chosen image per class is compared, so the pairing reflects those
# particular samples. Digits whose handwritten strokes overlap - for example '1' and '7' in some
# styles - can end up with small pixel distances.)

# ---------------------
# 6. Repeat steps using L2 distance

# Pairwise L2 distances between the MNIST class samples, filled one row at a time
l2_dist_mnist = np.zeros((10, 10))
for i in range(10):
    l2_dist_mnist[i] = [
        l2_distance(mnist_class_samples[i], mnist_class_samples[j])
        for j in range(10)
    ]
print("\nL2 Distance Matrix for MNIST:")
print(l2_dist_mnist)

# Pairwise L2 distances between the CIFAR-10 class samples
l2_dist_cifar = np.zeros((10, 10))
for i in range(10):
    l2_dist_cifar[i] = [
        l2_distance(cifar_class_samples[i], cifar_class_samples[j])
        for j in range(10)
    ]
print("\nL2 Distance Matrix for CIFAR-10:")
print(l2_dist_cifar)


Closest Neighbor (based on L1 distance):
MNIST class 0 is closest to class 3 (L1 distance: 130.49)
MNIST class 1 is closest to class 9 (L1 distance: 80.20)
MNIST class 2 is closest to class 1 (L1 distance: 103.79)
MNIST class 3 is closest to class 8 (L1 distance: 107.63)
MNIST class 4 is closest to class 7 (L1 distance: 132.51)
MNIST class 5 is closest to class 9 (L1 distance: 117.00)
MNIST class 6 is closest to class 2 (L1 distance: 140.62)
MNIST class 7 is closest to class 1 (L1 distance: 111.76)
MNIST class 8 is closest to class 3 (L1 distance: 107.63)
MNIST class 9 is closest to class 1 (L1 distance: 80.20)

L2 Distance Matrix for MNIST:
[[ 0.         13.08104745 12.27430746 10.16450768 13.85306537 12.03485966
  12.47300445 13.01477468 11.1093921  12.95478488]
 [13.08104745  0.          9.08295741 10.80493358 10.88182016 10.97699387
  10.94106683  9.54179217 11.71301389  7.86947538]
 [12.27430746  9.08295741  0.         10.52256834 10.6556568  12.17522333
  10.4745399  10.71520889

#########################################
# Q2: KNN Classification on MNIST
#########################################

In [None]:
# 1. 80/20 split of the MNIST training set: the first 80% of the rows are used
#    for fitting, the remaining rows for evaluation (no shuffling is applied here)
mnist_total = len(x_train_mnist)
train_size = int(0.8 * mnist_total)
x_train_mnist_knn, x_test_mnist_knn = x_train_mnist[:train_size], x_train_mnist[train_size:]
y_train_mnist_knn, y_test_mnist_knn = y_train_mnist[:train_size], y_train_mnist[train_size:]

# 2. & 3. Define a function to run KNN for different k-values and print accuracy and computation time
def knn_classification(train_data, train_labels, test_data, test_labels, k_values, metric):
    """Fit and score a k-nearest-neighbours classifier for each value of k.

    Every sample in ``train_data`` / ``test_data`` is flattened to a 1-D
    feature vector. For each ``k`` a fresh ``KNeighborsClassifier`` is fitted
    on the training set and scored on the test set, and the accuracy together
    with the elapsed fit + score time is printed.

    Args:
        train_data: Array whose first axis indexes training samples.
        train_labels: Labels for ``train_data``.
        test_data: Array whose first axis indexes test samples.
        test_labels: Labels for ``test_data``.
        k_values: Iterable of integer neighbour counts to evaluate.
        metric: Distance metric passed to ``KNeighborsClassifier``.

    Returns:
        None. Results are only printed.
    """
    print(f"\nKNN Classification using {metric} distance:")

    # Flattening does not depend on k, so do it once rather than per iteration.
    train_flat = train_data.reshape(train_data.shape[0], -1)
    test_flat = test_data.reshape(test_data.shape[0], -1)

    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
        # perf_counter() is monotonic and high resolution; time.time() can jump
        # if the system clock is adjusted while the measurement is running.
        start_time = time.perf_counter()
        knn.fit(train_flat, train_labels)
        accuracy = knn.score(test_flat, test_labels)
        elapsed = time.perf_counter() - start_time
        print(f"  K={k:3d} -> Accuracy: {accuracy:.4f}, Time: {elapsed:.2f} sec")

k_values = [5, 25, 55, 105]
# Same split and the same k grid for both metrics: Manhattan first, then Euclidean
for knn_metric in ('manhattan', 'euclidean'):
    knn_classification(x_train_mnist_knn, y_train_mnist_knn,
                       x_test_mnist_knn, y_test_mnist_knn, k_values, metric=knn_metric)


KNN Classification using manhattan distance:
  K=  5 -> Accuracy: 0.9631, Time: 108.24 sec
  K= 25 -> Accuracy: 0.9493, Time: 108.17 sec
  K= 55 -> Accuracy: 0.9391, Time: 108.38 sec
  K=105 -> Accuracy: 0.9283, Time: 108.24 sec

KNN Classification using euclidean distance:
  K=  5 -> Accuracy: 0.9688, Time: 4.24 sec
  K= 25 -> Accuracy: 0.9578, Time: 4.66 sec
  K= 55 -> Accuracy: 0.9475, Time: 4.26 sec
  K=105 -> Accuracy: 0.9385, Time: 4.34 sec


#########################################
# Q3: MLP Experiments on MNIST
#########################################

In [None]:

# Use the original MNIST train/test split provided by Keras for MLP training
# (60000 training samples, 10000 test samples)

def create_and_train_mlp(model, x_train, y_train, x_test, y_test, epochs=5, batch_size=32, verbose=1):
    """Compile, train and evaluate an already constructed model.

    The model is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss and the accuracy metric, fitted on the training data
    and then evaluated on the test data.

    Args:
        model: Object exposing ``compile``, ``fit`` and ``evaluate``.
        x_train: Training inputs.
        y_train: Training labels.
        x_test: Test inputs.
        y_test: Test labels.
        epochs: Number of training epochs passed to ``fit``.
        batch_size: Batch size passed to ``fit``.
        verbose: Verbosity passed to ``fit``; the default of 1 keeps the
            per-epoch progress output, 0 silences it.

    Returns:
        The second value returned by ``model.evaluate`` (the accuracy metric
        requested at compile time).
    """
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate() yields [loss, accuracy]; only the accuracy is reported.
    _, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
    return test_accuracy

input_shape = (28, 28)


def _build_mlp(hidden_units):
    """Build an uncompiled MLP: Input -> Flatten -> Dense(relu) per entry -> Dense(10, softmax).

    The input shape is declared with an explicit ``tf.keras.Input`` object
    instead of ``Flatten(input_shape=...)``, which newer Keras versions warn
    about when used inside a Sequential model.

    Args:
        hidden_units: Sequence with the width of each hidden Dense layer, in order.

    Returns:
        A ``Sequential`` model that has not been compiled yet.
    """
    layers = [tf.keras.Input(shape=input_shape), Flatten()]
    layers.extend(Dense(units, activation='relu') for units in hidden_units)
    layers.append(Dense(10, activation='softmax'))
    return Sequential(layers)


# Each model is built immediately before it is trained, in the same order as
# before, so the sequence of random weight initialisations is unchanged.

# Model 1: Baseline MLP (from reference)
model1 = _build_mlp([128])
acc1 = create_and_train_mlp(model1, x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist)
print(f"\nMLP Model 1 (Baseline) Test Accuracy: {acc1:.4f}")

# Model 2: Second layer neurons equal to input size (784)
model2 = _build_mlp([784])
acc2 = create_and_train_mlp(model2, x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist)
print(f"MLP Model 2 (Hidden size = 784) Test Accuracy: {acc2:.4f}")

# Model 3: Second layer neurons equal to 2x input size (1568)
model3 = _build_mlp([1568])
acc3 = create_and_train_mlp(model3, x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist)
print(f"MLP Model 3 (Hidden size = 1568) Test Accuracy: {acc3:.4f}")

# Model 4: Adding an extra dense layer (e.g., 128 neurons then 64 neurons)
model4 = _build_mlp([128, 64])
acc4 = create_and_train_mlp(model4, x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist)
print(f"MLP Model 4 (Extra hidden layer) Test Accuracy: {acc4:.4f}")


  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.8766 - loss: 0.4312
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9631 - loss: 0.1273
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9762 - loss: 0.0814
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9828 - loss: 0.0576
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9875 - loss: 0.0430

MLP Model 1 (Baseline) Test Accuracy: 0.9738
Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9044 - loss: 0.3203
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9747 - loss: 0.0834
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9855 -

#########################################
# Q4: Summary of Key Observations
#########################################

In [None]:
# Emit the heading and the summary body with a single print call; the newline
# separator reproduces the line break that two separate print calls would give.
print(
    "\nSummary of Key Observations:",
    """
1. Q1 (Distance Analysis):
   - The L1 and L2 distance matrices provide insight into how similar the randomly chosen images from each class are.
   - For example, some digit classes (like 1 and 7) may have relatively small distances, reflecting similar stroke patterns.
2. Q2 (KNN Classification):
   - The KNN classifier’s accuracy and runtime vary with the choice of K and the distance metric.
   - Smaller K values often lead to higher variance, whereas larger K values can smooth out predictions.
3. Q3 (MLP Experiments):
   - Changing the hidden layer size or adding extra layers can impact model capacity.
   - There is a trade-off between model complexity and generalization; the different architectures yield slightly different test accuracies.
""",
    sep="\n",
)



Summary of Key Observations:

1. Q1 (Distance Analysis):
   - The L1 and L2 distance matrices provide insight into how similar the randomly chosen images from each class are.
   - For example, some digit classes (like 1 and 7) may have relatively small distances, reflecting similar stroke patterns.
2. Q2 (KNN Classification):
   - The KNN classifier’s accuracy and runtime vary with the choice of K and the distance metric.
   - Smaller K values often lead to higher variance, whereas larger K values can smooth out predictions.
3. Q3 (MLP Experiments):
   - Changing the hidden layer size or adding extra layers can impact model capacity.
   - There is a trade-off between model complexity and generalization; the different architectures yield slightly different test accuracies.



#########################################
# Q5: KNN and MLP on Fashion MNIST
#########################################


In [None]:
# 1. Load Fashion MNIST data and divide the pixel arrays by 255.0
(x_train_fashion, y_train_fashion), (x_test_fashion, y_test_fashion) = fashion_mnist.load_data()
x_train_fashion = x_train_fashion / 255.0
x_test_fashion  = x_test_fashion  / 255.0

# 2. Apply KNN to Fashion MNIST (using k=5 and Manhattan distance)
knn_fashion = KNeighborsClassifier(n_neighbors=5, metric='manhattan')
# perf_counter() is monotonic and high resolution, which makes it the right
# clock for measuring an interval; time.time() can jump with clock adjustments.
start_time = time.perf_counter()
knn_fashion.fit(x_train_fashion.reshape(x_train_fashion.shape[0], -1), y_train_fashion)
fashion_knn_accuracy = knn_fashion.score(x_test_fashion.reshape(x_test_fashion.shape[0], -1), y_test_fashion)
end_time = time.perf_counter()
print(f"\nFashion MNIST KNN (k=5, Manhattan): Accuracy = {fashion_knn_accuracy:.4f}, Time = {end_time - start_time:.2f} sec")

# 3. Build and train an MLP on Fashion MNIST (baseline architecture)
# The input shape is declared with an explicit Input object rather than
# Flatten(input_shape=...), which newer Keras versions warn about in
# Sequential models.
fashion_mlp = Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])
fashion_mlp.compile(optimizer='adam',
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
fashion_mlp.fit(x_train_fashion, y_train_fashion, epochs=5, batch_size=32, verbose=1)
# evaluate() returns [loss, accuracy]; index 1 is the accuracy metric.
fashion_mlp_eval = fashion_mlp.evaluate(x_test_fashion, y_test_fashion, verbose=0)
print(f"Fashion MNIST MLP: Test Accuracy = {fashion_mlp_eval[1]:.4f}")

print("""
Observation for Q5:
   - The MLP typically outperforms the KNN classifier on Fashion MNIST.
   - This indicates that learning task-specific features via deep networks can better capture the nuances in the data
     compared to distance-based methods.
""")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step

Fashion MNIST KNN (k=5, Manhattan): Accuracy = 0.8623, Time = 118.23 sec


  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.7801 - loss: 0.6351
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8598 - loss: 0.3913
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8743 - loss: 0.3463
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8826 - loss: 0.3186
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8887 - loss: 0.2996
Fashion MNIST MLP: Test Accuracy = 0.8722

Observation for Q5:
   - The MLP typically outperforms the KNN classifier on Fashion MNIST.
   - This indicates that learning task-specific features via deep networks can better capture the nuances in the data
     compared to distance-based methods.

