# Assignment 2: EN4553-Deep Learning for Vision

### Index No: 200041E, 200087A, 200285E, 200505G


## Question2

The squared Euclidean distance between two vectors \(x_i\) and \(y_j\) is given by:

$$
Z_{i,j} = \|x_i - y_j\|^2 = \sum_{k=1}^{d} (x_{i,k} - y_{j,k})^2
$$

Expanding the formula:

$$
\|x_i - y_j\|^2 = \|x_i\|^2 + \|y_j\|^2 - 2(x_i \cdot y_j)
$$

Where:
- \(\|x_i\|^2\) is the squared norm of \(x_i\),
- \(\|y_j\|^2\) is the squared norm of \(y_j\),
- \(x_i . y_j\) is the dot product between \(x_i\) and \(y_j\).


In [1]:
import numpy as np

def pairwise_squared_euclidean_distance(X, Y):
    # Compute squared norm of each row in X and reshape to column vector
    X_norm_squared = np.sum(X ** 2, axis=1).reshape(-1, 1)
    
    # Compute squared norm of each row in Y and reshape to row vector
    Y_norm_squared = np.sum(Y ** 2, axis=1).reshape(1, -1)
    
    # Compute dot product between X and Y
    XY_dot_product = np.dot(X, Y.T)
    
    # Compute the squared Euclidean distance
    Z = X_norm_squared + Y_norm_squared - 2 * XY_dot_product
    
    return Z

#Example
X = np.random.rand(5, 3)
Y = np.random.rand(4, 3)

Z = pairwise_squared_euclidean_distance(X, Y)

print(f"X: \n{X}\nShape of X: {X.shape}")
print(f"Y: \n{Y}\nShape of Y: {Y.shape}")
print(f"Z: \n{Z}\nShape of Z: {Z.shape}")


X: 
[[0.58293747 0.15459849 0.70378007]
 [0.11538553 0.62095453 0.39857726]
 [0.01616187 0.8805374  0.6377318 ]
 [0.34936314 0.48307309 0.14553748]
 [0.37271952 0.69222724 0.18500232]]
Shape of X: (5, 3)
Y: 
[[0.19272834 0.81113917 0.79151547]
 [0.64919182 0.89972801 0.63312437]
 [0.56029314 0.994152   0.09161111]
 [0.10411625 0.75379262 0.03300997]]
Shape of Y: (4, 3)
Z: 
[[0.59100633 0.56459987 1.08011371 1.03823589]
 [0.19655254 0.41767616 0.43144736 0.1514124 ]
 [0.05964125 0.40111644 0.60723493 0.3894887 ]
 [0.54944938 0.50123953 0.30860118 0.14609754]
 [0.41439508 0.32030688 0.13506434 0.09903969]]
Shape of Z: (5, 4)


## Question 3

### Load the Caltech-101 Dataset

In [21]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Load the Caltech-101 dataset and split into trian and test
dataset, info = tfds.load('caltech101', split=['train', 'test'], with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset


In [22]:
# print the size of training and test dataset
print(f"Training dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Training dataset size: 3060
Test dataset size: 6084


### Preprocess the Data

We need to preprocess the data before feeding it into the model

In [23]:
IMG_SIZE = 224 # ResNet50 input size

In [24]:
# prepocess the image suitable for ResNet50 model

def preprocess_image(image, label):
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = tf.keras.applications.resnet50.preprocess_input(image)
    return image, label


In [25]:
# Apply preprocessing
train_dataset = train_dataset.map(preprocess_image).batch(32).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.map(preprocess_image).batch(32).prefetch(tf.data.AUTOTUNE)

### Part (a): k-NN Classification with Pre-trained ResNet-50

In [26]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the pre-trained ResNet-50 model
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')


In [27]:
# Extract embeddings Function
def extract_embeddings(dataset):
  embeddings = []
  labels = []
  for images, labels_batch in dataset:
    embeddings_batch = base_model.predict(images)
    embeddings.append(embeddings_batch)
    labels.append(labels_batch.numpy())
  embeddings = np.concatenate(embeddings)
  labels = np.concatenate(labels)
  return embeddings, labels

In [28]:
# Extract embedding for train and test datasets
train_embeddings, train_labels = extract_embeddings(train_dataset)
test_embeddings, test_labels = extract_embeddings(test_dataset)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━

In [29]:
# K-NN classification
k = 101
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(train_embeddings, train_labels)

In [30]:
y_pred = knn.predict(test_embeddings)
# Report accuracy
accuracy = accuracy_score(test_labels, y_pred)
print(f'Accuracy of k-NN classification: {accuracy:.4f}')

Accuracy of k-NN classification: 0.7503
