# Assignment: EN4553 (Machine Vision)

In [None]:
# !wget https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz
# !tar -xzf images.tar.gz
# !wget https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz
# !tar -xzf annotations.tar.gz

In [None]:
# Upload the file in colab to test full sketch at once
!python3 q1_knn_classifier.py

In [4]:
import torch
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import OxfordIIITPet
from torch.utils.data import DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score

In [5]:
def data_loader(dir='data',batch_size=32):
  # Define the transformation to be applied to the images
  transform = transforms.Compose([
      transforms.Resize((224, 224)),
      transforms.ToTensor(),
  ])

  # Create the dataset
  train_data = OxfordIIITPet(root=dir+"/train", split='trainval'  , transform=transform,download=True)
  test_data = OxfordIIITPet(root=dir+"/test", split='test'  , transform=transform,download=True)

  # Create a data loader
  train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
  test_data_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

  return train_data_loader, test_data_loader

In [6]:
train_data_loader, test_data_loader = data_loader()

Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to data/train/oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:39<00:00, 20297482.88it/s]


Extracting data/train/oxford-iiit-pet/images.tar.gz to data/train/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to data/train/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:02<00:00, 9071447.26it/s] 


Extracting data/train/oxford-iiit-pet/annotations.tar.gz to data/train/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to data/test/oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:38<00:00, 20839738.61it/s]


Extracting data/test/oxford-iiit-pet/images.tar.gz to data/test/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to data/test/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:02<00:00, 7964589.33it/s] 


Extracting data/test/oxford-iiit-pet/annotations.tar.gz to data/test/oxford-iiit-pet


In [7]:
def find_embeddings(data_loader, device):
    # Load pre-trained ResNet-50 model
    resnet50 = models.resnet50(pretrained=True)

    # Remove the last classifier layer (fully connected layer)
    model = nn.Sequential(*list(resnet50.children())[:-1])
    model.eval()

    # Move the model to GPU if available
    model.to(device)

    # Extract ResNet-50 embeddings and labels
    embeddings, labels = [], []

    with torch.no_grad():
        for batch in data_loader:
            images, batch_labels = batch
            # Move the input images and labels to GPU if available
            images, batch_labels = images.to(device), batch_labels.to(device)

            batch_embeddings = model(images)
            embeddings.append(batch_embeddings)
            labels.append(batch_labels)

    # Concatenate embeddings and labels
    embeddings = torch.cat(embeddings, dim=0)
    labels = torch.cat(labels, dim=0)

    # Flatten the embeddings
    embeddings = embeddings.view(embeddings.size(0), -1)

    return embeddings.cpu().numpy(), labels.cpu().numpy()

In [8]:
# Check if GPU is available and use it, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Embedding find start")
X_train, y_train = find_embeddings(train_data_loader, device)
print("Training embedding found successfully")

X_test, y_test = find_embeddings(test_data_loader, device)
print("Test embedding found successfully")

Embedding find start


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 154MB/s]


Training embedding found successfully
Test embedding found successfully


In [10]:
len(X_test[0])

2048

In [None]:
# Create and train k-NN classifier
k = 37
knn_classifier = KNeighborsClassifier(n_neighbors=k)
knn_classifier.fit(X_train, y_train)

# Make predictions on the test set
predictions = knn_classifier.predict(X_test)


# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy}")

accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions,average='weighted')
recall = recall_score(y_test, predictions,average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)


Accuracy: 0.8146633960207141
Accuracy: 0.8146633960207141
Precision: 0.824486255096862
Recall: 0.8146633960207141
