In [6]:
#
# - Resize each cropped image to a 224 × 224 pixel image. (Similar to Assignment 1 Question 2(a))
# - Normalize the resized image dataset.
#

# I've copied over my code from assignment 2: 
# https://github.com/kickroot/cs02505_2/blob/main/Programming%20Assignment%202.ipynb


import os, shutil
import cv2 as cv


print(f"Working from {os.getcwd()}")

# The breed folders this notebook works with.
image_folders = [
    "n02099712-Labrador_retriever",
    "n02107312-miniature_pinscher",
    "n02100735-English_setter",
    "n02113799-standard_poodle",
]

# breed folder name -> list of image file names found beneath it
image_map = {breed_name: [] for breed_name in image_folders}

image_count = 0
for breed_name in image_folders:
    for current_dir, _subdirs, file_names in os.walk("images/" + breed_name):
        # Files are recorded under the name of the directory that directly contains them.
        breed_key = os.path.basename(current_dir)
        for file_name in file_names:
            image_count += 1
            image_map[breed_key].append(file_name)
print(f"Loaded {image_count} images from {image_folders}")

#
# Build up and apply the bounding boxes to all images
#
import xml.etree.ElementTree as ET

# For a given annotation file path, return a list of bounding-box dicts (keys: xmin, ymin, xmax, ymax).
# A single file may have more than one bounding box!

# The following function was inspired by https://www.kaggle.com/code/espriella/stanford-dogs-transfer-crop-stack/notebook
def get_boxes(file_path):
    """Parse an annotation XML file and return its bounding boxes.

    Args:
        file_path: Path to an XML file readable by ``ET.parse``.

    Returns:
        A list with one dict per ``<object>`` element directly under the
        document root, in document order. Each dict holds integer values under
        the keys ``"xmin"``, ``"ymin"``, ``"xmax"`` and ``"ymax"``, read from
        that object's ``<bndbox>`` child. The list is empty when the file has
        no ``<object>`` elements.
    """
    boxes = []
    for obj in ET.parse(file_path).getroot().findall('object'):
        bndbox = obj.find('bndbox')
        # Insertion order matches the original key order: xmin, ymin, xmax, ymax.
        boxes.append({
            corner: int(bndbox.find(corner).text)
            for corner in ("xmin", "ymin", "xmax", "ymax")
        })
    return boxes
    
    
# Map <image file name> -> <bounding boxes>
bounding_boxes = dict()

# Walk the whole annotations tree and index every annotation file by its bare
# file name. The names appear to be unique across breeds, so a single flat
# index keyed by file name (no breed path) is sufficient.
for anno_dir, _subdirs, anno_files in os.walk("annotations"):
    bounding_boxes.update(
        (anno_name, get_boxes(f"{anno_dir}/{anno_name}")) for anno_name in anno_files
    )

print(f"Parsed {len(bounding_boxes)} files")

from PIL import Image

#
# For each image, apply the bounding box and write a newly cropped+scaled image into the
# cropped/ folder.
#

def crop_resize_write(cropped_folder, breed, image_file, bounding_box):
    """Crop one image to its first bounding box, resize it to 224x224 and save it.

    Args:
        cropped_folder: Root output folder. It must already contain a
            sub-folder named after the last path component of ``breed``.
        breed: Path of the folder holding the source image. Only its last
            component is reused for the output location.
        image_file: File name of the image, used for both input and output.
        bounding_box: Non-empty list of dicts with ``xmin``/``ymin``/``xmax``/
            ``ymax`` keys.

    Raises:
        IndexError: If ``bounding_box`` is empty.
    """
    # Bounding box may contain more than one box; for simplicity we only use the first
    # one, as nothing in the instructions listed supporting multiple as a requirement.
    bb = bounding_box[0]
    breed_name = os.path.basename(breed)

    # Do all pixel work while the source file is open, then let the context
    # manager close it promptly instead of leaving the handle to the GC.
    with Image.open(os.path.join(breed, image_file)) as src:
        im = (
            src.crop((bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']))
            .resize((224, 224), Image.LANCZOS)
            .convert('RGB')
        )
    im.save(os.path.join(cropped_folder, breed_name, image_file))

# Let's start with a fresh folder structure
cropped_folder = "cropped"
if os.path.isdir(cropped_folder):
    shutil.rmtree(cropped_folder)

for folder in image_folders:
    os.makedirs(f"{cropped_folder}/{folder}")

# Crop only the selected breeds. Walking all of images/ would also pick up any
# other breed folder that happens to be present, and saving into its output
# folder (which is never created above) would fail.
for folder in image_folders:
    for (root, dirs, files) in os.walk(f"images/{folder}"):
        for image_file in files:
            # Bounding boxes are keyed by the image name without its extension.
            # splitext strips only the trailing extension, whereas
            # replace(".jpg", "") would also remove a ".jpg" inside the name.
            key = os.path.splitext(image_file)[0]
            bb = bounding_boxes[key]
            crop_resize_write(cropped_folder, root, image_file, bb)


Working from /home/jason/rowan/cs02505/cs02505_4
Loaded 675 images from ['n02099712-Labrador_retriever', 'n02107312-miniature_pinscher', 'n02100735-English_setter', 'n02113799-standard_poodle']
Parsed 675 files


In [67]:
#
# Normalize the resized image dataset.
#
# breed folder name -> list of normalized image tensors
normalized_images = dict()
for breed in image_folders:
    normalized_images[breed] = []

from PIL import Image
import torchvision.transforms as transforms

# ToTensor is stateless, so build it once instead of once per image.
to_tensor = transforms.ToTensor()

# Iterate over the image files and apply normalization as described in
# https://www.geeksforgeeks.org/how-to-normalize-images-in-pytorch/
for (root, dirs, files) in os.walk(cropped_folder):
    breed_name = os.path.split(root)[1]
    for image_file in files:
        # Convert while the file is open; the context manager then closes it.
        with Image.open(f"{root}/{image_file}") as img:
            img_tr = to_tensor(img)

        # ToTensor yields a channels-first tensor, so reducing over dims 1 and 2
        # gives one mean/std per channel for this image.
        mean, std = img_tr.mean([1, 2]), img_tr.std([1, 2])
        # A perfectly flat channel has std == 0, which Normalize rejects with a
        # ValueError; clamp so such crops do not abort the whole run.
        std = std.clamp(min=1e-8)

        # Normalize the tensor we already have instead of running ToTensor again.
        img_normalized = transforms.Normalize(mean, std)(img_tr)
        normalized_images[breed_name].append(img_normalized)
        

In [68]:
#
# Extract features for each image from the last convolution layer of “ResNet18” (You can follow
# https://kozodoi.me/blog/20210527/extracting-features. But you must reference
# this website in your solution) (2.5 points)
#
import torch
import timm

device = torch.device('cpu')
model = timm.create_model(model_name = 'resnet18', pretrained = True)
model.to(device)
# The model is used only for feature extraction, so switch it to evaluation
# mode. Left in training mode, BatchNorm would normalize with per-batch
# statistics (and update its running averages) instead of using the
# pretrained running statistics.
model.eval()
print(model)

def get_features(name):
    """Build a forward hook that records a module's output under ``name``.

    The returned callable takes ``(module, inputs, output)`` and stores
    ``output.detach()`` in the module-level ``features`` dict, overwriting any
    previous entry for ``name``.
    """
    def _record(module, inputs, output):
        features[name] = output.detach()
    return _record

# Written by the forward hook on each forward pass; holds only the latest output.
features = dict()

# Capture the output of the model's global pooling module under the key 'feats'.
model.global_pool.register_forward_hook(get_features('feats'))

# Accumulators for predictions, extracted features and labels.
PREDS, FEATS, Y = [], [], []


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, m

In [69]:
import numpy as np

# Inference only: evaluation mode makes BatchNorm use its pretrained running
# statistics rather than the statistics of each single-image batch.
model.eval()

# Collect into fresh lists so that re-running this cell neither duplicates
# labels nor fails because PREDS/FEATS were already turned into arrays.
pred_batches, feat_batches, Y = [], [], []

# no_grad skips building the autograd graph, which is not needed here.
with torch.no_grad():
    for label, tensor_list in normalized_images.items():
        for t in tensor_list:
            # Tensor.to returns a new tensor, so its result must be used.
            batch = t.unsqueeze(0).to(device)
            preds = model(batch)
            Y.append(label)
            pred_batches.append(preds.cpu().numpy())
            # The forward hook has just stored this batch's pooled output.
            feat_batches.append(features['feats'].cpu().numpy())

PREDS = np.concatenate(pred_batches)
FEATS = np.concatenate(feat_batches)



In [70]:
#
# 2. (Dimension Reduction) Perform dimension reduction on your new dog image representation dataset
# to reduce the dimension to 2 (similar to Assignment 1 Question 2(f)). (0.5 points)
#

# This is based on my original code here: https://github.com/kickroot/cs02505_1/blob/main/assignment_1.ipynb

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Fix the seed: with the default 'auto' solver policy, inputs of this size can
# be routed to the randomized SVD, which would otherwise produce a slightly
# different 2D projection (and hence different clusters) on every run.
pca = PCA(n_components=2, random_state=0)
pca.fit(FEATS)
X = pca.transform(FEATS)

In [87]:
#
# 3. (Clustering Algorithm) Perform clustering using the following approaches on the 2D dataset you
# preprocessed in Item 2:
#

# approach name -> result of running that approach on the 2D data X
# NOTE(review): the k-means entries hold the output of fit_transform (per the
# scikit-learn docs, distances to the cluster centers), while "spectral" holds
# the fitted estimator itself. If cluster labels are what is needed downstream,
# read `.labels_` from the estimators instead — confirm against the later cells.
results = dict()

#
# (a) K-means clustering: (Use KMeans with init = ‘Random’) (0.5 point)
#
from sklearn.cluster import KMeans
k_means = KMeans(init="random", n_clusters=4, n_init=10)
results["k_means"] = k_means.fit_transform(X)


#
# (b) KMeans with init=‘k-means++’ (0.5 point)
#
k_means = KMeans(init="k-means++", n_clusters=4, n_init=10)
results["k_means++"] = k_means.fit_transform(X)

#
# (c) Bisecting K-means (sklearn.cluster.BisectingKMeans with init = ‘Random’) (0.5 point)
#
from sklearn.cluster import BisectingKMeans
# Use the BisectingKMeans estimator itself; a plain KMeans here would just
# repeat part (a) under a different key.
k_means = BisectingKMeans(init="random", n_clusters=4, n_init=10)
results["bisecting_k_means"] = k_means.fit_transform(X)

#
# (d) spectral clustering (sklearn.cluster.SpectralClustering with default parameters) (0.5 point)
#
from sklearn.cluster import SpectralClustering
k_means = SpectralClustering(n_clusters=4)
results["spectral"] = k_means.fit(X)

In [None]:
#
# DBSCAN (0.5 point)
#