# Part A - Feature Extraction

## 1. Feature Extraction

This part of the coursework is involved with extracting all the features from the big folder of images celeba_selection.

In [96]:
import numpy
import pandas
import torch
from torchvision import models, transforms
from PIL import Image
from tqdm.notebook import tqdm
import os

EMBEDDINGS_FILE = "extracted_features.npy"
NUM_IMAGES_TO_PROCESS = 20000
IMAGE_DIR = "celeba_selection"
ATTRIBUTE_FILE = "list_attr_celeba.txt"
SAMPLE_ATTRIBUTES_FILENAME = "celeba_sampled_attributes"
SELECTED_ATTRIBUTES = ["Smiling", "Male", "Young", "Blond_Hair", "Wearing_Hat"]
PREDICTION_ATTRIBUTE = "Smiling"

TRAINING_PERCENT = 0.7
CALIB_PERCENT = 0.15

First, we cut down the attribute dataset to only include the attributes we want.

In [97]:
# First we open the attribute file

with open(ATTRIBUTE_FILE, "r") as file:
    lines = file.readlines()


# Extract the headers from first row

columns = lines[1].strip().split()


# Extract the data part (from line 2 onwards)

dataRows = lines[2:]

data = []
imageNames = []
for row in dataRows:
    

    # Seperating the image name (first column) from rest of the dataset
    
    parts = row.strip().split()
    imageNames.append(parts[0])    
    data.append([int(x) for x in parts[1:]])

attributeData = pandas.DataFrame(data, columns=columns, index=imageNames)


# Now we create the subset based on our specific attributes

cutAttributes = attributeData[SELECTED_ATTRIBUTES]
print(cutAttributes.head())

            Smiling  Male  Young  Blond_Hair  Wearing_Hat
000001.jpg        1    -1      1          -1           -1
000002.jpg        1    -1      1          -1           -1
000003.jpg       -1     1      1          -1           -1
000004.jpg       -1    -1      1          -1           -1
000005.jpg       -1    -1      1          -1           -1


The attribute dataset is now sampled randomly based on the sample size.

In [98]:
sampledAttributes = cutAttributes.sample(n=NUM_IMAGES_TO_PROCESS, random_state=42)
sampledImageNames = sampledAttributes.index.to_list()
sampledAttributes.to_csv(SAMPLE_ATTRIBUTES_FILENAME + ".csv", index=True)

print(f"Sampled attributes saved to: {SAMPLE_ATTRIBUTES_FILENAME}.csv")

Sampled attributes saved to: celeba_sampled_attributes.csv


Below is the code for executing the feature extraction process

In [92]:
def extractFeatures():

    # First we skip this expensive step if feature file already exists.

    if os.path.exists(EMBEDDINGS_FILE):
        print(f"Embeddings file '{EMBEDDINGS_FILE} already exists. Extracting is skipped")
        return
    

    # We now load the pre-trained Vision Transformer and set it to evaluation mode

    device = torch.device("cuda")
    vit = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1).to(device)
    vit.eval()


    # Preprocessing is defined; resizing, centre-cropping to 224x224, converted to tensors, and normalised to match ImageNet training

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])


    # List of images is prepared from feature list earlier

    sampledAttributes = pandas.read_csv(SAMPLE_ATTRIBUTES_FILENAME + ".csv", index_col=0)
    sampledImageNames = sampledAttributes.index.tolist()
    


    allFeatures = []


    # Features are extracted for each image

    with torch.no_grad():
        for fname in tqdm(sampledImageNames, desc="Extracting ViT Feature Embeddings", colour="#ebbcba"):
            img = Image.open(os.path.join(IMAGE_DIR, fname)).convert("RGB")
            x = preprocess(img).unsqueeze(0).to(device)


            ## Forward pass to get feature embeddings without classification head

            x_processed = vit._process_input(x)
            n = x_processed.shape[0]
            
            batch_class_token = vit.class_token.expand(n, -1, -1)
            x_with_token = torch.cat([batch_class_token, x_processed], dim=1)

            encoded_features = vit.encoder(x_with_token)
            features = encoded_features[:, 0]

            allFeatures.append(features.cpu().numpy().flatten())
    

    all_features_np = numpy.array(allFeatures)
    print(f"Feature matrix shape: {all_features_np.shape}")


    # Save feature embeddings for future use

    numpy.save(EMBEDDINGS_FILE, all_features_np)
    print(f"Embeddings saved to '{EMBEDDINGS_FILE}'.")




In [99]:
extractFeatures()

Extracting ViT Feature Embeddings:   0%|          | 0/20000 [00:00<?, ?it/s]

FileNotFoundError: [Errno 2] No such file or directory: 'celeba_selection\\093242.jpg'

## 2. Training the Classifiers

First, we need top load the features and labels generated in part 1

In [None]:
features = numpy.load(EMBEDDINGS_FILE)
sampledAttributes = pandas.read_csv(SAMPLE_ATTRIBUTES_FILENAME + ".csv", index_col=0)



# Only want to confirm if the image is similing or not

labels = sampledAttributes[PREDICTION_ATTRIBUTE].values

# Since the attribute data records binary values as -1 and 1, we need to convert to 0 and 1 for Gaussian/Normal classifier

labels = ((labels + 1) // 2).astype(int)

Now, the split sizes need to be computed to ensure the correct number of cases for each data set.

In [None]:
trainSize = int(TRAINING_PERCENT * NUM_IMAGES_TO_PROCESS)
calibSize = int(CALIB_PERCENT * NUM_IMAGES_TO_PROCESS)
testSize = NUM_IMAGES_TO_PROCESS - trainSize - calibSize

The datasets can now be produced.

In [None]:
# Training Set

XTrain = features[:trainSize]
yTrain = labels[:trainSize]

# Calibration Set

XCalib = features[trainSize:trainSize + calibSize]
yCalib = labels[trainSize:trainSize + calibSize]

# Testing Set

XTest = features[trainSize + calibSize:]
yTest = labels[trainSize + calibSize:]

print("Train:", XTrain.shape, yTrain.shape)
print("Calibration:", XCalib.shape, yCalib.shape)
print("Test:", XTest.shape, yTest.shape)
