In [None]:
# Mount Google Drive into the Colab filesystem. Every path used later in this
# notebook (model weights, class names, dataset images, saved prototypes) lives
# under /content/drive/MyDrive, so this cell has to run first.
# force_remount=True requests a fresh mount even if Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
import os
import json # loads the class names

MODEL_SAVE_PATH_CLASSIFIER = '/content/drive/MyDrive/person_classifier_model.pth'
CLASSES_SAVE_PATH = '/content/drive/MyDrive/person_reid_classes.json'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load class names to know num_classes.
# To load our saved model weights, PyTorch needs to know the exact architecture
# of the model they belong to. The number of class names fixes how many output
# neurons the final layer has (one for each person).
try:
    with open(CLASSES_SAVE_PATH, 'r') as f:
        class_names = json.load(f)
except FileNotFoundError:
    print(f"{CLASSES_SAVE_PATH} not found.")
    # Without the class list the size of the final layer is unknown, so there
    # is nothing sensible to build. Stop here with the real cause instead of
    # hitting a NameError on num_classes a few lines further down.
    raise

num_classes = len(class_names)
print(f"Loaded class names: {class_names}")
print(f"Number of classes: {num_classes}")

# Check for the weights file BEFORE building the network. If it is missing we
# must not leave a randomly initialised model in `model_to_load`, because later
# cells would silently turn it into meaningless embeddings and prototypes.
if not os.path.exists(MODEL_SAVE_PATH_CLASSIFIER):
    print(f"Model file not found at {MODEL_SAVE_PATH_CLASSIFIER}.")
    raise FileNotFoundError(MODEL_SAVE_PATH_CLASSIFIER)

# Build a "shell" of the ResNet18 model. It has the same layers as our trained
# model, but with random weights until the checkpoint is loaded below.
model_to_load = models.resnet18(weights=None)
num_ftrs = model_to_load.fc.in_features

# Replace the final fully connected (fc) layer with one that matches our task,
# e.g. outputting a score for each of our 3 people.
model_to_load.fc = nn.Linear(num_ftrs, num_classes)
model_to_load = model_to_load.to(device)

# Load the saved weights. weights_only=True restricts unpickling to tensors and
# plain containers, which is all a state_dict needs and avoids executing
# arbitrary pickled code from the checkpoint file.
state_dict = torch.load(MODEL_SAVE_PATH_CLASSIFIER, map_location=device, weights_only=True)
model_to_load.load_state_dict(state_dict)
model_to_load.eval() # Set the model to evaluation mode
print(f"Trained classification model loaded successfully from {MODEL_SAVE_PATH_CLASSIFIER}")

Loaded class names: ['person_ben', 'person_kenny', 'person_pryce']
Number of classes: 3
Trained classification model loaded successfully from /content/drive/MyDrive/person_classifier_model.pth


In [None]:
class FeatureExtractor(nn.Module):
    """Turn a classifier into an embedding model by dropping its last child.

    All child modules of ``original_model`` except the final one (the ``fc``
    layer in the ResNet used by this notebook) are kept, in order, inside
    ``self.features``. The forward pass therefore stops just short of the
    classification layer and returns the feature vector instead.
    """

    def __init__(self, original_model):
        super().__init__()
        kept_layers = list(original_model.children())[:-1]  # everything but the last child
        self.features = nn.Sequential(*kept_layers)

    def forward(self, x):
        """Run ``x`` through the kept layers and flatten the result per sample."""
        feature_maps = self.features(x)
        # Collapse every dimension after the batch dimension so each sample
        # becomes one flat vector (shape [1, 512] for one image with ResNet18).
        return feature_maps.flatten(start_dim=1)

# Build the embedding model from the classifier loaded in the previous cell.
if 'model_to_load' not in locals() or model_to_load is None:
    print("feature embedding not created")
else:
    # .to() and .eval() both return the module itself, so the calls can be
    # chained: move to the target device, then switch to evaluation mode.
    embedding_model = FeatureExtractor(model_to_load).to(device).eval()
    print("Feature extractor embedding created.")
    print(f"Output embedding dimension: {num_ftrs}") # Should be 512 for ResNet18 (check)

In [None]:
# Any new image shown to the model has to go through the same preprocessing as
# the images used during training/validation, otherwise the embeddings are not
# comparable: resize, centre-crop, convert to a tensor, then normalize.

input_size = 224 # ResNet input size
inference_transform = transforms.Compose([
    # Resize slightly larger than the target so the centre crop trims the border.
    transforms.Resize(size=input_size + 32),
    transforms.CenterCrop(size=input_size),
    transforms.ToTensor(),
    # ImageNet channel statistics.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
from PIL import Image

# This function takes a single image path, processes it, and returns its embedding vector.
def get_embedding(image_path, model, transform, device):
    """Compute the embedding of one image file.

    Args:
        image_path: Path of the image file to open with Pillow.
        model: Callable applied to the batched tensor (here, the feature
            extractor); its output must be a tensor.
        transform: Callable that converts the RGB PIL image into a tensor.
        device: Device the single-image batch is moved to before the forward
            pass.

    Returns:
        The model output for a batch containing this one image, as a NumPy
        array, or ``None`` if the file is missing or cannot be read/decoded
        (a message is printed in that case).
    """
    try:
        # Open inside a context manager so the underlying file handle is closed
        # as soon as the pixel data has been converted to an RGB image.
        with Image.open(image_path) as raw_img:
            img = raw_img.convert('RGB')
    except FileNotFoundError:
        print(f"Image not found at {image_path}")
        return None
    except OSError as err:
        # Unreadable, truncated or non-image files: Pillow reports these as
        # OSError (UnidentifiedImageError is a subclass). Callers already
        # handle a None result, so one bad file should not abort a whole run.
        print(f"Could not read image at {image_path}: {err}")
        return None

    img_transformed = transform(img)
    # PyTorch models expect a batch of images, even if we're only processing one.
    # .unsqueeze(0) adds a leading batch dimension, e.g. [3, 224, 224] -> [1, 3, 224, 224].
    img_batch = img_transformed.unsqueeze(0).to(device)

    with torch.no_grad(): # No gradients are needed for inference (only training)
        embedding = model(img_batch)

    return embedding.cpu().numpy() # Return embedding as a NumPy array

# Smoke test: extract the embedding of one validation image.
example_image_path = '/content/drive/MyDrive/dataset/validation/person_pryce/pryce_close_up_frame_00003.jpg' # CHANGE THIS for each different image test

# Only attempt the extraction when the embedding model exists and the file is there.
if 'embedding_model' not in locals() or not os.path.exists(example_image_path):
  print("Example Embedding did not work.")
else:
  example_embedding = get_embedding(example_image_path, embedding_model, inference_transform, device)
  if example_embedding is not None:
    print(f"Shape of extracted embedding: {example_embedding.shape}") # Should be (1, 512)
    print(f"Embedding vector: {example_embedding}")

Shape of extracted embedding: (1, 512)
Embedding vector: [[1.58542216e+00 1.46742627e-01 9.99294281e-01 4.72742766e-02
  1.87943816e+00 8.88246179e-01 1.20832741e-01 6.95589185e-01
  9.97585654e-02 6.87132239e-01 6.70889020e-01 8.58204722e-01
  0.00000000e+00 2.55493373e-01 5.16166016e-02 9.43767607e-01
  8.78534615e-01 5.90496659e-02 1.61917007e+00 1.21391334e-01
  5.18330932e-02 2.24496722e+00 9.87011492e-01 8.59058321e-01
  1.92775333e+00 8.01303238e-03 6.00792095e-03 1.18445909e+00
  2.99010277e-01 3.44768018e-01 9.25906301e-01 9.64122355e-01
  5.31638443e-01 2.36872345e-01 1.73967153e-01 5.54729402e-01
  1.28029609e+00 7.03313425e-02 1.37593424e+00 5.11817098e-01
  3.98509502e-01 9.71403599e-01 2.16634616e-01 1.04438707e-01
  1.10476899e+00 2.45347977e+00 7.73929119e-01 8.33565712e-01
  4.40420419e-01 2.24054193e+00 1.29326925e-01 6.81358576e-01
  0.00000000e+00 7.67900348e-01 1.97619915e+00 5.90615720e-02
  2.17641830e+00 4.37796414e-01 3.67841184e-01 4.10303265e-01
  9.95721743e

This is where we grab the individual embeddings of each person's chosen images and average them into one prototype embedding per person. I've done this for everyone so far, and I can send the resulting .npy files soon.

In [None]:
# Goal: build one representative embedding per person.
# Any single photo may have odd lighting or a strange expression, so we collect
# the embeddings of several good, representative photos here; the next cell
# averages them into a more robust and stable representation of that person.

import numpy as np
validation_folder_base_path = '/content/drive/MyDrive/dataset/validation/'

# The person whose prototype is being built in this run.
person_id = 'person_ben'
person_specific_validation_folder = os.path.join(validation_folder_base_path, person_id)

# Hand-picked list of 10 of the best validation images for this person.
# Replace these with the actual filenames of your chosen images.
best_image_filenames_for_person = [
    'ben_IMG_7093_frame_00370.jpg',
    'ben_IMG_7093_frame_00327.jpg',
    'ben_IMG_7093_frame_00319.jpg',
    'ben_IMG_7093_frame_00036.jpg',
    'ben_IMG_7093_frame_00138.jpg',
    'ben_IMG_7092_frame_00700.jpg',
    'ben_IMG_7092_frame_00597.jpg',
    'ben_IMG_7092_frame_00539.jpg',
    'ben_IMG_7092_frame_00479.jpg',
    'ben_IMG_7092_frame_00500.jpg',
]

# Collects one embedding vector per successfully processed image.
person_embeddings_list = []

print(f"Processing images for: {person_id}")
for image_filename in best_image_filenames_for_person:
    image_path = os.path.join(person_specific_validation_folder, image_filename)
    print(f"  Getting embedding for: {image_path}")

    # get_embedding (defined earlier) returns None when the image cannot be used.
    embedding_vector_batch = get_embedding(image_path, embedding_model, inference_transform, device)
    if embedding_vector_batch is None:
        print(f"    Warning: Could not retrieve embedding for {image_path}")
        continue

    # get_embedding returns a batch of one, e.g. shape (1, 512); index 0 pulls
    # the single (512,) vector out of that batch.
    embedding_vector_single = embedding_vector_batch[0]
    person_embeddings_list.append(embedding_vector_single)
    print(f"    Successfully retrieved embedding with shape: {embedding_vector_single.shape}")

# Repeat this for every person so each one gets an averaged embedding to compare against!

Processing images for: person_ben
  Getting embedding for: /content/drive/MyDrive/dataset/validation/person_ben/ben_IMG_7093_frame_00370.jpg
    Successfully retrieved embedding with shape: (512,)
  Getting embedding for: /content/drive/MyDrive/dataset/validation/person_ben/ben_IMG_7093_frame_00327.jpg
    Successfully retrieved embedding with shape: (512,)
  Getting embedding for: /content/drive/MyDrive/dataset/validation/person_ben/ben_IMG_7093_frame_00319.jpg
    Successfully retrieved embedding with shape: (512,)
  Getting embedding for: /content/drive/MyDrive/dataset/validation/person_ben/ben_IMG_7093_frame_00036.jpg
    Successfully retrieved embedding with shape: (512,)
  Getting embedding for: /content/drive/MyDrive/dataset/validation/person_ben/ben_IMG_7093_frame_00138.jpg
    Successfully retrieved embedding with shape: (512,)
  Getting embedding for: /content/drive/MyDrive/dataset/validation/person_ben/ben_IMG_7092_frame_00700.jpg
    Successfully retrieved embedding with sh

In [None]:
# Average the collected embeddings into one prototype vector and save it.
if not person_embeddings_list:
    print(f"\nNo embeddings for {person_id}.")
else:
    # Report whether every selected image produced an embedding.
    if len(person_embeddings_list) != len(best_image_filenames_for_person):
        print(f"\ncollected {len(person_embeddings_list)} embeddings out of {len(best_image_filenames_for_person)}. error")
    else:
        print(f"\ncollected {len(person_embeddings_list)} embeddings.")

    # axis=0 averages element-wise across the gathered vectors, giving one
    # mean value per embedding dimension.
    prototype_embedding = np.mean(person_embeddings_list, axis=0)

    print(f"\nAveraged embedding for {person_id}:")
    print(f"  Shape: {prototype_embedding.shape}") # Should be (512,)
    print(f"  Averaged embedding: {prototype_embedding}")

    # Persist the averaged embedding as a .npy file; the real-time application
    # loads these files to create its gallery of known people.
    prototype_save_path = f'/content/drive/MyDrive/{person_id}_prototype_embedding.npy'
    np.save(prototype_save_path, prototype_embedding)
    print(f" embedding saved to: {prototype_save_path}")