In [None]:
# Mount Google Drive under /content/drive; every path configured below
# (dataset folder, saved weights, class-name JSON) lives beneath this mount point.
from google.colab import drive
# NOTE(review): force_remount=True presumably re-mounts even when Drive is already attached in this session - confirm against the Colab docs.
drive.mount('/content/drive', force_remount=True)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import os
import time
import copy
import json

# Root folder of the dataset; expected to hold "train" and "validation" sub-folders.
DATA_DIR = '/content/drive/MyDrive/dataset/'

# Where the fine-tuned ResNet-18 weights (state_dict) are written.
MODEL_SAVE_PATH_CLASSIFIER = '/content/drive/MyDrive/person_classifier_model.pth'

# JSON file that stores the ordered list of class (person) names.
CLASSES_SAVE_PATH = '/content/drive/MyDrive/person_reid_classes.json'

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
#builds the preprocessing pipelines, loads the train/validation image folders, stores class names

input_size = 224 #resnet standard input image size
BATCH_SIZE = 16 #num of images processed per training step

# ImageNet channel mean/std, matching the statistics the pretrained ResNet weights expect.
imagenet_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

data_transforms = { #dictionary of image processing pipelines, one per split
    'train': transforms.Compose([ #random crop/flip/colour jitter = augmentation for training only
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        imagenet_normalize,
    ]),
    'validation': transforms.Compose([ #deterministic resize + centre crop so metrics are repeatable
        transforms.Resize(input_size + 32),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        imagenet_normalize,
    ]),
}

try:
    #ImageFolder assigns labels from the sub-folder names found under each split's root
    image_datasets = {split: datasets.ImageFolder(os.path.join(DATA_DIR, split), data_transforms[split])
                      for split in ['train', 'validation']}

    #each ImageFolder indexes its OWN sorted folder names, so the two splits must expose
    #identical class folders; otherwise validation labels would silently mean different people
    if image_datasets['validation'].classes != image_datasets['train'].classes:
        raise ValueError(
            f"class folders differ between splits: train={image_datasets['train'].classes}, "
            f"validation={image_datasets['validation'].classes}"
        )

    #only the training split needs shuffling; validation metrics are order-independent
    dataloaders = {split: torch.utils.data.DataLoader(image_datasets[split], batch_size=BATCH_SIZE,
                                                      shuffle=(split == 'train'), num_workers=2)
                   for split in ['train', 'validation']}
    dataset_sizes = {split: len(image_datasets[split]) for split in ['train', 'validation']}
    class_names = image_datasets['train'].classes #folder names double as class labels
    num_classes = len(class_names)  #number of people being trained

    with open(CLASSES_SAVE_PATH, 'w') as f:
        json.dump(class_names, f) #saved so later code can map predicted indices back to names

except Exception as e: #best effort: report the problem; later cells check whether num_classes exists
    print(f"Error loading data: {e}. Ensure DATA_DIR and subfolder structure are correct.")

In [None]:
#loads a pretrained resnet18 model & adapts it to classify ben & pryce

def setup_classification_model(num_classes_to_predict):
    """Create a ResNet-18 initialised with ImageNet weights and a fresh output layer.

    Args:
        num_classes_to_predict: number of output units for the new final layer.

    Returns:
        Tuple ``(network, feature_width)``: the model moved to ``device`` and the
        ``in_features`` of the fully connected layer that was replaced.
    """
    backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Full fine-tuning: every parameter of the pretrained network stays trainable.
    backbone.requires_grad_(True)

    # Swap the original fully connected layer for one sized to our classes.
    feature_width = backbone.fc.in_features
    backbone.fc = nn.Linear(feature_width, num_classes_to_predict)

    return backbone.to(device), feature_width

# Build the classifier only when the data-loading cell succeeded in defining num_classes.
if 'num_classes' not in locals() and 'num_classes' not in globals():
    print("ERROR")
else:
    # Keep both the model and the width of its penultimate features.
    classification_model, embedding_dim_size = setup_classification_model(num_classes)

In [None]:
# Set up the loss, optimizer and learning-rate schedule used for fine-tuning.
if 'classification_model' not in locals() and 'classification_model' not in globals():
    print("classification model ERROR")
else:
    # Cross-entropy between the predicted class scores and the true labels.
    criterion = nn.CrossEntropyLoss()
    # Adam over every parameter of the network (whole model is fine-tuned).
    optimizer_ft = optim.Adam(classification_model.parameters(), lr=1e-4)
    # Multiply the learning rate by 0.1 every 7 scheduler steps.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
#TRAINING
def train_classification_model(model, criterion, optimizer, scheduler, num_epochs=25,
                               loaders=None, save_path=None):
    """Fine-tune ``model`` and keep the weights with the best validation accuracy.

    Every epoch runs a 'train' phase (gradient updates followed by one scheduler
    step) and then a 'validation' phase (no gradients). Whenever validation
    accuracy beats the best value seen so far, the weights are snapshotted in
    memory and written to ``save_path``.

    Args:
        model: network to train. Batches are moved to the device its parameters live on.
        criterion: loss callable ``criterion(outputs, labels)``; assumed to return the
            batch-mean loss (the ``nn.CrossEntropyLoss`` default), because it is
            re-weighted by the batch size when averaging over the epoch.
        optimizer: optimizer updating the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after training.
        num_epochs: number of train + validation passes.
        loaders: optional dict with 'train' and 'validation' iterables that yield
            ``(inputs, labels)`` batches. Defaults to the notebook-level ``dataloaders``.
        save_path: optional checkpoint file for the best weights. Defaults to the
            notebook-level ``MODEL_SAVE_PATH_CLASSIFIER``.

    Returns:
        Tuple ``(model, history)``: the model with the best weights loaded, and a dict
        of per-epoch float lists keyed 'train_loss', 'train_acc', 'validation_loss'
        and 'validation_acc'.

    Raises:
        ValueError: if a phase's loader yields no samples.
    """
    # Fall back to the notebook globals so existing call sites keep working.
    if loaders is None:
        loaders = dataloaders
    if save_path is None:
        save_path = MODEL_SAVE_PATH_CLASSIFIER
    # Put the data wherever the model already is instead of relying on a global device.
    target_device = next(model.parameters()).device

    since = time.time()  # start of the wall-clock timer reported at the end
    best_model_wts = copy.deepcopy(model.state_dict())  # snapshot to restore after training
    best_acc = 0.0  # highest validation accuracy seen so far
    history = {'train_loss': [], 'train_acc': [], 'validation_loss': [], 'validation_acc': []}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # training mode (e.g. dropout active, batch-norm statistics updated)
            else:
                model.eval()   # inference mode; gradients are disabled separately below

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0
            for inputs, labels in loaders[phase]:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)
                batch_size = inputs.size(0)

                if is_train:
                    optimizer.zero_grad()
                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)            # forward pass
                    _, preds = torch.max(outputs, 1)   # predicted class = highest score
                    loss = criterion(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean, so weight it by the batch size before summing.
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                samples_seen += batch_size

            if samples_seen == 0:
                raise ValueError(f"The '{phase}' dataloader produced no samples.")
            if is_train:
                scheduler.step()

            # Average over the samples actually processed in this phase.
            epoch_loss = running_loss / samples_seen
            epoch_acc = running_corrects / samples_seen
            history[f'{phase}_loss'].append(epoch_loss)
            history[f'{phase}_acc'].append(epoch_acc)
            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep (and persist) the weights of the best-performing epoch.
            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), save_path)
                print(f"New best validation accuracy: {best_acc:.4f}. Model saved to {save_path}")
        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best validation Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model, history

In [None]:
# Launch training once the model, loss, optimizer and scheduler all exist.
if ('classification_model' not in locals() or 'criterion' not in locals()
        or 'optimizer_ft' not in locals() or 'exp_lr_scheduler' not in locals()):
    print("error in training")
else:
    print("starting")
    NUM_EPOCHS = 15  # 15-25 epochs is a reasonable range for fine-tuning
    # Run the full train/validate loop and keep both the best model and its metric history.
    trained_classification_model, training_history = train_classification_model(
        model=classification_model,
        criterion=criterion,
        optimizer=optimizer_ft,
        scheduler=exp_lr_scheduler,
        num_epochs=NUM_EPOCHS,
    )
    print(" training done")