# Fine-tuning the modified model with our data

## Imports


In [2]:
# connecting drive to colab notebook
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torchvision
import numpy as np
import torch
import argparse
import cv2
from PIL import Image
from google.colab.patches import cv2_imshow
import torch.nn as nn
from collections import OrderedDict
import torch.optim as optim
import time
import copy
import torchvision.transforms as transforms
from torchvision.io.image import read_image
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
from torchvision.transforms.functional import to_pil_image
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import sys
sys.path.append('/content/drive/MyDrive/drive_folder')
from custom_dataset_loader import TaiChiDataset, ToTensor

## Helper functions

In [7]:
# converting all the images to tensors and then normalize them
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [8]:
def meshgrid2d(B, Y, X, stack=False, device='cuda'):
    # returns a meshgrid sized B x Y x X

    grid_y = torch.linspace(0.0, Y-1, Y, device=torch.device(device))
    grid_y = torch.reshape(grid_y, [1, Y, 1])
    grid_y = grid_y.repeat(B, 1, X)

    grid_x = torch.linspace(0.0, X-1, X, device=torch.device(device))
    grid_x = torch.reshape(grid_x, [1, 1, X])
    grid_x = grid_x.repeat(B, Y, 1)

    if stack:
        # note we stack in xy order
        # (see https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.grid_sample)
        grid = torch.stack([grid_x, grid_y], dim=-1)
        return grid
    else:
        return grid_y, grid_x

def get_normalised_grid(N, B, H, W):
  N_ = np.sqrt(N).round().astype(np.int32)
  grid_y, grid_x = meshgrid2d(B, N_, N_, stack=False, device='cuda')
  grid_y =  grid_y.reshape(B, -1)/float(N_-1) * (H-16)
  grid_x =  grid_x.reshape(B, -1)/float(N_-1) * (W-16)

  # normalise to values of range [-1, 1] - x = -1, y = -1 is the left-top pixel
  grid_x = (grid_x - W) / W 
  grid_y = (grid_y - H) / H
  xy = torch.stack([grid_x, grid_y], dim=-1) # B, N_*N_, 2
  xy = xy.view(B, N_, N_, 2)

  return xy

## Our prediction model

In [9]:
def initialise_model(device):
  # Initialize model with the best available weights
  # create an instance of (e.g.) torchvision.models.segmentation.fcn_resnet50
  # and tell it to load pretrained weights
  weights = FCN_ResNet50_Weights.DEFAULT
  modified_model = fcn_resnet50(weights=weights)

  # we are feature extracting so we only need to compute weights for the new layer
  set_parameter_requires_grad(modified_model, True)

  # modify that model by removing its final layer and replacing with a 2D vector output at each pixel(?) (instead of 20 class logits)
  # instead of torch.Size([1, 21, 120, 240]) -> torch.Size([1, 2, 120, 240])
  modified_model.classifier[3] = nn.Sequential()
  modified_model.classifier[4] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
  # print(modified_model)

  # model to train() and load onto computation devicce
  modified_model.to(device)

  return modified_model

### Train function

In [36]:
# define train function
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_aux=False):
  model.to(device)
  since = time.time()

  val_loss_history = []

  best_model_wts = copy.deepcopy(model.state_dict())

  for epoch in range(num_epochs):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            total_samples=0

            # Iterate over data.
            for i_batch, sample_batched in enumerate(dataloaders[phase]):
              batch_size = len(sample_batched['id'])
              total_samples+=batch_size

              inputs = sample_batched['image0']
              coords = sample_batched['coords'] 

              inputs = inputs.to(device).float() #torch.Size([B, 3, 120, 240])
              coords = coords.to(device) # torch.Size([B, 1, 4096, 2])
              
              # zero the parameter gradients
              optimizer.zero_grad()

              # forward
              # track history if only in train
              with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs) # torch.Size([B, 2, H, W]) same as inputs shape
                    outputs = outputs['out']

                    # take the values of outputs from the NxN grid points
                    grid = get_normalised_grid(coords.shape[2], batch_size, outputs.shape[2], outputs.shape[3]) # ([B, H(64), W(64), 2])
                    outputs = torch.nn.functional.grid_sample(outputs, grid) # torch.size([B,2,H,W])

                    # reshape to match coords shape
                    outputs = torch.permute(outputs, (0, 2, 3, 1)) # torch.Size([B, H, W, 2])
                    # outputs = outputs.view(batch_size,1,-1,2) # torch.Size([B, 1, 120*240, 2])
                    coords = coords.view(batch_size, outputs.shape[1], outputs.shape[2], 2) # torch.Size([B, H, W, 2])

                    loss = criterion(outputs, coords)
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)

            if total_samples != 0:
              epoch_loss = running_loss / total_samples
              print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            # deep copy the model
            if epoch == 0:
                best_loss = epoch_loss
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_loss_history.append(epoch_loss)

            print()
            
        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        # print('Best val loss: {:4f}\n'.format(best_loss))

  # load best model weights
  model.load_state_dict(best_model_wts)
  return model, val_loss_history




## Initializing Datasets and Dataloaders

In [12]:
# Load dataset

TRAIN_DATA = "training_data2_2023-01-16"
!unzip -d "$TRAIN_DATA"/ /content/drive/MyDrive/"$TRAIN_DATA".zip # unziping training data

dataset = TaiChiDataset(log_file=TRAIN_DATA+'/sample_ids.txt',
                        root_dir=TRAIN_DATA,
                        check=True,
                        transform=ToTensor()
                        )
print(len(dataset))

Archive:  /content/drive/MyDrive/training_data2_2023-01-16.zip
replace training_data2_2023-01-16/frame0/-CR4xjdQbkc_920.npy? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
21


In [16]:
def split_dataset(dataset, validation_split, batch_size, shuffle_dataset, random_seed):
  dataset_size = len(dataset)
  indices = list(range(dataset_size))
  split = int(np.floor(validation_split * dataset_size))
  if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
  train_indices, val_indices = indices[split:], indices[:split]
  train_sampler = SubsetRandomSampler(train_indices)
  valid_sampler = SubsetRandomSampler(val_indices)
  
  train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
  validation_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)

  return train_loader, validation_loader

## Training & evaluating the model

In [37]:
# set computation device
device = torch.device('cuda')

# initialise model
modified_model = initialise_model(device)

# see what parameters will be tuned
params_to_update = modified_model.parameters()
print('Params to learn:')
for name, param in modified_model.named_parameters():
  if param.requires_grad:
      print(name)

Params to learn:
classifier.4.weight
classifier.4.bias


In [38]:
# Define parameters
validation_split = .0
shuffle_dataset = True
random_seed = 42
batch_size = len(dataset)
num_epochs = 400

# Get train and validation dataloaders
train_loader, validation_loader = split_dataset(dataset, validation_split, batch_size, shuffle_dataset, random_seed)
dataloaders_dict = {'train': train_loader, 'val': validation_loader}

# Define optimizer
my_optimizer = optim.SGD(params_to_update, lr=0.1, momentum=0.9)

# Setup the loss
my_criterion = nn.MSELoss()

# Train and evaluate
modified_model, hist = train_model(modified_model,
                                   dataloaders_dict,
                                   my_criterion,
                                   my_optimizer,
                                   num_epochs=num_epochs)

----------
Epoch 0/399
----------
train Loss: 88252.6406


Training complete in 0m 0s
----------
Epoch 1/399
----------
train Loss: 198899.3906


Training complete in 0m 0s
----------
Epoch 2/399
----------
train Loss: 26299.6016


Training complete in 0m 1s
----------
Epoch 3/399
----------
train Loss: 91878.7500


Training complete in 0m 1s
----------
Epoch 4/399
----------
train Loss: 128475.6953


Training complete in 0m 1s
----------
Epoch 5/399
----------
train Loss: 12897.6641


Training complete in 0m 1s
----------
Epoch 6/399
----------
train Loss: 92849.4766


Training complete in 0m 2s
----------
Epoch 7/399
----------
train Loss: 70710.1172


Training complete in 0m 2s
----------
Epoch 8/399
----------
train Loss: 11056.5303


Training complete in 0m 2s
----------
Epoch 9/399
----------
train Loss: 83539.7891


Training complete in 0m 2s
----------
Epoch 10/399
----------
train Loss: 37742.8398


Training complete in 0m 2s
----------
Epoch 11/399
----------
train Loss: 1805

In [47]:
for name, param in modified_model.named_parameters():
  if (name == 'classifier.4.weight') or (name == 'classifier.4.bias'):
    print(name, param)

classifier.4.weight Parameter containing:
tensor([[[[ 8.0498e+00]],

         [[ 2.7167e+00]],

         [[ 1.8490e+00]],

         ...,

         [[ 4.6232e+00]],

         [[ 2.0870e+00]],

         [[-2.7501e-01]]],


        [[[-3.2070e-02]],

         [[-3.3037e-03]],

         [[-2.2896e-02]],

         ...,

         [[-1.8999e-02]],

         [[-3.9636e-02]],

         [[-1.4570e-02]]]], device='cuda:0', requires_grad=True)
classifier.4.bias Parameter containing:
tensor([30.1415,  0.0376], device='cuda:0', requires_grad=True)
