## Set up directory

In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive so the
# project files stored on the drive are reachable from the notebook.
from google.colab import drive

drive.mount('/content/gdrive')

In [None]:
# Switch into the project folder on the mounted drive; the relative paths used
# by later cells (data/..., ./model/...) are resolved from here.
%cd ./gdrive/MyDrive/deep_learning/

## Reading in image batches and feeding them into the CNN

In [None]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from torchtext.vocab import GloVe
import pandas as pd
import torch
import random 
import json
import glob
import numpy as np
import torch.nn as nn
import re
import os 
from utils import *

# LOG: 
# model_chkpt - base model --> poor performance 
# model_chkpt_s2s - base model (encoder/decoder, dropout 0.2) --> poor performance

In [None]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Allow cuDNN to benchmark convolution algorithms and keep the fastest one.
torch.backends.cudnn.benchmark = True

In [None]:
# Pretrained ResNet-18 with its last child module removed, so it outputs
# features instead of class scores.
feature_extractor = torch.hub.load('pytorch/vision:v0.9.0', 'resnet18', pretrained=True)
feature_extractor = torch.nn.Sequential(*list(feature_extractor.children())[:-1])  # strip last layer
# NOTE(review): the extractor is never switched to eval() in this cell; confirm
# that FrameDataset handles this, otherwise BatchNorm runs in training mode.

# GLOBAL VARIABLES
validation_size = 0.2  # fraction of the annotated ids held out for validation
max_epochs = 4         # upper bound of the epoch loop in the training cell

# PARAMS (DATA LOAD)
# The same settings (including shuffling) are passed to both data loaders.
params = {'batch_size': 12,
          'shuffle': True}

# LOAD LABELS
# The context manager closes the file even if parsing the JSON fails.
with open('data/training_annotation.json', encoding='utf-8') as f:
    targets = json.load(f)

# Shuffle the ids with a fixed seed so the train/validation split is reproducible.
image_ids = list(targets.keys())
random.seed(10)
random.shuffle(image_ids)

# Split data into validation and train set
n_validation = int(validation_size * len(image_ids))
partition = {
    'validation': image_ids[:n_validation],
    'train': image_ids[n_validation:],
}

# Initialize video frame transformer: resize to 224x224, convert to a tensor
# and normalize with the ImageNet channel means / standard deviations.
train_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

training_set = FrameDataset(partition['train'], targets, train_transformer, feature_extractor)
training_generator = DataLoader(training_set, **params)

validation_set = FrameDataset(partition['validation'], targets, train_transformer, feature_extractor)
validation_generator = DataLoader(validation_set, **params)

In [None]:
# Scratch check: summed cross-entropy loss for a single prediction/label pair.
# NOTE(review): `pred1` and `labels` are not assigned in any cell above this
# one, so a fresh top-to-bottom run presumably fails here with NameError unless
# `from utils import *` provides them -- confirm, or drop this cell.
criterion = nn.CrossEntropyLoss(reduction='sum')
loss = criterion(pred1, labels[0].to(device))

In [None]:
# NOTE(review): this cell performs the same data setup as an earlier cell in
# the notebook; running either one is enough.

# Pretrained ResNet-18 with its last child module removed, so it outputs
# features instead of class scores.
feature_extractor = torch.hub.load('pytorch/vision:v0.9.0', 'resnet18', pretrained=True)
feature_extractor = torch.nn.Sequential(*list(feature_extractor.children())[:-1])  # strip last layer
# NOTE(review): the extractor is never switched to eval() in this cell; confirm
# that FrameDataset handles this, otherwise BatchNorm runs in training mode.

# GLOBAL VARIABLES
validation_size = 0.2  # fraction of the annotated ids held out for validation
max_epochs = 4         # upper bound of the epoch loop in the training cell

# PARAMS (DATA LOAD)
# The same settings (including shuffling) are passed to both data loaders.
params = {'batch_size': 12,
          'shuffle': True}

# LOAD LABELS
# The context manager closes the file even if parsing the JSON fails.
with open('data/training_annotation.json', encoding='utf-8') as f:
    targets = json.load(f)

# Shuffle the ids with a fixed seed so the train/validation split is reproducible.
image_ids = list(targets.keys())
random.seed(10)
random.shuffle(image_ids)

# Split data into validation and train set
n_validation = int(validation_size * len(image_ids))
partition = {
    'validation': image_ids[:n_validation],
    'train': image_ids[n_validation:],
}

# Video frame transformer: resize to 224x224, convert to a tensor and
# normalize with the ImageNet channel means / standard deviations.
train_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

training_set = FrameDataset(partition['train'], targets, train_transformer, feature_extractor)
training_generator = DataLoader(training_set, **params)

validation_set = FrameDataset(partition['validation'], targets, train_transformer, feature_extractor)
validation_generator = DataLoader(validation_set, **params)

In [None]:
def _triplet_loss(criterion, predictions, labels):
    """Return the summed loss over the object1 / relationship / object2 outputs.

    `predictions` is the 3-tuple returned by the model and `labels` holds the
    matching targets at indices 0, 1 and 2.
    """
    object1_pred, relationship_pred, object2_pred = predictions
    return (criterion(object1_pred, labels[0])
            + criterion(relationship_pred, labels[1])
            + criterion(object2_pred, labels[2]))


# Losses are summed (not averaged) per batch; they are normalised by the
# number of samples further down.
criterion = nn.CrossEntropyLoss(reduction='sum')
model = Seq2Seq()  # change type of model to experiment different ones
model.to(device)
optimizer = torch.optim.Adam(model.parameters())

start_epoch = 0
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0
train_loss_list = []     # average training loss per epoch
valid_loss_list = []     # average validation loss per epoch
train_loss_it = []       # average training loss per iteration

checkpoint_path = './model/current_checkpoint_ori_adam.pt'
best_model_path = './model/best_model_ori_adam.pt'

# Resume from the saved checkpoint when one exists; otherwise train from scratch.
if os.path.exists(checkpoint_path):
    (model, optimizer, start_epoch, valid_loss_min,
     train_loss_list, valid_loss_list, train_loss_it) = load_ckp(checkpoint_path, model, optimizer)
else:
    print(f'No checkpoint found at {checkpoint_path}; training from scratch')

# Snapshot (copies, not live references) of the parameters before training, to
# check afterwards whether the model parameters were updated at all.
start_params = [(name, param.detach().clone()) for name, param in model.named_parameters()]

for epoch in range(start_epoch, max_epochs):
    print(f'---------- Starting epoch {epoch} ----------')
    train_loss = 0.0
    valid_loss = 0.0

    # Training
    model.train()
    for batch_idx, (batch_data, labels) in enumerate(training_generator):
        batch_size = batch_data.shape[0]
        # Transfer to GPU
        batch_data, labels = batch_data.to(device), [label.to(device) for label in labels]
        loss = _triplet_loss(criterion, model(batch_data), labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep plain floats: storing the loss tensor itself would keep every
        # batch's autograd graph alive and put tensors into the checkpoint.
        batch_loss = loss.item()
        av_loss = batch_loss / batch_size
        train_loss_it.append(av_loss)  # recorded for every iteration
        train_loss += batch_loss
        if batch_idx % 5 == 0:
            print(f'Iteration {batch_idx} completed with avg loss {av_loss}')

    # Validation
    model.eval()
    with torch.no_grad():
        for batch_idx, (batch_data, labels) in enumerate(validation_generator):
            batch_data, labels = batch_data.to(device), [label.to(device) for label in labels]
            valid_loss += _triplet_loss(criterion, model(batch_data), labels).item()
            if batch_idx % 5 == 0:
                print(f'Validation iteration {batch_idx} completed')

    # calculate average losses (per sample, since the criterion sums)
    train_loss = train_loss / len(partition['train'])
    valid_loss = valid_loss / len(partition['validation'])
    train_loss_list.append(train_loss)
    valid_loss_list.append(valid_loss)

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch,
        train_loss,
        valid_loss
        ))

    # Update the running minimum first so the checkpoint stores the best
    # validation loss seen so far rather than just this epoch's value.
    is_best = bool(valid_loss <= valid_loss_min)
    previous_min = valid_loss_min
    if is_best:
        valid_loss_min = valid_loss

    # create checkpoint variable and add important data
    checkpoint = {
        'epoch': epoch + 1,
        'valid_loss_min': valid_loss_min,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'train_loss_list': train_loss_list,
        'valid_loss_list': valid_loss_list,
        'train_loss_it': train_loss_it
    }

    # save checkpoint
    save_ckp(checkpoint, False, checkpoint_path, best_model_path)

    # save the model if validation loss has decreased
    if is_best:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(previous_min, valid_loss))
        # save checkpoint as best model
        save_ckp(checkpoint, True, checkpoint_path, best_model_path)

In [None]:
from matplotlib import pyplot as plt

# visualize training loss over time - huge fluctuations, but generally the loss went down
# Entries may be tensors (possibly on the GPU or attached to an autograd graph),
# which matplotlib cannot convert itself, so turn everything into floats first.
plt.plot([float(value) for value in train_loss_it])
plt.xlabel('Training iteration')
plt.ylabel('Average loss per sample')

## Generating Predictions

In [None]:
# Restore the model, optimizer and recorded loss history from the best checkpoint.
# NOTE(review): this reads ./model/best_model_s2s.pt, whereas the training cell
# writes its best model to ./model/best_model_ori_adam.pt -- confirm that this
# is the checkpoint you intend to predict with.
best_model_path = './model/best_model_s2s.pt'
(
    model,
    optimizer,
    start_epoch,
    valid_loss_min,
    train_loss_list,
    valid_loss_list,
    train_loss_it,
) = load_ckp(best_model_path, model, optimizer)

In [None]:
# Pretrained ResNet-18 with its last child module removed, used to turn the
# test frames into features.
feature_extractor = torch.hub.load('pytorch/vision:v0.9.0', 'resnet18', pretrained=True)
feature_extractor = torch.nn.Sequential(*list(feature_extractor.children())[:-1])  # strip last layer
# NOTE(review): the extractor is not switched to eval() here; confirm that
# extract_test_images handles this.

test_data_dir = 'data/test/test/'
# os.listdir returns entries in arbitrary order; sort so the row ids written to
# the CSV are reproducible, and skip stray non-directory entries.
# NOTE(review): assumes the expected submission order is the sorted folder order.
test_ids = sorted(
    entry for entry in os.listdir(test_data_dir)
    if os.path.isdir(os.path.join(test_data_dir, entry))
)
test_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# One entry per prediction, three consecutive entries per test folder:
# object1, relationship, object2.
label = []
model.eval()
for ID in test_ids:
    path2frames = sorted(glob.glob(os.path.join(test_data_dir, ID, '*.jpg')))
    path2frames = path2frames[1::3]  # keep every third frame, starting at the second
    test_images = extract_test_images(test_transformer, feature_extractor, path2frames)
    test_images = test_images.to(device)
    with torch.no_grad():
        object1_pred, relationship_pred, object2_pred = model(test_images)
    for head_pred in (object1_pred, relationship_pred, object2_pred):
        # Indices of the 5 highest-scoring classes, written space-separated.
        te = torch.topk(head_pred, 5)[1].squeeze().cpu().numpy()
        label.append(' '.join(map(str, te)))

# Row ids simply number the predictions in the order they were produced.
ID_list = list(range(len(label)))
df = pd.DataFrame({'ID': ID_list, 'label': label})
df.to_csv('predictions_02042021.csv', index=False)