## Train video clip classifier

In [None]:
import sys
sys.path.insert(0, '..')

import os
import numpy as np

from config.paths import Path, Name
from config.constants import Constants
from dataset.labeled_dataset import LabeledDataset
from logger.train_logger import TrainLogger
from logger.test_logger import TestLogger

In [None]:
# Clip length; selects the dataset variant and fills the `sec` placeholder in the paths below.
SECONDS_PER_CLIP = Constants.SecondsPerClip.THREE_SEC

# Number of target classes and the dataset-type tag of the "Balanced" configuration.
NUM_CLASSES = Constants.DataType.Balanced.NUM_CLASSES
DATA_TYPE = Constants.DataType.Balanced.TYPE

# Base location; the path fragments below are appended to it by plain string
# concatenation wherever they are used in this notebook.
HOME_PATH = Path.DATA_HOME
# Checkpoint of the pretrained autoencoder's encoder module (read by load_model_state).
PRETRAINED_MODEL = Path.AUTOENCODER_MODEL_PATH.format(sec=SECONDS_PER_CLIP, module='encoder')
# Directory and file-name template for classifier checkpoints (used by save_model).
MODEL_PATH = Path.CLASSIFIER_MODEL_PATH.format(sec=SECONDS_PER_CLIP, type=DATA_TYPE)
MODEL_NAME = Name.CLASSIFIER_MODEL_NAME
# Directory and file-name templates for the HDF5 metric files.
METRICS_PATH = Path.METRICS_PATH.format(sec=SECONDS_PER_CLIP, type=DATA_TYPE)
TRAINING_METRIC = Name.TRAINING_METRIC
EVALUATION_METRIC = Name.EVALUATION_METRIC

In [None]:
# Labeled clips for the 'train' and 'test' splits of the selected clip length and data type.
train_dataset = LabeledDataset(SECONDS_PER_CLIP, DATA_TYPE, 'train')
test_dataset = LabeledDataset(SECONDS_PER_CLIP, DATA_TYPE, 'test')

In [None]:
# Sanity check: number of samples in the training split.
print(len(train_dataset))

### Define the model

In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data

from torch.optim import Adagrad
from torch.autograd import Variable
from torch.nn.functional import cross_entropy

# True when a CUDA device is available; gates the .cuda() transfers in the cells below.
use_cuda = torch.cuda.is_available()

In [None]:
class VideoEncoder(nn.Module):
    """LSTM encoder over a sequence of per-frame feature vectors.

    The defaults reproduce the architecture the pretrained checkpoint is
    loaded into (512-d inputs, 2048 hidden units, 2 stacked layers); they are
    exposed as keyword arguments so differently sized encoders can be built.

    Args:
        input_size: Size of each input feature vector.
        hidden_size: Number of hidden units per LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=512, hidden_size=2048, num_layers=2):
        super(VideoEncoder, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)

    def forward(self, x):
        """Run the LSTM over `x` and return its `(output, hidden)` pair.

        The LSTM is built without `batch_first`, so `x` is expected as
        (seq_len, batch, input_size).
        """
        output, hidden = self.lstm(x)
        return output, hidden

In [None]:
class VideoClassifier(nn.Module):
    """Clip classifier: pretrained LSTM encoder followed by one linear layer.

    Args:
        encoder_state_dict: State dict loaded into a freshly built
            `VideoEncoder` before the linear head is attached.
    """

    def __init__(self, encoder_state_dict):
        super(VideoClassifier, self).__init__()
        self.encoder = VideoEncoder()
        self.encoder.load_state_dict(encoder_state_dict)
        if use_cuda:
            self.encoder = self.encoder.cuda()
        # The head consumes every time step of the encoder output flattened
        # together, so the sequence length is fixed at SECONDS_PER_CLIP * 6.
        self.linear = nn.Linear(SECONDS_PER_CLIP*6*2048, NUM_CLASSES)

    def forward(self, x):
        """Return class scores of shape (batch, NUM_CLASSES).

        `x` is fed to the encoder unchanged, i.e. as (seq_len, batch, features).
        """
        output, _ = self.encoder(x)
        # The encoder output is (seq_len, batch, hidden). A real transpose is
        # required here: `view` only reinterprets memory, which interleaves
        # samples as soon as batch > 1 (for batch == 1 both are equivalent).
        output = output.transpose(0, 1).contiguous()
        output = output.view(output.shape[0], -1)
        return self.linear(output)

### Model Utils

In [None]:
def load_model_state(path=HOME_PATH + PRETRAINED_MODEL):
    """Load a checkpoint file and return its `'state_dict'` entry.

    Tensors are always mapped to CPU storage so that a checkpoint written on
    a GPU machine can also be read on a CPU-only one; the consuming module
    copies the values to its own device in `load_state_dict`.

    Args:
        path: Location of the checkpoint file.

    Returns:
        The object stored under the checkpoint's `'state_dict'` key.
    """
    checkpoint = torch.load(path, map_location=lambda storage, location: storage)
    return checkpoint['state_dict']

In [None]:
def save_model(model_state_dict, optimizer_state_dict, split, path=HOME_PATH+MODEL_PATH):
    """Write a checkpoint holding the model and optimizer state dicts.

    Args:
        model_state_dict: Stored under the `'state_dict'` key.
        optimizer_state_dict: Stored under the `'optimizer'` key.
        split: Value substituted into the MODEL_NAME file-name template.
        path: Target directory; created if missing. The file name is appended
            by plain concatenation, so it must end with a path separator.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    checkpoint = {
        'state_dict': model_state_dict,
        'optimizer': optimizer_state_dict,
    }
    torch.save(checkpoint, path + MODEL_NAME.format(split))

### Metric Utils

In [None]:
from sklearn.metrics import confusion_matrix

def get_confusion_matrix(y_true, y_pred, normalise=False):
    """Build the confusion matrix of `y_pred` against `y_true`.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        normalise: When true, each row is divided by its sum so that it holds
            per-true-class fractions instead of counts.

    Returns:
        The matrix produced by `sklearn.metrics.confusion_matrix`, converted
        to float and row-normalised when `normalise` is set.
    """
    conf_mat = confusion_matrix(y_true, y_pred)
    if normalise:
        conf_mat = conf_mat.astype('float')
        row_totals = conf_mat.sum(axis=1, keepdims=True)
        # A class that only occurs in y_pred has an all-zero row; leave it at
        # zero instead of producing NaN through 0/0.
        conf_mat = np.divide(conf_mat, row_totals,
                             out=np.zeros_like(conf_mat), where=row_totals != 0)
    return conf_mat

In [None]:
def get_error_rates(confusion_matrix):
    """Derive per-class TP / FP / FN / TN counts from a confusion matrix.

    Args:
        confusion_matrix: Square matrix with true classes on rows and
            predicted classes on columns. (The parameter name shadows the
            sklearn function of the same name inside this function only.)

    Returns:
        Tuple `(tp, fp, fn, tn)` of 1-D arrays, one entry per class.
    """
    # Use the argument, not a notebook-level global, as the data source.
    matrix = np.asarray(confusion_matrix)
    tp = np.diag(matrix)
    fp = matrix.sum(axis=0) - tp
    fn = matrix.sum(axis=1) - tp
    # Everything outside class i's row and column: total minus the rest.
    tn = matrix.sum() - (tp + fp + fn)
    return tp, fp, fn, tn

In [None]:
def get_precision_recall(tp, fp, fn):
    """Compute per-class precision and recall.

    Args:
        tp: True-positive counts.
        fp: False-positive counts.
        fn: False-negative counts.

    Returns:
        Tuple `(precision, recall)` of float arrays. Entries whose
        denominator is zero (class never predicted / never present) are
        reported as 0.0 instead of NaN.
    """
    tp = np.asarray(tp, dtype=float)
    predicted = tp + np.asarray(fp, dtype=float)   # tp + fp
    actual = tp + np.asarray(fn, dtype=float)      # tp + fn
    precision = np.divide(tp, predicted, out=np.zeros_like(tp), where=predicted != 0)
    recall = np.divide(tp, actual, out=np.zeros_like(tp), where=actual != 0)
    return precision, recall

In [None]:
def save_training_metrics(y_true, y_pred, y_pred_score, epoch_losses, split, epoch, avg_loss, path=HOME_PATH+METRICS_PATH):
    """Write the training metrics of one epoch to an HDF5 file.

    The file holds four gzip-compressed datasets: `y_true`, `y_pred`,
    `y_pred_score` and `epoch_losses`.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_pred_score: Prediction scores.
        epoch_losses: Recorded loss values.
        split, epoch, avg_loss: Substituted, in this order, into the
            TRAINING_METRIC file-name template.
        path: Target directory; created if missing. The file name is appended
            by plain concatenation, so it must end with a path separator.
    """
    # h5py is not imported at notebook level; import it where it is needed.
    import h5py

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    target = path + TRAINING_METRIC.format(split, epoch, avg_loss)
    with h5py.File(target, 'w') as f:
        f.create_dataset('y_true', data=y_true, compression='gzip')
        f.create_dataset('y_pred', data=y_pred, compression='gzip')
        f.create_dataset('y_pred_score', data=y_pred_score, compression='gzip')
        f.create_dataset('epoch_losses', data=epoch_losses, compression='gzip')

In [None]:
def save_evaluation_metrics(y_true, y_pred, y_pred_score, split, path=HOME_PATH+METRICS_PATH):
    """Write evaluation metrics to an HDF5 file.

    The file holds three gzip-compressed datasets: `y_true`, `y_pred` and
    `y_pred_score`.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_pred_score: Prediction scores.
        split: Substituted into the EVALUATION_METRIC file-name template.
        path: Target directory; created if missing. The file name is appended
            by plain concatenation, so it must end with a path separator.
    """
    # h5py is not imported at notebook level; import it where it is needed.
    import h5py

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    target = path + EVALUATION_METRIC.format(split)
    with h5py.File(target, 'w') as f:
        f.create_dataset('y_true', data=y_true, compression='gzip')
        f.create_dataset('y_pred', data=y_pred, compression='gzip')
        f.create_dataset('y_pred_score', data=y_pred_score, compression='gzip')

### Train Hyperparams

In [None]:
# Settings passed to train_model / evaluate_model in the final cell.
BATCH_SIZE = 1
PRINT_EVERY = 50
EPOCHS = 50

# Pretrained encoder weights, loaded once; train_model reads this when it builds the classifier.
encoder_state = load_model_state()

In [None]:
def train_model(epochs, print_every, split_number, batch_size):
    """Train a `VideoClassifier` on `train_dataset` with early stopping.

    After each epoch whose loss improves on the previous one, the model
    checkpoint and training metrics are saved and the confusion matrix is
    printed; the first epoch that does not improve ends training.

    Args:
        epochs: Maximum number of epochs.
        print_every: Forwarded to `TrainLogger`.
        split_number: Forwarded to the checkpoint / metric file names.
        batch_size: Batch size of the training `DataLoader`.

    Returns:
        The trained classifier.
    """
    print('Training the model...')

    train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                       num_workers=1)
    train_logger = TrainLogger(batch_size, print_every, len(train_dataloader))

    classifier = VideoClassifier(encoder_state)
    classifier.train()
    loss_function = nn.CrossEntropyLoss()
    if use_cuda:
        classifier = classifier.cuda()
        loss_function = loss_function.cuda()

    classifier_optimizer = Adagrad(classifier.parameters())
    prev_epoch_loss = float('inf')

    for e in range(epochs):
        y_pred = []
        epoch_loss = None
        num_batches = 0
        for i, (frame, annotation) in enumerate(train_dataloader):
            classifier.zero_grad()

            classifier_input = Variable(frame, requires_grad=True)
            label = Variable(annotation)
            if use_cuda:
                classifier_input = classifier_input.cuda()
                label = label.cuda()
            # Assumes frame is (batch, seq_len, features) - TODO confirm against
            # LabeledDataset. Swap to the (seq_len, batch, features) layout with a
            # real transpose: `view` would scramble samples for batch > 1 and
            # fails outright on a short final batch.
            classifier_input = classifier_input.transpose(0, 1).contiguous()
            prediction = classifier(classifier_input)
            y_pred.extend(np.argmax(prediction.data.cpu().numpy(), axis=1))

            loss = loss_function(prediction, label)
            epoch_loss = train_logger.update(e, i, prediction, label, loss)
            loss.backward()
            classifier_optimizer.step()
            num_batches = i + 1

        if epoch_loss is None:
            # The loader produced no batches, so there is nothing to evaluate.
            break

        if epoch_loss < prev_epoch_loss:
            save_model(classifier.state_dict(), classifier_optimizer.state_dict(), split_number)
            # Average over the number of batches seen (i + 1); dividing by the
            # last index was off by one and divided by zero for a single batch.
            # NOTE(review): presumes train_logger.update returns the running
            # epoch total - confirm against TrainLogger.
            save_training_metrics(train_logger.y_true, y_pred,
                                  train_logger.y_pred_score, train_logger.epoch_losses,
                                  split_number, e, (epoch_loss / num_batches))
            prev_epoch_loss = epoch_loss
            # NOTE(review): y_pred is reset every epoch; confirm that
            # train_logger.y_true covers the same samples.
            print('\n', get_confusion_matrix(train_logger.y_true, y_pred))
        else:
            break
    return classifier

In [None]:
def evaluate_model(classifier, print_every, batch_size):
    """Run `classifier` over `test_dataset` and collect its predictions.

    Args:
        classifier: Model to evaluate; switched to eval mode.
        print_every: Forwarded to `TestLogger`.
        batch_size: Batch size of the test `DataLoader`.

    Returns:
        Tuple `(y_true, y_pred, y_pred_score)`: the logger's recorded true
        labels, the list of predicted class indices, and the logger's
        recorded prediction scores.
    """
    print('\nEvaluating the model...')

    test_dataloader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True,
                                      num_workers=1)
    test_logger = TestLogger(batch_size, print_every, len(test_dataloader))

    classifier.eval()
    y_pred = []
    # Iterate the loader built above (the test split), not a training loader.
    for i, (frame, annotation) in enumerate(test_dataloader):
        classifier_input = Variable(frame)
        label = Variable(annotation)
        if use_cuda:
            classifier_input = classifier_input.cuda()
            label = label.cuda()
        # Assumes frame is (batch, seq_len, features) - TODO confirm against
        # LabeledDataset. A real transpose is needed; `view` would scramble
        # samples for batch > 1 and fails on a short final batch.
        classifier_input = classifier_input.transpose(0, 1).contiguous()
        prediction = classifier(classifier_input)
        # Keep one prediction per sample in the batch, not just the first.
        y_pred.extend(np.argmax(prediction.data.cpu().numpy(), axis=1))

        test_logger.update(i, prediction, label)

    return test_logger.y_true, y_pred, test_logger.y_pred_score

In [None]:
# Train on split 0, evaluate on the test set, persist and report the metrics.
trained_model = train_model(EPOCHS, PRINT_EVERY, 0, BATCH_SIZE)
# evaluate_model takes (classifier, print_every, batch_size); it builds its own
# data loader from test_dataset.
y_true_eval, y_pred_eval, y_pred_score_eval = evaluate_model(trained_model, PRINT_EVERY, BATCH_SIZE)
save_evaluation_metrics(y_true_eval, y_pred_eval, y_pred_score_eval, 0)
conf_mat = get_confusion_matrix(y_true_eval, y_pred_eval)
tp, fp, fn, tn = get_error_rates(conf_mat)
precision, recall = get_precision_recall(tp, fp, fn)
print('\nConfusion Matrix: ', conf_mat)
print('Precision: ', precision)
print('Recall: ', recall)