# SETUP
The following blocks must be executed before anything else: they set up the imports, classes, functions and
constants that are needed by all stages.

In [2]:
import os
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import resnet34

from PIL import Image
from tqdm import tqdm

from gtea_dataset import GTEA61, GTEA61_flow, GTEA61_2Stream
from objectAttentionModelConvLSTM import attentionModel
from flow_resnet import flow_resnet34
from twoStreamModel import twoStreamAttentionModel
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)

In [3]:
DEVICE = 'cuda' # 'cuda' or 'cpu'
NUM_CLASSES = 61 # number of output classes given to every model (presumably the 61 GTEA61 action classes - confirm)

BATCH_SIZE = 32     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

# File-name prefixes used by generate_model_checkpoint_name / generate_log_filenames
RGB_PREFIX = 'model_rgb_state_dict'
FLOW_PREFIX = 'model_flow_state_dict'
JOINT_PREFIX = 'model_twoStream_state_dict'
LOG_PREFIX = 'log_stage'
VAL_LOG_PREFIX = 'val_log_stage'
DATA_DIR = '../GTEA61'  # dataset root handed to the GTEA61* dataset classes
model_folder = '../saved_models'  # checkpoints and pickled logs are written here

In [4]:
import pickle

class Logger():
    """Pickle-backed record of one run's hyper-parameters and metrics.

    Attributes:
        params: the keyword arguments given at construction time.
        data: list of ``{epoch: (acc, loss)}`` entries, one per epoch.
        step_data: list of ``{step: (acc, loss)}`` entries, one per step.
    """

    def __init__(self, **params):
        self.params = params
        self.data, self.step_data = [], []

    def add_epoch_data(self, epoch, acc, loss):
        """Append the (acc, loss) pair recorded for ``epoch``."""
        record = {epoch: (acc, loss)}
        self.data.append(record)

    def add_step_data(self, step, acc, loss):
        """Append the (acc, loss) pair recorded for ``step``."""
        record = {step: (acc, loss)}
        self.step_data.append(record)

    def save(self, path):
        """Pickle the whole logger object to ``path``."""
        with open(path, 'wb') as sink:
            pickle.dump(self, sink)

    @classmethod
    def load(cls, path):
        """Return whatever object was pickled at ``path``.

        Unpickling executes code stored in the file, so only load files
        produced by ``save`` on a trusted machine.
        """
        with open(path, 'rb') as source:
            return pickle.load(source)

    
def generate_model_checkpoint_name(stage, n_frames, ms_block=False, loss=None, optional=''):
    """Build the ``.pth`` file name of a model checkpoint.

    Args:
        stage: training stage; values below 3 use the RGB prefix (stage 2 also
            gets a ``_stage2`` tag), 3 uses the flow prefix, anything else the
            two-stream prefix.
        n_frames: frame count embedded in the name as ``_<n>frames``.
        ms_block: when true, ``_msblock`` is appended.
        loss: optional loss tag, appended as ``_<loss>``.
        optional: free-form suffix placed right before the extension.

    Returns:
        The checkpoint file name (no directory).
    """
    if stage < 3:
        prefix = RGB_PREFIX + ('_stage2' if stage == 2 else '')
    elif stage == 3:
        prefix = FLOW_PREFIX
    else:
        prefix = JOINT_PREFIX

    pieces = [prefix, '_' + str(n_frames) + 'frames']
    if loss is not None:
        pieces.append('_' + loss)
    if ms_block:
        pieces.append('_msblock')
    pieces.append(optional + ".pth")

    return ''.join(pieces)


def generate_log_filenames(stage, n_frames, ms_block=False, loss=None, optional=''):
    """Build the ``.obj`` file names of the training and validation logs.

    Both names share the same suffix and differ only in their prefix.

    Args:
        stage: training stage, embedded right after the prefix.
        n_frames: frame count embedded as ``_<n>frames``.
        ms_block: when true, ``_msblock`` is appended.
        loss: optional loss tag, appended as ``_<loss>``.
        optional: free-form suffix placed right before the extension.

    Returns:
        A ``(train_name, val_name)`` tuple (no directory).
    """
    tail = str(stage) + '_' + str(n_frames) + 'frames'
    if loss is not None:
        tail += '_' + loss
    if ms_block:
        tail += '_msblock'
    tail += optional + ".obj"

    return LOG_PREFIX + tail, VAL_LOG_PREFIX + tail

In [5]:
# Frame pre-processing pipelines shared by the stages below
normalize = Normalize(std=[0.229, 0.224, 0.225], mean=[0.485, 0.456, 0.406])

# Training pipeline: rescale, random flip, multi-scale corner crop to 224
spatial_transform = Compose([
    Scale(256),
    RandomHorizontalFlip(),
    MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
    ToTensor(),
    normalize,
])

# Validation pipeline: rescale and a deterministic center crop
spatial_transform_val = Compose([
    Scale(256),
    CenterCrop(224),
    ToTensor(),
    normalize,
])

# Stage 0, that is, Stages 1 and 2 without the cam

# Stage 0.1 specific-setup

In [69]:
STAGE = 0

LR = 0.001            # Initial learning rate
MOMENTUM = 0.9        # SGD momentum; only logged in this stage, which optimizes with Adam
WEIGHT_DECAY = 4e-5   # Weight decay passed to the optimizer

NUM_EPOCHS = 200           # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75, 150]  # Epoch milestones at which MultiStepLR multiplies the LR by GAMMA
GAMMA = 0.1                # Multiplicative factor for learning rate step-down
MEM_SIZE = 512             # Passed to attentionModel as mem_size
SEQ_LEN = 7                # Passed to the GTEA61 dataset as seq_len

# Hyper-parameters stored inside every Logger of this stage.
# MEM_SIZE is logged from the constant so the record cannot drift from the value in use.
parameters = {'DEVICE':DEVICE, 'NUM_CLASSES':NUM_CLASSES, 'BATCH_SIZE':BATCH_SIZE,
             'LR':LR, 'MOMENTUM':MOMENTUM, 'WEIGHT_DECAY':WEIGHT_DECAY, 'NUM_EPOCHS':NUM_EPOCHS,
             'STEP_SIZE':STEP_SIZE, 'GAMMA':GAMMA, 'MEM_SIZE':MEM_SIZE, 'SEQ_LEN':SEQ_LEN}

In [70]:
# Build the train / test datasets for this stage
train_dataset = GTEA61(DATA_DIR, split='train', seq_len=SEQ_LEN, transform=spatial_transform)
test_dataset = GTEA61(DATA_DIR, split='test', seq_len=SEQ_LEN, transform=spatial_transform_val)

# Report how many samples each split holds
for banner, split_data in (('Train Dataset: {}', train_dataset), ('Test Dataset: {}', test_dataset)):
    print(banner.format(len(split_data)))

Train Dataset: 341
Test Dataset: 116


In [71]:
# Wrap the datasets in loaders. The train loader reshuffles and drops the last
# incomplete batch; the validation loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=4,
                          shuffle=True, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=4,
                        shuffle=False)

# Prepare Stage 0.1

In [72]:
validate = True

# Build the attention model and freeze all of it in eval mode ...
model = attentionModel(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
model.train(False)
for params in model.parameters():
    params.requires_grad = False

# ... then unfreeze only the parts trained in this stage.
for trainable_part in (model.lstm_cell, model.classifier):
    for params in trainable_part.parameters():
        params.requires_grad = True
    trainable_part.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

# Only the unfrozen parameters are handed to the optimizer
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 0.1

In [None]:
# Stage 0.1 training loop: trains the unfrozen parts of `model` with the CAM disabled,
# validates after every epoch and keeps the checkpoint with the best validation accuracy.
train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (historical name kept)
# train_loader is built with drop_last=True, so only whole batches are counted
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)
cudnn.benchmark = True  # the former bare `cudnn.benchmark` expression had no effect

train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, optional='_1_NOCAM')
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, optional='_1_NOCAM')

os.makedirs(model_folder, exist_ok=True)  # fail early instead of after the first epoch
train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
save_path_model = os.path.join(model_folder, model_checkpoint)
train_logger = Logger(**parameters)
val_logger = Logger(**parameters)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    # Validation puts the whole model in eval mode; re-enable only the trained parts.
    model.lstm_cell.train(True)
    model.classifier.train(True)

    for inputs, targets in train_loader:
        train_iter += 1
        optimizer_fn.zero_grad()
        # Swap the first two axes (presumably batch-first -> sequence-first; confirm against GTEA61)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        output_label, _ = model(inputVariable, no_cam=True)

        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        epoch_loss += step_loss
        # The step log stores the running count of correct predictions within the epoch
        train_logger.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger.add_epoch_data(epoch+1, trainAccuracy, avg_loss)
    print('Train: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, trainAccuracy))

    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        # No gradients are needed for evaluation
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                # Evaluate the same way the model is trained (this call used to omit no_cam)
                output_label, _ = model(inputVariable, no_cam=True)
                val_loss_step = loss_fn(output_label, labelVariable).item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()
                val_logger.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps
        val_logger.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val||||]")
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    # Logs are rewritten every epoch so an interrupted run keeps its history
    train_logger.save(train_log_file)
    val_logger.save(val_log_file)
    optim_scheduler.step()

# Stage 0.2 specific-setup

In [6]:
STAGE = 0

LR = 0.0001           # Initial learning rate
MOMENTUM = 0.9        # SGD momentum; only logged in this stage, which optimizes with Adam
WEIGHT_DECAY = 4e-5   # Weight decay passed to the optimizer

NUM_EPOCHS = 150      # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75]  # Epoch milestones at which MultiStepLR multiplies the LR by GAMMA
GAMMA = 0.1           # Multiplicative factor for learning rate step-down
MEM_SIZE = 512        # Passed to attentionModel as mem_size
SEQ_LEN = 7           # Passed to the GTEA61 dataset as seq_len

# Hyper-parameters stored inside every Logger of this stage.
# MEM_SIZE is logged from the constant so the record cannot drift from the value in use.
parameters = {'DEVICE':DEVICE, 'NUM_CLASSES':NUM_CLASSES, 'BATCH_SIZE':BATCH_SIZE,
             'LR':LR, 'MOMENTUM':MOMENTUM, 'WEIGHT_DECAY':WEIGHT_DECAY, 'NUM_EPOCHS':NUM_EPOCHS,
             'STEP_SIZE':STEP_SIZE, 'GAMMA':GAMMA, 'MEM_SIZE':MEM_SIZE, 'SEQ_LEN':SEQ_LEN}

In [None]:
# Build the train / test datasets for this stage
train_dataset = GTEA61(DATA_DIR, split='train', seq_len=SEQ_LEN, transform=spatial_transform)
test_dataset = GTEA61(DATA_DIR, split='test', seq_len=SEQ_LEN, transform=spatial_transform_val)

# Report how many samples each split holds
for banner, split_data in (('Train Dataset: {}', train_dataset), ('Test Dataset: {}', test_dataset)):
    print(banner.format(len(split_data)))

In [None]:
# Wrap the datasets in loaders. The train loader reshuffles and drops the last
# incomplete batch; the validation loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=4,
                          shuffle=True, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=4,
                        shuffle=False)

# Prepare stage 0.2

In [None]:
# Stage 0.2 starts from the best checkpoint written by Stage 0.1.
best_old_stage = model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, optional='_1_NOCAM')
stage1_dict = os.path.join(model_folder, best_old_stage)
validate = True

model = attentionModel(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
# map_location='cpu' keeps a checkpoint saved from a GPU run loadable on a CPU-only
# machine; the model is only moved to DEVICE further down.
model.load_state_dict(torch.load(stage1_dict, map_location='cpu'))
model.train(False)
for params in model.parameters():
    params.requires_grad = False

# Parts fine-tuned in this stage: both convolutions of the three layer4 blocks,
# the ResNet fc layer, the LSTM cell and the classifier.
finetuned_parts = []
for block_idx in (0, 1, 2):
    finetuned_parts.append(model.resNet.layer4[block_idx].conv1)
    finetuned_parts.append(model.resNet.layer4[block_idx].conv2)
finetuned_parts += [model.resNet.fc, model.lstm_cell, model.classifier]

for part in finetuned_parts:
    for params in part.parameters():
        params.requires_grad = True
    part.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

# Only the unfrozen parameters are handed to the optimizer
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 0.2

In [None]:
# Stage 0.2 training loop: fine-tunes the unfrozen parts of `model` with the CAM disabled,
# validates after every epoch and keeps the checkpoint with the best validation accuracy.
train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (historical name kept)
# train_loader is built with drop_last=True, so only whole batches are counted
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)
cudnn.benchmark = True  # the former bare `cudnn.benchmark` expression had no effect

train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, optional='_2_NOCAM')
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, optional='_2_NOCAM')

os.makedirs(model_folder, exist_ok=True)  # fail early instead of after the first epoch
train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
save_path_model = os.path.join(model_folder, model_checkpoint)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

# Sub-modules that are switched back to train mode at the start of every epoch
train_mode_parts = [model.lstm_cell, model.classifier]
for block_idx in (0, 1, 2):
    train_mode_parts.append(model.resNet.layer4[block_idx].conv1)
    train_mode_parts.append(model.resNet.layer4[block_idx].conv2)
train_mode_parts.append(model.resNet.fc)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0

    # Validation puts the whole model in eval mode; re-enable only the trained parts.
    for part in train_mode_parts:
        part.train(True)

    for inputs, targets in train_loader:
        train_iter += 1
        optimizer_fn.zero_grad()
        # Swap the first two axes (presumably batch-first -> sequence-first; confirm against GTEA61)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        # Stage 0 is the no-CAM variant: train the same way validation is run
        # (this call used to omit no_cam, so the CAM was active during training only).
        output_label, _ = model(inputVariable, no_cam=True)

        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        epoch_loss += step_loss
        # The step log stores the running count of correct predictions within the epoch
        train_logger_2.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    print('Train: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, trainAccuracy))

    # `validate` is a boolean flag; the former `is not None` test could never be false
    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        # No gradients are needed for evaluation
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable, no_cam=True)
                val_loss_step = loss_fn(output_label, labelVariable).item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps
        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()
    # Logs are rewritten every epoch so an interrupted run keeps its history
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

# Stage 1 specific-setup

In [38]:
STAGE = 1

LR = 0.001            # Initial learning rate
MOMENTUM = 0.9        # SGD momentum; only logged in this stage, which optimizes with Adam
WEIGHT_DECAY = 4e-5   # Weight decay passed to the optimizer

NUM_EPOCHS = 200           # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75, 150]  # Epoch milestones at which MultiStepLR multiplies the LR by GAMMA
GAMMA = 0.1                # Multiplicative factor for learning rate step-down
MEM_SIZE = 512             # Passed to attentionModel as mem_size
SEQ_LEN = 7                # Passed to the GTEA61 dataset as seq_len

# Hyper-parameters stored inside every Logger of this stage.
# MEM_SIZE is logged from the constant so the record cannot drift from the value in use.
parameters = {'DEVICE':DEVICE, 'NUM_CLASSES':NUM_CLASSES, 'BATCH_SIZE':BATCH_SIZE,
             'LR':LR, 'MOMENTUM':MOMENTUM, 'WEIGHT_DECAY':WEIGHT_DECAY, 'NUM_EPOCHS':NUM_EPOCHS,
             'STEP_SIZE':STEP_SIZE, 'GAMMA':GAMMA, 'MEM_SIZE':MEM_SIZE, 'SEQ_LEN':SEQ_LEN}

In [6]:
# Build the train / test datasets for this stage
train_dataset = GTEA61(DATA_DIR, split='train', seq_len=SEQ_LEN, transform=spatial_transform)
test_dataset = GTEA61(DATA_DIR, split='test', seq_len=SEQ_LEN, transform=spatial_transform_val)

# Report how many samples each split holds
for banner, split_data in (('Train Dataset: {}', train_dataset), ('Test Dataset: {}', test_dataset)):
    print(banner.format(len(split_data)))

Train Dataset: 341
Test Dataset: 116


In [7]:
# Wrap the datasets in loaders. The train loader reshuffles and drops the last
# incomplete batch; the validation loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=4,
                          shuffle=True, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=4,
                        shuffle=False)

# Prepare stage 1

In [8]:
validate = True

# Build the attention model and freeze all of it in eval mode ...
model = attentionModel(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
model.train(False)
for params in model.parameters():
    params.requires_grad = False

# ... then unfreeze only the parts trained in this stage.
for trainable_part in (model.lstm_cell, model.classifier):
    for params in trainable_part.parameters():
        params.requires_grad = True
    trainable_part.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

# Only the unfrozen parameters are handed to the optimizer
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 1

In [9]:
# Stage 1 training loop: trains the unfrozen parts of `model`, validates after every
# epoch and keeps the checkpoint with the best validation accuracy.
train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (historical name kept)
# train_loader is built with drop_last=True, so only whole batches are counted
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)
cudnn.benchmark = True  # the former bare `cudnn.benchmark` expression had no effect

train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN)

os.makedirs(model_folder, exist_ok=True)  # fail early instead of after the first epoch
train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
save_path_model = os.path.join(model_folder, model_checkpoint)
train_logger = Logger(**parameters)
val_logger = Logger(**parameters)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    # Validation puts the whole model in eval mode; re-enable only the trained parts.
    model.lstm_cell.train(True)
    model.classifier.train(True)

    for inputs, targets in train_loader:
        train_iter += 1
        optimizer_fn.zero_grad()
        # Swap the first two axes (presumably batch-first -> sequence-first; confirm against GTEA61)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        output_label, _ = model(inputVariable)

        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        epoch_loss += step_loss
        # The step log stores the running count of correct predictions within the epoch
        train_logger.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger.add_epoch_data(epoch+1, trainAccuracy, avg_loss)
    print('Train: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, trainAccuracy))

    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        # No gradients are needed for evaluation
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)
                val_loss_step = loss_fn(output_label, labelVariable).item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()
                val_logger.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps
        val_logger.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val||||]")
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    # Logs are rewritten every epoch so an interrupted run keeps its history
    train_logger.save(train_log_file)
    val_logger.save(val_log_file)
    optim_scheduler.step()

torch.Size([32, 61])


# Stage 2 specific-setup

In [40]:
STAGE = 2

LR = 0.0001           # Initial learning rate
MOMENTUM = 0.9        # SGD momentum; only logged in this stage, which optimizes with Adam
WEIGHT_DECAY = 4e-5   # Weight decay passed to the optimizer

NUM_EPOCHS = 150      # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75]  # Epoch milestones at which MultiStepLR multiplies the LR by GAMMA
GAMMA = 0.1           # Multiplicative factor for learning rate step-down
MEM_SIZE = 512        # Passed to attentionModel as mem_size
SEQ_LEN = 7           # Passed to the GTEA61 dataset as seq_len

# Hyper-parameters stored inside every Logger of this stage.
# MEM_SIZE is logged from the constant so the record cannot drift from the value in use.
parameters = {'DEVICE':DEVICE, 'NUM_CLASSES':NUM_CLASSES, 'BATCH_SIZE':BATCH_SIZE,
             'LR':LR, 'MOMENTUM':MOMENTUM, 'WEIGHT_DECAY':WEIGHT_DECAY, 'NUM_EPOCHS':NUM_EPOCHS,
             'STEP_SIZE':STEP_SIZE, 'GAMMA':GAMMA, 'MEM_SIZE':MEM_SIZE, 'SEQ_LEN':SEQ_LEN}

In [None]:
# Build the train / test datasets for this stage
train_dataset = GTEA61(DATA_DIR, split='train', seq_len=SEQ_LEN, transform=spatial_transform)
test_dataset = GTEA61(DATA_DIR, split='test', seq_len=SEQ_LEN, transform=spatial_transform_val)

# Report how many samples each split holds
for banner, split_data in (('Train Dataset: {}', train_dataset), ('Test Dataset: {}', test_dataset)):
    print(banner.format(len(split_data)))

In [None]:
# Wrap the datasets in loaders. The train loader reshuffles and drops the last
# incomplete batch; the validation loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=4,
                          shuffle=True, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=4,
                        shuffle=False)

# Prepare stage 2

In [None]:
# Stage 2 starts from the best checkpoint written by Stage 1.
best_old_stage = generate_model_checkpoint_name(stage=1, n_frames=SEQ_LEN)
stage1_dict = os.path.join(model_folder, best_old_stage)
validate = True

model = attentionModel(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
# map_location='cpu' keeps a checkpoint saved from a GPU run loadable on a CPU-only
# machine; the model is only moved to DEVICE further down.
model.load_state_dict(torch.load(stage1_dict, map_location='cpu'))
model.train(False)
for params in model.parameters():
    params.requires_grad = False

# Parts fine-tuned in this stage: both convolutions of the three layer4 blocks,
# the ResNet fc layer, the LSTM cell and the classifier.
finetuned_parts = []
for block_idx in (0, 1, 2):
    finetuned_parts.append(model.resNet.layer4[block_idx].conv1)
    finetuned_parts.append(model.resNet.layer4[block_idx].conv2)
finetuned_parts += [model.resNet.fc, model.lstm_cell, model.classifier]

for part in finetuned_parts:
    for params in part.parameters():
        params.requires_grad = True
    part.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

# Only the unfrozen parameters are handed to the optimizer
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 2

In [None]:
# Stage 2 training loop: fine-tunes the unfrozen parts of `model`, validates after
# every epoch and keeps the checkpoint with the best validation accuracy.
train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (historical name kept)
# train_loader is built with drop_last=True, so only whole batches are counted
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)
cudnn.benchmark = True  # the former bare `cudnn.benchmark` expression had no effect

train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN)

os.makedirs(model_folder, exist_ok=True)  # fail early instead of after the first epoch
train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
save_path_model = os.path.join(model_folder, model_checkpoint)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

# Sub-modules that are switched back to train mode at the start of every epoch
train_mode_parts = [model.lstm_cell, model.classifier]
for block_idx in (0, 1, 2):
    train_mode_parts.append(model.resNet.layer4[block_idx].conv1)
    train_mode_parts.append(model.resNet.layer4[block_idx].conv2)
train_mode_parts.append(model.resNet.fc)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0

    # Validation puts the whole model in eval mode; re-enable only the trained parts.
    for part in train_mode_parts:
        part.train(True)

    for inputs, targets in train_loader:
        train_iter += 1
        optimizer_fn.zero_grad()
        # Swap the first two axes (presumably batch-first -> sequence-first; confirm against GTEA61)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        output_label, _ = model(inputVariable)

        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        epoch_loss += step_loss
        # The step log stores the running count of correct predictions within the epoch
        train_logger_2.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    print('Train: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, trainAccuracy))

    # `validate` is a boolean flag; the former `is not None` test could never be false
    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        # No gradients are needed for evaluation
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)
                val_loss_step = loss_fn(output_label, labelVariable).item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps
        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()
    # Logs are rewritten every epoch so an interrupted run keeps its history
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

# Temporal network specific-setup

In [48]:
STAGE = 3

LR = 0.01             # Initial learning rate
MOMENTUM = 0.9        # Momentum of the SGD optimizer used by this stage
WEIGHT_DECAY = 5e-4   # Weight decay passed to the optimizer

NUM_EPOCHS = 750             # Total number of training epochs (iterations over dataset)
STEP_SIZE = [150, 300, 500]  # Epoch milestones at which MultiStepLR multiplies the LR by GAMMA
GAMMA = 0.5                  # Multiplicative factor for learning rate step-down
STACK_SIZE = 5               # Passed to GTEA61_flow as seq_len; the network gets 2*STACK_SIZE input channels

# Hyper-parameters stored inside every Logger of this stage. STACK_SIZE is recorded
# because it is the sequence hyper-parameter this stage actually uses.
# NOTE(review): SEQ_LEN is not set in this cell and must come from an earlier stage's
# setup cell; 'MEM_SIZE' is kept as a literal for the same reason - confirm both are wanted here.
parameters = {'DEVICE':DEVICE, 'NUM_CLASSES':NUM_CLASSES, 'BATCH_SIZE':BATCH_SIZE,
             'LR':LR, 'MOMENTUM':MOMENTUM, 'WEIGHT_DECAY':WEIGHT_DECAY, 'NUM_EPOCHS':NUM_EPOCHS,
             'STEP_SIZE':STEP_SIZE, 'GAMMA':GAMMA, 'MEM_SIZE':512, 'SEQ_LEN':SEQ_LEN,
             'STACK_SIZE':STACK_SIZE}

In [7]:
# Prepare Pytorch train/test Datasets (optical-flow stacks of STACK_SIZE frames)
# The training pipeline defined in the setup cell is named `spatial_transform`;
# `spatial_transform_train` is never defined in this notebook and raised a NameError.
train_dataset = GTEA61_flow(DATA_DIR, split='train', transform=spatial_transform, seq_len=STACK_SIZE)
test_dataset = GTEA61_flow(DATA_DIR, split='test', transform=spatial_transform_val, seq_len=STACK_SIZE)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 341
Test Dataset: 116


In [8]:
# Wrap the datasets in loaders. The train loader reshuffles and drops the last
# incomplete batch; the validation loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=4,
                          shuffle=True, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=4,
                        shuffle=False)

# Prepare stage 3

In [9]:
validate = True

# Flow network taking 2*STACK_SIZE input channels; every parameter is trained.
model = flow_resnet34(True, num_classes=NUM_CLASSES, channels=2*STACK_SIZE)
model.train(True)
train_params = [weight for weight in model.parameters()]

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

# Plain SGD with momentum, stepped down at the STEP_SIZE milestones
optimizer_fn = torch.optim.SGD(
    train_params,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

optim_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer_fn,
    milestones=STEP_SIZE,
    gamma=GAMMA,
)

# Stage 3

In [None]:
# Stage 3 training loop: trains the flow network, validates after every epoch and
# keeps the checkpoint with the best validation accuracy.
train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (historical name kept)
# train_loader is built with drop_last=True, so only whole batches are counted
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
# Fixed denominator for validation accuracy. It used to be incremented inside the
# validation loop as well, so it kept growing and the reported accuracy kept shrinking.
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)
cudnn.benchmark = True  # the former bare `cudnn.benchmark` expression had no effect

train_log, val_log = generate_log_filenames(STAGE, STACK_SIZE)
# NOTE(review): the checkpoint name is built from SEQ_LEN while the logs use STACK_SIZE;
# kept unchanged because later cells may look the checkpoint up under this name.
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN)

os.makedirs(model_folder, exist_ok=True)  # fail early instead of after the first epoch
train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
save_path_model = os.path.join(model_folder, model_checkpoint)

train_logger_3 = Logger(**parameters)
val_logger_3 = Logger(**parameters)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    model.train(True)
    for inputs, targets in train_loader:
        train_iter += 1
        optimizer_fn.zero_grad()
        inputVariable = inputs.to(DEVICE)
        labelVariable = targets.to(DEVICE)

        # The model returns a pair; only the first element (class scores) is used
        output_label, _ = model(inputVariable)
        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        epoch_loss += step_loss
        # The step log stores the running count of correct predictions within the epoch
        train_logger_3.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    print('Train: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_loss, trainAccuracy))
    train_logger_3.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        # No gradients are needed for evaluation
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_iter += 1
                inputVariable = inputs.to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)
                step_loss = loss_fn(output_label, labelVariable).item()
                val_loss_epoch += step_loss
                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()
                val_logger_3.add_step_data(val_iter, numCorr, step_loss)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps
        val_logger_3.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        print('Validation: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
        if val_accuracy > min_accuracy:
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()
    # Logs are rewritten every epoch so an interrupted run keeps its history
    train_logger_3.save(train_log_file)
    val_logger_3.save(val_log_file)

# Two-stream joint training: stage-specific setup

In [52]:
STAGE = 4

LR = 0.01            # Default learning rate of the optimizer (used by groups without their own 'lr')
LR_FLOW = 0.0001     # Learning rate of the flow-stream parameter group
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4  # Regularization, you can keep this at the default

NUM_EPOCHS = 250     # Total number of training epochs (iterations over dataset)
STEP_SIZE = 1        # Scheduler period: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
GAMMA = 0.99         # Multiplicative factor for learning rate step-down
MEM_SIZE = 512       # Passed to the two-stream model as memSize
STACK_SIZE = 5       # Passed to the datasets as stack_size and to the model as stackSize
SEQ_LEN = 7          # Passed to the datasets as seq_len

# This dictionary is stored by the Logger class together with the logged metrics.
# Every value comes from the constants above, so the log always matches the run.
parameters = {
    'DEVICE': DEVICE,
    'NUM_CLASSES': NUM_CLASSES,
    'BATCH_SIZE': BATCH_SIZE,
    'LR': LR,
    'LR_FLOW': LR_FLOW,
    'MOMENTUM': MOMENTUM,
    'WEIGHT_DECAY': WEIGHT_DECAY,
    'NUM_EPOCHS': NUM_EPOCHS,
    'STEP_SIZE': STEP_SIZE,
    'GAMMA': GAMMA,
    'MEM_SIZE': MEM_SIZE,
    'STACK_SIZE': STACK_SIZE,
    'SEQ_LEN': SEQ_LEN,
}

In [5]:
# Build the two-stream train and test datasets.
# Both splits share the same sequence length and flow stack size.
_two_stream_kwargs = dict(seq_len=SEQ_LEN, stack_size=STACK_SIZE)

train_dataset = GTEA61_2Stream(
    DATA_DIR,
    split='train',
    transform=spatial_transform_train,
    **_two_stream_kwargs
)
test_dataset = GTEA61_2Stream(
    DATA_DIR,
    split='test',
    transform=spatial_transform_val,
    **_two_stream_kwargs
)

# Report how many samples each split contains
n_train_items = len(train_dataset)
n_test_items = len(test_dataset)
print('Train Dataset: {}'.format(n_train_items))
print('Test Dataset: {}'.format(n_test_items))

Train Dataset: 341
Test Dataset: 116


In [6]:
# Wrap the datasets in DataLoaders, which handle batching, shuffling and worker processes.
# The training loader shuffles and discards the final incomplete batch.
# The validation loader keeps dataset order and every sample.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
val_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

# Prepare joint training

In [10]:
# Build the two-stream model from the stage-3 (flow) and stage-2 (RGB) checkpoints.
# Everything is frozen first, then only the sub-modules fine-tuned jointly are re-enabled.
flow_model_name = generate_model_checkpoint_name(stage=3, n_frames=STACK_SIZE)
rgb_model_name = generate_model_checkpoint_name(stage=2, n_frames=SEQ_LEN)
flowModel = os.path.join(model_folder, flow_model_name)
rgbModel = os.path.join(model_folder, rgb_model_name)
validate = True

model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=STACK_SIZE, memSize=MEM_SIZE,
                                    num_classes=NUM_CLASSES)

# Start from a fully frozen network in eval mode.
for params in model.parameters():
    params.requires_grad = False
model.train(False)

# Sub-modules whose parameters are fine-tuned.
# Only the two conv layers of the first three layer4 blocks of the RGB ResNet are unfrozen.
modules_to_unfreeze = [model.classifier, model.frameModel.lstm_cell]
for block_idx in range(3):
    res_block = model.frameModel.resNet.layer4[block_idx]
    modules_to_unfreeze += [res_block.conv1, res_block.conv2]
modules_to_unfreeze += [model.frameModel.resNet.fc, model.flowModel.layer4]

for module in modules_to_unfreeze:
    for params in module.parameters():
        params.requires_grad = True

# Only these sub-modules are switched back to train mode; the rest of the model stays in eval mode.
model.classifier.train(True)
model.flowModel.layer4.train(True)
model.frameModel.lstm_cell.train(True)
model.frameModel.classifier.train(True)

frame_trainable_params = [p for p in model.frameModel.parameters() if p.requires_grad]
flow_trainable_params = [p for p in model.flowModel.parameters() if p.requires_grad]
# model.classifier is unfrozen above but belongs to neither stream, so without this list
# the optimizer would never update it.
# NOTE(review): assumes model.classifier is the joint head of the two-stream model - confirm.
# The id() filter avoids registering a parameter twice if the classifier shares any with a stream.
_already_grouped = {id(p) for p in frame_trainable_params + flow_trainable_params}
classifier_trainable_params = [p for p in model.classifier.parameters()
                               if p.requires_grad and id(p) not in _already_grouped]

model.to(DEVICE)

# The training loop applies log_softmax to the model output before this loss.
loss_fn = nn.NLLLoss()

# Two parameter groups: RGB stream + joint classifier at LR, flow stream at LR_FLOW.
optimizer_fn = torch.optim.SGD([
        {'params': frame_trainable_params + classifier_trainable_params},
        {'params': flow_trainable_params, 'lr': LR_FLOW},
    ], lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Multiply every group's learning rate by GAMMA every STEP_SIZE epochs.
optim_scheduler = optim.lr_scheduler.StepLR(optimizer_fn, step_size=STEP_SIZE, gamma=GAMMA)

# Joint training

In [None]:
# Joint (two-stream) training loop.
# Every epoch runs one optimisation pass over `train_loader` and, when `validate` is set,
# one evaluation pass over `val_loader`. The checkpoint file is overwritten whenever the
# validation accuracy improves, and both loggers are re-saved at the end of every epoch.
train_iter = 0    # number of training steps taken so far, across all epochs
val_iter = 0      # number of validation steps taken so far, across all epochs (logging only)
min_accuracy = 0  # best validation accuracy (in percent) seen so far
# train_loader is built with drop_last=True, so only samples in full batches are seen per epoch.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)
# A bare `cudnn.benchmark` expression does nothing; the flag has to be assigned.
cudnn.benchmark = True

train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, loss='NLL')
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN)

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
train_logger_4 = Logger(**parameters)
val_logger_4 = Logger(**parameters)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    # Validation puts the whole model in eval mode, so the fine-tuned sub-modules
    # are switched back to train mode at the start of every epoch.
    model.classifier.train(True)
    model.flowModel.layer4.train(True)
    model.frameModel.lstm_cell.train(True)
    model.frameModel.classifier.train(True)
    for inputFlow, inputFrame, targets in train_loader:
        train_iter += 1
        optimizer_fn.zero_grad()

        inputVariableFlow = inputFlow.to(DEVICE)
        # Swap the first two axes of the frame tensor before feeding the model.
        inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)

        output_label = model(inputVariableFlow, inputVariableFrame)
        loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        epoch_loss += step_loss
        # The value logged per step is the running count of correct predictions in this epoch.
        train_logger_4.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss / iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_4.add_epoch_data(epoch + 1, trainAccuracy, avg_loss)

    print('Average training loss after {} epoch = {:.3f} '.format(epoch + 1, avg_loss))
    print('Training accuracy after {} epoch = {:.3f}% '.format(epoch + 1, trainAccuracy))

    # The modulus is the validation period in epochs (1 = validate after every epoch).
    if validate and (epoch + 1) % 1 == 0:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for inputFlow, inputFrame, targets in val_loader:
                val_iter += 1

                inputVariableFlow = inputFlow.to(DEVICE)
                inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label = model(inputVariableFlow, inputVariableFrame)
                val_loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()
                val_logger_4.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        # Average over this epoch's batches; val_iter keeps growing across epochs
        # and would shrink the reported loss after the first epoch.
        avg_val_loss = val_loss_epoch / val_steps
        val_logger_4.add_epoch_data(epoch + 1, val_accuracy, avg_val_loss)

        print('Val Loss after {} epochs, loss = {:.3f}'.format(epoch + 1, avg_val_loss))
        print('Val Accuracy after {} epochs = {:.3f}%'.format(epoch + 1, val_accuracy))
        if val_accuracy > min_accuracy:
            # New best validation accuracy: overwrite the stored checkpoint.
            print("[||| NEW BEST on val |||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    # Persist the logs every epoch so an interrupted run keeps its history.
    train_logger_4.save(train_log_file)
    val_logger_4.save(val_log_file)
    optim_scheduler.step()