In [None]:
# Mount Google Drive at /content/drive (Colab only; prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# Change into the project folder on the mounted Drive; the relative paths used later
# ('../GTEA61', '../saved_models') are resolved from this working directory.
%cd /content/drive/My\ Drive/
%cd FPAR-Project-MLDL/

/content/drive/My Drive
/content/drive/My Drive/FPAR-Project-MLDL


# SETUP
Run the following cells before anything else: they set up the imports, classes, functions and
constants that are needed by every stage.

In [None]:
import os
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.backends import cudnn
import torchvision
from torchvision import transforms
from torchvision.models import resnet34

from PIL import Image
from tqdm import tqdm

from logs import Logger, generate_model_checkpoint_name, generate_log_filenames

from gtea_dataset import GTEA61, GTEA61_flow, GTEA61_2Stream
# from clean_dataset import adjust_flows
from AttentMS2 import attention_model_ms
from flow_resnet import flow_resnet34
from twoStreamModel import twoStreamAttentionModel
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip, DownSampling, To1Dimension)

In [None]:
DEVICE = 'cuda' # 'cuda' or 'cpu'
NUM_CLASSES = 61 # Number of output classes passed to the model as num_classes

BATCH_SIZE = 32     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

# Per-frame length of the flattened mmap target; the Stage 2 loops use it to reshape
# the mmap predictions (presumably a flattened 7x7 map - TODO confirm)
MMAP_LENGTH = 49
DATA_DIR = '../GTEA61'            # dataset root handed to GTEA61, relative to the working directory
model_folder = '../saved_models'  # directory where logs and model checkpoints are written

RUN = '_run01'  # suffix appended to the log / checkpoint file names of a run

In [None]:
# Spatial transform pipelines: training frames, training mmaps and validation frames.
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Augmentation steps reused (as the very same instances) by both training pipelines.
default_transforms = [
    Scale(256),
    RandomHorizontalFlip(),
    MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
    ToTensor(),
]

# Training frames: shared augmentation followed by normalization.
spatial_transform = Compose([*default_transforms, normalize])

# Training mmaps: shared augmentation, then down-sampling and flattening.
spatial_transform_mmaps = Compose([*default_transforms, DownSampling(), To1Dimension()])

# Validation frames: deterministic resize + center crop, no augmentation.
spatial_transform_val = Compose([
    Scale(256),
    CenterCrop(224),
    ToTensor(),
    normalize,
])

# Stage 1 specific setup

In [None]:
STAGE = 1

# Optimisation hyperparameters
LR = 0.001            # initial learning rate
MOMENTUM = 0.9        # SGD momentum; in this cell it is only recorded in `parameters`
WEIGHT_DECAY = 4e-5   # weight-decay regularisation

# Learning-rate schedule
NUM_EPOCHS = 150      # total number of passes over the training set
STEP_SIZE = [30, 80]  # epochs at which the learning rate is stepped down
GAMMA = 0.1           # multiplicative learning-rate factor applied at each step-down

# Model memory size (mem_size) and clip length (seq_len)
MEM_SIZE = 512
SEQ_LEN = 7

# Snapshot of the run configuration; the Logger class is constructed from it.
parameters = dict(
    DEVICE=DEVICE,
    NUM_CLASSES=NUM_CLASSES,
    BATCH_SIZE=BATCH_SIZE,
    LR=LR,
    MOMENTUM=MOMENTUM,
    WEIGHT_DECAY=WEIGHT_DECAY,
    NUM_EPOCHS=NUM_EPOCHS,
    STEP_SIZE=STEP_SIZE,
    GAMMA=GAMMA,
    MEM_SIZE=MEM_SIZE,
    SEQ_LEN=SEQ_LEN,
)

In [None]:
# Build the GTEA61 train / test datasets used by this stage.
train_dataset = GTEA61(
    DATA_DIR,
    split = 'train',
    transform = spatial_transform,
    seq_len = SEQ_LEN,
    mmaps = True,
    mmaps_transform = spatial_transform_mmaps,
    static_frames = True,
)
test_dataset = GTEA61(
    DATA_DIR,
    split = 'test',
    transform = spatial_transform_val,
    seq_len = SEQ_LEN,
)

# Report how many samples each split contains.
print(
    'Train Dataset: {}'.format(len(train_dataset)),
    'Test Dataset: {}'.format(len(test_dataset)),
    sep = '\n',
)

Train Dataset: 341
Test Dataset: 116


In [None]:
# Batch iterators over the datasets. The training loader shuffles and drops the last
# partial batch; the validation loader keeps the dataset order and every sample.
train_loader = DataLoader(
    train_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
    num_workers = 4,
    drop_last = True,
)
val_loader = DataLoader(
    test_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False,
    num_workers = 4,
)

# Prepare Stage 1

In [None]:
validate = True

# Instantiate the network in eval mode with every parameter frozen.
model = attention_model_ms(num_classes = NUM_CLASSES, mem_size = MEM_SIZE)
model.train(False)
for params in model.parameters():
    params.requires_grad = False

# Stage 1 only trains the LSTM cell and the classifier: unfreeze them and switch
# them back to training mode.
for layer in (model.lstm_cell, model.classifier):
    for params in layer.parameters():
        params.requires_grad = True
    layer.train(True)

model.to(DEVICE)

# The optimiser only receives the parameters that still require gradients.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))

loss_fn = nn.CrossEntropyLoss()
optimizer_fn = optim.Adam(trainable_params, lr = LR, weight_decay = WEIGHT_DECAY, eps = 1e-4)

# The learning rate is multiplied by GAMMA at each epoch listed in STEP_SIZE.
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones = STEP_SIZE, gamma = GAMMA)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/checkpoints/resnet34-333f7ec4.pth


HBox(children=(FloatProgress(value=0.0, max=87306240.0), HTML(value='')))




# Stage 1

In [None]:
# Stage 1 training loop: optimise the trainable parameters selected in the preparation
# cell, validate after every epoch and checkpoint the weights whenever the validation
# accuracy improves.
train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (despite the name)

# The train loader uses drop_last=True, so only full batches are seen each epoch.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)
iterPerEpoch = len(train_loader)
val_steps = len(val_loader)

# This used to be the bare expression `cudnn.benchmark`, which does nothing;
# assign the flag so the cuDNN auto-tuner is really switched on.
cudnn.benchmark = True

train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, ms_block = True,
                                            optional = '_2neur_DS_' + RUN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, ms_block = True,
                                                  optional = '_2neur_DS_' + RUN)

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)

train_logger = Logger(**parameters)
val_logger = Logger(**parameters)

for epoch in range(NUM_EPOCHS):

    epoch_loss = 0
    numCorrTrain = 0

    # Validation puts the whole model in eval mode; re-enable training mode on the
    # two trained sub-modules at the start of every epoch.
    model.lstm_cell.train(True)
    model.classifier.train(True)

    # The train dataset yields 4-tuples; the two middle items are not needed here.
    for i, (inputs, _, _, targets) in enumerate(train_loader):

        train_iter += 1

        optimizer_fn.zero_grad()

        # Swap the first two axes of the batch before feeding it to the model.
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        output_label, _ = model(inputVariable)

        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()

        step_loss = loss.item()
        epoch_loss += step_loss

        # numCorrTrain is the running count of correct predictions within the epoch.
        train_logger.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch

    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger.add_epoch_data(epoch+1, trainAccuracy, avg_loss)
    print('Train: Epoch = {} | Loss = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, trainAccuracy))

    # `(epoch+1) % 1` is always 0, i.e. validation runs after every epoch.
    if validate and (epoch+1) % 1 == 0:

        model.train(False)

        val_loss_epoch = 0
        numCorr = 0

        # Evaluation needs no gradients: skip building the autograd graph.
        with torch.no_grad():
            for j, (inputs, targets) in enumerate(val_loader):

                val_iter += 1

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)

                val_loss = loss_fn(output_label, labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step

                _, predicted = torch.max(output_label, 1)
                numCorr += (predicted == labelVariable).sum().item()

                val_logger.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps
        val_logger.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))

        # Keep only the weights of the best-performing epoch on the validation set.
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val||||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    # Persist the logs every epoch so an interrupted run still leaves usable data.
    train_logger.save(train_log_file)
    val_logger.save(val_log_file)
    optim_scheduler.step()

Train: Epoch = 1 | Loss = 4.084 | Accuracy = 4.688
Val: Epoch = 1 | Loss 3.914 | Accuracy = 6.897
[||| NEW BEST on val||||]
Train: Epoch = 2 | Loss = 4.056 | Accuracy = 3.750
Val: Epoch = 2 | Loss 3.901 | Accuracy = 5.172
Train: Epoch = 3 | Loss = 3.989 | Accuracy = 6.250
Val: Epoch = 3 | Loss 3.874 | Accuracy = 5.172
Train: Epoch = 4 | Loss = 3.907 | Accuracy = 7.187
Val: Epoch = 4 | Loss 3.777 | Accuracy = 6.897
Train: Epoch = 5 | Loss = 3.812 | Accuracy = 6.875
Val: Epoch = 5 | Loss 3.665 | Accuracy = 10.345
[||| NEW BEST on val||||]
Train: Epoch = 6 | Loss = 3.647 | Accuracy = 12.500
Val: Epoch = 6 | Loss 3.513 | Accuracy = 8.621
Train: Epoch = 7 | Loss = 3.578 | Accuracy = 12.812
Val: Epoch = 7 | Loss 3.546 | Accuracy = 12.931
[||| NEW BEST on val||||]
Train: Epoch = 8 | Loss = 3.513 | Accuracy = 11.875
Val: Epoch = 8 | Loss 3.596 | Accuracy = 11.207
Train: Epoch = 9 | Loss = 3.587 | Accuracy = 12.812
Val: Epoch = 9 | Loss 3.506 | Accuracy = 8.621
Train: Epoch = 10 | Loss = 3.495 

# Stage 2 specific setup

In [None]:
STAGE = 2

# Optimisation hyperparameters
LR = 0.0001           # initial learning rate
MOMENTUM = 0.9        # SGD momentum; in this cell it is only recorded in `parameters`
WEIGHT_DECAY = 4e-5   # weight-decay regularisation

# Learning-rate schedule
NUM_EPOCHS = 150      # total number of passes over the training set
STEP_SIZE = [30, 80]  # epochs at which the learning rate is stepped down
GAMMA = 0.2           # multiplicative learning-rate factor applied at each step-down

# Model memory size (mem_size) and clip length (seq_len)
MEM_SIZE = 512
SEQ_LEN = 7

# Snapshot of the run configuration; the Logger class is constructed from it.
parameters = dict(
    DEVICE=DEVICE,
    NUM_CLASSES=NUM_CLASSES,
    BATCH_SIZE=BATCH_SIZE,
    LR=LR,
    MOMENTUM=MOMENTUM,
    WEIGHT_DECAY=WEIGHT_DECAY,
    NUM_EPOCHS=NUM_EPOCHS,
    STEP_SIZE=STEP_SIZE,
    GAMMA=GAMMA,
    MEM_SIZE=MEM_SIZE,
    SEQ_LEN=SEQ_LEN,
)

In [None]:
# Build the GTEA61 train / test datasets used by this stage.
train_dataset = GTEA61(
    DATA_DIR,
    split = 'train',
    transform = spatial_transform,
    seq_len = SEQ_LEN,
    mmaps = True,
    mmaps_transform = spatial_transform_mmaps,
    static_frames = True,
)
test_dataset = GTEA61(
    DATA_DIR,
    split = 'test',
    transform = spatial_transform_val,
    seq_len = SEQ_LEN,
)

# Report how many samples each split contains.
print(
    'Train Dataset: {}'.format(len(train_dataset)),
    'Test Dataset: {}'.format(len(test_dataset)),
    sep = '\n',
)

Train Dataset: 341
Test Dataset: 116


In [None]:
# Batch iterators over the datasets. The training loader shuffles and drops the last
# partial batch; the validation loader keeps the dataset order and every sample.
train_loader = DataLoader(
    train_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
    num_workers = 4,
    drop_last = True,
)
val_loader = DataLoader(
    test_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False,
    num_workers = 4,
)

# Prepare Stage 2

In [None]:
# Stage 2 starts from the best Stage 1 checkpoint; the hard-coded suffix is the one
# Stage 1 produces with RUN = '_run01'.
best_old_stage = generate_model_checkpoint_name(stage=1, n_frames=SEQ_LEN, ms_block = True,
                                                optional = '_2neur_DS__run01')
stage1_dict = os.path.join(model_folder, best_old_stage)
validate = True

model = attention_model_ms(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
# Load the tensors onto the CPU: the freshly built model is still on the CPU and is
# moved to DEVICE further down, and a checkpoint written from a CUDA model would
# otherwise fail to load when DEVICE is 'cpu'.
model.load_state_dict(torch.load(stage1_dict, map_location='cpu'))

# Start from eval mode with everything frozen ...
model.train(False)

for params in model.parameters():
    params.requires_grad = False

# Sub-modules whose gradients the training loop switches off and on again
# around its second backward pass.
layers_on_off = [
    model.resNet.layer4[0].conv1,
    model.resNet.layer4[0].conv2,
    model.resNet.layer4[1].conv1,
    model.resNet.layer4[1].conv2,
    model.resNet.layer4[2].conv1,
    model.resNet.layer4[2].conv2,
    model.resNet.fc,
    model.msBlock
]

# Everything that is trained in Stage 2.
layers_to_train = layers_on_off + [
    model.lstm_cell,
    model.classifier,
    model.classifier_SD
]

# ... then unfreeze the selected sub-modules and put them in training mode.
for layer in layers_to_train:
    for params in layer.parameters():
        params.requires_grad = True
    layer.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()                       # mean over the batch
loss_fn_sum = nn.CrossEntropyLoss(reduction = 'sum')  # summed; normalised by the caller

# The optimiser only receives the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

# The learning rate is multiplied by GAMMA at each epoch listed in STEP_SIZE.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 2

In [None]:
# Stage 2 training loop. Every step does two backward passes before one optimizer step:
#   1. `loss`  = mmap loss + action-classification loss, with all trainable layers active;
#   2. `loss2` = loss of the two `static_frames` forward passes, with `layers_on_off` frozen.
RUN = "_run03"

train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (despite the name)

# The train loader uses drop_last=True, so only full batches are seen each epoch.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)

iterPerEpoch = len(train_loader)
val_steps = len(val_loader)

# This used to be the bare expression `cudnn.benchmark`, which does nothing;
# assign the flag so the cuDNN auto-tuner is really switched on.
cudnn.benchmark = True
# NOTE(review): anomaly detection is a debugging aid that slows training down;
# consider disabling it for full-length runs.
torch.autograd.set_detect_anomaly(True)
train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, ms_block = True,
                                            optional = '_2neur_DS_asymmetric_train_SDCtrained_' + RUN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, ms_block = True,
                                                  optional = '_2neur_DS_asymmetric_train_SDCtrained_' + RUN)

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

# Fixed targets for the two `static_frames` forward passes: class 0 for `inputs`,
# class 1 for `inputs_DS`. They never change, so build them once, directly on DEVICE.
gt_ds1 = torch.zeros(BATCH_SIZE, dtype = torch.long, device = DEVICE)
gt_ds2 = torch.ones(BATCH_SIZE, dtype = torch.long, device = DEVICE)

for epoch in range(NUM_EPOCHS):

    epoch_loss = 0
    epoch_loss2 = 0
    numCorrTrain = 0

    # Validation puts the whole model in eval mode; restore training mode on the
    # trained sub-modules at the start of every epoch.
    for layer in layers_to_train:
        layer.train(True)

    for i, (inputs, inputs_DS, mmaps, targets) in enumerate(train_loader):

        # Same axis swap as applied to the frames below, so targets and predictions line up.
        mmaps = mmaps.permute(1, 0, 2)

        train_iter += 1

        optimizer_fn.zero_grad()

        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        inputVariable_DS = inputs_DS.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)

        # First pass: action logits plus the mmap predictions.
        output_label, _, output_label_mmaps = model(inputVariable, mmaps = True)

        # Flatten the predictions to (N, 2) and the targets to (N,) for the cross-entropy.
        output_label_mmaps = output_label_mmaps.view(SEQ_LEN*BATCH_SIZE*MMAP_LENGTH, 2).to(DEVICE)
        mmaps = torch.flatten(mmaps).long().to(DEVICE)

        # Summed mmap loss divided by BATCH_SIZE * SEQ_LEN, plus the mean classification loss.
        loss = loss_fn_sum(output_label_mmaps, mmaps)/(BATCH_SIZE * SEQ_LEN) + loss_fn(output_label, labelVariable)
        loss.backward()

        # Second pass: freeze `layers_on_off` so that loss2 only adds gradients to the
        # remaining trainable parameters. The gradients of `loss` are kept, and both
        # contributions are applied by the single optimizer step below.
        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = False

        output_label_ds1, _ = model(inputVariable, static_frames = True)
        output_label_ds2, _ = model(inputVariable_DS, static_frames = True)

        loss2 = loss_fn(output_label_ds1, gt_ds1) + loss_fn(output_label_ds2, gt_ds2)

        loss2.backward()

        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = True

        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        step_loss2 = loss2.item()
        epoch_loss += step_loss
        epoch_loss2 += step_loss2
        # numCorrTrain is the running count of correct predictions within the epoch.
        train_logger_2.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    avg_loss2 = epoch_loss2/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    print('Train: Epoch = {} | Loss = {:.3f} | Loss2 = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, avg_loss2, trainAccuracy))

    # Honour `validate = False`: the previous `validate is not None` test was true for
    # any boolean. `(epoch+1) % 1` is always 0, i.e. validation runs after every epoch.
    if validate and (epoch+1) % 1 == 0:

        model.train(False)
        val_loss_epoch = 0
        numCorr = 0

        # Evaluation needs no gradients: skip building the autograd graph.
        with torch.no_grad():
            for j, (inputs, targets) in enumerate(val_loader):

                val_iter += 1

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)

                val_loss = loss_fn(output_label, labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step

                _, predicted = torch.max(output_label, 1)

                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))

        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        # Keep only the weights of the best-performing epoch on the validation set.
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()

    # Persist the logs every epoch so an interrupted run still leaves usable data.
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

Train: Epoch = 1 | Loss = 35.652 | Loss2 = 1.480 | Accuracy = 39.375
Val: Epoch = 1 | Loss 2.214 | Accuracy = 40.517
[||| NEW BEST on val |||]
Train: Epoch = 2 | Loss = 35.605 | Loss2 = 1.451 | Accuracy = 39.062
Val: Epoch = 2 | Loss 2.334 | Accuracy = 38.793
Train: Epoch = 3 | Loss = 35.473 | Loss2 = 1.407 | Accuracy = 40.938
Val: Epoch = 3 | Loss 1.949 | Accuracy = 37.069
Train: Epoch = 4 | Loss = 35.412 | Loss2 = 1.368 | Accuracy = 43.438
Val: Epoch = 4 | Loss 1.972 | Accuracy = 47.414
[||| NEW BEST on val |||]
Train: Epoch = 5 | Loss = 35.372 | Loss2 = 1.400 | Accuracy = 47.188
Val: Epoch = 5 | Loss 2.022 | Accuracy = 43.103
Train: Epoch = 6 | Loss = 35.392 | Loss2 = 1.339 | Accuracy = 44.688
Val: Epoch = 6 | Loss 2.096 | Accuracy = 43.966
Train: Epoch = 7 | Loss = 35.341 | Loss2 = 1.292 | Accuracy = 50.000
Val: Epoch = 7 | Loss 2.014 | Accuracy = 37.931
Train: Epoch = 8 | Loss = 35.386 | Loss2 = 1.326 | Accuracy = 46.562
Val: Epoch = 8 | Loss 1.855 | Accuracy = 44.828
Train: Epoch

# Stage 2 specific setup (ALPHA-weighted loss2, run 01)

In [None]:
STAGE = 2

# Optimisation hyperparameters
LR = 0.0001           # initial learning rate
MOMENTUM = 0.9        # SGD momentum; in this cell it is only recorded in `parameters`
WEIGHT_DECAY = 4e-5   # weight-decay regularisation

# Learning-rate schedule
NUM_EPOCHS = 150      # total number of passes over the training set
STEP_SIZE = [30, 80]  # epochs at which the learning rate is stepped down
GAMMA = 0.2           # multiplicative learning-rate factor applied at each step-down

# Model memory size (mem_size) and clip length (seq_len)
MEM_SIZE = 512
SEQ_LEN = 7

# Weight applied to loss2 in the training loop; note it is not part of `parameters`.
ALPHA = 0.5

# Snapshot of the run configuration; the Logger class is constructed from it.
parameters = dict(
    DEVICE=DEVICE,
    NUM_CLASSES=NUM_CLASSES,
    BATCH_SIZE=BATCH_SIZE,
    LR=LR,
    MOMENTUM=MOMENTUM,
    WEIGHT_DECAY=WEIGHT_DECAY,
    NUM_EPOCHS=NUM_EPOCHS,
    STEP_SIZE=STEP_SIZE,
    GAMMA=GAMMA,
    MEM_SIZE=MEM_SIZE,
    SEQ_LEN=SEQ_LEN,
)

In [None]:
# Build the GTEA61 train / test datasets used by this stage.
train_dataset = GTEA61(
    DATA_DIR,
    split = 'train',
    transform = spatial_transform,
    seq_len = SEQ_LEN,
    mmaps = True,
    mmaps_transform = spatial_transform_mmaps,
    static_frames = True,
)
test_dataset = GTEA61(
    DATA_DIR,
    split = 'test',
    transform = spatial_transform_val,
    seq_len = SEQ_LEN,
)

# Report how many samples each split contains.
print(
    'Train Dataset: {}'.format(len(train_dataset)),
    'Test Dataset: {}'.format(len(test_dataset)),
    sep = '\n',
)

Train Dataset: 341
Test Dataset: 116


In [None]:
# Batch iterators over the datasets. The training loader shuffles and drops the last
# partial batch; the validation loader keeps the dataset order and every sample.
train_loader = DataLoader(
    train_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
    num_workers = 4,
    drop_last = True,
)
val_loader = DataLoader(
    test_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False,
    num_workers = 4,
)

# Prepare Stage 2

In [None]:
# Stage 2 starts from the best Stage 1 checkpoint; the hard-coded suffix is the one
# Stage 1 produces with RUN = '_run01'.
best_old_stage = generate_model_checkpoint_name(stage=1, n_frames=SEQ_LEN, ms_block = True,
                                                optional = '_2neur_DS__run01')
stage1_dict = os.path.join(model_folder, best_old_stage)
validate = True

model = attention_model_ms(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
# Load the tensors onto the CPU: the freshly built model is still on the CPU and is
# moved to DEVICE further down, and a checkpoint written from a CUDA model would
# otherwise fail to load when DEVICE is 'cpu'.
model.load_state_dict(torch.load(stage1_dict, map_location='cpu'))

# Start from eval mode with everything frozen ...
model.train(False)

for params in model.parameters():
    params.requires_grad = False

# Sub-modules whose gradients the training loop switches off and on again
# around its second backward pass.
layers_on_off = [
    model.resNet.layer4[0].conv1,
    model.resNet.layer4[0].conv2,
    model.resNet.layer4[1].conv1,
    model.resNet.layer4[1].conv2,
    model.resNet.layer4[2].conv1,
    model.resNet.layer4[2].conv2,
    model.resNet.fc,
    model.msBlock
]

# Everything that is trained in Stage 2.
layers_to_train = layers_on_off + [
    model.lstm_cell,
    model.classifier,
    model.classifier_SD
]

# ... then unfreeze the selected sub-modules and put them in training mode.
for layer in layers_to_train:
    for params in layer.parameters():
        params.requires_grad = True
    layer.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()                       # mean over the batch
loss_fn_sum = nn.CrossEntropyLoss(reduction = 'sum')  # summed; normalised by the caller

# The optimiser only receives the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

# The learning rate is multiplied by GAMMA at each epoch listed in STEP_SIZE.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 2

In [None]:
# Stage 2 training loop (ALPHA-weighted variant). Every step does two backward passes
# before one optimizer step:
#   1. `loss`  = mmap loss + action-classification loss, with all trainable layers active;
#   2. `loss2` = ALPHA * loss of the two `static_frames` forward passes, with
#      `layers_on_off` frozen.
RUN = "_run01"

train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy seen so far (despite the name)

# The train loader uses drop_last=True, so only full batches are seen each epoch.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)

iterPerEpoch = len(train_loader)
val_steps = len(val_loader)

# This used to be the bare expression `cudnn.benchmark`, which does nothing;
# assign the flag so the cuDNN auto-tuner is really switched on.
cudnn.benchmark = True
# NOTE(review): anomaly detection is a debugging aid that slows training down;
# consider disabling it for full-length runs.
torch.autograd.set_detect_anomaly(True)
train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, ms_block = True,
                                            optional = '_2neur_DS_alpha_' + RUN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, ms_block = True,
                                                  optional = '_2neur_DS_alpha_' + RUN)

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

# Fixed targets for the two `static_frames` forward passes: class 0 for `inputs`,
# class 1 for `inputs_DS`. They never change, so build them once, directly on DEVICE.
gt_ds1 = torch.zeros(BATCH_SIZE, dtype = torch.long, device = DEVICE)
gt_ds2 = torch.ones(BATCH_SIZE, dtype = torch.long, device = DEVICE)

for epoch in range(NUM_EPOCHS):

    epoch_loss = 0
    epoch_loss2 = 0
    numCorrTrain = 0

    # Validation puts the whole model in eval mode; restore training mode on the
    # trained sub-modules at the start of every epoch.
    for layer in layers_to_train:
        layer.train(True)

    for i, (inputs, inputs_DS, mmaps, targets) in enumerate(train_loader):

        # Same axis swap as applied to the frames below, so targets and predictions line up.
        mmaps = mmaps.permute(1, 0, 2)

        train_iter += 1

        optimizer_fn.zero_grad()

        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        inputVariable_DS = inputs_DS.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)

        # First pass: action logits plus the mmap predictions.
        output_label, _, output_label_mmaps = model(inputVariable, mmaps = True)

        # Flatten the predictions to (N, 2) and the targets to (N,) for the cross-entropy.
        output_label_mmaps = output_label_mmaps.view(SEQ_LEN*BATCH_SIZE*MMAP_LENGTH, 2).to(DEVICE)
        mmaps = torch.flatten(mmaps).long().to(DEVICE)

        # Summed mmap loss divided by BATCH_SIZE * SEQ_LEN, plus the mean classification loss.
        loss = loss_fn_sum(output_label_mmaps, mmaps)/(BATCH_SIZE * SEQ_LEN) + loss_fn(output_label, labelVariable)
        loss.backward()

        # Second pass: freeze `layers_on_off` so that loss2 only adds gradients to the
        # remaining trainable parameters. The gradients of `loss` are kept, and both
        # contributions are applied by the single optimizer step below.
        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = False

        output_label_ds1, _ = model(inputVariable, static_frames = True)
        output_label_ds2, _ = model(inputVariable_DS, static_frames = True)

        loss2 = ALPHA * (loss_fn(output_label_ds1, gt_ds1) + loss_fn(output_label_ds2, gt_ds2))

        loss2.backward()

        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = True

        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        step_loss2 = loss2.item()
        epoch_loss += step_loss
        epoch_loss2 += step_loss2
        # numCorrTrain is the running count of correct predictions within the epoch.
        train_logger_2.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    avg_loss2 = epoch_loss2/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    print('Train: Epoch = {} | Loss = {:.3f} | Loss2 = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, avg_loss2, trainAccuracy))

    # Honour `validate = False`: the previous `validate is not None` test was true for
    # any boolean. `(epoch+1) % 1` is always 0, i.e. validation runs after every epoch.
    if validate and (epoch+1) % 1 == 0:

        model.train(False)
        val_loss_epoch = 0
        numCorr = 0

        # Evaluation needs no gradients: skip building the autograd graph.
        with torch.no_grad():
            for j, (inputs, targets) in enumerate(val_loader):

                val_iter += 1

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)

                val_loss = loss_fn(output_label, labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step

                _, predicted = torch.max(output_label, 1)

                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))

        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        # Keep only the weights of the best-performing epoch on the validation set.
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()

    # Persist the logs every epoch so an interrupted run still leaves usable data.
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

Train: Epoch = 1 | Loss = 36.228 | Loss2 = 0.746 | Accuracy = 33.750
Val: Epoch = 1 | Loss 2.457 | Accuracy = 32.759
[||| NEW BEST on val |||]
Train: Epoch = 2 | Loss = 35.774 | Loss2 = 0.769 | Accuracy = 38.125
Val: Epoch = 2 | Loss 2.180 | Accuracy = 41.379
[||| NEW BEST on val |||]
Train: Epoch = 3 | Loss = 35.584 | Loss2 = 0.776 | Accuracy = 39.062
Val: Epoch = 3 | Loss 2.091 | Accuracy = 43.966
[||| NEW BEST on val |||]
Train: Epoch = 4 | Loss = 35.316 | Loss2 = 0.753 | Accuracy = 50.000
Val: Epoch = 4 | Loss 2.031 | Accuracy = 40.517
Train: Epoch = 5 | Loss = 35.466 | Loss2 = 0.725 | Accuracy = 47.500
Val: Epoch = 5 | Loss 1.925 | Accuracy = 48.276
[||| NEW BEST on val |||]
Train: Epoch = 6 | Loss = 35.349 | Loss2 = 0.758 | Accuracy = 46.562
Val: Epoch = 6 | Loss 1.931 | Accuracy = 51.724
[||| NEW BEST on val |||]
Train: Epoch = 7 | Loss = 35.332 | Loss2 = 0.704 | Accuracy = 49.688
Val: Epoch = 7 | Loss 1.973 | Accuracy = 46.552
Train: Epoch = 8 | Loss = 35.183 | Loss2 = 0.711 | 

# Stage 2 specific setup (ALPHA-weighted loss2, run 02)

In [None]:
STAGE = 2

# Optimisation hyperparameters
LR = 0.0001           # initial learning rate
MOMENTUM = 0.9        # SGD momentum; in this cell it is only recorded in `parameters`
WEIGHT_DECAY = 4e-5   # weight-decay regularisation

# Learning-rate schedule
NUM_EPOCHS = 150      # total number of passes over the training set
STEP_SIZE = [30, 80]  # epochs at which the learning rate is stepped down
GAMMA = 0.2           # multiplicative learning-rate factor applied at each step-down

# Model memory size (mem_size) and clip length (seq_len)
MEM_SIZE = 512
SEQ_LEN = 7

# Weight applied to loss2 in the training loop; note it is not part of `parameters`.
ALPHA = 0.5

# Snapshot of the run configuration; the Logger class is constructed from it.
parameters = dict(
    DEVICE=DEVICE,
    NUM_CLASSES=NUM_CLASSES,
    BATCH_SIZE=BATCH_SIZE,
    LR=LR,
    MOMENTUM=MOMENTUM,
    WEIGHT_DECAY=WEIGHT_DECAY,
    NUM_EPOCHS=NUM_EPOCHS,
    STEP_SIZE=STEP_SIZE,
    GAMMA=GAMMA,
    MEM_SIZE=MEM_SIZE,
    SEQ_LEN=SEQ_LEN,
)

In [None]:
# Build the GTEA61 train / test datasets used by this stage.
train_dataset = GTEA61(
    DATA_DIR,
    split = 'train',
    transform = spatial_transform,
    seq_len = SEQ_LEN,
    mmaps = True,
    mmaps_transform = spatial_transform_mmaps,
    static_frames = True,
)
test_dataset = GTEA61(
    DATA_DIR,
    split = 'test',
    transform = spatial_transform_val,
    seq_len = SEQ_LEN,
)

# Report how many samples each split contains.
print(
    'Train Dataset: {}'.format(len(train_dataset)),
    'Test Dataset: {}'.format(len(test_dataset)),
    sep = '\n',
)

Train Dataset: 341
Test Dataset: 116


In [None]:
# Batch iterators over the datasets. The training loader shuffles and drops the last
# partial batch; the validation loader keeps the dataset order and every sample.
train_loader = DataLoader(
    train_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
    num_workers = 4,
    drop_last = True,
)
val_loader = DataLoader(
    test_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False,
    num_workers = 4,
)

# Prepare Stage 2

In [None]:
# Stage 2 starts from the best Stage 1 checkpoint; the hard-coded suffix is the one
# Stage 1 produces with RUN = '_run01'.
best_old_stage = generate_model_checkpoint_name(stage=1, n_frames=SEQ_LEN, ms_block = True,
                                                optional = '_2neur_DS__run01')
stage1_dict = os.path.join(model_folder, best_old_stage)
validate = True

model = attention_model_ms(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
# Load the tensors onto the CPU: the freshly built model is still on the CPU and is
# moved to DEVICE further down, and a checkpoint written from a CUDA model would
# otherwise fail to load when DEVICE is 'cpu'.
model.load_state_dict(torch.load(stage1_dict, map_location='cpu'))

# Start from eval mode with everything frozen ...
model.train(False)

for params in model.parameters():
    params.requires_grad = False

# Sub-modules whose gradients the training loop switches off and on again
# around its second backward pass.
layers_on_off = [
    model.resNet.layer4[0].conv1,
    model.resNet.layer4[0].conv2,
    model.resNet.layer4[1].conv1,
    model.resNet.layer4[1].conv2,
    model.resNet.layer4[2].conv1,
    model.resNet.layer4[2].conv2,
    model.resNet.fc,
    model.msBlock
]

# Everything that is trained in Stage 2.
layers_to_train = layers_on_off + [
    model.lstm_cell,
    model.classifier,
    model.classifier_SD
]

# ... then unfreeze the selected sub-modules and put them in training mode.
for layer in layers_to_train:
    for params in layer.parameters():
        params.requires_grad = True
    layer.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()                       # mean over the batch
loss_fn_sum = nn.CrossEntropyLoss(reduction = 'sum')  # summed; normalised by the caller

# The optimiser only receives the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

# The learning rate is multiplied by GAMMA at each epoch listed in STEP_SIZE.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 2

In [None]:
RUN = "_run02"

# Stage 2 training loop: joint optimisation of the action-classification loss,
# the motion-map loss and the ALPHA-weighted auxiliary two-class loss.
# NOTE(review): this cell reuses `model`, `optimizer_fn` and `optim_scheduler`
# exactly as they currently exist in the kernel; re-run the Stage 2 preparation
# cell first if a fresh run (weights, optimizer state, LR schedule) is intended.

train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy so far; beating it triggers a checkpoint

# Denominator of the training accuracy: only full batches are counted.
# Assumes train_loader drops the last partial batch - TODO confirm.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)

iterPerEpoch = len(train_loader)
val_steps = len(val_loader)

# The flag must be assigned: a bare `cudnn.benchmark` expression is a no-op.
cudnn.benchmark = True
# NOTE(review): anomaly detection is a debugging aid that slows down training;
# consider turning it off for long runs.
torch.autograd.set_detect_anomaly(True)
train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, ms_block = True,
                                            optional = '_2neur_DS_alpha_' + RUN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, ms_block = True,
                                                  optional = '_2neur_DS_alpha_' + RUN)

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

for epoch in range(NUM_EPOCHS):

    epoch_loss = 0
    epoch_loss2 = 0
    numCorrTrain = 0

    # Validation switches the whole model to eval mode, so the trainable
    # layers are put back in training mode at the start of every epoch.
    for layer in layers_to_train:
        layer.train(True)

    for inputs, inputs_DS, mmaps, targets in train_loader:

        train_iter += 1
        # Use the real batch size instead of the BATCH_SIZE constant so a
        # smaller final batch cannot break the reshapes below.
        batch_size = targets.size(0)

        optimizer_fn.zero_grad()

        # Swap the first two axes of the inputs (presumably batch-first to
        # sequence-first - confirm against the dataset/model).
        mmaps = mmaps.permute(1, 0, 2)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        inputVariable_DS = inputs_DS.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)

        output_label, _, output_label_mmaps = model(inputVariable, mmaps = True)

        # One two-way prediction per motion-map element.
        output_label_mmaps = output_label_mmaps.view(SEQ_LEN * batch_size * MMAP_LENGTH, 2).to(DEVICE)
        mmaps = torch.flatten(mmaps).long().to(DEVICE)

        # Motion-map loss is summed and normalised per (sample, frame), then
        # added to the mean classification loss.
        loss = loss_fn_sum(output_label_mmaps, mmaps) / (batch_size * SEQ_LEN) + loss_fn(output_label, labelVariable)
        loss.backward()

        # Auxiliary loss: layers_on_off are frozen so loss2 cannot add
        # gradients to them. try/finally guarantees they are unfrozen again
        # even if the cell is interrupted mid-step.
        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = False
        try:
            output_label_ds1, _ = model(inputVariable, static_frames = True)
            output_label_ds2, _ = model(inputVariable_DS, static_frames = True)

            # Class 0 for the regular frames, class 1 for the `inputs_DS` frames.
            gt_ds1 = torch.zeros(batch_size, dtype=torch.long, device=DEVICE)
            gt_ds2 = torch.ones(batch_size, dtype=torch.long, device=DEVICE)

            loss2 = ALPHA * (loss_fn(output_label_ds1, gt_ds1) + loss_fn(output_label_ds2, gt_ds2))
            loss2.backward()
        finally:
            for layer in layers_on_off:
                for params in layer.parameters():
                    params.requires_grad = True

        # Single optimizer step on the gradients accumulated by both backward passes.
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        step_loss2 = loss2.item()
        epoch_loss += step_loss
        epoch_loss2 += step_loss2
        train_logger_2.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    avg_loss2 = epoch_loss2/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    print('Train: Epoch = {} | Loss = {:.3f} | Loss2 = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, avg_loss2, trainAccuracy))

    # `validate` is a boolean flag; testing `is not None` would run validation
    # even when it is set to False.
    if validate:

        model.train(False)
        val_loss_epoch = 0
        numCorr = 0

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for inputs, targets in val_loader:

                val_iter += 1

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)

                val_loss = loss_fn(output_label, labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step

                _, predicted = torch.max(output_label, 1)

                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))

        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        # Keep only the weights of the best-performing epoch.
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()

    # Logs are rewritten after every epoch so an interrupted run keeps its history.
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

Train: Epoch = 1 | Loss = 35.907 | Loss2 = 0.856 | Accuracy = 35.938
Val: Epoch = 1 | Loss 2.798 | Accuracy = 31.897
[||| NEW BEST on val |||]
Train: Epoch = 2 | Loss = 35.651 | Loss2 = 0.790 | Accuracy = 42.500
Val: Epoch = 2 | Loss 2.363 | Accuracy = 33.621
[||| NEW BEST on val |||]
Train: Epoch = 3 | Loss = 35.557 | Loss2 = 0.763 | Accuracy = 42.500
Val: Epoch = 3 | Loss 2.259 | Accuracy = 39.655
[||| NEW BEST on val |||]
Train: Epoch = 4 | Loss = 35.441 | Loss2 = 0.752 | Accuracy = 45.312
Val: Epoch = 4 | Loss 2.543 | Accuracy = 36.207
Train: Epoch = 5 | Loss = 35.535 | Loss2 = 0.747 | Accuracy = 40.312
Val: Epoch = 5 | Loss 2.564 | Accuracy = 37.931
Train: Epoch = 6 | Loss = 35.384 | Loss2 = 0.710 | Accuracy = 45.312
Val: Epoch = 6 | Loss 2.243 | Accuracy = 37.931
Train: Epoch = 7 | Loss = 35.302 | Loss2 = 0.731 | Accuracy = 44.062
Val: Epoch = 7 | Loss 2.385 | Accuracy = 37.069
Train: Epoch = 8 | Loss = 35.405 | Loss2 = 0.731 | Accuracy = 43.125
Val: Epoch = 8 | Loss 2.322 | Accu

# Stage 2 specific setup

In [None]:
# Hyperparameters for Stage 2.
STAGE = 2

# --- Optimisation ---
LR = 1e-4               # initial learning rate handed to the optimizer
MOMENTUM = 0.9          # SGD-style momentum; recorded in `parameters`, not consumed by the Adam optimizer built below
WEIGHT_DECAY = 4e-5     # weight-decay regularisation strength

# --- Schedule ---
NUM_EPOCHS = 150        # number of passes over the training set
STEP_SIZE = [30, 80]    # epochs used as `milestones` of the MultiStepLR scheduler
GAMMA = 0.2             # factor applied to the learning rate at each milestone

# --- Model / data ---
MEM_SIZE = 512          # passed to the model as `mem_size`
SEQ_LEN = 7             # passed to the datasets as `seq_len`
ALPHA = 0.1             # weight of the auxiliary loss (`loss2`) in the training loop

# Snapshot of the run configuration, forwarded to the loggers as keyword arguments.
parameters = dict(
    DEVICE=DEVICE,
    NUM_CLASSES=NUM_CLASSES,
    BATCH_SIZE=BATCH_SIZE,
    LR=LR,
    MOMENTUM=MOMENTUM,
    WEIGHT_DECAY=WEIGHT_DECAY,
    NUM_EPOCHS=NUM_EPOCHS,
    STEP_SIZE=STEP_SIZE,
    GAMMA=GAMMA,
    MEM_SIZE=MEM_SIZE,
    SEQ_LEN=SEQ_LEN,
    ALPHA=ALPHA,
)

In [None]:
# Build the Stage 2 datasets. The training split additionally requests motion
# maps (with their own transform) and static frames; the test split requests
# only the frame sequences.
train_dataset = GTEA61(
    DATA_DIR,
    split='train',
    transform=spatial_transform,
    seq_len=SEQ_LEN,
    mmaps=True,
    mmaps_transform=spatial_transform_mmaps,
    static_frames=True,
)
test_dataset = GTEA61(
    DATA_DIR,
    split='test',
    transform=spatial_transform_val,
    seq_len=SEQ_LEN,
)

# Report how many samples each split contains.
print(
    'Train Dataset: {}'.format(len(train_dataset)),
    'Test Dataset: {}'.format(len(test_dataset)),
    sep='\n',
)

Train Dataset: 341
Test Dataset: 116


In [None]:
# Wrap the datasets in DataLoaders (batching, shuffling, worker processes).
# Training batches are shuffled and the last partial batch is dropped;
# validation iterates the test split in order and keeps every sample.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
    shuffle=True,
    drop_last=True,
)
val_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
    shuffle=False,
)

# Prepare Stage 2

In [None]:
# Prepare Stage 2: restore the best Stage 1 weights, freeze everything, then
# unfreeze only the layers that Stage 2 fine-tunes.
best_old_stage = generate_model_checkpoint_name(stage=1, n_frames=SEQ_LEN, ms_block = True,
                                                optional = '_2neur_DS__run01')
stage1_dict = os.path.join(model_folder, best_old_stage)
validate = True  # run validation (and checkpointing) after every training epoch

model = attention_model_ms(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
# The model still lives on the CPU at this point, so load the checkpoint onto
# the CPU as well. Without map_location a checkpoint saved from CUDA tensors
# cannot be loaded when DEVICE is 'cpu'.
model.load_state_dict(torch.load(stage1_dict, map_location='cpu'))

# Start from a fully frozen model in eval mode.
model.train(False)

for params in model.parameters():
    params.requires_grad = False

# Layers whose gradients are switched off while the auxiliary loss is
# back-propagated in the training loop.
layers_on_off = [
    model.resNet.layer4[0].conv1,
    model.resNet.layer4[0].conv2,
    model.resNet.layer4[1].conv1,
    model.resNet.layer4[1].conv2,
    model.resNet.layer4[2].conv1,
    model.resNet.layer4[2].conv2,
    model.resNet.fc,
    model.msBlock
]

# Everything that is optimised in Stage 2.
layers_to_train = layers_on_off + [
    model.lstm_cell,
    model.classifier,
    model.classifier_SD
]

# Unfreeze the selected layers and put them in training mode; every module
# not listed above stays frozen and in eval mode.
for layer in layers_to_train:
    for params in layer.parameters():
        params.requires_grad = True
    layer.train(True)

model.to(DEVICE)

# Mean-reduced cross entropy, plus a sum-reduced variant that the training
# loop normalises by hand.
loss_fn = nn.CrossEntropyLoss()
loss_fn_sum = nn.CrossEntropyLoss(reduction = 'sum')

# Only the parameters that require gradients are handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

# The learning rate is multiplied by GAMMA at each epoch listed in STEP_SIZE.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/checkpoints/resnet34-333f7ec4.pth


HBox(children=(FloatProgress(value=0.0, max=87306240.0), HTML(value='')))




# Stage 2

In [None]:
# Stage 2 training loop ('_dsloss' run): joint optimisation of the
# action-classification loss, the motion-map loss and the ALPHA-weighted
# auxiliary two-class loss. The classification loss and the auxiliary loss are
# tracked separately per epoch.
# NOTE(review): this cell reuses `model`, `optimizer_fn` and `optim_scheduler`
# exactly as they currently exist in the kernel; re-run the Stage 2 preparation
# cell first if a fresh run (weights, optimizer state, LR schedule) is intended.

train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy so far; beating it triggers a checkpoint

# Denominator of the training accuracy: only full batches are counted.
# Assumes train_loader drops the last partial batch - TODO confirm.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)

iterPerEpoch = len(train_loader)
val_steps = len(val_loader)

# The flag must be assigned: a bare `cudnn.benchmark` expression is a no-op.
cudnn.benchmark = True
# NOTE(review): anomaly detection is a debugging aid that slows down training;
# consider turning it off for long runs.
torch.autograd.set_detect_anomaly(True)
train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, ms_block = True,
                                            optional = '_dsloss')
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, ms_block = True,
                                                  optional = '_dsloss')

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

for epoch in range(NUM_EPOCHS):

    epoch_loss1 = 0
    epoch_loss2 = 0
    numCorrTrain = 0

    # Validation switches the whole model to eval mode, so the trainable
    # layers are put back in training mode at the start of every epoch.
    for layer in layers_to_train:
        layer.train(True)

    for inputs, inputs_DS, mmaps, targets in train_loader:

        train_iter += 1
        # Use the real batch size instead of the BATCH_SIZE constant so a
        # smaller final batch cannot break the reshapes below.
        batch_size = targets.size(0)

        optimizer_fn.zero_grad()

        # Swap the first two axes of the inputs (presumably batch-first to
        # sequence-first - confirm against the dataset/model).
        mmaps = mmaps.permute(1, 0, 2)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        inputVariable_DS = inputs_DS.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)

        output_label, _, output_label_mmaps = model(inputVariable, mmaps = True)

        # One two-way prediction per motion-map element.
        output_label_mmaps = output_label_mmaps.view(SEQ_LEN * batch_size * MMAP_LENGTH, 2).to(DEVICE)
        mmaps = torch.flatten(mmaps).long().to(DEVICE)

        # loss1 is the classification loss alone (the value that gets logged);
        # the optimised loss adds the motion-map loss, summed and normalised
        # per (sample, frame).
        loss1 = loss_fn(output_label, labelVariable)
        loss = loss_fn_sum(output_label_mmaps, mmaps) / (batch_size * SEQ_LEN) + loss1
        loss.backward()

        # Auxiliary loss: layers_on_off are frozen so loss2 cannot add
        # gradients to them. try/finally guarantees they are unfrozen again
        # even if the cell is interrupted mid-step.
        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = False
        try:
            output_label_ds1, _ = model(inputVariable, static_frames = True)
            output_label_ds2, _ = model(inputVariable_DS, static_frames = True)

            # Class 0 for the regular frames, class 1 for the `inputs_DS` frames.
            gt_ds1 = torch.zeros(batch_size, dtype=torch.long, device=DEVICE)
            gt_ds2 = torch.ones(batch_size, dtype=torch.long, device=DEVICE)

            loss2 = ALPHA * (loss_fn(output_label_ds1, gt_ds1) + loss_fn(output_label_ds2, gt_ds2))
            loss2.backward()
        finally:
            for layer in layers_on_off:
                for params in layer.parameters():
                    params.requires_grad = True

        # Single optimizer step on the gradients accumulated by both backward passes.
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss1 = loss1.item()
        step_loss2 = loss2.item()
        epoch_loss1 += step_loss1
        epoch_loss2 += step_loss2
        # Per-step training data is not logged in this run; only epoch-level
        # values are recorded below.

    avg_loss1 = epoch_loss1/iterPerEpoch
    avg_loss2 = epoch_loss2/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss1, avg_loss2)

    print('Train: Epoch = {} | Loss = {:.3f} | Loss2 = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss1, avg_loss2, trainAccuracy))

    # `validate` is a boolean flag; testing `is not None` would run validation
    # even when it is set to False.
    if validate:

        model.train(False)
        val_loss_epoch = 0
        numCorr = 0

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for inputs, targets in val_loader:

                val_iter += 1

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)

                val_loss = loss_fn(output_label, labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step

                _, predicted = torch.max(output_label, 1)

                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))

        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        # Keep only the weights of the best-performing epoch.
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()

    # Logs are rewritten after every epoch so an interrupted run keeps its history.
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

Train: Epoch = 1 | Loss = 2.216 | Loss2 = 0.155 | Accuracy = 35.312
Val: Epoch = 1 | Loss 2.283 | Accuracy = 35.345
[||| NEW BEST on val |||]
Train: Epoch = 2 | Loss = 2.054 | Loss2 = 0.159 | Accuracy = 42.500
Val: Epoch = 2 | Loss 2.341 | Accuracy = 37.931
[||| NEW BEST on val |||]
Train: Epoch = 3 | Loss = 1.892 | Loss2 = 0.154 | Accuracy = 41.875
Val: Epoch = 3 | Loss 2.404 | Accuracy = 35.345
Train: Epoch = 4 | Loss = 1.948 | Loss2 = 0.151 | Accuracy = 43.125
Val: Epoch = 4 | Loss 2.089 | Accuracy = 43.103
[||| NEW BEST on val |||]
Train: Epoch = 5 | Loss = 1.890 | Loss2 = 0.155 | Accuracy = 47.812
Val: Epoch = 5 | Loss 2.052 | Accuracy = 41.379
Train: Epoch = 6 | Loss = 1.788 | Loss2 = 0.153 | Accuracy = 47.500
Val: Epoch = 6 | Loss 2.105 | Accuracy = 44.828
[||| NEW BEST on val |||]
Train: Epoch = 7 | Loss = 1.801 | Loss2 = 0.149 | Accuracy = 50.313
Val: Epoch = 7 | Loss 2.241 | Accuracy = 41.379
Train: Epoch = 8 | Loss = 1.838 | Loss2 = 0.154 | Accuracy = 44.375
Val: Epoch = 8 |

In [None]:
RUN = "_run06_a0.1"

# Stage 2 training loop: joint optimisation of the action-classification loss,
# the motion-map loss and the ALPHA-weighted auxiliary two-class loss.
# NOTE(review): this cell reuses `model`, `optimizer_fn` and `optim_scheduler`
# exactly as they currently exist in the kernel; re-run the Stage 2 preparation
# cell first if a fresh run (weights, optimizer state, LR schedule) is intended.

train_iter = 0
val_iter = 0
min_accuracy = 0  # best validation accuracy so far; beating it triggers a checkpoint

# Denominator of the training accuracy: only full batches are counted.
# Assumes train_loader drops the last partial batch - TODO confirm.
trainSamples = len(train_dataset) - (len(train_dataset) % BATCH_SIZE)
val_samples = len(test_dataset)

iterPerEpoch = len(train_loader)
val_steps = len(val_loader)

# The flag must be assigned: a bare `cudnn.benchmark` expression is a no-op.
cudnn.benchmark = True
# NOTE(review): anomaly detection is a debugging aid that slows down training;
# consider turning it off for long runs.
torch.autograd.set_detect_anomaly(True)
train_log, val_log = generate_log_filenames(STAGE, SEQ_LEN, ms_block = True,
                                            optional = '_2neur_DS_alpha_' + RUN)
model_checkpoint = generate_model_checkpoint_name(STAGE, SEQ_LEN, ms_block = True,
                                                  optional = '_2neur_DS_alpha_' + RUN)

train_log_file = os.path.join(model_folder, train_log)
val_log_file = os.path.join(model_folder, val_log)
train_logger_2 = Logger(**parameters)
val_logger_2 = Logger(**parameters)

for epoch in range(NUM_EPOCHS):

    epoch_loss = 0
    epoch_loss2 = 0
    numCorrTrain = 0

    # Validation switches the whole model to eval mode, so the trainable
    # layers are put back in training mode at the start of every epoch.
    for layer in layers_to_train:
        layer.train(True)

    for inputs, inputs_DS, mmaps, targets in train_loader:

        train_iter += 1
        # Use the real batch size instead of the BATCH_SIZE constant so a
        # smaller final batch cannot break the reshapes below.
        batch_size = targets.size(0)

        optimizer_fn.zero_grad()

        # Swap the first two axes of the inputs (presumably batch-first to
        # sequence-first - confirm against the dataset/model).
        mmaps = mmaps.permute(1, 0, 2)
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        inputVariable_DS = inputs_DS.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)

        output_label, _, output_label_mmaps = model(inputVariable, mmaps = True)

        # One two-way prediction per motion-map element.
        output_label_mmaps = output_label_mmaps.view(SEQ_LEN * batch_size * MMAP_LENGTH, 2).to(DEVICE)
        mmaps = torch.flatten(mmaps).long().to(DEVICE)

        # Motion-map loss is summed and normalised per (sample, frame), then
        # added to the mean classification loss.
        loss = loss_fn_sum(output_label_mmaps, mmaps) / (batch_size * SEQ_LEN) + loss_fn(output_label, labelVariable)
        loss.backward()

        # Auxiliary loss: layers_on_off are frozen so loss2 cannot add
        # gradients to them. try/finally guarantees they are unfrozen again
        # even if the cell is interrupted mid-step.
        for layer in layers_on_off:
            for params in layer.parameters():
                params.requires_grad = False
        try:
            output_label_ds1, _ = model(inputVariable, static_frames = True)
            output_label_ds2, _ = model(inputVariable_DS, static_frames = True)

            # Class 0 for the regular frames, class 1 for the `inputs_DS` frames.
            gt_ds1 = torch.zeros(batch_size, dtype=torch.long, device=DEVICE)
            gt_ds2 = torch.ones(batch_size, dtype=torch.long, device=DEVICE)

            loss2 = ALPHA * (loss_fn(output_label_ds1, gt_ds1) + loss_fn(output_label_ds2, gt_ds2))
            loss2.backward()
        finally:
            for layer in layers_on_off:
                for params in layer.parameters():
                    params.requires_grad = True

        # Single optimizer step on the gradients accumulated by both backward passes.
        optimizer_fn.step()

        _, predicted = torch.max(output_label.detach(), 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        step_loss = loss.item()
        step_loss2 = loss2.item()
        epoch_loss += step_loss
        epoch_loss2 += step_loss2
        train_logger_2.add_step_data(train_iter, numCorrTrain, step_loss)

    avg_loss = epoch_loss/iterPerEpoch
    avg_loss2 = epoch_loss2/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    train_logger_2.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    print('Train: Epoch = {} | Loss = {:.3f} | Loss2 = {:.3f} | Accuracy = {:.3f}'.format(epoch+1, avg_loss, avg_loss2, trainAccuracy))

    # `validate` is a boolean flag; testing `is not None` would run validation
    # even when it is set to False.
    if validate:

        model.train(False)
        val_loss_epoch = 0
        numCorr = 0

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for inputs, targets in val_loader:

                val_iter += 1

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)

                val_loss = loss_fn(output_label, labelVariable)
                val_loss_step = val_loss.item()
                val_loss_epoch += val_loss_step

                _, predicted = torch.max(output_label, 1)

                numCorr += (predicted == labelVariable).sum().item()
                val_logger_2.add_step_data(val_iter, numCorr, val_loss_step)

        val_accuracy = (numCorr / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_steps

        print('Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))

        val_logger_2.add_epoch_data(epoch+1, val_accuracy, avg_val_loss)

        # Keep only the weights of the best-performing epoch.
        if val_accuracy > min_accuracy:
            print("[||| NEW BEST on val |||]")
            save_path_model = os.path.join(model_folder, model_checkpoint)
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    optim_scheduler.step()

    # Logs are rewritten after every epoch so an interrupted run keeps its history.
    train_logger_2.save(train_log_file)
    val_logger_2.save(val_log_file)

Train: Epoch = 1 | Loss = 34.050 | Loss2 = 0.145 | Accuracy = 86.250
Val: Epoch = 1 | Loss 1.509 | Accuracy = 62.069
[||| NEW BEST on val |||]
Train: Epoch = 2 | Loss = 34.105 | Loss2 = 0.140 | Accuracy = 80.938
Val: Epoch = 2 | Loss 1.516 | Accuracy = 61.207
Train: Epoch = 3 | Loss = 34.112 | Loss2 = 0.140 | Accuracy = 81.250
Val: Epoch = 3 | Loss 1.506 | Accuracy = 63.793
[||| NEW BEST on val |||]
Train: Epoch = 4 | Loss = 34.064 | Loss2 = 0.137 | Accuracy = 82.812
Val: Epoch = 4 | Loss 1.487 | Accuracy = 62.931
Train: Epoch = 5 | Loss = 34.042 | Loss2 = 0.144 | Accuracy = 81.250
Val: Epoch = 5 | Loss 1.517 | Accuracy = 63.793
Train: Epoch = 6 | Loss = 34.127 | Loss2 = 0.138 | Accuracy = 81.875
Val: Epoch = 6 | Loss 1.554 | Accuracy = 59.483
Train: Epoch = 7 | Loss = 34.148 | Loss2 = 0.141 | Accuracy = 80.000
Val: Epoch = 7 | Loss 1.567 | Accuracy = 59.483
Train: Epoch = 8 | Loss = 34.008 | Loss2 = 0.143 | Accuracy = 84.375
Val: Epoch = 8 | Loss 1.546 | Accuracy = 60.345
Train: Epoch