In [1]:
%run data_preprocess.ipynb
%run models.ipynb
import os
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from scipy.special import expit
from torch.autograd import Variable

In [2]:
# Shared criterion. With its default reduction, CrossEntropyLoss returns the
# mean over every (prediction, target) pair it is given.
loss_fn = nn.CrossEntropyLoss()


def calculate_loss(x, y, lengths):
    """Cross-entropy over the leading ``lengths[i] - 1`` steps of each sample.

    For every sample ``i`` the first ``lengths[i] - 1`` entries of ``x[i]``
    and ``y[i]`` are kept, the kept slices of all samples are concatenated
    along dim 0, and ``loss_fn`` is applied once to the result.  The value is
    therefore a mean over all kept steps of the batch; it is NOT additionally
    divided by the batch size.

    Args:
        x: per-sample predictions, indexable by sample
           (presumably ``(batch, steps, vocab)`` scores -- TODO confirm).
        y: per-sample targets, indexable by sample
           (presumably ``(batch, steps)`` class indices -- TODO confirm).
        lengths: per-sample lengths; ``lengths[i] - 1`` steps of sample ``i``
           are used.  NOTE(review): the ``- 1`` looks like it accounts for
           callers slicing the first token off the targets -- confirm.

    Returns:
        The scalar tensor produced by ``loss_fn``.

    Raises:
        ValueError: if the batch is empty.
    """
    batch_size = len(x)
    if batch_size == 0:
        raise ValueError("calculate_loss() needs at least one sequence")

    # Collect the trimmed slices first and concatenate once, instead of
    # re-copying a growing tensor on every iteration.
    kept_predictions = []
    kept_targets = []
    for i in range(batch_size):
        seq_len = lengths[i] - 1
        kept_predictions.append(x[i][:seq_len])
        kept_targets.append(y[i][:seq_len])

    return loss_fn(torch.cat(kept_predictions, dim=0),
                   torch.cat(kept_targets, dim=0))

In [3]:
def minibatch(data):
    """Collate ``(features, caption)`` pairs into padded batch tensors.

    Pairs are ordered by caption length, longest first (ties keep their
    input order).  The feature tensors are stacked along a new leading
    dimension and the captions are copied into a zero-initialised long
    tensor, so positions past a caption's end hold 0.

    The input list is not modified.
    (Presumably meant as a ``DataLoader`` ``collate_fn`` -- confirm.)

    Args:
        data: sequence of ``(feature_tensor, caption)`` pairs; all feature
            tensors must be stackable with ``torch.stack``.

    Returns:
        ``(avi_data, targets, lengths)`` where ``avi_data`` is the stacked
        features, ``targets`` is a ``(len(data), max_caption_len)`` long
        tensor and ``lengths`` is the list of caption lengths, all in
        longest-first order.

    Raises:
        ValueError: if ``data`` is empty.
    """
    # sorted() rather than list.sort(): the caller's list keeps its order.
    ordered = sorted(data, key=lambda pair: len(pair[1]), reverse=True)
    if not ordered:
        raise ValueError("minibatch() needs at least one (features, caption) pair")

    avi_data, captions = zip(*ordered)
    avi_data = torch.stack(avi_data, 0)

    # Merge captions (from tuple of 1D tensor to 2D tensor).
    lengths = [len(cap) for cap in captions]
    targets = torch.zeros(len(captions), max(lengths)).long()
    for row, (cap, n_tokens) in enumerate(zip(captions, lengths)):
        targets[row, :n_tokens] = cap
    return avi_data, targets, lengths

In [5]:
def train(model, epoch, train_loader = train_dataloader, optimizer=None):
    """Run one pass over ``train_loader`` and print the last batch's loss.

    The model is switched to training mode and moved to the GPU.  For every
    batch the model is called in ``'train'`` mode, the loss is computed with
    ``calculate_loss`` against the targets with their first column dropped,
    and one optimizer step is taken.

    Args:
        model: module to train; called as
            ``model(feats, target_sentences=..., mode='train', tr_steps=epoch)``
            and expected to return ``(seq_logProb, seq_predictions)``.
        epoch: printed at the start and forwarded to the model as
            ``tr_steps``.
        train_loader: iterable of ``(avi_feats, ground_truths, lengths)``
            batches.
        optimizer: optional optimizer to step.  When omitted, a new
            ``Adam(lr=0.001)`` is created for this call, which means Adam's
            running moment estimates start from scratch on every call.  Pass
            one optimizer in across epochs to keep that state.

    Returns:
        None.  The epoch number and the loss of the final batch are printed.
    """
    model.train()
    print(epoch)
    model = model.cuda()
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    loss = None
    for avi_feats, ground_truths, lengths in train_loader:
        # Plain tensors take part in autograd directly; the deprecated
        # Variable wrapper is not needed.
        avi_feats, ground_truths = avi_feats.cuda(), ground_truths.cuda()

        optimizer.zero_grad()
        seq_logProb, _ = model(avi_feats, target_sentences=ground_truths, mode='train', tr_steps=epoch)

        # Drop the first target column before scoring
        # (presumably the start-of-sentence token -- confirm).
        ground_truths = ground_truths[:, 1:]
        loss = calculate_loss(seq_logProb, ground_truths, lengths)
        loss.backward()
        optimizer.step()

    if loss is None:
        # An empty loader used to fail here with an unbound `loss`.
        print("train_loader produced no batches; nothing was trained")
        return

    print(loss.item())

In [6]:
def evaluate(test_loader = test_dataloader):
    """Run the module-level ``model`` in inference mode on the first test batch.

    Only the first batch of ``test_loader`` is used.  The function reads
    ``model`` from the notebook's global namespace, so that name must be
    defined before this is called.

    Args:
        test_loader: iterable of ``(avi_feats, ground_truths, lengths)``
            batches.

    Returns:
        ``(test_predictions, test_truth)``: the first three predicted
        sequences and the first three target rows (first column dropped) of
        that batch, or ``(None, None)`` if the loader yields nothing.
    """
    # set model to evaluation(testing) mode
    model.eval()
    test_predictions, test_truth = None, None

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for avi_feats, ground_truths, lengths in test_loader:
            avi_feats, ground_truths = avi_feats.cuda(), ground_truths.cuda()

            seq_logProb, seq_predictions = model(avi_feats, mode='inference')
            ground_truths = ground_truths[:, 1:]
            test_predictions = seq_predictions[:3]
            test_truth = ground_truths[:3]
            break  # only the first batch is inspected

    return test_predictions, test_truth

In [7]:
# Training driver: build the model, train for `epochs_n` epochs, save it, and
# report the wall-clock time spent in the epoch loop.
epochs_n = 10
ModelSaveLoc = 'SavedModel'
# Create the output directory if needed; exist_ok avoids the separate
# exists() check and the race between checking and creating.
os.makedirs(ModelSaveLoc, exist_ok=True)

# NOTE(review): the +4 presumably reserves ids for special tokens
# (pad / start / end / unknown) -- confirm against the vocabulary builder.
vocab_size = len(i2w) + 4

encoder = encoderRNN()
# NOTE(review): 512, 1024 and 0.3 are passed positionally; presumably hidden
# size, word-embedding size and dropout -- confirm against decoderRNN.
decoder = decoderRNN(512, vocab_size, vocab_size, 1024, 0.3)
model = MODELS(encoder=encoder, decoder=decoder)

start = time.time()
for epoch in range(epochs_n):
    # Epochs are reported 1-based to train().
    train(model,epoch+1)
    evaluate()

end = time.time()
# NOTE(review): this pickles the whole module object (not a state_dict), and
# the ".h5" suffix does not mean HDF5 here. Path kept as-is so that existing
# loaders keep working.
torch.save(model, "{}/{}.h5".format(ModelSaveLoc, 'model0'))
print("Training finished {}  elapsed time: {: .3f} seconds. \n".format('test', end-start))

1
4.175546169281006
2
3.9250540733337402
3
3.7937264442443848
4
3.2259786128997803
5
3.3789379596710205
6
3.364809989929199
7
3.088815212249756
8
3.287259578704834
9
3.0930025577545166
10
3.202562093734741
Training finished test  elapsed time:  595.861 seconds. 

