In [None]:
import os
import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from tensorboardX import SummaryWriter


In [None]:
from data import get_loader, load_coco_files

# Dataset splits used throughout the notebook; each one gets its own loader.
phases = ['train', 'test_A']

# Per-split loader settings, keyed by phase name.
batch_size = {'train': 200, 'test_A': 1000}
shuffle = {'train': True, 'test_A': False}
num_workers = {'train': 4, 'test_A': 1}
pin_memory = {'train': True, 'test_A': False}

loaders = {}
for phase in phases:
    folder_dir = os.path.join('', phase)
    # The three input files of a split live in its folder and share the '<phase>_' prefix.
    file_names, file_vectors, file_captions = (
        os.path.join(folder_dir, template.format(phase))
        for template in ('{}_images_names.txt', '{}_images_vectors.bin', '{}_captions.txt')
    )
    # NOTE(review): 2048 is presumably the visual feature dimensionality - confirm against load_coco_files.
    images_names, visual_feats, captions = load_coco_files(file_names, file_vectors, file_captions, 2048)
    loaders[phase] = get_loader(
        images_names, visual_feats, captions,
        batch_size[phase], shuffle[phase], num_workers[phase], pin_memory[phase],
    )

    if phase == 'train':
        # Each caption entry unpacks into two fields; the second one (the texts)
        # is what the text descriptor is fitted on in the next cell.
        names, train_texts = zip(*captions)


## Select the text representation model


In [None]:
# Choose how captions are turned into feature vectors. Every option is fitted on
# the training captions (`train_texts`) and exposes `out_size`, which the
# regressor cell uses as its input size.
text_descriptor_name = 'tf-idf'
# 'embedding' must be listed here, otherwise its branch below is unreachable.
# NOTE(review): the 'rnn' training path calls text_descriptor.word_to_idx(...) and
# text_descriptor.parameters(); presumably only WordEmbedding provides those - confirm.
assert text_descriptor_name in ['bow', 'tf-idf', 'lsa', 'embedding']

# Imports stay inside the branches so only the selected descriptor module is loaded.
# NOTE(review): the keyword `lowecase` looks like a typo for `lowercase`; it is kept
# as-is because the descriptor classes' signatures are not visible from this cell.
if text_descriptor_name == 'bow':
    from text_descriptors.bow import TextDescriptor
    text_descriptor = TextDescriptor(type='bow', texts=train_texts, lowecase=False, ngrams_range=(1,1),
                                     max_df=.8, min_df=.01)
elif text_descriptor_name == 'tf-idf':
    from text_descriptors.bow import TextDescriptor
    text_descriptor = TextDescriptor(type='tf-idf', texts=train_texts, lowecase=False, ngrams_range=(1,3),
                                     max_df=.8, min_df=.01)
elif text_descriptor_name == 'lsa':
    from text_descriptors.lsa import LSADescriptor
    text_descriptor = LSADescriptor(type='tf-idf', texts=train_texts, lowecase=False, ngrams_range=(1,3),
                                    max_df=.8, min_df=.01, n_components=100)
elif text_descriptor_name == 'embedding':
    from text_descriptors.embedding import WordEmbedding
    text_descriptor = WordEmbedding(texts=train_texts, lowecase=False, ngrams_range=(1,1), max_df=.8, min_df=.01)

print(text_descriptor.out_size)


## Define the regression model


In [None]:
from text_encoders.regressor import MLP, RNN

# Which regressor to build on top of the text descriptor: 'mlp' or 'rnn'.
regression_model_name = 'mlp'

# Both regressors take the descriptor's output width as their input size.
if regression_model_name == 'mlp':
    regression_model = MLP(in_size=text_descriptor.out_size, h_size=2048)
elif regression_model_name == 'rnn':
    regression_model = RNN(in_size=text_descriptor.out_size, h_size=2048)
else:
    # Fail here instead of leaving `regression_model` undefined or, in a live
    # kernel, silently reusing a model built by an earlier run of this cell.
    raise ValueError(
        "Unknown regression_model_name {!r}; expected 'mlp' or 'rnn'".format(regression_model_name)
    )


## Loss function


In [None]:
# Mean-squared-error objective; the training loop evaluates it between the
# regressor's output and each batch's visual features.
criterion = nn.MSELoss()


## Optimizers


In [None]:
# Adam over the regressor's parameters. The 'rnn' configuration additionally
# optimizes the text descriptor's own parameters with a second Adam instance.
encoder_optimizer = optim.Adam(params=regression_model.parameters(), lr=1e-3)
if regression_model_name == 'rnn':
    embedding_optimizer = optim.Adam(params=text_descriptor.parameters(), lr=1e-3)


# Initialize the TensorBoard logger

In [None]:
# Name the run '<descriptor>-<regressor>' and append a second-resolution
# timestamp, so each execution of this cell logs under its own directory.
exp_name = '{}-{}'.format(text_descriptor_name, regression_model_name)
datetime_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
run_dir = os.path.join('./log/runs/', '{}-{}'.format(exp_name, datetime_str))
writer = SummaryWriter(log_dir=run_dir)


# Train Regression

In [None]:
# Run the regressor over every phase for a fixed number of epochs. Only 'train'
# batches update weights; the other phase is forward-only so that its loss can
# be logged alongside the training loss.
epochs = 100
for e in range(epochs):
    for phase in phases:
        is_train = phase == 'train'
        # Switch layers such as dropout / batch-norm between training and
        # evaluation behaviour. Assumes `regression_model` is an nn.Module (it
        # already exposes .parameters() and is called like one).
        regression_model.train(is_train)

        # Running sum of the batch losses of this phase within the current epoch.
        loss_count = 0
        for i, (images_names, visual_feats, captions) in enumerate(loaders[phase]):
            if is_train:
                # backward() adds into .grad, so stale gradients from the
                # previous batch must be cleared before computing new ones.
                encoder_optimizer.zero_grad()
                if regression_model_name == 'rnn':
                    embedding_optimizer.zero_grad()

            # Build the autograd graph only when this batch will be back-propagated.
            with torch.set_grad_enabled(is_train):
                if regression_model_name == 'mlp':
                    descriptors = text_descriptor.transform(captions)
                    encodes = regression_model(descriptors)
                elif regression_model_name == 'rnn':
                    idx_texts = text_descriptor.word_to_idx(captions)
                    descriptors = text_descriptor(idx_texts)
                    encodes = regression_model(descriptors)

                # Evaluate the loss function
                loss = criterion(encodes, visual_feats)

            if is_train:
                loss.backward()
                encoder_optimizer.step()
                if regression_model_name == 'rnn':
                    embedding_optimizer.step()

            # Log a plain Python float rather than the graph-attached tensor.
            batch_loss = loss.item()
            loss_count += batch_loss
            # Global step: batches seen so far in this phase across all epochs.
            writer.add_scalar('{}-loss'.format(phase), batch_loss, e * len(loaders[phase]) + i)
