## Imports

In [22]:
import preprocessing as prep
import model
import main
from tqdm import tqdm
import torch
import torch.utils.data
import os
from torchvision.utils import make_grid
import torch.nn as nn
import torch.optim as optim
from timeit import default_timer as timer
import model
import preprocessing as prep
import argparse
from torch.utils.tensorboard import SummaryWriter
from pathlib import Path
import json

# Start - Do not change these!
# Fixed, notebook-relative locations and constants shared by the cells below.
HYPER_PARAMETER_CONFIG = "../hparams.json"
MODEL_STORAGE = "../model_storage"
ROOT = "../data"
SEED = 1
PADDING_WORD = "<MASK>"

# Read the stored hyper-parameters, then point them at the paths defined above.
hparams = prep.read_json_config(HYPER_PARAMETER_CONFIG)
hparams["root"] = ROOT
hparams["model_storage"] = MODEL_STORAGE

# Names of the dataset splits handed to the caption and data-loader helpers.
trainset_name, valset_name, testset_name = "train", "val", "test"
# End - Do not change these!

## Parameters to set. Currently configured to load the model file: TODO add the name of the best model file

In [23]:
# Hyper-parameter dictionary read by the training and evaluation cell below.
# NOTE(review): this assignment replaces the `hparams` object loaded from
# HYPER_PARAMETER_CONFIG in the previous cell, so the "root" and
# "model_storage" values set there are discarded. "model_storage" below is
# "./model_storage" while MODEL_STORAGE is "../model_storage" - confirm which
# location is intended.
hparams = {
   # You can configure the following parameters.
   # Currently configured to load ...
   # Changing these parameters can force a new training
   "batch_size": 32,
   "hidden_dim": 512,
   "rnn_layers" : 2,
   "shuffle" : True,
   "cnn_model": "resnet50",# NOTE(review): the value list here used to read "lstm", "gru", "resnet50"; "lstm"/"gru" presumably belong to "rnn_model" - confirm
   "rnn_model": "gru",
   "embedding_dim" : 300,
   "improve_embedding": True,
   "improve_cnn": False,
   "image_size": 480,# between 256 and 640
   "crop_size": 320,# minimum 224, must be lower than "image_size"
   "caption_number" : 3,
   "cutoff": 2,# passed to CaptionVectorizer.from_dataframe as the cutoff for unknown words
   "clip_grad": None,# set to 1.0 if NaN values appear during training
   "weight_decay": 0.00001, # L2 Regularization
   "num_epochs": 250,
   "lr": 0.001,# learning rate
   "sgd_momentum": None,# NOTE(review): meaning of None is not visible here - confirm
   "break_training_loop_percentage": 1,
   "drop_out_prob" : 0.2,# only when "rnn_layers" > 1, otherwise set to 0
   "sampling_method": "beam_search",# either "beam_search", "sample_search" or "" for a greedy search
   "beam_width": 3,
   "training_report_frequency" : 5,
   "last_saved_model" : None,
   # Do not change the following parameters
   "use_pixel_normalization" : True,
   "compute_val_loss": True,
   "keep_best_val_loss": False,
   "keep_best_total_loss": True,
   "gold_eval_with_original": True,
   "device": "cuda:0",# the next cell falls back to "cpu" for its local `device` when CUDA is unavailable
   "root": "../data",
   "annotation_without_punctuation": False,
   "train": "train2017",
   "val": "val2017",
   "test": "test2017",
   "timestamp_prefix": "%d%b%Y_%H_%M_%S_%f",
   "debug": False,
   "print_prediction" : False,
   "save_eval_results" : False,
   "model_name": "_.pt",
   "use_glove": True,
   "use_padding_idx": True,# when True, the index of PADDING_WORD is passed to model.create_embedding
   "model_storage": "./model_storage",
   "glove_embedding": "glove.6B.300d.bin.word2vec",
   "save_pending_model" : False
}


## Training and evaluation

In [25]:
# Build the vectorizer, embedding, data loaders and network, then either
# reload a stored model or train one, and finally run the BLEU evaluation
# on the training and test splits.

# The script version read this flag from the command line (`args.train`),
# which does not exist in a notebook and raised a NameError on a fresh kernel.
# NOTE(review): presumably True forces a new training run - confirm against
# main.init_model.
FORCE_TRAINING = False

torch.cuda.empty_cache()

# Use the configured device, falling back to the CPU when CUDA is missing.
# NOTE(review): hparams["device"] itself still holds the configured value;
# confirm that no helper reads it directly when running on a CPU-only machine.
device = hparams["device"]
if not torch.cuda.is_available():
    print("Warning, only CPU processing available!")
    device = "cpu"
else:
    print("CUDA GPU is available", "Number of machines:", torch.cuda.device_count())

# Makes results more reproducible
prep.set_seed_everywhere(SEED)

# Prepare the vectorizer, mapping words to indices and vice versa
cleaned_captions = prep.get_captions(hparams, trainset_name)
cutoff_for_unknown_words = hparams["cutoff"]
c_vectorizer = model.CaptionVectorizer.from_dataframe(cleaned_captions, cutoff_for_unknown_words)

# Creates the embedding model; the padding index is only looked up on request
padding_idx = None
if hparams["use_padding_idx"]:
    padding_idx = c_vectorizer.get_vocab()._token_to_idx[PADDING_WORD]
embedding = model.create_embedding(hparams, c_vectorizer, padding_idx)

# Creates the data loaders
train_loader = model.CocoDatasetWrapper.create_dataloader(hparams, c_vectorizer, trainset_name)
# The pictures for the training set and the test set share the same directory, "train2017"
test_loader = model.CocoDatasetWrapper.create_dataloader(hparams, c_vectorizer, testset_name, "train2017")
val_loader = model.CocoDatasetWrapper.create_dataloader(hparams, c_vectorizer, valset_name)

# Creates the network
network = model.RNNModel(hparams["hidden_dim"], pretrained_embeddings=embedding,
                         cnn_model=hparams["cnn_model"], rnn_layers=hparams["rnn_layers"],
                         rnn_model=hparams["rnn_model"], drop_out_prob=hparams["drop_out_prob"],
                         improve_cnn=hparams["improve_cnn"]).to(device)

# Either loads a previous state or starts a training.
# The helpers are reached through the imported `main` module; the bare names
# were only available through a cell that is no longer part of the notebook.
# NOTE(review): assumes main.py defines init_model, get_stop_loop_indices and
# train - confirm.
start_training = main.init_model(hparams, network, FORCE_TRAINING)
break_training_loop_idx, break_val_loop_idx, break_test_loop_idx = main.get_stop_loop_indices(
    hparams, train_loader, val_loader, test_loader)

# Training
if start_training:
    loss_function = nn.NLLLoss().to(device)
    main.train(hparams, loss_function, network, train_loader, device, break_training_loop_idx, val_loader)

# Evaluation part
model.BleuScorer.perform_whole_evaluation(hparams, train_loader, network, break_training_loop_idx, "train")
model.BleuScorer.perform_whole_evaluation(hparams, test_loader, network, break_test_loop_idx, "test")


0it [00:00, ?it/s]

CUDA GPU is available Number of machines: 1
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


22it [00:08,  2.69it/s]


KeyboardInterrupt: 

## Show some pictures with predictions and reference captions

In [None]:
#TODO