# Generate captions for the test set 
This notebook contains code that produces the prediction probabilities and the corresponding captions for an unseen test dataset, which consists of 7 recipe types that were never seen during training.

In [10]:
import torch
import pandas as pd
import sys
import numpy as np
import json
import pickle

#specify the root path for importing modules
sys.path.append("C:/Users/User/foodcap")

#misc
import src.config as config
from src.data.data_loading import get_loader
from src.data.load_build_vocab import Vocabulary
from src.utils import create_df

### REMARK: The parameters `recipe1M_embeddings` and `resNet` must be set to match the settings used to train the loaded model

In [11]:
# Checkpoint to evaluate: the run folder and the file name inside it.
model_name = "last_model"
model_folder = "../models/2019-08-02_17-12-01/"

# Feature / embedding switches. Both must mirror the configuration that the
# loaded model was trained with.
resNet = True               # forwarded to get_loader as `resnet`
recipe1M_embeddings = True  # selects which vocabulary pickle is loaded

In [13]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device_name = "cuda" if use_cuda else "cpu"
device = torch.device(device_name)

# The vocabulary pickle depends on the embedding setting chosen above.
vocab_path = '../data/vocab_recipe1M.pkl' if recipe1M_embeddings else "../data/vocab.pkl"

# Data locations: the root directory comes from the project config and the
# feature directory is derived from it by plain string concatenation.
data_dir = config.DATA["data_dir"]
feature_dir = data_dir + "resnet_features/"

# Forwarded to get_loader as `num_workers`.
num_workers = 0

# Forwarded to the model as `limit`.
# NOTE(review): presumably the maximum generated caption length - confirm.
limit_test = 25

### Load vocabulary and index data file for test set

In [14]:
print("Load vocabulary")
# Load the pickled vocabulary wrapper.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load vocabulary files that come from a trusted source.
with open(vocab_path, 'rb') as f:
    vocab = pickle.load(f)
vocab_size = len(vocab)
print("Vocab size", vocab_size)

# Index file describing all video segments and the subset they belong to.
yc2_all = pd.read_csv("../data/data_all.csv")

# Select the test rows with a boolean mask through .loc. The previous version
# passed index *labels* to the positional .iloc indexer, which is only correct
# while the frame carries a default RangeIndex.
is_test = yc2_all["subset_new"] == "test"
test_idx = yc2_all.index[is_test]

# Keep only the columns needed downstream; .copy() makes the subset an
# independent frame rather than a slice of a slice.
yc2_test_small = yc2_all.loc[
    is_test, ['video_seg_id', 'recipe_label', 'sentence', 'recipe_index']
].copy()

# The whole test set is processed as a single batch.
bs = len(yc2_test_small)

# Free the full table; only the test subset is used from here on.
del yc2_all

Load vocabulary
Vocab size 30462


### Load Model

In [15]:
# Load the complete pickled model object. map_location remaps the stored
# tensors onto the device selected for this session, so a checkpoint saved
# from a GPU can still be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles arbitrary objects; only load trusted
# checkpoints.
model = torch.load(model_folder + model_name, map_location=device)

# Inference only: put the model into evaluation mode.
# NOTE(review): assumes `model` is a torch.nn.Module - confirm.
model.eval()
decoder = model.decoder

# A single batch holds the whole test set (batch_size = bs). shuffle=False
# stops the loader from randomly permuting the samples, so repeated runs
# produce the predictions in the same order.
data_loader_test = get_loader(feature_dir, yc2_test_small, vocab, decoder, batch_size=bs,
                              shuffle=False, num_workers=num_workers, data_dir=data_dir,
                              resnet=resNet, device=device)

### Generate prediction probabilities for the test set

In [16]:
# The loader was built with a batch size equal to the test-set size, so the
# first batch contains every test sample.
x_test, captions_test, x_lengths_test, y_lengths_test, x_recipe_types = next(iter(data_loader_test))

# No gradients are needed for prediction; disabling autograd avoids building
# a computation graph for the entire test set in one forward pass.
with torch.no_grad():
    # Keyword names are kept exactly as in the original call.
    # NOTE(review): a ratio of 0 presumably means the ground-truth captions
    # are never fed back during decoding - confirm against the model code.
    seq_probs_test = model(vocab, x_test, x_lengths_test, captions_test, x_recipe_types,
                           teaching_forcing_ratio=0, limit=limit_test, train=False)

### Map the predicted probabilities to the corresponding words from the vocabulary

In [20]:
# Convert the predicted probabilities, together with the reference captions,
# into a JSON-serialisable structure via the project helper create_df.
n_test_samples = len(yc2_test_small)
df = create_df(n_test_samples, vocab, seq_probs_test, captions_test)

# Store the predictions next to the model they were generated with.
predictions_path = model_folder + "predictions_test_" + model_name + ".json"
with open(predictions_path, 'w') as out_file:
    serialized = json.dumps(df)
    out_file.write(serialized)