# Investigate generated captions of the test set with unseen recipe types
The captioning capabilities of the model are evaluated on an unseen test dataset which is composed of 
7 recipe types that were never seen during the training process. 

In [13]:
import pandas as pd 
import numpy as np 
import json
import sys 

# Make the project root importable so that `src.utils` can be found.
# The root is derived from the working directory instead of a machine-specific
# absolute path, so the notebook also runs on other checkouts.
# NOTE(review): assumes the notebook lives one level below the project root,
# consistent with the "../data" and "../models" paths used below - confirm.
import os

sys.path.append(os.path.abspath(".."))

from src.utils import create_prediction_df, calculate_scores, WordCounter

In [14]:
# Checkpoint name and run directory of the model whose test predictions
# are evaluated in this notebook.
model_name = "last_model"
model_folder = "../models/2019-08-02_17-12-01/"

In [15]:
# Read the dataset table from the CSV file.
data = pd.read_csv("../data/data_all.csv")

# Path of the JSON file holding the test-set predictions of the chosen model.
json_pred_path = model_folder + "predictions_test_" + model_name + ".json"
# Pass the path defined on the line above; the former `json_pred_path_zs`
# was never defined and raised a NameError on a fresh kernel.
# presumably "test" selects the test split inside create_prediction_df - verify.
df = create_prediction_df(json_pred_path, data, "test")

## Quantitative Evaluation Scores
The assessment of the model's captioning capability is
recorded by several standard metrics that are commonly used in the field of natural 
language generation. These include the METEOR and ROUGE-L score. <br />
Furthermore, the BLEU(1-4) and the CIDEr metric can be calculated. To calculate these scores, download and work with the [pycocoevalcap package](https://github.com/tylin/coco-caption) 

In [16]:
# Run calculate_scores on every row (axis=1) and keep the returned frame.
df = df.apply(calculate_scores, axis=1)

#### METEOR Score

In [17]:
# Mean of the "meteor" column over all rows.
df["meteor"].mean()

0.10216201169789231

#### ROUGE-L Score

In [18]:
# Mean of the "rouge-l" column over all rows.
rouge_l_scores = df["rouge-l"]
rouge_l_scores.mean()

0.18153934275752437

#### METEOR and ROUGE-L score reported for each recipe type separately

In [19]:
# Mean METEOR and ROUGE-L per recipe type.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with a bare tuple of keys is deprecated since pandas 1.0 and raises in 2.0.
df.groupby("recipe")[["meteor", "rouge-l"]].mean()

Unnamed: 0_level_0,meteor,rouge-l
recipe,Unnamed: 1_level_1,Unnamed: 2_level_1
burger,0.102162,0.181539


## Qualitative Results - Look at the predicted words in each recipe type

In [20]:
# Count words per recipe type on the scored prediction frame.
# Uses `df`, the frame built in this notebook; the former `df_zs` was never
# defined here and raised a NameError on a fresh kernel.
wc = WordCounter()
words = wc.count_words_per_recipe(df)

In [21]:
# For every recipe type, show the first 10 rows of `words` whose type is
# "prediction". Iterates over `words` (built in the previous cell); the former
# `words_zs` was never defined and raised a NameError on a fresh kernel.
for recipe in words.recipe.unique():

    print(recipe)
    print(words[(words.recipe == recipe) & (words.type == "prediction")].head(10))
    print("-----------------------------")

burger
        word  count  recipe        type  sum_count     ratio
1        cut    191  burger  prediction        653  0.292496
3       thin    177  burger  prediction        653  0.271057
4   tomatoes    170  burger  prediction        653  0.260337
7      aside     57  burger  prediction        653  0.087289
9        set     37  burger  prediction        653  0.056662
10    slices     13  burger  prediction        653  0.019908
11     paper      8  burger  prediction        653  0.012251
-----------------------------
