# Model comparison
## Meta pretrained model vs. models trained on a different dataset

#### The Meta model was trained on the 1M recipe dataset
#### The custom models were trained on the Kaggle Food Ingredients and Recipes Dataset with Images
https://www.kaggle.com/datasets/pes12017000148/food-ingredients-and-recipe-dataset-with-images?select=Food+Images

#### Configure drive and import libraries

This notebook runs on Google Colab. The following cell mounts Google Drive and changes into the project folder.

In [1]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem so the project files are reachable.
drive.mount("/content/drive")
# cd into the project's src folder in your Drive (adjust this path to your own
# setup); the relative paths used by later cells ('../data', ...) start from here.
%cd '/content/drive/Othercomputers/Mi portátil/gastroml/src'

Mounted at /content/drive
/content/drive/Othercomputers/Mi portátil/gastroml/src


In [2]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import os
from args import get_parser
import pickle
from model import get_model
from torchvision import transforms
from utils.output_utils import prepare_output
from PIL import Image
import time
from build_vocab import Vocabulary

%load_ext autoreload
%autoreload 2

In [3]:
# Use the GPU only when one is present AND the flag below allows it; otherwise
# everything (model and checkpoint loading) stays on the CPU.
use_gpu = True
if torch.cuda.is_available() and use_gpu:
    device = torch.device('cuda')
    map_loc = None
else:
    device = torch.device('cpu')
    map_loc = 'cpu'

# Model and vocab definitions and paths

### Constant definitions

In [4]:
params_dict = {}

# Checkpoint folder name -> label of the image encoder used by that model.
model_dict = {
    "model": "Resnet50",
    "modeResnet18": "Resnet18",
    "modeResnet101": "Resnet101",
    "modelResnet152": "Resnet152",
    "modelMeta": "Meta",
}
# Models are processed in the order in which they are declared above.
model_list = list(model_dict)

model_dir = "../checkpoints/inversecooking"
data_dir = '../data'

# Vocabulary pickle (file name without extension) used by each model: only the
# pretrained Meta model has its own vocab files, the others share one pair.
vocab_ings_dict = {
    name: "ingr_vocab_meta" if name == "modelMeta" else "recipe1m_vocab_ingrs"
    for name in model_list
}
vocab_inst_dict = {
    name: "instr_vocab_meta" if name == "modelMeta" else "recipe1m_vocab_toks"
    for name in model_list
}

In [11]:
# One folder per split of the Kaggle images, plus the folder with the demo images.
train_image_folder, test_image_folder, val_image_folder = (
    os.path.join("../Kaggle data/images", split) for split in ('train', 'test', 'val')
)
demo_image_folder = os.path.join("../data", 'demo_imgs')


# File names found in each folder, listed in the same order as the folders above.
train_imgs, test_imgs, val_imgs, demo_imgs = map(
    os.listdir,
    (train_image_folder, test_image_folder, val_image_folder, demo_image_folder),
)

In [12]:
# Number of files found in the validation image folder.
len(val_imgs)

1978

In [13]:
# Number of files found in the demo image folder.
len(demo_imgs)

6

### Functions to load the vocabularies and the model for a given model name

In [14]:
def get_vocab_pickle(model_name):
    """Load the ingredient and instruction vocabularies used by a model.

    For "modelMeta" the pretrained vocab pickles ``ingr_vocab_meta.pkl`` and
    ``instr_vocab_meta.pkl`` (downloaded from the original repo,
    https://github.com/facebookresearch/inversecooking) are loaded as they are.
    For every other model the files named in ``vocab_ings_dict`` and
    ``vocab_inst_dict`` are loaded (created with build_vocab.py from the Kaggle
    dataset) and reduced to their ``idx2word`` content: the ingredient vocab
    becomes a list in which every non-string entry is replaced by its shortest
    element, the instruction vocab is the ``idx2word`` mapping itself.

    Side effect: both vocabularies are written back to ``../data`` as pickles.

    Args:
        model_name (str): Model name. It can be "model", "modelMeta",
            "modeResnet18", "modeResnet101", "modelResnet152".

    Returns:
        tuple: ``(vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size,
        output_dim)`` where ``vocab`` is the instruction vocabulary and
        ``output_dim`` equals ``instrs_vocab_size``.

    Raises:
        KeyError: If ``model_name`` is not a key of the vocab dictionaries.
    """
    vocab_ing_name = vocab_ings_dict[model_name]
    vocab_inst_name = vocab_inst_dict[model_name]
    if model_name != "modelMeta":
        ing_vocab_name = "ingr_vocab"
        vocab_name = "instr_vocab"
        ingrs_vocab = _load_pickle(os.path.join(data_dir, f'{vocab_ing_name}.pkl'))
        # An idx2word entry may hold several spellings of one ingredient; keep
        # the shortest one. Plain strings are kept unchanged.
        ingrs_vocab = [min(w, key=len) if not isinstance(w, str) else w for w in ingrs_vocab.idx2word.values()]
        vocab = _load_pickle(os.path.join(data_dir, f'{vocab_inst_name}.pkl')).idx2word
    else:
        ing_vocab_name = "ingr_vocab_meta"
        vocab_name = "instr_vocab_meta"
        ingrs_vocab = _load_pickle(os.path.join(data_dir, 'ingr_vocab_meta.pkl'))
        vocab = _load_pickle(os.path.join(data_dir, 'instr_vocab_meta.pkl'))

    # NOTE(review): the "f" right before the placeholder is a literal character,
    # so the files are named e.g. "fingr_vocab.pkl". It looks like a typo, but the
    # path is kept unchanged because removing it would make the "modelMeta" dump
    # overwrite the downloaded source pickles - confirm before changing.
    _dump_pickle(ingrs_vocab, f'../data/f{ing_vocab_name}.pkl')
    _dump_pickle(vocab, f'../data/f{vocab_name}.pkl')

    ingr_vocab_size = len(ingrs_vocab)
    instrs_vocab_size = len(vocab)
    output_dim = instrs_vocab_size
    return vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim


def _load_pickle(path):
    """Read one pickled object from ``path`` and close the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load vocab files that
    # come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file afterwards."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)

In [15]:
# for model_name in model_list:
#     print(model_name)
#     vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim = get_vocab_pickle(model_name)
#     print(ingr_vocab_size, instrs_vocab_size, output_dim)

In [16]:
# Reset sys.argv before get_parser() is called in load_model - presumably so the
# argument parser does not see the notebook kernel's own command-line arguments
# (TODO confirm against args.py).
import sys; sys.argv=['']; del sys
def load_model(model_name, model_dir, map_loc):
    """Build a model, load its best checkpoint and switch it to evaluation mode.

    The vocabularies of ``model_name`` are loaded with ``get_vocab_pickle`` and
    their sizes are used to build the network with ``get_model``. For every
    model except "modelMeta" the image encoder is taken from ``model_dict``
    (lower-cased, e.g. "resnet18"). "modelMeta" is the pretrained model
    downloaded from the original repo and trained on the original dataset; it
    keeps the parser's default image model. The weights are read from
    ``<model_dir>/<model_name>/checkpoints/modelbest.ckpt``.

    Args:
        model_name (str): Model name. It can be "model", "modelMeta",
            "modeResnet18", "modeResnet101", "modelResnet152".
        model_dir (str): Folder that holds one sub-folder per model.
        map_loc (str | None): Value passed to ``torch.load`` as
            ``map_location``; this notebook uses None on GPU and "cpu" otherwise.

    Returns:
        tuple: ``(model, vocab, ingrs_vocab, ingr_vocab_size,
        instrs_vocab_size, output_dim)`` - the loaded model (moved to
        ``device``, in eval mode) followed by the values returned by
        ``get_vocab_pickle`` for ``model_name``.
    """
    vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim = get_vocab_pickle(model_name)

    args = get_parser()
    args.maxseqlen = 15
    args.ingrs_only = False
    # The Meta model uses the parser's default image model; the custom models
    # must be told which ResNet variant they were trained with.
    if model_name != "modelMeta":
        args.image_model = model_dict[model_name].lower()

    model = get_model(args, ingr_vocab_size, instrs_vocab_size)
    # Join the path component by component instead of gluing a "/"-separated
    # string onto the model name.
    model_path = os.path.join(model_dir, model_name, 'checkpoints', 'modelbest.ckpt')
    model.load_state_dict(torch.load(model_path, map_location=map_loc))
    model.to(device)
    model.eval()
    # Generate both the ingredients and the recipe text when sampling.
    model.ingrs_only = False
    model.recipe_only = False

    return model, vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim

In [17]:
# for model_name in model_list:
#     print(model_name)
#     model = load_model(model_name, model_dir, map_loc)
#     print(model)


### Get predictions on the datasets

In [18]:
# Final preprocessing applied to every (already resized and cropped) image:
# conversion to a tensor followed by per-channel normalization (means, stds).
transf_list_batch = [
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)),
]
to_input_transf = transforms.Compose(transf_list_batch)

# Sampling settings; greedy, beam and temperature are handed to model.sample
# by generate_predictions.
greedy = True
beam = -1
temperature = 1.0
numgens = 1



In [20]:
def generate_predictions(model_name, dataset, model_dir, map_loc, max_retries=5):
    """Generate one recipe prediction per image of a dataset with one model.

    The model is loaded with ``load_model`` and every image of the chosen
    dataset is resized to 256, center-cropped to 224, normalized and passed to
    ``model.sample`` using the module-level ``greedy``, ``beam`` and
    ``temperature`` settings. An output that ``prepare_output`` reports as
    invalid is sampled again, at most ``max_retries`` times; after that the last
    output is stored even though it is invalid, so every image gets a row.

    Every 100 images the rows collected so far are also written to
    ``../data/predictions/{dataset}_{model_name}_{img_num}.csv`` as a backup.

    Args:
        model_name (str): Model name. It can be "model", "modelMeta",
            "modeResnet18", "modeResnet101", "modelResnet152".
        dataset (str): Dataset to be used. It can be "train", "test", "val"
            or "demo".
        model_dir (str): Folder where the models are stored.
        map_loc (str | None): ``map_location`` forwarded to ``load_model``.
        max_retries (int): How many times an invalid output is sampled again
            before it is accepted. With ``greedy=True`` a retry presumably
            yields the same output again (TODO confirm against model.sample),
            so 0 can be passed to skip the retries.

    Returns:
        list[list] | None: One row per image with the structure
        ``[dataset, model_name, conv_model, img_file, greedy, beam, title,
        ingrs, recipe]``, or None (after printing a message) when ``dataset``
        is not one of the supported names.
    """
    t = time.time()
    if dataset == "train":
        set_imgs = train_imgs
        image_folder = train_image_folder
    elif dataset == "test":
        set_imgs = test_imgs
        image_folder = test_image_folder
    elif dataset == "val":
        set_imgs = val_imgs
        image_folder = val_image_folder
    elif dataset == "demo":
        set_imgs = demo_imgs
        image_folder = demo_image_folder
    else:
        print("Dataset not valid")
        return None

    model, vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim = load_model(model_name, model_dir, map_loc)

    # The resize/crop pipeline is the same for every image, so build it once.
    transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])
    columns = ['dataset', 'model_name', 'conv_model', 'img_file', 'greedy','beam', 'title', 'ingrs', 'recipe']

    predictions = []
    for img_num, img_file in enumerate(set_imgs):
        image_path = os.path.join(image_folder, img_file)
        # convert() returns an independent in-memory copy, so the file can be
        # closed right away instead of leaking one handle per image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        image_transf = transform(image)
        image_tensor = to_input_transf(image_transf).unsqueeze(0).to(device)

        counter_not_valid = 0
        while True:
            with torch.no_grad():
                outputs = model.sample(image_tensor, greedy=greedy,
                                       temperature=temperature, beam=beam, true_ingrs=None)

            ingr_ids = outputs['ingr_ids'].cpu().numpy()
            recipe_ids = outputs['recipe_ids'].cpu().numpy()

            outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)
            # Keep the output when it is valid, or when the retry budget is used
            # up. (The previous test, "counter_not_valid < 5", was true on the
            # very first pass, so invalid outputs were never sampled again.)
            if valid['is_valid'] or counter_not_valid >= max_retries:
                predictions.append([dataset, model_name, model_dict[model_name], img_file, greedy,beam, outs['title'], outs['ingrs'], outs['recipe']])
                break

            print ("Not a valid recipe!")
            print ("Reason: ", valid['reason'])
            print( outs['title'], outs['ingrs'])
            counter_not_valid += 1

        if img_num % 100 == 0 and img_num >0:
            print(img_num)
            print("Elapsed time:", time.time() -t)
            # Periodic backup so a long run can be recovered if it is interrupted.
            back_up_predictions = pd.DataFrame(predictions, columns=columns)
            back_up_predictions.to_csv(f'../data/predictions/{dataset}_{model_name}_{img_num}.csv', index=False)

    return predictions

In [21]:

# Run every model on the demo images: one CSV per model is written to
# ../data/predictions and the resulting frames are kept, keyed by model name.
demo_predictions = {}
for dataset in ["demo"]:
    print(dataset)
    for model_name in model_list:
        print(model_name)
        columns = ['dataset', 'model_name', 'conv_model', 'img_file', 'greedy','beam', 'title', 'ingrs', 'recipe']
        predictions = pd.DataFrame(
            generate_predictions(model_name,dataset,model_dir, map_loc),
            columns=columns,
        )
        csv_path = f'../data/predictions/{dataset}_{model_name}.csv'
        predictions.to_csv(csv_path, index=False)
        demo_predictions[model_name] = predictions

demo
model


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 79.6MB/s]


modeResnet18


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 152MB/s]


modeResnet101


Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:02<00:00, 78.3MB/s]


modelResnet152


Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:03<00:00, 71.3MB/s]


modelMeta




In [None]:
# Run every model on the test images: one CSV per model is written to
# ../data/predictions and the resulting frames are kept, keyed by model name.
test_predictions = {}
for dataset in ["test"]:#,"val","train"]:
    print(dataset)
    for model_name in model_list:
        print(model_name)
        predictions = generate_predictions(model_name,dataset,model_dir, map_loc)
        columns = ['dataset', 'model_name', 'conv_model', 'img_file', 'greedy','beam', 'title', 'ingrs', 'recipe']
        predictions = pd.DataFrame(predictions, columns=columns)
        predictions.to_csv(f'../data/predictions/{dataset}_{model_name}.csv', index=False)
        # Store the frame in test_predictions. It used to be written into
        # demo_predictions, which overwrote the demo results and left
        # test_predictions empty.
        test_predictions[model_name] = predictions

test
model




100
Elapsed time: 361.2993576526642
200
Elapsed time: 713.6494615077972
300
Elapsed time: 1068.8414583206177
400
Elapsed time: 1423.039216518402
500
Elapsed time: 1778.6623120307922


In [26]:
# Stack the per-model frames into a single one. The dict keys (model names)
# become the outer index level, which reset_index() turns into the "level_0"
# column used below to split the rows by model again.
demo_predictions_df = pd.concat(demo_predictions).reset_index()

In [47]:
# Print, model by model, what was predicted for each demo image: file name,
# title, ingredient list (shown without quotes) and recipe steps.
model_keys = demo_predictions_df["level_0"]
for model_key in model_keys.unique():
    print(model_key)
    records = demo_predictions_df[model_keys == model_key].to_dict("records")
    for record in records:
        print(record["img_file"])
        print(record["title"])
        ingrs_text = str(record["ingrs"]).replace("'","")
        print(ingrs_text)
        print(record["recipe"])

model
1.jpg
Grilled red onion salad
[oil, salt, pepper, garlic, onion, olive, vinegar, juice, lemon]
['Heat the olive oil in a large skillet over medium heat. add the onion and cook for 5 minutes, stirring occasionally, until the garlic is fragrant, about 5 minutes. add the garlic and cook for 5 minutes, stirring occasionally, until the garlic is fragrant, about 5 minutes. add the garlic and cook for 10 minutes, stirring occasionally, until the onion is absorbed, about 5 minutes. add the olive oil and cook for 5 minutes. add the onion and cook for 5 minutes, stirring occasionally, until the pasta is absorbed, about 5 minutes. add the pasta and cook for 5 minutes. add the pasta and cook for 5 minutes, stirring occasionally, until the pasta is absorbed, about 5 minutes. add the pasta and']
2.jpg
Spiced fried eggs
[salt, oil, sugar, pepper, garlic, egg, flour]
['Preheat oven to 350â°f. line a rimmed baking sheet with parchment paper.', 'Whisk flour, salt, and pepper in a medium bowl. whis

### Create metrics

In [None]:
!pip install -U sentence_transformers

In [None]:
from ast import literal_eval

# begin code taken from https://www.geeksforgeeks.org/how-to-calculate-jaccard-similarity-in-python/
def jaccard_similarity(list1, list2):
    """Return the Jaccard similarity of two collections.

    Both inputs are turned into sets (duplicates and order are ignored) and the
    size of their intersection is divided by the size of their union.

    Args:
        list1 (Iterable): First collection of hashable items.
        list2 (Iterable): Second collection of hashable items.

    Returns:
        float: Value in [0, 1]; 1.0 when both collections hold the same items.
        Two empty collections are treated as identical and give 1.0.
    """
    set1 = set(list1)
    set2 = set(list2)
    union = set1 | set2
    if not union:
        # Both inputs are empty: dividing by len(union) would raise
        # ZeroDivisionError, and two empty sets are equal by definition.
        return 1.0

    return len(set1 & set2) / len(union)



def jaccard_distance(list1, list2):
    """Return the Jaccard distance of two collections.

    Both inputs are turned into sets (duplicates and order are ignored) and the
    size of their symmetric difference is divided by the size of their union,
    which equals ``1 - jaccard similarity``.

    Args:
        list1 (Iterable): First collection of hashable items.
        list2 (Iterable): Second collection of hashable items.

    Returns:
        float: Value in [0, 1]; 0.0 when both collections hold the same items.
        Two empty collections are treated as identical and give 0.0.
    """
    set1 = set(list1)
    set2 = set(list2)
    union = set1 | set2
    if not union:
        # Both inputs are empty: dividing by len(union) would raise
        # ZeroDivisionError, and two empty sets do not differ at all.
        return 0.0

    return len(set1 ^ set2) / len(union)
# end code taken from https://www.geeksforgeeks.org/how-to-calculate-jaccard-similarity-in-python/

In [None]:
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used by simscore; created once here and reused for
# every comparison.
simmodel = SentenceTransformer('all-MiniLM-L6-v2')

def simscore(str1, str2):
    """Return the cosine similarity between the embeddings of two texts.

    Each text is encoded separately with the module-level ``simmodel``.

    Args:
        str1 (str): First text.
        str2 (str): Second text.

    Returns:
        float: Cosine similarity of the two embeddings as a Python number.
    """
    embeddings = [simmodel.encode(text, convert_to_tensor=True) for text in (str1, str2)]
    similarity = util.cos_sim(*embeddings)
    return similarity.item()

In [None]:
## get actual recipe names
import pandas as pd
# Ground-truth recipes of the Kaggle dataset, one row per recipe.
recipe_names = pd.read_csv('../Kaggle data/final_data.csv')
# Append the ".jpg" extension so image_name can be matched against the img_file
# column of the predictions when the two frames are merged later on.
recipe_names["image_name"] = recipe_names["image_name"]+".jpg"

# The ingredients are stored in the CSV as the text form of a Python literal;
# literal_eval parses them back into Python objects (used as lists of
# ingredients by the metrics below).
recipe_names["ingredients_rawmats"] = recipe_names["ingredients_rawmats"].apply(literal_eval)
#recipe_names["recipe"] = recipe_names["recipe"].apply(literal_eval)
recipe_names.head()

In [None]:
# Load the saved predictions. Only 'val_model.csv' is read here, i.e. the file
# written for dataset "val" and model name "model".
predictions_folder = '../data/predictions'
# Listing of all prediction files; not used by the code visible in this section.
pred_files = os.listdir(predictions_folder)
pred_df = pd.read_csv(os.path.join(predictions_folder, 'val_model.csv'))

# The CSV stores the ingredient and recipe lists as text; parse them back into
# Python objects.
pred_df["ingrs"] = pred_df["ingrs"].apply(literal_eval)
pred_df["recipe"] = pred_df["recipe"].apply(literal_eval)
pred_df.head(2)

## Title and Ingredients similarity


In [None]:
# Join the predictions with the ground truth on the image file name.
# how="left" keeps every prediction row; a prediction without a matching recipe
# gets NaN in the ground-truth columns. Column names present in both frames are
# disambiguated with the "_pred" / "_true" suffixes (e.g. title_pred / title_true).
pred_df = pd.merge(
    pred_df,
    recipe_names,
    left_on="img_file",
    right_on="image_name",
    how="left",
    suffixes=("_pred", "_true")
    )

In [None]:
# Title similarity score: cosine similarity between the sentence embeddings of
# the true and the predicted title, computed row by row with simscore.
pred_df["title_simscore"] = [simscore(actual_title, pred_title) for actual_title, pred_title in zip(pred_df["title_true"], pred_df["title_pred"])]

In [None]:
# Number of ingredients in the prediction and in the ground truth of each row.
pred_df["ingr_num_pred"] = list(map(len, pred_df["ingrs"]))
pred_df["ingr_num_true"] = list(map(len, pred_df["ingredients_rawmats"]))

# Set overlap between true and predicted ingredients, as similarity and as distance.
ingredient_pairs = list(zip(pred_df["ingredients_rawmats"], pred_df["ingrs"]))
pred_df["ingr_jaccard"] = [
    jaccard_similarity(true_ingrs, pred_ingrs) for true_ingrs, pred_ingrs in ingredient_pairs
]
pred_df["ingr_jaccard_dist"] = [
    jaccard_distance(true_ingrs, pred_ingrs) for true_ingrs, pred_ingrs in ingredient_pairs
]

In [None]:
# Difference in ingredient count (true minus predicted): positive when the model
# predicted fewer ingredients than the real recipe has.
pred_df["ingr_num_diff"] = pred_df["ingr_num_true"] - pred_df["ingr_num_pred"]

In [None]:
def array_to_string(array, delimiter=" "):
    """Join the string items of ``array`` into a single string.

    Args:
        array (Iterable[str]): Items to concatenate, in iteration order.
        delimiter (str): Text placed between consecutive items.

    Returns:
        str: The joined text; an empty string for an empty ``array``.
    """
    joined = delimiter.join(array)
    return joined

In [None]:
# Semantic similarity of the ingredient lists: each list is sorted and joined
# into one string first, so both sides are compared in the same item order.
pred_df["ings_simscore"] = [
    simscore(
        array_to_string(np.sort(true_ingrs)),
        array_to_string(np.sort(pred_ingrs)),
    )
    for true_ingrs, pred_ingrs in zip(pred_df["ingredients_rawmats"], pred_df["ingrs"])
]