# Model comparison
## Meta pretrained model vs. models trained on a different dataset

#### The Meta model was trained on the Recipe1M dataset
#### The custom models were trained on the Kaggle Food Ingredients and Recipes Dataset with Images
https://www.kaggle.com/datasets/pes12017000148/food-ingredients-and-recipe-dataset-with-images?select=Food+Images

#### Configure drive and import libraries

This notebook runs on Colab; the following commands mount Google Drive and change into the project folder.

In [1]:
# Mount Google Drive so the project files are reachable from the Colab runtime.
from google.colab import drive
drive.mount("/content/drive")
# Change into the project's `src` folder on Drive; adjust this path to your own setup.
%cd '/content/drive/Othercomputers/Mi portátil/gastroml/src'

Mounted at /content/drive
/content/drive/Othercomputers/Mi portátil/gastroml/src


In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import os
from args import get_parser
import pickle
from model import get_model
from torchvision import transforms
from utils.output_utils import prepare_output
from PIL import Image
import time
from build_vocab import Vocabulary

%load_ext autoreload
%autoreload 2

In [2]:
# Use the GPU only when CUDA is available AND the flag below is set to True;
# otherwise tensors and checkpoint loading (via `map_loc`) stay on the CPU.
use_gpu = False
if torch.cuda.is_available() and use_gpu:
    device = torch.device('cuda')
    map_loc = None
else:
    device = torch.device('cpu')
    map_loc = 'cpu'

# Model and vocab definitions and paths

### Constants definitions

In [3]:
params_dict = {}

# Checkpoint folder name -> image encoder label used in the prediction tables.
model_dict = {
    "model": "Resnet50",
    "modeResnet18": "Resnet18",
    "modeResnet101": "Resnet101",
    "modelResnet152": "Resnet152",
    "modelMeta": "Meta",
}
# Evaluation order of the models (same order as the mapping above).
model_list = list(model_dict)

model_dir = "../checkpoints/inversecooking"
data_dir = '../data'

# Vocabulary pickle base names per model: only "modelMeta" uses the "*_meta"
# files, every other model shares the recipe1m_* vocabularies.
vocab_ings_dict = {
    name: "ingr_vocab_meta" if name == "modelMeta" else "recipe1m_vocab_ingrs"
    for name in model_list
}
vocab_inst_dict = {
    name: "instr_vocab_meta" if name == "modelMeta" else "recipe1m_vocab_toks"
    for name in model_list
}

In [4]:
# One image folder per split of the Kaggle dataset.
train_image_folder, test_image_folder, val_image_folder = (
    os.path.join("../Kaggle data/images", split)
    for split in ("train", "test", "val")
)

# File names found in each split folder (listed in train, test, val order).
train_imgs, test_imgs, val_imgs = map(
    os.listdir,
    (train_image_folder, test_image_folder, val_image_folder),
)

In [5]:
# Number of entries found in the validation image folder.
len(val_imgs)

1978

### Functions to get the vocab and load a model based on the model name

In [6]:
def get_vocab_pickle(model_name):
    """Load the ingredient and instruction vocabularies for a given model.

    The pickle base names are looked up in ``vocab_ings_dict`` and
    ``vocab_inst_dict`` and read from ``data_dir``.

    * For "modelMeta" the unpickled objects are used exactly as stored. These
      are the pretrained vocab files downloaded from the original repo
      (https://github.com/facebookresearch/inversecooking).
    * For every other model the pickles hold objects exposing ``idx2word``
      (created with the build_vocab.py script on the Kaggle dataset). The
      ingredient vocab becomes a list with one string per entry: entries that
      are not plain strings are collapsed to their shortest member. The
      instruction vocab is the ``idx2word`` mapping itself.

    As a side effect the processed vocabularies are written back to
    ``data_dir`` as ``f<name>.pkl`` copies.

    Args:
        model_name (str): Model name. It can be "model", "modelMeta",
            "modeResnet18", "modeResnet101", "modelResnet152"

    Returns:
        tuple: ``(vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size,
        output_dim)`` where ``vocab`` is the instruction vocabulary,
        ``ingrs_vocab`` the ingredient vocabulary, the two sizes are their
        lengths and ``output_dim`` equals ``instrs_vocab_size``.

    Raises:
        KeyError: If ``model_name`` is not a key of the vocab lookup tables.
        FileNotFoundError: If a vocab pickle is missing from ``data_dir``.
    """
    def _load_pickle(path):
        # The context manager closes the handle even if unpickling fails.
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    ingrs_vocab = _load_pickle(os.path.join(data_dir, f'{vocab_ings_dict[model_name]}.pkl'))
    vocab = _load_pickle(os.path.join(data_dir, f'{vocab_inst_dict[model_name]}.pkl'))

    if model_name != "modelMeta":
        ing_vocab_name = "ingr_vocab"
        vocab_name = "instr_vocab"
        # An idx2word value may be a group of synonyms; keep the shortest one.
        ingrs_vocab = [
            w if isinstance(w, str) else min(w, key=len)
            for w in ingrs_vocab.idx2word.values()
        ]
        vocab = vocab.idx2word
    else:
        ing_vocab_name = "ingr_vocab_meta"
        vocab_name = "instr_vocab_meta"

    # NOTE(review): the leading "f" in these file names (e.g. "fingr_vocab.pkl")
    # looks like a stray character from the original f-string. It is kept so
    # the files land where they always did and the source pickles are never
    # overwritten - confirm the intended name before changing it.
    _dump_pickle(ingrs_vocab, os.path.join(data_dir, f'f{ing_vocab_name}.pkl'))
    _dump_pickle(vocab, os.path.join(data_dir, f'f{vocab_name}.pkl'))

    ingr_vocab_size = len(ingrs_vocab)
    instrs_vocab_size = len(vocab)
    output_dim = instrs_vocab_size
    return vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim

In [7]:
# for model_name in model_list:
#     print(model_name)
#     vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim = get_vocab_pickle(model_name)
#     print(ingr_vocab_size, instrs_vocab_size, output_dim)

In [19]:
# Reset the command line seen by argparse: the kernel's own launcher arguments
# would presumably be rejected when get_parser() parses them.
import sys
sys.argv = ['']
del sys


def load_model(model_name, model_dir, map_loc):
    """Build a model, restore its best checkpoint and put it in eval mode.

    The vocabularies come from ``get_vocab_pickle(model_name)``. The
    checkpoint is read from
    ``<model_dir>/<model_name>/checkpoints/modelbest.ckpt``. For every model
    except "modelMeta" the image encoder is chosen from ``model_dict``; the
    meta model (downloaded from the original repo) keeps the parser default.

    Args:
        model_name (str): Model name. It can be "model", "modelMeta",
            "modeResnet18", "modeResnet101", "modelResnet152"
        model_dir (str): Folder that contains one sub-folder per model.
        map_loc (str | None): ``map_location`` handed to ``torch.load``,
            e.g. "cpu", or None to keep the device stored in the checkpoint.

    Returns:
        tuple: ``(model, vocab, ingrs_vocab, ingr_vocab_size,
        instrs_vocab_size, output_dim)`` - the loaded model followed by
        everything ``get_vocab_pickle`` returned.
    """
    vocab_bundle = get_vocab_pickle(model_name)
    vocab, ingrs_vocab, ingr_vocab_size, instrs_vocab_size, output_dim = vocab_bundle

    args = get_parser()
    args.maxseqlen = 15
    args.ingrs_only = False
    if model_name != "modelMeta":
        # The meta model keeps the parser's default image encoder.
        args.image_model = model_dict[model_name].lower()

    checkpoint_path = os.path.join(model_dir, model_name+'/checkpoints/modelbest.ckpt')
    model = get_model(args, ingr_vocab_size, instrs_vocab_size)
    state = torch.load(checkpoint_path, map_location=map_loc)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    model.ingrs_only = False
    model.recipe_only = False

    return (model, *vocab_bundle)

In [20]:
# for model_name in model_list:
#     print(model_name)
#     model = load_model(model_name, model_dir, map_loc)
#     print(model)


### Get predictions on the datasets

In [21]:
# PIL image -> tensor, followed by a per-channel mean/std normalisation.
transf_list_batch = [
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)),
]
to_input_transf = transforms.Compose(transf_list_batch)

# Decoding settings passed to model.sample() by generate_predictions.
greedy, beam = True, -1
temperature, numgens = 1.0, 1



In [22]:
def generate_predictions(model_name,dataset,model_dir, map_loc):
    """Generate one recipe prediction per image of a dataset split.

    The model is loaded with ``load_model`` and run over every image of the
    chosen split. Each image is resized, centre-cropped, normalised and
    decoded with the module-level ``greedy``, ``beam`` and ``temperature``
    settings. When the decoded recipe is flagged as invalid the sampling is
    retried up to 5 more times; the last attempt is recorded either way.

    After every 100 images (except the first) the predictions gathered so far
    are saved to ``../data/predictions/{dataset}_{model_name}_{img_num}.csv``
    as a backup.

    Args:
        model_name (str): Model name. It can be "model", "modelMeta",
            "modeResnet18", "modeResnet101", "modelResnet152"
        dataset (str): Dataset to be used. It can be "train", "test" or "val"
        model_dir (str): Folder that contains one sub-folder per model.
        map_loc (str | None): ``map_location`` used when loading the checkpoint.

    Returns:
        list[list] | None: One row per image with the structure
        ``[dataset, model_name, conv_model, img_file, greedy, beam, title,
        ingrs, recipe]``, or None (after printing a message) when ``dataset``
        is not a valid split name.
    """
    splits = {
        "train": (train_imgs, train_image_folder),
        "test": (test_imgs, test_image_folder),
        "val": (val_imgs, val_image_folder),
    }
    if dataset not in splits:
        print("Dataset not valid")
        return None
    set_imgs, image_folder = splits[dataset]

    model, vocab, ingrs_vocab, _, _, _ = load_model(model_name, model_dir, map_loc)

    # Loop-invariant objects are built once instead of once per image.
    transform = transforms.Compose([transforms.Resize(256),
                                    transforms.CenterCrop(224)])
    columns = ['dataset', 'model_name', 'conv_model', 'img_file', 'greedy','beam', 'title', 'ingrs', 'recipe']
    max_retries = 5
    # The periodic backups fail if the target folder does not exist yet.
    os.makedirs('../data/predictions', exist_ok=True)

    predictions = []
    for img_num, img_file in enumerate(set_imgs):
        image_path = os.path.join(image_folder, img_file)
        # convert() returns an independent copy, so the file can be closed
        # right away instead of leaking one handle per image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        image_transf = transform(image)
        image_tensor = to_input_transf(image_transf).unsqueeze(0).to(device)

        # One initial attempt plus up to `max_retries` retries. The original
        # condition (`is_valid or counter < 5`) was always true on the first
        # pass, so invalid outputs were never retried.
        # NOTE(review): with greedy decoding a retry presumably reproduces the
        # same output; retries mainly help when sampling - confirm.
        for _attempt in range(max_retries + 1):
            with torch.no_grad():
                outputs = model.sample(image_tensor, greedy=greedy,
                                       temperature=temperature, beam=beam, true_ingrs=None)

            ingr_ids = outputs['ingr_ids'].cpu().numpy()
            recipe_ids = outputs['recipe_ids'].cpu().numpy()

            outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)
            if valid['is_valid']:
                break

            print ("Not a valid recipe!")
            print ("Reason: ", valid['reason'])
            print( outs['title'], outs['ingrs'])

        # Record the valid output, or the last attempt once retries run out.
        predictions.append([dataset, model_name, model_dict[model_name], img_file, greedy,beam, outs['title'], outs['ingrs'], outs['recipe']])

        if img_num % 100 == 0 and img_num >0:
            back_up_predictions = pd.DataFrame(predictions, columns=columns)
            back_up_predictions.to_csv(f'../data/predictions/{dataset}_{model_name}_{img_num}.csv', index=False)

    return predictions

In [12]:
# Run every model over every split and store one CSV per (split, model) pair.
columns = ['dataset', 'model_name', 'conv_model', 'img_file', 'greedy','beam', 'title', 'ingrs', 'recipe']
for model_name in model_list:
    print(model_name)
    for dataset in ["train", "test", "val"]:
        print(dataset)
        # generate_predictions requires the checkpoint folder and the map
        # location too; calling it with only two arguments raises TypeError.
        predictions = generate_predictions(model_name, dataset, model_dir, map_loc)
        predictions = pd.DataFrame(predictions, columns=columns)
        predictions.to_csv(f'../data/predictions/{dataset}_{model_name}.csv', index=False)

model
train


usage: ipykernel_launcher.py [-h] [--save_dir SAVE_DIR]
                             [--project_name PROJECT_NAME]
                             [--model_name MODEL_NAME]
                             [--transfer_from TRANSFER_FROM] [--suff SUFF]
                             [--image_model {resnet18,resnet50,resnet101,resnet152,inception_v3}]
                             [--recipe1m_dir RECIPE1M_DIR]
                             [--aux_data_dir AUX_DATA_DIR]
                             [--crop_size CROP_SIZE] [--image_size IMAGE_SIZE]
                             [--log_step LOG_STEP]
                             [--learning_rate LEARNING_RATE]
                             [--scale_learning_rate_cnn SCALE_LEARNING_RATE_CNN]
                             [--lr_decay_rate LR_DECAY_RATE]
                             [--lr_decay_every LR_DECAY_EVERY]
                             [--weight_decay WEIGHT_DECAY]
                             [--embed_size EMBED_SIZE] [--n_att N_ATT]
             

AttributeError: 'tuple' object has no attribute 'tb_frame'

In [None]:
# Load a single model from `<data_dir>/modelbest.ckpt` and time the loading.
# NOTE(review): `ingr_vocab_size` and `instrs_vocab_size` are not assigned at
# notebook level by any visible cell (they only exist as locals / return values
# of the helper functions) - make sure they are defined before running this.
t = time.time()
# Reset the command line seen by argparse; presumably get_parser() would
# otherwise try to parse the kernel's own launcher arguments.
import sys; sys.argv=['']; del sys
args = get_parser()
args.maxseqlen = 15
args.ingrs_only=False
model = get_model(args, ingr_vocab_size, instrs_vocab_size)
# Load the trained model parameters
model_path = os.path.join(data_dir, 'modelbest.ckpt')
model.load_state_dict(torch.load(model_path, map_location=map_loc))
model.to(device)
# Switch to evaluation mode before sampling.
model.eval()
model.ingrs_only = False
model.recipe_only = False
print ('loaded model')
print ("Elapsed time:", time.time() -t)

In [None]:
# PIL image -> tensor, followed by a per-channel mean/std normalisation.
transf_list_batch = [
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)),
]
to_input_transf = transforms.Compose(transf_list_batch)

In [None]:
# One greedy generation followed by three non-greedy ones, all with beam=-1.
greedy = [True] + [False] * 3
beam = [-1] * len(greedy)
temperature = 1.0
# Number of recipes generated per image (one per entry of `greedy`).
numgens = len(greedy)

In [None]:
# When True, recipes flagged as invalid are printed as well.
show_anyways = False

# Every file inside <data_dir>/demo_imgs is used as a demo image.
image_folder = os.path.join(data_dir, 'demo_imgs')
demo_files = demo_imgs = os.listdir(image_folder)

In [None]:
# Show each demo image and print the recipes generated for it.
for img_file in demo_files:

    image_path = os.path.join(image_folder, img_file)
    image = Image.open(image_path).convert('RGB')

    # Resize + centre crop to the 224x224 input used for the model.
    transf_list = [transforms.Resize(256), transforms.CenterCrop(224)]
    transform = transforms.Compose(transf_list)

    image_transf = transform(image)
    image_tensor = to_input_transf(image_transf).unsqueeze(0).to(device)

    plt.imshow(image_transf)
    plt.axis('off')
    plt.show()
    plt.close()

    num_valid = 1
    for i in range(numgens):
        with torch.no_grad():
            outputs = model.sample(image_tensor, greedy=greedy[i],
                                   temperature=temperature, beam=beam[i], true_ingrs=None)

        ingr_ids = outputs['ingr_ids'].cpu().numpy()
        recipe_ids = outputs['recipe_ids'].cpu().numpy()

        outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)

        # Guard clause: report rejected generations and move on.
        if not (valid['is_valid'] or show_anyways):
            print("Not a valid recipe!")
            print("Reason: ", valid['reason'])
            continue

        print('RECIPE', num_valid)
        num_valid += 1

        # ANSI escape codes to print the section headers in bold.
        BOLD = '\033[1m'
        END = '\033[0m'
        print(f'{BOLD}\nTitle:{END}', outs['title'])

        print(f'{BOLD}\nIngredients:{END}')
        print(', '.join(outs['ingrs']))

        print(f'{BOLD}\nInstructions:{END}')
        print('-' + '\n-'.join(outs['recipe']))

        print('=' * 20)

In [None]:
# Folder of each Kaggle image split.
train_image_folder, test_image_folder, val_image_folder = [
    os.path.join("../Kaggle data/images", split_name)
    for split_name in ('train', 'test', 'val')
]

# File names available in each split (listed in train, test, val order).
train_imgs, test_imgs, val_imgs = [
    os.listdir(folder)
    for folder in (train_image_folder, test_image_folder, val_image_folder)
]


In [None]:
# Peek at the first five training image file names.
train_imgs[:5]

In [None]:
# Preview the recipes generated for the first five validation images.
# Relies on `model`, `ingrs_vocab`, `vocab` and the `greedy` / `beam` lists
# being defined by earlier cells.
for img_num, img_file in enumerate(val_imgs[:5]):
    # Progress marker every 100 images. The original test (`img_num % 100`)
    # was inverted and printed on every index that is NOT a multiple of 100.
    if img_num % 100 == 0:
        print(img_num)
    image_path = os.path.join(val_image_folder, img_file)
    image = Image.open(image_path).convert('RGB')

    # Resize + centre crop to the 224x224 input used for the model.
    transf_list = []
    transf_list.append(transforms.Resize(256))
    transf_list.append(transforms.CenterCrop(224))
    transform = transforms.Compose(transf_list)

    image_transf = transform(image)
    image_tensor = to_input_transf(image_transf).unsqueeze(0).to(device)
    plt.imshow(image_transf)
    plt.axis('off')
    plt.show()
    plt.close()

    num_valid = 1
    for i in range(numgens):
        with torch.no_grad():
            outputs = model.sample(image_tensor, greedy=greedy[i],
                                   temperature=temperature, beam=beam[i], true_ingrs=None)

        ingr_ids = outputs['ingr_ids'].cpu().numpy()
        recipe_ids = outputs['recipe_ids'].cpu().numpy()

        outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)

        if valid['is_valid'] or show_anyways:

            print ('RECIPE', num_valid)
            num_valid+=1

            # ANSI escape codes to print the section headers in bold.
            BOLD = '\033[1m'
            END = '\033[0m'
            print (BOLD + '\nTitle:' + END,outs['title'])

            print (BOLD + '\nIngredients:'+ END)
            print (', '.join(outs['ingrs']))

            print (BOLD + '\nInstructions:'+END)
            print ('-'+'\n-'.join(outs['recipe']))

            print ('='*20)

        else:
            print ("Not a valid recipe!")
            print ("Reason: ", valid['reason'])


In [None]:
# Display the last decoded output left over from the loop above.
outs