In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, IterableDataset

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import time
import os
import copy
import json
import random

from PIL import Image

from transformers import BertTokenizer, BertModel
import pickle

from sklearn.decomposition import PCA
from cca_zoo.models import CCA

# from cca_zoo.deepmodels import architectures
# from cca_zoo.deepmodels import DVCCA, DCCA
# from cca_zoo.deepmodels.architectures import BaseEncoder, Encoder, Decoder
# from cca_zoo.deepmodels.dcca import _DCCA_base

from sklearn.preprocessing import normalize

# making sure that the whole embedding tensor is printed in output
torch.set_printoptions(threshold=10_000)

# Loading necessary files

In [2]:
# validation image and full text data
img_val = torch.load("img_val.pt")
text_val = torch.load("text_val.pt")

# train image and full text data
img_train = torch.load("img_train.pt")
text_train = torch.load("text_train.pt")

# test image and full text data
img_test = torch.load("img_test.pt")
text_test = torch.load("text_test.pt")

#individual text test data
ingredients_test = torch.load("test_ingredients.pt")
instructions_test = torch.load("test_instructions.pt")
title_test = torch.load("test_title.pt")

#individual text train data
ingredients_train = torch.load("train_ingredients.pt")
instructions_train = torch.load("train_instructions.pt")
title_train = torch.load("train_title.pt")

## Ranking function

In [3]:
"""Retrieval ranking function for the learnt representations from the official code of im2recipe paper"""
def ranker(im_vecs, instr_vecs, N = 1000, flag = "image"):
    idxs = range(N)

    glob_rank = []
    glob_recall = {1:0.0,5:0.0,10:0.0}
    for i in range(10):

        ids = random.sample(range(0,len(im_vecs)), N)
        
        im_sub = im_vecs[ids,:]
        instr_sub = instr_vecs[ids,:]

        if flag == "image":
            sims = np.dot(im_sub,instr_sub.T) # for im2recipe
        else:
            sims = np.dot(instr_sub,im_sub.T) # for recipe2im

        med_rank = []
        recall = {1:0.0,5:0.0,10:0.0}

        for ii in idxs:

            # name = ids_sub[ii]
            # get a column of similarities
            sim = sims[ii,:]

            # sort indices in descending order
            sorting = np.argsort(sim)[::-1].tolist()

            # find where the index of the pair sample ended up in the sorting
            pos = sorting.index(ii)

            if (pos+1) == 1:
                recall[1]+=1
            if (pos+1) <=5:
                recall[5]+=1
            if (pos+1)<=10:
                recall[10]+=1

            # store the position
            med_rank.append(pos+1)

        for i in recall.keys():
            recall[i]=recall[i]/N

        med = np.median(med_rank)

        for i in recall.keys():
            glob_recall[i]+=recall[i]
        glob_rank.append(med)

    for i in glob_recall.keys():
        glob_recall[i] = glob_recall[i]/10
    
    print ("Mean median", np.average(glob_rank))
    print ("Recall", glob_recall)

## 2b Using Triplet Loss to train NN

In [6]:
import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [7]:
!export CUDA_VISIBLE_DEVICES='0,1,2,3'

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Negative Training Data Sampling

In [9]:
indices = list(range(0, len(text_train)))
random.seed(0)
random.shuffle(indices)

In [10]:
neg_text_train = [text_train[i] for i in indices]
neg_title_train = [title_train[i] for i in indices]
neg_ingredients_train = [ingredients_train[i] for i in indices]
neg_instructions_train = [instructions_train[i] for i in indices]

### Model definition and training loop

In [11]:
class EmbeddingDataset(Dataset):
    def __init__(self, anchor_emb, positive_emb, negative_emb, transform=None):
        self.anchor_emb = torch.as_tensor(np.array(anchor_emb))
        self.positive_emb = torch.as_tensor(np.array(positive_emb))
        self.negative_emb = torch.as_tensor(np.array(negative_emb))
        self.transform = transform

    def __len__(self):
        return len(self.anchor_emb)

    def __getitem__(self, idx):
        return self.anchor_emb[idx], self.positive_emb[idx], self.negative_emb[idx]

In [12]:
class EmbeddingNetwork(nn.Module):
    def __init__(self, output_size, input_size=1024):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.Dropout(),
            nn.LeakyReLU()
        )
        self.layer2 = nn.Linear(512, output_size)

    def forward(self, x):
        x = self.layer1(x)
        return self.layer2(x)

In [13]:
class AverageMeter(object):
    # Utility function for timers
    
    def __init__(self):
        self.reset()
    
    def reset(self):
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0
    
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


In [14]:
def train(train_loader, img_model, txt_model, criterion, optimizer, epoch):
    print('Starting training epoch {}'.format(epoch))
    img_model.train()
    txt_model.train()
    
    batch_time, data_time, losses = AverageMeter(), AverageMeter(), AverageMeter()
    end = time.time()
    running_loss = 0.
    last_loss = 0.
    optimizer.zero_grad()
    
    for i, (anchor_emb, positive_emb, negative_emb) in enumerate(train_loader):
    
        # Use GPU if available
        if use_gpu: 
            anchor_emb, positive_emb, negative_emb = anchor_emb.to(device), positive_emb.to(device), negative_emb.to(device)

        data_time.update(time.time() - end)

        # Run forward pass
        out_anchor_emb = img_model(anchor_emb) 
        out_positive_emb = txt_model(positive_emb)
        out_negative_emb = txt_model(negative_emb)
        loss = criterion(out_anchor_emb, out_positive_emb, out_negative_emb) 

        # Compute gradient and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        # Print model accuracy -- in the code below
        running_loss += loss.item()
        if i % 10000 == 0:
            last_loss = running_loss / 1000 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.
        
        if i % 10000 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val} ({batch_time.avg})\t'
                'Data {data_time.val} ({data_time.avg})\t'.format(
                  epoch, i, len(train_loader), batch_time=batch_time,
                 data_time=data_time)) 

    print('Finished training epoch {}'.format(epoch))


### Training dims = 512; all components

#### im2recipe and recipe2im

In [None]:
train_dataset = EmbeddingDataset(img_train, text_train, neg_text_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)

img_model = EmbeddingNetwork(512)
# img_model= nn.DataParallel(img_model, device_ids=[2,3])
img_model.to(device);

txt_model = EmbeddingNetwork(512);
# txt_model= nn.DataParallel(txt_model, device_ids=[2,3])
txt_model.to(device);

optimizer = torch.optim.Adam(list(img_model.parameters()) + list(txt_model.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model = img_model.to(device)
    txt_model = txt_model.to(device)
    
best_losses = 1e10
epochs = 10
for epoch in range(epochs):

    train(train_loader, img_model, txt_model, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model.state_dict(), 'triplet_checkpoints/img-model-full-512-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model.state_dict(), 'triplet_checkpoints/txt-model-full-512-epoch-{}.pth'.format(epoch+1))

#### im2title and title2im

In [None]:
title_dataset = EmbeddingDataset(img_train, title_train, neg_title_train)
title_loader = DataLoader(title_dataset, batch_size=64, shuffle=False)

img_model_title = EmbeddingNetwork(512)
# img_model_title = nn.DataParallel(img_model_title, device_ids=[2,3])
img_model_title.to(device);

txt_model_title = EmbeddingNetwork(512);
# txt_model_title = nn.DataParallel(txt_model_title, device_ids=[2,3])
txt_model_title.to(device);

optimizer = torch.optim.Adam(list(img_model_title.parameters()) + list(txt_model_title.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_title = img_model_title.to(device)
    txt_model_title = txt_model_title.to(device)

best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(title_loader, img_model_title, txt_model_title, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_title.state_dict(), 'triplet_checkpoints/img-model-title-512-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_title.state_dict(), 'triplet_checkpoints/txt-model-title-512-epoch-{}.pth'.format(epoch+1))

#### im2ingredients and ingredients2im

In [None]:
ingredients_dataset = EmbeddingDataset(img_train, ingredients_train, neg_ingredients_train)
ingredients_loader = DataLoader(ingredients_dataset, batch_size=64, shuffle=False)

img_model_ingredients = EmbeddingNetwork(512)
# img_model_ingredients = nn.DataParallel(img_model_ingredients, device_ids=[2,3])
img_model_ingredients.to(device);

txt_model_ingredients = EmbeddingNetwork(512);
# txt_model_ingredients = nn.DataParallel(txt_model_ingredients, device_ids=[2,3])
txt_model_ingredients.to(device);

optimizer = torch.optim.Adam(list(img_model_ingredients.parameters()) + list(txt_model_ingredients.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_ingredients = img_model_ingredients.to(device)
    txt_model_ingredients = txt_model_ingredients.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(ingredients_loader, img_model_ingredients, txt_model_ingredients, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_ingredients.state_dict(), 'triplet_checkpoints/img-model-ingredients-512-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_ingredients.state_dict(), 'triplet_checkpoints/txt-model-ingredients-512-epoch-{}.pth'.format(epoch+1))

#### im2instructions and instructions2im

In [None]:
instructions_dataset = EmbeddingDataset(img_train, instructions_train, neg_instructions_train)
instructions_loader = DataLoader(instructions_dataset, batch_size=64, shuffle=False)

img_model_instructions = EmbeddingNetwork(512)
# img_model_instructions = nn.DataParallel(img_model_instructions, device_ids=[2,3])
img_model_instructions.to(device);

txt_model_instructions = EmbeddingNetwork(512);
# txt_model_instructions = nn.DataParallel(txt_model_instructions, device_ids=[2,3])
txt_model_instructions.to(device);

optimizer = torch.optim.Adam(list(img_model_instructions.parameters()) + list(txt_model_instructions.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_instructions = img_model_instructions.to(device)
    txt_model_instructions = txt_model_instructions.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(instructions_loader, img_model_instructions, txt_model_instructions, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_instructions.state_dict(), 'triplet_checkpoints/img-model-instructions-512-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_instructions.state_dict(), 'triplet_checkpoints/txt-model-instructions-512-epoch-{}.pth'.format(epoch+1))

### Training dims = 256; all components

#### im2recipe and recipe2im

In [None]:
train_dataset = EmbeddingDataset(img_train, text_train, neg_text_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)

img_model = EmbeddingNetwork(256)
# img_model= nn.DataParallel(img_model, device_ids=[2,3])
img_model.to(device);

txt_model = EmbeddingNetwork(256);
# txt_model= nn.DataParallel(txt_model, device_ids=[2,3])
txt_model.to(device);

optimizer = torch.optim.Adam(list(img_model.parameters()) + list(txt_model.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model = img_model.to(device)
    txt_model = txt_model.to(device)
    
best_losses = 1e10
epochs = 10
for epoch in range(epochs):

    train(train_loader, img_model, txt_model, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model.state_dict(), 'triplet_checkpoints/img-model-full-256-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model.state_dict(), 'triplet_checkpoints/txt-model-full-256-epoch-{}.pth'.format(epoch+1))

#### im2title and title2im

In [None]:
title_dataset = EmbeddingDataset(img_train, title_train, neg_title_train)
title_loader = DataLoader(title_dataset, batch_size=64, shuffle=False)

img_model_title = EmbeddingNetwork(256)
# img_model_title = nn.DataParallel(img_model_title, device_ids=[2,3])
img_model_title.to(device);

txt_model_title = EmbeddingNetwork(256);
# txt_model_title = nn.DataParallel(txt_model_title, device_ids=[2,3])
txt_model_title.to(device);

optimizer = torch.optim.Adam(list(img_model_title.parameters()) + list(txt_model_title.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_title = img_model_title.to(device)
    txt_model_title = txt_model_title.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(title_loader, img_model_title, txt_model_title, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_title.state_dict(), 'triplet_checkpoints/img-model-title-256-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_title.state_dict(), 'triplet_checkpoints/txt-model-title-256-epoch-{}.pth'.format(epoch+1))

#### im2ingredients and ingredients2im

In [None]:
ingredients_dataset = EmbeddingDataset(img_train, ingredients_train, neg_ingredients_train)
ingredients_loader = DataLoader(ingredients_dataset, batch_size=64, shuffle=False)

img_model_ingredients = EmbeddingNetwork(256)
# img_model_ingredients = nn.DataParallel(img_model_ingredients, device_ids=[2,3])
img_model_ingredients.to(device);

txt_model_ingredients = EmbeddingNetwork(256);
# txt_model_ingredients = nn.DataParallel(txt_model_ingredients, device_ids=[2,3])
txt_model_ingredients.to(device);

optimizer = torch.optim.Adam(list(img_model_ingredients.parameters()) + list(txt_model_ingredients.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_ingredients = img_model_ingredients.to(device)
    txt_model_ingredients = txt_model_ingredients.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(ingredients_loader, img_model_ingredients, txt_model_ingredients, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_ingredients.state_dict(), 'triplet_checkpoints/img-model-ingredients-256-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_ingredients.state_dict(), 'triplet_checkpoints/txt-model-ingredients-256-epoch-{}.pth'.format(epoch+1))

#### im2instructions and instructions2im

In [None]:
instructions_dataset = EmbeddingDataset(img_train, instructions_train, neg_instructions_train)
instructions_loader = DataLoader(instructions_dataset, batch_size=64, shuffle=False)

img_model_instructions = EmbeddingNetwork(256)
# img_model_instructions = nn.DataParallel(img_model_instructions, device_ids=[2,3])
img_model_instructions.to(device);

txt_model_instructions = EmbeddingNetwork(256);
# txt_model_instructions = nn.DataParallel(txt_model_instructions, device_ids=[2,3])
txt_model_instructions.to(device);

optimizer = torch.optim.Adam(list(img_model_instructions.parameters()) + list(txt_model_instructions.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_instructions = img_model_instructions.to(device)
    txt_model_instructions = txt_model_instructions.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(instructions_loader, img_model_instructions, txt_model_instructions, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_instructions.state_dict(), 'triplet_checkpoints/img-model-instructions-256-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_instructions.state_dict(), 'triplet_checkpoints/txt-model-instructions-256-epoch-{}.pth'.format(epoch+1))

### Training dims = 128; all components

#### im2recipe and recipe2im

In [15]:
train_dataset = EmbeddingDataset(img_train, text_train, neg_text_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)

img_model = EmbeddingNetwork(128)
# img_model= nn.DataParallel(img_model, device_ids=[2,3])
img_model.to(device);

txt_model = EmbeddingNetwork(128);
# txt_model= nn.DataParallel(txt_model, device_ids=[2,3])
txt_model.to(device);

optimizer = torch.optim.Adam(list(img_model.parameters()) + list(txt_model.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model = img_model.to(device)
    txt_model = txt_model.to(device)
    
best_losses = 1e10
epochs = 10
for epoch in range(epochs):

    train(train_loader, img_model, txt_model, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model.state_dict(), 'triplet_checkpoints/img-model-full-128-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model.state_dict(), 'triplet_checkpoints/txt-model-full-128-epoch-{}.pth'.format(epoch+1))

#### im2title and title2im

In [19]:
title_dataset = EmbeddingDataset(img_train, title_train, neg_title_train)
title_loader = DataLoader(title_dataset, batch_size=64, shuffle=False)

img_model_title = EmbeddingNetwork(128)
# img_model_title = nn.DataParallel(img_model_title, device_ids=[2,3])
img_model_title.to(device);

txt_model_title = EmbeddingNetwork(128);
# txt_model_title = nn.DataParallel(txt_model_title, device_ids=[2,3])
txt_model_title.to(device);

optimizer = torch.optim.Adam(list(img_model_title.parameters()) + list(txt_model_title.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_title = img_model_title.to(device)
    txt_model_title = txt_model_title.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(title_loader, img_model_title, txt_model_title, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_title.state_dict(), 'triplet_checkpoints/img-model-title-128-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_title.state_dict(), 'triplet_checkpoints/txt-model-title-128-epoch-{}.pth'.format(epoch+1))

#### im2ingredients and ingredients2im

In [23]:
ingredients_dataset = EmbeddingDataset(img_train, ingredients_train, neg_ingredients_train)
ingredients_loader = DataLoader(ingredients_dataset, batch_size=64, shuffle=False)

img_model_ingredients = EmbeddingNetwork(128)
# img_model_ingredients = nn.DataParallel(img_model_ingredients, device_ids=[2,3])
img_model_ingredients.to(device);

txt_model_ingredients = EmbeddingNetwork(128);
# txt_model_ingredients = nn.DataParallel(txt_model_ingredients, device_ids=[2,3])
txt_model_ingredients.to(device);

optimizer = torch.optim.Adam(list(img_model_ingredients.parameters()) + list(txt_model_ingredients.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_ingredients = img_model_ingredients.to(device)
    txt_model_ingredients = txt_model_ingredients.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(ingredients_loader, img_model_ingredients, txt_model_ingredients, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_ingredients.state_dict(), 'triplet_checkpoints/img-model-ingredients-128-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_ingredients.state_dict(), 'triplet_checkpoints/txt-model-ingredients-128-epoch-{}.pth'.format(epoch+1))

#### im2instructions and instructions2im

In [15]:
instructions_dataset = EmbeddingDataset(img_train, instructions_train, neg_instructions_train)
instructions_loader = DataLoader(instructions_dataset, batch_size=64, shuffle=False)

img_model_instructions = EmbeddingNetwork(128)
# img_model_instructions = nn.DataParallel(img_model_instructions, device_ids=[2,3])
img_model_instructions.to(device);

txt_model_instructions = EmbeddingNetwork(128);
# txt_model_instructions = nn.DataParallel(txt_model_instructions, device_ids=[2,3])
txt_model_instructions.to(device);

optimizer = torch.optim.Adam(list(img_model_instructions.parameters()) + list(txt_model_instructions.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_instructions = img_model_instructions.to(device)
    txt_model_instructions = txt_model_instructions.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(instructions_loader, img_model_instructions, txt_model_instructions, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_instructions.state_dict(), 'triplet_checkpoints/img-model-instructions-128-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_instructions.state_dict(), 'triplet_checkpoints/txt-model-instructions-128-epoch-{}.pth'.format(epoch+1))

### Training dims = 64; all components

#### im2recipe and recipe2im

In [24]:
train_dataset = EmbeddingDataset(img_train, text_train, neg_text_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)

img_model = EmbeddingNetwork(64)
# img_model= nn.DataParallel(img_model, device_ids=[2,3])
img_model.to(device);

txt_model = EmbeddingNetwork(64);
# txt_model= nn.DataParallel(txt_model, device_ids=[2,3])
txt_model.to(device);

optimizer = torch.optim.Adam(list(img_model.parameters()) + list(txt_model.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model = img_model.to(device)
    txt_model = txt_model.to(device)
    
best_losses = 1e10
epochs = 10
for epoch in range(epochs):

    train(train_loader, img_model, txt_model, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model.state_dict(), 'triplet_checkpoints/img-model-full-64-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model.state_dict(), 'triplet_checkpoints/txt-model-full-64-epoch-{}.pth'.format(epoch+1))

#### im2title and title2im

In [28]:
title_dataset = EmbeddingDataset(img_train, title_train, neg_title_train)
title_loader = DataLoader(title_dataset, batch_size=64, shuffle=False)

img_model_title = EmbeddingNetwork(64)
# img_model_title = nn.DataParallel(img_model_title, device_ids=[2,3])
img_model_title.to(device);

txt_model_title = EmbeddingNetwork(64);
# txt_model_title = nn.DataParallel(txt_model_title, device_ids=[2,3])
txt_model_title.to(device);

optimizer = torch.optim.Adam(list(img_model_title.parameters()) + list(txt_model_title.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_title = img_model_title.to(device)
    txt_model_title = txt_model_title.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(title_loader, img_model_title, txt_model_title, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_title.state_dict(), 'triplet_checkpoints/img-model-title-64-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_title.state_dict(), 'triplet_checkpoints/txt-model-title-64-epoch-{}.pth'.format(epoch+1))

#### im2ingredients and ingredients2im

In [15]:
ingredients_dataset = EmbeddingDataset(img_train, ingredients_train, neg_ingredients_train)
ingredients_loader = DataLoader(ingredients_dataset, batch_size=64, shuffle=False)

img_model_ingredients = EmbeddingNetwork(64)
# img_model_ingredients = nn.DataParallel(img_model_ingredients, device_ids=[2,3])
img_model_ingredients.to(device);

txt_model_ingredients = EmbeddingNetwork(64);
# txt_model_ingredients = nn.DataParallel(txt_model_ingredients, device_ids=[2,3])
txt_model_ingredients.to(device);

optimizer = torch.optim.Adam(list(img_model_ingredients.parameters()) + list(txt_model_ingredients.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_ingredients = img_model_ingredients.to(device)
    txt_model_ingredients = txt_model_ingredients.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(ingredients_loader, img_model_ingredients, txt_model_ingredients, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_ingredients.state_dict(), 'triplet_checkpoints/img-model-ingredients-64-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_ingredients.state_dict(), 'triplet_checkpoints/txt-model-ingredients-64-epoch-{}.pth'.format(epoch+1))

#### im2instructions and instructions2im

In [19]:
instructions_dataset = EmbeddingDataset(img_train, instructions_train, neg_instructions_train)
instructions_loader = DataLoader(instructions_dataset, batch_size=64, shuffle=False)

img_model_instructions = EmbeddingNetwork(64)
# img_model_instructions = nn.DataParallel(img_model_instructions, device_ids=[2,3])
img_model_instructions.to(device);

txt_model_instructions = EmbeddingNetwork(64);
# txt_model_instructions = nn.DataParallel(txt_model_instructions, device_ids=[2,3])
txt_model_instructions.to(device);

optimizer = torch.optim.Adam(list(img_model_instructions.parameters()) + list(txt_model_instructions.parameters()), lr=1e-2, weight_decay=0.0)
criterion = nn.TripletMarginLoss(margin = 1)
# criterion = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))

use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu: 
    criterion = criterion.to(device)
    img_model_instructions = img_model_instructions.to(device)
    txt_model_instructions = txt_model_instructions.to(device)
    
best_losses = 1e10
epochs = 5
for epoch in range(epochs):

    train(instructions_loader, img_model_instructions, txt_model_instructions, criterion, optimizer, epoch)
        
  # Save checkpoint and replace old best model if current model is betterabs
torch.save(img_model_instructions.state_dict(), 'triplet_checkpoints/img-model-instructions-64-epoch-{}.pth'.format(epoch+1))
torch.save(txt_model_instructions.state_dict(), 'triplet_checkpoints/txt-model-instructions-64-epoch-{}.pth'.format(epoch+1))

### Dimensional Analysis on val data. Dims: [64, 128, 256, 512]; all components

#### 512

In [None]:
# im2recipe 512
img_model_full_512 = EmbeddingNetwork(512)
# img_model_full_512 = nn.DataParallel(img_model_full_512, device_ids=[1])
img_model_full_512.load_state_dict(torch.load("triplet_checkpoints/img-model-full-512-epoch-10.pth"))
# img_model_full_512.to((f'cuda:{img_model_full_512.device_ids[0]}'));
img_model_full_512.to('cpu')
img_model_full_512.eval();
txt_model_full_512 = EmbeddingNetwork(512)
# txt_model_full_512 = nn.DataParallel(txt_model_full_512, device_ids=[1])
txt_model_full_512.load_state_dict(torch.load("triplet_checkpoints/txt-model-full-512-epoch-10.pth"))
# txt_model_full_512.to((f'cuda:{txt_model_full_512.device_ids[0]}'));
txt_model_full_512.to('cpu')
txt_model_full_512.eval();

# im2title 512
img_model_title_512 = EmbeddingNetwork(512)
# img_model_title_512 = nn.DataParallel(img_model_title_512, device_ids=[1])
img_model_title_512.load_state_dict(torch.load("triplet_checkpoints/img-model-title-512-epoch-5.pth"))
# img_model_title_512.to((f'cuda:{img_model_title_512.device_ids[0]}'));
img_model_title_512.to('cpu')
img_model_title_512.eval();
txt_model_title_512 = EmbeddingNetwork(512)
# txt_model_title_512 = nn.DataParallel(txt_model_title_512, device_ids=[1])
txt_model_title_512.load_state_dict(torch.load("triplet_checkpoints/txt-model-title-512-epoch-5.pth"))
# txt_model_title_512.to((f'cuda:{txt_model_title_512.device_ids[0]}'));
txt_model_title_512.to('cpu')
txt_model_title_512.eval();

# im2instructions 512
img_model_instructions_512 = EmbeddingNetwork(512)
# img_model_instructions_512 = nn.DataParallel(img_model_instructions_512, device_ids=[1])
img_model_instructions_512.load_state_dict(torch.load("triplet_checkpoints/img-model-instructions-512-epoch-5.pth"))
# img_model_instructions_512.to((f'cuda:{img_model_instructions_512.device_ids[0]}'));
img_model_instructions_512.to('cpu')
img_model_instructions_512.eval();
txt_model_instructions_512 = EmbeddingNetwork(512)
# txt_model_instructions_512 = nn.DataParallel(txt_model_instructions_512, device_ids=[1])
txt_model_instructions_512.load_state_dict(torch.load("triplet_checkpoints/txt-model-instructions-512-epoch-5.pth"))
# txt_model_instructions_512.to((f'cuda:{txt_model_instructions_512.device_ids[0]}'));
txt_model_instructions_512.to('cpu')
txt_model_instructions_512.eval();

#im2ingredients 512
img_model_ingredients_512 = EmbeddingNetwork(512)
# img_model_ingredients_512 = nn.DataParallel(img_model_ingredients_512, device_ids=[1])
img_model_ingredients_512.load_state_dict(torch.load("triplet_checkpoints/img-model-ingredients-512-epoch-5.pth"))
# img_model_ingredients_512.to((f'cuda:{img_model_full_512.device_ids[0]}'));
img_model_ingredients_512.to('cpu')
img_model_ingredients_512.eval();
txt_model_ingredients_512 = EmbeddingNetwork(512)
# txt_model_ingredients_512 = nn.DataParallel(txt_model_ingredients_512, device_ids=[1])
txt_model_ingredients_512.load_state_dict(torch.load("triplet_checkpoints/txt-model-ingredients-512-epoch-5.pth"))
# txt_model_ingredients_512.to((f'cuda:{txt_model_ingredients_512.device_ids[0]}'));
txt_model_ingredients_512.to('cpu')
txt_model_ingredients_512.eval();

#### Full recipe

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 512))
text_val_nonlinear = np.zeros(shape = (len(img_val), 512))

# img_val_nonlinear.to(device)
# text_val_nonlinear.to(device)

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_full_512(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_full_512(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2recipe and recipe2im
print("Running im2recipe for dims = 512 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2recipe for dims = 512 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2recipe for dims = 512 and sample = 1000
Mean median 2.4
Recall {1: 0.3538, 5: 0.6819, 10: 0.7863000000000001}
Running im2recipe for dims = 512 and sample = 10000
Mean median 15.9
Recall {1: 0.10740000000000001, 5: 0.3032, 10: 0.41979}


#### Title

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 512))
text_val_nonlinear = np.zeros(shape = (len(img_val), 512))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_title_512(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_title_512(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2title and title2im
print("Running im2title for dims = 512 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2title for dims = 512 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2title for dims = 512 and sample = 1000
Mean median 5.1
Recall {1: 0.20360000000000006, 5: 0.5205, 10: 0.6624000000000001}
Running im2title for dims = 512 and sample = 10000
Mean median 42.3
Recall {1: 0.043919999999999994, 5: 0.15214, 10: 0.23866}


#### Ingredients

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 512))
text_val_nonlinear = np.zeros(shape = (len(img_val), 512))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_ingredients_512(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_ingredients_512(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2ingredients and ingredients2im
print("Running im2ingredients for dims = 512 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2ingredients for dims = 512 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2ingredients for dims = 512 and sample = 1000
Mean median 4.1
Recall {1: 0.263, 5: 0.5714999999999999, 10: 0.6995000000000001}
Running im2ingredients for dims = 512 and sample = 10000
Mean median 31.0
Recall {1: 0.06504, 5: 0.20672000000000001, 10: 0.30518999999999996}


#### Instructions

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 512))
text_val_nonlinear = np.zeros(shape = (len(img_val), 512))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_instructions_512(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_instructions_512(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2instructions and instructions2im
print("Running im2instructions for dims = 512 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2instructions for dims = 512 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2instructions for dims = 512 and sample = 1000
Mean median 3.0
Recall {1: 0.3229, 5: 0.6366999999999999, 10: 0.7559}
Running im2instructions for dims = 512 and sample = 10000
Mean median 22.1
Recall {1: 0.08642999999999999, 5: 0.25167, 10: 0.36322000000000004}


#### 256

In [None]:
# im2recipe 256
img_model_full_256 = EmbeddingNetwork(256)
# img_model_full_256 = nn.DataParallel(img_model_full_256, device_ids=[1])
img_model_full_256.load_state_dict(torch.load("triplet_checkpoints/img-model-full-256-epoch-10.pth"))
# img_model_full_256.to((f'cuda:{img_model_full_256.device_ids[0]}'));
img_model_full_256.to('cpu')
img_model_full_256.eval();
txt_model_full_256 = EmbeddingNetwork(256)
# txt_model_full_256 = nn.DataParallel(txt_model_full_256, device_ids=[1])
txt_model_full_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-full-256-epoch-10.pth"))
# txt_model_full_256.to((f'cuda:{txt_model_full_256.device_ids[0]}'));
txt_model_full_256.to('cpu')
txt_model_full_256.eval();

#im2title 256
img_model_title_256 = EmbeddingNetwork(256)
# img_model_title_256 = nn.DataParallel(img_model_title_256, device_ids=[1])
img_model_title_256.load_state_dict(torch.load("triplet_checkpoints/img-model-title-256-epoch-5.pth"))
# img_model_title_256.to((f'cuda:{img_model_title_256.device_ids[0]}'));
img_model_title_256.to('cpu')
img_model_title_256.eval();
txt_model_title_256 = EmbeddingNetwork(256)
# txt_model_title_256 = nn.DataParallel(txt_model_title_256, device_ids=[1])
txt_model_title_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-title-256-epoch-5.pth"))
# txt_model_title_256.to((f'cuda:{txt_model_title_256.device_ids[0]}'));
txt_model_title_256.to('cpu')
txt_model_title_256.eval();

#im2instructions 256
img_model_instructions_256 = EmbeddingNetwork(256)
# img_model_instructions_256 = nn.DataParallel(img_model_instructions_256, device_ids=[1])
img_model_instructions_256.load_state_dict(torch.load("triplet_checkpoints/img-model-instructions-256-epoch-5.pth"))
# img_model_instructions_256.to((f'cuda:{img_model_instructions_256.device_ids[0]}'));
img_model_instructions_256.to('cpu')
img_model_instructions_256.eval();
txt_model_instructions_256 = EmbeddingNetwork(256)
# txt_model_instructions_256 = nn.DataParallel(txt_model_instructions_256, device_ids=[1])
txt_model_instructions_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-instructions-256-epoch-5.pth"))
# txt_model_instructions_256.to((f'cuda:{txt_model_instructions_256.device_ids[0]}'));
txt_model_instructions_256.to('cpu')
txt_model_instructions_256.eval();

#im2ingredients 256
img_model_ingredients_256 = EmbeddingNetwork(256)
# img_model_ingredients_256 = nn.DataParallel(img_model_ingredients_256, device_ids=[1])
img_model_ingredients_256.load_state_dict(torch.load("triplet_checkpoints/img-model-ingredients-256-epoch-5.pth"))
# img_model_ingredients_256.to((f'cuda:{img_model_full_256.device_ids[0]}'));
img_model_ingredients_256.to('cpu')
img_model_ingredients_256.eval();
txt_model_ingredients_256 = EmbeddingNetwork(256)
# txt_model_ingredients_256 = nn.DataParallel(txt_model_ingredients_256, device_ids=[1])
txt_model_ingredients_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-ingredients-256-epoch-5.pth"))
# txt_model_ingredients_256.to((f'cuda:{txt_model_ingredients_256.device_ids[0]}'));
txt_model_ingredients_256.to('cpu')
txt_model_ingredients_256.eval();

#### Full recipe

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 256))
text_val_nonlinear = np.zeros(shape = (len(img_val), 256))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_full_256(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_full_256(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2recipe and recipe2im
print("Running im2recipe for dims = 256 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2recipe for dims = 256 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2recipe for dims = 256 and sample = 1000
Mean median 2.1
Recall {1: 0.3718, 5: 0.697, 10: 0.7957}
Running im2recipe for dims = 256 and sample = 10000
Mean median 15.0
Recall {1: 0.11236000000000002, 5: 0.31418, 10: 0.43373999999999996}


#### Title

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 256))
text_val_nonlinear = np.zeros(shape = (len(img_val), 256))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_title_256(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_title_256(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2title and title2im
print("Running im2title for dims = 256 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2title for dims = 256 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2title for dims = 256 and sample = 1000
Mean median 4.9
Recall {1: 0.2165, 5: 0.5333, 10: 0.6711}
Running im2title for dims = 256 and sample = 10000
Mean median 39.5
Recall {1: 0.04725999999999999, 5: 0.16085, 10: 0.24957}


#### Ingredients

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 256))
text_val_nonlinear = np.zeros(shape = (len(img_val), 256))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_ingredients_256(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_ingredients_256(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2ingredients and ingredients2im
print("Running im2ingredients for dims = 256 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2ingredients for dims = 256 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2ingredients for dims = 256 and sample = 1000
Mean median 3.9
Recall {1: 0.2761, 5: 0.5915, 10: 0.7167000000000001}
Running im2ingredients for dims = 256 and sample = 10000
Mean median 28.5
Recall {1: 0.0715, 5: 0.22025999999999998, 10: 0.32059000000000004}


#### Instructions

In [None]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 256))
text_val_nonlinear = np.zeros(shape = (len(img_val), 256))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_instructions_256(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_instructions_256(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2instructions and instructions2im
print("Running im2instructions for dims = 256 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2instructions for dims = 256 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2instructions for dims = 256 and sample = 1000
Mean median 3.2
Recall {1: 0.3028, 5: 0.6179, 10: 0.74}
Running im2instructions for dims = 256 and sample = 10000
Mean median 24.7
Recall {1: 0.08179, 5: 0.24386000000000002, 10: 0.34886}


#### 128

In [19]:
# im2recipe 128
img_model_full_128 = EmbeddingNetwork(128)
# img_model_full_128 = nn.DataParallel(img_model_full_128, device_ids=[1])
img_model_full_128.load_state_dict(torch.load("triplet_checkpoints/img-model-full-128-epoch-10.pth"))
# img_model_full_128.to((f'cuda:{img_model_full_128.device_ids[0]}'));
img_model_full_128.to('cpu')
img_model_full_128.eval();
txt_model_full_128 = EmbeddingNetwork(128)
# txt_model_full_128 = nn.DataParallel(txt_model_full_128, device_ids=[1])
txt_model_full_128.load_state_dict(torch.load("triplet_checkpoints/txt-model-full-128-epoch-10.pth"))
# txt_model_full_128.to((f'cuda:{txt_model_full_128.device_ids[0]}'));
txt_model_full_128.to('cpu')
txt_model_full_128.eval();

#im2title 128
img_model_title_128 = EmbeddingNetwork(128)
# img_model_title_128 = nn.DataParallel(img_model_title_128, device_ids=[1])
img_model_title_128.load_state_dict(torch.load("triplet_checkpoints/img-model-title-128-epoch-5.pth"))
# img_model_title_128.to((f'cuda:{img_model_title_128.device_ids[0]}'));
img_model_title_128.to('cpu')
img_model_title_128.eval();
txt_model_title_128 = EmbeddingNetwork(128)
# txt_model_title_128 = nn.DataParallel(txt_model_title_128, device_ids=[1])
txt_model_title_128.load_state_dict(torch.load("triplet_checkpoints/txt-model-title-128-epoch-5.pth"))
# txt_model_title_128.to((f'cuda:{txt_model_title_128.device_ids[0]}'));
txt_model_title_128.to('cpu')
txt_model_title_128.eval();

#im2instructions 128
img_model_instructions_128 = EmbeddingNetwork(128)
# img_model_instructions_128 = nn.DataParallel(img_model_instructions_128, device_ids=[1])
img_model_instructions_128.load_state_dict(torch.load("triplet_checkpoints/img-model-instructions-128-epoch-5.pth"))
# img_model_instructions_128.to((f'cuda:{img_model_instructions_128.device_ids[0]}'));
img_model_instructions_128.to('cpu')
img_model_instructions_128.eval();
txt_model_instructions_128 = EmbeddingNetwork(128)
# txt_model_instructions_128 = nn.DataParallel(txt_model_instructions_128, device_ids=[1])
txt_model_instructions_128.load_state_dict(torch.load("triplet_checkpoints/txt-model-instructions-128-epoch-5.pth"))
# txt_model_instructions_128.to((f'cuda:{txt_model_instructions_128.device_ids[0]}'));
txt_model_instructions_128.to('cpu')
txt_model_instructions_128.eval();

#im2ingredients 128
img_model_ingredients_128 = EmbeddingNetwork(128)
# img_model_ingredients_128 = nn.DataParallel(img_model_ingredients_128, device_ids=[1])
img_model_ingredients_128.load_state_dict(torch.load("triplet_checkpoints/img-model-ingredients-128-epoch-5.pth"))
# img_model_ingredients_128.to((f'cuda:{img_model_full_128.device_ids[0]}'));
img_model_ingredients_128.to('cpu')
img_model_ingredients_128.eval();
txt_model_ingredients_128 = EmbeddingNetwork(128)
# txt_model_ingredients_128 = nn.DataParallel(txt_model_ingredients_128, device_ids=[1])
txt_model_ingredients_128.load_state_dict(torch.load("triplet_checkpoints/txt-model-ingredients-128-epoch-5.pth"))
# txt_model_ingredients_128.to((f'cuda:{txt_model_ingredients_128.device_ids[0]}'));
txt_model_ingredients_128.to('cpu')
txt_model_ingredients_128.eval();

#### Full recipe

In [20]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 128))
text_val_nonlinear = np.zeros(shape = (len(img_val), 128))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_full_128(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_full_128(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2recipe and recipe2im
print("Running im2recipe for dims = 128 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2recipe for dims = 128 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2recipe for dims = 128 and sample = 1000
Mean median 2.0
Recall {1: 0.3781, 5: 0.6936, 10: 0.797}
Running im2recipe for dims = 128 and sample = 10000
Mean median 13.9
Recall {1: 0.12241000000000002, 5: 0.32748000000000005, 10: 0.44809}


#### Title

In [21]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 128))
text_val_nonlinear = np.zeros(shape = (len(img_val), 128))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_title_128(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_title_128(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2title and title2im
print("Running im2title for dims = 128 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2title for dims = 128 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2title for dims = 128 and sample = 1000
Mean median 5.1
Recall {1: 0.21220000000000003, 5: 0.5246000000000001, 10: 0.6697}
Running im2title for dims = 128 and sample = 10000
Mean median 41.6
Recall {1: 0.04628, 5: 0.15596, 10: 0.24558}


#### Ingredients

In [22]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 128))
text_val_nonlinear = np.zeros(shape = (len(img_val), 128))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_ingredients_128(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_ingredients_128(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2ingredients and ingredients2im
print("Running im2ingredients for dims = 128 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2ingredients for dims = 128 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2ingredients for dims = 128 and sample = 1000
Mean median 3.85
Recall {1: 0.26820000000000005, 5: 0.5885, 10: 0.7089000000000001}
Running im2ingredients for dims = 128 and sample = 10000
Mean median 29.0
Recall {1: 0.06841, 5: 0.21638000000000002, 10: 0.31759000000000004}


#### Instructions

In [23]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 128))
text_val_nonlinear = np.zeros(shape = (len(img_val), 128))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_instructions_128(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_instructions_128(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2instructions and instructions2im
print("Running im2instructions for dims = 128 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2instructions for dims = 128 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2instructions for dims = 128 and sample = 1000
Mean median 3.0
Recall {1: 0.32460000000000006, 5: 0.6363, 10: 0.7506}
Running im2instructions for dims = 128 and sample = 10000
Mean median 21.8
Recall {1: 0.08918000000000001, 5: 0.25699000000000005, 10: 0.36541}


#### 64

In [23]:
# im2recipe 64
img_model_full_64 = EmbeddingNetwork(64)
# img_model_full_64 = nn.DataParallel(img_model_full_64, device_ids=[1])
img_model_full_64.load_state_dict(torch.load("triplet_checkpoints/img-model-full-64-epoch-10.pth"))
# img_model_full_64.to((f'cuda:{img_model_full_64.device_ids[0]}'));
img_model_full_64.to('cpu')
img_model_full_64.eval();
txt_model_full_64 = EmbeddingNetwork(64)
# txt_model_full_64 = nn.DataParallel(txt_model_full_64, device_ids=[1])
txt_model_full_64.load_state_dict(torch.load("triplet_checkpoints/txt-model-full-64-epoch-10.pth"))
# txt_model_full_64.to((f'cuda:{txt_model_full_64.device_ids[0]}'));
txt_model_full_64.to('cpu')
txt_model_full_64.eval();

#im2title 64
img_model_title_64 = EmbeddingNetwork(64)
# img_model_title_64 = nn.DataParallel(img_model_title_64, device_ids=[1])
img_model_title_64.load_state_dict(torch.load("triplet_checkpoints/img-model-title-64-epoch-5.pth"))
# img_model_title_64.to((f'cuda:{img_model_title_64.device_ids[0]}'));
img_model_title_64.to('cpu')
img_model_title_64.eval();
txt_model_title_64 = EmbeddingNetwork(64)
# txt_model_title_64 = nn.DataParallel(txt_model_title_64, device_ids=[1])
txt_model_title_64.load_state_dict(torch.load("triplet_checkpoints/txt-model-title-64-epoch-5.pth"))
# txt_model_title_64.to((f'cuda:{txt_model_title_64.device_ids[0]}'));
txt_model_title_64.to('cpu')
txt_model_title_64.eval();

#im2instructions 64
img_model_instructions_64 = EmbeddingNetwork(64)
# img_model_instructions_64 = nn.DataParallel(img_model_instructions_64, device_ids=[1])
img_model_instructions_64.load_state_dict(torch.load("triplet_checkpoints/img-model-instructions-64-epoch-5.pth"))
# img_model_instructions_64.to((f'cuda:{img_model_instructions_64.device_ids[0]}'));
img_model_instructions_64.to('cpu')
img_model_instructions_64.eval();
txt_model_instructions_64 = EmbeddingNetwork(64)
# txt_model_instructions_64 = nn.DataParallel(txt_model_instructions_64, device_ids=[1])
txt_model_instructions_64.load_state_dict(torch.load("triplet_checkpoints/txt-model-instructions-64-epoch-5.pth"))
# txt_model_instructions_64.to((f'cuda:{txt_model_instructions_64.device_ids[0]}'));
txt_model_instructions_64.to('cpu')
txt_model_instructions_64.eval();

#im2ingredients 64
img_model_ingredients_64 = EmbeddingNetwork(64)
# img_model_ingredients_64 = nn.DataParallel(img_model_ingredients_64, device_ids=[1])
img_model_ingredients_64.load_state_dict(torch.load("triplet_checkpoints/img-model-ingredients-64-epoch-5.pth"))
# img_model_ingredients_64.to((f'cuda:{img_model_full_64.device_ids[0]}'));
img_model_ingredients_64.to('cpu')
img_model_ingredients_64.eval();
txt_model_ingredients_64 = EmbeddingNetwork(64)
# txt_model_ingredients_64 = nn.DataParallel(txt_model_ingredients_64, device_ids=[1])
txt_model_ingredients_64.load_state_dict(torch.load("triplet_checkpoints/txt-model-ingredients-64-epoch-5.pth"))
# txt_model_ingredients_64.to((f'cuda:{txt_model_ingredients_64.device_ids[0]}'));
txt_model_ingredients_64.to('cpu')
txt_model_ingredients_64.eval();

#### Full recipe

In [24]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 64))
text_val_nonlinear = np.zeros(shape = (len(img_val), 64))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_full_64(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_full_64(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2recipe and recipe2im
print("Running im2recipe for dims = 64 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2recipe for dims = 64 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2recipe for dims = 64 and sample = 1000
Mean median 2.4
Recall {1: 0.35960000000000003, 5: 0.6848, 10: 0.7952}
Running im2recipe for dims = 64 and sample = 10000
Mean median 14.9
Recall {1: 0.11610999999999998, 5: 0.31612999999999997, 10: 0.43578}


#### Title

In [25]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 64))
text_val_nonlinear = np.zeros(shape = (len(img_val), 64))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_title_64(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_title_64(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2title and title2im
print("Running im2title for dims = 64 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2title for dims = 64 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2title for dims = 64 and sample = 1000
Mean median 4.9
Recall {1: 0.21749999999999997, 5: 0.5327999999999999, 10: 0.6723}
Running im2title for dims = 64 and sample = 10000
Mean median 39.25
Recall {1: 0.046579999999999996, 5: 0.16155, 10: 0.25102}


#### Ingredients

In [26]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 64))
text_val_nonlinear = np.zeros(shape = (len(img_val), 64))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_ingredients_64(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_ingredients_64(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2ingredients and ingredients2im
print("Running im2ingredients for dims = 64 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2ingredients for dims = 64 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2ingredients for dims = 64 and sample = 1000
Mean median 4.0
Recall {1: 0.269, 5: 0.5833999999999999, 10: 0.7110999999999998}
Running im2ingredients for dims = 64 and sample = 10000
Mean median 28.6
Recall {1: 0.07246999999999999, 5: 0.22054, 10: 0.32203}


#### Instructions

In [27]:
img_val_nonlinear = np.zeros(shape = (len(img_val), 64))
text_val_nonlinear = np.zeros(shape = (len(img_val), 64))

for i in range(len(img_val)):
    img_val_nonlinear[i] = img_model_instructions_64(torch.Tensor(np.expand_dims(img_val[i], 0))).detach().numpy()
    text_val_nonlinear[i] = txt_model_instructions_64(torch.Tensor(np.expand_dims(text_val[i], 0))).detach().numpy()

# im2instructions and instructions2im
print("Running im2instructions for dims = 64 and sample = 1000")
ranker(img_val_nonlinear, text_val_nonlinear, 1000, "image")
print("Running im2instructions for dims = 64 and sample = 10000")
ranker(img_val_nonlinear, text_val_nonlinear, 10000, "image")

Running im2instructions for dims = 64 and sample = 1000
Mean median 3.0
Recall {1: 0.3296, 5: 0.6529, 10: 0.7682}
Running im2instructions for dims = 64 and sample = 10000
Mean median 20.0
Recall {1: 0.09388999999999999, 5: 0.27048000000000005, 10: 0.38217}


### Evaluation and Ablation Studies

 We can see that dimensions = 256 has a better performance

In [None]:
# im2recipe 256
img_model_full_256 = EmbeddingNetwork(256)
# img_model_full_256 = nn.DataParallel(img_model_full_256, device_ids=[1])
img_model_full_256.load_state_dict(torch.load("triplet_checkpoints/img-model-full-256-epoch-10.pth"))
# img_model_full_256.to((f'cuda:{img_model_full_256.device_ids[0]}'));
img_model_full_256.to('cpu')
img_model_full_256.eval();
txt_model_full_256 = EmbeddingNetwork(256)
# txt_model_full_256 = nn.DataParallel(txt_model_full_256, device_ids=[1])
txt_model_full_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-full-256-epoch-10.pth"))
# txt_model_full_256.to((f'cuda:{txt_model_full_256.device_ids[0]}'));
txt_model_full_256.to('cpu')
txt_model_full_256.eval();

#im2title 256
img_model_title_256 = EmbeddingNetwork(256)
# img_model_title_256 = nn.DataParallel(img_model_title_256, device_ids=[1])
img_model_title_256.load_state_dict(torch.load("triplet_checkpoints/img-model-title-256-epoch-5.pth"))
# img_model_title_256.to((f'cuda:{img_model_title_256.device_ids[0]}'));
img_model_title_256.to('cpu')
img_model_title_256.eval();
txt_model_title_256 = EmbeddingNetwork(256)
# txt_model_title_256 = nn.DataParallel(txt_model_title_256, device_ids=[1])
txt_model_title_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-title-256-epoch-5.pth"))
# txt_model_title_256.to((f'cuda:{txt_model_title_256.device_ids[0]}'));
txt_model_title_256.to('cpu')
txt_model_title_256.eval();

#im2instructions 256
img_model_instructions_256 = EmbeddingNetwork(256)
# img_model_instructions_256 = nn.DataParallel(img_model_instructions_256, device_ids=[1])
img_model_instructions_256.load_state_dict(torch.load("triplet_checkpoints/img-model-instructions-256-epoch-5.pth"))
# img_model_instructions_256.to((f'cuda:{img_model_instructions_256.device_ids[0]}'));
img_model_instructions_256.to('cpu')
img_model_instructions_256.eval();
txt_model_instructions_256 = EmbeddingNetwork(256)
# txt_model_instructions_256 = nn.DataParallel(txt_model_instructions_256, device_ids=[1])
txt_model_instructions_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-instructions-256-epoch-5.pth"))
# txt_model_instructions_256.to((f'cuda:{txt_model_instructions_256.device_ids[0]}'));
txt_model_instructions_256.to('cpu')
txt_model_instructions_256.eval();

#im2ingredients 256
img_model_ingredients_256 = EmbeddingNetwork(256)
# img_model_ingredients_256 = nn.DataParallel(img_model_ingredients_256, device_ids=[1])
img_model_ingredients_256.load_state_dict(torch.load("triplet_checkpoints/img-model-ingredients-256-epoch-5.pth"))
# img_model_ingredients_256.to((f'cuda:{img_model_full_256.device_ids[0]}'));
img_model_ingredients_256.to('cpu')
img_model_ingredients_256.eval();
txt_model_ingredients_256 = EmbeddingNetwork(256)
# txt_model_ingredients_256 = nn.DataParallel(txt_model_ingredients_256, device_ids=[1])
txt_model_ingredients_256.load_state_dict(torch.load("triplet_checkpoints/txt-model-ingredients-256-epoch-5.pth"))
# txt_model_ingredients_256.to((f'cuda:{txt_model_ingredients_256.device_ids[0]}'));
txt_model_ingredients_256.to('cpu')
txt_model_ingredients_256.eval();

#### Full recipe

In [None]:
img_test_nonlinear = np.zeros(shape = (len(img_test), 256))
text_test_nonlinear = np.zeros(shape = (len(img_test), 256))

for i in range(len(img_test)):
    img_test_nonlinear[i] = img_model_full_256(torch.Tensor(np.expand_dims(img_test[i], 0))).detach().numpy()
    text_test_nonlinear[i] = txt_model_full_256(torch.Tensor(np.expand_dims(text_test[i], 0))).detach().numpy()

# im2recipe and recipe2im
print("Running im2recipe for dims = 256 and sample = 1000")
ranker(img_test_nonlinear, text_test_nonlinear, 1000, "image")
print("Running im2recipe for dims = 256 and sample = 10000")
ranker(img_test_nonlinear, text_test_nonlinear, 10000, "image")

Running im2recipe for dims = 256 and sample = 1000
Mean median 2.0
Recall {1: 0.3794, 5: 0.6984, 10: 0.7993}
Running im2recipe for dims = 256 and sample = 10000
Mean median 15.1
Recall {1: 0.11216, 5: 0.31331000000000003, 10: 0.43186}


#### Title

In [None]:
img_test_nonlinear = np.zeros(shape = (len(img_test), 256))
text_test_nonlinear = np.zeros(shape = (len(img_test), 256))

for i in range(len(img_test)):
    img_test_nonlinear[i] = img_model_title_256(torch.Tensor(np.expand_dims(img_test[i], 0))).detach().numpy()
    text_test_nonlinear[i] = txt_model_title_256(torch.Tensor(np.expand_dims(text_test[i], 0))).detach().numpy()

# im2title and title2im
print("Running im2title for dims = 256 and sample = 1000")
ranker(img_test_nonlinear, text_test_nonlinear, 1000, "image")
print("Running im2title for dims = 256 and sample = 10000")
ranker(img_test_nonlinear, text_test_nonlinear, 10000, "image")

Running im2title for dims = 256 and sample = 1000
Mean median 4.85
Recall {1: 0.217, 5: 0.5293, 10: 0.665}
Running im2title for dims = 256 and sample = 10000
Mean median 40.25
Recall {1: 0.04741000000000001, 5: 0.16038000000000002, 10: 0.25114000000000003}


#### Ingredients

In [None]:
img_test_nonlinear = np.zeros(shape = (len(img_test), 256))
text_test_nonlinear = np.zeros(shape = (len(img_test), 256))

for i in range(len(img_test)):
    img_test_nonlinear[i] = img_model_ingredients_256(torch.Tensor(np.expand_dims(img_test[i], 0))).detach().numpy()
    text_test_nonlinear[i] = txt_model_ingredients_256(torch.Tensor(np.expand_dims(text_test[i], 0))).detach().numpy()

# im2ingredients and ingredients2im
print("Running im2ingredients for dims = 256 and sample = 1000")
ranker(img_test_nonlinear, text_test_nonlinear, 1000, "image")
print("Running im2ingredients for dims = 256 and sample = 10000")
ranker(img_test_nonlinear, text_test_nonlinear, 10000, "image")

Running im2ingredients for dims = 256 and sample = 1000
Mean median 3.8
Recall {1: 0.276, 5: 0.5836, 10: 0.7084}
Running im2ingredients for dims = 256 and sample = 10000
Mean median 28.4
Recall {1: 0.07182999999999999, 5: 0.22010000000000002, 10: 0.31999}


#### Instructions

In [None]:
img_test_nonlinear = np.zeros(shape = (len(img_test), 256))
text_test_nonlinear = np.zeros(shape = (len(img_test), 256))

for i in range(len(img_test)):
    img_test_nonlinear[i] = img_model_instructions_256(torch.Tensor(np.expand_dims(img_test[i], 0))).detach().numpy()
    text_test_nonlinear[i] = txt_model_instructions_256(torch.Tensor(np.expand_dims(text_test[i], 0))).detach().numpy()

# im2instructions and instructions2im
print("Running im2instructions for dims = 256 and sample = 1000")
ranker(img_test_nonlinear, text_test_nonlinear, 1000, "image")
print("Running im2instructions for dims = 256 and sample = 10000")
ranker(img_test_nonlinear, text_test_nonlinear, 10000, "image")

Running im2instructions for dims = 256 and sample = 1000
Mean median 3.4
Recall {1: 0.29660000000000003, 5: 0.6060999999999999, 10: 0.7335}
Running im2instructions for dims = 256 and sample = 10000
Mean median 24.7
Recall {1: 0.07762, 5: 0.23694999999999994, 10: 0.34320000000000006}
