In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, IterableDataset

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import time
import os
import copy
import json
import random

from PIL import Image

from transformers import BertTokenizer, BertModel
import pickle

from sklearn.decomposition import PCA
from cca_zoo.models import CCA

# from cca_zoo.deepmodels import architectures
# from cca_zoo.deepmodels import DVCCA, DCCA
# from cca_zoo.deepmodels.architectures import BaseEncoder, Encoder, Decoder
# from cca_zoo.deepmodels.dcca import _DCCA_base

from sklearn.preprocessing import normalize

# Raise the print summarization threshold so tensors with up to 10,000 elements
# are shown in full; larger tensors are still abbreviated.
torch.set_printoptions(threshold=10_000)

# Loading necessary files

In [2]:
# NOTE(review): torch.load relies on pickle-based deserialization; only load
# .pt files that come from a trusted source.

# Validation split: image data and full text data.
img_val, text_val = torch.load("img_val.pt"), torch.load("text_val.pt")

# Training split: image data and full text data.
img_train, text_train = torch.load("img_train.pt"), torch.load("text_train.pt")

# Test split: image data and full text data.
img_test, text_test = torch.load("img_test.pt"), torch.load("text_test.pt")

# Test split, one tensor per individual text field.
ingredients_test, instructions_test, title_test = (
    torch.load("test_ingredients.pt"),
    torch.load("test_instructions.pt"),
    torch.load("test_title.pt"),
)

# Training split, one tensor per individual text field.
ingredients_train, instructions_train, title_train = (
    torch.load("train_ingredients.pt"),
    torch.load("train_instructions.pt"),
    torch.load("train_title.pt"),
)

## Ranking function

In [3]:
def ranker(im_vecs, instr_vecs, N = 1000, flag = "image", n_runs = 10, seed = None):
    """Retrieval ranking for paired representations (after the im2recipe evaluation code).

    For each of ``n_runs`` runs, ``N`` paired rows are sampled without
    replacement, every query is compared against all ``N`` candidates of the
    other modality by dot product, and the 1-based position of the true pair in
    the descending similarity ordering is recorded. The median rank and
    recall@{1, 5, 10} of each run are averaged over the runs, printed, and
    returned.

    Parameters
    ----------
    im_vecs, instr_vecs : array-like, 2-D
        Image and text representations; row ``i`` of both must belong to the
        same sample. Both must have the same number of rows.
    N : int
        Number of pairs sampled per run. Must satisfy ``1 <= N <= len(im_vecs)``.
    flag : str
        ``"image"`` ranks texts for each image query (im2recipe); any other
        value ranks images for each text query (recipe2im).
    n_runs : int
        Number of random subsets to average over (the original hard-coded 10).
    seed : int or None
        When given, sampling uses a private ``random.Random(seed)`` so the
        result is reproducible. When ``None``, the global ``random`` state is
        used, exactly as before.

    Returns
    -------
    (float, dict)
        Mean of the per-run median ranks, and a dict mapping 1, 5 and 10 to the
        mean recall at that cutoff.

    Raises
    ------
    ValueError
        If the inputs have different lengths, or ``N`` / ``n_runs`` is out of range.
    """
    im_vecs = np.asarray(im_vecs)
    instr_vecs = np.asarray(instr_vecs)

    n_pairs = len(im_vecs)
    if len(instr_vecs) != n_pairs:
        raise ValueError(
            "im_vecs and instr_vecs must have the same number of rows, got %d and %d"
            % (n_pairs, len(instr_vecs))
        )
    if not 1 <= N <= n_pairs:
        raise ValueError(
            "N must be between 1 and the number of pairs (%d), got %r" % (n_pairs, N)
        )
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1, got %r" % (n_runs,))

    # The module-level generator keeps the historical behaviour; a seed opts in
    # to reproducible sampling without touching global state.
    rng = random if seed is None else random.Random(seed)

    cutoffs = (1, 5, 10)
    glob_rank = []
    glob_recall = {k: 0.0 for k in cutoffs}

    for _ in range(n_runs):
        ids = rng.sample(range(n_pairs), N)

        im_sub = im_vecs[ids, :]
        instr_sub = instr_vecs[ids, :]

        if flag == "image":
            sims = np.dot(im_sub, instr_sub.T)  # for im2recipe
        else:
            sims = np.dot(instr_sub, im_sub.T)  # for recipe2im

        ranks = np.empty(N, dtype=np.int64)
        for query in range(N):
            # Row `query` holds this query's similarity to every candidate;
            # candidates are ordered from most to least similar.
            order = np.argsort(sims[query, :])[::-1]
            # The true pair shares the query's index; locate it in the ordering
            # without materialising a Python list.
            ranks[query] = np.flatnonzero(order == query)[0] + 1

        glob_rank.append(np.median(ranks))
        for k in cutoffs:
            glob_recall[k] += float(np.count_nonzero(ranks <= k)) / N

    for k in cutoffs:
        glob_recall[k] = glob_recall[k] / n_runs

    mean_median = float(np.average(glob_rank))

    print ("Mean median", mean_median)
    print ("Recall", glob_recall)

    return mean_median, glob_recall

# STEP 1: CCA on improved pipeline representations

## Recipe retrieval (im2recipe)

### Dimensional Analysis with Val data

In [4]:
def determine_latent_dims(dims, size, flag = "image"):
    """Fit CCA with ``dims`` latent dimensions and report validation retrieval ranks.

    The estimator is fit on the notebook-level ``(img_train, text_train)`` views,
    the ``(img_val, text_val)`` views are projected with it, and the projections
    are scored with ``ranker``. Used to pick the representation dimension size.

    Parameters
    ----------
    dims : int
        Value passed to ``CCA(latent_dims=...)``.
    size : int
        Number of validation pairs per ranking run (``ranker``'s ``N``).
    flag : str
        Retrieval direction forwarded to ``ranker``: ``"image"`` for im2recipe,
        anything else for recipe2im.

    Returns
    -------
    Whatever ``ranker`` returns, passed through unchanged.
    """
    print("Applying CCA")
    cca = CCA(latent_dims = dims, random_state = 0)
    cca.fit((img_train, text_train))
    print("CCA done")

    print("Transforming")
    # Only the validation projections are scored, so the training set is not
    # projected again (the original computed it and discarded the result).
    img_val_r, text_val_r = cca.transform((img_val, text_val))

    # Label the output with the direction that is actually evaluated.
    direction = "im2recipe" if flag == "image" else "recipe2im"
    print("Results for latent dims:", str(dims), " and test sample:", str(size), " and " + direction)
    return ranker(img_val_r, text_val_r, size, flag)

##### For 1k samples

In [None]:
# Sweep the CCA latent dimensionality; each setting is scored on 1k-sample subsets.
for dim in (2, 10, 25, 50, 100, 200, 500, 1000):
    determine_latent_dims(dims=dim, size=1000)

Applying CCA
CCA done
Transforming
Results for latent dims: 2  and test sample: 1000  and im2recipe
Mean median 208.7
Recall {1: 0.0029000000000000002, 5: 0.015200000000000002, 10: 0.030700000000000005}
Applying CCA
CCA done
Transforming
Results for latent dims: 10  and test sample: 1000  and im2recipe
Mean median 22.65
Recall {1: 0.0509, 5: 0.20049999999999998, 10: 0.31489999999999996}
Applying CCA
CCA done
Transforming
Results for latent dims: 25  and test sample: 1000  and im2recipe
Mean median 5.0
Recall {1: 0.2132, 5: 0.524, 10: 0.6721000000000001}
Applying CCA
CCA done
Transforming
Results for latent dims: 50  and test sample: 1000  and im2recipe
Mean median 2.5
Recall {1: 0.3512, 5: 0.6982999999999999, 10: 0.8155999999999999}
Applying CCA
CCA done
Transforming
Results for latent dims: 100  and test sample: 1000  and im2recipe
Mean median 2.0
Recall {1: 0.41369999999999996, 5: 0.7453999999999998, 10: 0.8433999999999999}
Applying CCA
CCA done
Transforming
Results for latent dims: 

##### For 10k samples

In [5]:
# Sweep the CCA latent dimensionality; each setting is scored on 10k-sample subsets.
for dim in (2, 10, 25, 50, 100, 200, 500, 1000):
    determine_latent_dims(dims=dim, size=10000)

Applying CCA
CCA done
Transforming
Results for latent dims: 2  and test sample: 10000  and im2recipe
Mean median 2027.75
Recall {1: 0.00039, 5: 0.00163, 10: 0.0031100000000000004}
Applying CCA
CCA done
Transforming
Results for latent dims: 10  and test sample: 10000  and im2recipe
Mean median 218.55
Recall {1: 0.00664, 5: 0.03003, 10: 0.05709000000000001}
Applying CCA
CCA done
Transforming
Results for latent dims: 25  and test sample: 10000  and im2recipe
Mean median 39.9
Recall {1: 0.04641000000000001, 5: 0.15919, 10: 0.24772}
Applying CCA
CCA done
Transforming
Results for latent dims: 50  and test sample: 10000  and im2recipe
Mean median 17.1
Recall {1: 0.10279, 5: 0.2861599999999999, 10: 0.40345000000000003}
Applying CCA
CCA done
Transforming
Results for latent dims: 100  and test sample: 10000  and im2recipe
Mean median 11.8
Recall {1: 0.1444, 5: 0.35265, 10: 0.47423000000000004}
Applying CCA
CCA done
Transforming
Results for latent dims: 200  and test sample: 10000  and im2recipe


### Ablation Studies with fixed dim = 500

##### For sample size = 1000, latent_dims = 500

In [None]:
# Estimator used by the ablation cell that follows: 500 latent dimensions, fixed random_state.
cca = CCA(latent_dims=500, random_state=0)

In [None]:
# im2recipe ablations on the test split, ranked on 1000-sample subsets.
# `cca` is not created here: it must already exist in the kernel (it is
# defined in the preceding cell). The same estimator object is fit again for
# every text field, so each transform must directly follow its own fit.

# im2recipe: images vs. the full text data
print("Applying CCA")
cca.fit((img_train, text_train))
print("CCA done")
print("Transforming")
# NOTE(review): img_train_r / text_train_r are not read anywhere in this cell;
# confirm whether a later cell needs them before dropping this projection.
img_train_r, text_train_r = cca.transform((img_train, text_train))
img_test_r, text_test_r = cca.transform((img_test, text_test))
print("Results for im2recipe: latent dims = 500, all the recipe fields")
ranker(img_test_r, text_test_r,  1000, "image")

# im2instructions: images vs. instructions only
print("Applying CCA")
cca.fit((img_train, instructions_train))
print("CCA done")
# img_test_r is overwritten with the projection from this newly fit estimator.
img_test_r, instructions_test_r = cca.transform((img_test, instructions_test))
print("Results for im2instructions: latent dims = 500, only instructions")
ranker(img_test_r, instructions_test_r, 1000, "image")

# im2ingredients: images vs. ingredients only
print("Applying CCA")
cca.fit((img_train, ingredients_train))
print("CCA done")
img_test_r, ingredients_test_r = cca.transform((img_test, ingredients_test))
print("Results for im2ingredients: latent dims = 500, only ingredients")
ranker(img_test_r, ingredients_test_r,  1000, "image")

# im2title: images vs. title only
print("Applying CCA")
cca.fit((img_train, title_train))
print("CCA done")
img_test_r, title_test_r = cca.transform((img_test, title_test))
print("Results for im2title: latent dims = 500, only title")
ranker(img_test_r, title_test_r,  1000, "image")

Applying CCA
CCA done
Transforming
Results for im2recipe: latent dims = 500, all the recipe fields
Mean median 1.0
Recall {1: 0.5516, 5: 0.7968000000000001, 10: 0.85}
Applying CCA
CCA done
Results for im2instructions: latent dims = 500, only instructions
Mean median 2.8
Recall {1: 0.3527, 5: 0.6108, 10: 0.6873}
Applying CCA
CCA done
Results for im2ingredients: latent dims = 500, only ingredients
Mean median 3.0
Recall {1: 0.35550000000000004, 5: 0.6089, 10: 0.6807000000000001}
Applying CCA
CCA done
Results for im2title: latent dims = 500, only title
Mean median 9.6
Recall {1: 0.22159999999999996, 5: 0.4396, 10: 0.5117}


##### For sample size = 10000, latent_dims = 500

In [None]:
# Estimator used by the ablation cell that follows: 500 latent dimensions, fixed random_state.
cca = CCA(latent_dims=500, random_state=0)

In [6]:
# im2recipe ablations on the test split, ranked on 10000-sample subsets.
# The first three stanzas reuse the `cca` object that must already exist in the
# kernel; each transform must directly follow its own fit.

# im2recipe: images vs. the full text data
print("Applying CCA")
cca.fit((img_train, text_train))
print("CCA done")
print("Transforming")
# NOTE(review): img_train_r / text_train_r are not read anywhere in this cell;
# confirm whether a later cell needs them before dropping this projection.
img_train_r, text_train_r = cca.transform((img_train, text_train))
img_test_r, text_test_r = cca.transform((img_test, text_test))
print("Results for im2recipe: latent dims = 500, all the recipe fields")
ranker(img_test_r, text_test_r,  10000, "image")

# im2instructions: images vs. instructions only
print("Applying CCA")
cca.fit((img_train, instructions_train))
print("CCA done")
img_test_r, instructions_test_r = cca.transform((img_test, instructions_test))
print("Results for im2instructions: latent dims = 500, only instructions")
ranker(img_test_r, instructions_test_r, 10000, "image")

# im2ingredients: images vs. ingredients only
print("Applying CCA")
cca.fit((img_train, ingredients_train))
print("CCA done")
img_test_r, ingredients_test_r = cca.transform((img_test, ingredients_test))
print("Results for im2ingredients: latent dims = 500, only ingredients")
ranker(img_test_r, ingredients_test_r,  10000, "image")

# im2title: images vs. title only
# The progress line was missing from this stanza only, although every sibling
# stanza (and the recorded output) announces the fit.
print("Applying CCA")
# A fresh estimator with the same settings is created for this last fit.
cca = CCA(latent_dims = 500, random_state = 0)
cca.fit((img_train, title_train))
print("CCA done")
img_test_r, title_test_r = cca.transform((img_test, title_test))
print("Results for im2title: latent dims = 500, only title")
ranker(img_test_r, title_test_r,  10000, "image")

Applying CCA
CCA done
Transforming
Results for im2recipe: latent dims = 500, all the recipe fields
Mean median 5.0
Recall {1: 0.26223, 5: 0.51435, 10: 0.62035}
Applying CCA
CCA done
Results for im2instructions: latent dims = 500, only instructions
Mean median 19.4
Recall {1: 0.1299, 5: 0.31060999999999994, 10: 0.40711}
Applying CCA
CCA done
Results for im2ingredients: latent dims = 500, only ingredients
Mean median 19.3
Recall {1: 0.13674, 5: 0.31982000000000005, 10: 0.4159400000000001}
Applying CCA
CCA done
Results for im2title: latent dims = 500, only title
Mean median 86.55
Recall {1: 0.060280000000000014, 5: 0.17925000000000002, 10: 0.25656999999999996}


## Image retrieval (recipe2im)

### Dimensional Analysis with val data

In [8]:
def determine_latent_dims_recipe2im(dims, size, flag = "text"):
    """Fit CCA with ``dims`` latent dimensions and report validation ranks for recipe2im.

    The estimator is fit on the notebook-level ``(text_train, img_train)`` views
    (text first), the ``(text_val, img_val)`` views are projected in that same
    order, and the projections are scored with ``ranker``. Used to pick the
    representation dimension size for image retrieval.

    Parameters
    ----------
    dims : int
        Value passed to ``CCA(latent_dims=...)``.
    size : int
        Number of validation pairs per ranking run (``ranker``'s ``N``).
    flag : str
        Retrieval direction forwarded to ``ranker``: ``"image"`` for im2recipe,
        anything else (default ``"text"``) for recipe2im.

    Returns
    -------
    Whatever ``ranker`` returns, passed through unchanged.
    """
    print("Applying CCA")
    cca = CCA(latent_dims = dims, random_state = 0)
    cca.fit((text_train, img_train))
    print("CCA done")

    print("Transforming")
    # The original also projected the training set, but passed the views as
    # (img, text) to an estimator fit on (text, img) and never used the result;
    # that call is removed. Views must be given in the order used for fit.
    text_val_r, img_val_r = cca.transform((text_val, img_val))

    # Label the output with the direction that is actually evaluated
    # (this used to say "im2recipe" unconditionally).
    direction = "im2recipe" if flag == "image" else "recipe2im"
    print("Results for latent dims:", str(dims), " and test sample:", str(size), " and " + direction)
    return ranker(img_val_r, text_val_r, size, flag)

##### For 1k samples

In [None]:
# Sweep the CCA latent dimensionality for recipe2im; each setting is scored on 1k-sample subsets.
for dim in (2, 10, 25, 50, 100, 200, 500, 1000):
    determine_latent_dims_recipe2im(dims=dim, size=1000, flag="text")

Applying CCA
CCA done
Transforming
Results for latent dims: 2  and test sample: 1000  and im2recipe
Mean median 206.7
Recall {1: 0.0035000000000000005, 5: 0.016900000000000005, 10: 0.032900000000000006}
Applying CCA
CCA done
Transforming
Results for latent dims: 10  and test sample: 1000  and im2recipe
Mean median 22.45
Recall {1: 0.0506, 5: 0.1991, 10: 0.3191}
Applying CCA
CCA done
Transforming
Results for latent dims: 25  and test sample: 1000  and im2recipe
Mean median 5.0
Recall {1: 0.22290000000000001, 5: 0.5223000000000001, 10: 0.6703000000000001}
Applying CCA
CCA done
Transforming
Results for latent dims: 50  and test sample: 1000  and im2recipe
Mean median 2.6
Recall {1: 0.35409999999999997, 5: 0.6824, 10: 0.8046000000000001}
Applying CCA
CCA done
Transforming
Results for latent dims: 100  and test sample: 1000  and im2recipe
Mean median 2.0
Recall {1: 0.4122, 5: 0.7384999999999998, 10: 0.8432000000000001}
Applying CCA
CCA done
Transforming
Results for latent dims: 200  and tes

##### For 10k samples

In [9]:
# Sweep the CCA latent dimensionality for recipe2im; each setting is scored on 10k-sample subsets.
for dim in (2, 10, 25, 50, 100, 200, 500, 1000):
    determine_latent_dims_recipe2im(dims=dim, size=10000, flag="text")

Applying CCA
CCA done
Transforming
Results for latent dims: 2  and test sample: 10000  and im2recipe
Mean median 2090.65
Recall {1: 0.00028, 5: 0.0015699999999999998, 10: 0.0032300000000000002}
Applying CCA
CCA done
Transforming
Results for latent dims: 10  and test sample: 10000  and im2recipe
Mean median 217.75
Recall {1: 0.00683, 5: 0.031740000000000004, 10: 0.05775}
Applying CCA
CCA done
Transforming
Results for latent dims: 25  and test sample: 10000  and im2recipe
Mean median 39.9
Recall {1: 0.0474, 5: 0.16423, 10: 0.25244}
Applying CCA
CCA done
Transforming
Results for latent dims: 50  and test sample: 10000  and im2recipe
Mean median 16.75
Recall {1: 0.10979999999999998, 5: 0.29761999999999994, 10: 0.41186}
Applying CCA
CCA done
Transforming
Results for latent dims: 100  and test sample: 10000  and im2recipe
Mean median 11.6
Recall {1: 0.1482, 5: 0.3598800000000001, 10: 0.47989999999999994}
Applying CCA
CCA done
Transforming
Results for latent dims: 200  and test sample: 10000 

### Ablation Studies with fixed dims = 500

##### For sample size = 1000, latent_dims = 500

In [None]:
# Estimator used by the ablation cell that follows: 500 latent dimensions, fixed random_state.
cca = CCA(latent_dims=500, random_state=0)

In [None]:
# recipe2im ablations on the test split, ranked on 1000-sample subsets.
# `cca` is not created here: it must already exist in the kernel (it is
# defined in the preceding cell). Views are passed text-first to both fit and
# transform; `ranker` still takes the image projection first, and the "text"
# flag selects its text-query branch.

# recipe2im: full text data vs. images
print("Applying CCA")
cca.fit((text_train, img_train))
print("CCA done")
print("Transforming")
# NOTE(review): text_train_r / img_train_r are not read anywhere in this cell;
# confirm whether a later cell needs them before dropping this projection.
text_train_r, img_train_r = cca.transform((text_train, img_train))
text_test_r, img_test_r = cca.transform((text_test, img_test))
print("Results for recipe2im: latent dims = 500, all the recipe fields")
ranker(img_test_r, text_test_r,  1000, "text")

# instructions2im: instructions only vs. images
print("Applying CCA")
cca.fit((instructions_train, img_train))
print("CCA done")
# img_test_r is overwritten with the projection from this newly fit estimator.
instructions_test_r, img_test_r = cca.transform((instructions_test, img_test))
print("Results for instructions2im: latent dims = 500, only instructions")
ranker(img_test_r, instructions_test_r, 1000, "text")

# ingredients2im: ingredients only vs. images
print("Applying CCA")
cca.fit((ingredients_train, img_train))
print("CCA done")
ingredients_test_r, img_test_r = cca.transform((ingredients_test, img_test))
print("Results for ingredients2im: latent dims = 500, only ingredients")
ranker(img_test_r, ingredients_test_r,  1000, "text")

# title2im: title only vs. images
print("Applying CCA")
cca.fit((title_train, img_train))
print("CCA done")
title_test_r, img_test_r  = cca.transform((title_test, img_test))
print("Results for title2im: latent dims = 500, only title")
ranker(img_test_r, title_test_r,  1000, "text")

Applying CCA
CCA done
Transforming
Results for recipe2im: latent dims = 500, all the recipe fields
Mean median 1.0
Recall {1: 0.5437000000000001, 5: 0.7927000000000001, 10: 0.8454}
Applying CCA
CCA done
Results for instructions2im: latent dims = 500, only instructions
Mean median 2.9
Recall {1: 0.36050000000000004, 5: 0.6161, 10: 0.6944}
Applying CCA
CCA done
Results for ingredients2im: latent dims = 500, only ingredients
Mean median 2.8
Recall {1: 0.371, 5: 0.6043999999999999, 10: 0.6749}
Applying CCA
CCA done
Results for title2im: latent dims = 500, only title
Mean median 9.8
Recall {1: 0.21880000000000002, 5: 0.43279999999999996, 10: 0.5057}


##### For sample size = 10000, latent_dims = 500

In [None]:
# Estimator used by the ablation cell that follows: 500 latent dimensions, fixed random_state.
cca = CCA(latent_dims=500, random_state=0)

In [10]:
# recipe2im ablations on the test split, ranked on 10000-sample subsets.
# `cca` is not created here: it must already exist in the kernel (it is
# defined in the preceding cell). Views are passed text-first to both fit and
# transform; `ranker` still takes the image projection first, and the "text"
# flag selects its text-query branch.

# recipe2im: full text data vs. images
print("Applying CCA")
cca.fit((text_train, img_train))
print("CCA done")
print("Transforming")
# NOTE(review): text_train_r / img_train_r are not read anywhere in this cell;
# confirm whether a later cell needs them before dropping this projection.
text_train_r, img_train_r = cca.transform((text_train, img_train))
text_test_r, img_test_r = cca.transform((text_test, img_test))
print("Results for recipe2im: latent dims = 500, all the recipe fields")
ranker(img_test_r, text_test_r,  10000, "text")

# instructions2im: instructions only vs. images
print("Applying CCA")
cca.fit((instructions_train, img_train))
print("CCA done")
# img_test_r is overwritten with the projection from this newly fit estimator.
instructions_test_r, img_test_r = cca.transform((instructions_test, img_test))
print("Results for instructions2im: latent dims = 500, only instructions")
ranker(img_test_r, instructions_test_r, 10000, "text")

# ingredients2im: ingredients only vs. images
print("Applying CCA")
cca.fit((ingredients_train, img_train))
print("CCA done")
ingredients_test_r, img_test_r = cca.transform((ingredients_test, img_test))
print("Results for ingredients2im: latent dims = 500, only ingredients")
ranker(img_test_r, ingredients_test_r,  10000, "text")

# title2im: title only vs. images
print("Applying CCA")
cca.fit((title_train, img_train))
print("CCA done")
title_test_r, img_test_r  = cca.transform((title_test, img_test))
print("Results for title2im: latent dims = 500, only title")
ranker(img_test_r, title_test_r,  10000, "text")

Applying CCA
CCA done
Transforming
Results for recipe2im: latent dims = 500, all the recipe fields
Mean median 5.0
Recall {1: 0.27521000000000007, 5: 0.51742, 10: 0.62133}
Applying CCA
CCA done
Results for instructions2im: latent dims = 500, only instructions
Mean median 18.6
Recall {1: 0.13687999999999997, 5: 0.31769000000000003, 10: 0.41434}
Applying CCA
CCA done
Results for ingredients2im: latent dims = 500, only ingredients
Mean median 18.25
Recall {1: 0.14966000000000002, 5: 0.33231, 10: 0.42429000000000006}
Applying CCA
CCA done
Results for title2im: latent dims = 500, only title
Mean median 88.9
Recall {1: 0.0618, 5: 0.17691, 10: 0.25399}
