In [1]:
import os
# Limit CUDA to device index "3". This is set in the first cell, ahead of the
# cell that imports TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [2]:
import pandas as pd
import pickle
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tqdm import tqdm
import tensorflow as tf
import random
import os
from pathlib import Path

In [3]:
class linear_cca():
    """Linear canonical correlation analysis between two paired views.

    ``fit`` stores, per view, the feature mean (``m_``) and a projection
    matrix (``w_``). ``transform`` centers each view with its stored mean and
    applies the matching projection. Index 0 refers to the first view and
    index 1 to the second.
    """

    def __init__(self):
        self.w_ = [None, None]
        self.m_ = [None, None]

    @staticmethod
    def _root_inverse(sigma):
        """Return sigma^(-1/2) of a symmetric matrix via its eigendecomposition."""
        eigvals, eigvecs = np.linalg.eigh(sigma)
        return eigvecs.dot(np.diag(eigvals ** -0.5)).dot(eigvecs.T)

    def fit(self, H1, H2, n_components):
        """Estimate both projections from paired samples.

        H1, H2: 2-D arrays whose rows are samples; the row count is read
            from ``H1``.
        n_components: number of leading columns kept in each projection.
        """
        # Small ridge terms added to the diagonal of each view's covariance.
        ridge_1 = 1e-4
        ridge_2 = 1e-4

        n_rows = H1.shape[0]
        dim_1 = H1.shape[1]
        dim_2 = H2.shape[1]

        self.m_[0] = np.mean(H1, axis=0)
        self.m_[1] = np.mean(H2, axis=0)
        centered_1 = H1 - np.tile(self.m_[0], (n_rows, 1))
        centered_2 = H2 - np.tile(self.m_[1], (n_rows, 1))

        # Cross- and auto-covariances with the 1/(m-1) normalisation.
        scale = 1.0 / (n_rows - 1)
        cov_12 = scale * np.dot(centered_1.T, centered_2)
        cov_11 = scale * np.dot(centered_1.T, centered_1) + ridge_1 * np.identity(dim_1)
        cov_22 = scale * np.dot(centered_2.T, centered_2) + ridge_2 * np.identity(dim_2)

        whiten_1 = self._root_inverse(cov_11)
        whiten_2 = self._root_inverse(cov_22)

        # SVD of the whitened cross-covariance gives the paired directions.
        whitened_cross = whiten_1.dot(cov_12).dot(whiten_2)
        left, _singular_values, right_t = np.linalg.svd(whitened_cross)
        right = right_t.T

        self.w_[0] = np.dot(whiten_1, left[:, :n_components])
        self.w_[1] = np.dot(whiten_2, right[:, :n_components])

    def _get_result(self, x, idx):
        """Center ``x`` with the stored mean of view ``idx`` and project it."""
        mean_rows = self.m_[idx].reshape([1, -1]).repeat(len(x), axis=0)
        return np.dot(x - mean_rows, self.w_[idx])

    def transform(self, H1, H2):
        """Return ``[projected_H1, projected_H2]`` using the fitted parameters."""
        return [self._get_result(view, idx) for idx, view in enumerate((H1, H2))]

In [4]:
def cca_loss(outdim_size, use_all_singular_values):
    """
    Build the CCA objective in the ``(y_true, y_pred)`` form Keras expects.

    The main loss function (inner_cca_objective) is wrapped in this function due to
    the constraints imposed by Keras on objective functions.

    outdim_size: upper bound on how many of the largest eigenvalues are used
        when ``use_all_singular_values`` is False.
    use_all_singular_values: when True the loss is
        ``-sqrt(trace(Tval^T Tval))``; otherwise it is minus the sum of the
        square roots of the top eigenvalues of ``Tval^T Tval``.

    Returns the inner loss callable.
    """

    def inner_cca_objective(y_true, y_pred):
        """
        It is the loss function of CCA as introduced in the original paper.

        ``y_true`` is ignored. The second axis of ``y_pred`` is split in half:
        the first half is view 1 and the second half is view 2. Returns the
        negated correlation so that minimising the loss maximises it.
        """

        r1 = 1e-4
        r2 = 1e-4
        eps = 1e-12
        o1 = o2 = int(y_pred.shape[1] // 2)

        # unpack (separate) the output of networks for view 1 and view 2;
        # after the transpose each matrix is [dim, batch]
        H1 = tf.transpose(a=y_pred[:, 0:o1])
        H2 = tf.transpose(a=y_pred[:, o1: o1 + o2])

        m = tf.shape(input=H1)[1]

        # Center every feature over the batch axis. This equals
        # H - (1/m) * H @ ones([m, m]) without building the m x m matrix.
        H1bar = H1 - tf.reduce_mean(input_tensor=H1, axis=1, keepdims=True)
        H2bar = H2 - tf.reduce_mean(input_tensor=H2, axis=1, keepdims=True)

        inv_m_minus_1 = tf.cast(tf.divide(1, m - 1), tf.float32)
        SigmaHat12 = inv_m_minus_1 * tf.matmul(
            H1bar, H2bar, transpose_b=True
        )  # [dim, dim]
        SigmaHat11 = inv_m_minus_1 * tf.matmul(
            H1bar, H1bar, transpose_b=True
        ) + r1 * tf.eye(o1)
        SigmaHat22 = inv_m_minus_1 * tf.matmul(
            H2bar, H2bar, transpose_b=True
        ) + r2 * tf.eye(o2)

        # Calculating the root inverse of covariance matrices by using eigen decomposition
        [D1, V1] = tf.linalg.eigh(SigmaHat11)
        [D2, V2] = tf.linalg.eigh(SigmaHat22)  # Added to increase stability

        # The index tensors below have shape [k, 1]. Squeeze only axis 1 so
        # that k == 1 still gives a length-1 id vector instead of a scalar,
        # which would drop a dimension from the gathered eigenvectors.
        posInd1 = tf.compat.v1.where(tf.greater(D1, eps))
        D1 = tf.gather_nd(D1, posInd1)  # get eigen values that are larger than eps
        V1 = tf.transpose(
            a=tf.nn.embedding_lookup(
                params=tf.transpose(a=V1), ids=tf.squeeze(posInd1, axis=1)
            )
        )

        posInd2 = tf.compat.v1.where(tf.greater(D2, eps))
        D2 = tf.gather_nd(D2, posInd2)
        V2 = tf.transpose(
            a=tf.nn.embedding_lookup(
                params=tf.transpose(a=V2), ids=tf.squeeze(posInd2, axis=1)
            )
        )

        SigmaHat11RootInv = tf.matmul(
            tf.matmul(V1, tf.linalg.tensor_diag(D1 ** -0.5)), V1, transpose_b=True
        )  # [dim, dim]
        SigmaHat22RootInv = tf.matmul(
            tf.matmul(V2, tf.linalg.tensor_diag(D2 ** -0.5)), V2, transpose_b=True
        )

        Tval = tf.matmul(tf.matmul(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)

        if use_all_singular_values:
            corr = tf.sqrt(tf.linalg.trace(tf.matmul(Tval, Tval, transpose_a=True)))
        else:
            eigvals, _ = tf.linalg.eigh(tf.matmul(Tval, Tval, transpose_a=True))
            # Keep only eigenvalues above eps before taking square roots.
            eigvals = tf.gather_nd(eigvals, tf.compat.v1.where(tf.greater(eigvals, eps)))
            n_available = tf.shape(input=eigvals)[0]
            # Never request more values than survived the eps filter.
            n_top = tf.minimum(n_available, outdim_size)
            top_vals, _ = tf.nn.top_k(eigvals, k=n_top)
            corr = tf.reduce_sum(input_tensor=tf.sqrt(top_vals))

        return -corr

    return inner_cca_objective

In [5]:
def get_model(layer_sizes1, layer_sizes2, input_size1, input_size2, reg_lambda):
    """Build the two-branch Keras model used for deep CCA.

    Each view gets its own input of width ``input_size1`` / ``input_size2``
    followed by one Dense layer per entry of ``layer_sizes1`` /
    ``layer_sizes2``. The two branch outputs are concatenated into the single
    model output.

    reg_lambda: L2 factor applied to the kernel of every Dense layer.
    """

    def _dense_stack(tensor, widths):
        # Every layer, the final one included, is a sigmoid Dense layer with
        # its own L2 kernel regularizer instance.
        for width in widths:
            layer = keras.layers.Dense(
                width,
                activation='sigmoid',
                kernel_regularizer=keras.regularizers.l2(reg_lambda),
            )
            tensor = layer(tensor)
        return tensor

    view1_input = keras.layers.Input(shape=(input_size1,))
    view2_input = keras.layers.Input(shape=(input_size2,))

    branch1 = _dense_stack(view1_input, layer_sizes1)
    branch2 = _dense_stack(view2_input, layer_sizes2)

    merged = keras.layers.Concatenate()([branch1, branch2])
    return keras.Model(inputs=[view1_input, view2_input], outputs=merged)



def train_model(output_dir, view1_train_data, view2_train_data, view1_val_data, view2_val_data,
                layer_sizes1, layer_sizes2, latent_size, input_size1, input_size2, 
                learning_rate, epochs, batch_size, reg_lambda):
    """Build, compile and fit the two-view model; return the fitted model.

    The model comes from ``get_model`` and is compiled with
    ``cca_loss(latent_size, False)`` and Nesterov-momentum SGD. A
    ``ModelCheckpoint`` callback writes ``<output_dir>/model.h5`` with
    ``save_best_only=True``. The object returned is the in-memory model after
    the last epoch, not one reloaded from that file.
    """
    network = get_model(layer_sizes1, layer_sizes2, input_size1, input_size2, reg_lambda)

    optimizer = keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
    objective = cca_loss(latent_size, False)
    network.compile(loss=objective, optimizer=optimizer)

    checkpoint_path = "/".join([output_dir, 'model.h5'])
    save_best = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path, save_best_only=True, save_weights_only=False
    )

    # The loss ignores y_true, so all-zero placeholder targets are supplied
    # only to satisfy the Keras fit API.
    train_targets = np.zeros(len(view1_train_data))
    val_targets = np.zeros(len(view1_val_data))

    network.fit(
        x=[view1_train_data, view2_train_data],
        y=train_targets,
        validation_data=([view1_val_data, view2_val_data], val_targets),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[save_best],
    )

    return network

def predict_model(model, view1_test_data, view2_test_data):
    """Embed both views with ``model`` and post-process them with linear CCA.

    The model output is split column-wise into two equal halves (a trailing
    odd column, if any, is ignored). A fresh ``linear_cca`` is fitted on
    those two halves, keeping as many components as one half has columns,
    and then applied to the same halves.

    Returns a tuple ``(projected_view1, projected_view2)``.
    """
    outputs = model.predict([view1_test_data, view2_test_data])

    half = int(outputs.shape[1] / 2)
    end = 2 * half

    view1_embeds = outputs[:, :half]
    view2_embeds = outputs[:, half:end]

    cca = linear_cca()
    cca.fit(view1_embeds, view2_embeds, half)
    projected = cca.transform(view1_embeds, view2_embeds)
    return (projected[0], projected[1])
    
    

In [6]:
def evaluation(data_X, data_Y, data_ids, im2recipe, samples_to_draw, time_sample=10, seed=None):
    """Estimate cross-view retrieval quality on random subsets.

    Rows of ``data_X`` and ``data_Y`` are treated as paired: row ``k`` of one
    is the correct match for row ``k`` of the other. For each of
    ``time_sample`` trials, ``samples_to_draw`` pairs are drawn without
    replacement. Every drawn item is used as a query against all drawn items
    of the other view, scored by dot product, and the 1-based rank of its
    own pair is recorded.

    Parameters
    ----------
    data_X, data_Y : 2-D arrays with one row per item (the two views).
    data_ids : sequence of sortable ids, one per row; rows are reordered by
        ``np.argsort(data_ids)`` before sampling.
    im2recipe : if truthy, rows of ``data_X`` are the queries; otherwise rows
        of ``data_Y`` are.
    samples_to_draw : subset size per trial.
    time_sample : number of trials.
    seed : optional seed for a private ``random.Random``. ``None`` (the
        default) draws from the module-level ``random`` state as before.

    Returns
    -------
    (mean over trials of the per-trial median rank,
     {1: r@1, 5: r@5, 10: r@10} averaged over trials)

    Raises
    ------
    ValueError
        From ``sample`` when ``samples_to_draw`` exceeds the number of rows.
    """
    order = np.argsort(data_ids)
    image_vecs = data_X[order]
    text_vecs = data_Y[order]

    sampler = random if seed is None else random.Random(seed)
    cutoffs = (1, 5, 10)

    glob_rank = []
    glob_recall = {k: 0.0 for k in cutoffs}
    for _trial in range(time_sample):
        picked = sampler.sample(range(len(order)), samples_to_draw)
        im_sub = image_vecs[picked, :]
        instr_sub = text_vecs[picked, :]

        if im2recipe:
            sims = np.dot(im_sub, instr_sub.T)  # for im2recipe
        else:
            sims = np.dot(instr_sub, im_sub.T)  # for recipe2im

        # Row q holds query q's similarity to every candidate. Sort in
        # descending order and find where its own pair (index q) landed.
        ranks = []
        for query in range(samples_to_draw):
            ranking = np.argsort(sims[query, :])[::-1].tolist()
            ranks.append(ranking.index(query) + 1)

        for k in cutoffs:
            hits = sum(1 for r in ranks if r <= k)
            glob_recall[k] += hits / samples_to_draw
        glob_rank.append(np.median(ranks))

    for k in cutoffs:
        glob_recall[k] = glob_recall[k] / time_sample

    return np.average(glob_rank), glob_recall
    

### Running models on average embeddings

In [7]:
# --- Load the pickled train / test / validation embeddings -------------------
# NOTE(review): the paths below are absolute and machine-specific, and
# pickle.load can execute arbitrary code, so only open files from a trusted
# source.
# Judging by the variable names, element 0 of each pickle is the image
# embeddings and element 1 the text embeddings; element 2 (read for the test
# split only) is the ids -- TODO confirm against whatever wrote these files.
with open('/common/home/aj780/machine_learning/CCA Data/embeddings_train1.pkl','rb') as f:
    image_data = pickle.load(f)
    
    
image_train_data = image_data[0]
text_train_data = image_data[1]



# `image_data` is rebound for every split; only the unpacked arrays are kept.
with open("/common/home/aj780/machine_learning/CCA Data/embeddings_test1.pkl",'rb') as f:
    image_data = pickle.load(f)

test_ids = image_data[2]
image_test_data = image_data[0]
text_test_data = image_data[1]



with open("/common/home/aj780/machine_learning/CCA Data/embeddings_val1.pkl",'rb') as f:
    image_data = pickle.load(f)

image_val_data = image_data[0]
text_val_data = image_data[1]

print("Data read")

# --- Experiment configuration -------------------------------------------------
# One model is trained and evaluated per latent size listed here.
n_components = [10, 20, 50, 100, 200, 500]
# Row counter into the two metric tables; incremented at the end of each loop pass.
i = 0
# One pre-allocated row per latent size, for each retrieval direction.
metric_data_im2recipe = pd.DataFrame(columns = ['n_comp','medR','r@1', 'r@5','r@10'], index=range(len(n_components)))
metric_data_recipe2im = pd.DataFrame(columns = ['n_comp','medR','r@1', 'r@5','r@10'], index=range(len(n_components)))
embeddings_type = "averageEmbeddings"
output_dir = "CCA_Fina_Part2_withouttriplet"

# Checkpoints and metric CSVs are written below <output_dir>/<embeddings_type>/.
embeddings_dir = output_dir + "/" + embeddings_type 


learning_rate = 0.001
# NOTE(review): `margin` is not read anywhere in this cell.
margin = 0.4
epochs = 10
batch_size = 512
reg_lambda = 1e-5
# --- Train, embed and evaluate one model per latent size ---------------------
for n_comp in n_components:
    
    model_out_dir = embeddings_dir +  "/" + str(n_comp)
    Path(model_out_dir).mkdir(parents=True, exist_ok=True)
    # Both branches use one 512-unit layer followed by an n_comp-unit layer.
    layer_sizes1 = [512, n_comp]
    layer_sizes2 = [512,n_comp]
    
    # The two 1024 literals are the input widths passed for each view.
    model = train_model(model_out_dir, image_train_data, text_train_data,
                        image_val_data, text_val_data,layer_sizes1, layer_sizes2,
                        n_comp, 1024, 1024,learning_rate, epochs, batch_size, reg_lambda)
    
    dataX, dataY = predict_model(model,image_test_data, text_test_data)
    # Retrieval metrics over 10 random draws of 1000 test pairs, first with
    # dataX rows as queries (im2recipe=True) ...
    medR, recall = evaluation(dataX, dataY, test_ids, True , 1000, 10)
    metric_data_im2recipe.loc[i] = [n_comp, medR, recall[1], recall[5], recall[10]]
    
    # ... then with dataY rows as queries (im2recipe=False).
    medR, recall = evaluation(dataX, dataY, test_ids, False , 1000, 10)
    metric_data_recipe2im.loc[i] = [n_comp, medR, recall[1], recall[5], recall[10]]
    
    
    print("n_comp: " + str(n_comp) + " done..!")
    i+=1
    
# Persist one CSV per retrieval direction next to the per-size model folders.
metric_data_im2recipe.to_csv(embeddings_dir + "/metrics_" + embeddings_type + "_im2recipe.csv", index=False)
metric_data_recipe2im.to_csv(embeddings_dir + "/metrics_" + embeddings_type + "_recipe2im.csv", index=False)

    

Data read
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
n_comp: 10 done..!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
n_comp: 20 done..!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
n_comp: 50 done..!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
n_comp: 100 done..!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
n_comp: 200 done..!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
n_comp: 500 done..!


### Running models on ingredients embeddings