### Use all ingredients in recipes as ingredient nodes, instead of only detected ingredients

In [1]:
!nvidia-smi

Mon Aug 16 22:07:55 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.80       Driver Version: 460.80       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:02:00.0 Off |                    0 |
| N/A   31C    P0    28W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  Off  | 00000000:03:00.0 Off |                    0 |
| N/A   27C    P0    27W / 250W |      2MiB / 16280MiB |      0%      Defaul

In [2]:
!module load conda cudnn cuda

In [3]:
import json
import pickle
import re
import nltk
from collections import Counter
import pandas as pd
import random
import heapq
import csv
from tqdm import tqdm
import os
import numpy as np
import time
import math
import lmdb
import gensim

import dgl
import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchfile

from sklearn.metrics import precision_score, recall_score, f1_score

Using backend: pytorch


In [4]:
device = ("cuda:0" if torch.cuda.is_available() else "cpu")
print('device: ', device)

device:  cuda:0


## build graph - Small dataset

In [5]:
dataset_folder = '../data/'


In [6]:
# recipes_subset = json.load(open(dataset_folder+'/recipes_weighted_and_USDAmapped.json', 'r'))
# print('length of recipes_subset: ', len(recipes_subset))

# partition_list = []
# for recipe in recipes_subset:
#     partition = recipe['partition']
#     partition_list.append(partition)
# print('train/val/test split: ', Counter(partition_list))


length of recipes_subset:  77733
train/val/test split:  Counter({'train': 54358, 'test': 11759, 'val': 11616})


In [8]:
def get_graph():
    print('generating graph ...')
    edge_src, edge_dst, r_i_edge_weight = torch.load(dataset_folder+'/edge_src_and_edge_dst_and_r_i_edge_weight.pt')
    recipe_edge_src, recipe_edge_dst, recipe_edge_weight = torch.load(dataset_folder+'/recipe_edge_src_and_recipe_edge_dst_and_weight.pt')
    ingre_edge_src, ingre_edge_dst, ingre_edge_weight = torch.load(dataset_folder+'/ingre_edge_src_and_dst_and_weight.pt')

    graph = dgl.heterograph({
        ('recipe', 'r-i', 'ingredient'): (edge_src, edge_dst),
        ('ingredient', 'i-r', 'recipe'): (edge_dst, edge_src),
        ('recipe', 'r-r', 'recipe'): (recipe_edge_src, recipe_edge_dst),
        ('ingredient', 'i-i', 'ingredient'): (ingre_edge_src, ingre_edge_dst)
    })

    graph.edges['r-i'].data['weight'] = torch.FloatTensor(r_i_edge_weight)
    graph.edges['i-r'].data['weight'] = torch.FloatTensor(r_i_edge_weight)
    graph.edges['r-r'].data['weight'] = torch.FloatTensor(recipe_edge_weight)
    graph.edges['i-i'].data['weight'] = torch.FloatTensor(ingre_edge_weight)
    
    recipe_nodes_instruction_features = torch.load(dataset_folder+'recipe_nodes_instruction_features.pt')
    ingredient_nodes_nutrient_features_minus1 = torch.load(dataset_folder+'/ingredient_nodes_nutrient_features.pt')
    train_mask = torch.load(dataset_folder+'/recipe_nodes_train_mask.pt')
    val_mask = torch.load(dataset_folder+'/recipe_nodes_val_mask.pt')
    test_mask = torch.load(dataset_folder+'/recipe_nodes_test_mask.pt')
    recipe_nodes_labels = torch.load(dataset_folder+'/recipe_nodes_labels.pt')

    graph.nodes['recipe'].data['instr_feature'] = recipe_nodes_instruction_features
    graph.nodes['ingredient'].data['nutrient_feature'] = ingredient_nodes_nutrient_features_minus1
    graph.nodes['recipe'].data['train_mask'] = train_mask
    graph.nodes['recipe'].data['val_mask'] = val_mask
    graph.nodes['recipe'].data['test_mask'] = test_mask
    graph.nodes['recipe'].data['label'] = recipe_nodes_labels.long()
    
    return graph

graph = get_graph()
graph.to(device)
print('graph: ', graph)

generating graph ...
graph:  Graph(num_nodes={'ingredient': 9271, 'recipe': 77733},
      num_edges={('ingredient', 'i-i', 'ingredient'): 148168, ('ingredient', 'i-r', 'recipe'): 524671, ('recipe', 'r-i', 'ingredient'): 524671, ('recipe', 'r-r', 'recipe'): 850354},
      metagraph=[('ingredient', 'ingredient', 'i-i'), ('ingredient', 'recipe', 'i-r'), ('recipe', 'ingredient', 'r-i'), ('recipe', 'recipe', 'r-r')])


## model

In [9]:
# get train/val/test mask
train_mask = graph.nodes['recipe'].data['train_mask'].to(device)
val_mask = graph.nodes['recipe'].data['val_mask'].to(device)
test_mask = graph.nodes['recipe'].data['test_mask'].to(device)
labels = graph.nodes['recipe'].data['label'].to(device)

print('train_mask: ', train_mask.size())
print('val_mask: ', val_mask.size())
print('test_mask: ', test_mask.size())
print('labels: ', labels.size())

train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze()
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()

train_mask:  torch.Size([77733])
val_mask:  torch.Size([77733])
test_mask:  torch.Size([77733])
labels:  torch.Size([77733])


## Set Transformer

In [10]:
# get ingre neighbors for each recipe nodes
def get_recipe2ingreNeighbor_dict():
    max_length = 33
    # print(max(len(x) for x in neighbor_list))
    out = {}
    neighbor_list = []
    ingre_length_list = []
    total_length_index_list = []
    total_ingre_neighbor_list = []
    total_length_index = 0
    total_length_index_list.append(total_length_index)
    for recipeNodeID in tqdm(range(graph.number_of_nodes('recipe'))):
        _, succs = graph.out_edges(recipeNodeID, etype='r-i')
        succs_list = list(set(succs.tolist()))
        total_ingre_neighbor_list.extend(succs_list)
        cur_length = len(succs_list)
        ingre_length_list.append(cur_length)
        
        total_length_index += cur_length
        total_length_index_list.append(total_length_index)
        while len(succs_list) < max_length:
            succs_list.append(77733)
        neighbor_list.append(succs_list)

    ingre_neighbor_tensor = torch.tensor(neighbor_list)
    ingre_length_tensor = torch.tensor(ingre_length_list)
    total_ingre_neighbor_tensor = torch.tensor(total_ingre_neighbor_list)
    return ingre_neighbor_tensor, ingre_length_tensor, total_length_index_list, total_ingre_neighbor_tensor

ingre_neighbor_tensor, ingre_length_tensor, total_length_index_list, total_ingre_neighbor_tensor = get_recipe2ingreNeighbor_dict()
print('ingre_neighbor_tensor: ', ingre_neighbor_tensor.shape)
print('ingre_length_tensor: ', ingre_length_tensor.shape)
print('total_length_index_list: ', len(total_length_index_list))
print('total_ingre_neighbor_tensor: ', total_ingre_neighbor_tensor.shape)

def find(tensor, values):
    return torch.nonzero(tensor[..., None] == values)

# example for find()
# a = torch.tensor([0, 10, 20, 30])
# b = torch.tensor([[ 0, 30, 20,  10, 77733],[ 0, 30, 20,  10, 77733]])
# find(b, a)[:, 2]

100%|██████████| 77733/77733 [00:19<00:00, 3932.27it/s]

ingre_neighbor_tensor:  torch.Size([77733, 33])
ingre_length_tensor:  torch.Size([77733])
total_length_index_list:  77734
total_ingre_neighbor_tensor:  torch.Size([515189])





In [11]:
def get_ingredient_neighbors_link_scores(blocks, output_nodes, secondToLast_ingre, recipe):
    ingreNodeIDs = blocks[1].srcdata['_ID']['ingredient']
    recipeNodeIDs = output_nodes['recipe']
    batch_ingre_neighbors = ingre_neighbor_tensor[recipeNodeIDs].to(device)
    batch_ingre_length = ingre_length_tensor[recipeNodeIDs]
    valid_batch_ingre_neighbors = find(batch_ingre_neighbors, ingreNodeIDs)[:, 2]

    # based on valid_batch_ingre_neighbors each row index
    _, valid_batch_ingre_length = torch.unique(find(batch_ingre_neighbors, ingreNodeIDs)[:, 0], return_counts=True)
    batch_sum_ingre_length = np.cumsum(valid_batch_ingre_length.cpu())

    total_ingre_emb = None
    total_pos_score = None
    total_neg_score = None

    for i in range(len(recipeNodeIDs)):
        if i == 0:
            recipeNode_ingres = valid_batch_ingre_neighbors[0:batch_sum_ingre_length[i]]
            potential_neg_ingres = valid_batch_ingre_neighbors[batch_sum_ingre_length[i]:]
            neg_ingres = potential_neg_ingres[torch.randint(len(potential_neg_ingres), (len(recipeNode_ingres),))]
            a = secondToLast_ingre[recipeNode_ingres]
            b = secondToLast_ingre[neg_ingres]
        else:
            recipeNode_ingres = valid_batch_ingre_neighbors[batch_sum_ingre_length[i-1]:batch_sum_ingre_length[i]]
            potential_neg_ingres = torch.cat([valid_batch_ingre_neighbors[:batch_sum_ingre_length[i-1]], valid_batch_ingre_neighbors[batch_sum_ingre_length[i]:]])
            neg_ingres = potential_neg_ingres[torch.randint(len(potential_neg_ingres), (len(recipeNode_ingres),))]
            a = secondToLast_ingre[recipeNode_ingres]
            b = secondToLast_ingre[neg_ingres]
        
        cur_recipe = recipe[i,:]
        pos_score = torch.mm(a, cur_recipe.unsqueeze(1))
        neg_score = torch.mm(b, cur_recipe.unsqueeze(1))
        
        if total_pos_score == None:
            total_pos_score = pos_score
            total_neg_score = neg_score
        else:
            total_pos_score = torch.cat([total_pos_score, pos_score], dim = 0)
            total_neg_score = torch.cat([total_neg_score, neg_score], dim = 0)

    total_pos_score = total_pos_score.squeeze()
    total_neg_score = total_neg_score.squeeze()

    return total_pos_score, total_neg_score


In [12]:
def get_ingredient_neighbors_all_embeddings(blocks, output_nodes, secondToLast_ingre):
    ingreNodeIDs = blocks[1].srcdata['_ID']['ingredient']
    recipeNodeIDs = output_nodes['recipe']
    batch_ingre_neighbors = ingre_neighbor_tensor[recipeNodeIDs].to(device)
    batch_ingre_length = ingre_length_tensor[recipeNodeIDs]
    valid_batch_ingre_neighbors = find(batch_ingre_neighbors, ingreNodeIDs)[:, 2]
    
    # based on valid_batch_ingre_neighbors each row index
    _, valid_batch_ingre_length = torch.unique(find(batch_ingre_neighbors, ingreNodeIDs)[:, 0], return_counts=True)
    batch_sum_ingre_length = np.cumsum(valid_batch_ingre_length.cpu())

    total_ingre_emb = None
    for i in range(len(recipeNodeIDs)):
        if i == 0:
            recipeNode_ingres = valid_batch_ingre_neighbors[0:batch_sum_ingre_length[i]]
            a = secondToLast_ingre[recipeNode_ingres]
        else:
            recipeNode_ingres = valid_batch_ingre_neighbors[batch_sum_ingre_length[i-1]:batch_sum_ingre_length[i]]
            a = secondToLast_ingre[recipeNode_ingres]
    
        # all ingre instead of average
        a_rows = a.shape[0]
        a_columns = a.shape[1]
        max_rows = 5
        if a_rows < max_rows:
            a = torch.cat([a, torch.zeros(max_rows-a_rows, a_columns).cuda()])
        else:
            a = a[:max_rows, :]
        
        if total_ingre_emb == None:
            total_ingre_emb = a.unsqueeze(0)
        else:
            total_ingre_emb = torch.cat([total_ingre_emb,a.unsqueeze(0)], dim = 0)
            if torch.isnan(total_ingre_emb).any():
                print('Error!')

    return total_ingre_emb


In [13]:
class Attention(nn.Module):
    """Scaled Dot-Product Attention."""

    def __init__(self, temperature):
        super().__init__()

        self.temperature = temperature
        self.softmax = nn.Softmax(dim=2)

    def forward(self, queries, keys, values):
        """
        It is equivariant to permutations
        of the batch dimension (`b`).

        It is equivariant to permutations of the
        second dimension of the queries (`n`).

        It is invariant to permutations of the
        second dimension of keys and values (`m`).

        Arguments:
            queries: a float tensor with shape [b, n, d].
            keys: a float tensor with shape [b, m, d].
            values: a float tensor with shape [b, m, d'].
        Returns:
            a float tensor with shape [b, n, d'].
        """

        attention = torch.bmm(queries, keys.transpose(1, 2))
        attention = self.softmax(attention / self.temperature)
        # it has shape [b, n, m]

        return torch.bmm(attention, values)


class MultiheadAttention(nn.Module):

    def __init__(self, d, h):
        """
        Arguments:
            d: an integer, dimension of queries and values.
                It is assumed that input and
                output dimensions are the same.
            h: an integer, number of heads.
        """
        super().__init__()

        assert d % h == 0
        self.h = h

        # everything is projected to this dimension
        p = d // h

        self.project_queries = nn.Linear(d, d)
        self.project_keys = nn.Linear(d, d)
        self.project_values = nn.Linear(d, d)
        self.concatenation = nn.Linear(d, d)
        self.attention = Attention(temperature=p**0.5)

    def forward(self, queries, keys, values):
        """
        Arguments:
            queries: a float tensor with shape [b, n, d].
            keys: a float tensor with shape [b, m, d].
            values: a float tensor with shape [b, m, d].
        Returns:
            a float tensor with shape [b, n, d].
        """

        h = self.h
        b, n, d = queries.size()
        _, m, _ = keys.size()
        p = d // h

        queries = self.project_queries(queries)  # shape [b, n, d]
        keys = self.project_keys(keys)  # shape [b, m, d]
        values = self.project_values(values)  # shape [b, m, d]

        queries = queries.view(b, n, h, p)
        keys = keys.view(b, m, h, p)
        values = values.view(b, m, h, p)

        queries = queries.permute(2, 0, 1, 3).contiguous().view(h * b, n, p)
        keys = keys.permute(2, 0, 1, 3).contiguous().view(h * b, m, p)
        values = values.permute(2, 0, 1, 3).contiguous().view(h * b, m, p)

        output = self.attention(queries, keys, values)  # shape [h * b, n, p]
        output = output.view(h, b, n, p)
        output = output.permute(1, 2, 0, 3).contiguous().view(b, n, d)
        output = self.concatenation(output)  # shape [b, n, d]

        return output

class RFF(nn.Module):
    """
    Row-wise FeedForward layers.
    """
    def __init__(self, d):
        super().__init__()

        self.layers = nn.Sequential(
            nn.Linear(d, d), nn.ReLU(inplace=True),
            nn.Linear(d, d), nn.ReLU(inplace=True),
            nn.Linear(d, d), nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [b, n, d].
        Returns:
            a float tensor with shape [b, n, d].
        """
        return self.layers(x)

class MultiheadAttentionBlock(nn.Module):

    def __init__(self, d, h, rff):
        """
        Arguments:
            d: an integer, input dimension.
            h: an integer, number of heads.
            rff: a module, row-wise feedforward layers.
                It takes a float tensor with shape [b, n, d] and
                returns a float tensor with the same shape.
        """
        super().__init__()

        self.multihead = MultiheadAttention(d, h)
        self.layer_norm1 = nn.LayerNorm(d)
        self.layer_norm2 = nn.LayerNorm(d)
        self.rff = rff

    def forward(self, x, y):
        """
        It is equivariant to permutations of the
        second dimension of tensor x (`n`).

        It is invariant to permutations of the
        second dimension of tensor y (`m`).

        Arguments:
            x: a float tensor with shape [b, n, d].
            y: a float tensor with shape [b, m, d].
        Returns:
            a float tensor with shape [b, n, d].
        """
        h = self.layer_norm1(x + self.multihead(x, y, y))
        return self.layer_norm2(h + self.rff(h))

class SetAttentionBlock(nn.Module):

    def __init__(self, d, h, rff):
        super().__init__()
        self.mab = MultiheadAttentionBlock(d, h, rff)

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [b, n, d].
        Returns:
            a float tensor with shape [b, n, d].
        """
        return self.mab(x, x)

class InducedSetAttentionBlock(nn.Module):

    def __init__(self, d, m, h, rff1, rff2):
        """
        Arguments:
            d: an integer, input dimension.
            m: an integer, number of inducing points.
            h: an integer, number of heads.
            rff1, rff2: modules, row-wise feedforward layers.
                It takes a float tensor with shape [b, n, d] and
                returns a float tensor with the same shape.
        """
        super().__init__()
        self.mab1 = MultiheadAttentionBlock(d, h, rff1)
        self.mab2 = MultiheadAttentionBlock(d, h, rff2)
        self.inducing_points = nn.Parameter(torch.randn(1, m, d))

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [b, n, d].
        Returns:
            a float tensor with shape [b, n, d].
        """
        b = x.size(0)
        p = self.inducing_points
        p = p.repeat([b, 1, 1])  # shape [b, m, d]
        h = self.mab1(p, x)  # shape [b, m, d]
        return self.mab2(x, h)

class PoolingMultiheadAttention(nn.Module):

    def __init__(self, d, k, h, rff):
        """
        Arguments:
            d: an integer, input dimension.
            k: an integer, number of seed vectors.
            h: an integer, number of heads.
            rff: a module, row-wise feedforward layers.
                It takes a float tensor with shape [b, n, d] and
                returns a float tensor with the same shape.
        """
        super().__init__()
        self.mab = MultiheadAttentionBlock(d, h, rff)
        self.seed_vectors = nn.Parameter(torch.randn(1, k, d))

    def forward(self, z):
        """
        Arguments:
            z: a float tensor with shape [b, n, d].
        Returns:
            a float tensor with shape [b, k, d].
        """
        b = z.size(0)
        s = self.seed_vectors
        s = s.repeat([b, 1, 1])  # random seed vector: shape [b, k, d]

        output = self.mab(s, z)
        # print('PoolingMultiheadAttention', output.shape)

        return output

In [14]:
# Set transformer for ingredient representation
class SetTransformer(nn.Module):
    def __init__(self):
        super(SetTransformer, self).__init__()
        in_dimension = 46 # 300
        out_dimension = 128 # 600

        d = in_dimension
        m = 16 # number of inducing points
        h = 2  # number of heads
        k = 2  # number of seed vectors

        self.encoder = nn.Sequential(
            InducedSetAttentionBlock(d, m, h, RFF(d), RFF(d)),
            InducedSetAttentionBlock(d, m, h, RFF(d), RFF(d))
        )
        self.decoder = nn.Sequential(
            PoolingMultiheadAttention(d, k, h, RFF(d)),
            SetAttentionBlock(d, h, RFF(d))
        )
        self.decoder_2 = nn.Sequential(
            PoolingMultiheadAttention(d, k, h, RFF(d))
        )
        self.decoder_3 = nn.Sequential(
            SetAttentionBlock(d, h, RFF(d))
        )

        self.predictor = nn.Linear(k * d, out_dimension)


    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch, n, in_dimension].
        Returns:
            a float tensor with shape [batch, out_dimension].
        """

        x = self.encoder(x) # shape [batch, batch_max_len, d]
        x = self.decoder(x) # shape [batch, k, d]

        b, k, d = x.shape
        x = x.view(b, k * d)
        y = self.predictor(x)
        
        return y


## textCNN

In [15]:
class textCNN(nn.Module):
    def __init__(self, dim_channel, kernel_wins, dropout_rate, num_class):
        super(textCNN, self).__init__()
    
        # Convolutional Layers with different window size kernels
        self.convs = nn.ModuleList([nn.Conv2d(1, dim_channel, (w, 1024)) for w in kernel_wins])
        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)
        
        self.fc = nn.Linear(len(kernel_wins)*dim_channel, num_class)
        
    def forward(self, x):
        con_x = [conv(x) for conv in self.convs]
        pool_x = [F.max_pool1d(x.squeeze(-1), x.size()[2]) for x in con_x]
        fc_x = torch.cat(pool_x, dim=1)
        fc_x = fc_x.squeeze(-1)
        fc_x = self.dropout(fc_x)
        logit = self.fc(fc_x)
        
        return logit
    

## GNN

In [16]:
class RelationAttention(nn.Module):
    def __init__(self, in_size, hidden_size=16):
        super(RelationAttention, self).__init__()

        self.project = nn.Sequential(
            nn.Linear(in_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False)
        )

    def forward(self, z):
        w = self.project(z).mean(0)                    # (M, 1)
        beta = torch.softmax(w, dim=0)                 # (M, 1)
        beta = beta.expand((z.shape[0],) + beta.shape) # (N, M, 1)
        out = (beta * z).sum(1)                        # (N, D * K)
        
        return out
    
    
class GNN(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
        super().__init__()
        

        self.num_heads = 8
        self.hid_feats = int(hid_feats/self.num_heads)
        self.out_feats = int(out_feats/self.num_heads)
        
        self.relation_attention = RelationAttention(in_feats) # in_feats*self.num_heads
        
        self.gatconv1 = dglnn.HeteroGraphConv({
            'i-r': dglnn.GATConv(in_feats, self.hid_feats, num_heads=self.num_heads),
            'r-i': dglnn.GATConv(in_feats, self.hid_feats, num_heads=self.num_heads),
            'r-r': dglnn.GATConv(in_feats, self.hid_feats, num_heads=self.num_heads),
            'i-i': dglnn.GATConv(in_feats, self.hid_feats, num_heads=self.num_heads),
            }, aggregate='stack') # sum
        
        self.gatconv2 = dglnn.HeteroGraphConv({
            'i-r': dglnn.GATConv(self.hid_feats*self.num_heads, self.out_feats, num_heads=self.num_heads),
            'r-i': dglnn.GATConv(self.hid_feats*self.num_heads, self.out_feats, num_heads=self.num_heads),
            'r-r': dglnn.GATConv(self.hid_feats*self.num_heads, self.out_feats, num_heads=self.num_heads),
            'i-i': dglnn.GATConv(self.hid_feats*self.num_heads, self.out_feats, num_heads=self.num_heads),
            }, aggregate='stack') # sum
        
        self.embedding = nn.Sequential(
            nn.Linear(self.out_feats*self.num_heads, out_feats)
        )
        
        self.combineSetTransformerLinear = nn.Sequential(
            nn.Linear(256, 128)
        )
        
    def forward(self, blocks, inputs, total_ingre_emb):
        edge_weight_0 = blocks[0].edata['weight']
        edge_weight_1 = blocks[1].edata['weight']
    
        h = self.gatconv1(blocks[0], inputs, edge_weight_0)
        h = {k: F.relu(v).flatten(2) for k, v in h.items()}
        h = {k: self.relation_attention(v) for k, v in h.items()} 

        secondToLast_ingre = h['ingredient']
        h = self.gatconv2(blocks[-1], h, edge_weight_1) # (h, h)
        last_ingre_and_instr = h['recipe'].flatten(2) # [64, 2, 128]
        
        temp = last_ingre_and_instr[:,1,:]
        total_ingre_emb = total_ingre_emb
        temp = torch.cat([temp, total_ingre_emb], 1)
        temp = self.combineSetTransformerLinear(temp)
        combine_the_other = torch.cat([last_ingre_and_instr[:,0,:].unsqueeze(1), temp.unsqueeze(1)], 1)
        
        h = {'recipe':self.relation_attention(combine_the_other)}
        # attention-head Weight matrix
        h = {k: self.embedding(v) for k, v in h.items()}

        return torch.squeeze(h['recipe']), secondToLast_ingre, last_ingre_and_instr


## Model

In [17]:
def norm(input, p=1, dim=1, eps=1e-12):
    return input / input.norm(p, dim, keepdim=True).clamp(min=eps).expand_as(input)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        # user
        self.user_embedding = nn.Sequential(
            nn.Linear(300, 128),
            nn.Tanh()
        )
        
        # transform input embeddings
        self.instr_embedding = nn.Sequential(
            nn.Linear(1024, 128),
            nn.Tanh()
        )
        self.ingredient_embedding = nn.Sequential(
            nn.Linear(46, 128),
            nn.Tanh()
        )
    
        self.setTransformer_ = SetTransformer()
        self.gnn = GNN(128, 128, 128, graph.etypes) # 128
        self.cnn = textCNN(128, kernel_wins=[3,4,5], dropout_rate=0.5, num_class=128)
    
        # output transformation
        self.out = nn.Sequential(
            nn.Linear(128, 9)
        )
        
    def forward(self, graph, inputs, output_nodes):
        instr, ingredient, ingredient_of_dst_recipe = inputs
        
        # instruction - textcnn
        instr = self.cnn(instr.unsqueeze(1))
        instr = norm(instr)

        # ingredient
        ingredient = self.ingredient_embedding(ingredient)
        ingredient = norm(ingredient)

        # for setTransformer
        all_ingre_emb_for_each_recipe = get_ingredient_neighbors_all_embeddings(graph, output_nodes, ingredient_of_dst_recipe)
        all_ingre_emb_for_each_recipe = norm(all_ingre_emb_for_each_recipe)
        total_ingre_emb = self.setTransformer_(all_ingre_emb_for_each_recipe)
        
        # GNN
        output, secondToLast_ingre, last_ingre_and_instr = self.gnn(graph, {'recipe': instr, 'ingredient': ingredient}, total_ingre_emb)
        total_pos_score, total_neg_score = get_ingredient_neighbors_link_scores(graph, output_nodes, secondToLast_ingre, output)
        
        return self.out(output), secondToLast_ingre, output, total_pos_score, total_neg_score


## Data Loader

In [18]:
# dataloader
sampler = dgl.dataloading.MultiLayerNeighborSampler([{('recipe', 'r-i', 'ingredient'): 20, 
                                                     ('ingredient', 'i-r', 'recipe'): 20, 
                                                     ('recipe', 'r-r', 'recipe'): 20,
                                                     ('ingredient', 'i-i', 'ingredient'): 20
                                                     }]*2)

train_dataloader = dgl.dataloading.NodeDataLoader(
    graph, {'recipe': train_idx.cpu()}, sampler,
    batch_size=128, shuffle=True, drop_last=False, num_workers=0)

val_dataloader = dgl.dataloading.NodeDataLoader(
    graph, {'recipe': val_idx.cpu()}, sampler,
    batch_size=128, shuffle=True, drop_last=False, num_workers=0)

test_dataloader = dgl.dataloading.NodeDataLoader(
    graph, {'recipe': test_idx.cpu()}, sampler,
    batch_size=128, shuffle=True, drop_last=False, num_workers=0)
print('# of batches in train_dataloader: ', len(train_dataloader))
print('# of batches in val_dataloader: ', len(val_dataloader))
print('# of batches in test_dataloader: ', len(test_dataloader))
print()

for input_nodes, output_nodes, blocks in train_dataloader:
    print('blocks: ', blocks)
    break

# of batches in train_dataloader:  425
# of batches in val_dataloader:  91
# of batches in test_dataloader:  92

blocks:  [Block(num_src_nodes={'ingredient': 3434, 'recipe': 12248},
      num_dst_nodes={'ingredient': 448, 'recipe': 683},
      num_edges={('ingredient', 'i-i', 'ingredient'): 7135, ('ingredient', 'i-r', 'recipe'): 4366, ('recipe', 'r-i', 'ingredient'): 8364, ('recipe', 'r-r', 'recipe'): 9527},
      metagraph=[('ingredient', 'ingredient', 'i-i'), ('ingredient', 'recipe', 'i-r'), ('recipe', 'ingredient', 'r-i'), ('recipe', 'recipe', 'r-r')]), Block(num_src_nodes={'ingredient': 448, 'recipe': 683},
      num_dst_nodes={'ingredient': 0, 'recipe': 128},
      num_edges={('ingredient', 'i-i', 'ingredient'): 0, ('ingredient', 'i-r', 'recipe'): 846, ('recipe', 'r-i', 'ingredient'): 0, ('recipe', 'r-r', 'recipe'): 564},
      metagraph=[('ingredient', 'ingredient', 'i-i'), ('ingredient', 'recipe', 'i-r'), ('recipe', 'ingredient', 'r-i'), ('recipe', 'recipe', 'r-r')])]


## Evaluation and helper functions

In [19]:
def get_link_prediction_loss(pos_score, neg_score):
    # Margin loss
    n_edges = pos_score.shape[0]
    return (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean()

def get_score(y_pred, y_true):
    score = {
        "precision": precision_score(y_true, y_pred, labels=[1, 2, 3, 4, 5, 6, 7, 8], average='micro'), 
        "recall": recall_score(y_true, y_pred, labels=[1, 2, 3, 4, 5, 6, 7, 8], average='micro'),
        "f1": f1_score(y_true, y_pred, labels=[1, 2, 3, 4, 5, 6, 7, 8], average='micro')
    }

    detailed_score = {
        "precision": precision_score(y_true, y_pred, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8], average=None, zero_division=0),
        "recall": recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8], average=None, zero_division=0),
        "f1": f1_score(y_true, y_pred, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8], average=None, zero_division=0)
    }
    return score, detailed_score

def evaluate(model, dataloader, device):
    # print('evaluating ... ')
    evaluate_start = time.time()
    model.eval()
    total_loss = 0
    cosine_total_loss = 0
    link_prediction_total_loss = 0
    total_precision = 0
    total_recall = 0
    total_f1 = 0
    
    detailed_precision = 0
    detailed_recall = 0
    detailed_f1 = 0
    count = 0
    
    all_y_preds = None
    all_labels = None
    
    with torch.no_grad():
        for input_nodes, output_nodes, blocks in dataloader:

            blocks = [blk.to(device) for blk in blocks]
            
            # input
            input_instr = blocks[0].srcdata['instr_feature']['recipe']
            input_ingredient = blocks[0].srcdata['nutrient_feature']['ingredient']
            ingredient_of_dst_recipe = blocks[1].srcdata['nutrient_feature']['ingredient']

            labels = blocks[-1].dstdata['label']['recipe']

            inputs = [input_instr, input_ingredient, ingredient_of_dst_recipe]
            logits, secondToLast_ingre, last_instr, total_pos_score, total_neg_score = model(blocks, inputs, output_nodes)
            y_pred = np.argmax(logits.cpu(), axis=1)
            
            if all_y_preds is None:
                all_y_preds = y_pred
                all_labels = labels.cpu().numpy()
            else:
                all_y_preds = np.append(all_y_preds, y_pred, axis=0)
                all_labels = np.append(all_labels, labels.cpu().numpy(), axis=0)
            
            # Loss
            link_prediction_loss = get_link_prediction_loss(total_pos_score, total_neg_score)
            loss = criterion(logits, labels) + 0.1*link_prediction_loss

            total_loss += loss.item()
            link_prediction_total_loss += link_prediction_loss.item()

            count += len(labels)
        
        score, detailed_score = get_score(all_y_preds, all_labels)
        total_precision = score['precision']
        total_recall = score['recall']
        total_f1 = score['f1']
        detailed_precision = detailed_score['precision']
        detailed_recall = detailed_score['recall']
        detailed_f1 = detailed_score['f1']
        
        total_loss /= count
        link_prediction_total_loss /= count
        evalutate_time = time.strftime("%M:%S min", time.gmtime(time.time()-evaluate_start))
        
    return total_loss, total_precision, total_recall, total_f1, evalutate_time, detailed_precision, detailed_recall, detailed_f1, link_prediction_total_loss


## Training

In [20]:
model = Net().to(device)
opt = torch.optim.Adam(model.parameters(), lr=0.005)
scheduler = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.95)

weights_class = torch.Tensor(9).fill_(1)
criterion = nn.CrossEntropyLoss(weight=weights_class).to(device)
criterion_cosine = torch.nn.CosineEmbeddingLoss(margin=0.1)

print('start ... ')
for epoch in range(50):
    train_start = time.time()
    epoch_loss = 0
    cosine_epoch_loss = 0
    epoch_precision = 0
    epoch_recall = 0
    epoch_f1 = 0
    iteration_cnt = 0
        
    for batch, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader):
        model.train()
        blocks = [b.to(device) for b in blocks]

        # input
        input_instr = blocks[0].srcdata['instr_feature']['recipe'] 
        input_ingredient = blocks[0].srcdata['nutrient_feature']['ingredient']
        ingredient_of_dst_recipe = blocks[1].srcdata['nutrient_feature']['ingredient']
        labels = blocks[-1].dstdata['label']['recipe'] 

        inputs = [input_instr, input_ingredient, ingredient_of_dst_recipe]
        logits, secondToLast_ingre, last_instr, total_pos_score, total_neg_score = model(blocks, inputs, output_nodes)

        # training scores
        y_pred = np.argmax(logits.detach().cpu(), axis=1)
    
        # compute loss
        link_prediction_loss = get_link_prediction_loss(total_pos_score, total_neg_score)
        loss = criterion(logits, labels) + 0.1*link_prediction_loss
        opt.zero_grad()
        loss.backward()
        opt.step()
        
        
        epoch_loss += loss.item()
        iteration_cnt += 1 
        # break
        
    epoch_loss /= iteration_cnt
    cosine_epoch_loss /= iteration_cnt
    train_end = time.strftime("%M:%S min", time.gmtime(time.time()-train_start))
    
    print('Epoch: {0},  Loss: {l:.4f},  Time: {t}, LR: {lr:.6f}'
          .format(epoch, l=epoch_loss, t=train_end, lr=opt.param_groups[0]['lr']))

    scheduler.step()

    # Evaluation
    # For demonstration purpose, only test set result is reported here. Please use val_dataloader for comprehensiveness.
    test_loss, test_precision, test_recall, test_f1, test_time, test_detailed_precision, test_detailed_recall, test_detailed_f1, link_prediction_test_loss \
    = evaluate(model, test_dataloader, device)
    print('Testing: ')
    print('Total Loss: {l:.4f},  Precision: {precision:.4f},  Recall: {recall:.4f},  F1: {f1:.6f},  Time: {t}, Link Loss: {link_loss:.4f}'
          .format(l=test_loss, precision=test_precision, recall=test_recall, f1=test_f1, t=test_time, link_loss=link_prediction_test_loss))
    print('detailed_precision: ', [float('{:.4f}'.format(i)) for i in list(test_detailed_precision)])
    print('detailed_recall: ', [float('{:.4f}'.format(i)) for i in list(test_detailed_recall)])
    print('detailed_f1: ', [float('{:.4f}'.format(i)) for i in list(test_detailed_f1)])
    print()


start ... 
Epoch: 0,  Loss: 1.4874,  Time: 10:53 min, LR: 0.005000
Testing: 
Total Loss: 0.0094,  Precision: 0.6173,  Recall: 0.6348,  F1: 0.625940,  Time: 01:43 min, Link Loss: 0.0042
detailed_precision:  [0.4435, 0.3141, 0.7973, 0.7909, 0.0, 0.4188, 0.7093, 0.4206, 0.4728]
detailed_recall:  [0.3971, 0.0483, 0.7143, 0.6013, 0.0, 0.259, 0.9068, 0.0642, 0.9147]
detailed_f1:  [0.419, 0.0837, 0.7535, 0.6832, 0.0, 0.3201, 0.796, 0.1114, 0.6234]

Epoch: 1,  Loss: 1.1655,  Time: 09:58 min, LR: 0.004750
Testing: 
Total Loss: 0.0083,  Precision: 0.6720,  Recall: 0.6871,  F1: 0.679463,  Time: 01:45 min, Link Loss: 0.0041
detailed_precision:  [0.4878, 0.4684, 0.852, 0.7155, 0.2857, 0.6256, 0.7312, 0.4384, 0.6459]
detailed_recall:  [0.4476, 0.3793, 0.7076, 0.7492, 0.0175, 0.3987, 0.9062, 0.4365, 0.7241]
detailed_f1:  [0.4669, 0.4192, 0.7731, 0.7319, 0.0329, 0.4871, 0.8093, 0.4375, 0.6828]

Epoch: 2,  Loss: 1.0598,  Time: 09:58 min, LR: 0.004513
Testing: 
Total Loss: 0.0080,  Precision: 0.6723,  R

Epoch: 20,  Loss: 0.7554,  Time: 10:01 min, LR: 0.001792
Testing: 
Total Loss: 0.0066,  Precision: 0.7401,  Recall: 0.7820,  F1: 0.760463,  Time: 01:44 min, Link Loss: 0.0028
detailed_precision:  [0.6351, 0.6352, 0.8392, 0.7615, 0.6618, 0.6991, 0.7792, 0.678, 0.6958]
detailed_recall:  [0.5026, 0.5695, 0.8494, 0.8331, 0.393, 0.5871, 0.91, 0.5407, 0.8341]
detailed_f1:  [0.5611, 0.6005, 0.8442, 0.7957, 0.4932, 0.6382, 0.8396, 0.6016, 0.7587]

Epoch: 21,  Loss: 0.7509,  Time: 09:47 min, LR: 0.001703
Testing: 
Total Loss: 0.0066,  Precision: 0.7314,  Recall: 0.7846,  F1: 0.757056,  Time: 01:41 min, Link Loss: 0.0028
detailed_precision:  [0.6736, 0.6856, 0.8244, 0.7325, 0.4067, 0.6672, 0.7953, 0.6425, 0.7024]
detailed_recall:  [0.493, 0.5113, 0.8527, 0.8555, 0.6376, 0.6954, 0.8899, 0.6334, 0.7955]
detailed_f1:  [0.5693, 0.5858, 0.8383, 0.7893, 0.4966, 0.681, 0.8399, 0.6379, 0.7461]

Epoch: 22,  Loss: 0.7438,  Time: 09:47 min, LR: 0.001618
Testing: 
Total Loss: 0.0066,  Precision: 0.7393,  Re

Epoch: 39,  Loss: 0.6592,  Time: 09:51 min, LR: 0.000676
Testing: 
Total Loss: 0.0066,  Precision: 0.7417,  Recall: 0.7885,  F1: 0.764361,  Time: 01:42 min, Link Loss: 0.0025
detailed_precision:  [0.6746, 0.6854, 0.8305, 0.7059, 0.5607, 0.6625, 0.8119, 0.6807, 0.6947]
detailed_recall:  [0.5177, 0.5172, 0.8571, 0.8925, 0.5852, 0.7425, 0.8692, 0.5991, 0.8324]
detailed_f1:  [0.5858, 0.5896, 0.8436, 0.7883, 0.5726, 0.7002, 0.8396, 0.6373, 0.7573]

Epoch: 40,  Loss: 0.6524,  Time: 09:51 min, LR: 0.000643
Testing: 
Total Loss: 0.0066,  Precision: 0.7512,  Recall: 0.7752,  F1: 0.763025,  Time: 01:42 min, Link Loss: 0.0025
detailed_precision:  [0.6358, 0.6716, 0.8438, 0.7373, 0.6257, 0.6878, 0.8214, 0.6569, 0.6938]
detailed_recall:  [0.5611, 0.534, 0.8372, 0.8645, 0.4672, 0.6813, 0.8714, 0.5763, 0.8197]
detailed_f1:  [0.5961, 0.595, 0.8405, 0.7959, 0.535, 0.6845, 0.8456, 0.614, 0.7515]

Epoch: 41,  Loss: 0.6489,  Time: 09:49 min, LR: 0.000610
Testing: 
Total Loss: 0.0067,  Precision: 0.7412,  