# Triton Meets ArangoDB: Amazon Product Recommendation Application using GraphSage Algorithm

In this notebook, we will build an Amazon Product Recommendation application by combining three technologies: Graph Machine Learning, NVIDIA's Triton Inference Server, and ArangoDB.

In [None]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.nn.functional as F
from tqdm import tqdm
from torch_geometric.data import NeighborSampler
from torch_geometric.nn import SAGEConv
import os.path as osp
import pandas as pd
import numpy as np
import collections
from pandas.core.common import flatten
# importing the OGB dataset
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from pandas.core.common import flatten

import matplotlib.pyplot as plt

import collections
from scipy.special import softmax

from arango import ArangoClient
import pprint
import tritongrpcclient
import tritongrpcclient.model_config_pb2 as mc
import tritonhttpclient
from tritonclientutils import triton_to_np_dtype
from tritonclientutils import InferenceServerException
from scipy.special import softmax


In [None]:
# Root directory passed to PygNodePropPredDataset as the dataset location
root = "./create_triton_acceptable_models/datasets"


In [None]:
# Load the ogbn-products node property prediction dataset from `root`
dataset = PygNodePropPredDataset(name='ogbn-products', root=root)

In [None]:
# The dataset holds a single graph; take it together with the official
# evaluator and the train/valid/test split (split is based on sales ranking)
data = dataset[0]
evaluator = Evaluator(name='ogbn-products')
split_idx = dataset.get_idx_split()

In [None]:
# Unpack the node indices of the three splits
train_idx, valid_idx, test_idx = (
    split_idx['train'],
    split_idx['valid'],
    split_idx['test'],
)


In [None]:
# display the test node indices (notebook cell output)
test_idx

In [None]:
# Neighborhood sampling of the test nodes: one seed node per batch, in the
# original order, sampling 15, 10 and 5 neighbors across the three sizes
sampler_options = dict(
    node_idx=test_idx,
    sizes=[15, 10, 5],
    batch_size=1,
    shuffle=False,
    num_workers=12,
)
test_loader = NeighborSampler(data.edge_index, **sampler_options)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# node feature matrix of the graph, moved to the selected device
x = data.x.to(device)

In [None]:
# node labels of the graph, moved to the selected device
y = data.y.to(device)

In [None]:
# creating adjs for performing a trace on the GraphSage model
# will contain only edge_idx and size attributes

# Number of rows the node feature matrix is padded to before it is sent out
max_nodes = 1000


def create_triton_input(dummy_n_ids, dummy_adjs):
    """Build the seven tensors sent to the Triton model for one sampled node.

    Reads the module-level ``x`` (node features), ``device`` and ``max_nodes``.

    Args:
        dummy_n_ids: sequence whose first element holds the node ids of the
            sampled computation graph; they are used to index rows of ``x``.
        dummy_adjs: sequence whose first element holds at least three
            adjacency records. Item 0 of a record is used as the edge index
            and item 2 as the size of the bipartite graph.

    Returns:
        Tuple ``(x_padded, edge_index_0, edge_size_0, edge_index_1,
        edge_size_1, edge_index_2, edge_size_2)``, every tensor on ``device``.

    Raises:
        IndexError: if fewer than three adjacency records are supplied.
        ValueError: if the sample has more than ``max_nodes`` nodes.
    """
    adjs = dummy_adjs[0]

    # Only the first three adjacency records are consumed, in sampler order.
    edge_tensors = []
    for adj in (adjs[0], adjs[1], adjs[2]):
        edge_tensors.append(adj[0].to(device))
        edge_tensors.append(torch.tensor(np.asarray(adj[2])).to(device))

    dummy_x = x[dummy_n_ids[0]]
    total_nodes = dummy_x.size(0)
    if total_nodes > max_nodes:
        # A negative pad would crop rows that the edge indices still reference.
        raise ValueError(
            "sampled computation graph has %d nodes, more than max_nodes=%d"
            % (total_nodes, max_nodes)
        )

    # Zero-pad along the node dimension so the matrix has max_nodes rows.
    nodes_padded = max_nodes - total_nodes
    dummy_x_pad = F.pad(input=dummy_x, pad=(0, 0, 0, nodes_padded), mode='constant', value=0)
    dummy_x_pad = dummy_x_pad.to(device)

    return (dummy_x_pad, *edge_tensors)

In [None]:
# Tensor names, kept identical to the ones used in the model config file:
# seven inputs (input__0 .. input__6) and three outputs (output__0 .. output__2)
input_name = ['input__%d' % position for position in range(7)]
output_name = ['output__%d' % position for position in range(3)]
# Passed to the Triton client as its `verbose` flag
VERBOSE = False
from tritonclient.utils import *

## Client-Side Script to Interact with Triton Inference Server

The `run_inference` function computes the embeddings of a given node at each of the three layers of the trained GraphSage model and returns them. This function requires 7 inputs:

{ node_matrix: Padded node feature matrix consisting of the nodes involved in the computation graph

  edge_index_0: adjacency list for all the edges involved at the Hop-3 (layer-3)
  
  edge_size_0 : shape of the bipartite graph at Hop-3

  edge_index_1: adjacency list for all the edges involved at the Hop-2 (layer-2)
  
  edge_size_1 : shape of the bipartite graph at Hop-2
  
  edge_index_2: adjacency list for all the edges involved at the Hop-1 (layer-1)
  
  edge_size_2 : shape of the bipartite graph at Hop-1
}

Note: The neighborhood sampler returns the adjacency lists in reversed order.

In [None]:
def run_inference(node_matrix, edge_index_0, edge_size_0, edge_index_1, edge_size_1, edge_index_2, edge_size_2, model_name='graph_embeddings', url='127.0.0.1:8000', model_version='1'):
    """Send one sampled computation graph to Triton and return its embeddings.

    Reads the module-level ``input_name``, ``output_name`` and ``VERBOSE``.

    Args:
        node_matrix: padded node feature tensor; sent as FP32.
        edge_index_0, edge_index_1, edge_index_2: edge index tensors of the
            three sampled adjacencies, in the order the sampler returned
            them; sent as INT64.
        edge_size_0, edge_size_1, edge_size_2: sizes of the matching
            bipartite graphs; sent as INT64.
        model_name: name of the model deployed on the Triton server.
        url: ``host:port`` of the Triton HTTP endpoint.
        model_version: version of the model to query.

    Returns:
        Tuple ``(embeddings_layer_1, embeddings_layer_2, embeddings_layer_3)``
        of numpy arrays read from the first three names in ``output_name``.
    """
    # Payloads in the order of the model inputs input__0 .. input__6.
    payloads = [
        (np.array(node_matrix.cpu(), dtype=np.float32), 'FP32'),
        (np.array(edge_index_0.cpu(), dtype=np.int64), 'INT64'),
        (np.array(edge_size_0.cpu(), dtype=np.int64), 'INT64'),
        (np.array(edge_index_1.cpu(), dtype=np.int64), 'INT64'),
        (np.array(edge_size_1.cpu(), dtype=np.int64), 'INT64'),
        (np.array(edge_index_2.cpu(), dtype=np.int64), 'INT64'),
        (np.array(edge_size_2.cpu(), dtype=np.int64), 'INT64'),
    ]

    inputs = []
    for position, (array, triton_dtype) in enumerate(payloads):
        # Declare the real shape of each array instead of a hard-coded one, so
        # the declared shape can never disagree with the data being attached.
        infer_input = tritonhttpclient.InferInput(input_name[position], array.shape, triton_dtype)
        infer_input.set_data_from_numpy(array, binary_data=False)
        inputs.append(infer_input)

    outputs = [
        tritonhttpclient.InferRequestedOutput(output_name[position], binary_data=False)
        for position in range(3)
    ]

    triton_client = tritonhttpclient.InferenceServerClient(
        url=url, verbose=VERBOSE)
    try:
        response = triton_client.infer(model_name, model_version=model_version, inputs=inputs, outputs=outputs)
        # layer-1, layer-2 and layer-3 embeddings respectively
        embeddings_layer_1 = response.as_numpy(output_name[0])
        embeddings_layer_2 = response.as_numpy(output_name[1])
        embeddings_layer_3 = response.as_numpy(output_name[2])
    finally:
        # This function is called once per node; release the connection now
        # rather than whenever the client object gets garbage collected.
        triton_client.close()
    return embeddings_layer_1, embeddings_layer_2, embeddings_layer_3

In [None]:
# Load the mapping from integer label index to real product category.
# The path is derived from `root` so it follows the dataset location
# instead of repeating it as a second hard-coded literal.
df = pd.read_csv(osp.join(root, 'ogbn_products', 'mapping', 'labelidx2productcategory.csv.gz'))

In [None]:
# First column: integer label index; second column: product category name
label_idx = df.iloc[:, 0].values
prod_cat = df.iloc[:, 1].values
# Lookup table from label index to product category
label_mapping = dict(zip(label_idx, prod_cat))

For demonstration purposes, we will run inference on only the first 5000 test nodes.


In [None]:
# Run Triton inference for the first NUM_TEST_NODES sampled test nodes and
# collect the final-layer output of each one in layer_3_embs.

NUM_TEST_NODES = 5000
layer_3_embs = []
layer_2_embs = []
for idx, (batch_size, n_id, adjs) in enumerate(test_loader):
    # Check the stop condition first: if it only ran when the node is not
    # disconnected, a disconnected node at the cut-off would skip the break
    # and the loop would walk through the whole test set.
    if idx >= NUM_TEST_NODES:
        break
    print("idx:", idx)

    # create_triton_input expects the ids and adjacencies wrapped in a list
    dummy_n_ids = [n_id]
    dummy_adjs = [adjs]

    if len(dummy_n_ids[0]) == 1:
        # The sample contains only the seed node itself: nothing to aggregate
        print("Found Disconnected Node in the graph at index:", idx)
        layer_3_embs.append("Disconnected Node")
        continue

    # creating triton input
    dummy_x_pad, edge_index_0, edge_size_0, edge_index_1, edge_size_1, edge_index_2, edge_size_2 = create_triton_input(dummy_n_ids, dummy_adjs)
    # generating node embeddings for the test node from the Triton server
    emb1, emb2, emb3 = run_inference(dummy_x_pad, edge_index_0, edge_size_0, edge_index_1, edge_size_1, edge_index_2, edge_size_2)
    # keep only the first row of the last output
    layer_3_embs.append(emb3[0])

        

In [None]:
# Connecting to ArangoDB: create the client for the local server endpoint
client = ArangoClient(hosts="http://127.0.0.1:8529")

In [None]:
## Open the _system database with the demo credentials
#amazon_db = oasis.connect_python_arango(login)
amazon_db = client.db(
    '_system',
    username='root',
    password='test',
)

In [None]:
# Lower and upper bound of the product ids used for the test nodes
test_idx_lb, test_idx_ub = 235938, 2449028
# One past the last product id for which an inference result was collected
test_idx_mb = test_idx_lb + len(layer_3_embs)

In [None]:
# load dataset
#! ./arangorestore -c none --create-collection true --server.endpoint "tcp://127.0.0.1:8529" --server.username "root" --server.database "_system" --server.password "amritsar" --default-replication-factor 3  --input-directory "./ogbn-product_dataset"

In [None]:
# Bookkeeping for the batched write-back of the inference results
BATCH_SIZE = 250
batch, batch_idx, index = [], 1, 0
# collection in which we will store our inference results
product_collection = amazon_db["Products"]

In the cell below we will update the Amazon product recommendation graph (ogbn-products) dataset stored inside ArangoDB with the node embeddings and their corresponding product category predictions for the 5000 test nodes.

In [None]:
# Write the embeddings and predicted categories back to ArangoDB in batches
# of BATCH_SIZE documents. The batching state is initialised here so the
# cell gives the same result when it is re-run on its own.
batch = []
batch_idx = 1
for idx, embedding in zip(range(test_idx_lb, test_idx_mb), layer_3_embs):
    update_doc = {"_id": "Products/" + str(idx)}
    # Test the type before comparing: `ndarray == "..."` may be evaluated
    # elementwise by NumPy, and such a result cannot be used as a condition.
    if isinstance(embedding, str) and embedding == "Disconnected Node":
        update_doc["predicted_node_embeddings"] = embedding
        update_doc["predicted_product"] = str(-1)
    else:
        update_doc["predicted_node_embeddings"] = embedding.tolist()
        # the position of the largest component is the predicted label index
        update_doc["predicted_product"] = str(label_mapping[np.argmax(embedding, axis=-1)])
    batch.append(update_doc)

    if len(batch) == BATCH_SIZE:
        print("Inserting batch %d" % (batch_idx))
        batch_idx += 1
        product_collection.update_many(batch)
        batch = []

# Flush whatever is left over after the last full batch
if batch:
    print("Inserting batch the last batch!")
    product_collection.update_many(batch)


## Amazon Product Recommendation with AQL 
Products which can be bought together with a query product

In [None]:
# Fetch the stored embedding and category of a single product.
# product ids for demo 235940, 240930
embedding_lookup_query = """
  FOR p in Products
    FILTER p._id == "Products/236435"
    RETURN { "predicted_node_embeddings": p.predicted_node_embeddings, "product_cat": p.product_cat }
"""
cursor = amazon_db.aql.execute(embedding_lookup_query)

# Print every document returned by the cursor
for record in cursor:
  print(record)

In [None]:
# Rank the products that carry an embedding by cosine similarity to the
# query product and return the best matches first.
#
# - The query product and the last embedding index are bind variables, so
#   every loop in the query walks the same index range.
# - Documents whose predicted_node_embeddings is not an array (disconnected
#   nodes store a string there) are skipped; they would otherwise have a
#   zero magnitude and lead to a division by zero.
# NOTE(review): last_index=46 assumes 47-dimensional embeddings — confirm
# against the model output.
cursor = amazon_db.aql.execute(
"""
LET descr_emb = (
  FOR p in Products
    FILTER p._id == @query_product
    FOR j in RANGE(0, @last_index)
      RETURN TO_NUMBER(NTH(p.predicted_node_embeddings,j))
)

LET descr_mag = (
  SQRT(SUM(
    FOR i IN RANGE(0, @last_index)
      RETURN POW(TO_NUMBER(NTH(descr_emb, i)), 2)
  ))
)

LET dau = (

    FOR v in Products
    FILTER HAS(v, "predicted_node_embeddings")
    FILTER IS_ARRAY(v.predicted_node_embeddings)

    LET v_mag = (SQRT(SUM(
      FOR k IN RANGE(0, @last_index)
        RETURN POW(TO_NUMBER(NTH(v.predicted_node_embeddings, k)), 2)
    )))

    LET numerator = (SUM(
      FOR i in RANGE(0, @last_index)
          RETURN TO_NUMBER(NTH(descr_emb, i)) * TO_NUMBER(NTH(v.predicted_node_embeddings, i))
    ))

    LET cos_sim = (numerator)/(descr_mag * v_mag)

    RETURN {"product": v._id, "product_cat": v.product_cat, "cos_sim": cos_sim}

    )

FOR du in dau
    SORT du.cos_sim DESC
    LIMIT 5000
    RETURN {"product_cat": du.product_cat, "cos_sim": du.cos_sim} 
""",
    bind_vars={"query_product": "Products/236435", "last_index": 46},
)


In [None]:
# Print every document returned by the query, in cursor order
for record in cursor:
    print(record)

Here we're using the cosine similarity to retrieve the products which can be bought together with a query product. The cosine similarity is calculated between the node embedding of the query product ($a$) and the node embedding of each of the other 4999 products ($b$), where $n$ is the embedding dimension:
$$
 \frac{
  \sum\limits_{i=1}^{n}{a_i b_i}
  }{
      \sqrt{\sum\limits_{j=1}^{n}{a_j^2}}
      \sqrt{\sum\limits_{k=1}^{n}{b_k^2}}
  }
$$



Once we have calculated the cosine similarities, we can SORT the products and return those most likely to be bought together with the query product!

## Note: 
1) For demo purposes I am using only 5000 test nodes; using more nodes should improve the results.

2) There is also a lot of room to improve the accuracy of the model through hyperparameter tuning, e.g. trying different search depths or different hidden layer sizes (we used 256 in our experiment). Another interesting thing to experiment with would be a different neighborhood sampling technique, such as random walks.

Therefore, I will leave this as homework for you!!