# Here we create and save the DGL Graph

In [2]:
import torch
import dgl
import pandas as pd

In [38]:
# NODE FEATURES
# One feature tensor per node type, read from the preprocessing output.
business = torch.load('preprocessed/business_feature.pt')
user = torch.load('preprocessed/user_feature.pt')
review = torch.load('preprocessed/review_features.pt')
tip = torch.load('preprocessed/tip_features.pt')
category = torch.load('preprocessed/category_features.pt')

# NODE IDS
# One id table per node type.
# NOTE(review): presumably row i of each table names row i of the matching
# feature tensor -- confirm against the preprocessing step.
business_ids = pd.read_csv('preprocessed/business_ids.csv')
user_ids = pd.read_csv('preprocessed/user_ids.csv')
review_ids = pd.read_csv('preprocessed/review_ids.csv')
tip_ids = pd.read_csv('preprocessed/tip_ids.csv')
category_ids = pd.read_csv('preprocessed/category_ids.csv')

# RELS
# Edge tensors, cast to int64 because their columns are used as node indices
# when the graph is built.
business_has_category = torch.load('preprocessed/business_category.pt').to(torch.int64)
review_to_business = torch.load('preprocessed/review_business.pt').to(torch.int64)
tip_to_business = torch.load('preprocessed/tip_business.pt').to(torch.int64)
user_to_review = torch.load('preprocessed/user_review.pt').to(torch.int64)
user_to_tip = torch.load('preprocessed/user_tip.pt').to(torch.int64)
user_to_user = torch.load('preprocessed/user_user.pt').to(torch.int64)

In [33]:
# Shape of the second column of the review->business edge tensor (the column
# passed as destination ids when the graph is built); its length is the
# number of review->business edges.
review_to_business[:, 1].shape

torch.Size([6989558])

In [42]:
# Edge tensors keyed by their (source type, relation name, destination type)
# triple.
edge_tensors = {
    ('business', 'business_has_category', 'category'): business_has_category,
    ('review', 'review_to_business', 'business'): review_to_business,
    ('tip', 'tip_to_business', 'business'): tip_to_business,
    ('user', 'user_to_review', 'review'): user_to_review,
    ('user', 'user_to_tip', 'tip'): user_to_tip,
    ('user', 'user_to_user', 'user'): user_to_user,
}

# Column 0 supplies the source node ids and column 1 the destination node ids
# of every relation.
edge_endpoints = {
    etype: (pairs[:, 0], pairs[:, 1])
    for etype, pairs in edge_tensors.items()
}

# Node counts come from the feature tensors rather than being inferred from
# the edge lists, so each node type has exactly one node per feature row.
node_counts = {
    'business': business.shape[0],
    'tip': tip.shape[0],
    'review': review.shape[0],
    'user': user.shape[0],
    'category': category.shape[0],
}

graph = dgl.heterograph(edge_endpoints, num_nodes_dict=node_counts)

In [46]:
# Total number of nodes in the heterograph, counted across all node types.
graph.num_nodes()

10037887

In [50]:
# Values of the 'category_id:ID' column of the category id table, as a NumPy
# array.
category_ids['category_id:ID'].values

array(['c68f8fcd92fd90c9044c0d3e261ec384',
       '6fe53dc4393776783492cf1b4666578a',
       'ac92e7dd1ea2adde16074a436d532775', ...,
       'f15f48cbbc7a8aea4e8f226899a8a5fd',
       'b397edd35777a9f6036119e6ced96ff6',
       '00f6fc6048f7fb601e7bae3c43ec81ee'], dtype=object)

In [57]:
# Store each node type's feature tensor on the graph under the 'feat' key.
features_by_ntype = dict(
    business=business,
    tip=tip,
    review=review,
    user=user,
    category=category,
)
graph.ndata['feat'] = features_by_ntype

In [54]:
# Row labelled 123432 of the business id table.
# NOTE(review): presumably the same business as business[123432] -- confirm
# that the id table and the feature tensor share row order.
business_ids.loc[123432]

business_id:ID    dd12270ae4890bb5d953851052701e66
Name: 123432, dtype: object

In [56]:
# Feature vector at row 123432 of the business tensor, displayed so it can be
# compared with the corresponding node in neo4j.
business[123432] # to see the mapping in neo4j

tensor([ 39.8568, -86.0082,   4.0000, 324.0000,   1.0000,   1.0000,   1.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   1.0000,   1.0000,   0.0000,   0.0000,
          0.0000,   1.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,
          0.0000,   0.0000,   1.0000,   0.0000,   0.0000,   1.0000,   1.0000,
          0.0000,   1.0000,   0.0000,   1.0000,   0.0000,   1.0000,   0.0000,
          1.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   1.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          1.0000,   0.0000,   0.0000,   0.0000,   1.0000,   1.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,   1.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.00

In [59]:
# Write the graph (with the node features attached to it) to disk.
dgl.save_graphs(filename='preprocessed/graph.dgl', g_list=[graph])

In [68]:
# Reload the saved graph from disk.
# dgl.load_graphs returns a (graph_list, labels) pair; unpack it explicitly so
# `graph` is never bound to the intermediate tuple.
loaded_graphs, _labels = dgl.load_graphs('preprocessed/graph.dgl')
# The first entry of the returned list is the graph to keep working with.
graph = loaded_graphs[0]