In [None]:
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

## Debug Graph Data

### Steps
1. Create Graph Client.
   ```
   from deepgnn.graph_engine.snark.local import Client

   graph = Client(data_path, list(range(n_partitions)))
   ```
2. Run graph queries. See the [Graph API](https://github.com/microsoft/DeepGNN/blob/main/src/python/deepgnn/graph_engine/_base.py) for the available methods.
    - debug features
        * `graph.node_features()`
        * `graph.edge_features()`
        * `graph.node_types()`
        * `graph.neighbors()`
    - debug sample 
        * `graph.sample_nodes()`
        * `graph.sample_edges()`
        * `graph.sample_neighbors()`
    - debug [multihop query](https://github.com/microsoft/DeepGNN/blob/main/src/python/deepgnn/graph_engine/multihop.py).
        * `multihop.sample_fanout()`
        * `multihop.get_neighbor()`
   

### Prepare raw graph data

In [1]:
import tempfile
import json

from deepgnn.graph_engine._base import Graph, FeatureType, SamplingStrategy
import deepgnn.graph_engine.snark.convert as convert
from deepgnn.graph_engine.snark.decoders import JsonDecoder


# Module-level handle to the scratch directory that holds the raw and
# converted graph files; get_test_graph_path() writes into tmp_dir.name.
tmp_dir = tempfile.TemporaryDirectory()
def get_test_graph_path():
    """Generate a tiny test graph on disk and convert it to snark binary format.

    Five nodes (ids 0..4) are written as one JSON document per line to
    ``data.json`` inside ``tmp_dir``. Node ``i`` has type ``i % 2``, weight 1,
    two binary features, two float features, two uint64 features, and one
    edge of type 0 with weight 1.0 to every node ``j`` with ``i <= j < 5``
    (this includes a self-loop). A ``meta.json`` describing the type and
    feature counts is written next to it, and both are passed to
    ``convert.MultiWorkersConverter`` to produce a single partition.

    Returns:
        str: ``tmp_dir.name``, the directory containing ``data.json``,
        ``meta.json`` and the converter output.
    """
    max_id = 5
    json_lines = []
    for i in range(0, max_id):
        node = {
            "node_weight": 1,
            "node_id": i,
            "node_type": (i % 2),
            "binary_feature": {"0": "Hello world!" + str(10 ** i), "1": "Goodbye!"},
            "float_feature": {
                "0": [float(i), float(i + 11), float(i + 101)],
                "1": [float(i), float(i + 21)],
            },
            "uint64_feature": {"3": [i, i + 10, i + 100], "4": [i, i + 20, i + 300]},
            "edge": [{"src_id": i, "dst_id": j, "edge_type": 0, "weight": 1.0 } for j in range(i, max_id)],
            "neighbor": {"0": {str(j): 1.0 for j in range(i, max_id)}},
        }
        # One JSON document per line; the trailing newline is added on write.
        json_lines.append(json.dumps(node))

    raw_file = tmp_dir.name + "/data.json"
    with open(raw_file, "w") as raw:
        raw.writelines(line + "\n" for line in json_lines)
    meta_file = tmp_dir.name + "/meta.json"
    with open(meta_file, "w") as meta:
        meta.write(
            '{ \
                "node_type_num": 2,           \
                "node_binary_feature_num": 2, \
                "node_float_feature_num": 2, \
                "node_uint64_feature_num": 2, \
                "edge_type_num": 1,           \
                "edge_binary_feature_num": 0, \
                "edge_float_feature_num": 0,  \
                "edge_uint64_feature_num": 0}'
        )

    # Convert to snark binary format
    partitions = 1
    convert.MultiWorkersConverter(
        graph_path=raw_file,
        meta_path=meta_file,
        partition_count=partitions,
        output_dir=tmp_dir.name,
        decoder_class=JsonDecoder,
    ).convert()

    return tmp_dir.name
    

### Create Graph Client
The client supports loading graph data from:
- Disk or NFS, for example:
  - `data_dir="/deepgnn-nfs/users/test/ppidata"`

In [2]:
import numpy as np

from deepgnn.graph_engine.snark.local import Client as SnarkClient
from deepgnn.graph_engine import FeatureType

def create_graph_client(data_path, n_partitions=1):
    """Open a local snark client over the partitions stored at ``data_path``.

    Args:
        data_path: Location of the converted graph data.
        n_partitions: Number of partitions to load; partition ids
            ``0 .. n_partitions - 1`` are passed to the client.

    Returns:
        The constructed ``SnarkClient`` instance.
    """
    partition_ids = [*range(n_partitions)]
    return SnarkClient(data_path, partition_ids)

# Generate the test graph on local disk, then create a client over that path.
data_path = get_test_graph_path()
graph = create_graph_client(data_path)


[2022-02-15 09:54:46,157] {convert.py:200} INFO - worker 0 try to generate partition: 0 - 1
[2022-02-15 09:54:46,160] {_adl_reader.py:123} INFO - [1,0] Input files: ['/tmp/tmpahxg1n05/data.json']
[2022-02-15 09:54:46,185] {local.py:30} INFO - Graph data path: /tmp/tmpahxg1n05. Partitions [0]
[2022-02-15 09:54:46,200] {local.py:35} INFO - Loaded snark graph. Node counts: [3, 2]. Edge counts: [15]


#### debug features
- `node_features()`
- `edge_features()`

In [3]:
## Print node labels.
node_ids = np.array([0, 1, 2], dtype=np.int64)
# `np.int` was only an alias for the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use `int` directly (same resulting dtype).
# NOTE(review): each row presumably means [feature_id, feature_dim] - confirm
# against the Graph API.
label_metadata = np.array([[0, 121]], dtype=int)
label = graph.node_features(node_ids, label_metadata, FeatureType.FLOAT)
print("============================= node label ========================")
# Show only the first 10 columns of the returned feature matrix.
print(label[:, 0:10])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


#### debug sample
 - `sample_nodes()`
 - `sample_edges()`
 - `sample_neighbors()`

In [4]:
# Sample 10 nodes of type 0 using the weighted strategy.
node_ids = graph.sample_nodes(
    size=10,
    node_types=0,
    strategy=SamplingStrategy.Weighted,
)
print('node_ids', node_ids)

# Sample 10 edges of type 0 using the weighted strategy.
edges = graph.sample_edges(
    size=10,
    edge_types=0,
    strategy=SamplingStrategy.Weighted,
)
print('edges', edges)

# Sample 5 neighbors over edge type 0 for each of three fixed seed nodes.
node_ids = np.array([0, 1, 2], dtype=np.int64)
edgetypes = np.array([0], dtype=np.int32)
nb, nb_weight, nb_types, nb_count = graph.sample_neighbors(
    nodes=node_ids,
    edge_types=edgetypes,
    count=5,
    strategy='byweight',
)
print('neighbor nodes id\n', nb)
print('neighbor weight\n', nb_weight)
print('neighbor types\n', nb_types)
print('neighbor count\n', nb_count)

node_ids [0 4 2 4 2 4 2 2 2 2]
edges [[1 2 0]
 [0 3 0]
 [1 1 0]
 [2 2 0]
 [0 0 0]
 [3 3 0]
 [3 4 0]
 [2 3 0]
 [4 4 0]
 [2 3 0]]
neighbor nodes id
 [[2 1 2 2 1]
 [1 4 2 3 3]
 [4 3 4 2 2]]
neighbor weight
 [[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
neighbor types
 [[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
neighbor count
 [0]


#### debug [multihop query](https://github.com/microsoft/DeepGNN/blob/main/src/python/deepgnn/graph_engine/multihop.py)

- `multihop.sample_fanout()`
- `multihop.get_neighbor()`

In [5]:
from deepgnn.graph_engine import multihop

## sample_fanout()
# Seed nodes are drawn at random; for a fixed set of seeds, use
# np.array([0, 1, 2], dtype=np.int64) instead.
node_ids = graph.sample_nodes(
    size=5,
    node_types=0,
    strategy=SamplingStrategy.Weighted,
)
meta_path = [[0], [0]]  # edge types to follow at each hop
fanout = [3, 3]  # number of neighbors sampled at each hop
n_hop_neighbor, weights, types = multihop.sample_fanout(
    graph,
    node_ids,
    metapath=meta_path,
    fanouts=fanout,
    default_node=-1,
    sampling_strategy='byweight',
)
print("============================= multihop.sample_fanout ========================")
print(n_hop_neighbor)
print(weights)
print(types)


## get_neighbor()
node_ids = graph.sample_nodes(
    size=5,
    node_types=0,
    strategy=SamplingStrategy.Weighted,
)
edgetypes = np.array([[0], [0]], dtype=np.int32)
nodes_list, adj_list = multihop.get_neighbor(
    graph,
    node_ids,
    edgetypes,
    max_neighbors_per_node=100,
)
print("============================= multihop.get_neighbor ========================")
print(nodes_list)
print(adj_list)


[array([2, 0, 0, 0, 2]), array([3, 3, 2, 4, 2, 3, 0, 2, 1, 3, 3, 0, 4, 4, 3]), array([4, 3, 3, 4, 3, 3, 3, 4, 4, 4, 4, 4, 2, 4, 2, 3, 3, 4, 2, 1, 0, 4,
       4, 2, 2, 2, 3, 4, 3, 3, 4, 3, 4, 1, 2, 2, 4, 4, 4, 4, 4, 4, 3, 4,
       3])]
[array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
      dtype=float32), array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)]
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0], dtype=int32)]
[[array([2, 0, 4, 4, 0]), array([5])], [array([0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0