# Libraries

In [40]:
from ogb.graphproppred import PygGraphPropPredDataset
from ogb.io.read_graph_raw import read_csv_graph_raw
from torch_geometric.data import DataLoader
from ogb.graphproppred import Evaluator

# Get dataset

In [44]:
# Download (if not already present) and process ogbg-molhiv.
# Files are placed under 'data/homework02/ogbg_molhiv/'.
dataset = PygGraphPropPredDataset(
    name="ogbg-molhiv",
    root="data/homework02/",
)

In [45]:
# Show the dataset's metadata record (number of tasks, eval metric,
# task type, split scheme, whether node/edge attributes exist, ...).
print(dataset.meta_info)

num tasks                                                                1
eval metric                                                         rocauc
download_name                                                          hiv
version                                                                  1
url                      http://snap.stanford.edu/ogb/data/graphproppre...
add_inverse_edge                                                      True
data type                                                              mol
has_node_attr                                                         True
has_edge_attr                                                         True
task type                                            binary classification
num classes                                                              2
split                                                             scaffold
additional node files                                                 None
additional edge files    

In [46]:
# Indices of the graphs (molecules, not atoms) assigned to each subset
# of the dataset's predefined train/valid/test split.
split_idx = dataset.get_idx_split()
train_ids, test_ids, valid_ids = (
    split_idx[subset] for subset in ("train", "test", "valid")
)

# Read the raw graphs directly from the downloaded files; each graph is
# a plain dict of arrays (see the printed example in the next section).
graphs = read_csv_graph_raw("data/homework02/ogbg_molhiv/raw")

Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 41127/41127 [00:00<00:00, 447000.14it/s]


# Check graphs

In [52]:
# Each graph is a dict with the following entries:
#
#   'num_nodes'
#       Number of nodes in the graph.
#
#   'edge_index'
#       Two lists of equal length. The node at position i of the first
#       list is connected to the node at position i of the second list.
#
#   'edge_feat'
#       Each edge has three features:
#           'bond_type', 'bond_stereo', 'conjugated'
#
#   'node_feat'
#       Each node has nine features:
#           'atomic_num', 'chirality', 'degree', 'formal_charge',
#           'num_h', 'num_rad_e', 'hybridization', 'is_aromatic',
#           'is_in_ring'
print(graphs[0])


{'edge_index': array([[ 0,  1,  2,  3,  4,  5,  6,  7,  6,  4, 10, 11, 12, 11, 14, 15,
        16, 15,  9, 18],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18,  2,  4]], dtype=int64), 'edge_feat': array([[0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int64), 'node_feat': array([[ 5,  0,  4,  5,  3,  0,  2,  0,  0],
       [ 5,  0,  4,  5,  2,  0,  2,  0,  0],
       [ 5,  0,  3,  5,  0,  0,  1,  0,  1],
       [ 7,  0,  2,  6,  0,  0,  1,  0,  1],
       [28,  0,  4,  2,  0,  0,  5,  0,  1],
       [ 7,  0,  2,  6,  0,  0,  1,  0,  1],
       [ 5,  0,  3,  5,  0,  0,  1,  0,  1],
       [ 5,  0,  4,  5,  2,  0,  2,  0,  0],
       [ 5,  0,