In [1]:
import tokenizer
import torch

# Build the graph tokenizer from the object stored in "dictionary.pt".
# NOTE(review): torch.load unpickles the file, so only load a dictionary.pt
# that comes from a trusted source.
graph_tokenizer = tokenizer.GraphTokenizer(torch.load("dictionary.pt"))

In [2]:
import h5py
import tqdm
import numpy as np
import torch
import data
import torch_geometric as tg

# Read every entry of the training file into memory as a dict holding the
# two graphs of the pair and their target value "y".
all_data = []
with h5py.File('Data/train.h5', 'r') as train_file:
    for pair_name in tqdm.tqdm(train_file.keys()):
        pair_group = train_file[pair_name]
        all_data.append(
            {
                "graph1": data.read_graph(pair_group['graph1']),
                "graph2": data.read_graph(pair_group['graph2']),
                # "y" is a scalar dataset, so it is indexed with ().
                "y": torch.tensor(pair_group["y"][()]),
            }
        )

# Display the first sample as a quick sanity check.
all_data[0]

100%|█████████████████████████████████████| 500/500 [00:00<00:00, 1147.68it/s]


{'graph1': Data(x=[254, 9], edge_index=[2, 476], edge_attr=[476, 3], blend_batch=[28], mol_batch=[254]),
 'graph2': Data(x=[239, 9], edge_index=[2, 452], edge_attr=[452, 3], blend_batch=[28], mol_batch=[239]),
 'y': tensor(0.5769)}

In [3]:
# Tokenize the first graph of the first sample and display the result.
# In the recorded run, x goes from [254, 9] to [254] and edge_attr from
# [476, 3] to [476]; edge_index and the batch vectors keep their sizes.
graph_tokenizer.tokenize(all_data[0]["graph1"])

Data(x=[254], edge_index=[2, 476], edge_attr=[476], blend_batch=[28], mol_batch=[254])

In [4]:
import aggregate

from torch_geometric.loader import DataLoader

# Shape check for BlendAggregator with its first argument set to True:
# once on a collated batch of two graphs, once on a single unbatched graph.
agg = aggregate.BlendAggregator(True, 9, 1, 1, 0)
first_pair = all_data[0]
pair_loader = DataLoader(
    [first_pair["graph1"], first_pair["graph2"]],
    batch_size=2,
)
batch = next(iter(pair_loader))
for graph in (batch, first_pair["graph1"]):
    # The aggregator takes the node features and the graph they belong to.
    print(agg(graph.x, graph).shape)

torch.Size([2, 9])
torch.Size([1, 9])


In [5]:
import aggregate

from torch_geometric.loader import DataLoader

# Shape check for BlendAggregator with its first argument set to False:
# once on a collated batch of two graphs, once on a single unbatched graph.
agg = aggregate.BlendAggregator(False, 9, 1, 1, 0)
first_pair = all_data[0]
pair_loader = DataLoader(
    [first_pair["graph1"], first_pair["graph2"]],
    batch_size=2,
)
batch = next(iter(pair_loader))
for graph in (batch, first_pair["graph1"]):
    # The aggregator takes the node features and the graph they belong to.
    print(agg(graph.x, graph).shape)

torch.Size([2, 9])
torch.Size([1, 9])


In [6]:
import mpnn

# Forward-pass smoke test of a small MPNN on one raw (untokenized) graph.
# node_in_feats=9 and edge_in_feats=3 match the x=[254, 9] and
# edge_attr=[476, 3] shapes shown for the first sample.
config = mpnn.Config(
    node_out_feats=16,
    edge_hidden_feats=16,
    num_step_message_passing=3,
)
model = mpnn.from_config(
    config,
    node_in_feats=9,
    edge_in_feats=3,
    dropout=.1,
    do_edge_update=True,
)
exmpl = all_data[0]["graph1"]
# The call returns a pair of tensors, displayed as the cell output.
model(exmpl, exmpl.x, exmpl.edge_attr)

(tensor([[-0.3087,  0.2398, -0.0294,  ..., -0.1557, -0.4226,  0.0596],
         [-0.6061,  0.2975,  0.0441,  ..., -0.1206, -0.0000,  0.2664],
         [-0.4704,  0.1119, -0.0220,  ..., -0.3159, -0.5034, -0.0561],
         ...,
         [-0.4995,  0.2896,  0.4074,  ..., -0.0000,  0.0469, -0.3208],
         [-0.5031,  0.1146, -0.0578,  ..., -0.2303, -0.2998,  0.2197],
         [-0.4769,  0.2424,  0.1084,  ..., -0.1312, -0.2787, -0.1429]],
        grad_fn=<MulBackward0>),
 tensor([[-0.0148, -0.1306, -0.1554,  ...,  0.1641,  0.4541,  0.3151],
         [ 0.0060, -0.0915, -0.1627,  ...,  0.1329,  0.3147,  0.2220],
         [ 0.0157, -0.0992, -0.2161,  ...,  0.0531,  0.3120,  0.0000],
         ...,
         [ 0.0874, -0.0289, -0.1996,  ...,  0.0589,  0.3615,  0.1889],
         [-0.0000, -0.0838, -0.2561,  ...,  0.0642,  0.3596,  0.2077],
         [-0.0006, -0.1329, -0.2837,  ...,  0.0410,  0.3431,  0.2004]],
        grad_fn=<MulBackward0>))

In [7]:
import utils

# Display the per-component counts that utils.readout_counts reports for
# `model` (presumably parameter counts — confirm in utils).
utils.readout_counts(model)

{'total': '12,224',
 'base': '6,480',
 'project_edge_feats': '64',
 'edge_update_network': '1,056',
 'gnn_layer': '4,624'}

In [8]:
import encoder
import torch

# Encoder smoke test on a raw (untokenized) graph: graph_tokenizer=None and
# the input is `exmpl`, the first graph of the first sample (assigned in an
# earlier cell).
# Three MPNN configurations, passed to the encoder as "mpnn_configs".
mpnn_configs = [
    mpnn.Config(node_out_feats=16, edge_hidden_feats=8, num_step_message_passing=5),
    mpnn.Config(node_out_feats=64, edge_hidden_feats=32, num_step_message_passing=3),
    mpnn.Config(node_out_feats=128, edge_hidden_feats=64, num_step_message_passing=1),
]
# Keyword arguments for encoder.Encoder. Every key appears exactly once:
# a repeated key in a dict literal is silently overridden by its last
# occurrence, which hides typos and conflicting settings.
config = {
    "mpnn_configs": mpnn_configs,
    "do_two_stage": True,
    "do_edge_update": True,
    "embedding_dim_x": 32,
    "embedding_dim_edge_attr": 64,
    "num_sabs": 8,
    "dropout": 0.1,
    "heads": 8,
    "warmup": .05,
    "lr": 1e-3,
    "weight_decay": .01,
    "betas": (.99, .999),
}
ex_model = encoder.Encoder(graph_tokenizer=None, **config)
# Display the encoder output for the example graph.
ex_model(exmpl)

tensor([[ 16.9583,  23.6146,  42.0182,  39.7710,  17.0534,  15.2370,  25.9016,
         -11.5936,   4.4948,  16.0699,  23.4399,  22.5283,  19.7193,  12.4031,
         -29.9322,  21.4564,   7.5801,  37.0799,  21.3236,  29.2335,   8.4697,
          23.5336,  -3.8189,   6.2963,   4.6791,   9.6362,  11.6724,  24.3808,
          13.6773,   1.0631,  42.9330,  28.2912,  11.8086,   3.1609,  41.3929,
          24.9530,   8.5946,  19.8126,  22.0701,  26.8269,  16.6450,  18.6452,
         -15.1641,   5.8025,   7.5817,   8.7591,  -4.3016,  13.4386,  36.3745,
          13.2861,  25.9600,  -5.3286,  28.0919,   6.1746,  25.3568,  10.2057,
           4.1980,  12.5206,  13.3980,  -5.0892,  24.6521,  -8.2101,  17.6350,
           4.0994,  23.5761,   7.7184,  15.9622,   1.7439,  15.6459,  10.8583,
          -4.3025,   2.4597,  14.8605,  33.6072,  -2.8444,  18.6817,  -1.9339,
          18.3029,  20.0758,  26.0175,   3.6948,  16.9872,  28.6298,   5.2037,
          17.6996,  -0.5525,  14.7102,  22.5676,   7

In [9]:
# Encoder smoke test on a tokenized graph: a single MPNN configuration,
# no two-stage processing, no edge updates, and the tokenizer handed to
# the encoder.
mpnn_configs = [
    mpnn.Config(
        node_out_feats=64,
        edge_hidden_feats=32,
        num_step_message_passing=3,
    ),
]
config = dict(
    mpnn_configs=mpnn_configs,
    do_two_stage=False,
    embedding_dim_x=32,
    embedding_dim_edge_attr=64,
    do_edge_update=False,
    num_sabs=8,
    dropout=0.1,
    heads=8,
    warmup=.05,
    lr=1e-3,
    weight_decay=.01,
    betas=(.99, .999),
)
ex_model = encoder.Encoder(graph_tokenizer=graph_tokenizer, **config)
# Tokenize the first graph of the first sample and run it through the encoder.
exmpl_tokenized_graph = graph_tokenizer.tokenize(all_data[0]["graph1"])
ex_model(exmpl_tokenized_graph)

tensor([[-3.5342,  5.8691,  1.9356,  3.9250,  4.3043,  0.4475,  2.0477,  3.8878,
         -1.0316, -0.4927,  3.1384, -1.0431,  0.8578,  5.5217, -0.6399,  5.4862,
          2.9179,  2.1390,  0.1408,  2.9660,  6.4209,  0.9619,  1.6826,  1.3622,
          2.5840,  7.5151,  0.0904,  3.1781,  3.9731,  3.2431,  5.2631,  1.5384,
          0.3144, -0.8868,  2.5447,  3.5468,  5.4266,  4.0325,  2.7833, -1.2086,
          2.8764,  7.2132,  5.1883,  2.8140,  5.0126,  7.1626,  2.0744,  4.8210,
          1.9521,  5.8612,  0.4300,  2.8682, -1.3000,  5.1001,  0.3766,  1.5599,
          6.5754, -0.4751,  1.7334, -0.8443,  4.2337,  2.6250,  1.7305, -0.9494]],
       grad_fn=<ViewBackward0>)