In [1]:
import tokenizer
import torch

# Build the graph tokenizer from whatever object is stored in dictionary.pt.
# NOTE(review): depending on the installed torch version and its `weights_only`
# default, torch.load may unpickle arbitrary objects -- only load a trusted file.
graph_tokenizer = tokenizer.GraphTokenizer(torch.load("dictionary.pt"))

In [2]:
import h5py
import tqdm
import numpy as np
import torch
import data
import torch_geometric as tg

# Read every entry of the training file into memory as a list of
# {"graph1", "graph2", "y"} records.
all_data = []
with h5py.File("Data/train.h5", mode="r") as f:
    progress = tqdm.tqdm(f.keys())
    for label in progress:
        group = f[label]
        graph1 = data.read_graph(group["graph1"])
        graph2 = data.read_graph(group["graph2"])
        # "y" is a scalar dataset, so it is read with the empty-tuple index.
        y = group["y"][()]
        record = {"graph1": graph1, "graph2": graph2, "y": torch.tensor(y)}
        all_data.append(record)

# Display the first record as a quick sanity check.
all_data[0]

100%|██████████████████████████████████████| 500/500 [00:00<00:00, 963.33it/s]


{'graph1': BlendData(x=[254, 9], edge_index=[2, 476], edge_attr=[476, 3], blend_batch=[28], mol_batch=[254]),
 'graph2': BlendData(x=[239, 9], edge_index=[2, 452], edge_attr=[452, 3], blend_batch=[28], mol_batch=[239]),
 'y': tensor(0.5769)}

In [3]:
# Tokenize graph1 of the first record and display the result.
# In the recorded output, x and edge_attr lose their trailing feature
# dimension ([254, 9] -> [254], [476, 3] -> [476]).
graph_tokenizer.tokenize(all_data[0]["graph1"])

BlendData(x=[254], edge_index=[2, 476], edge_attr=[476], blend_batch=[28], mol_batch=[254])

In [4]:
import aggregate

from torch_geometric.loader import DataLoader

# Smoke-test BlendAggregator (first positional argument True) on a batched pair
# of graphs and then on a single unbatched graph, printing the output shapes.
# NOTE(review): the recorded output of this cell is
# "TypeError: Sequential.forward() got an unexpected keyword argument 'index'".
# The cause is not visible in this notebook -- check aggregate.BlendAggregator.
agg = aggregate.BlendAggregator(True, 9, 1, 1, 0)

first_sample = all_data[0]
pair_loader = DataLoader(
    [first_sample["graph1"], first_sample["graph2"]],
    batch_size=2,
)
batch = next(iter(pair_loader))
print(agg(batch.x, batch).shape)

single_graph = first_sample["graph1"]
print(agg(single_graph.x, single_graph).shape)

TypeError: Sequential.forward() got an unexpected keyword argument 'index'

In [None]:
import aggregate

from torch_geometric.loader import DataLoader

# Same smoke test as the preceding cell, but with the first positional
# argument of BlendAggregator set to False.
agg = aggregate.BlendAggregator(False, 9, 1, 1, 0)

first_sample = all_data[0]
pair_loader = DataLoader(
    [first_sample["graph1"], first_sample["graph2"]],
    batch_size=2,
)
batch = next(iter(pair_loader))
print(agg(batch.x, batch).shape)

single_graph = first_sample["graph1"]
print(agg(single_graph.x, single_graph).shape)

In [None]:
import mpnn

# Build an MPNN with do_edge_update=False, act_mode="relu", aggr_mode="mean"
# and run it once on graph1 of the first record. node_in_feats=9 and
# edge_in_feats=3 match the recorded x=[254, 9] / edge_attr=[476, 3] shapes.
config = mpnn.Config(
    node_out_feats=16,
    edge_hidden_feats=16,
    num_step_message_passing=3,
)
model = mpnn.from_config(
    config,
    node_in_feats=9,
    edge_in_feats=3,
    dropout=0.1,
    do_edge_update=False,
    act_mode="relu",
    aggr_mode="mean",
)
exmpl = all_data[0]["graph1"]
model(exmpl, exmpl.x, exmpl.edge_attr)

In [None]:
import mpnn

# Build an MPNN with do_edge_update=True, act_mode="silu", aggr_mode="mean"
# and run it once on graph1 of the first record. This rebinds `config`,
# `model` and `exmpl`.
config = mpnn.Config(
    node_out_feats=16,
    edge_hidden_feats=16,
    num_step_message_passing=3,
)
model = mpnn.from_config(
    config,
    node_in_feats=9,
    edge_in_feats=3,
    dropout=0.1,
    do_edge_update=True,
    act_mode="silu",
    aggr_mode="mean",
)
exmpl = all_data[0]["graph1"]
model(exmpl, exmpl.x, exmpl.edge_attr)

In [None]:
import utils

# Call utils.readout_counts on `model`, i.e. whichever MPNN the most recently
# executed cell bound to that name.
# NOTE(review): what readout_counts reports is not visible in this notebook.
utils.readout_counts(model)

In [None]:
import encoder
import torch

# Three MPNN configs: node_out_feats grows 16 -> 64 -> 128 while
# num_step_message_passing shrinks 5 -> 3 -> 1.
mpnn_configs = [
    mpnn.Config(node_out_feats=16, edge_hidden_feats=8, num_step_message_passing=5),
    mpnn.Config(node_out_feats=64, edge_hidden_feats=32, num_step_message_passing=3),
    mpnn.Config(node_out_feats=128, edge_hidden_feats=64, num_step_message_passing=1),
]

# Keyword arguments forwarded to encoder.Encoder. Every key is listed exactly
# once: a repeated key in a dict literal is silently overwritten by its last
# occurrence, which hides mistakes when the two values ever diverge.
config = {
    "mpnn_configs": mpnn_configs,
    "do_two_stage": True,
    "do_edge_update": True,
    "embedding_dim_x": 32,
    "embedding_dim_edge_attr": 64,
    "num_sabs": 8,
    "dropout": 0.1,
    "heads": 8,
    "warmup": 0.05,
    "lr": 1e-3,
    "weight_decay": 0.01,
    "betas": (0.99, 0.999),
    "act_mode": "silu",
    "aggr_mode": "mean",
}

# No tokenizer is supplied here, and the encoder is run on the raw
# (untokenized) `exmpl` graph bound in an earlier cell.
ex_model = encoder.Encoder(graph_tokenizer=None, **config)
ex_model(exmpl)

In [None]:
# Encoder variant with a single MPNN config and do_two_stage=False. It is
# given the graph tokenizer and run on a tokenized copy of graph1 of the
# first record. This rebinds `mpnn_configs`, `config` and `ex_model`.
mpnn_configs = [
    mpnn.Config(node_out_feats=64, edge_hidden_feats=32, num_step_message_passing=3),
]
config = dict(
    mpnn_configs=mpnn_configs,
    do_two_stage=False,
    embedding_dim_x=32,
    embedding_dim_edge_attr=64,
    do_edge_update=False,
    num_sabs=8,
    dropout=0.1,
    heads=8,
    warmup=0.05,
    lr=1e-3,
    weight_decay=0.01,
    betas=(0.99, 0.999),
    act_mode="gelu",
    aggr_mode="max",
)
ex_model = encoder.Encoder(graph_tokenizer=graph_tokenizer, **config)

first_graph = all_data[0]["graph1"]
exmpl_tokenized_graph = graph_tokenizer.tokenize(first_graph)
ex_model(exmpl_tokenized_graph)

In [None]:
class CrossEncoder(torch.nn.Module):
    """Score a pair of graphs using one shared encoder.

    Both graphs are embedded by the same ``encoder``. The two embeddings are
    then either compared with cosine similarity, or concatenated along their
    last dimension and passed through a linear layer followed by a sigmoid.

    Args:
        encoder: Module called as ``encoder(graph)``. When
            ``do_cosine_similarity`` is False it must expose
            ``encoder.readout.in_channels``; twice that value is used as the
            input width of the linear readout.
            NOTE(review): this assumes the embedding width returned by
            ``encoder(graph)`` equals ``readout.in_channels`` -- confirm
            against the encoder implementation.
        do_cosine_similarity: If True, ``forward`` returns the cosine
            similarity of the two embeddings and no readout layer is created.
        **kwargs: Accepted for call-site compatibility and ignored.
    """

    def __init__(self, encoder, do_cosine_similarity, **kwargs):
        super().__init__()
        self.encoder = encoder
        self.do_cosine_similarity = do_cosine_similarity
        if not self.do_cosine_similarity:
            # The readout consumes the two embeddings concatenated side by side.
            self.readout = torch.nn.Linear(self.encoder.readout.in_channels * 2, 1)

    def forward(self, graph1, graph2):
        """Embed both graphs with the shared encoder and score the pair.

        Returns:
            The cosine similarity of the two embeddings when
            ``do_cosine_similarity`` is True; otherwise the sigmoid of the
            linear readout applied to the concatenated embeddings. In both
            cases the last (feature) dimension is reduced away.
        """
        embed1 = self.encoder(graph1)
        embed2 = self.encoder(graph2)

        if self.do_cosine_similarity:
            # Reduce over the last dimension, the same axis the readout branch
            # concatenates on, so unbatched 1-D embeddings work here as well
            # (the default dim=1 raises for 1-D inputs).
            return torch.nn.functional.cosine_similarity(embed1, embed2, dim=-1)

        paired = torch.cat([embed1, embed2], dim=-1)
        return torch.sigmoid(self.readout(paired)).squeeze(dim=-1)

# Wrap the most recently built encoder in a CrossEncoder that uses the
# linear + sigmoid readout rather than cosine similarity.
m = CrossEncoder(encoder=ex_model, do_cosine_similarity=False)