In [2]:
import pytorch_lightning as pl
from torch import optim
import wandb
import torch
import os

from torch.nn.functional import binary_cross_entropy

import torch_geometric as tg
import torchmetrics
from pytorch_lightning.loggers.wandb import WandbLogger

from GraphCoAttention.nn.models.HeterogenousCoAttention import HeteroGNN

import torch
from torch import nn
from torch.nn import Parameter, Sequential, ReLU, GRU
from torch.nn import functional as F
import torch_geometric as tg

from torch_geometric.nn import GATConv, HeteroConv, Linear, GATv2Conv, NNConv, Set2Set
from torch_geometric.nn.glob import global_mean_pool, global_add_pool
from torch.nn import LeakyReLU

from GraphCoAttention.data.MultipartiteData import BipartitePairData


In [3]:
    
class Net(torch.nn.Module):
    def __init__(self, hidden_channels, outer_out_channels, inner_out_channels,
                 num_layers, batch_size):
        super().__init__()

        self.batch_size = batch_size
        self.dim = hidden_channels

        nn = Sequential(Linear(5, 128), ReLU(), Linear(128, self.dim * self.dim))

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HeteroConv({
                ('x_i', 'inner_edge_i', 'x_i'): NNConv(self.dim, self.dim, nn, aggr='mean'),
                 ('x_j', 'inner_edge_j', 'x_j'): NNConv(self.dim, self.dim, nn, aggr='mean'),
                ('x_i', 'outer_edge_ij', 'x_j'): NNConv(self.dim, self.dim, nn, aggr='mean'),
                ('x_j', 'outer_edge_ji', 'x_i'): NNConv(self.dim, self.dim, nn, aggr='mean'),
                ('x_i', 'inner_edge_i', 'x_i'): NNConv(self.dim, self.dim, nn, aggr='mean'),
                ('x_j', 'inner_edge_j', 'x_j'): NNConv(self.dim, self.dim, nn, aggr='mean'),
            }, aggr='sum')
            self.convs.append(conv)
        
        self.lin = Linear(self.dim, outer_out_channels)

        self.lin_i = Linear(self.dim, inner_out_channels)
        self.lin_j = Linear(self.dim, inner_out_channels)
        

    def forward(self, x_dict, edge_index_dict, d):
        x_dict, edge_index_dict = x_dict, edge_index_dict
        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: torch.tanh(torch.sum(x.view(-1, self.dim), dim=1))
                      for key, x in x_dict.items()}

        p_i = global_add_pool(x_dict['x_i'], batch=d['x_i'].batch, size=self.batch_size).unsqueeze(1).tanh()
        p_j = global_add_pool(x_dict['x_j'], batch=d['x_j'].batch, size=self.batch_size).unsqueeze(1).tanh()
        
        y_i_ = self.lin_i(p_i)
        y_j_ = self.lin_j(p_j)

        x = torch.cat([p_i, p_j], dim=1)
        x = torch.sum(x, dim=1)

        logits = self.lin(x).sigmoid()
        return logits, y_i_, y_j_
    

In [5]:
class Learner(pl.LightningModule):
    def __init__(self, root_dir, lr=0.001):
        super().__init__()
        self.root_dir = root_dir

        # self.dataset = HeteroDrugDrugInteractionData(root=self.root_dir)
        self.dataset = HeteroQM9(root=self.root_dir)
        self.dataset = self.dataset.shuffle()

        self.num_workers = 32
        self.lr = lr
        # self.num_node_types = len(self.dataset[0].x_dict)
        self.n_cycles = 16
        self.dropout = 0.1
        self.batch_size = 2
        self.lr = 0.001
        self.hidden_dim = 25

        self.Net = Net(hidden_channels=self.hidden_dim, outer_out_channels=1, inner_out_channels=1, num_layers=self.n_cycles,
                                                 batch_size=self.batch_size)

        self.bce_loss = torch.nn.BCEWithLogitsLoss()
        self.mse_loss = torch.nn.MSELoss()
        
        
    def forward(self, batch, *args, **kwargs):

        y_ij, y_i_, y_j_ = self.Net(batch.x_dict, batch.edge_index_dict, batch)

        # logits = self.CoAttention(data)
        # logits = torch.sigmoid(torch.mean(logits))
        return y_ij, y_i_, y_j_

    def training_step(self, data, batch_idx):
        y_ij, y_i_, y_j_ = self(data)
        y_pred = y_ij.squeeze()
        y_true = data.binary_y.float()

        mse1 = self.mse_loss(input=y_i_.flatten(), target=data['y_i'].y)
        mse2 = self.mse_loss(input=y_j_.flatten(), target=data['y_j'].y)
        mse = mse1 + mse2
        bce = self.bce_loss(input=y_pred, target=y_true)
        loss = bce + mse

        # self.log('train_loss', bce)
        wandb.log({"train/loss": loss})
        wandb.log({'train/y_pred': y_pred})
        wandb.log({'train/y_true': y_true})
        return {'loss': loss}  # , 'train_accuracy': acc, 'train_f1': f1}

    def validation_step(self, val_batch, batch_idx):

        # print(val_batch.binary_y.float())

        y_ij, y_i_, y_j_ = self(val_batch)
        y_pred = y_ij.squeeze()
        y_true = val_batch.binary_y.float()

        mse1 = self.mse_loss(input=y_i_.flatten(), target=val_batch['y_i'].y)
        mse2 = self.mse_loss(input=y_j_.flatten(), target=val_batch['y_j'].y)
        mse = mse1 + mse2
        bce = self.bce_loss(input=y_pred, target=y_true)
        loss = bce + mse
        # self.log('validation_loss', bce_loss)
        # self.log('Predicted', y_pred)
        # self.log('Actual', y_true)
        wandb.log({"val/loss": loss})
        return {'loss': loss}

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.lr, betas=(0.28, 0.93), weight_decay=0.01)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, '25,35', gamma=0.1)
        return [optimizer], [scheduler]

    def train_dataloader(self):
        return tg.loader.DataLoader(list(self.dataset),
                                    num_workers=self.num_workers, pin_memory=False, shuffle=True)

    def val_dataloader(self):
        return tg.loader.DataLoader(list(self.dataset), 
                                    num_workers=self.num_workers, pin_memory=False, shuffle=True)


if __name__ == '__main__':
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    data_dir = os.path.join('GraphCoAttention', 'data')
    wandb.init()
    wandb_logger = WandbLogger(project='flux', log_model='all')
    trainer = pl.Trainer(gpus=[0], max_epochs=2000, check_val_every_n_epoch=500, accumulate_grad_batches=1)
    trainer.fit(Learner(data_dir))


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkatharina_z[0m (use `wandb login --relogin` to force relogin)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type              | Params
-----------------------------------------------
0 | Net      | Net               | 123 K 
1 | bce_loss | BCEWithLogitsLoss | 0     
2 | mse_loss | MSELoss           | 0     
-----------------------------------------------
123 K     Trainable params
0         Non-trainable params
123 K     Total params
0.492     Total estimated model params size (MB)


Validation sanity check:   0%|                            | 0/2 [00:00<?, ?it/s]Batch(
  binary_y=[1],
  [1mx_i[0m={
    x=[23, 9],
    batch=[23],
    ptr=[2]
  },
  [1mx_j[0m={
    x=[17, 9],
    batch=[17],
    ptr=[2]
  },
  [1my_i[0m={
    y=[15],
    y_norm=[15]
  },
  [1my_j[0m={
    y=[15],
    y_norm=[15]
  },
  [1m(x_i, inner_edge_i, x_i)[0m={
    edge_index=[2, 48],
    edge_attr=[48, 3]
  },
  [1m(x_j, inner_edge_j, x_j)[0m={
    edge_index=[2, 34],
    edge_attr=[23, 3]
  },
  [1m(x_i, outer_edge_ij, x_j)[0m={
    edge_index=[2, 391],
    edge_attr=[23, 3]
  },
  [1m(x_j, outer_edge_ji, x_i)[0m={ edge_index=[2, 391] }
)


TypeError: linear(): argument 'input' (position 1) must be Tensor, not NoneType