In [1]:
#!pip install lifelines
#!pip install dgl
#!pip install torch==1.9.0
#!pip install pydot
#!pip install ogb
#!pip install preprocessing

In [2]:
import argparse
import random

import dgl

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from dgl.dataloading import GraphDataLoader
from ogb.graphproppred import Evaluator
from ogb.graphproppred.mol_encoder import AtomEncoder
from torch.utils.data import Dataset
from tqdm import tqdm

In [3]:
def aggregate_mean(h, vector_field, h_in):
    return torch.mean(h, dim=1)


def aggregate_max(h, vector_field, h_in):
    return torch.max(h, dim=1)[0]


def aggregate_sum(h, vector_field, h_in):
    return torch.sum(h, dim=1)

In [4]:
def aggregate_dir_dx(h, vector_field, h_in, eig_idx=1):
    eig_w = (
        (vector_field[:, :, eig_idx])
        / (
            torch.sum(
                torch.abs(vector_field[:, :, eig_idx]), keepdim=True, dim=1
            )
            + 1e-8
        )
    ).unsqueeze(-1)
    h_mod = torch.mul(h, eig_w)
    return torch.abs(torch.sum(h_mod, dim=1) - torch.sum(eig_w, dim=1) * h_in)

In [5]:
class FCLayer(nn.Module):
    def __init__(self, in_size, out_size):
        super(FCLayer, self).__init__()

        self.in_size = in_size
        self.out_size = out_size
        self.linear = nn.Linear(in_size, out_size, bias=True)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.linear.weight, 1 / self.in_size)
        self.linear.bias.data.zero_()

    def forward(self, x):
        h = self.linear(x)
        return h

In [6]:
class MLP(nn.Module):
    def __init__(self, in_size, out_size):
        super(MLP, self).__init__()

        self.in_size = in_size
        self.out_size = out_size
        self.fc = FCLayer(in_size, out_size)

    def forward(self, x):
        x = self.fc(x)
        return x

In [7]:
class DGNLayer(nn.Module):
    def __init__(self, in_dim, out_dim, dropout, aggregators):
        super().__init__()

        self.dropout = dropout

        self.aggregators = aggregators

        self.batchnorm_h = nn.BatchNorm1d(out_dim)
        self.pretrans = MLP(in_size=2 * in_dim, out_size=in_dim)
        self.posttrans = MLP(
            in_size=(len(aggregators) * 1 + 1) * in_dim, out_size=out_dim
        )

In [8]:
def pretrans_edges(self, edges):
        z2 = torch.cat([edges.src["h"], edges.dst["h"]], dim=1)
        vector_field = edges.data["eig"]
        return {"e": self.pretrans(z2), "vector_field": vector_field}

def message_func(self, edges):
  return {
    "e": edges.data["e"],
    "vector_field": edges.data["vector_field"],
    }
def reduce_func(self, nodes):
    h_in = nodes.data["h"]
    h = nodes.mailbox["e"]

    vector_field = nodes.mailbox["vector_field"]

    h = torch.cat(
      [
      aggregate(h, vector_field, h_in)for aggregate in self.aggregators
       ],dim=1)
    return {"h": h}

def forward(self, g, h, snorm_n):
  g.ndata["h"] = h

  # pretransformation
  g.apply_edges(self.pretrans_edges)

  # aggregation
  g.update_all(self.message_func, self.reduce_func)
  h = torch.cat([h, g.ndata["h"]], dim=1)

  # posttransformation
  h = self.posttrans(h)

  # graph and batch normalization
  h = h * snorm_n
  h = self.batchnorm_h(h)
  h = F.relu(h)

  h = F.dropout(h, self.dropout, training=self.training)

  return h

In [9]:
class MLPReadout(nn.Module):
    def __init__(self, input_dim, output_dim, L=2):  # L=nb_hidden_layers
        super().__init__()
        list_FC_layers = [
            nn.Linear(input_dim // 2**l, input_dim // 2 ** (l + 1), bias=True)
            for l in range(L)
        ]
        list_FC_layers.append(
            nn.Linear(input_dim // 2**L, output_dim, bias=True)
        )
        self.FC_layers = nn.ModuleList(list_FC_layers)
        self.L = L

    def forward(self, x):
        y = x
        for l in range(self.L):
            y = self.FC_layers[l](y)
            y = F.relu(y)
        y = self.FC_layers[self.L](y)
        return y

In [10]:
class DGNNet(nn.Module):
    def __init__(self, hidden_dim=420, out_dim=420, dropout=0.2, n_layers=4):
        super().__init__()

        self.embedding_h = AtomEncoder(emb_dim=hidden_dim)
        self.aggregators = [
            aggregate_mean,
            aggregate_sum,
            aggregate_max,
            aggregate_dir_dx,
        ]

        self.layers = nn.ModuleList(
            [
                DGNLayer(
                    in_dim=hidden_dim,
                    out_dim=hidden_dim,
                    dropout=dropout,
                    aggregators=self.aggregators,
                )
                for _ in range(n_layers - 1)
            ]
        )
        self.layers.append(
            DGNLayer(
                in_dim=hidden_dim,
                out_dim=out_dim,
                dropout=dropout,
                aggregators=self.aggregators,
            )
        )

        # 128 out dim since ogbg-molpcba has 128 tasks
        self.MLP_layer = MLPReadout(out_dim, 128)

    def forward(self, g, h, snorm_n):
        h = self.embedding_h(h)

        for i, conv in enumerate(self.layers):
            h_t = conv(g, h, snorm_n)
            h = h_t

        g.ndata["h"] = h

        hg = dgl.mean_nodes(g, "h")

        return self.MLP_layer(hg)

    def loss(self, scores, labels):
        is_labeled = labels == labels
        loss = nn.BCEWithLogitsLoss()(
            scores[is_labeled], labels[is_labeled].float()
        )
        return loss

In [11]:
def train_epoch(model, optimizer, device, data_loader):
    model.train()
    epoch_loss = 0
    epoch_train_AP = 0
    list_scores = []
    list_labels = []
    for iter, (batch_graphs, batch_labels, batch_snorm_n) in enumerate(
        data_loader
    ):
        batch_graphs = batch_graphs.to(device)
        batch_x = batch_graphs.ndata["feat"]  # num x feat
        batch_snorm_n = batch_snorm_n.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()

        print(batch_x.to(torch.int64))
        batch_scores = model(batch_graphs, batch_x.to(torch.int64), batch_snorm_n)

        loss = model.loss(batch_scores, batch_labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        list_scores.append(batch_scores)
        list_labels.append(batch_labels)

    epoch_loss /= iter + 1

    evaluator = Evaluator(name="ogbg-molpcba")
    epoch_train_AP = evaluator.eval(
        {"y_pred": torch.cat(list_scores), "y_true": torch.cat(list_labels)}
    )["ap"]

    return epoch_loss, epoch_train_AP




In [12]:
def evaluate_network(model, device, data_loader):
    model.eval()
    epoch_test_loss = 0
    epoch_test_AP = 0
    with torch.no_grad():
        list_scores = []
        list_labels = []
        for iter, (batch_graphs, batch_labels, batch_snorm_n) in enumerate(
            data_loader
        ):
            batch_graphs = batch_graphs.to(device)
            batch_x = batch_graphs.ndata["feat"]
            batch_snorm_n = batch_snorm_n.to(device)
            batch_labels = batch_labels.to(device)

            batch_scores = model(batch_graphs, batch_x, batch_snorm_n)

            loss = model.loss(batch_scores, batch_labels)
            epoch_test_loss += loss.item()
            list_scores.append(batch_scores)
            list_labels.append(batch_labels)

        epoch_test_loss /= iter + 1

        evaluator = Evaluator(name="ogbg-molpcba")
        epoch_test_AP = evaluator.eval(
            {"y_pred": torch.cat(list_scores), "y_true": torch.cat(list_labels)}
        )["ap"]

    return epoch_test_loss, epoch_test_AP

In [13]:
def train(dataset, params):
    trainset, valset, testset = dataset.train, dataset.val, dataset.test
    device = params.device

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = DGNNet()
    model = model.to(device)

    # view model parameters
    total_param = 0
    print("MODEL DETAILS:\n")
    for param in model.parameters():
        total_param += np.prod(list(param.data.size()))
    print("DGN Total parameters:", total_param)

    optimizer = optim.Adam(model.parameters(), lr=0.0008, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.8, patience=8, verbose=True
    )

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_APs, epoch_val_APs, epoch_test_APs = [], [], []

    train_loader = GraphDataLoader(
        trainset,
        batch_size=params.batch_size,
        shuffle=True,
        collate_fn=dataset.collate,
        pin_memory=True,
    )
    val_loader = GraphDataLoader(
        valset,
        batch_size=params.batch_size,
        shuffle=False,
        collate_fn=dataset.collate,
        pin_memory=True,
    )
    test_loader = GraphDataLoader(
        testset,
        batch_size=params.batch_size,
        shuffle=False,
        collate_fn=dataset.collate,
        pin_memory=True,
    )

    with tqdm(range(450), unit="epoch") as t:
        for epoch in t:
            t.set_description("Epoch %d" % epoch)

            epoch_train_loss, epoch_train_ap = train_epoch(
                model, optimizer, device, train_loader
            )
            epoch_val_loss, epoch_val_ap = evaluate_network(
                model, device, val_loader
            )

            epoch_train_losses.append(epoch_train_loss)
            epoch_val_losses.append(epoch_val_loss)
            epoch_train_APs.append(epoch_train_ap.item())
            epoch_val_APs.append(epoch_val_ap.item())

            _, epoch_test_ap = evaluate_network(model, device, test_loader)

            epoch_test_APs.append(epoch_test_ap.item())

            t.set_postfix(
                train_loss=epoch_train_loss,
                train_AP=epoch_train_ap.item(),
                val_AP=epoch_val_ap.item(),
                refresh=False,
            )

            scheduler.step(-epoch_val_ap.item())

            if optimizer.param_groups[0]["lr"] < 1e-5:
                print("\n!! LR EQUAL TO MIN LR SET.")
                break

            print("")

    best_val_epoch = np.argmax(np.array(epoch_val_APs))
    best_train_epoch = np.argmax(np.array(epoch_train_APs))
    best_val_ap = epoch_val_APs[best_val_epoch]
    best_val_test_ap = epoch_test_APs[best_val_epoch]
    best_val_train_ap = epoch_train_APs[best_val_epoch]
    best_train_ap = epoch_train_APs[best_train_epoch]

    print("Best Train AP: {:.4f}".format(best_train_ap))
    print("Best Val AP: {:.4f}".format(best_val_ap))
    print("Test AP of Best Val: {:.4f}".format(best_val_test_ap))
    print("Train AP of Best Val: {:.4f}".format(best_val_train_ap))

In [14]:
class Subset(object):
    def __init__(self, dataset, labels, indices):
        dataset = [dataset[idx] for idx in indices]
        labels = [labels[idx] for idx in indices]
        self.dataset, self.labels = [], []
        for i, g in enumerate(dataset):
            if g.num_nodes() > 5:
                self.dataset.append(g)
                self.labels.append(labels[i])
        self.len = len(self.dataset)

    def __getitem__(self, item):
        return self.dataset[item], self.labels[item]

    def __len__(self):
        return self.len

In [15]:
class PCBADataset(Dataset):
    def __init__(self, name):
        print("[I] Loading dataset %s..." % (name))
        self.name = name

        self.dataset, self.split_idx = prepare_dataset(name)
        print("One hot encoding substructure counts... ", end="")
        self.d_id = [1] * self.dataset[0].edata["subgraph_counts"].shape[1]

        for g in self.dataset:
            g.edata["eig"] = g.edata["subgraph_counts"].float()

        self.train = Subset(
            self.dataset, self.split_idx["label"], self.split_idx["train"]
        )
        self.val = Subset(
            self.dataset, self.split_idx["label"], self.split_idx["valid"]
        )
        self.test = Subset(
            self.dataset, self.split_idx["label"], self.split_idx["test"]
        )

        print(
            "train, test, val sizes :",
            len(self.train),
            len(self.test),
            len(self.val),
        )
        print("[I] Finished loading.")

    # form a mini batch from a given list of samples = [(graph, label) pairs]
    def collate(self, samples):
        # The input samples is a list of pairs (graph, label).
        graphs, labels = map(list, zip(*samples))
        labels = torch.stack(labels)

        tab_sizes_n = [g.num_nodes() for g in graphs]
        tab_snorm_n = [
            torch.FloatTensor(size, 1).fill_(1.0 / size) for size in tab_sizes_n
        ]
        snorm_n = torch.cat(tab_snorm_n).sqrt()
        batched_graph = dgl.batch(graphs)

        return batched_graph, labels, snorm_n

In [16]:
from google.colab import drive
import pandas as pd
drive.mount('/content/drive')
data = pd.read_csv("/content/drive/MyDrive/Case/Graph Class/train.csv") #Josh's directory
data.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0.1,Unnamed: 0,MassScore,VolumeScore,Area2D,NumLesion3D,isAgZero,isLesion3DBelow5,AgGroupX1,AgGroupX2,AgGroupX3,...,female,smokingstatus,Baseline.BMI,IBD,SLE,Psoriasis,RA,BASELINE_DM,DM,Status
0,0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,33.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,11.391991,95.593929,38.62381,12.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,36.5,0.0,0.0,0.0,0.0,1.0,1.0,1.0
2,2,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,30.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,1.0,0.0,21.32,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,14.42598,76.675415,30.975342,3.0,0.0,1.0,1.0,0.0,0.0,...,1.0,1.0,33.76,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
from dgl.data import DGLDataset

class MACEDataset(DGLDataset):
    def __init__(self):
        super().__init__(name='MACE')

    def process(self):
        MACEData=data
        # edges = pd.read_csv('./graph_edges.csv')
        # properties = pd.read_csv('./graph_properties.csv')
        self.graphs = []
        self.labels = []
        self.time = []
        self.dim_nfeats = 0
        self.gclasses=1

        # Create a graph for each graph ID from the edges table.
        # First process the properties table into two dictionaries with graph IDs as keys.
        # The label and number of nodes are values.
        label_dict = {}
        num_nodes_dict = {}
        length=len(MACEData)
        rows=[]
        cols=[]


        connections=[['female','smokingstatus'],['female','Baseline.BMI'],['Age_right','BASELINE_DM'],
['Age_right','smokingstatus'],['BASELINE_DM','MassScore'],
['Age_right','SLE'],['Age_right','Psoriasis'],['Age_right','RA'],['Age_right','IBD'],
['female','Age_right'],
['female','MassScore'],['female','Baseline.BMI'],
['BASELINE_DM','MassScore'],['DM','MassScore'],
['isAgZero','MassScore'],['isAgZero','final_score'],
['MassScore','isArt2plus'],['MassScore','isArt3plus'],['MassScore','numArtCalc'],
['numArtCalc','numLesionPerArtery3D_LM1'],['numArtCalc','numLesionPerArtery3D_LAD1'],['numArtCalc','numLesionPerArtery3D_LCX1'],['numArtCalc','numLesionPerArtery3D_RCA1'],
['MassScore','numLesionPerArtery3D_LM1'],['MassScore','numLesionPerArtery3D_LAD1'],['MassScore','numLesionPerArtery3D_LCX1'],['MassScore','numLesionPerArtery3D_RCA1'],
['numLesionPerArtery3D_LM1','AgastonScorePerArtery2D_LM1'],['numLesionPerArtery3D_LAD1','AgastonScorePerArtery2D_LAD1'],['numLesionPerArtery3D_LCX1','AgastonScorePerArtery2D_LCX1'],['numLesionPerArtery3D_RCA1','AgastonScorePerArtery2D_RCA1'],
['numLesionPerArtery3D_LM1','MassScorePerArtery_LM1'],['numLesionPerArtery3D_LAD1','MassScorePerArtery_LAD1'],['numLesionPerArtery3D_LCX1','MassScorePerArtery_LCX1'],['numLesionPerArtery3D_RCA1','MassScorePerArtery_RCA1'],
['numLesionPerArtery3D_LM1','VolumeScorePerArtery_LM1'],['numLesionPerArtery3D_LAD1','VolumeScorePerArtery_LAD1'],['numLesionPerArtery3D_LCX1','VolumeScorePerArtery_LCX1'],['numLesionPerArtery3D_RCA1','VolumeScorePerArtery_RCA1'],
['numLesionPerArtery3D_LM1','DistTop2LastLesionPerArtery_LM1'],['numLesionPerArtery3D_LAD1','DistTop2LastLesionPerArtery_LAD1'],['numLesionPerArtery3D_LCX1','DistTop2LastLesionPerArtery_LCX1'],['numLesionPerArtery3D_RCA1','DistTop2LastLesionPerArtery_RCA1'],
['numLesionPerArtery3D_LM1','DistFirst2LastLesionPerArtery_LM1'],['numLesionPerArtery3D_LAD1','DistFirst2LastLesionPerArtery_LAD1'],['numLesionPerArtery3D_LCX1','DistFirst2LastLesionPerArtery_LCX1'],['numLesionPerArtery3D_RCA1','DistFirst2LastLesionPerArtery_RCA1'],
['MassScore','massHist1'],['MassScore','massHist2'],['MassScore','massHist3'],['MassScore','massHist4'],['MassScore','massHist5'],
['VolumeScore','avrHist1'],['VolumeScore','avrHist2'],['VolumeScore','avrHist3'],['VolumeScore','avrHist4'],['VolumeScore','avrHist5'],
['Area2D','NumLesion3D'],['Area2D','VolumeScore'],
['MassScore','NumLesion3D'],
['isLesion3DBelow5','NumLesion3D'],
['final_score','AgGroupX1'], ['AgGroupX2','final_score'], ['AgGroupX3','final_score'],
['AgGroupX1','ICfirstMomentH1'],['AgGroupX2','ICfirstMomentH2'],['AgGroupX3','ICfirstMomentH3'],
['AgGroupX1','ICsecondMomentH1'],['AgGroupX2','ICsecondMomentH2'],['AgGroupX3','ICsecondMomentH3'],
['AgGroupX1','ICmeanMomentH1'],['AgGroupX2','ICmeanMomentH2'],['AgGroupX3','ICmeanMomentH3'],
['AgGroupX1','ICskewnesstMomentH1'],['AgGroupX2','ICskewnesstMomentH2'],['AgGroupX3','ICskewnesstMomentH3'],
['AgGroupX1','ICkurtosisMomentH1'],['AgGroupX2','ICkurtosisMomentH2'],['AgGroupX3','ICkurtosisMomentH3']
]
        MACEData=MACEData.drop(['Time','Status','Unnamed: 0'],axis=1)

        for i in connections:
          rows.append(MACEData.columns.get_loc(i[0]))
          cols.append(MACEData.columns.get_loc(i[1]))
          i[0]=MACEData.columns.get_loc(i[0])
          i[1]=MACEData.columns.get_loc(i[1])

        for i in range(length):
            x=MACEData.iloc[[i]]
            label_dict = {}
            num_nodes_dict = {}
            g = dgl.graph((rows, cols), num_nodes=np.size(x))
            edges=g.edges()
            parent=np.unique(np.array(edges[0]))
            child=np.unique(np.array(edges[1]))
            leaf=np.setdiff1d(parent,child)

            feats=np.unique(connections)
            self.dim_nfeats=len(feats)
            featarray=np.zeros([len(feats),len(x)+1])
            g.ndata['feat']=torch.rot90(torch.tensor(x.to_numpy()),1,[1,0])
            #g = dgl.add_self_loop(g)
            self.graphs.append(g)
            self.labels.append([data['Status'][i]])
            self.time.append([data['Time'][i]])
        

        from sklearn.model_selection import train_test_split

        # assume that your dataset is stored in a variable called "data"
        # and the labels are stored in a variable called "labels"

        # split the dataset into training and testing sets
        train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

        # split the training set into training and validation sets
        train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42)

        # create a dictionary that stores the indices for each split
        self.split_idx = {
          "train": list(range(len(train_data))),
          "valid": list(range(len(val_data))),
          "test": list(range(len(test_data)))
            }

        # output the indices as a single list
        #output = [split_idx["train"], split_idx["val"], split_idx["test"]]


        # Convert the label list to tensor for saving.
        self.labels = torch.tensor(self.labels)
        self.time = torch.tensor(self.time)

        self.train = Subset(
            self.graphs, self.labels, self.split_idx["train"]
        )
        self.val = Subset(
            self.graphs, self.labels, self.split_idx["valid"]
        )
        self.test = Subset(
            self.graphs, self.labels, self.split_idx["test"]
        )

        print(
            "train, test, val sizes :",
            len(self.train),
            len(self.test),
            len(self.val),
        )
        print("[I] Finished loading.")

    def __getitem__(self, i):
        return self.graphs[i], self.labels[i]

    def __len__(self):
        return len(self.graphs)

    def collate(self, samples):
        # The input samples is a list of pairs (graph, label).
        graphs, labels = map(list, zip(*samples))
        labels = torch.stack(labels)

        tab_sizes_n = [g.num_nodes() for g in graphs]
        tab_snorm_n = [
            torch.FloatTensor(size, 1).fill_(1.0 / size) for size in tab_sizes_n
        ]
        snorm_n = torch.cat(tab_snorm_n).sqrt()
        batched_graph = dgl.batch(graphs)

        return batched_graph, labels, snorm_n

In [18]:
#if __name__ == "__main__":
#parser = argparse.ArgumentParser()
    #parser.add_argument(
   #     "--gpu_id", default=0, type=int, help="Please give a value for gpu id"
    #)
    #parser.add_argument(
     #   "--seed", default=41, type=int, help="Please give a value for seed"
    #)
    #parser.add_argument(
    #    "--batch_size",
    #    default=2048,
    #    type=int,
    #    help="Please give a value for batch_size",
    #)
#args = parser.parse_args()

    # device
#args.device = torch.device(
#  "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu"
#  )

#args.device = torch.device("cpu")
args=argparse.Namespace(device=torch.device("cpu"), seed=2023,batch_size=5)

    # setting seeds
#random.seed(args.seed)
#np.random.seed(args.seed)
#torch.manual_seed(args.seed)
#if torch.cuda.is_available():
#  torch.cuda.manual_seed(args.seed)

dataset = MACEDataset()
train(dataset, args)

train, test, val sizes : 2153 674 539
[I] Finished loading.
Training Graphs:  2153
Validation Graphs:  539
Test Graphs:  674
MODEL DETAILS:

DGN Total parameters: 5143133


Epoch 0:   0%|          | 0/450 [00:00<?, ?epoch/s]

tensor([[    9],
        [   47],
        [   19],
        [    1],
        [    0],
        [    1],
        [    0],
        [    0],
        [    0],
        [    0],
        [    1],
        [    0],
        [    0],
        [    0],
        [    0],
        [    1],
        [    0],
        [   58],
        [    0],
        [    0],
        [    0],
        [    9],
        [    0],
        [    0],
        [    0],
        [   47],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [    0],
        [   51],
        [    0],
        [    0],
        [    0],
        [    4],
        [    0],
        [    0],
        [  131],
        [   88],
        [   40],
        [17408],
        [ 7930],
        [ 1609],
        [   11],
        [    7],
        [    3],
        [    7],
        [    4],
        [    0],
        [    2




IndexError: ignored

In [28]:
dataset = PCBADataset("ogbg-molpcba")
train(dataset, args)

[I] Loading dataset ogbg-molpcba...


NameError: ignored