In [1]:
import sys
# Put the local project checkouts on the import path (appended in this order)
# so the graph_attribution / xaibench imports below can be resolved.
sys.path.extend(
    [
        "/home/hawk31/graph-attribution/",
        "/home/hawk31/xaibench_tf/",
    ]
)

In [3]:
import argparse
import collections
import os
from contextlib import nullcontext

import dill
import numpy as np
import pandas as pd
import sonnet as snt
import tensorflow as tf
from graph_attribution.experiments import GNN
from graph_attribution.featurization import MolTensorizer, smiles_to_graphs_tuple
from graph_attribution.graphnet_models import BlockType
from graph_attribution.hparams import get_hparams
from graph_attribution.tasks import BinaryClassificationTaskType
from graph_attribution.templates import TargetType
from graph_attribution.training import make_tf_opt_epoch_fn
from tqdm import tqdm

from xaibench.utils import LOG_PATH, MODELS_PATH

# Training settings; these are fed into get_hparams(...) further down.
N_EPOCHS = 500
N_LAYERS = 10
BATCH_SIZE = 32

# Pick the execution context: the first GPU when TensorFlow reports one,
# otherwise a no-op context manager so `with DEVICE:` works either way.
GPUS = tf.config.list_physical_devices("GPU")
if not GPUS:
    DEVICE = nullcontext()
else:
    # Memory growth is enabled on the first GPU before it is used.
    tf.config.experimental.set_memory_growth(GPUS[0], True)
    DEVICE = tf.device("/GPU:0")


In [19]:
# Read the logic7 table; it provides a "smiles" column and a "label" column.
df = pd.read_csv("/home/hawk31/graph-attribution/data/logic7/logic7_smiles.csv")

smiles = df["smiles"].values
# Labels get a trailing axis so they form an (n_samples, 1) column array.
values = np.expand_dims(df["label"].values, axis=1)

# Convert every SMILES string into the graph representation fed to the model.
tensorizer = MolTensorizer()
graph_data = smiles_to_graphs_tuple(smiles, tensorizer)
print(values.shape)


(4326, 1)


In [20]:
# Override the default hyper-parameters with this notebook's settings.
hp = get_hparams(
    dict(
        block_type="gcn",
        epochs=N_EPOCHS,
        batch_size=BATCH_SIZE,
        n_layers=N_LAYERS,
        task_type=None,
    )
)

# Output activation and loss come from the binary-classification task type;
# the prediction target lives on the graph "globals".
task_act = BinaryClassificationTaskType().get_nn_activation_fn()
task_loss = BinaryClassificationTaskType().get_nn_loss_fn()
target_type = TargetType("globals")

In [22]:
with DEVICE:
    # Build the GNN from the resolved hyper-parameters; a single scalar
    # output per graph is produced (y_output_size=1).
    model = GNN(
        node_size=hp.node_size,
        edge_size=hp.edge_size,
        global_size=hp.global_size,
        y_output_size=1,
        block_type=BlockType(hp.block_type),
        activation=task_act,
        target_type=target_type,
        n_layers=hp.n_layers,
    )
    model(graph_data)  # one pass needed for init

    optimizer = snt.optimizers.Adam(hp.learning_rate)

    # Function that runs one optimisation epoch and returns the loss.
    opt_one_epoch = make_tf_opt_epoch_fn(
        graph_data, values, hp.batch_size, model, optimizer, task_loss
    )

    pbar = tqdm(range(hp.epochs))
    metrics = collections.defaultdict(list)

    for _ in pbar:
        train_loss = opt_one_epoch(graph_data, values).numpy()
        metrics["bce"].append(train_loss)

        # Show the latest value of every tracked metric on the progress bar.
        # The comprehension variable is named `history` (not `values`) so it
        # cannot be confused with the label array used above.
        pbar.set_postfix({key: history[-1] for key, history in metrics.items()})

    # Predictions are only consumed after training, so the full-dataset
    # forward pass is done once here instead of once per epoch.
    y_hat = model(graph_data).numpy().squeeze()

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
100%|██████████| 500/500 [25:49<00:00,  3.10s/it, bce=0.00224]


In [23]:
from sklearn.metrics import roc_auc_score

# ROC-AUC of the final predictions against the labels. Note this is computed
# on the same data the model was trained on, so it is a training-set score.
roc_auc_score(y_true=values, y_score=y_hat)

1.0