In [1]:
import numpy as np
import pickle
import torch
import lightning.pytorch as pl
from tdc.benchmark_group import admet_group
from tqdm import tqdm
import chemprop
from chemprop import data, models, nn

In [2]:
# Seed value handed to Lightning's global seeding helper; the call is kept as
# the cell's last expression so the notebook still displays the seed in use.
SEED = 42
pl.seed_everything(SEED)

Seed set to 42


42

In [3]:
# Benchmarks to run, in execution order. Every entry carries the same
# ('binary', False) setting, so the mapping is built from the list of names
# rather than repeating the value thirteen times.
benchmark_config = dict.fromkeys(
    (
        'hia_hou',
        'pgp_broccatelli',
        'bioavailability_ma',
        'bbb_martins',
        'cyp2c9_veith',
        'cyp2d6_veith',
        'cyp3a4_veith',
        'cyp2c9_substrate_carbonmangels',
        'cyp2d6_substrate_carbonmangels',
        'cyp3a4_substrate_carbonmangels',
        'herg',
        'ames',
        'dili',
    ),
    ('binary', False),
)

In [4]:
# Handle to the TDC ADMET benchmark group, backed by the local "data/" folder.
group = admet_group(path="data/")

# Results accumulator: one "<name>_y_pred_proba" and one "<name>_y_true" entry
# is stored per benchmark.
predictions = dict()

Downloading Benchmark Group...
100%|██████████████████████████████████████████████████████████████████████████| 1.47M/1.47M [00:00<00:00, 2.59MiB/s]
Extracting zip file...
Done!


In [5]:
# Hyperparameters shared by every benchmark run.
BATCH_SIZE = 64
MAX_EPOCHS = 20


def _build_dataset(split_df):
    """Convert one benchmark split into a chemprop ``MoleculeDataset``.

    Args:
        split_df: table with a ``Drug`` column (SMILES strings passed to
            ``MoleculeDatapoint.from_smi``) and a ``Y`` column (one target
            value per molecule).

    Returns:
        A ``data.MoleculeDataset`` holding one single-target datapoint per row,
        in row order.
    """
    return data.MoleculeDataset([
        data.MoleculeDatapoint.from_smi(smi, [y])
        for smi, y in zip(split_df['Drug'], split_df['Y'])
    ])


# Train one model per benchmark on its train_val split, predict on its test
# split, and record predictions next to the true labels.
for admet_benchmark in tqdm(benchmark_config):
    benchmark = group.get(admet_benchmark)
    name = benchmark['name']
    train_df, test_df = benchmark['train_val'], benchmark['test']

    train_dset = _build_dataset(train_df)
    test_dset = _build_dataset(test_df)

    # Only the training loader shuffles; the test loader keeps row order so
    # predictions line up with test_df['Y'].
    train_loader = data.build_dataloader(
        train_dset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0
    )
    test_loader = data.build_dataloader(
        test_dset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0
    )

    # A fresh model is built for every benchmark so no weights carry over.
    mp = nn.BondMessagePassing()
    agg = nn.MeanAggregation()
    ffn = nn.BinaryClassificationFFN(n_tasks=1)

    model = models.MPNN(mp, agg, ffn, batch_norm=True)

    trainer = pl.Trainer(
        max_epochs=MAX_EPOCHS,
        accelerator="cpu",
        devices=1,
        enable_progress_bar=False,
        logger=False,
        enable_checkpointing=False
    )
    trainer.fit(model, train_loader)
    preds_batches = trainer.predict(model, test_loader)

    # reshape(-1) always yields a 1-D vector. A bare squeeze() would collapse a
    # single-molecule test split to a 0-d array.
    y_pred_proba = torch.cat(preds_batches).reshape(-1).cpu().numpy()
    y_true = test_df['Y'].values

    # Fail loudly rather than saving misaligned arrays under parallel keys.
    # NOTE(review): a mismatch could occur if the loader drops a trailing
    # batch (some chemprop versions are believed to do so when
    # len(dataset) % batch_size == 1) -- confirm against the installed version.
    if y_pred_proba.shape != y_true.shape:
        raise ValueError(
            f"{name}: got predictions of shape {y_pred_proba.shape} "
            f"but labels of shape {y_true.shape}"
        )

    predictions[f"{name}_y_pred_proba"] = y_pred_proba
    predictions[f"{name}_y_true"] = y_true

  0%|                                                                                         | 0/13 [00:00<?, ?it/s]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
  8%|██████▏                                                                          | 1/13 [00:08<01:41,  8.48s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 15%|████████████▍                                                                    | 2/13 [00:28<02:47, 15.22s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 31%|████████████████████████▉                                                        | 4/13 [01:08<02:55, 19.55s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 38%|███████████████████████████████▏                                                 | 5/13 [04:40<11:52, 89.11s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 46%|████████████████████████████████████▉                                           | 6/13 [08:26<15:48, 135.53s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 54%|███████████████████████████████████████████                                     | 7/13 [11:57<16:02, 160.43s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 62%|█████████████████████████████████████████████████▏                              | 8/13 [12:08<09:23, 112.72s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 69%|████████████████████████████████████████████████████████                         | 9/13 [12:19<05:23, 80.98s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 85%|███████████████████████████████████████████████████████████████████▋            | 11/13 [12:41<01:29, 44.67s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
 92%|█████████████████████████████████████████████████████████████████████████▊      | 12/13 [14:06<00:56, 56.77s/it]GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
Loading `train_dataloader` to estimate number of stepping batches.


`Trainer.fit` stopped: `max_epochs=20` reached.
100%|████████████████████████████████████████████████████████████████████████████████| 13/13 [14:15<00:00, 65.81s/it]


In [6]:
# Write the whole predictions mapping to a single pickle file in the working
# directory, using the newest pickle protocol available.
output_path = 'chemprop_binary.pickle'
with open(output_path, mode='wb') as out_file:
    pickle.dump(predictions, out_file, protocol=pickle.HIGHEST_PROTOCOL)