In [1]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import rdkit.Chem as Chem
from sklearn.preprocessing import MinMaxScaler

from src.dataset import (
    load_cyp_data_split,
    load_herg_data_split,
    load_pampa_data_split,
    load_synthetic_data_split
)
from src.explanations import img_for_mol, grad_cam, saliency_map
from src.featurizers import GraphFeaturizer
from src.utils import load_gnn_model
from tuning_results import (
    cyp_gnn_params,
    herg_gnn_params,
    pampa_gnn_params,
    synthetic_gnn_params
)

No normalization for SPS. Feature removed!
No normalization for AvgIpc. Feature removed!
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'dgl'
Skipped loading modules with transformers dependency. No module named 'transformers'
cannot import name 'HuggingFaceModel' from 'deepchem.models.torch_models' (c:\Users\kamil\miniconda3\envs\masters\lib\site-packages\deepchem\models\torch_models\__init__.py)
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'
Skipped loading some PyTorch models, missing a dependency. No module named 'tensorflow'


In [3]:
import os

grad_cam_smiles = json.load(open('results/smiles_grad_cam.json'))
saliency_map_smiles = json.load(open('results/smiles_saliency_map.json'))

dataset_names = ["herg", "pampa", "cyp", "synthetic"]
component_sizes = range(3, 9)

for dataset_name in dataset_names:
    for smiles_list, component_size in zip(grad_cam_smiles[dataset_name], component_sizes):
        for i, smiles in enumerate(smiles_list):
            mol = Chem.MolFromSmiles(smiles)
            img = img_for_mol(mol)
            output_dir = f"results/{dataset_name}_grad_cam_top_components/component_size_{component_size}"
            os.makedirs(output_dir, exist_ok=True)
            plt.imsave(f"results/{dataset_name}_grad_cam_top_components/component_size_{component_size}/{i}_{smiles}.png", img)

for dataset_name in dataset_names:
    for smiles_list, component_size in zip(saliency_map_smiles[dataset_name], component_sizes):
        for i, smiles in enumerate(smiles_list):
            mol = Chem.MolFromSmiles(smiles)
            img = img_for_mol(mol)
            output_dir = f"results/{dataset_name}_saliency_map_top_components/component_size_{component_size}"
            os.makedirs(output_dir, exist_ok=True)
            plt.imsave(f"results/{dataset_name}_saliency_map_top_components/component_size_{component_size}/{i}_{smiles}.png", img)

In [24]:
_, _, test_cyp = load_cyp_data_split()
_, _, test_herg = load_herg_data_split()
_, _, test_pampa = load_pampa_data_split()
_, _, test_synthetic = load_synthetic_data_split()

datasets = {
    "cyp": test_cyp,
    "herg": test_herg,
    "pampa": test_pampa,
    "synthetic": test_synthetic
}

featurizer = GraphFeaturizer("y")

featurized_datasets = {
    "cyp": featurizer(test_cyp),
    "herg": featurizer(test_herg),
    "pampa": featurizer(test_pampa),
    "synthetic": featurizer(test_synthetic)
}

best_params = {
    "cyp": cyp_gnn_params,
    "herg": herg_gnn_params,
    "pampa": pampa_gnn_params,
    "synthetic": synthetic_gnn_params
}

n_mols_to_save = 20
np.random.seed(1)

for dataset_name in datasets.keys():
    model = load_gnn_model(datasets[dataset_name], dataset_name, best_params[dataset_name])
    mols_to_save = np.random.randint(0, len(featurized_datasets[dataset_name]), n_mols_to_save)
    for i in mols_to_save:
        mol = datasets[dataset_name][i]
        featurized_mol = featurized_datasets[dataset_name][i]
        invert_gradients = True if dataset_name == "synthetic" else False
        grad_cam_weights = grad_cam(model, featurized_mol, invert_gradients)
        scaled_grad_cam_weights = MinMaxScaler().fit_transform(grad_cam_weights.reshape(-1, 1)).squeeze()
        img = img_for_mol(mol, scaled_grad_cam_weights)
        output_dir = f"results/{dataset_name}_grad_cam_explanations"
        os.makedirs(output_dir, exist_ok=True)
        plt.imsave(f"{output_dir}/{i}.png", img)

        saliency_map_weights = saliency_map(model, featurized_mol)
        scaled_saliency_map_weights = MinMaxScaler().fit_transform(saliency_map_weights.reshape(-1, 1)).squeeze()
        img = img_for_mol(mol, scaled_saliency_map_weights)
        output_dir = f"results/{dataset_name}_saliency_map_explanations"
        os.makedirs(output_dir, exist_ok=True)
        plt.imsave(f"{output_dir}/{i}.png", img)

Found local copy...
Loading...
Done!
Found local copy...
Loading...
Done!
Found local copy...
Loading...
Done!
