In [None]:
import pydgn
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
import os.path as osp
sns.color_palette("colorblind", as_cmap=True)
sns.set_palette("colorblind")
from pydgn.data.dataset import OGBGDatasetInterface
from dataset import OGBGmolpcbaFeatureMap

In [None]:
# --- Experiment configuration and artefact loading -------------------------
# Builds the path to one model-selection configuration inside the results
# tree, reads its stored hyper-parameters, loads the matching checkpoint on
# CPU and instantiates the dataset wrapper.
data_root = 'DATA/'
dataset_name = 'ogbg-molpcba'
exp_folder = f'GSPN_RESULTS/UNSUPERVISED/unsupervised_embedding_generation_categorical_{dataset_name}/MODEL_ASSESSMENT/'

outer_fold = 1
outer_folder = osp.join(exp_folder, f'OUTER_FOLD_{outer_fold}')
ms_folder = osp.join(outer_folder, 'MODEL_SELECTION')

config_id = 15  # best config for the unsupervised part according to regression task
config_folder = osp.join(ms_folder, f'config_{config_id}')

model_config_file = osp.join(config_folder, 'config_results.json')
# Read the JSON through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(model_config_file, 'r') as config_fp:
    config = json.load(config_fp)['config']

# NOTE(review): torch.load unpickles the file; only load checkpoints that
# come from a trusted source.
best_ckpt = torch.load(osp.join(config_folder, 'INNER_FOLD_1/best_checkpoint.pth'), map_location='cpu')['model_state']

dataset = OGBGDatasetInterface(data_root, dataset_name)
print(config)

In [None]:
from pydgn.experiment.experiment import Experiment

# Instantiate the unsupervised model described by `config`, restore the
# checkpoint weights, and switch it to evaluation mode on the CPU.
exp = Experiment(config, config_folder, exp_seed=0)
model = exp.create_unsupervised_model(
    dataset.dim_node_features,
    dataset.dim_edge_features,
    dataset.dim_target,
)
model.load_state_dict(best_ckpt)
model.to('cpu')
model.eval()

In [None]:
# For every node-feature column, collect the distinct values found in the
# dataset, sorted in ascending order (one tensor per column).
num_features = dataset.data.x.shape[1]
unique_values = [
    torch.unique(dataset.data.x[:, col]).sort(descending=False).values
    for col in range(num_features)
]
print(unique_values)

def preprocess_node_features(g, value_sets=None):
    """Remap, in place, each node-feature column of ``g.x`` to contiguous ids.

    For column ``f`` the k-th entry of ``value_sets[f]`` is rewritten to ``k``.
    Entries of ``g.x`` that do not appear in ``value_sets[f]`` are left as-is.

    Args:
        g: object exposing a 2-D tensor ``x`` (rows are nodes, columns are
            features); it is modified in place.
        value_sets: one ascending tensor of raw values per feature column.
            Defaults to the notebook-level ``unique_values`` so that existing
            one-argument calls keep working.

    Raises:
        AssertionError: if a target id would exceed the raw value it replaces.
            With ascending values this guarantees that an id written earlier
            can never be mistaken for a raw value processed later.
    """
    if value_sets is None:
        value_sets = unique_values
    for f, values in enumerate(value_sets):
        column = g.x[:, f]  # a view: writes below go straight into g.x
        for new_id, v in enumerate(values.tolist()):
            assert new_id <= v, f'id {new_id} exceeds raw value {v} in feature column {f}'
            column[column == v] = new_id

In [None]:
# Inspect the dataset's node-feature tensor (the same tensor whose columns
# were scanned to build `unique_values`).
dataset.data.x

In [None]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
import cairosvg
import io

In [None]:
import pandas as pd
from ogb.utils import smiles2graph
# Table read from the dataset's mapping folder; its 'smiles' column is used
# below to look molecules up by row index.
mol_df = pd.read_csv(filepath_or_buffer='DATA/ogbg_molpcba/mapping/mol.csv')
mol_df

In [None]:
# Take the SMILES string stored at row 1, echo it, and build both its RDKit
# molecule and its OGB graph dictionary.
smile = mol_df['smiles'][1]
print(smile)
mol = Chem.MolFromSmiles(smile)
graph_original = smiles2graph(smile)
print(graph_original['node_feat'])
mol

In [None]:
from torch_geometric.data import Data

# Wrap the graph dictionary of the original molecule into a Data object.
graph_original_data = Data(
    x=torch.tensor(graph_original['node_feat']),
    edge_index=torch.tensor(graph_original['edge_index']),
    edge_attr=torch.tensor(graph_original['edge_feat']),
)

# Remap the raw node-feature values to contiguous ids (in place).
preprocess_node_features(graph_original_data)

# Forward pass without gradient tracking; the third output is a 9-item list
# of which only four entries are kept.
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_original_data)

print(objective_v, objective_v.mean())
# Heatmap of the whole posterior, then of row 10 alone in a new figure.
sns.heatmap(node_posterior)
plt.figure()
sns.heatmap(node_posterior[10].unsqueeze(0))

In [None]:
# Hand-written variant of the molecule; note that this overwrites `smile`
# and `mol` for the cells that follow.
smile = 'N#Cc1nnn(-c2ccc(O)cc2)c1O'
print(smile)
mol = Chem.MolFromSmiles(smile)
graph_modified = smiles2graph(smile)
print(graph_modified['node_feat'])
mol

In [None]:
from torch_geometric.data import Data

# Wrap the graph dictionary of the modified molecule into a Data object and
# remap its node features to contiguous ids (in place).
graph_modified_data = Data(
    x=torch.tensor(graph_modified['node_feat']),
    edge_index=torch.tensor(graph_modified['edge_index']),
    edge_attr=torch.tensor(graph_modified['edge_feat']),
)
preprocess_node_features(graph_modified_data)

# Forward pass without gradient tracking; the per-node objective of the
# modified graph is stored under a separate name (`objective_v_1`).
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v_1, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_modified_data)

print(objective_v_1, objective_v_1.mean())
# Heatmap of the whole posterior, then of row 10 alone in a new figure.
sns.heatmap(node_posterior)
plt.figure()
sns.heatmap(node_posterior[10].unsqueeze(0))

In [None]:
# Plot, as a one-row heatmap, the per-node difference between the objective
# of the modified graph and that of the original graph, and save it as PDF.
# NOTE(review): `smile` holds the hand-written modified SMILES at this point,
# and a later cell with id_mol = 1 writes to the same PDF path.
print(smile)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)
# Set the font size before the figure is created so that every text element
# of this figure uses it.
plt.rcParams.update({'font.size': 22})

delta_objective = objective_v_1 - objective_v
plt.figure(figsize=(10,5))
sns.heatmap(delta_objective.unsqueeze(0), cmap='rocket_r') # substituting leads to a change in likelihood
# Atom symbols of the original molecule; tick positions follow the list
# length instead of a separately hard-coded count.
labels = ['N','C','C','N', 'N', 'N', 'C', 'C', 'C', 'C', 'Cl', 'C', 'C', 'C', 'N']
plt.xticks(ticks=np.arange(len(labels))+0.5, labels=labels)
# The stray closing brace that unbalanced the math expression is removed.
plt.ylabel(r'$\Delta \log \mathcal{L}$ after change')
plt.yticks([])
plt.xlabel(f'SMILES: {smile}')
plt.tight_layout()
plt.savefig('plots/delta_log_likelihood_1.pdf', bbox_inches='tight')
print(delta_objective)

### Two different situations. On the right of the image, the carbon connected to the oxygen becomes more likely than when it was connected to the Cl,
### whereas in the middle, replacing the nitrogen with a carbon seems much less likely to happen.

In [None]:
#!pip install svglib django-renderpdf

In [None]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdFMCS
from rdkit.Chem.Draw import rdDepictor
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPDF


# Reload the molecule at row 1, draw it to an SVG file and convert that SVG
# to PDF.
smile = mol_df['smiles'][1]
mol = Chem.MolFromSmiles(smile)
graph_original = smiles2graph(smile)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)

Draw.MolToFile(mol, 'plots/test.svg', size=(600, 600),
                kekulize=True,
                wedgeBonds=True,
                fitImage=False,
                options=None,
                canvas=None)

drawing = svg2rlg('plots/test.svg')
renderPDF.drawToFile(drawing, 'plots/test.pdf')

In [None]:
# Look up the atom at index 10 of the current molecule (the position labelled
# 'Cl' in the tick labels of the delta plots for this molecule).
mol.GetAtomWithIdx(10)

In [None]:
# dataset_preprocessed = OGBGDatasetInterface(data_root, dataset_name)

# model.to('cuda:0')
# for sample in dataset_preprocessed:
#     preprocess_node_features(sample)
#     sample.to('cuda:0')
#     with torch.no_grad():
#         _, _, [loglik, _, _, _, _, _, _, _, _] = model(sample)
    
#     print(loglik.mean())
#     sample.to('cpu')
# model.to('cpu')

### Extra Molecules

In [None]:
# Select the molecule by row index, echo its SMILES and build its graph.
id_mol = 1

smile = mol_df['smiles'][id_mol]
print(smile)
mol = Chem.MolFromSmiles(smile)
graph_original = smiles2graph(smile)
graph_original_data = Data(
    x=torch.tensor(graph_original['node_feat']),
    edge_index=torch.tensor(graph_original['edge_index']),
    edge_attr=torch.tensor(graph_original['edge_feat']),
)

# Remap the raw node-feature values to contiguous ids (in place).
preprocess_node_features(graph_original_data)

# Forward pass on the unmodified molecule, without gradient tracking.
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_original_data)

mol

In [None]:
# Edit the SMILES (every 'Cl' becomes 'O'), score the edited molecule, plot
# the per-node objective difference against the original and export both
# molecule drawings as SVG and PDF.
new_smile = smile.replace('Cl', 'O')
new_mol = Chem.MolFromSmiles(new_smile)
print(new_smile)
# Fail with a clear message when the textual edit produced an unparsable SMILES.
if new_mol is None:
    raise ValueError(f'Modified SMILES could not be parsed: {new_smile}')
graph_modified = smiles2graph(new_smile)

graph_modified_data = Data(
    x=torch.tensor(graph_modified['node_feat']),
    edge_index=torch.tensor(graph_modified['edge_index']),
    edge_attr=torch.tensor(graph_modified['edge_feat']),
)
preprocess_node_features(graph_modified_data)

with torch.no_grad():
    preds_g, node_posterior, [objective_v_1, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers, ] = model(graph_modified_data)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)
# Set the font size before the figure is created so that every text element
# of this figure uses it.
plt.rcParams.update({'font.size': 22})

delta_objective = objective_v_1 - objective_v
plt.figure(figsize=(10,5))
sns.heatmap(delta_objective.unsqueeze(0), cmap='rocket_r') # substituting leads to a change in likelihood
# Tick labels are the atom symbols of the ORIGINAL molecule, read from RDKit
# rather than typed by hand, so they always match the selected molecule.
labels = [atom.GetSymbol() for atom in mol.GetAtoms()]
plt.xticks(ticks=np.arange(len(labels))+0.5, labels=labels)
# The stray closing brace that unbalanced the math expression is removed.
plt.ylabel(r'$\Delta \log \mathcal{L}$ after change')
plt.yticks([])
plt.xlabel(f'SMILES: {smile}')
plt.tight_layout()
plt.savefig(f'plots/delta_log_likelihood_{id_mol}.pdf', bbox_inches='tight')
print(delta_objective)

# Draw the original and the modified molecule to SVG, then convert each SVG to PDF.
for molecule, stem in ((mol, f'plots/mol_{id_mol}'), (new_mol, f'plots/mol_{id_mol}_modified')):
    Draw.MolToFile(molecule, f'{stem}.svg', size=(600, 600),
                    kekulize=True,
                    wedgeBonds=True,
                    fitImage=False,
                    options=None,
                    canvas=None)
    drawing = svg2rlg(f'{stem}.svg')
    renderPDF.drawToFile(drawing, f'{stem}.pdf')



In [None]:
# Fix the RNG seed, then draw one random row index; the later cells keep
# drawing from this same random stream.
torch.manual_seed(42)
id_mol = torch.randint(low=0, high=len(dataset), size=(1,)).item()
print(id_mol)

smile = mol_df['smiles'][id_mol]
mol = Chem.MolFromSmiles(smile)
print(smile)
graph_original = smiles2graph(smile)
graph_original_data = Data(
    x=torch.tensor(graph_original['node_feat']),
    edge_index=torch.tensor(graph_original['edge_index']),
    edge_attr=torch.tensor(graph_original['edge_feat']),
)

# Remap the raw node-feature values to contiguous ids (in place).
preprocess_node_features(graph_original_data)

# Forward pass on the unmodified molecule, without gradient tracking.
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_original_data)

mol

In [None]:
# Edit the SMILES (every 'Cl' becomes 'O'), score the edited molecule, plot
# the per-node objective difference against the original and export both
# molecule drawings as SVG and PDF.
new_smile = smile.replace('Cl', 'O')
new_mol = Chem.MolFromSmiles(new_smile)
print(new_smile)
# Fail with a clear message when the textual edit produced an unparsable SMILES.
if new_mol is None:
    raise ValueError(f'Modified SMILES could not be parsed: {new_smile}')
graph_modified = smiles2graph(new_smile)

graph_modified_data = Data(
    x=torch.tensor(graph_modified['node_feat']),
    edge_index=torch.tensor(graph_modified['edge_index']),
    edge_attr=torch.tensor(graph_modified['edge_feat']),
)
preprocess_node_features(graph_modified_data)

with torch.no_grad():
    preds_g, node_posterior, [objective_v_1, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers, ] = model(graph_modified_data)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)
# Set the font size before the figure is created so that every text element
# of this figure uses it.
plt.rcParams.update({'font.size': 22})

delta_objective = objective_v_1 - objective_v
plt.figure(figsize=(10,5))
sns.heatmap(delta_objective.unsqueeze(0), cmap='rocket_r') # substituting leads to a change in likelihood
# Tick labels are the atom symbols of the ORIGINAL molecule, read from RDKit
# rather than typed by hand, so they stay correct whichever index was drawn.
labels = [atom.GetSymbol() for atom in mol.GetAtoms()]
plt.xticks(ticks=np.arange(len(labels))+0.5, labels=labels)
# The stray closing brace that unbalanced the math expression is removed.
plt.ylabel(r'$\Delta \log \mathcal{L}$ after change')
plt.yticks([])
plt.xlabel(f'SMILES: {smile}')
plt.tight_layout()
plt.savefig(f'plots/delta_log_likelihood_{id_mol}.pdf', bbox_inches='tight')
print(delta_objective)

# Draw the original and the modified molecule to SVG, then convert each SVG to PDF.
for molecule, stem in ((mol, f'plots/mol_{id_mol}'), (new_mol, f'plots/mol_{id_mol}_modified')):
    Draw.MolToFile(molecule, f'{stem}.svg', size=(600, 600),
                    kekulize=True,
                    wedgeBonds=True,
                    fitImage=False,
                    options=None,
                    canvas=None)
    drawing = svg2rlg(f'{stem}.svg')
    renderPDF.drawToFile(drawing, f'{stem}.pdf')

new_mol

In [None]:
# Draw the next random row index from the current torch RNG state.
id_mol = torch.randint(low=0, high=len(dataset), size=(1,)).item()
print(id_mol)

smile = mol_df['smiles'][id_mol]
mol = Chem.MolFromSmiles(smile)
print(smile)
graph_original = smiles2graph(smile)
graph_original_data = Data(
    x=torch.tensor(graph_original['node_feat']),
    edge_index=torch.tensor(graph_original['edge_index']),
    edge_attr=torch.tensor(graph_original['edge_feat']),
)

# Remap the raw node-feature values to contiguous ids (in place).
preprocess_node_features(graph_original_data)

# Forward pass on the unmodified molecule, without gradient tracking.
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_original_data)

mol

In [None]:
# Edit the SMILES (every 'O' and every lowercase 'o' becomes 'N'), score the
# edited molecule, plot the per-node objective difference against the
# original and export both molecule drawings as SVG and PDF.
# NOTE(review): swapping a lowercase 'o' for an uppercase 'N' may yield a
# SMILES that no longer parses — the check below reports that explicitly.
new_smile = smile.replace('O', 'N')
new_smile = new_smile.replace('o', 'N')
new_mol = Chem.MolFromSmiles(new_smile)
print(new_smile)
# Fail with a clear message when the textual edit produced an unparsable SMILES.
if new_mol is None:
    raise ValueError(f'Modified SMILES could not be parsed: {new_smile}')
graph_modified = smiles2graph(new_smile)

graph_modified_data = Data(
    x=torch.tensor(graph_modified['node_feat']),
    edge_index=torch.tensor(graph_modified['edge_index']),
    edge_attr=torch.tensor(graph_modified['edge_feat']),
)
preprocess_node_features(graph_modified_data)

with torch.no_grad():
    preds_g, node_posterior, [objective_v_1, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers, ] = model(graph_modified_data)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)
# Set the font size before the figure is created so that every text element
# of this figure uses it.
plt.rcParams.update({'font.size': 22})

delta_objective = objective_v_1 - objective_v
plt.figure(figsize=(10,5))
sns.heatmap(delta_objective.unsqueeze(0), cmap='rocket_r') # substituting leads to a change in likelihood
# Tick labels are the atom symbols of the ORIGINAL molecule, read from RDKit
# rather than typed by hand, so they stay correct whichever index was drawn.
labels = [atom.GetSymbol() for atom in mol.GetAtoms()]
plt.xticks(ticks=np.arange(len(labels))+0.5, labels=labels)
# The stray closing brace that unbalanced the math expression is removed.
plt.ylabel(r'$\Delta \log \mathcal{L}$ after change')
plt.yticks([])
plt.xlabel(f'SMILES: {smile}')
plt.tight_layout()
plt.savefig(f'plots/delta_log_likelihood_{id_mol}.pdf', bbox_inches='tight')
print(delta_objective)

# Draw the original and the modified molecule to SVG, then convert each SVG to PDF.
for molecule, stem in ((mol, f'plots/mol_{id_mol}'), (new_mol, f'plots/mol_{id_mol}_modified')):
    Draw.MolToFile(molecule, f'{stem}.svg', size=(600, 600),
                    kekulize=True,
                    wedgeBonds=True,
                    fitImage=False,
                    options=None,
                    canvas=None)
    drawing = svg2rlg(f'{stem}.svg')
    renderPDF.drawToFile(drawing, f'{stem}.pdf')

new_mol

In [None]:
# Draw the next random row index from the current torch RNG state.
id_mol = torch.randint(low=0, high=len(dataset), size=(1,)).item()
print(id_mol)

smile = mol_df['smiles'][id_mol]
mol = Chem.MolFromSmiles(smile)
print(smile)
graph_original = smiles2graph(smile)
graph_original_data = Data(
    x=torch.tensor(graph_original['node_feat']),
    edge_index=torch.tensor(graph_original['edge_index']),
    edge_attr=torch.tensor(graph_original['edge_feat']),
)

# Remap the raw node-feature values to contiguous ids (in place).
preprocess_node_features(graph_original_data)

# Forward pass on the unmodified molecule, without gradient tracking.
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_original_data)

mol

In [None]:
# Edit the SMILES (every lowercase 's' becomes 'N'), score the edited
# molecule, plot the per-node objective difference against the original and
# export both molecule drawings as SVG and PDF.
# NOTE(review): swapping a lowercase 's' for an uppercase 'N' may yield a
# SMILES that no longer parses — the check below reports that explicitly.
new_smile = smile.replace('s', 'N')
new_mol = Chem.MolFromSmiles(new_smile)
print(new_smile)
# Fail with a clear message when the textual edit produced an unparsable SMILES.
if new_mol is None:
    raise ValueError(f'Modified SMILES could not be parsed: {new_smile}')
graph_modified = smiles2graph(new_smile)

graph_modified_data = Data(
    x=torch.tensor(graph_modified['node_feat']),
    edge_index=torch.tensor(graph_modified['edge_index']),
    edge_attr=torch.tensor(graph_modified['edge_feat']),
)
preprocess_node_features(graph_modified_data)

with torch.no_grad():
    preds_g, node_posterior, [objective_v_1, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers, ] = model(graph_modified_data)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)
# Set the font size before the figure is created so that every text element
# of this figure uses it.
plt.rcParams.update({'font.size': 22})

delta_objective = objective_v_1 - objective_v
plt.figure(figsize=(10,5))
sns.heatmap(delta_objective.unsqueeze(0), cmap='rocket_r') # substituting leads to a change in likelihood
# Tick labels are the atom symbols of the ORIGINAL molecule, read from RDKit
# rather than typed by hand, so they stay correct whichever index was drawn.
labels = [atom.GetSymbol() for atom in mol.GetAtoms()]
plt.xticks(ticks=np.arange(len(labels))+0.5, labels=labels, rotation=0)
# The stray closing brace that unbalanced the math expression is removed.
plt.ylabel(r'$\Delta \log \mathcal{L}$ after change')
plt.yticks([])
plt.xlabel(f'SMILES: {smile}')
plt.tight_layout()
plt.savefig(f'plots/delta_log_likelihood_{id_mol}.pdf', bbox_inches='tight')
print(delta_objective)

# Draw the original and the modified molecule to SVG, then convert each SVG to PDF.
for molecule, stem in ((mol, f'plots/mol_{id_mol}'), (new_mol, f'plots/mol_{id_mol}_modified')):
    Draw.MolToFile(molecule, f'{stem}.svg', size=(600, 600),
                    kekulize=True,
                    wedgeBonds=True,
                    fitImage=False,
                    options=None,
                    canvas=None)
    drawing = svg2rlg(f'{stem}.svg')
    renderPDF.drawToFile(drawing, f'{stem}.pdf')

new_mol

In [None]:
# Draw the next random row index from the current torch RNG state.
id_mol = torch.randint(low=0, high=len(dataset), size=(1,)).item()
print(id_mol)

smile = mol_df['smiles'][id_mol]
mol = Chem.MolFromSmiles(smile)
print(smile)
graph_original = smiles2graph(smile)
graph_original_data = Data(
    x=torch.tensor(graph_original['node_feat']),
    edge_index=torch.tensor(graph_original['edge_index']),
    edge_attr=torch.tensor(graph_original['edge_feat']),
)

# Remap the raw node-feature values to contiguous ids (in place).
preprocess_node_features(graph_original_data)

# Forward pass on the unmodified molecule, without gradient tracking.
with torch.no_grad():
    (
        preds_g,
        node_posterior,
        [objective_v, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers],
    ) = model(graph_original_data)

mol

In [None]:
# Edit the SMILES (every 'O' becomes 'N'), score the edited molecule, plot
# the per-node objective difference against the original and export both
# molecule drawings as SVG and PDF.
new_smile = smile.replace('O', 'N')
new_mol = Chem.MolFromSmiles(new_smile)
print(new_smile)
# Fail with a clear message when the textual edit produced an unparsable SMILES.
if new_mol is None:
    raise ValueError(f'Modified SMILES could not be parsed: {new_smile}')
graph_modified = smiles2graph(new_smile)

graph_modified_data = Data(
    x=torch.tensor(graph_modified['node_feat']),
    edge_index=torch.tensor(graph_modified['edge_index']),
    edge_attr=torch.tensor(graph_modified['edge_feat']),
)
preprocess_node_features(graph_modified_data)

with torch.no_grad():
    preds_g, node_posterior, [objective_v_1, objective_g, _, _, _, _, _, mixture_weights, avg_params_across_layers, ] = model(graph_modified_data)

# Make sure the output folder exists before writing into it.
os.makedirs('plots', exist_ok=True)
# Set the font size before the figure is created so that every text element
# of this figure uses it.
plt.rcParams.update({'font.size': 22})

delta_objective = objective_v_1 - objective_v
plt.figure(figsize=(10,5))
sns.heatmap(delta_objective.unsqueeze(0), cmap='rocket_r') # substituting leads to a change in likelihood
# Tick labels are the atom symbols of the ORIGINAL molecule, read from RDKit
# rather than typed by hand, so they stay correct whichever index was drawn.
labels = [atom.GetSymbol() for atom in mol.GetAtoms()]
plt.xticks(ticks=np.arange(len(labels))+0.5, labels=labels)
# The stray closing brace that unbalanced the math expression is removed.
plt.ylabel(r'$\Delta \log \mathcal{L}$ after change')
plt.yticks([])
plt.xlabel(f'SMILES: {smile}')
plt.tight_layout()
plt.savefig(f'plots/delta_log_likelihood_{id_mol}.pdf', bbox_inches='tight')
print(delta_objective)

# Draw the original and the modified molecule to SVG, then convert each SVG to PDF.
for molecule, stem in ((mol, f'plots/mol_{id_mol}'), (new_mol, f'plots/mol_{id_mol}_modified')):
    Draw.MolToFile(molecule, f'{stem}.svg', size=(600, 600),
                    kekulize=True,
                    wedgeBonds=True,
                    fitImage=False,
                    options=None,
                    canvas=None)
    drawing = svg2rlg(f'{stem}.svg')
    renderPDF.drawToFile(drawing, f'{stem}.pdf')

new_mol