In [None]:
#@title Installations and function declaration
#@markdown # Aims
#@markdown A problem with the compounds uploaded to Fragalysis is that
#@markdown the virtual compounds may clash with their template because they were placed with induced fit,
#@markdown yet the induced-fit structure could not be provided,
#@markdown making it hard to tell whether the fit is genuinely induced or a clash was let through.
#@markdown Therefore, ideally the hits are ranked by induced fit,
#@markdown but shown as if lock-and-key, so as not to confuse anyone.

#@markdown ### Preface on Colab

#@markdown ← Press the button with the play icon to run a _cell_.
#@markdown In this cell: install requirements.

#@markdown This is a _Colab notebook_, a variant of a Jupyter notebook.
#@markdown If you are not in Colab press [this](https://colab.research.google.com/github/matteoferla/Fragment-hit-follow-up-chemistry/blob/main/colab/upload_fix.ipynb).
#@markdown For the preparation of an upload file
#@markdown see [this](https://colab.research.google.com/github/matteoferla/Fragment-hit-follow-up-chemistry/blob/main/colab/upload_prep.ipynb).

#@markdown Colab runs in Google's servers, hence why you will get asked
#@markdown to sign in if not done so already.
#@markdown Likewise it will ask if you trust the author (Matteo Ferla),
#@markdown if unsure about whether you should trust anything I do
#@markdown [click here for details](https://www.youtube.com/watch?v=dQw4w9WgXcQ).

#@markdown The menu bar can be shown/hidden via the chevron in the top right.

#@markdown To inspect code press `show code` ↓

#@markdown Still confused about notebooks? Ask your friendly demonstrators for more!


!pip install rdkit requests fragmenstein>=0.14.0 pandas tqdm plotly -q


from gist_import import GistImporter
from types import ModuleType

# URL of the helper script, taken from the `main` branch of the GitHub repository.
script_url: str = 'https://raw.githubusercontent.com/matteoferla/Fragment-hit-follow-up-chemistry/main/followup/lock_n_keyify.py'
# Turn that script into a module object bound to `lock_n_key`; later cells call its
# functions (get_target_data, get_apo_pdbblocks, remove_altloc, score).
# NOTE(review): this presumably downloads and executes remote code from an unpinned
# branch, so behaviour can change between runs — consider pinning to a commit; confirm.
lock_n_key: ModuleType = GistImporter.from_github(script_url).to_module('lock_n_key')
import requests
import zipfile
import io
import re
import os
from typing import Dict
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.Draw import IPythonConsole
from fragmenstein import Wictor  # Victor, but RDKit only
import logging
import operator
import pandas as pd
import plotly.express as px
from tqdm.notebook import tqdm
import logging
from fragmenstein import Wictor

# Configure Wictor's stdout logging with the ERROR level
# (presumably so that only errors are printed during placement — confirm).
Wictor.enable_stdout(logging.ERROR)

In [None]:
#@title Upload file

target = 'A71EV2A' #@param {type:"string"}

from google.colab import files

# Ask the user for the file to process; the result is a mapping keyed by
# filename whose values hold the file contents (read as bytes in the Run cell).
uploaded = files.upload()

# A cancelled/empty upload would otherwise surface as an opaque IndexError below.
if not uploaded:
    raise ValueError('No file was uploaded: re-run this cell and choose an SDF file.')

# Only the first uploaded file is used by the following cells.
filename = list(uploaded.keys())[0]

In [None]:
#@title Run!

# Fetch the target's data and build a {template name: apo PDB block} mapping,
# with each block passed through lock_n_key.remove_altloc.
print('Fetching PDB blocks')
data: dict = lock_n_key.get_target_data(target)
project_id: str = data['project_id']
apo_pdbblocks: Dict[str, str] = lock_n_key.get_apo_pdbblocks( data['zip_archive'] )
apo_pdbblocks = {k: lock_n_key.remove_altloc(block) for k, block in apo_pdbblocks.items() }

# Parse the uploaded SD file: the first record is the header entry,
# every following record is a virtual compound (VC).
print('Reading mols')
with Chem.ForwardSDMolSupplier(io.BytesIO(uploaded[filename])) as sdfh:
    entries = list(sdfh)
if not entries or entries[0] is None:
    raise ValueError(f'{filename} does not start with a readable header record')
header = entries[0]
# RDKit yields None for records it cannot parse: drop them rather than
# handing None to the scoring step.
vcs = [mol for mol in entries[1:] if mol is not None]
n_unreadable = len(entries) - 1 - len(vcs)
if n_unreadable:
    print(f'{n_unreadable} unreadable records skipped')
print(f'{len(vcs)} molecules provided')

# Score each VC against every template, then keep the entry with the lowest ∆∆G.
print('Scoring every molecule in every template')
all_scores = []
for vc in tqdm(vcs):
    scores = [lock_n_key.score(vc, template, apo_pdbblock) for template, apo_pdbblock in apo_pdbblocks.items()]
    all_scores.append(scores)
best = [min(scores, key=operator.itemgetter('∆∆G')) for scores in all_scores if scores]

print('Scores:')
df = pd.DataFrame(best)
if df.empty:
    # Without rows the frame has no '∆∆G'/'comRMSD' columns to summarise or plot.
    print('0 VCs scored: nothing to summarise or plot')
else:
    print(f'{len(df)} VCs, {sum(df["∆∆G"] < 0.)} acceptably placed. {sum(df["comRMSD"] < 1)} with minor deviation')
    px.scatter(df, '∆∆G', 'comRMSD',
               title='Fragmenstein Wictor placed compounds (best template)').show()

In [None]:
#@title Download

output_filename='template-adjusted.sdf' #@param {type:"string"}
method_suffix='template-adjusted' #@param {type:"string"}

# Build the output header: a copy of the uploaded header whose 'method'
# property is the original value with the suffix appended.
# NOTE(review): the two parts are joined without a separator — confirm intended.
method_name = header.GetProp('method')
new_header = Chem.Mol(header)
new_method_name = f'{method_name}{method_suffix}'
new_header.SetProp('method', new_method_name)

# Write the header followed by every best placement that passes both cut-offs.
# NOTE(review): the comRMSD cut-off here is 2, while the Run cell's summary
# counts comRMSD < 1 — confirm the difference is deliberate.
n = 0
with Chem.SDWriter(output_filename) as sd_writer:
    sd_writer.write(new_header)
    for info in best:
        rejected = info['∆∆G'] >= 0. or info['comRMSD'] >= 2.
        if not rejected:
            sd_writer.write(info['mol'])
            n += 1
print(f'wrote {n} molecules')

# Hand the written file to the browser for download.
files.download(output_filename)