In [None]:
# Get all followups
from rdkit import Chem
from typing import Dict

# Build a single name -> molecule lookup from both SD files.
# The files are read in order, so a molecule in the later file replaces an
# earlier one that carries the same ``_Name``.
hitdex: Dict[str, Chem.Mol] = {}
n_unparsed = 0
for sdf_path in ('of-interest.sdf', 'trimmed_x0771-x1604.sdf'):
    with Chem.SDMolSupplier(sdf_path) as supplier:
        for mol in supplier:
            # SDMolSupplier yields None for records RDKit cannot parse;
            # calling GetProp on those would raise AttributeError.
            if mol is None:
                n_unparsed += 1
                continue
            hitdex[mol.GetProp('_Name')] = mol

if n_unparsed:
    # Report rather than silently drop the records that could not be read
    print(f'Skipped {n_unparsed} unparsable SD record(s)')

In [None]:
import requests
import pandas as pd

# Arthor REST API reference: https://arthor.docking.org/api.html
base_url = 'https://arthor.docking.org/'

# Fetch the 'dt/data' endpoint and tabulate its JSON payload
# (presumably the list of searchable databases; see the API reference above)
dbs_endpoint = f'{base_url}dt/data'
dbs = pd.DataFrame(requests.get(dbs_endpoint).json())
dbs

In [None]:
# Round-trip the hit through SMILES; the re-parsed molecule is this cell's output
hit_smiles = Chem.MolToSmiles(hitdex['x0147_0A§1'])
Chem.MolFromSmiles(hit_smiles)

In [None]:
# Show the SMILES string of the same hit
reference_hit = hitdex['x0147_0A§1']
Chem.MolToSmiles(reference_hit)

In [None]:
# Hand-written SMARTS pattern; the author's reference SMILES was N1C=CC2=C1C=CC=C2
scaffold_smarts = 'a1aa2[cX3][aH0X2][aH0X2]a2[aX2]a1'
Chem.MolFromSmarts(scaffold_smarts)

In [None]:
# '[aX3]1:[aX2]:[aH0X2]:[aH0X2]:[cX3]:1'

I generated the SMARTS manually. How could this be automated?
One option is to show the atom indices and ask the user which atoms to change.
For example, at index 1 the `c` is wanted as `[aX3]`.

In [None]:
## Get table of SMARTS matches

from rdkit.Chem import PandasTools, Draw, AllChem

# Comma-separated list of databases to search; no need to escape the commas (%2C)
dbname=','.join(['BB-50-22Q1', 'MADE-BB-23Q1-770M', 'Mcule-BB-22Q1-2.1M', 'ChemSpace-BB-Stock-Mar2022-712K'])
query = 'a1aa2[cX3H0][aH0X2][aH0X2]a2[aX2]a1'

# Join with exactly one slash whether or not base_url already ends with one
search_url = base_url.rstrip('/') + f'/dt/{dbname}/search'
response = requests.get(search_url,
                        params=dict(query=query,
                                    type='SMARTS',
                                    length=1_000_000)
                        )
# Surface HTTP errors here instead of failing later while decoding the body
response.raise_for_status()
# Decode the JSON body once and reuse it
payload = response.json()
assert payload['recordsTotal'], 'Arthor response has an empty/zero recordsTotal'

matches = pd.DataFrame( payload['data'], columns=['idx', 'smiles_id', 'empty', 'something', 'db'])
# 'smiles_id' is split on whitespace: first token is the SMILES, second the identifier
smiles_and_id = matches.smiles_id.str.split(expand=True)
matches['id'] = smiles_and_id[1]
matches['smiles'] = smiles_and_id[0]
# drop_duplicates returns a new frame; the result must be kept for it to have any effect
matches = matches.drop_duplicates('id').copy()
PandasTools.AddMoleculeColumnToFrame(matches,'smiles','mol',includeFingerprints=True)
# Keep only rows whose SMILES gave a molecule; .copy() so the assignments below
# write to an independent frame rather than a slice of the previous one
matches = matches.loc[~matches.mol.isnull()].copy()
# tabs?
matches['db'] = matches.db.str.strip()
# Per-molecule descriptors: rotatable-bond count and heavy-atom count
matches['N_RB'] = matches.mol.apply(AllChem.CalcNumRotatableBonds)
matches['N_HA'] = matches.mol.apply(AllChem.CalcNumHeavyAtoms)
matches

TODO: What is the context manager to suppress RDKit warnings?

https://rdkit.readthedocs.io/en/latest/source/rdkit.RDLogger.html

In [None]:
# Select the 'BB-50-22Q1' rows that have exactly ten heavy atoms
has_ten_heavy_atoms = matches.N_HA == 10
from_bb50 = matches.db == 'BB-50-22Q1'
s = matches.loc[has_ten_heavy_atoms & from_bb50]

# Draw them as a grid labelled with their ids, five per row
drawing = Draw.MolsToGridImage(s.mol, legends=s.id.to_list(), molsPerRow=5)

# Keep a copy of the rendered image on disk
with open('enamineBB_10HA.png', 'wb') as png_file:
    png_file.write(drawing.data)

drawing

In [None]:
# The search query pattern with a bromine substituent (-Br) added on the [cX3H0] atom
bromo_smarts = Chem.MolFromSmarts('a1aa2[cX3H0](-Br)[aH0X2][aH0X2]a2[aX2]a1')
# Boolean column: result of comparing each molecule `>=` the pattern.
# NOTE(review): presumably this is the substructure test that PandasTools installs
# on Chem.Mol — confirm against the rdkit.Chem.PandasTools documentation.
matches['bromo4thio'] = matches.mol >= bromo_smarts

In [None]:
# Rows flagged in the 'bromo4thio' column, largest (by heavy-atom count) first
bromo_rows = matches.loc[matches.bromo4thio]
s = bromo_rows.sort_values('N_HA', ascending=False)
Draw.MolsToGridImage(s.mol, legends=s.id.to_list(), molsPerRow=5)

In [None]:
# Rows whose molecule compares `>=` to the N-S(=O)(=O)C fragment, largest first
sulfonamide_query = Chem.MolFromSmiles('N-S(=O)(=O)C')
has_sulfonamide = matches.mol >= sulfonamide_query
s = matches.loc[has_sulfonamide].sort_values('N_HA', ascending=False)
Draw.MolsToGridImage(s.mol, legends=s.id.to_list(), molsPerRow=5)

PLAN:

Matching the SMARTS `Chem.Mol` as a substructure will give the matched atom indices. These atoms will have neighbours that do not feature in the match tuple: the R groups.
Add index or isotope to these R groups.
Table time.

Mapping of Inspiration to SMARTS. Mapping of followup to SMARTS
Mapping of latter to former.
Fragmenstein place.

In [None]:
# code to get xref ids from Zinc

import collections
import collections.abc
import contextlib
import json

import requests
from bs4 import BeautifulSoup

class ZincInformer(collections.abc.MutableMapping):
    """
    Get Zinc info.

    A mapping that stores the retrieved values in ``.data``
    (use ``.dump`` and ``.load`` to persist them as JSON).
    The values can be accessed as a subscript or by calling the instance.
    Subscripting fetches and caches a missing entry and lets any error propagate;
    calling the instance suppresses the exception type(s) given at initialisation
    by the argument ``suppressed_exception``, caches ``{}`` for that id and returns ``{}``.

    The instance is itself an iterable mapping, so when handing it to pandas pass the
    bound call to make sure it is invoked once per element:

    .. code-block::python
        zinfo = ZincInformer()
        series.apply(zinfo.__call__)
    """

    def __init__(self, suppressed_exception=Exception):
        """
        :param suppressed_exception: exception class (or tuple of classes) that
            ``__call__`` swallows; it is passed to ``contextlib.suppress``.
        """
        self.data = {}
        self.suppressed_exception = suppressed_exception

    def __getitem__(self, zinc_id):
        """Return the cached info for ``zinc_id``, fetching and parsing the page on a cache miss."""
        if zinc_id not in self.data:
            # get_soup is a classmethod: it has to be reached through the instance/class,
            # a bare ``get_soup(...)`` is an undefined name here.
            soup = self.get_soup(zinc_id)
            self.data[zinc_id] = self.get_zinc_info(zinc_id, soup)
        return self.data[zinc_id]

    def __call__(self, zinc_id):
        """
        Error-tolerant lookup: like ``self[zinc_id]``, but if a suppressed exception is
        raised an empty dict is cached for ``zinc_id`` and returned.
        """
        with contextlib.suppress(self.suppressed_exception):
            return self[zinc_id]
        # Only reached when the lookup above raised a suppressed exception
        self[zinc_id] = {}
        return {}

    def __setitem__(self, zinc_id: str, info: dict):
        self.data[zinc_id] = info

    def __delitem__(self, zinc_id):
        del self.data[zinc_id]

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)

    def dump(self, filename: str = 'zinc.json'):
        """Write ``.data`` to ``filename`` as JSON."""
        with open(filename, 'w') as fh:
            json.dump(self.data, fh)

    def load(self, filename: str = 'zinc.json'):
        """Replace ``.data`` with the JSON content of ``filename``."""
        with open(filename, 'r') as fh:
            self.data = json.load(fh)

    # ======== specific methods

    @classmethod
    def get_soup(cls, zinc_id: str) -> 'BeautifulSoup':
        """
        Download the substance page of ``zinc_id`` and parse its HTML. Return a soup.

        :raises requests.HTTPError: on an error status (via ``raise_for_status``).
        """
        response = requests.get(f'https://zinc.docking.org/substances/{zinc_id}')
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')

    @classmethod
    def get_dl(cls, soup: 'BeautifulSoup') -> dict:
        """
        Data tables are organised in dl entries, dt headers and dd fields.

        Collects the stripped text of every dt and every dd found in the dl elements
        and pairs them up positionally (this relies on each dt having exactly one dd).
        """
        keys, values = [], []
        for dl in soup.findAll("dl"):
            for dt in dl.findAll("dt"):
                keys.append(dt.text.strip())
            for dd in dl.findAll("dd"):
                values.append(dd.text.strip())
        return dict(zip(keys, values))

    @classmethod
    def polísh(cls, dl: dict) -> dict:
        """
        There's a bunch of words that get in the way...

        Returns a copy of ``dl`` whose keys have the phrases 'Bioactive',
        'Natural Products' and 'Building Blocks' removed and are stripped of
        surrounding whitespace. Values are left untouched.
        """
        unwanted = ('Bioactive', 'Natural Products', 'Building Blocks')
        polished = {}
        for key, value in dl.items():
            for phrase in unwanted:
                key = key.replace(phrase, '')
            polished[key.strip()] = value
        return polished

    @staticmethod
    def _input_value(soup, field_id: str) -> str:
        """Stripped ``value`` attribute of the ``<input>`` element with the given id."""
        return soup.find('input', dict(id=field_id)).attrs['value'].strip()

    @classmethod
    def get_zinc_info(cls, zinc_id, soup):
        """
        Assemble the info dict for one substance page.

        These fields ought to always exist! If one is missing the lookup raises
        (``find`` returns None), which ``__call__`` turns into an empty entry.
        """
        return {'query_name': zinc_id,
                'title': soup.title.text.strip(),
                'SMILES': cls._input_value(soup, "substance-smiles-field"),
                'inchi': cls._input_value(soup, "substance-inchi-field"),
                'inchikey': cls._input_value(soup, "substance-inchikey-field"),
                **cls.polísh(cls.get_dl(soup))
               }

In [None]:
zinfo = ZincInformer()

# Pass the bound ``__call__`` rather than the instance itself: ZincInformer is an
# iterable mapping, and pandas' ``Series.apply`` treats list-/dict-like arguments as a
# collection of aggregations instead of calling them once per element.
matches['id'].apply(zinfo.__call__)