In [1]:
from rdkit import Chem
from pathlib import Path
from rdkit.Chem.Draw import IPythonConsole

# Read every record of the ROCS SDF and store its catalogue identifier
# (taken from the 'id' or, failing that, the 'Catalog ID' property) as the
# molecule title ('_Name').
mols = []
with Chem.SDMolSupplier('combo-rocs.sdf') as sdfh:
    mol: Chem.Mol
    for record_no, mol in enumerate(sdfh):
        if mol is None:
            # RDKit suppliers yield None for records they cannot parse;
            # calling mol.HasProp on those would raise AttributeError.
            print(f'Skipping unparsable SDF record #{record_no}')
            continue
        if mol.HasProp('id'):
            catalog_name = mol.GetProp('id')
        elif mol.HasProp('Catalog ID'):
            catalog_name = mol.GetProp('Catalog ID')
        else:
            # Include the properties in the error so the culprit can be identified.
            raise ValueError(f'SDF record #{record_no} has neither an "id" nor a '
                             f'"Catalog ID" property: {mol.GetPropsAsDict()}')
        mol.SetProp('_Name', catalog_name)
        mols.append(mol)

In [6]:
from fragmenstein import Laboratory, place_input_validator
import pandas as pd

In [7]:
!echo $(cat /proc/cpuinfo | grep processor | wc -l)

124


In [None]:
# Template structure the molecules are placed into.
with open('template.pdb') as fh:
    pdbblock = fh.read()

lab = Laboratory(pdbblock=pdbblock, covalent_resi=None, run_plip=True)

# One placement job per molecule, using the molecule itself as its only hit
# ("self-placement"); the job name is the catalogue ID plus '_replaced'.
selfies = pd.DataFrame([dict(name=hit.GetProp('_Name') + '_replaced',
                             hits=[hit],
                             smiles=Chem.MolToSmiles(hit)
                             ) for hit in mols])
replacements: pd.DataFrame = lab.place(place_input_validator(selfies), n_cores=120, timeout=600)
Laboratory.fix_intxns(replacements)

# Keep the name reported by the placement and restore the catalogue ID from
# the first hit molecule.
replacements['bleached_name'] = replacements['name']
# Rows without hit molecules get an empty name instead of an IndexError.
# NOTE(review): presumably these are the failed placements -- confirm.
replacements['name'] = replacements.hit_mols.apply(lambda ms: ms[0].GetProp('_Name') if ms else '')

In [None]:
# Checkpoint the placement results (the .gz suffix makes pandas compress the pickle).
replacements.to_pickle('ROCS_scored.pkl.gz')

In [11]:
def _first_hit_name(hit_mols):
    """Return the '_Name' of the first hit molecule, or '' when there are none."""
    if not hit_mols:
        return ''
    return hit_mols[0].GetProp('_Name')


replacements['name'] = replacements['hit_mols'].apply(_first_hit_name)

In [17]:
# Tally of the error messages recorded per placement (blank = no error recorded).
replacements['error'].value_counts()

error
                                                                                                                                         3058
MemoryError                                                                                                                                45
AttributeError 'NoneType' object has no attribute 'Initialize'                                                                             16
RuntimeError \n\nFile: /home/benchmark/rosetta/source/src/core/conformation/util.cc:1512\n[ ERROR ] UtilityExitException\nERROR: \n\n      10
TimeoutError                                                                                                                                9
ValueError Bad Conformer Id                                                                                                                 4
Name: count, dtype: int64

In [22]:
# Put the placement inputs (columns prefixed with 'input_') side by side with the results.
# NOTE(review): re-running this cell appends the input_ columns again.
input_columns = selfies.add_prefix('input_')
replacements = pd.concat([input_columns, replacements], axis=1)

In [27]:
# Append the SDF properties of every input molecule as extra columns.
# NOTE(review): this relies on `mols` and `replacements` sharing the same row order/index.
mol_properties = pd.DataFrame([m.GetPropsAsDict() for m in mols])
replacements = pd.concat([replacements, mol_properties], axis=1)

In [35]:
from rdkit import Geometry, Chem

# SMARTS per warhead class; insertion order is the order in which
# get_distance tries the patterns.
warhead_smarts = {
    'haloacetamide': 'C(-[Cl,Br,I])-C(=O)-N',
    'acrylamide': 'C=C-C(=O)-N',
    'halooxazole': 'c(-[Cl,Br,I])(:o):n',
    'halothiazole': 'c(-[Cl,Br,I])(:s):n',
    'haloarene': 'c(-[Cl,Br,I])',
    'nitrile': 'C#N',
    'alkine': 'C#C',
    'whatever': 'C=C-a',
    'sulfo': 'S-C',
    'sulfo2': 'S-F',
    'aldehyde': '[CH1]=O',
    'epoxide': 'C1-O-C-1',
    'betalactam': 'C1(=O)-N-C-C-1',
}
# Compiled query molecules keyed by warhead class.
warheads = {label: Chem.MolFromSmarts(pattern) for label, pattern in warhead_smarts.items()}

# Gamma sulfur (SG) of the reactive cysteine: fixed 3D coordinates
# used as the reference point by get_distance.
# NOTE(review): presumably taken from template.pdb -- confirm.
sg = Geometry.Point3D(13.619, 10.298, 23.618)

def get_distance(mol):
    """Distance between the first matched warhead atom of ``mol`` and ``sg``.

    Molecules whose 'experiment' property is one of the four listed ROCS sets
    are not measured. For the others, the patterns in ``warheads`` are tried
    in insertion order and the first one that matches is used; the atom
    mapped to the first atom of that SMARTS is measured against ``sg``.

    :param mol: molecule with an 'experiment' property and a conformer.
    :return: the distance as a float, or NaN if the molecule is from a
        skipped set or no warhead pattern matches.

    Side effect: when a distance is computed it is also stored on the
    molecule as the double property 'SG_distance'.
    """
    if mol.GetProp('experiment') in ('ROCS-shape_Q1.sdf', 'ROCS-BB-0RB.sdf', 'ROCS-BB-1RB.sdf', 'ROCS-PPI.sdf'):
        return float('nan')
    for group in warheads.values():
        # A single search per pattern: an empty tuple means "no match".
        match = mol.GetSubstructMatch(group)
        if match:
            cx_idx: int = match[0]
            break
    else:
        # No warhead pattern matched.
        return float('nan')
    point: Geometry.Point3D = mol.GetConformer().GetAtomPosition(cx_idx)
    d = point.Distance(sg)
    mol.SetDoubleProp('SG_distance', d)
    return d

In [36]:
import operator

# Warhead-to-SG distance of the (single) input hit of every row.
replacements['distance'] = replacements['input_hits'].apply(lambda hits: get_distance(hits[0]))

In [37]:
# Overview ordered by ascending ∆∆G.
summary_columns = ['input_name', 'name', '∆∆G', 'experiment', 'distance']
replacements.sort_values(by='∆∆G')[summary_columns]

Unnamed: 0,input_name,name,∆∆G,experiment,distance
1913,Z781034730_replaced,Z781034730,-12.643915,ROCS-cov2.sdf,9.963828
1703,Z131580190_replaced,Z131580190,-11.821899,ROCS-cov2.sdf,4.825081
1787,Z4607528830_replaced,Z4607528830,-11.676839,ROCS-cov2.sdf,9.044766
2312,EN300-28299118_replaced,EN300-28299118,-11.453057,ROCS-BB-0RB.sdf,
103,Z223278782_replaced,Z223278782,-11.068881,ROCS-shape_Q1.sdf,
...,...,...,...,...,...
2890,Z1124083570_replaced,,,ROCS-PPI.sdf,
2940,Z1419784252_replaced,,,ROCS-PPI.sdf,
2943,Z1356227377_replaced,,,ROCS-PPI.sdf,
2972,Z2065554701_replaced,,,ROCS-PPI.sdf,


In [None]:
# Inspect the available column labels.
replacements.columns.tolist()

In [52]:
from collections import defaultdict
from Bio.SeqUtils import seq1

def narrate(row: pd.Series):
    """Build the free-text rationale for one row.

    Entries whose label is a tuple ``(interaction type, residue name,
    residue number)`` and whose value is non-zero are grouped by interaction
    type, with the residue written as one-letter code plus number
    (unknown residue names become 'X').

    :param row: a row with 'clean_experiment' and 'distance' entries plus the
        tuple-labelled interaction entries.
    :return: 'info set:<clean_experiment>;', then the warhead distance (only
        when it is not NaN), then '<type>:<res>+<res>; ' per interaction type
        in sorted order.
    """
    grouped = defaultdict(list)
    for name, value in row.items():
        # Only tuple-labelled entries describe interactions; zero means absent.
        if not isinstance(name, tuple) or value == 0.:
            continue
        itxn_type, resn, resi = name
        grouped[itxn_type].append(seq1(resn, undef_code="X")+str(resi))
    narrative = f'info set:{row.clean_experiment};'
    # NaN is truthy, so a plain truth test would report "Warhead nanÅ" for
    # rows without a measured distance.
    if pd.notna(row.distance):
        narrative += f'Warhead {row.distance}Å to Cys110:SG; '
    for itxn_type in sorted(grouped):
        narrative += f'{itxn_type}:{"+".join(grouped[itxn_type])}; '
    return narrative

# Strip the '.sdf' suffix and the 'ROCS-' prefix from the experiment name.
# Both must be .str.replace: Series.replace only swaps values that equal
# 'ROCS-' in full, so it never removed the prefix. regex=False keeps '.sdf'
# a literal match on every pandas version.
replacements['clean_experiment'] = (replacements.experiment
                                    .str.replace('.sdf', '', regex=False)
                                    .str.replace('ROCS-', '', regex=False))
# NOTE(review): narrate() output already starts with 'info set:', so the
# text begins 'info info set:' -- confirm whether the extra prefix is wanted.
replacements['rationale'] = 'info ' + replacements.apply(narrate, axis=1)

In [51]:
#replacements.loc[(replacements.distance < 3) & (replacements['∆∆G'] < 0.) & (replacements['comRMSD'] < 2.)].sort_values('∆∆G')[['input_name', 'name', '∆∆G', 'experiment', 'distance']]

In [45]:
from gist_import import GistImporter

# fmodule: helper module for the Fragalysis upload, fetched from GitHub.
# NOTE(review): this executes code from the moving `main` branch; consider
# pinning the URL to a commit hash for reproducibility and safety.
fmodule = GistImporter.from_github(
    'https://raw.githubusercontent.com/matteoferla/Fragment-hit-follow-up-chemistry/main/followup/prep_fragalysis.py'
)
prep, generate_header, floatify_columns = (
    fmodule[helper_name] for helper_name in ('prep', 'generate_header', 'floatify_columns')
)

In [None]:
# Inspect the available column labels before choosing the export fields.
replacements.columns.tolist()

In [65]:
# Columns exported as extra SDF fields, mapped to the dtype each is coerced to.
# Left out for now (int fields): cluster_rank, N_interactions,
# N_interactions_lost, N_rotatable_bonds, N_unconstrained_atoms,
# N_constrained_atoms.
wanted_key_types = {
    'rationale': str,
    '∆∆G': float,
    'comRMSD': float,
    'ROCS_TanimotoCombo': float,
    'ROCS_ShapeTanimoto': float,
    'ROCS_ColorTanimoto': float,
    'distance': float,
}

# Coerce each exported column to its declared dtype.
for column_name in wanted_key_types:
    replacements[column_name] = replacements[column_name].astype(wanted_key_types[column_name])

# Export order follows the dict's insertion order.
wanted_keys = [*wanted_key_types]

def clean_names(names):
    """Return the fixed, comma-separated list of reference hit codes.

    ``names`` is accepted but not used: every row gets the same six codes.
    """
    reference_hits = ('x0451_0A', 'x0554_0A', 'x0556_0A', 'x0566_0A', 'x0310_0A', 'x0416_0A')
    return ','.join(reference_hits)

replacements['ref_mols'] = replacements['hit_names'].apply(clean_names)

method_name = 'A71-ROCS-cov'
# Header molecule for the upload; each extra field is described by its own name.
header: Chem.Mol = generate_header(
    method=method_name,
    ref_url='https://github.com/matteoferla/EV-A71-2A-elaborations',
    submitter_name='Matteo Ferla',
    submitter_email='matteo.ferla@stats.ox.ac.uk',
    submitter_institution='University of Oxford',
    extras={key: key for key in wanted_keys},
)

# Covalent selection: warhead distance below 3, negative ∆∆G and comRMSD
# below 2, ordered by ascending ∆∆G.
warhead_is_close = replacements['distance'] < 3
binding_is_favourable = replacements['∆∆G'] < 0.
pose_is_faithful = replacements['comRMSD'] < 2.
covalent_selection = replacements.loc[warhead_is_close & binding_is_favourable & pose_is_faithful]
prep(covalent_selection.sort_values('∆∆G'),
     header,
     mol_col='minimized_mol',
     name_col='name',
     outfile=f'{method_name}.sdf',
     ref_pdb_name='x0451_0A',
     extras=wanted_keys)

In [63]:
# Number of rows that pass the covalent selection criteria.
passes_covalent_filter = (
    (replacements['distance'] < 3)
    & (replacements['∆∆G'] < 0.)
    & (replacements['comRMSD'] < 2.)
)
replacements.loc[passes_covalent_filter].sort_values('∆∆G').shape[0]

45

In [66]:
# Same export schema as the previous export cell: column -> dtype to coerce to.
# Int fields currently disabled: cluster_rank, N_interactions,
# N_interactions_lost, N_rotatable_bonds, N_unconstrained_atoms,
# N_constrained_atoms.
wanted_key_types = dict([
    ('rationale', str),
    ('∆∆G', float),
    ('comRMSD', float),
    ('ROCS_TanimotoCombo', float),
    ('ROCS_ShapeTanimoto', float),
    ('ROCS_ColorTanimoto', float),
    ('distance', float),
])

# Coerce each exported column to its declared dtype.
for field, field_type in wanted_key_types.items():
    replacements[field] = replacements[field].astype(field_type)

# Field order follows the mapping's insertion order.
wanted_keys = list(wanted_key_types.keys())

def clean_names(names):
    """Give back the constant comma-joined reference hit codes.

    The ``names`` argument is ignored; the result is the same for any input.
    """
    separator = ','
    return separator.join(['x0451_0A', 'x0554_0A', 'x0556_0A', 'x0566_0A', 'x0310_0A', 'x0416_0A'])

replacements['ref_mols'] = replacements['hit_names'].apply(clean_names)

method_name = 'A71-ROCS-mixed'
# Header molecule for the upload; each extra field is described by its own name.
header: Chem.Mol = generate_header(
    method=method_name,
    ref_url='https://github.com/matteoferla/EV-A71-2A-elaborations',
    submitter_name='Matteo Ferla',
    submitter_email='matteo.ferla@stats.ox.ac.uk',
    submitter_institution='University of Oxford',
    extras={key: key for key in wanted_keys},
)

# Mixed selection: no warhead-distance filter; keep the 200 rows with the
# lowest ∆∆G among those with negative ∆∆G and comRMSD below 2.
binding_is_favourable = replacements['∆∆G'] < 0.
pose_is_faithful = replacements['comRMSD'] < 2.
mixed_selection = (
    replacements.loc[binding_is_favourable & pose_is_faithful]
    .sort_values('∆∆G')
    .head(200)
)
prep(mixed_selection,
     header,
     mol_col='minimized_mol',
     name_col='name',
     outfile=f'{method_name}.sdf',
     ref_pdb_name='x0451_0A',
     extras=wanted_keys)