In [2]:
# --fail: exit with an error on an HTTP failure instead of saving the error page as the zip
# --location: follow redirects, should the download URL be moved
!curl --fail --location https://fragalysis.diamond.ac.uk/media/targets/XX01ZVNS2B.zip --output XX01ZVNS2B.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  257M  100  257M    0     0  10.6M      0  0:00:24  0:00:24 --:--:-- 11.5M


In [3]:
# Target code: names the downloaded zip, the extracted data folder and the prefix of each crystal name
target_name = 'XX01ZVNS2B'

In [4]:
import zipfile
from pathlib import Path

# Sanity check of the download: the archive must exist and hold an SDF at its top level
archive = Path(f'{target_name}.zip')
assert archive.exists(), f'No {target_name}.zip file'
with zipfile.ZipFile(f'{target_name}.zip') as zfh:
    # an entry without a '/' sits at the root of the archive, i.e. not in a per-crystal folder
    root_sdfs = []
    for entry in zfh.namelist():
        if '.sdf' in entry and '/' not in entry:
            root_sdfs.append(entry)
    assert root_sdfs, 'No combined sdf even corrupted in Zip'

AssertionError: No combined sdf even corrupted in Zip

In [5]:
from pathlib import Path
from rdkit import Chem
from typing import List

# Accumulator for the ligand molecules read from the per-crystal SDF files by the loop at the end of this cell
hits: List[Chem.Mol] = []

def poorly_extract(pdbblock) -> List[Chem.Mol]:
    """
    Pull the ``LIG`` molecules out of a PDB block, one alternate location at a time.

    Only lines containing ``HETATM`` are considered. For each alternate-location
    label (blank, then A to E) the lines containing that label followed by ``LIG``
    are parsed on their own, with the label blanked out, and the resulting molecule
    is split into its disconnected fragments (unsanitised).

    :param pdbblock: content of a PDB file as a single string
    :return: list of fragment molecules; empty if nothing could be parsed
    """
    # RDKit has an issue with altLocs and I don't want to use PyMol
    hetatm_records = [record for record in pdbblock.split('\n') if 'HETATM' in record]
    extracted = []
    for altloc in (' ', 'A', 'B', 'C', 'D', 'E'):
        tag = altloc + 'LIG'
        selected = '\n'.join([record for record in hetatm_records if tag in record])
        parsed = Chem.MolFromPDBBlock(selected.replace(tag, ' LIG'))
        if not parsed or not parsed.GetNumAtoms():
            # nothing for this alternate location
            continue
        extracted.extend(Chem.GetMolFrags(parsed, asMols=True, sanitizeFrags=False))
    return extracted

def add_info(mol, pdb_mols, n, chain):
    """
    Copy the PDB residue information of a ligand onto the atoms of ``mol`` (in place).

    The first entry of ``pdb_mols`` whose first atom belongs to chain ``chain`` is
    used. Its atoms are paired with those of ``mol`` by position, so both are
    expected to list their atoms in the same order; if the atom counts differ,
    only the common leading atoms are annotated.

    Each paired atom of ``mol`` receives the properties ``name``, ``molFileAlias``,
    ``resi``, ``resn``, ``altLoc``, ``bfactor`` and ``occupancy``.

    :param mol: molecule whose atoms are annotated
    :param pdb_mols: molecules whose atoms carry PDB residue information
    :param n: number parsed from the hit name; only reported in the error message,
        it takes no part in selecting the PDB molecule
    :param chain: chain identifier the PDB molecule must belong to
    :raises ValueError: if no entry of ``pdb_mols`` is in chain ``chain``
    """
    for pdb_mol in pdb_mols:
        if pdb_mol.GetAtomWithIdx(0).GetPDBResidueInfo().GetChainId() != chain:
            continue
        for ref_atom, pdb_atom in zip(mol.GetAtoms(), pdb_mol.GetAtoms()):
            info = pdb_atom.GetPDBResidueInfo()
            atom_name = info.GetName()
            ref_atom.SetProp('name', atom_name)
            ref_atom.SetProp('molFileAlias', atom_name)
            ref_atom.SetIntProp('resi', info.GetResidueNumber())
            ref_atom.SetProp('resn', info.GetResidueName())
            ref_atom.SetProp('altLoc', info.GetAltLoc())
            ref_atom.SetDoubleProp('bfactor', info.GetTempFactor())
            ref_atom.SetDoubleProp('occupancy', info.GetOccupancy())
        # only the first molecule of the requested chain is used
        return
    raise ValueError(f'No molecule in chain {chain!r} among the {len(pdb_mols)} '
                     f'extracted PDB molecules (n={n})')
        
# Collect every aligned ligand: each SDF is annotated from the *_bound.pdb sitting next to it.
# sorted() because glob yields files in arbitrary order; it makes the order of `hits` reproducible.
for path in sorted((Path(target_name) / 'aligned').glob('*/*')):
    if path.suffix != '.sdf':
        continue
    name = path.stem
    pdb_path = path.parent / f'{name}_bound.pdb'
    assert pdb_path.exists(), f'{pdb_path} does not exist'
    pdb_mols = poorly_extract(pdb_path.read_text())
    assert pdb_mols, f'{pdb_path} is rotten'
    with Chem.SDMolSupplier(str(path)) as sds:
        for mol in sds:
            # the supplier yields None for a record RDKit cannot parse
            assert mol is not None, f'{path} contains an unparsable molecule'
            mol.SetProp('_Name', name)
            # the part after the first underscore is a number followed by the chain letter, e.g. '1B'
            ligand_tag = name.split('_')[1]
            add_info(mol, pdb_mols, n=int(ligand_tag[:-1]), chain=ligand_tag[-1])
            hits.append(mol)

In [7]:
# If True, the 'code' column is the full crystal_name; if False, the '<target_name>-' text is removed from it
keep_prefix = False

import os
import json
from pathlib import Path
from rdkit import Chem
from rdkit.Chem import PandasTools, AllChem, Draw
from rdkit.Chem.MolStandardize.rdMolStandardize import LargestFragmentChooser
import pandas as pd
import ipywidgets as wd

# with Chem.SDMolSupplier(f'{target_name}_combined.sdf') as sdf_r:
#     hits = list(sdf_r)


metadata: pd.DataFrame = pd.read_csv(Path(target_name) / 'metadata.csv', index_col=0)

# 'code' is the identifier written as the molecule name in the SDF
if keep_prefix:
    metadata['code'] = metadata.crystal_name
else:
    # regex=False: literal replacement, whatever the default of the installed pandas version
    metadata['code'] = metadata.crystal_name.str.replace(f'{target_name}-', '', regex=False)
metadata = metadata.set_index('crystal_name')
# remove the salts from the SMILES (keep the longest '.'-separated component)
metadata['smiles'] = metadata.smiles.apply(lambda v: sorted(v.split('.'), key=len)[-1])
# add the mol: an explicit Series keyed by molecule name, so rows are aligned on crystal_name
# rather than relying on how pandas treats a plain dict assigned to a column
metadata['mol'] = pd.Series({h.GetProp('_Name'): h for h in hits}, dtype=object)
assert metadata.mol.notna().all(), 'Mol name matching issue'


def fix(row):
    """Return the row's mol with bond orders assigned from the row's SMILES."""
    template = Chem.MolFromSmiles(row.smiles)
    # MolFromSmiles returns None on an unparsable SMILES: fail here, naming the culprit
    assert template is not None, f'Unparsable SMILES for {row.name}: {row.smiles}'
    return AllChem.AssignBondOrdersFromTemplate(template, row.mol)


metadata['mol'] = metadata.apply(fix, axis=1)
# per-atom values stored as JSON lists so that they fit in a single SDF property
metadata['occupancies'] = metadata.mol.apply(lambda m: json.dumps([a.GetDoubleProp('occupancy') for a in m.GetAtoms()]))
metadata['bfactor'] = metadata.mol.apply(lambda m: json.dumps([a.GetDoubleProp('bfactor') for a in m.GetAtoms()]))

PandasTools.WriteSDF(df=metadata.reset_index(),
                     out=f'{target_name}.corrected.sdf',
                     molColName='mol', idName='code',
                     properties=['crystal_name', 'RealCrystalName', 'smiles',
                                 'alternate_name', 'site_name', 'occupancies', 'bfactor'])

In [10]:

# Same export as the corrected SDF, restricted to the hits whose site_name is S1, S2 or S3
in_wanted_site = metadata.site_name.isin(['S2', 'S1', 'S3'])
filtered = metadata.loc[in_wanted_site].reset_index()
exported_properties = ['crystal_name', 'RealCrystalName', 'smiles',
                       'alternate_name', 'site_name', 'occupancies', 'bfactor']
PandasTools.WriteSDF(df=filtered,
                     out=f'{target_name}.filtered.sdf',
                     molColName='mol',
                     idName='code',
                     properties=exported_properties)

In [4]:
import zipfile
from pathlib import Path

import pymol2

target_name = 'XX01ZVNS2B'

folder = Path(target_name) / 'aligned'

assert folder.exists(), f'{folder} does not exist'

# Load the bound structures of the two chosen hits into one PyMOL session and save it
with pymol2.PyMOL() as pymol:
    for code in ('x0884_1B', 'x0182_0B'):
        # first match of the glob; None (not a bare StopIteration) when the structure is missing
        path = next(folder.glob(f'*{code}*/*{code}_bound.pdb'), None)
        if path is None:
            raise FileNotFoundError(f'No *{code}_bound.pdb found under {folder}')
        pymol.cmd.load(str(path), code)
    pymol.cmd.save('allo.pse')



In [5]:
import fragmenstein



In [5]:
# Manual check that the bound PDB of hit x0051_0B is present in the aligned folder
!ls XX01ZVNS2B/aligned/XX01ZVNS2B-x0051_0B/XX01ZVNS2B-x0051_0B_bound.pdb

XX01ZVNS2B/aligned/XX01ZVNS2B-x0051_0B/XX01ZVNS2B-x0051_0B_bound.pdb


In [9]:
def add_my_settings(pymol, fetch_path="/Users/matteo/pymol_PDBs"):
    """
    Apply personal display settings to a PyMOL session.

    Sets the fetch path, sets ``use_shaders`` and ``ray_shadows`` to 0 and
    ``ray_trace_mode`` to 3, makes the background white, defines the ``backless``
    alias and registers a palette of named custom colours.

    :param pymol: object exposing the PyMOL command interface as ``pymol.cmd``
    :param fetch_path: value for PyMOL's ``fetch_path`` setting; the default is the
        original author's machine-specific folder, so pass your own on another machine
    :return: None
    """
    cmd = pymol.cmd

    # Set fetch path
    cmd.set("fetch_path", fetch_path)

    # Set other options
    cmd.set("use_shaders", 0)
    cmd.set("ray_shadows", 0)
    cmd.set("ray_trace_mode", 3)
    cmd.bg_color("white")

    # Alias backless
    cmd.alias("backless", 'hide sticks, (name C+N+H+HA+O and not resn pro) or (name C+H+HA+O and resn pro)')

    # Set custom colors: (name, [red, green, blue]) with each channel between 0 and 1
    custom_colors = (
        ("turquoise", [0.18823529411764706, 0.8352941176470589, 0.7843137254901961]),
        ("coral", [1.0, 0.4980392156862745, 0.3137254901960784]),
        ("teal", [0.0, 0.5019607843137255, 0.5019607843137255]),
        ("sage", [0.6980392156862745, 0.6745098039215687, 0.5333333333333333]),
        ("lavender", [0.9019607843137255, 0.9019607843137255, 0.9803921568627451]),
        ("mustard", [1.0, 0.8588235294117647, 0.34509803921568627]),
        ("aquamarine", [0.4980392156862745, 1.0, 0.8313725490196079]),
        ("feijoa", [0.6470588235294118, 0.8431372549019608, 0.5215686274509804]),
        ("rose", [1.0, 0.0, 0.4980392156862745]),
        ("paleturquoise", [0.6862745098039216, 0.9333333333333333, 0.9333333333333333]),
        ("lightcoral", [0.9411764705882353, 0.5019607843137255, 0.5019607843137255]),
        ("lightpurple", [0.8117647058823529, 0.6235294117647059, 1.0]),
        ("lightblue", [0.5294117647058824, 0.807843137254902, 0.9803921568627451]),
        ("lightgreen", [0.5647058823529412, 0.9333333333333333, 0.5647058823529412]),
        ("lightyellow", [1.0, 1.0, 0.8784313725490196]),
        ("lightorange", [1.0, 0.6274509803921569, 0.47843137254901963]),
        ("lightpink", [1.0, 0.7137254901960784, 0.7568627450980392]),
        ("robinsegg", [0.0, 0.8, 0.8]),
        ("cerulean", [0.0, 0.4823529411764706, 0.6549019607843137]),
        ("periwinkle", [0.8, 0.8, 1.0]),
        ("fragmenstein", [0.6823529411764706, 0.8470588235294118, 0.5098039215686274]),
        ("fragmenstein2", [0.5058823529411764, 0.8352941176470589, 1.0]),
        ("fragmenstein3", [1.0, 0.6862745098039216, 0.8156862745098039]),
        ("oxford", [0.0, 0.12941176470588237, 0.2784313725490196]),
    )
    for color_name, rgb in custom_colors:
        cmd.set_color(color_name, rgb)
 

In [10]:
from pathlib import Path
import pymol2

# Build one PyMOL session with every bound structure aligned onto the template
with pymol2.PyMOL() as pymol:
    add_my_settings(pymol)
    pymol.cmd.load('template2.pdb', 'template')
    # sorted(): glob order is arbitrary and only the first structure seen per crystal is loaded,
    # so sorting makes the choice (and the resulting session) reproducible
    for path in sorted(Path('XX01ZVNS2B/aligned').glob('*/*_bound.pdb')):
        # object name: file stem without the target prefix, cut at the first underscore
        name = path.stem.replace('XX01ZVNS2B-', '').split('_')[0]
        if name in pymol.cmd.get_names():
            # an object of that name is already loaded
            continue
        pymol.cmd.load(path.as_posix(), name)
        pymol.cmd.align(name, 'template')
    # all carbons first, then the ligand carbons are re-coloured on top
    pymol.cmd.color('0x40E0D0', 'element C')
    pymol.cmd.color('0xF88379', 'element C and resn LIG')
    pymol.cmd.disable('template')
    pymol.cmd.save('combined.pse')