# Get SMILES from PubChem

In [1]:
import pubchempy as pcp
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import subprocess

In [2]:
# Number of conformers requested per ligand when get_conformers() is called below.
NUMBER_OF_CONFORMERS = 25
# Python script that converts an SDF file into Rosetta parameters.
ros_exe = "/Users/pgreisen/Programs/Rosetta/Rosetta/main/source/scripts/python/public/molfile_to_params.py"

In [3]:
%%capture
!pip install pubchempy

In [4]:
# Three-letter code (stored later as "RosettaName") -> compound name searched on PubChem.
list_of_compounds = {
    "SIA": "sialic acid",
    "GLC": "glucose",
    "HDY": "6beta-hydroxycortisol",
    "BRA": "brassinolide",
    "ATP": "ATP",
    "AFL": "Aflatoxin",
    "TAM": "Tramadol",
    "SRN": "Serotonin",
    "TNT": "TNT",
}

# One DataFrame of PubChem hits per code, each tagged with the code it was queried under.
loc = {}
for rosetta_name, compound_name in list_of_compounds.items():
    hits = pcp.get_compounds(compound_name, 'name', as_dataframe=True)
    hits["RosettaName"] = rosetta_name
    loc[rosetta_name] = hits

In [5]:
# Stack the per-compound hit tables into one frame; reset_index() moves the
# old index into a regular column and leaves a fresh 0..n-1 index.
frames = [hits for hits in loc.values()]
df = pd.concat(frames).reset_index()
df.head()

Unnamed: 0,cid,atom_stereo_count,atoms,bond_stereo_count,bonds,cactvs_fingerprint,canonical_smiles,charge,complexity,conformer_id_3d,...,record,rotatable_bond_count,shape_fingerprint_3d,shape_selfoverlap_3d,tpsa,undefined_atom_stereo_count,undefined_bond_stereo_count,volume_3d,xlogp,RosettaName
0,445063,6,"[{'aid': 1, 'number': 8, 'element': 'O', 'y': ...",0,"[{'aid1': 1, 'aid2': 12, 'order': 1}, {'aid1':...",1110000001110010001111000000000000000000000000...,CC(=O)NC1C(CC(OC1C(C(CO)O)O)(C(=O)O)O)O,0,403,,...,"{'id': {'id': {'cid': 445063}}, 'atoms': {'aid...",5,,,177.0,0,0,,-3.5,SIA
1,906,6,"[{'aid': 1, 'number': 8, 'element': 'O', 'y': ...",0,"[{'aid1': 1, 'aid2': 12, 'order': 1, 'style': ...",1110000001110010001111000000000000000000000000...,CC(=O)NC1C(CC(OC1C(C(CO)O)O)(C(=O)O)O)O,0,403,,...,"{'id': {'id': {'cid': 906}}, 'atoms': {'aid': ...",5,,,177.0,6,0,,-3.5,SIA
2,444885,6,"[{'aid': 1, 'number': 8, 'element': 'O', 'y': ...",0,"[{'aid1': 1, 'aid2': 12, 'order': 1}, {'aid1':...",1110000001110010001111000000000000000000000000...,CC(=O)NC1C(CC(OC1C(C(CO)O)O)(C(=O)O)O)O,0,403,,...,"{'id': {'id': {'cid': 444885}}, 'atoms': {'aid...",5,,,177.0,0,0,,-3.5,SIA
3,656387,6,"[{'aid': 1, 'number': 8, 'element': 'O', 'y': ...",0,"[{'aid1': 1, 'aid2': 12, 'order': 1}, {'aid1':...",1110000001110010001111000000000000000000000000...,CC(=O)NC1C(CC(OC1C(C(CO)O)O)(C(=O)O)O)O,0,403,,...,"{'id': {'id': {'cid': 656387}}, 'atoms': {'aid...",5,,,177.0,0,0,,-3.5,SIA
4,5793,5,"[{'aid': 1, 'number': 8, 'element': 'O', 'y': ...",0,"[{'aid1': 1, 'aid2': 9, 'order': 1}, {'aid1': ...",1100000001100000001110000000000000000000000000...,C(C1C(C(C(C(O1)O)O)O)O)O,0,151,,...,"{'id': {'id': {'cid': 5793}}, 'atoms': {'aid':...",1,,,110.0,1,0,,-2.6,GLC


In [6]:
def deprotonate_acids(mol):
    """
    Deprotonate every C(=O)-OH group (carboxylic-acid pattern) of a molecule
    ----------
    mol : SMILES string of the molecule
    Returns
    ----------
    Tuple (SMILES, net formal charge) of the deprotonated molecule; the input
    molecule is returned unchanged (as canonical SMILES) when nothing matches
    Raises
    ----------
    ValueError if the SMILES cannot be parsed
    """
    deprotonate_cooh = AllChem.ReactionFromSmarts("[C:1](=[O:2])-[OH1:3]>>[C:1](=[O:2])-[O-H0:3]")
    m = Chem.MolFromSmiles(mol)
    if m is None:
        raise ValueError("Could not parse SMILES: {!r}".format(mol))
    # RunReactants yields one product per match, each with only that single
    # site changed. Re-apply the reaction to its own product until no acidic
    # OH is left, so molecules with several acid groups are fully deprotonated.
    # Every pass removes one match, so the atom count is a safe upper bound.
    for _ in range(m.GetNumAtoms()):
        products = deprotonate_cooh.RunReactants((m,))
        if not products:
            break
        m = products[0][0]
        # Reaction products come back unsanitized; recompute valences and
        # hydrogen counts so the next substructure match is reliable.
        Chem.SanitizeMol(m)
    return Chem.MolToSmiles(m), Chem.rdmolops.GetFormalCharge(m)

In [7]:
def protonate_sp3_hybridized_amines(mol):
    """
    Put a +1 formal charge on every neutral, SP3-hybridized nitrogen
    ----------
    mol : SMILES string of the molecule
    Returns
    ----------
    Tuple (SMILES, net formal charge) of the modified molecule
    """
    parsed = Chem.MolFromSmiles(mol)
    for atom in parsed.GetAtoms():
        if atom.GetAtomicNum() != 7:
            continue
        if atom.GetFormalCharge() != 0:
            continue
        if str(atom.GetHybridization()) != 'SP3':
            continue
        atom.SetFormalCharge(1)
    # Refresh cached valence information after the charges were edited.
    parsed.UpdatePropertyCache()
    smiles_out = Chem.rdmolfiles.MolToSmiles(parsed)
    net_charge = Chem.rdmolops.GetFormalCharge(parsed)
    return smiles_out, net_charge

In [8]:
# Compute the charge-adjusted SMILES and net charge for every row: amines are
# protonated first, then acids are deprotonated on the resulting SMILES.
# Results are collected in lists and assigned as whole columns, so the code no
# longer relies on index labels coinciding with row positions (the previous
# df.iloc[label, ...] pattern was only correct for a default 0..n-1 index).
physiological_smiles = []
physiological_charges = []
for isomeric in df['isomeric_smiles']:
    protonated_smiles, _protonated_charge = protonate_sp3_hybridized_amines(isomeric)
    final_smiles, final_charge = deprotonate_acids(protonated_smiles)
    physiological_smiles.append(final_smiles)
    physiological_charges.append(final_charge)
df["PhysiologicalChargeSMILES"] = physiological_smiles
df["PhysiologicalCharge"] = physiological_charges
    

In [9]:
import shutil,subprocess,os
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolTransforms as rdmt
import numpy as np
# from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw import MolDrawing, DrawingOptions
from rdkit.Geometry import rdGeometry as geom
import py3Dmol
from rdkit import Chem
from rdkit.Chem import AllChem
from ipywidgets import interact, interactive, fixed
from ipywidgets import interact, widgets
from IPython.display import display
import py3Dmol

In [10]:
def drawit(m,p,confId=-1):
    """
    Render one conformer of a molecule as sticks in a py3Dmol view
    ----------
    m : RDKit molecule
    p : py3Dmol view that is cleared and redrawn
    confId : id of the conformer to show (default -1)
    Returns
    ----------
    Result of p.show()
    """
    mol_block = Chem.MolToMolBlock(m, confId=confId)
    # Drop whatever was displayed before, then load the new conformer.
    p.removeAllModels()
    p.addModel(mol_block, 'sdf')
    p.setStyle({'stick': {}})
    p.setBackgroundColor('0xeeeeee')
    p.zoomTo()
    rendered = p.show()
    return rendered

In [11]:
def generate_molecule(name,smiles,random_seed=None):
    """
    Generate the 3D molecular structure based on input SMILES
    ----------
    name : name of molecule
    smiles: SMILES of molecule
    random_seed : optional integer seed for the embedding; None (default)
                  keeps the embedding parameters' own default seed
    Returns
    ----------
    Mol with explicit hydrogens, one embedded and MMFF94s-minimized
    conformer, and its "_Name" property set to name
    Raises
    ----------
    ValueError if the SMILES cannot be parsed or no 3D embedding is found
    """
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        raise ValueError("Could not parse SMILES for {}: {!r}".format(name, smiles))
    # Add hydrogens
    m_h = Chem.AddHs(m)
    # Embed the geometry
    params = AllChem.ETKDGv2()
    if random_seed is not None:
        params.randomSeed = random_seed
    conf_id = AllChem.EmbedMolecule(m_h, params=params)
    # EmbedMolecule signals failure with a negative conformer id; stop here
    # with a clear message instead of failing inside the force-field step.
    if conf_id < 0:
        raise ValueError("3D embedding failed for {} ({!r})".format(name, smiles))
    AllChem.MMFFOptimizeMolecule(m_h, mmffVariant="MMFF94s")
    # Setting name of molecule
    m_h.SetProp("_Name", name)

    return m_h

In [12]:
def get_conformers(mol,nr=500,rmsthreshold=0.1):
    """
    Embed multiple 3D conformers on a molecule, using basic chemical
    knowledge and experimental torsion-angle preferences
    ----------
    mol : RDKit molecule
    nr : integer, number of conformers to generate
    rmsthreshold : float, RMS pruning threshold passed as pruneRmsThresh
    Returns
    ----------
    Conformer ids as returned by AllChem.EmbedMultipleConfs
    """
    embed_options = dict(
        numConfs=nr,
        useBasicKnowledge=True,
        pruneRmsThresh=rmsthreshold,
        useExpTorsionAnglePrefs=True,
    )
    conformer_ids = AllChem.EmbedMultipleConfs(mol, **embed_options)
    return conformer_ids






In [13]:
# "CID_<cid>" -> SMILES to build from: the charge-adjusted SMILES when one was
# stored, otherwise the isomeric SMILES from the PubChem table.
ligands = {
    "CID_" + str(cid): (charged_smiles if charged_smiles != "" else isomeric_smiles)
    for cid, isomeric_smiles, charged_smiles in zip(
        df.cid, df['isomeric_smiles'], df["PhysiologicalChargeSMILES"]
    )
}

In [14]:
# Build one 3D molecule per ligand, keyed by the same "CID_<cid>" name.
mols = {
    ligand_name: generate_molecule(ligand_name, ligand_smiles)
    for ligand_name, ligand_smiles in ligands.items()
}

In [15]:
# For every ligand: embed conformers, minimize each one with MMFF94s, then
# align all conformers of that molecule onto each other.
for ligand_name in ligands:
    current_mol = mols[ligand_name]
    conformer_ids = get_conformers(current_mol, NUMBER_OF_CONFORMERS, 0.1)
    for conformer_id in conformer_ids:
        AllChem.MMFFOptimizeMolecule(current_mol, confId=conformer_id, mmffVariant="MMFF94s")

    # AlignMolConformers fills this list with the RMS values of the alignment.
    rmslist = []
    AllChem.AlignMolConformers(current_mol, RMSlist=rmslist)

In [16]:
# Dropdown listing every generated molecule; the first one is preselected.
molecule_names = list(mols)
molecule_widget = widgets.Dropdown(
    description='Molecule:',
    options=molecule_names,
    value=molecule_names[0],
    disabled=False,
)

In [17]:
# Label showing how many conformers the currently selected molecule has.
initial_conformer_count = mols[molecule_widget.value].GetNumConformers()
molconf_widget = widgets.Label(value='Number of conformers: ' + str(initial_conformer_count))

In [18]:
def select_molecule(molecule):
    """
    Show the interactive conformer viewer for one molecule
    ----------
    molecule : key into the global mols dictionary (e.g. "CID_445063")
    Returns
    ----------
    Result of the interact() call that drives drawit
    """
    # The earlier body used names that do not exist in this notebook
    # (print_city, countryW, geoWs, country, i) and raised NameError when called.
    # NOTE(review): intended behaviour inferred from the function name and from
    # on_change - confirm this is what the helper should do.
    selected = mols[molecule]
    conformer_count = selected.GetNumConformers()
    molconf_widget.value = 'Number of conformers: ' + str(conformer_count)
    return interact(drawit, m=fixed(selected), p=fixed(p), confId=(0, conformer_count - 1))
    
def on_change(change):
    """
    Observer callback for the molecule dropdown
    ----------
    change : change dictionary from the widget; change['new'] is the newly
             selected molecule name
    Clears the cell output, updates the conformer-count label, shows the
    widget container again and starts a new interactive viewer.
    """
    from IPython.display import clear_output

    clear_output()
    selected = mols[change['new']]
    conformer_count = selected.GetNumConformers()
    molconf_widget.value = 'Number of conformers: ' + str(conformer_count)
    display(container)
    interact(drawit, m=fixed(selected), p=fixed(p), confId=(0, conformer_count - 1))

In [19]:
# Dropdown and conformer-count label, side by side.
container = widgets.HBox([molecule_widget, molconf_widget])
display(container)
# Construct the 3D view that every redraw reuses.
p = py3Dmol.view(width=600,height=400)
# The dropdown is the widget that needs to trigger events.
molecule_widget.observe(on_change, names='value')
# Initial interactive viewer for the preselected molecule.
initial_mol = mols[molecule_widget.value]
interact(drawit, m=fixed(initial_mol), p=fixed(p), confId=(0, initial_mol.GetNumConformers() - 1));

HBox(children=(Dropdown(description='Molecule:', options=('CID_445063', 'CID_906', 'CID_444885', 'CID_656387',…

interactive(children=(IntSlider(value=0, description='confId', max=24), Output()), _dom_classes=('widget-inter…

In [20]:
def write_aligned_to_file(molecule, filename='Aligned.sdf' ):
    """
    Align all conformers of a molecule and write them to one SDF file
    ----------
    molecule : RDKit molecule holding the conformers
    filename : path of the SDF file to write (default 'Aligned.sdf')
    """
    conf_ids = [conf.GetId() for conf in molecule.GetConformers()]
    AllChem.AlignMolConformers(molecule)
    writer = Chem.SDWriter(filename)
    try:
        for conf_id in conf_ids:
            writer.write(molecule, confId=conf_id)
    finally:
        # Close explicitly so the file is flushed to disk before any other
        # program reads it, even if writing a conformer fails.
        writer.close()

In [21]:
import glob, shutil, os, subprocess
def setup_directory(dictionary_w_molecule, df):
    """
    Create one directory per molecule, write its aligned conformers there and
    run the Rosetta molfile_to_params script on them
    ----------
    dictionary_w_molecule : dict mapping "CID_<cid>" names to RDKit molecules
    df : DataFrame with "cid" and "PhysiologicalCharge" columns, used to look
         up the charge passed to --recharge
    """
    for name, molecule in dictionary_w_molecule.items():
        # Reuse an existing directory so the cell can be re-run.
        if not os.path.isdir(name):
            os.mkdir(name)
        # Write straight into the target directory instead of chdir-ing in and
        # out; the working directory stays untouched even if something raises.
        write_aligned_to_file(molecule, filename=os.path.join(name, "Aligned.sdf"))
        cid = int(name.split("_")[1])
        charge_ = df.loc[df["cid"] == cid, "PhysiologicalCharge"].values[0]
        # Argument list (no shell) so paths containing spaces are passed intact.
        command = [
            "python2.7",
            ros_exe,
            "Aligned.sdf",
            "-c",
            "--recharge=" + str(charge_),
            "--conformers-in-one-file",
        ]
        # Run inside the molecule's directory, as the chdir-based version did.
        returncode = subprocess.call(command, cwd=name)
        if returncode != 0:
            # Report the failure but keep processing the remaining molecules.
            print("molfile_to_params failed for {} (exit code {})".format(name, returncode))
setup_directory(mols,df)