# Get SMILES from pubchem

In [1]:
import pubchempy as pcp
import pandas as pd

In [2]:
# Added physiological charge based on ChemAxon
physiological_charge_states = {
    # serotonin
    ##'SRN' : 'C1=CC2=C(C=C1O)C(=CN2)CC[NH3+]',
    'SRN' : 'C1=CC2=C(C=C1O)C(=CN2)CC[N+]',
    'ATP' : 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)[C@@H](O)[C@H]1O',
    # sialic acid
    'SIA' : '', #'CC(=O)N[C@@H]1[C@@H](O)C[C@](O)(O[C@H]1[C@H](O)[C@H](O)CO)C([O-])=O',
    # Tramadol
    'TAM' : 'C[NH+](C)C[C@H]1CCCC[C@@]1(C2=CC(=CC=C2)OC)O',
                     "GLC" : "",
                     "HDY" : "", 
                     "BRA" : "", 
                     "AFL" : "",
                     "NAF" : "", 
                     "TNT" : ""}
pcs = pd.DataFrame.from_dict(physiological_charge_states,columns=["PhysiologicalChargeSMILES"],orient='index').reset_index()
pcs = pcs.rename(columns= {'index' : "RosettaName"} )

In [3]:
NUMBER_OF_CONFORMERS=25

In [4]:
%%capture
!pip install pubchempy

In [5]:
list_of_compounds = {"SIA" : "sialic acid",
                     "GLC" : "glucose",
                     "HDY" : "6beta-hydroxycortisol", 
                     "BRA" : "brassinolide", 
                     "ATP" : "ATP", 
                     "AFL" : "Aflatoxin",
                     "TAM" : "Tramadol",
                     "SRN" : "Serotonin",
                     "NAF" : "nafarelin", 
                     "TNT" : "TNT"}
loc = {}
for j in list_of_compounds.keys():
    # print(pcp.get_synonyms(list_of_compounds[j], 'name'))
    loc[j] = pcp.get_compounds(list_of_compounds[j], 'name', as_dataframe=True)
    loc[j]["RosettaName"] = j

[{'CID': 445063, 'Synonym': ['131-48-6', 'N-Acetyl-D-neuraminic acid', 'Lactaminic acid', 'N-acetylneuramic acid', 'NANA', 'N-acetyl-beta-neuraminic acid', 'sialic acid', 'beta-Neu5Ac', 'BETA-SIALIC ACID', 'N-Acetylsialic acid', '5-N-ACETYL-BETA-D-NEURAMINIC ACID', 'UNII-TIP79W5HPN', '(2S,4S,5R,6R)-5-Acetamido-2,4-dihydroxy-6-((1R,2R)-1,2,3-trihydroxypropyl)tetrahydro-2H-pyran-2-carboxylic acid', 'Acetylneuraminic acid', '5-Acetamido-3,5-dideoxy-D-glycero-D-galactononulosonic acid', 'TIP79W5HPN', 'CHEMBL165084', 'N-acetyl-beta-neuraminate', 'CHEBI:45744', 'Acido aceneuramico', 'Acide aceneuramique', 'Acidium aceneuramicum', 'MFCD00006620', 'NAN', 'O-sialic acid', '(-)-N-Acetylneuraminic acid', '5-acetamido-3,5-dideoxy-D-glycero-beta-D-galacto-non-2-ulopyranosonic acid', '19342-33-7', '(2S,4S,5R,6R)-5-acetamido-2,4-dihydroxy-6-[(1R,2R)-1,2,3-trihydroxypropyl]oxane-2-carboxylic acid', 'SLB', '5-N-Acetylneuraminic acid', 'Aceneuramate', 'Lactaminate', 'Sialomucin', 'N-Acetylsialate', 'b-s

[{'CID': 6852390, 'Synonym': ['6beta-Hydroxycortisol', 'NSC 76163', '53-35-0', 'UNII-J37WKJ5Y50', 'MLS000028854', 'J37WKJ5Y50', 'SMR000058916', '(6R,8S,9S,10R,11S,13S,14S,17R)-6,11,17-trihydroxy-17-(2-hydroxyacetyl)-10,13-dimethyl-2,6,7,8,9,11,12,14,15,16-decahydro-1H-cyclopenta[a]phenanthren-3-one', '174866-45-6', '6|A-Hydroxy Cortisol', '3078-34-0', '6beta,17-Dihydroxycorticosterone', '6beta-HO-cortisol', '6beta-OH-cortisol', 'Corticosterone, 6beta,17-dihydroxy-', '6-beta-hydroxycortisol', 'Opera_ID_751', '4-Pregnene-6,11beta,17,21-tetrol-3,20-dione', 'AC1OAA1A', '6beta-Hydroxyhydrocortisone', 'SCHEMBL142686', '6|A,17-Dihydroxycorticosterone', 'CHEMBL1389133', 'CTK8F1105', 'DTXSID80425873', 'Benzyl 2-Acetamido-2-deoxy-4-O-beta-D-galactofuranosyl-alpha-D-glucopyranoside', 'CHEBI:139271', '6beta,11beta,17alpha,21-Tetrahydroxypregn-4-en-3,20-dione', 'HMS2235P15', 'Pregn-4-ene-3,20-dione, 6beta,11beta,17,21-tetrahydroxy-', 'NSC76163', 'MFCD00200405', 'NSC-76163', 'ZINC13118371', 'AKOS027

[{'CID': 33741, 'Synonym': ['Tramadol', '(+)-Tramadol', 'Ralivia flashtab', '27203-92-5', 'Ralivia ER', 'Tramadolum [INN-Latin]', 'cis-Tramadol', 'Tramadol [INN:BAN]', '123154-38-1', 'Tramal', 'Tramadol HCl', 'EINECS 248-319-6', 'UNII-0NG5TTM63P', '0NG5TTM63P', 'CHEBI:75725', 'Tridural', 'Ryzolt', 'Zydol', '181289-58-7', 'NCGC00159343-02', 'Ultram ER', 'Tramadon', 'Cyclohexanol, 2-((dimethylamino)methyl)-1-(m-methoxyphenyl)-', '(+)-trans-2-(Dimethylaminomethyl)-1-(m-methoxyphenyl)cyclohexanol', '(1R,2R)-2-[(dimethylamino)methyl]-1-(3-methoxyphenyl)cyclohexanol', 'Racemic tramadol', 'DSSTox_CID_3691', 'DSSTox_RID_77150', 'DSSTox_GSID_23691', 'E 382', 'Tramadolum', 'Biomadol', 'Contramid', 'Labesfal', 'Tramadis', 'Tramaliv', 'Trapidol', 'CHEMBL201531', 'Zytram', 'Tradonal odis', 'Cyclohexanol, 2-((dimethylamino)-1-(3-methoxyphenyl)-, cis-(+-)-', 'Cyclohexanol, 2-((dimethylamino)methyl)-1-(3-methoxyphenyl)-, cis-(+-)-', 'Tramadol Contramid', 'HSDB 7047', 'CAS-36282-47-0', '(+-)-Tramadol',

In [6]:
df = pd.concat(list(loc.values())).reset_index()
df = pd.merge(right=df, right_on="RosettaName",left=pcs, left_on="RosettaName")

In [7]:
import shutil,subprocess,os
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolTransforms as rdmt
import numpy as np
# from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw import MolDrawing, DrawingOptions
from rdkit.Geometry import rdGeometry as geom
import py3Dmol
from rdkit import Chem
from rdkit.Chem import AllChem
from ipywidgets import interact, interactive, fixed
from ipywidgets import interact, widgets
from IPython.display import display
import py3Dmol

In [8]:
def drawit(m,p,confId=-1):
    mb = Chem.MolToMolBlock(m,confId=confId)
    p.removeAllModels()
    p.addModel(mb,'sdf')
    p.setStyle({'stick':{}})
    p.setBackgroundColor('0xeeeeee')
    p.zoomTo()
    return p.show()

In [9]:
def generate_molecule(name,smiles):
    """
    Generate the 3D molecular structure based on input SMILES
    ----------
    name : name of molecule
    smiles: SMILES of molecule
    Returns
    ----------
    Mol 
    
    """
    LIGAND_NAME = name
    m = Chem.MolFromSmiles(smiles)
    # Add hydrogens
    m_h = Chem.AddHs(m)
    # Embeed the geometry
    AllChem.EmbedMolecule(m_h, params=AllChem.ETKDGv2())
    AllChem.MMFFOptimizeMolecule(m_h, mmffVariant="MMFF94s")
    # Setting name of molecule
    m_h.SetProp("_Name",LIGAND_NAME)
    
    return m_h

In [10]:
def get_conformers(mol,nr=500,rmsthreshold=0.1):
    """
    Generate 3D conformers of molecule using CSD-method
    ----------
    mol : RKdit molecule
    nr : integer, number of conformers to be generate
    rmsthreshold : float, prune conformers that are less rms away from another conf
    Returns
    ----------
    List of new conformation IDs
    """
    # Generate conformers on the CSD-method
    return AllChem.EmbedMultipleConfs(mol, numConfs=nr,useBasicKnowledge=True,\
                                      pruneRmsThresh=rmsthreshold,useExpTorsionAnglePrefs=True)






In [11]:
ligands = {}
for i,j,k in zip(df.cid, df['isomeric_smiles'],df["PhysiologicalChargeSMILES"]):
    if(k != ""):
        print(k)
        ligands["CID_"+str(i)] = k
    else:
        ligands["CID_"+str(i)] = j

C1=CC2=C(C=C1O)C(=CN2)CC[N+]
Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)[C@@H](O)[C@H]1O
C[NH+](C)C[C@H]1CCCC[C@@]1(C2=CC(=CC=C2)OC)O


In [13]:
mols = {}
for name in ligands.keys():
    mols[name] = generate_molecule(name,ligands[name])

ValueError: Bad Conformer Id

In [None]:
for i in ligands.keys():
    cids = get_conformers(mols[i], NUMBER_OF_CONFORMERS,0.1)
    # Do a short minimization and compute the RMSD
    for cid in cids:
        _ = AllChem.MMFFOptimizeMolecule(mols[i], confId=cid, mmffVariant="MMFF94s")
        
    rmslist = []
    AllChem.AlignMolConformers(mols[i], RMSlist=rmslist)

In [None]:
molecule_widget = widgets.Dropdown(
    options=list(mols.keys()),
    value=list(mols.keys())[0],
    description='Molecule:',
    disabled=False,
)

In [None]:
molconf_widget = widgets.Label(
    value = 'Number of conformers: '+str(mols[molecule_widget.value].GetNumConformers())
)

In [None]:
def select_molecule(molecule):
    new_i = widgets.interactive(print_city, country=countryW, city=geoWs[country['new']])
    i.children = new_i.children
    
def on_change(change):
    from IPython.display import clear_output
    clear_output()
    molconf_widget.value = 'Number of conformers: '+str(mols[change['new']].GetNumConformers())
    display(container)
    interact(drawit,m=fixed(mols[change['new']]),p=fixed(p),confId=(0,mols[change['new']].GetNumConformers()-1));

In [None]:
container = widgets.HBox([molecule_widget, molconf_widget])
display(container)
# now construct the view and interactive widget:
p = py3Dmol.view(width=600,height=400)
# this is the widget that needs to tricker events
molecule_widget.observe(on_change, names='value')
interact(drawit,m=fixed(mols[molecule_widget.value]),p=fixed(p),confId=(0,mols[molecule_widget.value].GetNumConformers()-1));

In [None]:
print(mols.keys())

In [None]:
def write_aligned_to_file(list_of_confs, atoms_to_match=(8,9,10,11),filename='Aligned.sdf' ):
    aligned = None
    aligned =  list_of_confs[0]
    tmp_length_ = len(list_of_confs)
    for i in range(tmp_length_):
        aligned.AddConformer(list_of_confs[i].GetConformer(0),assignId=True)   
    conf_ids = [conf.GetId() for conf in aligned.GetConformers()]
    rmslst = []
    AllChem.AlignMolConformers(aligned, atomIds=atoms_to_match, RMSlist=rmslst)
    
    for i in  conf_ids:
        writer3 = Chem.SDWriter(filename+"_"+str(i)+".sdf")
        writer3.write(aligned,confId=i)
    return aligned

In [None]:

#

# df.to_excel("test.xlsx") #, index=False)

In [None]:
import glob, shutil, os, subprocess
def setup_directory():
    dummy_count = []
    for i in glob.glob("*.sdf"):
        tmpkey = i.split("_")
        if(tmpkey[0] == 'TS'):
            key = tmpkey[0]+"_"+tmpkey[1]+"_"+tmpkey[2]+"_"+tmpkey[3]
        else:
            key = tmpkey[0]+"_"+tmpkey[1]
            
        if(key not in dummy_count):
            dummy_count.append(key)
            os.mkdir(key)
        shutil.move(i,key)
    # Move to directory and generate Rosetta parameters
    return dummy_count
dirs = setup_directory()

In [None]:
def generate_rosetta_parameters(directory):
    import os
    ts_ = 0
    tmpkey = directory.split("_")
    os.chdir(directory)
    try:
        sp = subprocess.Popen("rm *_0.sdf", stdout=subprocess.PIPE).wait()
    except:
        print("File is already removed!")
    
    for f in glob.glob("*sdf"):
        os.system("cat "+f+" >> "+directory+".sdf")

    if(tmpkey[0] == 'TS'):
        ts_ = 1
        ligand_name = tmpkey[2][0] + tmpkey[0][0] +tmpkey[3]

    else:
        ligand_name = tmpkey[0][0:2] +tmpkey[-1]

    if(ts_ == 0): 
        os.system("python2.7 /Users/pgreisen/Programs/Rosetta/Rosetta/main/source/scripts/python/public/molfile_to_params.py "+directory+".sdf -n "+ligand_name+" -c --recharge=1 --keep-names")
    else:
        os.system("python2.7 /Users/pgreisen/Programs/Rosetta/Rosetta/main/source/scripts/python/public/molfile_to_params.py "+directory+".sdf -n "+ligand_name+" -c --recharge=0 --keep-names")
        
    os.chdir('../')
    
for i in dirs:
    generate_rosetta_parameters(i)

python ~/Rosetta/main/source/src/python/apps/public/molfile_to_params.py Fentanyl_cambridge.mol -n CFN -c --recharge=1