In [13]:
import os

import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.SaltRemover import SaltRemover

import pyrfume
from pyrfume import odorants
from rickpy import ProgressBar

In [4]:
file_path = os.path.join(pyrfume.DATA, 'all_cids_properties.csv')
df = pd.read_csv(file_path).set_index('CID')

## Make 3D optimized versions of the molecules

In [14]:
# Make basic mol objects
mols = {cid: Chem.MolFromSmiles(smi) for cid, smi in df['IsomericSMILES'].items()}

# Then optimize them
s = SaltRemover()
p = ProgressBar(len(df))
for i, (cid, mol) in enumerate(mols.items()):
    p.animate(i, status=cid)
    try:
        mol.SetProp("_Name","%d: %s" % (cid, df.loc[cid, 'IsomericSMILES']))
        mol = s.StripMol(mol, dontRemoveEverything=True)
        mol = Chem.AddHs(mol)
        AllChem.Compute2DCoords(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.UFFOptimizeMolecule(mol) # Is this deterministic?  
    except Exception as e:
        p.log('Exception for %d: %s' % (cid, e))
        mols[cid] = None
    else:
        mols[cid] = mol
        
# Remove CIDs without a successful optimization
mols = {cid: mol for cid, mol in mols.items() if mol}

                                                                                        [-----------------------99%----------------------] 9721 out of 9722 complete (137481702)
Exception for 134688339: Bad Conformer Id
Exception for 98467650: Bad Conformer Id
Exception for 98139505: Bad Conformer Id
Exception for 98123459: Bad Conformer Id
Exception for 98118709: Bad Conformer Id


In [15]:
print("%d mol files successfully optimized from %d CIDs" % (len(mols), len(df)))

9690 mol files successfully optimized from 9722 CIDs


## Write to an SDF file

In [18]:
file_path = os.path.join(pyrfume.DATA, 'all_cids.sdf')
f = Chem.SDWriter(file_path)
for cid, mol in mols.items():
    f.write(mol)
f.close()

In [25]:
# Write the last molecule to a mol file
mol_block = Chem.MolToMolBlock(mol)
file_path = os.path.join(pyrfume.DATA, 'random.mol')
with open(file_path,'w+') as f:
    print(mol_block, 
          file=f)