In [16]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, SaltRemover, rdmolfiles
from mordred import Calculator, descriptors as all_descriptors

In [42]:
# Load the CID -> isomeric-SMILES lookup as a Series indexed by the 'CID' column.
# (Column names suggest a PubChem export — confirm the provenance of the CSV.)
smiles = (
    pd.read_csv('data/cids-names-smiles.csv')
    .set_index('CID')
    .loc[:, 'IsomericSMILES']
)

In [82]:
# Descriptor calculator built from every descriptor mordred registers.
calc = Calculator(all_descriptors)

# Seed for the stochastic distance-geometry embedding. EmbedMolecule defaults to
# randomSeed=-1 (a fresh seed per call), so without this the 3D coordinates, and
# anything computed from them, change on every run.
EMBED_RANDOM_SEED = 42

print("Convering SMILES string to Mol format...")
# MolFromSmiles returns None (it does not raise) for SMILES it cannot parse.
parsed_mols = {cid: Chem.MolFromSmiles(smi) for cid, smi in smiles.items()}

print("Computing 3D coordinates...")
s = SaltRemover.SaltRemover()
mols = {}      # cid -> Mol with explicit Hs and an optimised 3D conformer
failures = {}  # cid -> repr of the exception that caused the molecule to be dropped
for i, (cid, mol) in enumerate(parsed_mols.items()):
    if i > 0 and i % 100 == 0:
        print("Finished %d" % i)
    try:
        if mol is None:
            raise ValueError("could not parse SMILES %r" % smiles[cid])
        mol.SetProp("_Name", "%d: %s" % (cid, smiles[cid]))
        mol = s.StripMol(mol, dontRemoveEverything=True)
        mol = Chem.AddHs(mol)
        # No Compute2DCoords step: EmbedMolecule clears existing conformers by
        # default (clearConfs=True), so 2D coordinates would just be discarded.
        # EmbedMolecule reports failure with a negative conformer id, not an exception.
        if AllChem.EmbedMolecule(mol, randomSeed=EMBED_RANDOM_SEED) < 0:
            raise ValueError("3D embedding failed")
        # Force-field minimisation starting from the embedded coordinates; with the
        # embedding seed fixed above the whole step is expected to be reproducible.
        AllChem.UFFOptimizeMolecule(mol)
    except Exception as e:
        print('Exception for %d' % cid)
        # Keep the reason for later inspection instead of throwing it away.
        failures[cid] = repr(e)
    else:
        mols[cid] = mol

Convering SMILES string to Mol format...
Computing 3D coordinates...
Finished 100
Finished 200
Finished 300
Finished 400
Exception for 17967112
Finished 500
Finished 600
Finished 700
Finished 800
Finished 900
Finished 1000
Finished 1100
Finished 1200
Finished 1300
Finished 1400
Finished 1500
Finished 1600
Finished 1700
Finished 1800
Finished 1900
Finished 2000
Finished 2100


In [85]:
# Write every molecule in `mols` to a single SD file, in dict order.
writer = rdmolfiles.SDWriter('data/cids-smiles.sdf')
try:
    # Only the Mol objects are needed; the dict keys (CIDs) are not written here.
    for mol in mols.values():
        writer.write(mol)
finally:
    # Always flush and release the file handle, even if a write fails part-way.
    writer.close()

In [86]:
# Read the SD file back as a sanity check and print one atom count per record.
# NOTE(review): SDMolSupplier defaults to removeHs=True, so these are presumably
# heavy-atom counts rather than counts including explicit hydrogens — confirm.
suppl = rdmolfiles.SDMolSupplier('data/cids-smiles.sdf')
for record_idx, mol in enumerate(suppl):
    # The supplier yields None for records it cannot parse; report and move on
    # instead of failing with AttributeError on mol.GetNumAtoms().
    if mol is None:
        print("Record %d could not be parsed" % record_idx)
        continue
    print(mol.GetNumAtoms())

15
13
17
12
5
13
8
15
14
21
21
16
16
14
11
15
8
27
12
5
8
19
20
15
8
15
11
10
6
16
7
11
15
14
14
12
7
7
21
9
9
11
11
9
13
13
19
13
15
16
17
18
12
4
4
4
3
9
6
4
4
10
10
30
11
10
8
16
6
1
5
12
13
16
13
13
14
6
8
6
9
9
8
12
11
12
11
6
5
6
5
6
13
10
13
6
8
7
12
12
14
18
3
15
11
3
3
13
8
1
12
11
10
10
29
11
13
10
12
14
10
11
11
11
11
29
11
15
8
16
11
8
8
11
14
10
13
10
11
11
18
9
14
7
17
11
11
11
14
12
9
6
13
9
12
12
10
10
16
16
17
21
12
12
13
10
13
16
18
11
9
8
10
4
20
12
6
10
10
8
11
11
11
18
18
14
10
17
15
11
7
11
15
15
6
9
11
10
6
8
10
13
11
9
7
10
11
14
15
16
7
6
11
11
9
22
15
11
7
9
6
7
15
11
10
16
9
7
4
17
12
5
17
16
8
4
3
8
7
7
13
2
12
21
10
11
13
14
10
8
13
12
14
12
14
14
10
12
17
12
5
21
6
10
8
15
10
11
9
11
5
9
2
11
15
16
9
10
21
14
10
14
10
25
13
8
30
9
10
7
9
10
9
13
15
2
5
14
10
2
4
15
10
11
10
10
4
9
9
1
16
9
11
15
10
17
9
15
10
12
14
7
9
10
9
11
5
4
5
11
9
10
6
5
6
10
7
11
6
5
5
3
6
11
19
10
14
8
20
7
5
7
5
9
7
12
11
5
4
18
12
13
10
16
8
5
4
8
11
16
9
12
12
13
14
6
14
10
6
1