In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
# Global seaborn styling for the figures in this notebook: 'talk' context,
# 'ticks' style, matplotlib's single-letter colour codes remapped to the
# seaborn palette, and legends drawn without a frame.
sns.set(context='talk', style='ticks',
        color_codes=True, rc={'legend.frameon': False})

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Load the test-set table; later cells read its `smile` column (one SMILES
# string per row) and add a column of UFF-optimised mol blocks to it.
mol_test = pd.read_csv('data/mol_test.csv')

In [3]:
from rdkit import Chem
from rdkit.Chem import AllChem

In [4]:
def reoptimize_mol(smile):
    """Build a UFF-optimised 3D structure for one SMILES string.

    The molecule is parsed, given explicit hydrogens, embedded in 3D from
    random starting coordinates, and relaxed with the UFF force field.

    Parameters
    ----------
    smile : str
        SMILES string of the molecule to embed and optimise.

    Returns
    -------
    str or None
        The optimised structure as a mol block, or ``None`` if any step fails
        (unparsable SMILES, embedding failure, UFF not converging within
        1000 iterations, or any other exception). On failure the offending
        SMILES is printed so it can be inspected afterwards.
    """
    try:
        # Use the SMILES passed by the caller, not a fixed row of the global table.
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            raise ValueError('could not parse SMILES')

        mol = Chem.AddHs(mol, addCoords=True)

        # EmbedMolecule returns the new conformer id, or -1 if embedding failed.
        if AllChem.EmbedMolecule(mol, useRandomCoords=True) < 0:
            raise RuntimeError('3D embedding failed')

        # UFFOptimizeMolecule returns 0 only when the minimisation converged.
        # An explicit check is used instead of `assert`, which is stripped
        # when Python runs with -O.
        if AllChem.UFFOptimizeMolecule(mol, maxIters=1000) != 0:
            raise RuntimeError('UFF optimisation did not converge')

        return Chem.MolToMolBlock(mol)

    except Exception:
        # Deliberate best-effort: report the failing input and carry on.
        print(smile)
        return None

In [5]:
# Re-optimise every SMILES in the test set. Results are keyed by the
# DataFrame index label so they can be aligned back onto `mol_test`;
# molecules that fail are stored as None.
mols = {}
# total is taken from the table itself so the progress bar is right for any
# input size, not only a 5000-row file.
for i, smile in tqdm(mol_test.smile.items(), total=len(mol_test)):
    mols[i] = reoptimize_mol(smile)

100%|██████████| 5000/5000 [13:36<00:00,  6.12it/s]


In [9]:
# Attach the optimised mol blocks as a new column. `mols` is keyed by the
# index labels of `mol_test`, so the Series aligns row-for-row on assignment;
# rows whose optimisation failed hold a missing value.
mol_test['molUFF'] = pd.Series(mols)

In [11]:
# Persist the augmented table; index=False leaves the row index out of the CSV.
mol_test.to_csv('data/mol_test_uff.csv', index=False)