In [201]:
import datamol as dm
import pandas as pd
from rdkit import Chem
import plotly.express as px
import numpy as np
from openbabel import pybel
import openbabel as obabel


def conformer_energies(conformers):
    all_props = [conf.GetPropsAsDict() for conf in conformers.GetConformers()]
    return pd.DataFrame(all_props)

def write_conformers(filename, conformers):
    """Writes all conformers to an sdf file with name filename."""
    c0 = conformers.GetConformer(0)
    m0 = c0.GetOwningMol()
    all_props = [conf.GetPropsAsDict() for conf in conformers.GetConformers()]
    energy_keys = [x for x in all_props[0].keys() if 'delta' not in x]
    energy_key = energy_keys[0]
    with Chem.SDWriter(filename+".sdf") as w:
        for i in range(m0.GetNumConformers()):
            m0.SetProp('ID', f'conf_{i}')
            m0.SetProp('E_kcal', f"{all_props[i][energy_key]:.3f}")
            w.write(m0, confId=i)
    
    print(f"Wrote {m0.GetNumConformers()} conformers to {filename}.sdf")

def write_conformers_to_files(confs, prefix, route, title, suffix='gjf'):
    header = "!Put Keywords Here, check Charge and Multiplicity.\n#\n\n"
    for i, conf in enumerate(confs):
        molblock = dm.to_molblock(conf.GetOwningMol())
        openbabel_file = pybel.readstring('mol', molblock)
        out = openbabel_file.write('gjf', opt=dict(b=True)).replace(header, route+"\n\n"+title)
        fname=f"{prefix}-{i:04}.{suffix}"
        open(fname, 'w').write(out)
    
    print(f"Wrote {len(confs)} files.")
        
        

In [202]:
mol_string = open('/Users/dwyer/Dropbox/__mu/che443/cc-23/reactant-analogue.mol', 'r').read()

In [203]:
mol = dm.read_molblock(mol_string, remove_hs=False)

In [204]:
dm.viz.conformers(mol, remove_hs=False)

NGLWidget()

In [205]:
mol_conformers = dm.conformers.generate(mol, forcefield="MMFF94s",
                                        minimize_energy=True,
                                        align_conformers=True,
                                        add_hs=False,
                                        energy_iterations=2000, num_threads=8,
                                        verbose=True
                                       ) 

In [206]:
df = conformer_energies(mol_conformers)
df.head(20) # Just print the first few rows

Unnamed: 0,rdkit_MMFF94s_energy,rdkit_MMFF94s_delta_energy
0,163.097923,0.0
1,163.097924,6.382805e-07
2,163.09793,6.928177e-06
3,163.097931,7.395131e-06
4,163.904786,0.8068625
5,163.904786,0.806863
6,164.114245,1.016322
7,164.126648,1.028725
8,164.126648,1.028725
9,164.126648,1.028725


In [207]:
len(df)

200

In [208]:
out = dm.conformers.cluster(mol_conformers, rms_cutoff=1, centroids=False)

In [209]:
first_confs = [x.GetConformer(0) for x in out]

In [210]:
first_confs[0].GetPropsAsDict()

{'rdkit_MMFF94s_energy': 165.79508111760967,
 'rdkit_MMFF94s_delta_energy': 2.6971579575201474}

In [211]:
df_clust = pd.DataFrame([x.GetPropsAsDict() for x in first_confs])

In [212]:
df_clust = df_clust.sort_values('rdkit_MMFF94s_energy')

In [213]:
df_clust['w'] = np.exp(-df_clust['rdkit_MMFF94s_delta_energy'].values/2.5)
df_clust['p'] = df_clust['w']/df_clust['w'].sum()
df_important = df_clust[(df_clust['p']>0.01)]
important_conformer_indices = df_important.index

In [214]:
good_confs = [x for i, x in enumerate(first_confs) if i in important_conformer_indices]


In [215]:
write_conformers_to_files(good_confs, 'test-sugar', '#n PM6 Opt Freq', "test sugar")

Wrote 25 files.
