# Calculation of Principal Moments of Inertia (PMI)

<hr>
Sauer, W. H. B.; Schwarz, M. K. Molecular Shape Diversity of Combinatorial Libraries:  A Prerequisite for Broad Bioactivity. J. Chem. Inf. Comput. Sci. 2003, 43 (3), 987–1003. https://doi.org/10.1021/ci025599w.


In [1]:
%reload_ext autoreload
%autoreload 2
# def warn(*args, **kwargs):
#     pass  # to silence scikit-learn warnings

import warnings
warnings.filterwarnings('ignore')
# warnings.warn = warn

# Global Imports
# from collections import Counter
# import glob
from pathlib import Path
import sys

import pandas as pd
import numpy as np
# import seaborn as sns
# from matplotlib import pyplot as plt

from rdkit import Chem
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import Descriptors as Desc
from rdkit.Chem import rdMolDescriptors as rdMolDesc

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Project-local Imports
PROJECT_DIR = list(Path("..").absolute().parents)[1]
sys.path.append(str(PROJECT_DIR))
import plt_style
import utils as u
from utils import lp

In [2]:
def gen_3d(mol):
    """Generate a hydrogen-complete 3D conformer of `mol` and relax it with UFF.

    Hydrogens are added with `Chem.AddHs`, a conformer is embedded with
    ETKDG, and the geometry is minimized with UFF. The minimization is
    retried with a growing iteration budget (100, 300, 1000) for as long as
    the optimizer returns a positive status.

    Parameters:
        mol: the molecule to embed. All work is done on the molecule returned
            by `Chem.AddHs(mol)`.

    Returns:
        A tuple `(mh, res)` where `mh` is the hydrogen-added molecule and
        `res` is the status returned by the last `UFFOptimizeMolecule` call.
        RDKit documents 0 as "converged", 1 as "more iterations required"
        and -1 as "force field could not be set up".
    """
    mh = Chem.AddHs(mol)
    # NOTE(review): the return value of EmbedMolecule is ignored. RDKit
    # documents -1 for a failed embedding, in which case the optimizer call
    # below presumably fails - confirm whether a clearer error is wanted.
    Chem.EmbedMolecule(mh, Chem.ETKDG())

    # Walking the schedule directly bounds the number of attempts by the
    # number of budgets: a molecule that never converges is reported through
    # a positive `res` instead of indexing past the end of the schedule.
    for max_iters in (100, 300, 1000):
        res = Chem.UFFOptimizeMolecule(mh, maxIters=max_iters)
        if res <= 0:
            break
    return mh, res

def calc_pmi(inp, source, avg=3):
    """Compute normalized principal-moment-of-inertia ratios for SMILES input.

    For every SMILES string, `avg` 3D conformers are generated with
    `gen_3d`. For each conformer the three principal moments are sorted and
    the two smaller ones are divided by the largest. The value reported per
    molecule is the median of each ratio over its conformers.

    Parameters:
        inp: a single SMILES string or an iterable of SMILES strings.
        source: kept for backward compatibility; the value is not used.
        avg: number of conformers generated per molecule (default 3).

    Returns:
        A tuple `(pmi1, pmi2)` of two lists with one entry per input SMILES:
        the median smallest/largest ratio and the median middle/largest
        ratio, respectively.

    Raises:
        ValueError: if a SMILES string cannot be parsed into a molecule.

    Side effects:
        Prints how many minimizations ended with a non-zero status.
    """
    if isinstance(inp, str):
        inp = [inp]

    did_not_converge = 0
    pmi1 = []
    pmi2 = []
    for smi in inp:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Fail here with the offending input instead of deep inside the
            # hydrogen-adding step with an opaque argument error.
            raise ValueError("Could not parse SMILES: {!r}".format(smi))

        ratios1 = []
        ratios2 = []
        for _ in range(avg):
            # Every round starts from the parsed molecule; the embedded
            # result is kept in its own name so `mol` is never overwritten.
            mol_3d, res = gen_3d(mol)
            # Count occurrences rather than summing status codes, so a
            # negative status cannot cancel out a non-converged run.
            if res != 0:
                did_not_converge += 1
            # Sorting makes the ratios independent of the order in which
            # the three moments are returned.
            smallest, middle, largest = sorted([
                rdMolDesc.CalcPMI1(mol_3d),
                rdMolDesc.CalcPMI2(mol_3d),
                rdMolDesc.CalcPMI3(mol_3d),
            ])
            # NOTE(review): a zero largest moment (e.g. presumably a
            # single-atom input) would make these divisions fail - confirm
            # whether such inputs can occur.
            ratios1.append(smallest / largest)
            ratios2.append(middle / largest)
        pmi1.append(np.median(ratios1))
        pmi2.append(np.median(ratios2))

    print("* {} minimizations did not converge.".format(did_not_converge))
    return pmi1, pmi2  # pmi1, pmi2 are lists

In [3]:
# Load the input table through the project helper.
df = u.read_tsv("../Input Data/pmi_input.tsv")

# One pair of normalized PMI ratios per SMILES entry, in row order.
smiles = df['SMILES'].tolist()
PMIk, PMIl = calc_pmi(smiles, source='smiles')

# Attach the ratios to the table as new columns.
df['PMIx'], df['PMIy'] = PMIk, PMIl

print(df.keys())

* 0 minimizations did not converge.
Index(['Class', 'SMILES', 'PMIx', 'PMIy'], dtype='object')


In [5]:
# Persist the table, which now also holds the PMIx / PMIy columns, through the
# project helper.
# NOTE(review): the path is relative to the current working directory and the
# `results/` directory presumably has to exist already - confirm against
# utils.write_tsv.
u.write_tsv(df, "results/pmi_results.tsv")