In [None]:
import os, ast
import pandas as pd
import numpy as np
from rdkit import Chem, DataStructs
from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, GraphDescriptors, AllChem, MACCSkeys

In [None]:
def extract_molecule_info(smiles, name):
    """Compute a fixed set of RDKit descriptors for one SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES text, parsed with ``Chem.MolFromSmiles``.
    name : str
        Prefix for every key in the result, e.g. ``name="lig"`` produces
        ``lig_mol_wt``, ``lig_logp``, ...

    Returns
    -------
    pd.Series
        One entry per descriptor, keyed ``f"{name}_<descriptor>"``. The Series
        is empty when ``smiles`` is a missing / non-text value or when RDKit
        cannot parse it.
    """
    # Missing cells (None, float NaN, ...) are not SMILES text, and RDKit's
    # parser raises on them instead of returning None. Send them down the same
    # "no descriptors" path as an unparsable string. bytes are still handed to
    # RDKit, to accept or reject exactly as before.
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, (str, bytes)) else None
    if mol is None:
        # Explicit dtype: the default dtype of an empty Series differs between
        # pandas versions (older releases also warn about it). float64 matches
        # the dtype of the populated result below.
        return pd.Series(dtype="float64")

    info = {
        # Bulk properties
        f"{name}_mol_wt": Descriptors.MolWt(mol),
        f"{name}_logp": Descriptors.MolLogP(mol),
        # Size counts taken directly from the parsed molecule
        f"{name}_atoms": mol.GetNumAtoms(),
        f"{name}_bonds": mol.GetNumBonds(),
        # Lipinski-module counts
        f"{name}_rotbonds": Lipinski.NumRotatableBonds(mol),
        f"{name}_hdonors": Lipinski.NumHDonors(mol),
        f"{name}_hacceptors": Lipinski.NumHAcceptors(mol),
        # Surface area, ring counts and sp3 fraction
        f"{name}_tpsa": rdMolDescriptors.CalcTPSA(mol),
        f"{name}_aromrings": rdMolDescriptors.CalcNumAromaticRings(mol),
        f"{name}_satrings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        f"{name}_alirings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        f"{name}_csp3": rdMolDescriptors.CalcFractionCSP3(mol),
        # Topological / graph indices
        f"{name}_hka": rdMolDescriptors.CalcHallKierAlpha(mol),
        f"{name}_chi0n": GraphDescriptors.Chi0n(mol),
        f"{name}_chi1n": GraphDescriptors.Chi1n(mol),
        f"{name}_kappa1": GraphDescriptors.Kappa1(mol),
    }

    return pd.Series(info)

In [None]:
def generate_ecfp(smiles,
                  radius=2,
                  nBits=1024,
                  use_features=False,
                  use_chirality=False):
    """Return the Morgan bit fingerprint of a SMILES string as a NumPy array.

    Parameters
    ----------
    smiles : str
        SMILES text, parsed with ``Chem.MolFromSmiles``.
    radius : int, default 2
        Forwarded to RDKit as ``radius``.
    nBits : int, default 1024
        Forwarded to RDKit as ``nBits`` (length of the bit vector).
    use_features : bool, default False
        Forwarded to RDKit as ``useFeatures``.
    use_chirality : bool, default False
        Forwarded to RDKit as ``useChirality``.

    Returns
    -------
    numpy.ndarray or None
        ``np.array`` of the RDKit bit vector, or ``None`` when ``smiles`` is a
        missing / non-text value or cannot be parsed.
    """
    # Missing cells (None, float NaN, ...) are not SMILES text, and RDKit's
    # parser raises on them instead of returning None. Report them the same
    # way as an unparsable string. bytes are still handed to RDKit, to accept
    # or reject exactly as before.
    if not isinstance(smiles, (str, bytes)):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None

    # NOTE(review): recent RDKit releases flag this function as deprecated in
    # favour of the rdFingerprintGenerator API - confirm before upgrading.
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(
        molecule,
        radius=radius,
        nBits=nBits,
        useFeatures=use_features,
        useChirality=use_chirality,
    )
    return np.array(bit_vector)

In [None]:
def smiles_to_maccs(smiles):
    """Return the MACCS keys of a SMILES string as a list of 0/1 ints.

    Parameters
    ----------
    smiles : str
        SMILES text, parsed with ``Chem.MolFromSmiles``.

    Returns
    -------
    list[int] or None
        One int per character of the fingerprint's bit string, or ``None``
        when ``smiles`` is a missing / non-text value or cannot be parsed.
    """
    # Missing cells (None, float NaN, ...) are not SMILES text, and RDKit's
    # parser raises on them instead of returning None. Report them the same
    # way as an unparsable string. bytes are still handed to RDKit, to accept
    # or reject exactly as before.
    if not isinstance(smiles, (str, bytes)):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None

    maccs_key = MACCSkeys.GenMACCSKeys(molecule)
    # ToBitString() yields a string of '0'/'1' characters; convert each to int.
    return [int(bit) for bit in maccs_key.ToBitString()]