In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits made to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from tqdm import tqdm
from collections import OrderedDict
from copy import deepcopy
from source.utils.conforge_conformer_generation import generate_conformers, get_conformer_generator
from source.utils.mol2pyg import mols2pyg_list_with_targets, pyg2mol
from source.utils.mol_utils import get_energy, drop_disconnected_components, visualize_3d_mols
from source.data_transforms._frad_transforms import frad
from source.utils import parse_csv

In [None]:
# CSV file to read.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
input_data = "/home/nobilm@usi.ch/pretrain_paper/data/halicin_data.csv"

# NOTE(review): [0, 1] presumably selects the first two CSV columns — confirm against parse_csv.
data = parse_csv(input_data, [0, 1])
print(data.keys())

# Position of the single dataset entry inspected in this notebook.
idx = 1501

# Conformer-count settings; neither name is referenced by the visible cells.
max_confs = 1
n_confs_to_keep = 1

# Keep only the selected entry, wrapped in one-element lists so the
# processing loop below can still iterate over (smiles, target) pairs.
smiles, targets = [data["smiles"][idx]], [data["activity"][idx]]

# Build the conformer generator once and reuse it for every molecule.
# NOTE(review): the meaning of the positional argument `1` is not visible here —
# confirm against get_conformer_generator (it may be the conformer count).
conf_generator = get_conformer_generator(1)

# Reset the results up front so that a failed run can never silently reuse
# values left in the kernel by a previous execution of this cell.
conformers, out_dihedrals_only, out = None, None, None

for s, y in tqdm(zip(smiles, targets), total=len(smiles)):
    s = drop_disconnected_components(s)
    generated = generate_conformers(s, conf_generator)
    if not generated:
        # No conformer for this molecule: skip it and keep the last good results.
        continue
    # Only publish `conformers` for molecules that are actually processed, so it
    # always matches `out_dihedrals_only` / `out` computed below.
    conformers = generated

    # The highest conformer energy is forwarded to the PyG conversion as `max_energy`.
    mol2pyg_kwargs = {"max_energy": max(get_energy(m) for m in conformers)}
    pyg_mol_confs = mols2pyg_list_with_targets(
        conformers,
        [s] * len(conformers),
        [y] * len(conformers),
        **mol2pyg_kwargs,
    )

    # Each frad call gets its own copy of the data: one call runs on the original
    # list's first entry, the other on the first entry of a deep copy.
    dc_pyg_mol_confs = deepcopy(pyg_mol_confs)
    out_dihedrals_only = frad(pyg_mol_confs[0], add_coords_noise=False)
    out = frad(dc_pyg_mol_confs[0])

# Fail loudly here instead of with an opaque IndexError/TypeError further down.
if conformers is None:
    raise RuntimeError(
        "Conformer generation produced no conformers for the selected SMILES; "
        "nothing to perturb or visualize."
    )

# Molecules to compare, keyed by the label shown in the viewer (insertion order is display order).
d = OrderedDict(
    [
        # ("rdkit_conf", rdkit_generate_conformers(preprocess_mol(rdChem.MolFromSmiles(smiles[0], smi_reader_params())),num_conformers=1)[0]),
        ("conforge", conformers[0]),
        ("dih", pyg2mol(out_dihedrals_only)),
        ("dih+cords", pyg2mol(out)),
    ]
)

# One title per molecule: "<label>:<energy>", with the energy rounded to one decimal.
mols = list(d.values())
titles = [f"{label}:{get_energy(mol):.1f}" for label, mol in d.items()]

visualize_3d_mols(mols, titles=titles, width=1800, height=400)

# ConfGen exploration

In [None]:
# # /home/marconobile/CDPKit/CDPKit/Apps/CmdLine/ConfGen/ConfGenImpl.cpp
# # https://cdpkit.org/v1.1.1/applications/confgen.html
# # https://cdpkit.org/cdpl_api_doc/python_api_doc/classCDPL_1_1ConfGen_1_1ConformerGeneratorSettings.html#a8f00604dec7d4da5376bcd28b2051f42
# import CDPL.Chem as Chem
# import CDPL.ConfGen as ConfGen
# import CDPL.Chem as CDPLChem
# from source.utils.code_utils import print_obj_API
# conf_generator = ConfGen.ConformerGenerator()
# conf_generator.settings.setEnergyWindow(20.0)
# conf_generator.settings.setMaxNumOutputConformers(250)
# conf_generator.settings.setMinRMSD(0.5)

# print("getEnergyWindow", conf_generator.settings.getEnergyWindow())
# print("getMaxNumOutputConformers", conf_generator.settings.getMaxNumOutputConformers())
# print("getMaxNumSampledConformers", conf_generator.settings.getMaxNumSampledConformers())
# print("getMinRMSD", conf_generator.settings.getMinRMSD())
# conf_generator = ConfGen.ConformerGenerator()
# conf_generator.settings.setEnergyWindow(20.0)
# conf_generator.settings.setMaxNumOutputConformers(250)
# conf_generator.settings.setMinRMSD(0.5)
# print("getEnergyWindow", conf_generator.settings.getEnergyWindow())
# print("getMaxNumOutputConformers", conf_generator.settings.getMaxNumOutputConformers())
# print("getMaxNumSampledConformers", conf_generator.settings.getMaxNumSampledConformers())
# print("getMinRMSD", conf_generator.settings.getMinRMSD())
# print_obj_API(conf_generator.settings)
# print_obj_API(conf_generator.settings.fragmentBuildSettings)
# type(conf_generator.settings.fragmentBuildSettings.THOROUGH)
# # Conformer generation preset to use: -C [ --conf-gen-preset ] arg
# # (SMALL_SET_DIVERSE, MEDIUM_SET_DIVERSE, LARGE_SET_DIVERSE, SMALL_SET_DENSE, MEDIUM_SET_DENSE, LARGE_SET_DENSE, default: MEDIUM_SET_DIVERSE).

# # -m [ --mode ] arg
# # Conformer sampling mode (AUTO, STOCHASTIC, SYSTEMATIC, default: AUTO).

# # -e [ --e-window ] arg
# # Energy window for generated conformers.
# # The energy window may be specified as a single constant value or as a list of pairs RBC1 EW1 RBC2 EW2…
# # where RBC denotes a rotatable bond count and EW the energy window that applies if the rotatable bond count of the processed molecule is <= RBC
# # (the EW value associated with the lowest RBC that fulfills the latter condition takes precedence). If the rotatable bond count of the processed
# # molecule is outside any defined range then the EW value associated with the highest RBC will be used. (default: 15.0, energy window values must be >= 0).

# # -r [ --rmsd ] arg
# # Minimum RMSD for output conformer selection.
# # The RMSD may be specified as a single constant value or as a list of pairs RBC1 RMSD1 RBC2 RMSD2…
# # where RBC denotes a rotatable bond count and RMSD is the value that applies if the rotatable bond count of the processed molecule is <= RBC
# # (the RMSD value associated with the lowest RBC that fulfills the latter condition takes precedence).
# # If the rotatable bond count of the processed molecule is outside any defined range then the RMSD value associated with the highest RBC will be used.
# # (default: 0.5, RMSD values must be >= 0, 0 disables RMSD checking).

# # -n [ --max-num-out-confs ] arg
# # Maximum number of output conformers per molecule. The max. number of output conformers may be specified as a single constant value or as a list of
# # pairs RBC1 MC1 RBC2 MC2… where RBC denotes a rotatable bond count and MC the max. number of conformers that applies if the rotatable bond count of
# # the processed molecule is <= RBC (the MC value associated with the lowest RBC that fulfills the latter condition takes precedence).
# # If the rotatable bond count of the processed molecule is outside any defined range then the MC value associated with the highest RBC will be used.
# # (default: 100, count values must be >= 0, 0 disables limit).

# # -x [ --max-num-sampled-confs ] arg
# # Maximum number of sampled conformers (only effective in stochastic sampling mode, default: 2000, must be >= 0, 0 disables limit).

# # -y [ --conv-check-cycle-size ] arg
# # Minimum number of duplicate conformers that have to be generated in succession to consider convergence to be reached
# # (only effective in stochastic sampling mode, default: 100, must be > 0).

# # -B [ --frag-build-preset ] arg
# # Fragment build preset to use (FAST, THOROUGH, only effective in systematic sampling mode, default: FAST).
# # ConformerGeneratorSettings Attributes: [

# #   'DEFAULT',

# #   'LARGE_SET_DENSE',
# #   'LARGE_SET_DIVERSE',
# #   'MEDIUM_SET_DENSE',
# #   'MEDIUM_SET_DIVERSE',
# #   'SMALL_SET_DENSE',
# #   'SMALL_SET_DIVERSE',

# #   'samplingMode',
#   # settings.setSamplingMode(ConformerSamplingMode::AUTO);
#   # settings.setSamplingMode(ConformerSamplingMode::SYSTEMATIC);
#   # settings.setSamplingMode(ConformerSamplingMode::STOCHASTIC);


# #   'energyWindow',
# #   'distanceExponent',
# #   'fragmentBuildSettings',
# #   'maxNumRefinementIterations',
# #   'maxNumSampledConformers',
# #   'maxPoolSize',
# #   'maxRotatableBondCount',
# #   'minRMSD',
# #   'maxNumOutputConformers',
# #   'refinementTolerance',
# #   'strictForceFieldParam'


# #   'forceFieldTypeSystematic',
# #   'forceFieldTypeStochastic',

# #   'nitrogenEnumMode',
# #   'convCheckCycleSize',
# #   'dielectricConstant',
# #   'enumRings',
# #   'genCoordsFromScratch',
# #   'includeInputCoords',
# #   'macrocycleRotorBondCountThresh',
# #   'objectID',
# #   'sampleAngleTolRanges',
# #   'sampleHetAtomHydrogens',

# #   'timeout'
