In [1]:
from prism_pruner.conformer_ensemble import ConformerEnsemble
from prism_pruner.graph_manipulations import graphize
from prism_pruner.pruner import prune_by_moment_of_inertia, prune_by_rmsd, prune_by_rmsd_rot_corr
from prism_pruner.utils import EH_TO_KCAL

# Read the CREST conformer ensemble from the test fixture; `read_energies=True`
# asks the loader to also populate `ensemble.energies`, which the pruning
# cells below index alongside the coordinates.
xyz_path = "../tests/crest_conformers.xyz"
ensemble = ConformerEnsemble.from_xyz(xyz_path, read_energies=True)

# Bare last expression so the notebook displays the coordinate array's shape
# (presumably (n_conformers, n_atoms, 3) — confirm against the library docs).
ensemble.coords.shape

(675, 220, 3)

In [None]:
%%time
pruned, mask = prune_by_moment_of_inertia(
    ensemble.coords,
    ensemble.atoms,
    max_deviation=0.01,  # 1% difference
    debugfunction=print,
)
energies = ensemble.energies[mask]
print()

pruned, mask = prune_by_rmsd(
    pruned,
    ensemble.atoms,
    energies=energies,  # in Eh
    max_dE=1 / EH_TO_KCAL,  # 1 kcal/mol
    max_rmsd=1.0,  # 1 Å
    debugfunction=print,
)
energies = energies[mask]
print()

DEBUG: MOIPrunerConfig - k=20, rejected 456 (keeping 219/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=10, rejected 56 (keeping 163/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=5, rejected 38 (keeping 125/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=2, rejected 46 (keeping 79/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=1, rejected 15 (keeping 64/675), in 0.0 s
DEBUG: MOIPrunerConfig - keeping 64/675 (0.1 s)
DEBUG: MOIPrunerConfig - Used cached data 5780/15111 times, 38.25% of total calls

DEBUG: RMSDPrunerConfig - k=2, rejected 3 (keeping 61/64), in 0.1 s
DEBUG: RMSDPrunerConfig - k=1, rejected 0 (keeping 61/64), in 0.0 s
DEBUG: RMSDPrunerConfig - keeping 61/64 (0.1 s)
DEBUG: RMSDPrunerConfig - Used cached data 900/1732 times, 51.96% of total calls

CPU times: user 344 ms, sys: 3.65 ms, total: 347 ms
Wall time: 344 ms


In [None]:
# Build the connectivity graph from the first conformer's coordinates
# (assumes every conformer in the ensemble shares this connectivity — confirm).
graph = graphize(ensemble.atoms, ensemble.coords[0])

# Third pruning stage: RMSD with rotamer corrections, applied to the
# structures and energies left over from the previous pruning cell.
pruned, mask = prune_by_rmsd_rot_corr(
    structures=pruned,
    atoms=ensemble.atoms,
    graph=graph,
    energies=energies,  # in Eh
    max_dE=1 / EH_TO_KCAL,  # 1 kcal/mol
    max_rmsd=1.0,  # 1 Å
    debugfunction=print,
    logfunction=print,
)

# Keep `energies` aligned with `pruned`, exactly as is done after each earlier
# pruning step. Without this, `energies` keeps the pre-pruning length, so any
# later use (or a re-run of this cell) pairs structures with the wrong energies.
energies = energies[mask]
assert len(energies) == len(pruned), "energies and pruned structures are out of sync"

DEBUG: prune_by_rmsd_rot_corr - temporarily added edge 35-182 to the graph (will be removed before returning)

 >> Dihedrals considered for rotamer corrections:
 1  - [13 14 16 17]         : NCCC : 3-fold
 2  - [21 22 23 24]         : NCCC : 2-fold
 3  - [21 22 29 30]         : NCCC : 2-fold
 4  - [37 38 74 72]         : NPCC : 2-fold
 5  - [37 38 79 78]         : NPCC : 2-fold
 6  - [37 38 86 84]         : NPCC : 2-fold
 7  - [ 68  67 114 115]     : CCCC : 3-fold
 8  - [ 68  70  71 102]     : CCCC : 3-fold
 9  - [ 78  77 154 167]     : CCCC : 3-fold
 10 - [ 80  81 153 155]     : CCCC : 3-fold
 11 - [ 84  83 127 129]     : CCCC : 3-fold
 12 - [ 87  88 128 141]     : CCCC : 3-fold
 13 - [189 193 197 218]     : CCNO : 2-fold


DEBUG: RMSDRotCorrPrunerConfig - k=2, rejected 14 (keeping 47/61), in 9.8 s
DEBUG: RMSDRotCorrPrunerConfig - k=1, rejected 2 (keeping 45/61), in 1.2 s
DEBUG: RMSDRotCorrPrunerConfig - keeping 45/61 (11.0 s)
DEBUG: RMSDRotCorrPrunerConfig - Used cached data 531/1175