In [None]:
from prism_pruner.conformer_ensemble import ConformerEnsemble
from prism_pruner.pruner import prune_by_moment_of_inertia, prune_by_rmsd
from prism_pruner.utils import EH_TO_KCAL

# ensemble = ConformerEnsemble.from_xyz("../examples/ensemble.xyz")
ensemble = ConformerEnsemble.from_xyz("../tests/crest_conformers.xyz", read_energies=True)
ensemble.coords.shape

(675, 220, 3)

In [6]:
# %%prun -s cumtime
_ = prune_by_moment_of_inertia(
    ensemble.coords,
    ensemble.atoms,
    max_deviation=0.01,  # 1% difference
    debugfunction=print,
)

DEBUG: MOIPrunerConfig - k=20, rejected 456 (keeping 219/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=10, rejected 56 (keeping 163/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=5, rejected 38 (keeping 125/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=2, rejected 46 (keeping 79/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=1, rejected 15 (keeping 64/675), in 0.0 s
DEBUG: MOIPrunerConfig - keeping 64/675 (0.1 s)
DEBUG: MOIPrunerConfig - Used cached data 5780/15111 times, 38.25% of total calls


In [9]:
_ = prune_by_moment_of_inertia(
    ensemble.coords,
    ensemble.atoms,
    energies=ensemble.energies,  # in Eh
    ewin=1 / EH_TO_KCAL,  # 1 kcal/mol
    max_deviation=0.01,  # 1% difference
    debugfunction=print,
)

DEBUG: MOIPrunerConfig - k=20, rejected 456 (keeping 219/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=10, rejected 50 (keeping 169/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=5, rejected 34 (keeping 135/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=2, rejected 27 (keeping 108/675), in 0.0 s
DEBUG: MOIPrunerConfig - k=1, rejected 0 (keeping 108/675), in 0.0 s
DEBUG: MOIPrunerConfig - keeping 108/675 (0.1 s)
DEBUG: MOIPrunerConfig - Used cached data 7335/19209 times, 38.19% of total calls


In [10]:
pruned, mask = prune_by_rmsd(
    ensemble.coords,
    ensemble.atoms,
    max_rmsd=1.0,  # Will reject below 1 Å
    debugfunction=print,
)

DEBUG: RMSDPrunerConfig - k=20, rejected 152 (keeping 523/675), in 1.5 s
DEBUG: RMSDPrunerConfig - k=10, rejected 73 (keeping 450/675), in 1.1 s
DEBUG: RMSDPrunerConfig - k=5, rejected 62 (keeping 388/675), in 1.3 s
DEBUG: RMSDPrunerConfig - k=2, rejected 76 (keeping 312/675), in 2.9 s
DEBUG: RMSDPrunerConfig - k=1, rejected 36 (keeping 276/675), in 2.7 s
DEBUG: RMSDPrunerConfig - keeping 276/675 (9.6 s)
DEBUG: RMSDPrunerConfig - Used cached data 55967/118708 times, 47.15% of total calls


In [11]:
# %%prun -s cumtime
pruned, mask = prune_by_rmsd(
    ensemble.coords,
    ensemble.atoms,
    energies=ensemble.energies,  # in Eh
    ewin=1 / EH_TO_KCAL,  # 1 kcal/mol
    max_rmsd=1.0,  # Will reject below 1 Å
    debugfunction=print,
)

DEBUG: RMSDPrunerConfig - k=20, rejected 153 (keeping 522/675), in 1.4 s
DEBUG: RMSDPrunerConfig - k=10, rejected 67 (keeping 455/675), in 0.9 s
DEBUG: RMSDPrunerConfig - k=5, rejected 55 (keeping 400/675), in 1.1 s
DEBUG: RMSDPrunerConfig - k=2, rejected 44 (keeping 356/675), in 1.1 s
DEBUG: RMSDPrunerConfig - k=1, rejected 1 (keeping 355/675), in 0.2 s
DEBUG: RMSDPrunerConfig - keeping 355/675 (4.8 s)
DEBUG: RMSDPrunerConfig - Used cached data 64182/141454 times, 45.37% of total calls


In [12]:
from prism_pruner.graph_manipulations import graphize
from prism_pruner.pruner import prune_by_rmsd_rot_corr

In [13]:
# %%prun -s cumtime

graph = graphize(ensemble.atoms, ensemble.coords[0])

pruned, mask = prune_by_rmsd_rot_corr(
    structures=ensemble.coords,
    atoms=ensemble.atoms,
    graph=graph,
    max_rmsd=1.0,  # 1 Å
    debugfunction=print,
)

DEBUG: prune_by_rmsd_rot_corr - temporarily added edge 35-181 to the graph (will be removed before returning)


KeyboardInterrupt: 

In [19]:
%%prun -s cumtime
pruned, mask = prune_by_rmsd_rot_corr(
    structures=ensemble.coords[0:100],
    atoms=ensemble.atoms,
    graph=graph,
    energies=ensemble.energies[0:100],  # in Eh
    ewin=1 / EH_TO_KCAL,  # 1 kcal/mol
    max_rmsd=1.0,  # 1 Å
    debugfunction=print,
    logfunction=print,
)


 >> Dihedrals considered for rotamer corrections:
 1  - [37 38 74 72]         : NPCC : 2-fold
 2  - [37 38 79 78]         : NPCC : 2-fold
 3  - [37 38 86 84]         : NPCC : 2-fold
 4  - [ 68  67 114 115]     : CCCC : 3-fold
 5  - [ 68  70  71 102]     : CCCC : 3-fold
 6  - [ 78  77 154 167]     : CCCC : 3-fold
 7  - [ 80  81 153 155]     : CCCC : 3-fold
 8  - [ 84  83 127 129]     : CCCC : 3-fold
 9  - [ 87  88 128 141]     : CCCC : 3-fold
 10 - [189 193 197 218]     : CCNO : 2-fold


DEBUG: RMSDRotCorrPrunerConfig - k=2, rejected 80 (keeping 20/100), in 10.6 s
DEBUG: RMSDRotCorrPrunerConfig - k=1, rejected 1 (keeping 19/100), in 0.3 s
DEBUG: RMSDRotCorrPrunerConfig - keeping 19/100 (10.9 s)
DEBUG: RMSDRotCorrPrunerConfig - Used cached data 90/771 times, 11.67% of total calls
 

         4738464 function calls (4453553 primitive calls) in 12.405 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      2/1    0.000    0.000   12.398   12.398 {built-in method builtins.exec}
      2/1    0.001    0.000   10.888   10.888 <string>:1(<module>)
        1    0.000    0.000   10.888   10.888 pruner.py:429(prune_by_rmsd_rot_corr)
        1    0.000    0.000   10.888   10.888 pruner.py:297(prune)
        2    0.000    0.000   10.886    5.443 pruner.py:260(_main_compute_group)
        3    0.001    0.000   10.886    3.629 pruner.py:223(_main_compute_row)
      537    0.005    0.000   10.846    0.020 pruner.py:85(evaluate_sim)
      537    0.355    0.001   10.836    0.020 torsion_module.py:401(rotationally_corrected_rmsd_and_max)
      120    0.009    0.000   10.660    0.089 pruner.py:170(_main_compute_subrow)
    36059    1.384    0.000    5.776    0.000 utils.py:105(rotate_dihedral)
    14499    0.351    0.000