Skip to content

Commit

Permalink
Link config for removing duplicate substructs (#41)
Browse files Browse the repository at this point in the history
  • Loading branch information
sethaxen committed Feb 25, 2020
1 parent ff9b199 commit f7d4ecb
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
2 changes: 1 addition & 1 deletion e3fp/config/defaults.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ stereo = True
counts = False
include_disconnected = True
rdkit_invariants = False
-merge_duplicate_substructs = True
+remove_duplicate_substructs = True
exclude_floating = True
5 changes: 4 additions & 1 deletion e3fp/fingerprint/fprinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
# Default for the `exclude_floating` option, fetched with
# `get_default_value` using section "fingerprinting" and type `bool`.
# NOTE(review): presumably backed by e3fp/config/defaults.cfg -- confirm.
EXCLUDE_FLOATING_DEF = get_default_value(
"fingerprinting", "exclude_floating", bool
)
# Default for the `remove_duplicate_substructs` option, fetched the same way.
# Used as the keyword default of `__init__` in place of a hard-coded `True`.
REMOVE_DUPLICATE_SUBSTRUCTS_DEF = get_default_value(
"fingerprinting", "remove_duplicate_substructs", bool
)
IDENT_DTYPE = np.int64 # np.dtype to use for identifiers
Y_AXIS_PRECISION = 0.1 # angstroms
Z_AXIS_PRECISION = 0.01 # rad
Expand Down Expand Up @@ -107,7 +110,7 @@ def __init__(
include_disconnected=INCLUDE_DISCONNECTED_DEF,
rdkit_invariants=RDKIT_INVARIANTS_DEF,
exclude_floating=EXCLUDE_FLOATING_DEF,
-remove_duplicate_substructs=True,
+remove_duplicate_substructs=REMOVE_DUPLICATE_SUBSTRUCTS_DEF,
):
"""Initialize fingerprinter settings."""
self.mol = None
Expand Down
15 changes: 14 additions & 1 deletion e3fp/fingerprint/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
# Default for the `exclude_floating` option, fetched with
# `get_default_value` using section "fingerprinting" and type `bool`.
# NOTE(review): presumably backed by e3fp/config/defaults.cfg -- confirm.
EXCLUDE_FLOATING_DEF = get_default_value(
"fingerprinting", "exclude_floating", bool
)

# Default for the `remove_duplicate_substructs` option, fetched the same way.
# Used as the keyword default of `fprints_dict_from_mol` and `run`.
REMOVE_DUPLICATE_SUBSTRUCTS_DEF = get_default_value(
"fingerprinting", "remove_duplicate_substructs", bool
)
# Default value of the `out_ext` parameter.
OUT_EXT_DEF = ".fp.bz2"


Expand Down Expand Up @@ -67,6 +69,7 @@ def fprints_dict_from_mol(
include_disconnected=INCLUDE_DISCONNECTED_DEF,
rdkit_invariants=RDKIT_INVARIANTS_DEF,
exclude_floating=EXCLUDE_FLOATING_DEF,
remove_duplicate_substructs=REMOVE_DUPLICATE_SUBSTRUCTS_DEF,
out_dir_base=None,
out_ext=OUT_EXT_DEF,
save=False,
Expand Down Expand Up @@ -94,6 +97,10 @@ def fprints_dict_from_mol(
fingerprints.
stereo : bool, optional
Incorporate stereochemistry in fingerprint.
remove_duplicate_substructs : bool, optional
If a substructure arises that corresponds to an identifier already in
the fingerprint, then the identifier for the duplicate substructure is
not added to fingerprint.
include_disconnected : bool, optional
Include disconnected atoms when hashing and for stereo calculations.
Turn off purely for testing purposes, to make E3FP more like ECFP.
Expand Down Expand Up @@ -168,6 +175,7 @@ def fprints_dict_from_mol(
include_disconnected=include_disconnected,
rdkit_invariants=rdkit_invariants,
exclude_floating=exclude_floating,
remove_duplicate_substructs=remove_duplicate_substructs,
)

try:
Expand Down Expand Up @@ -242,6 +250,7 @@ def run(
include_disconnected=INCLUDE_DISCONNECTED_DEF,
rdkit_invariants=RDKIT_INVARIANTS_DEF,
exclude_floating=EXCLUDE_FLOATING_DEF,
remove_duplicate_substructs=REMOVE_DUPLICATE_SUBSTRUCTS_DEF,
params=None,
out_dir_base=None,
out_ext=OUT_EXT_DEF,
Expand Down Expand Up @@ -275,6 +284,9 @@ def run(
exclude_floating = get_value(
params, "fingerprinting", "exclude_floating", bool
)
remove_duplicate_substructs = get_value(
params, "fingerprinting", "remove_duplicate_substructs", bool
)

para = Parallelizer(num_proc=num_proc, parallel_mode=parallel_mode)

Expand Down Expand Up @@ -325,6 +337,7 @@ def run(
"include_disconnected": include_disconnected,
"rdkit_invariants": rdkit_invariants,
"exclude_floating": exclude_floating,
"remove_duplicate_substructs": remove_duplicate_substructs,
"out_dir_base": out_dir_base,
"out_ext": out_ext,
"all_iters": all_iters,
Expand Down

0 comments on commit f7d4ecb

Please sign in to comment.