Skip to content

Commit

Permalink
Merge pull request #152 from kjappelbaum/kjappelbaum/issue151
Browse files Browse the repository at this point in the history
refactor: simplify SBU constructor
  • Loading branch information
kjappelbaum committed Nov 7, 2022
2 parents 3b3fddc + 95374a4 commit b6c19fb
Show file tree
Hide file tree
Showing 17 changed files with 338 additions and 268 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.0.3-dev
current_version = 0.0.4-dev
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
Expand Down
40 changes: 0 additions & 40 deletions .flake8

This file was deleted.

2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
author = "Kevin Maik Jablonka"

# The full version, including alpha/beta/rc tags
release = "0.0.3-dev"
release = "0.0.4-dev"


# -- General configuration ---------------------------------------------------
Expand Down
43 changes: 42 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = moffragmentor
version = 0.0.3-dev
version = 0.0.4-dev
description = Splits MOFs into metal nodes and linkers.
author = Kevin Maik Jablonka
author_email = mail@kjablonka.com
Expand Down Expand Up @@ -142,3 +142,44 @@ exclude_lines =
[darglint]
docstring_style = google
strictness = short


#########################
# Flake8 Configuration #
# (.flake8) #
#########################
[flake8]
ignore =
S301 # pickle
S403 # pickle
S404
S603
W503 # Line break before binary operator (flake8 is wrong)
E203 # whitespace before ':'
S101 # Complaining about assert statements
D101 # Docstring missing
D102 # Docstring missing
D103 # Docstring missing
D104 # Docstring missing
D400
exclude =
.tox,
.git,
__pycache__,
docs/source/conf.py,
build,
dist,
tests/fixtures/*,
*.pyc,
*.egg-info,
.cache,
.eggs,
data
max-line-length = 120
max-complexity = 20
import-order-style = pycharm
application-import-names =
moffragmentor
tests
per-file-ignores =
tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209
7 changes: 7 additions & 0 deletions src/moffragmentor/fragmentor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ def run_fragmentation(
)

linker_collection = LinkerCollection(is_linker)
if len(linker_collection) == 0 and len(is_capping) > 0:
logger.warning(
"No linkers found, but capping molecules. Assigning capping molecules to linkers."
)
linker_collection = LinkerCollection(is_capping)
is_capping = []

capping_molecules = LinkerCollection(is_capping)

# Now handle the case of the the frameworks that have linkers without core (e.g. H-COO)
Expand Down
42 changes: 12 additions & 30 deletions src/moffragmentor/fragmentor/linkerlocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
from pymatgen.core import Structure
from structuregraph_helpers.subgraph import get_subgraphs_as_molecules

from .molfromgraph import wrap_molecule
from ..sbu import Linker, LinkerCollection
from ..utils import _flatten_list_of_sets
from moffragmentor.fragmentor.molfromgraph import wrap_molecule
from moffragmentor.sbu import Linker, LinkerCollection

__all__ = ["create_linker_collection", "identify_linker_binding_indices"]

Expand Down Expand Up @@ -55,16 +54,16 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals
# First step: remove everything that is node or unbound solvent
# bound solvent is part of the node by default
all_node_indices = set()
all_persistent_non_metal_bridges = set()
# all_persistent_non_metal_bridges = set()
for node_indices in node_location_result.nodes:
all_node_indices.update(node_indices)

for node in node_collection:
all_persistent_non_metal_bridges.update(
_flatten_list_of_sets(
node._persistent_non_metal_bridged # pylint:disable=protected-access
)
)
# for node in node_collection:
# all_persistent_non_metal_bridges.update(
# _flatten_list_of_sets(
# node._persistent_non_metal_bridged # pylint:disable=protected-access
# )
# )

not_linker_indices = (
(
Expand All @@ -81,23 +80,13 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals
| set(mof.metal_indices) & all_node_indices
# some metals might also be in the linker, e.g., in porphyrins
)

# potential_linker_indices = set(list(range(len(mof.structure)))) - not_linker_indices
# get terminal indices we need to keep in the linker

# terminal_indices = []
# for linker_index in potential_linker_indices:
# for neighbor in mof.get_neighbor_indices(linker_index):
# if mof._is_terminal(neighbor):
# terminal_indices.append(neighbor)

graph_ = mof.structure_graph.__copy__()
graph_.structure = Structure.from_sites(graph_.structure.sites)
graph_.remove_nodes(not_linker_indices)

# Second step: extract the connected components
# return all as molecules
mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
_mols, graphs, idxs, _centers, coordinates = get_subgraphs_as_molecules(
graph_,
return_unique=False,
filter_in_cell=False,
Expand All @@ -107,29 +96,22 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals

# Third: pick those molecules that are closest to the UC
# ToDo: we should be able to skip this
linker_indices, coords = _pick_central_linker_indices(mof, coordinates)
linker_indices, _coords = _pick_central_linker_indices(mof, coordinates)
found_frac_centers = set()
found_hashes = set()
for i, linker_index in enumerate(linker_indices):
for linker_index in linker_indices:
idx = idxs[linker_index]
center = centers[linker_index]
coords_ = coords[i]
branching_indices = node_location_result.branching_indices & set(idx)
mol, mapping = wrap_molecule(idx, mof)
linker = Linker(
molecule=mol,
molecule_graph=graphs[linker_index],
center=center,
graph_branching_indices=branching_indices,
closest_branching_index_in_molecule=branching_indices,
binding_indices=identify_linker_binding_indices(
mof,
node_location_result.connecting_paths,
idx,
),
coordinates=coords_,
original_indices=idx,
connecting_paths=[],
molecule_original_indices_mapping=mapping,
)
frac_center = mof.structure.lattice.get_fractional_coords(mol.center_of_mass)
Expand Down
8 changes: 6 additions & 2 deletions src/moffragmentor/fragmentor/molfromgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@


def wrap_molecule(
mol_idxs: Iterable[int], mof: "mof.MOF", starting_index: Optional[int] = None # noqa: F821
mol_idxs: Iterable[int],
mof: "mof.MOF",
starting_index: Optional[int] = None,
add_additional_site: bool = True, # noqa: F821
) -> Molecule:
"""Wrap a molecule in the cell of the MOF by walking along the structure graph.
Expand All @@ -30,6 +33,7 @@ def wrap_molecule(
mof (MOF): MOF object that contains the mol_idxs.
starting_index (int, optional): Starting index for the walk.
Defaults to 0.
add_additional_site (bool): Whether to add an additional site
Returns:
Molecule: wrapped molecule
Expand Down Expand Up @@ -76,7 +80,7 @@ def wrap_molecule(
- new_positions_cart[current_index]
)
> VestaCutoffDictNN._lookup_dict[species_a][species_b]
):
) & add_additional_site:
logger.warning(
"Warning: neighbor_index {} is already in new_positions_cart, "
"but the distance is too large. "
Expand Down
30 changes: 19 additions & 11 deletions src/moffragmentor/fragmentor/nodelocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,8 @@ def create_node_collection(mof, node_location_result: NodelocationResult) -> Nod
node = Node.from_mof_and_indices(
mof=mof,
node_indices=node_indices,
branching_indices=node_location_result.branching_indices & node_indices,
binding_indices=node_location_result.binding_indices & node_indices,
connecting_paths=node_location_result.connecting_paths & node_indices,
branching_indices=node_location_result.branching_indices,
binding_indices=node_location_result.binding_indices,
)
nodes.append(node)

Expand Down Expand Up @@ -217,7 +216,7 @@ def break_rod_nodes(mof, node_result):
"""Break rod nodes into smaller pieces."""
new_nodes = []
for node in node_result.nodes:
if get_sbu_dimensionality(mof, node) == 1:
if get_sbu_dimensionality(mof, node) >= 1:
logger.debug("Found 1- or 2-dimensional node. Will break into isolated metals.")
new_nodes.extend(break_rod_node(mof, node))
else:
Expand Down Expand Up @@ -246,10 +245,10 @@ def check_node(node_indices, branching_indices, mof) -> bool:
bool: True if the node is reasonable, False otherwise
"""
# check if there is not way more organic than metal
num_carbons = len(node_indices & set(mof.c_indices))
num_organic = len(node_indices & set(mof.c_indices)) + len(node_indices & set(mof.n_indices))
num_metals = len(node_indices & set(mof.metal_indices))
branching_indices_in_node = branching_indices & node_indices
if num_carbons > num_metals + len(branching_indices_in_node):
if num_organic > num_metals + len(branching_indices_in_node):
return False
return True

Expand All @@ -258,7 +257,10 @@ def break_organic_nodes(node_result, mof):
"""If we have a node that is mostly organic, we break it up into smaller pieces."""
new_nodes = []
for node in node_result.nodes:
if check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
if len(node) == len(mof):
logger.debug("Breaking node as full MOF is assigned as node.")
new_nodes.extend(break_rod_node(mof, node))
elif check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
node, node_result.branching_indices, mof
):
new_nodes.append(node)
Expand Down Expand Up @@ -304,7 +306,6 @@ def find_nodes(
node_result = find_node_clusters(
mof, unbound_solvent.indices, forbidden_indices=forbidden_indices
)

if create_single_metal_bus:
# Rewrite the node result
node_result = create_single_metal_nodes(mof, node_result)
Expand All @@ -331,16 +332,22 @@ def might_be_porphyrin(node_indices, branching_idx, mof):
metal_in_node = _get_metal_sublist(node_indices, mof.metal_indices)
node_indices = set(node_indices)
branching_idx = set(branching_idx)
bound_to_metal = sum([mof.get_neighbor_indices(i) for i in metal_in_node], [])
branching_bound_to_metal = branching_idx & set(bound_to_metal)
# ToDo: check and think if this can handle the general case
# it should, at least if we only look at the metals
if (len(metal_in_node) == 1) & (len(node_indices) > 1):
if (len(metal_in_node) == 1) & (len(node_indices | branching_bound_to_metal) > 1):
logger.debug(
"metal_in_node",
metal_in_node,
node_indices,
)
num_neighbors = len(mof.get_neighbor_indices(metal_in_node[0]))
if metal_and_branching_coplanar(node_indices, branching_idx, mof) & (num_neighbors > 2):
num_neighbors = len(bound_to_metal)
if (
metal_and_branching_coplanar(node_indices, branching_bound_to_metal, mof)
& (num_neighbors > 2)
& (len(branching_bound_to_metal) < 5)
):
logger.debug(
"Metal in linker found, indices: {}".format(
node_indices,
Expand All @@ -355,5 +362,6 @@ def detect_porphyrin(node_collection, mof):
not_node = []
for i, node in enumerate(node_collection):
if might_be_porphyrin(node._original_indices, node._original_graph_branching_indices, mof):
logger.info("Found porphyrin in node {}".format(node._original_indices))
not_node.append(i)
return not_node
4 changes: 2 additions & 2 deletions src/moffragmentor/fragmentor/splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.core import Molecule

from .solventlocator import _locate_bound_solvent
from ..utils import (
from moffragmentor.fragmentor.solventlocator import _locate_bound_solvent
from moffragmentor.utils import (
_get_cartesian_coords,
_get_molecule_edge_label,
_get_vertices_of_smaller_component_upon_edge_break,
Expand Down
8 changes: 8 additions & 0 deletions src/moffragmentor/mof.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def _reset(self):
self._bridges = None
self._nx_graph = None

def __copy__(self):
"""Make a a new MOF object with copies of the same structure and structure graph."""
return MOF(IStructure.from_sites(self._structure.sites), self.structure_graph.__copy__())

def dump(self, path) -> None:
"""Dump this object as pickle file"""
pickle_dump(self, path)
Expand Down Expand Up @@ -257,6 +261,10 @@ def h_indices(self) -> List[int]:
def c_indices(self) -> List[int]:
return [i for i, species in enumerate(self.structure.species) if str(species) == "C"]

@cached_property
def n_indices(self) -> List[int]:
return [i for i, species in enumerate(self.structure.species) if str(species) == "N"]

def get_neighbor_indices(self, site: int) -> List[int]:
"""Get list of indices of neighboring sites."""
return [site.index for site in self.structure_graph.get_connected_sites(site)]
Expand Down

0 comments on commit b6c19fb

Please sign in to comment.