Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: simplify SBU constructor #152

Merged
merged 9 commits into from
Nov 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.0.3-dev
current_version = 0.0.4-dev
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
Expand Down
40 changes: 0 additions & 40 deletions .flake8

This file was deleted.

2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
author = "Kevin Maik Jablonka"

# The full version, including alpha/beta/rc tags
release = "0.0.3-dev"
release = "0.0.4-dev"


# -- General configuration ---------------------------------------------------
Expand Down
43 changes: 42 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = moffragmentor
version = 0.0.3-dev
version = 0.0.4-dev
description = Splits MOFs into metal nodes and linkers.
author = Kevin Maik Jablonka
author_email = mail@kjablonka.com
Expand Down Expand Up @@ -142,3 +142,44 @@ exclude_lines =
[darglint]
docstring_style = google
strictness = short


#########################
# Flake8 Configuration #
# (.flake8) #
#########################
[flake8]
ignore =
S301 # pickle
S403 # pickle
S404
S603
W503 # Line break before binary operator (flake8 is wrong)
E203 # whitespace before ':'
S101 # Complaining about assert statements
D101 # Docstring missing
D102 # Docstring missing
D103 # Docstring missing
D104 # Docstring missing
D400
exclude =
.tox,
.git,
__pycache__,
docs/source/conf.py,
build,
dist,
tests/fixtures/*,
*.pyc,
*.egg-info,
.cache,
.eggs,
data
max-line-length = 120
max-complexity = 20
import-order-style = pycharm
application-import-names =
moffragmentor
tests
per-file-ignores =
tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209
7 changes: 7 additions & 0 deletions src/moffragmentor/fragmentor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ def run_fragmentation(
)

linker_collection = LinkerCollection(is_linker)
if len(linker_collection) == 0 and len(is_capping) > 0:
logger.warning(
"No linkers found, but capping molecules. Assigning capping molecules to linkers."
)
linker_collection = LinkerCollection(is_capping)
is_capping = []

capping_molecules = LinkerCollection(is_capping)

# Now handle the case of the the frameworks that have linkers without core (e.g. H-COO)
Expand Down
42 changes: 12 additions & 30 deletions src/moffragmentor/fragmentor/linkerlocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
from pymatgen.core import Structure
from structuregraph_helpers.subgraph import get_subgraphs_as_molecules

from .molfromgraph import wrap_molecule
from ..sbu import Linker, LinkerCollection
from ..utils import _flatten_list_of_sets
from moffragmentor.fragmentor.molfromgraph import wrap_molecule
from moffragmentor.sbu import Linker, LinkerCollection

__all__ = ["create_linker_collection", "identify_linker_binding_indices"]

Expand Down Expand Up @@ -55,16 +54,16 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals
# First step: remove everything that is node or unbound solvent
# bound solvent is part of the node by default
all_node_indices = set()
all_persistent_non_metal_bridges = set()
# all_persistent_non_metal_bridges = set()
for node_indices in node_location_result.nodes:
all_node_indices.update(node_indices)

for node in node_collection:
all_persistent_non_metal_bridges.update(
_flatten_list_of_sets(
node._persistent_non_metal_bridged # pylint:disable=protected-access
)
)
# for node in node_collection:
# all_persistent_non_metal_bridges.update(
# _flatten_list_of_sets(
# node._persistent_non_metal_bridged # pylint:disable=protected-access
# )
# )

not_linker_indices = (
(
Expand All @@ -81,23 +80,13 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals
| set(mof.metal_indices) & all_node_indices
# some metals might also be in the linker, e.g., in porphyrins
)

# potential_linker_indices = set(list(range(len(mof.structure)))) - not_linker_indices
# get terminal indices we need to keep in the linker

# terminal_indices = []
# for linker_index in potential_linker_indices:
# for neighbor in mof.get_neighbor_indices(linker_index):
# if mof._is_terminal(neighbor):
# terminal_indices.append(neighbor)

graph_ = mof.structure_graph.__copy__()
graph_.structure = Structure.from_sites(graph_.structure.sites)
graph_.remove_nodes(not_linker_indices)

# Second step: extract the connected components
# return all as molecules
mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
_mols, graphs, idxs, _centers, coordinates = get_subgraphs_as_molecules(
graph_,
return_unique=False,
filter_in_cell=False,
Expand All @@ -107,29 +96,22 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals

# Third: pick those molecules that are closest to the UC
# ToDo: we should be able to skip this
linker_indices, coords = _pick_central_linker_indices(mof, coordinates)
linker_indices, _coords = _pick_central_linker_indices(mof, coordinates)
found_frac_centers = set()
found_hashes = set()
for i, linker_index in enumerate(linker_indices):
for linker_index in linker_indices:
idx = idxs[linker_index]
center = centers[linker_index]
coords_ = coords[i]
branching_indices = node_location_result.branching_indices & set(idx)
mol, mapping = wrap_molecule(idx, mof)
linker = Linker(
molecule=mol,
molecule_graph=graphs[linker_index],
center=center,
graph_branching_indices=branching_indices,
closest_branching_index_in_molecule=branching_indices,
binding_indices=identify_linker_binding_indices(
mof,
node_location_result.connecting_paths,
idx,
),
coordinates=coords_,
original_indices=idx,
connecting_paths=[],
molecule_original_indices_mapping=mapping,
)
frac_center = mof.structure.lattice.get_fractional_coords(mol.center_of_mass)
Expand Down
8 changes: 6 additions & 2 deletions src/moffragmentor/fragmentor/molfromgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@


def wrap_molecule(
mol_idxs: Iterable[int], mof: "mof.MOF", starting_index: Optional[int] = None # noqa: F821
mol_idxs: Iterable[int],
mof: "mof.MOF",
starting_index: Optional[int] = None,
add_additional_site: bool = True, # noqa: F821
) -> Molecule:
"""Wrap a molecule in the cell of the MOF by walking along the structure graph.

Expand All @@ -30,6 +33,7 @@ def wrap_molecule(
mof (MOF): MOF object that contains the mol_idxs.
starting_index (int, optional): Starting index for the walk.
Defaults to 0.
add_additional_site (bool): Whether to add an additional site

Returns:
Molecule: wrapped molecule
Expand Down Expand Up @@ -76,7 +80,7 @@ def wrap_molecule(
- new_positions_cart[current_index]
)
> VestaCutoffDictNN._lookup_dict[species_a][species_b]
):
) & add_additional_site:
logger.warning(
"Warning: neighbor_index {} is already in new_positions_cart, "
"but the distance is too large. "
Expand Down
30 changes: 19 additions & 11 deletions src/moffragmentor/fragmentor/nodelocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,8 @@ def create_node_collection(mof, node_location_result: NodelocationResult) -> Nod
node = Node.from_mof_and_indices(
mof=mof,
node_indices=node_indices,
branching_indices=node_location_result.branching_indices & node_indices,
binding_indices=node_location_result.binding_indices & node_indices,
connecting_paths=node_location_result.connecting_paths & node_indices,
branching_indices=node_location_result.branching_indices,
binding_indices=node_location_result.binding_indices,
)
nodes.append(node)

Expand Down Expand Up @@ -217,7 +216,7 @@ def break_rod_nodes(mof, node_result):
"""Break rod nodes into smaller pieces."""
new_nodes = []
for node in node_result.nodes:
if get_sbu_dimensionality(mof, node) == 1:
if get_sbu_dimensionality(mof, node) >= 1:
logger.debug("Found 1- or 2-dimensional node. Will break into isolated metals.")
new_nodes.extend(break_rod_node(mof, node))
else:
Expand Down Expand Up @@ -246,10 +245,10 @@ def check_node(node_indices, branching_indices, mof) -> bool:
bool: True if the node is reasonable, False otherwise
"""
# check if there is not way more organic than metal
num_carbons = len(node_indices & set(mof.c_indices))
num_organic = len(node_indices & set(mof.c_indices)) + len(node_indices & set(mof.n_indices))
num_metals = len(node_indices & set(mof.metal_indices))
branching_indices_in_node = branching_indices & node_indices
if num_carbons > num_metals + len(branching_indices_in_node):
if num_organic > num_metals + len(branching_indices_in_node):
return False
return True

Expand All @@ -258,7 +257,10 @@ def break_organic_nodes(node_result, mof):
"""If we have a node that is mostly organic, we break it up into smaller pieces."""
new_nodes = []
for node in node_result.nodes:
if check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
if len(node) == len(mof):
logger.debug("Breaking node as full MOF is assigned as node.")
new_nodes.extend(break_rod_node(mof, node))
elif check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
node, node_result.branching_indices, mof
):
new_nodes.append(node)
Expand Down Expand Up @@ -304,7 +306,6 @@ def find_nodes(
node_result = find_node_clusters(
mof, unbound_solvent.indices, forbidden_indices=forbidden_indices
)

if create_single_metal_bus:
# Rewrite the node result
node_result = create_single_metal_nodes(mof, node_result)
Expand All @@ -331,16 +332,22 @@ def might_be_porphyrin(node_indices, branching_idx, mof):
metal_in_node = _get_metal_sublist(node_indices, mof.metal_indices)
node_indices = set(node_indices)
branching_idx = set(branching_idx)
bound_to_metal = sum([mof.get_neighbor_indices(i) for i in metal_in_node], [])
branching_bound_to_metal = branching_idx & set(bound_to_metal)
# ToDo: check and think if this can handle the general case
# it should, at least if we only look at the metals
if (len(metal_in_node) == 1) & (len(node_indices) > 1):
if (len(metal_in_node) == 1) & (len(node_indices | branching_bound_to_metal) > 1):
logger.debug(
"metal_in_node",
metal_in_node,
node_indices,
)
num_neighbors = len(mof.get_neighbor_indices(metal_in_node[0]))
if metal_and_branching_coplanar(node_indices, branching_idx, mof) & (num_neighbors > 2):
num_neighbors = len(bound_to_metal)
if (
metal_and_branching_coplanar(node_indices, branching_bound_to_metal, mof)
& (num_neighbors > 2)
& (len(branching_bound_to_metal) < 5)
):
logger.debug(
"Metal in linker found, indices: {}".format(
node_indices,
Expand All @@ -355,5 +362,6 @@ def detect_porphyrin(node_collection, mof):
not_node = []
for i, node in enumerate(node_collection):
if might_be_porphyrin(node._original_indices, node._original_graph_branching_indices, mof):
logger.info("Found porphyrin in node {}".format(node._original_indices))
not_node.append(i)
return not_node
4 changes: 2 additions & 2 deletions src/moffragmentor/fragmentor/splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.core import Molecule

from .solventlocator import _locate_bound_solvent
from ..utils import (
from moffragmentor.fragmentor.solventlocator import _locate_bound_solvent
from moffragmentor.utils import (
_get_cartesian_coords,
_get_molecule_edge_label,
_get_vertices_of_smaller_component_upon_edge_break,
Expand Down
8 changes: 8 additions & 0 deletions src/moffragmentor/mof.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def _reset(self):
self._bridges = None
self._nx_graph = None

def __copy__(self):
"""Make a a new MOF object with copies of the same structure and structure graph."""
return MOF(IStructure.from_sites(self._structure.sites), self.structure_graph.__copy__())

def dump(self, path) -> None:
"""Dump this object as pickle file"""
pickle_dump(self, path)
Expand Down Expand Up @@ -257,6 +261,10 @@ def h_indices(self) -> List[int]:
def c_indices(self) -> List[int]:
return [i for i, species in enumerate(self.structure.species) if str(species) == "C"]

@cached_property
def n_indices(self) -> List[int]:
return [i for i, species in enumerate(self.structure.species) if str(species) == "N"]

def get_neighbor_indices(self, site: int) -> List[int]:
"""Get list of indices of neighboring sites."""
return [site.index for site in self.structure_graph.get_connected_sites(site)]
Expand Down
Loading