From a923808cab37e3f76d3af6019975ebff00577cd8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 3 May 2024 10:25:08 +0200 Subject: [PATCH 01/13] init draft aromatic --- pysmiles/read_smiles.py | 22 ++----- pysmiles/smiles_helper.py | 109 ++++++++++++++++--------------- tests/test_read_smiles.py | 2 +- tests/test_smiles_helpers.py | 123 +++++++++++++++++++++-------------- 4 files changed, 138 insertions(+), 118 deletions(-) diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py index 79e9634..b66c160 100644 --- a/pysmiles/read_smiles.py +++ b/pysmiles/read_smiles.py @@ -184,30 +184,18 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True, if ring_nums: raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys()))) - # Time to deal with aromaticity. This is a mess, because it's not super - # clear what aromaticity information has been provided, and what should be - # inferred. In addition, to what extend do we want to provide a "sane" - # molecule, even if this overrides what the SMILES string specifies? - cycles = nx.cycle_basis(mol) - ring_idxs = set() - for cycle in cycles: - ring_idxs.update(cycle) - non_ring_idxs = set(mol.nodes) - ring_idxs - for n_idx in non_ring_idxs: - if mol.nodes[n_idx].get('aromatic', False): - raise ValueError("You specified an aromatic atom outside of a" - " ring. This is impossible") - - mark_aromatic_edges(mol) - fill_valence(mol) if reinterpret_aromatic: - mark_aromatic_atoms(mol) + mark_aromatic_atoms(mol, prefill_valence=True) mark_aromatic_edges(mol) for idx, jdx in mol.edges: if ((not mol.nodes[idx].get('aromatic', False) or not mol.nodes[jdx].get('aromatic', False)) and mol.edges[idx, jdx].get('order', 1) == 1.5): mol.edges[idx, jdx]['order'] = 1 + else: + mark_aromatic_edges(mol) + + fill_valence(mol) if explicit_hydrogen: add_explicit_hydrogens(mol) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index 0c13c19..f3a8dc7 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -436,8 +436,17 @@ def _hydrogen_neighbours(mol, n_idx): h_neighbours += 1 return h_neighbours - -def mark_aromatic_atoms(mol, atoms=None): +def _prune_nodes(nodes, mol): + new_nodes = [] + for node in nodes: + if mol.nodes[node].get('element', '*') == '*': + new_nodes.append(node) + missing = bonds_missing(mol, node, use_order=True) + if missing > 0: + new_nodes.append(node) + return mol.subgraph(new_nodes) + +def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): """ Sets the 'aromatic' attribute for all nodes in `mol`. Requires that the 'hcount' on atoms is correct. @@ -456,45 +465,49 @@ def mark_aromatic_atoms(mol, atoms=None): """ if atoms is None: atoms = set(mol.nodes) - aromatic = set() - # Only cycles can be aromatic - for cycle in nx.cycle_basis(mol): - # All atoms should be sp2, so each contributes an electron. We make - # sure they are later. - electrons = len(cycle) - maybe_aromatic = True - - for node_idx in cycle: - node = mol.nodes[node_idx] - element = node.get('element', '*').capitalize() - hcount = node.get('hcount', 0) - degree = mol.degree(node_idx) + hcount - hcount += _hydrogen_neighbours(mol, node_idx) - # Make sure they are possibly aromatic, and are sp2 hybridized - if element not in AROMATIC_ATOMS or degree not in (2, 3): - maybe_aromatic = False - break - # Some of the special cases per group. N and O type atoms can - # donate an additional electron from a lone pair. - # missing cases: - # extracyclic sp2 heteroatom (e.g. =O) - # some charged cases - if element in 'N P As'.split() and hcount == 1: - electrons += 1 - elif element in 'O S Se'.split(): - electrons += 1 - if node.get('charge', 0) == +1 and not (element == 'C' and hcount == 0): - electrons -= 1 - if maybe_aromatic and int(electrons) % 2 == 0: - # definitely (anti) aromatic - aromatic.update(cycle) - for node_idx in atoms: - node = mol.nodes[node_idx] - if node_idx not in aromatic: - node['aromatic'] = False + print('go here') + # we start by pre-filling the valance according + # to existing bonds for all non-aromatic nodes + if prefill_valence: + for n_idx in mol: + node = mol.nodes[n_idx] + if not node.get('aromatic', False): + missing = max(bonds_missing(mol, n_idx), 0) + charge = node['charge'] + node['hcount'] = node.get('hcount', 0) + missing + charge + # now we erease all previous notion of aromaticity + nx.set_node_attributes(mol, False, 'aromatic') + # then we find all delocalized subgraphs + # by pruning all nodes that have a defined + # valance + ds_graph = nx.Graph() + ds_graph = _prune_nodes(mol.nodes, mol) + print('nodes', ds_graph.nodes) + for sub_ds in nx.connected_components(ds_graph): + # next we prune atoms that cannot be aromatic but sometimes are + # considered aromatic + #sub_ds_graph = _prune_nodes(sub_ds, mol) + print(sub_ds) + sub_ds_graph = mol.subgraph(sub_ds) + max_match = nx.max_weight_matching(sub_ds_graph) + #print(sub_ds_graph.nodes) + #print(max_match) + # this is a completely invalid smiles + print("--") + print(sub_ds_graph.edges) + print(max_match) + if not nx.is_perfect_matching(sub_ds_graph, max_match): + raise SyntaxError + + # we consider it aromatic in this case + # if it is a cycle + if nx.cycle_basis(sub_ds_graph): + nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic') else: - node['aromatic'] = True - + nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic') + for edge in max_match: + print(edge) + mol.edges[edge]['order'] = 2 def mark_aromatic_edges(mol): """ @@ -511,17 +524,11 @@ def mark_aromatic_edges(mol): None `mol` is modified in-place. """ - for cycle in nx.cycle_basis(mol): - for idx, jdx in mol.edges(nbunch=cycle): - if idx not in cycle or jdx not in cycle: - continue - if (mol.nodes[idx].get('aromatic', False) - and mol.nodes[jdx].get('aromatic', False)): - mol.edges[idx, jdx]['order'] = 1.5 - for idx, jdx in mol.edges: - if 'order' not in mol.edges[idx, jdx]: - mol.edges[idx, jdx]['order'] = 1 - + for edge in mol.edges: + if all(mol.nodes[node].get('aromatic', 'False') for node in edge): + mol.edges[edge]['order'] = 1.5 + elif not mol.edges[edge].get('order', False): + mol.edges[edge]['order'] = 1 def correct_aromatic_rings(mol): """ diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 73196d3..050f059 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -523,10 +523,10 @@ def test_read_smiles(smiles, node_data, edge_data, explicit_h): found = read_smiles(smiles, explicit_hydrogen=explicit_h) print(found.nodes(data=True)) print(found.edges(data=True)) + print(smiles) expected = make_mol(node_data, edge_data) assertEqualGraphs(found, expected) - @pytest.mark.parametrize('smiles, error_type', ( ('[CL-]', ValueError), ('[HH]', ValueError), diff --git a/tests/test_smiles_helpers.py b/tests/test_smiles_helpers.py index 1c59401..d773fbc 100644 --- a/tests/test_smiles_helpers.py +++ b/tests/test_smiles_helpers.py @@ -23,6 +23,7 @@ @pytest.mark.parametrize('helper, kwargs, n_data_in, e_data_in, n_data_out, e_data_out', ( + # 1 ( add_explicit_hydrogens, {}, [(0, {'element': 'C'})], @@ -30,6 +31,7 @@ [(0, {'element': 'C'})], [], ), + # 2 ( add_explicit_hydrogens, {}, [(0, {'element': 'C', 'hcount': 2})], @@ -40,6 +42,7 @@ [(0, 1, {'order': 1}), (0, 2, {'order': 1})], ), + # 3 ( add_explicit_hydrogens, {}, [(0, {'element': 'C', 'hcount': 2}), @@ -57,6 +60,7 @@ (3, 5, {'order': 1}), (0, 3, {'order': 2})], ), + # 4 ( remove_explicit_hydrogens, {}, [(0, {'element': 'C'}), @@ -78,6 +82,7 @@ [(0, 1, {'order': 2}), (1, 2, {'order': 1})], ), + # 5 ( remove_explicit_hydrogens, {}, [(0, {'element': 'H'}), @@ -87,6 +92,7 @@ (1, {'element': 'H', 'hcount': 0}),], [(0, 1, {'order': 1})], ), + # 6 ( remove_explicit_hydrogens, {}, [(0, {'element': 'C'}), @@ -96,6 +102,7 @@ (1, {'element': 'H', 'hcount': 0}),], [(0, 1, {'order': 2})], ), + # 6 ( fill_valence, {'respect_hcount': True, 'respect_bond_order': True, 'max_bond_order': 3}, @@ -106,6 +113,7 @@ (1, {'element': 'C', 'hcount': 3})], [(0, 1, {'order': 1})], ), + # 6 ( fill_valence, {'respect_hcount': True, 'respect_bond_order': True, 'max_bond_order': 3}, @@ -116,6 +124,7 @@ (1, {'element': 'C', 'hcount': 3})], [(0, 1, {'order': 1})], ), + # 7 ( fill_valence, {'respect_hcount': False, 'respect_bond_order': True, 'max_bond_order': 3}, @@ -126,6 +135,7 @@ (1, {'element': 'C', 'hcount': 3})], [(0, 1, {'order': 1})], ), + # 8 ( fill_valence, {'respect_hcount': True, 'respect_bond_order': False, 'max_bond_order': 3}, @@ -136,6 +146,7 @@ (1, {'element': 'C', 'hcount': 1})], [(0, 1, {'order': 3})], ), + # 9 ( # This case sort of stinks, since there's a single aromatic bond not in # a cycle. @@ -148,6 +159,7 @@ (1, {'element': 'C', 'hcount': 2})], [(0, 1, {'order': 1.5})], ), + # 10 ( fill_valence, {'respect_hcount': False, 'respect_bond_order': True, 'max_bond_order': 3}, @@ -158,6 +170,7 @@ (1, {'element': 'C', 'hcount': 5})], [(0, 1, {'order': 1})], ), + # 11 ( mark_aromatic_atoms, {}, [(0, {'element': 'C', 'hcount': 1, 'charge': 0}), @@ -181,6 +194,7 @@ (3, 0, {'order': 1}), (3, 4, {'order': 1})], ), + # 12 ( mark_aromatic_atoms, {}, [(0, {'element': 'C', 'hcount': 2, 'charge': 0}), @@ -204,21 +218,23 @@ (3, 0, {'order': 1}), (3, 4, {'order': 1})], ), - ( - mark_aromatic_atoms, {}, - [(0, {'charge': 1}), - (1, {'charge': 0}), - (2, {'charge': 0}),], - [(0, 1, {'order': 1}), - (1, 2, {'order': 1}), - (2, 0, {'order': 1}),], - [(0, {'charge': 1, 'aromatic': True}), - (1, {'charge': 0, 'aromatic': True}), - (2, {'charge': 0, 'aromatic': True}),], - [(0, 1, {'order': 1}), - (1, 2, {'order': 1}), - (2, 0, {'order': 1}),], - ), + # +# ( +# mark_aromatic_atoms, {}, +# [(0, {'charge': 1}), +# (1, {'charge': 0}), +# (2, {'charge': 0}),], +# [(0, 1, {'order': 1}), +# (1, 2, {'order': 1}), +# (2, 0, {'order': 1}),], +# [(0, {'charge': 1, 'aromatic': True}), +# (1, {'charge': 0, 'aromatic': True}), +# (2, {'charge': 0, 'aromatic': True}),], +# [(0, 1, {'order': 1}), +# (1, 2, {'order': 1}), +# (2, 0, {'order': 1}),], +# ), + # 13 ( mark_aromatic_atoms, {}, [(0, {'element': 'C', 'hcount': 1, 'charge': 0}), @@ -238,6 +254,7 @@ (2, 3, {'order': 1}), (3, 0, {'order': 1}),], ), + # 14 ( mark_aromatic_edges, {}, [(0, {'charge': 1, 'aromatic': True}), @@ -253,6 +270,7 @@ (1, 2, {'order': 1.5}), (2, 0, {'order': 1.5}),], ), + # 15 ( mark_aromatic_edges, {}, [(0, {'charge': 1, 'aromatic': True}), @@ -263,9 +281,10 @@ [(0, {'charge': 1, 'aromatic': True}), (1, {'charge': 0, 'aromatic': True}), (2, {'charge': 0, 'aromatic': True}),], - [(0, 1, {'order': 1}), - (1, 2, {'order': 1}),], + [(0, 1, {'order': 1.5}), + (1, 2, {'order': 1.5}),], ), + # 16 ( # This case smells a bit. Not all atoms in a cycle are aromatic, so only # some of the bonds become aromatic. @@ -283,6 +302,7 @@ (1, 2, {'order': 1.5}), (2, 0, {'order': 1}),], ), + # 17 ( mark_aromatic_edges, {}, [(0, {'charge': 1, 'aromatic': True}), @@ -302,6 +322,7 @@ (2, 0, {'order': 1.5}), (2, 3, {'order': 1})], ), + # 18 ( correct_aromatic_rings, {}, [(0, {'element': 'C'}), @@ -321,6 +342,7 @@ (2, 3, {'order': 1}), (3, 0, {'order': 1})], ), + # 19 ( correct_aromatic_rings, {}, [(0, {'element': 'C', 'hcount': 1}), @@ -340,6 +362,7 @@ (2, 3, {'order': 1.5}), (3, 0, {'order': 1.5})], ), + # 20 - this should lead to bond-orders of three ... ( correct_aromatic_rings, {}, [(0, {'element': 'C', 'hcount': 1}), @@ -353,10 +376,11 @@ (1, {'element': 'C', 'hcount': 1, 'aromatic': False}), (2, {'element': 'C', 'hcount': 1, 'aromatic': False}), (3, {'element': 'C', 'hcount': 1, 'aromatic': False}),], - [(0, 1, {'order': 1}), + [(0, 1, {'order': 2}), (1, 2, {'order': 1}), - (2, 3, {'order': 1}),], + (2, 3, {'order': 2}),], ), + # 21 ( correct_aromatic_rings, {}, [(0, {'element': 'C', 'hcount': 1}), @@ -369,16 +393,16 @@ (2, 3, {}), (3, 4, {}), (4, 0, {})], - [(0, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (1, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (2, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (3, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (4, {'element': 'O', 'hcount': 0, 'aromatic': True}),], - [(0, 1, {'order': 1.5}), - (1, 2, {'order': 1.5}), - (2, 3, {'order': 1.5}), - (3, 4, {'order': 1.5}), - (4, 0, {'order': 1.5})], + [(0, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (1, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (2, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (3, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (4, {'element': 'O', 'hcount': 0, 'aromatic': False}),], + [(0, 1, {'order': 2}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), + (3, 4, {'order': 1}), + (4, 0, {'order': 1})], ), ( correct_aromatic_rings, {}, @@ -392,16 +416,16 @@ (2, 3, {}), (3, 4, {}), (4, 0, {}),], - [(0, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (1, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (2, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (3, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (4, {'element': 'N', 'hcount': 1, 'aromatic': True}),], - [(0, 1, {'order': 1.5}), - (1, 2, {'order': 1.5}), - (2, 3, {'order': 1.5}), - (3, 4, {'order': 1.5}), - (4, 0, {'order': 1.5}),], + [(0, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (1, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (2, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (3, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (4, {'element': 'N', 'hcount': 1, 'aromatic': False}),], + [(0, 1, {'order': 2}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), + (3, 4, {'order': 1}), + (4, 0, {'order': 1}),], ), ( correct_aromatic_rings, {}, @@ -417,21 +441,22 @@ (3, 4, {}), (4, 0, {}), (4, 5, {})], - [(0, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (1, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (2, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (3, {'element': 'C', 'hcount': 1, 'aromatic': True}), - (4, {'element': 'N', 'hcount': 0, 'aromatic': True}), + [(0, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (1, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (2, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (3, {'element': 'C', 'hcount': 1, 'aromatic': False}), + (4, {'element': 'N', 'hcount': 0, 'aromatic': False}), (5, {'element': 'H', 'hcount': 0, 'aromatic': False})], - [(0, 1, {'order': 1.5}), - (1, 2, {'order': 1.5}), - (2, 3, {'order': 1.5}), - (3, 4, {'order': 1.5}), - (4, 0, {'order': 1.5}), + [(0, 1, {'order': 2}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), + (3, 4, {'order': 1}), + (4, 0, {'order': 1}), (4, 5, {'order': 1})], ), )) def test_helper(helper, kwargs, n_data_in, e_data_in, n_data_out, e_data_out): mol = make_mol(n_data_in, e_data_in) helper(mol, **kwargs) + ref_mol = make_mol(n_data_out, e_data_out) assertEqualGraphs(mol, make_mol(n_data_out, e_data_out)) From 4cee726a3a98dbdcac0bf5a04b50b3e2115477c0 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 3 May 2024 13:04:38 +0200 Subject: [PATCH 02/13] fix all but one test --- tests/test_read_smiles.py | 88 +++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 050f059..1f016fa 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -344,7 +344,7 @@ ( '[2H][CH2]', [(0, {'charge': 0, 'element': 'H', 'hcount': 0, 'isotope': 2, 'aromatic': False}), - (1, {'charge': 0, 'element': 'C', 'hcount': 2, 'aromatic': False})], + (1, {'charge': 0, 'element': 'C', 'hcount': 3, 'aromatic': False})], [(0, 1, {'order': 1})], False ), @@ -407,45 +407,45 @@ ), ( 'c1occc1', - [(0, {'charge': 0, 'element': 'C', 'aromatic': True}), - (1, {'charge': 0, 'element': 'O', 'aromatic': True}), - (2, {'charge': 0, 'element': 'C', 'aromatic': True}), - (3, {'charge': 0, 'element': 'C', 'aromatic': True}), - (4, {'charge': 0, 'element': 'C', 'aromatic': True}), + [(0, {'charge': 0, 'element': 'C', 'aromatic': False}), + (1, {'charge': 0, 'element': 'O', 'aromatic': False}), + (2, {'charge': 0, 'element': 'C', 'aromatic': False}), + (3, {'charge': 0, 'element': 'C', 'aromatic': False}), + (4, {'charge': 0, 'element': 'C', 'aromatic': False}), (5, {'charge': 0, 'element': 'H', 'aromatic': False}), (6, {'charge': 0, 'element': 'H', 'aromatic': False}), (7, {'charge': 0, 'element': 'H', 'aromatic': False}), (8, {'charge': 0, 'element': 'H', 'aromatic': False})], - [(0, 1, {'order': 1.5}), - (0, 4, {'order': 1.5}), + [(0, 1, {'order': 1}), + (0, 4, {'order': 2}), (0, 5, {'order': 1}), - (1, 2, {'order': 1.5}), - (2, 3, {'order': 1.5}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), (2, 6, {'order': 1}), - (3, 4, {'order': 1.5}), + (3, 4, {'order': 1}), (3, 7, {'order': 1}), (4, 8, {'order': 1})], True ), ( 'c1[asH]ccc1', - [(0, {'charge': 0, 'element': 'C', 'aromatic': True}), - (1, {'charge': 0, 'element': 'As', 'aromatic': True}), - (2, {'charge': 0, 'element': 'C', 'aromatic': True}), - (3, {'charge': 0, 'element': 'C', 'aromatic': True}), - (4, {'charge': 0, 'element': 'C', 'aromatic': True}), + [(0, {'charge': 0, 'element': 'C', 'aromatic': False}), + (1, {'charge': 0, 'element': 'As', 'aromatic': False}), + (2, {'charge': 0, 'element': 'C', 'aromatic': False}), + (3, {'charge': 0, 'element': 'C', 'aromatic': False}), + (4, {'charge': 0, 'element': 'C', 'aromatic': False}), (5, {'charge': 0, 'element': 'H', 'aromatic': False}), (6, {'charge': 0, 'element': 'H', 'aromatic': False}), (7, {'charge': 0, 'element': 'H', 'aromatic': False}), (8, {'charge': 0, 'element': 'H', 'aromatic': False}), (9, {'charge': 0, 'element': 'H', 'aromatic': False})], - [(0, 1, {'order': 1.5}), - (0, 4, {'order': 1.5}), + [(0, 1, {'order': 1}), + (0, 4, {'order': 2}), (0, 5, {'order': 1}), - (1, 2, {'order': 1.5}), - (2, 3, {'order': 1.5}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), (2, 6, {'order': 1}), - (3, 4, {'order': 1.5}), + (3, 4, {'order': 1}), (3, 7, {'order': 1}), (4, 8, {'order': 1}), (1, 9, {'order': 1}),], @@ -453,22 +453,22 @@ ), ( 'c1[se]ccc1', - [(0, {'charge': 0, 'element': 'C', 'aromatic': True}), - (1, {'charge': 0, 'element': 'Se', 'aromatic': True}), - (2, {'charge': 0, 'element': 'C', 'aromatic': True}), - (3, {'charge': 0, 'element': 'C', 'aromatic': True}), - (4, {'charge': 0, 'element': 'C', 'aromatic': True}), + [(0, {'charge': 0, 'element': 'C', 'aromatic': False}), + (1, {'charge': 0, 'element': 'Se', 'aromatic': False}), + (2, {'charge': 0, 'element': 'C', 'aromatic': False}), + (3, {'charge': 0, 'element': 'C', 'aromatic': False}), + (4, {'charge': 0, 'element': 'C', 'aromatic': False}), (5, {'charge': 0, 'element': 'H', 'aromatic': False}), (6, {'charge': 0, 'element': 'H', 'aromatic': False}), (7, {'charge': 0, 'element': 'H', 'aromatic': False}), (8, {'charge': 0, 'element': 'H', 'aromatic': False})], - [(0, 1, {'order': 1.5}), - (0, 4, {'order': 1.5}), + [(0, 1, {'order': 1}), + (0, 4, {'order': 2}), (0, 5, {'order': 1}), - (1, 2, {'order': 1.5}), - (2, 3, {'order': 1.5}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), (2, 6, {'order': 1}), - (3, 4, {'order': 1.5}), + (3, 4, {'order': 1}), (3, 7, {'order': 1}), (4, 8, {'order': 1})], True @@ -500,22 +500,22 @@ ), ( '[*+]1[*][*]1', - [(0, {'charge': 1, 'aromatic': True, 'hcount': 0}), - (1, {'charge': 0, 'aromatic': True, 'hcount': 0}), - (2, {'charge': 0, 'aromatic': True, 'hcount': 0})], - [(0, 1, {'order': 1.5}), - (1, 2, {'order': 1.5}), - (2, 0, {'order': 1.5}),], + [(0, {'charge': 1, 'aromatic': False, 'hcount': 0}), + (1, {'charge': 0, 'aromatic': False, 'hcount': 0}), + (2, {'charge': 0, 'aromatic': False, 'hcount': 0})], + [(0, 1, {'order': 1}), + (1, 2, {'order': 1}), + (2, 0, {'order': 1}),], False ), ( 'N1[*][*]1', - [(0, {'element': 'N', 'charge': 0, 'aromatic': True, 'hcount': 1}), - (1, {'charge': 0, 'aromatic': True, 'hcount': 0}), - (2, {'charge': 0, 'aromatic': True, 'hcount': 0})], - [(0, 1, {'order': 1.5}), - (1, 2, {'order': 1.5}), - (2, 0, {'order': 1.5}),], + [(0, {'element': 'N', 'charge': 0, 'aromatic': False, 'hcount': 1}), + (1, {'charge': 0, 'aromatic': False, 'hcount': 0}), + (2, {'charge': 0, 'aromatic': False, 'hcount': 0})], + [(0, 1, {'order': 1}), + (1, 2, {'order': 1}), + (2, 0, {'order': 1}),], False ) )) @@ -535,7 +535,7 @@ def test_read_smiles(smiles, node_data, edge_data, explicit_h): ('c1c1CC', ValueError), ('CC11C', ValueError), ('1CCC1', ValueError), - ('cccccc', ValueError), +# ('cccccc', ValueError), ('C=1CC-1', ValueError), )) def test_invalid_smiles(smiles, error_type): From 51f09af76d98a42ee33b2075960d9500d373388b Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 3 May 2024 13:07:57 +0200 Subject: [PATCH 03/13] fix wild-cards and unkown elements --- pysmiles/smiles_helper.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index f3a8dc7..bc7fe2b 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -439,17 +439,20 @@ def _hydrogen_neighbours(mol, n_idx): def _prune_nodes(nodes, mol): new_nodes = [] for node in nodes: - if mol.nodes[node].get('element', '*') == '*': - new_nodes.append(node) - missing = bonds_missing(mol, node, use_order=True) + # no point in trying to guess hybridization for + # things of unkown valance; i.e. remove them + if mol.nodes[node].get('element', '*') not in VALENCES: + continue + # the charge addition is neccessary because non of the + # functions seem to deal with absolute charges so far + missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node]['charge'] if missing > 0: new_nodes.append(node) return mol.subgraph(new_nodes) def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): """ - Sets the 'aromatic' attribute for all nodes in `mol`. Requires that - the 'hcount' on atoms is correct. + Properly kekeulizes molecules and sets the aromatic attribute. Parameters ---------- @@ -465,14 +468,14 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): """ if atoms is None: atoms = set(mol.nodes) - print('go here') # we start by pre-filling the valance according # to existing bonds for all non-aromatic nodes if prefill_valence: for n_idx in mol: node = mol.nodes[n_idx] - if not node.get('aromatic', False): + if not node.get('aromatic', False) and node.get('element', '*') in VALENCES: missing = max(bonds_missing(mol, n_idx), 0) + print(missing) charge = node['charge'] node['hcount'] = node.get('hcount', 0) + missing + charge # now we erease all previous notion of aromaticity @@ -482,20 +485,12 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): # valance ds_graph = nx.Graph() ds_graph = _prune_nodes(mol.nodes, mol) - print('nodes', ds_graph.nodes) for sub_ds in nx.connected_components(ds_graph): # next we prune atoms that cannot be aromatic but sometimes are # considered aromatic - #sub_ds_graph = _prune_nodes(sub_ds, mol) - print(sub_ds) sub_ds_graph = mol.subgraph(sub_ds) max_match = nx.max_weight_matching(sub_ds_graph) - #print(sub_ds_graph.nodes) - #print(max_match) # this is a completely invalid smiles - print("--") - print(sub_ds_graph.edges) - print(max_match) if not nx.is_perfect_matching(sub_ds_graph, max_match): raise SyntaxError @@ -506,7 +501,6 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): else: nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic') for edge in max_match: - print(edge) mol.edges[edge]['order'] = 2 def mark_aromatic_edges(mol): From c3b1677e5e877f83f51cbedbad7feba3a9d10e1e Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 7 May 2024 10:03:10 +0200 Subject: [PATCH 04/13] address comments --- pysmiles/read_smiles.py | 2 +- pysmiles/smiles_helper.py | 78 +++++++++++++++++++++--------------- tests/test_read_smiles.py | 12 ++++++ tests/test_smiles_helpers.py | 4 +- 4 files changed, 61 insertions(+), 35 deletions(-) diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py index b66c160..0c5cc74 100644 --- a/pysmiles/read_smiles.py +++ b/pysmiles/read_smiles.py @@ -185,7 +185,7 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True, raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys()))) if reinterpret_aromatic: - mark_aromatic_atoms(mol, prefill_valence=True) + mark_aromatic_atoms(mol) mark_aromatic_edges(mol) for idx, jdx in mol.edges: if ((not mol.nodes[idx].get('aromatic', False) or diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index bc7fe2b..e3b3a78 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -439,18 +439,16 @@ def _hydrogen_neighbours(mol, n_idx): def _prune_nodes(nodes, mol): new_nodes = [] for node in nodes: - # no point in trying to guess hybridization for - # things of unkown valance; i.e. remove them - if mol.nodes[node].get('element', '*') not in VALENCES: + # all wild card nodes are ellegible + if mol.nodes[node].get('element', '*') == '*': + new_nodes.append(node) continue - # the charge addition is neccessary because non of the - # functions seem to deal with absolute charges so far - missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node]['charge'] + missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node].get('charge', 0) if missing > 0: new_nodes.append(node) return mol.subgraph(new_nodes) -def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): +def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False): """ Properly kekeulizes molecules and sets the aromatic attribute. @@ -460,6 +458,10 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): The molecule. atoms: collections.abc.Iterable The atoms to act on. Will still analyse the full molecule. + correct_aromatic: bool + If the falg is set then all nodes are considered + in the kekulization process otherwise only aromatic + nodes are considered. Returns ------- @@ -468,34 +470,46 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False): """ if atoms is None: atoms = set(mol.nodes) - # we start by pre-filling the valance according - # to existing bonds for all non-aromatic nodes - if prefill_valence: - for n_idx in mol: - node = mol.nodes[n_idx] - if not node.get('aromatic', False) and node.get('element', '*') in VALENCES: - missing = max(bonds_missing(mol, n_idx), 0) - print(missing) - charge = node['charge'] - node['hcount'] = node.get('hcount', 0) + missing + charge - # now we erease all previous notion of aromaticity - nx.set_node_attributes(mol, False, 'aromatic') - # then we find all delocalized subgraphs - # by pruning all nodes that have a defined - # valance ds_graph = nx.Graph() - ds_graph = _prune_nodes(mol.nodes, mol) + # in the correct aromatic mode we consider + # the hcount of all nodes to be correct and + # prune all nodes that have full valance + # bond orders are ignored + if correct_aromatic: + nodes = mol.nodes + # otherwise we only consider aromatic nodes; + # all other nodes regardless of their valency + # are pruned + else: + nodes = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic] + + # prune all nodes from molecule that are elegible and have + # full valency + ds_graph = _prune_nodes(nodes, mol) + + # set the aromatic attribute to False for all nodes + # as a precaution + nx.set_node_attributes(mol, False, 'aromatic') + for sub_ds in nx.connected_components(ds_graph): - # next we prune atoms that cannot be aromatic but sometimes are - # considered aromatic sub_ds_graph = mol.subgraph(sub_ds) + print(sub_ds_graph.nodes) max_match = nx.max_weight_matching(sub_ds_graph) - # this is a completely invalid smiles - if not nx.is_perfect_matching(sub_ds_graph, max_match): - raise SyntaxError - - # we consider it aromatic in this case - # if it is a cycle + # if the subgraph is three nodes it might be + # a triangle, which is the only special case + # where there is no maximum match but + is_triangle = (len(sub_ds_graph.nodes) == 3 and nx.cycle_basis(sub_ds_graph)) + if not is_triangle: + max_match = nx.max_weight_matching(sub_ds_graph) + # we check if a maximum matching exists and + # if it is perfect. if it is not perfect, + # this graph originates from a completely invalid + # smiles and we raise an error + if not nx.is_perfect_matching(sub_ds_graph, max_match): + msg = "Your molecule is invalid and cannot be kekulized." + raise SyntaxError(msg) + + # we consider it aromatic in this case if it is a cycle if nx.cycle_basis(sub_ds_graph): nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic') else: @@ -540,7 +554,7 @@ def correct_aromatic_rings(mol): `mol` is modified in-place. """ fill_valence(mol) - mark_aromatic_atoms(mol) + mark_aromatic_atoms(mol, correct_aromatic=True) mark_aromatic_edges(mol) diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 1f016fa..75dc0ce 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -556,3 +556,15 @@ def test_stereo_logging(caplog, smiles, n_records): assert len(caplog.records) == n_records for record in caplog.records: assert record.levelname == "WARNING" + + +@pytest.mark.parametrize('smiles', ( + 'c1c[nH]cc1', + 'c1cNcc1', + 'c1cScc1', + 'c1cnc[nH]1', + 'c1cncN1', + 'c1cscc1')) +def test_kekulize(smiles): + g = read_smiles(smiles) + assert len(g) > 0 diff --git a/tests/test_smiles_helpers.py b/tests/test_smiles_helpers.py index d773fbc..09d4a81 100644 --- a/tests/test_smiles_helpers.py +++ b/tests/test_smiles_helpers.py @@ -172,7 +172,7 @@ ), # 11 ( - mark_aromatic_atoms, {}, + mark_aromatic_atoms, {'correct_aromatic': True}, [(0, {'element': 'C', 'hcount': 1, 'charge': 0}), (1, {'element': 'C', 'hcount': 1, 'charge': 0}), (2, {'element': 'C', 'hcount': 1, 'charge': 0}), @@ -236,7 +236,7 @@ # ), # 13 ( - mark_aromatic_atoms, {}, + mark_aromatic_atoms, {'correct_aromatic': True}, [(0, {'element': 'C', 'hcount': 1, 'charge': 0}), (1, {'element': 'C', 'hcount': 1, 'charge': 0}), (2, {'element': 'C', 'hcount': 1, 'charge': 0}), From 6c7b0d0afe27640a8a75a58b15e0929069e12de1 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 7 May 2024 18:34:10 +0200 Subject: [PATCH 05/13] fix test 2HCH2 --- tests/test_read_smiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 75dc0ce..6fa072e 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -344,7 +344,7 @@ ( '[2H][CH2]', [(0, {'charge': 0, 'element': 'H', 'hcount': 0, 'isotope': 2, 'aromatic': False}), - (1, {'charge': 0, 'element': 'C', 'hcount': 3, 'aromatic': False})], + (1, {'charge': 0, 'element': 'C', 'hcount': 2, 'aromatic': False})], [(0, 1, {'order': 1})], False ), From a61b2e770fd8d137f491d16b75647dcf4e17b34c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 7 May 2024 18:39:07 +0200 Subject: [PATCH 06/13] externalize selection of nodes to work on when kekulizing --- pysmiles/read_smiles.py | 3 ++- pysmiles/smiles_helper.py | 20 +++----------------- tests/test_smiles_helpers.py | 4 ++-- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py index 0c5cc74..f47b2b5 100644 --- a/pysmiles/read_smiles.py +++ b/pysmiles/read_smiles.py @@ -185,7 +185,8 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True, raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys()))) if reinterpret_aromatic: - mark_aromatic_atoms(mol) + arom_atoms = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic] + mark_aromatic_atoms(mol, arom_atoms) mark_aromatic_edges(mol) for idx, jdx in mol.edges: if ((not mol.nodes[idx].get('aromatic', False) or diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index e3b3a78..cbb1006 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -448,7 +448,7 @@ def _prune_nodes(nodes, mol): new_nodes.append(node) return mol.subgraph(new_nodes) -def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False): +def mark_aromatic_atoms(mol, atoms=None): """ Properly kekeulizes molecules and sets the aromatic attribute. @@ -470,22 +470,9 @@ def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False): """ if atoms is None: atoms = set(mol.nodes) - ds_graph = nx.Graph() - # in the correct aromatic mode we consider - # the hcount of all nodes to be correct and - # prune all nodes that have full valance - # bond orders are ignored - if correct_aromatic: - nodes = mol.nodes - # otherwise we only consider aromatic nodes; - # all other nodes regardless of their valency - # are pruned - else: - nodes = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic] - # prune all nodes from molecule that are elegible and have # full valency - ds_graph = _prune_nodes(nodes, mol) + ds_graph = _prune_nodes(atoms, mol) # set the aromatic attribute to False for all nodes # as a precaution @@ -493,7 +480,6 @@ def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False): for sub_ds in nx.connected_components(ds_graph): sub_ds_graph = mol.subgraph(sub_ds) - print(sub_ds_graph.nodes) max_match = nx.max_weight_matching(sub_ds_graph) # if the subgraph is three nodes it might be # a triangle, which is the only special case @@ -554,7 +540,7 @@ def correct_aromatic_rings(mol): `mol` is modified in-place. """ fill_valence(mol) - mark_aromatic_atoms(mol, correct_aromatic=True) + mark_aromatic_atoms(mol, atoms=mol.nodes) mark_aromatic_edges(mol) diff --git a/tests/test_smiles_helpers.py b/tests/test_smiles_helpers.py index 09d4a81..d773fbc 100644 --- a/tests/test_smiles_helpers.py +++ b/tests/test_smiles_helpers.py @@ -172,7 +172,7 @@ ), # 11 ( - mark_aromatic_atoms, {'correct_aromatic': True}, + mark_aromatic_atoms, {}, [(0, {'element': 'C', 'hcount': 1, 'charge': 0}), (1, {'element': 'C', 'hcount': 1, 'charge': 0}), (2, {'element': 'C', 'hcount': 1, 'charge': 0}), @@ -236,7 +236,7 @@ # ), # 13 ( - mark_aromatic_atoms, {'correct_aromatic': True}, + mark_aromatic_atoms, {}, [(0, {'element': 'C', 'hcount': 1, 'charge': 0}), (1, {'element': 'C', 'hcount': 1, 'charge': 0}), (2, {'element': 'C', 'hcount': 1, 'charge': 0}), From 98566ff14d084855b7f7e4ae4563ec4f98cb7d96 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 7 May 2024 18:45:06 +0200 Subject: [PATCH 07/13] add test for skipping kekulizing --- tests/test_read_smiles.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 6fa072e..3aa717f 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -564,7 +564,17 @@ def test_stereo_logging(caplog, smiles, n_records): 'c1cScc1', 'c1cnc[nH]1', 'c1cncN1', - 'c1cscc1')) + 'c1cscc1',)) def test_kekulize(smiles): g = read_smiles(smiles) assert len(g) > 0 + + +@pytest.mark.parametrize('smiles', ( + 'cc', + 'cn',)) +def test_skip_kekulize(smiles): + g = read_smiles(smiles, reinterpret_aromatic=False) + for node in g.nodes: + assert g.nodes[node]['aromatic'] + assert g.edges[(0, 1)]['order'] == 1.5 From 11cb991b094864896f72acc49172f12b52965f33 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 7 May 2024 18:48:03 +0200 Subject: [PATCH 08/13] fix comment and docstrings --- pysmiles/smiles_helper.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index cbb1006..ef0b176 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -439,7 +439,7 @@ def _hydrogen_neighbours(mol, n_idx): def _prune_nodes(nodes, mol): new_nodes = [] for node in nodes: - # all wild card nodes are ellegible + # all wild card nodes are eligible if mol.nodes[node].get('element', '*') == '*': new_nodes.append(node) continue @@ -457,11 +457,7 @@ def mark_aromatic_atoms(mol, atoms=None): mol : nx.Graph The molecule. atoms: collections.abc.Iterable - The atoms to act on. Will still analyse the full molecule. - correct_aromatic: bool - If the falg is set then all nodes are considered - in the kekulization process otherwise only aromatic - nodes are considered. + The atoms to act on; all other nodes are pruned Returns ------- @@ -470,7 +466,7 @@ def mark_aromatic_atoms(mol, atoms=None): """ if atoms is None: atoms = set(mol.nodes) - # prune all nodes from molecule that are elegible and have + # prune all nodes from molecule that are eligible and have # full valency ds_graph = _prune_nodes(atoms, mol) From f3e9dd2b4477319ae70954d4dc200bc5e36449d8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 8 May 2024 13:22:52 +0200 Subject: [PATCH 09/13] fix writer for aromatic systems --- pysmiles/write_smiles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pysmiles/write_smiles.py b/pysmiles/write_smiles.py index 4263701..6fafd7a 100644 --- a/pysmiles/write_smiles.py +++ b/pysmiles/write_smiles.py @@ -68,8 +68,8 @@ def _write_edge_symbol(molecule, n_idx, n_jdx): Whether an explicit symbol is needed for this edge. """ order = molecule.edges[n_idx, n_jdx].get('order', 1) - aromatic_atoms = molecule.nodes[n_idx].get('element', '*').islower() and\ - molecule.nodes[n_jdx].get('element', '*').islower() + aromatic_atoms = molecule.nodes[n_idx].get('aromatic', False) and\ + molecule.nodes[n_jdx].get('aromatic', False) aromatic_bond = aromatic_atoms and order == 1.5 cross_aromatic = aromatic_atoms and order == 1 single_bond = order == 1 From 3089553e41e4d06b5225e8da419e807a505f4571 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 8 May 2024 13:54:41 +0200 Subject: [PATCH 10/13] fix bug when setting aromatic bond orders --- pysmiles/smiles_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index ef0b176..11075a6 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -517,7 +517,7 @@ def mark_aromatic_edges(mol): for edge in mol.edges: if all(mol.nodes[node].get('aromatic', 'False') for node in edge): mol.edges[edge]['order'] = 1.5 - elif not mol.edges[edge].get('order', False): + elif 'order' not in mol.edges[edge]: mol.edges[edge]['order'] = 1 def correct_aromatic_rings(mol): From e7203311260ab72a419e669c574829eaa50cb371 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 8 May 2024 15:10:47 +0200 Subject: [PATCH 11/13] fix aromtic assignment --- tests/test_read_smiles.py | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 3aa717f..7cd3a77 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -382,6 +382,70 @@ (2, 0, {'order': 1})], False ), + ( + 'c1cscc1', + [(0, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (1, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (2, {'charge': 0, 'element': 'S', 'aromatic': False, 'hcount': 0}), + (3, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (4, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),], + [(0, 1, {'order': 2}), + (0, 4, {'order': 1}), + (1, 2, {'order': 1}), + (2, 3, {'order': 1}), + (3, 4, {'order': 2}),], + False + ), + ( + 'c12ccccc1[nH]cc2', + [(0, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 0}), + (1, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (2, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (3, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (4, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (5, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 0}), + (6, {'charge': 0, 'element': 'N', 'aromatic': False, 'hcount': 1}), + (7, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}), + (8, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),], + [(0, 1, {'order': 2}), + (1, 2, {'order': 1}), + (2, 3, {'order': 2}), + (3, 4, {'order': 1}), + (4, 5, {'order': 2}), + (5, 0, {'order': 1}), + (5, 6, {'order': 1}), + (6, 7, {'order': 1}), + (7, 8, {'order': 2}), + (8, 0, {'order': 1}),], + False + ), + ( + 'c1cc2ccc3c2c1cc3', + [(0, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}), + (1, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}), + (2, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}), + (3, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}), + (4, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}), + (5, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}), + (6, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}), + (7, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}), + (8, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}), + (9, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),], + [(0, 1, {'order': 1.5}), + (0, 7, {'order': 1.5}), + (1, 2, {'order': 1.5}), + (2, 3, {'order': 1.5}), + (3, 4, {'order': 1.5}), + (4, 5, {'order': 1.5}), + (6, 2, {'order': 1.5}), + (5, 6, {'order': 1.5}), + (5, 9, {'order': 1.5}), + (6, 7, {'order': 1.5}), + (6, 2, {'order': 1.5}), + (9, 8, {'order': 1.5}), + (7, 8, {'order': 1.5}),], + False + ), ( '[Rh-](Cl)(Cl)(Cl)(Cl)$[Rh-](Cl)(Cl)(Cl)Cl', [(0, {'charge': -1, 'element': 'Rh', 'hcount': 0, 'aromatic': False}), From 636f7c0cdb4f280bc175899c0db83c5a73840daf Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 8 May 2024 15:11:04 +0200 Subject: [PATCH 12/13] fix aromtic assignment --- pysmiles/smiles_helper.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index 11075a6..f7deadf 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -491,9 +491,16 @@ def mark_aromatic_atoms(mol, atoms=None): msg = "Your molecule is invalid and cannot be kekulized." raise SyntaxError(msg) - # we consider it aromatic in this case if it is a cycle - if nx.cycle_basis(sub_ds_graph): - nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic') + # we consider a node aromatic if it can take part in DIME + # to do so all nodes in a delocalized subgraph have to be + # part of a cycle system + cycles = nx.cycle_basis(sub_ds_graph) + nodes_in_cycles = [] + for cycle in cycles: + nodes_in_cycles += cycle + + if set(nodes_in_cycles) == set(mol.nodes): + nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic') else: nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic') for edge in max_match: From cb1c721db8094007463c467d5f58b67483fe48ae Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 8 May 2024 15:13:07 +0200 Subject: [PATCH 13/13] fix aromtic assignment bug considering only ds subgraph --- pysmiles/smiles_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index f7deadf..a0676a3 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -499,7 +499,7 @@ def mark_aromatic_atoms(mol, atoms=None): for cycle in cycles: nodes_in_cycles += cycle - if set(nodes_in_cycles) == set(mol.nodes): + if set(nodes_in_cycles) == set(sub_ds_graph.nodes): nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic') else: nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic')