From a923808cab37e3f76d3af6019975ebff00577cd8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 3 May 2024 10:25:08 +0200
Subject: [PATCH 01/13] init draft aromatic

---
 pysmiles/read_smiles.py      |  22 ++-----
 pysmiles/smiles_helper.py    | 109 ++++++++++++++++---------------
 tests/test_read_smiles.py    |   2 +-
 tests/test_smiles_helpers.py | 123 +++++++++++++++++++++--------------
 4 files changed, 138 insertions(+), 118 deletions(-)

diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py
index 79e9634..b66c160 100644
--- a/pysmiles/read_smiles.py
+++ b/pysmiles/read_smiles.py
@@ -184,30 +184,18 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
     if ring_nums:
         raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
 
-    # Time to deal with aromaticity. This is a mess, because it's not super
-    # clear what aromaticity information has been provided, and what should be
-    # inferred. In addition, to what extend do we want to provide a "sane"
-    # molecule, even if this overrides what the SMILES string specifies?
-    cycles = nx.cycle_basis(mol)
-    ring_idxs = set()
-    for cycle in cycles:
-        ring_idxs.update(cycle)
-    non_ring_idxs = set(mol.nodes) - ring_idxs
-    for n_idx in non_ring_idxs:
-        if mol.nodes[n_idx].get('aromatic', False):
-            raise ValueError("You specified an aromatic atom outside of a"
-                             " ring. This is impossible")
-    
-    mark_aromatic_edges(mol)
-    fill_valence(mol)
     if reinterpret_aromatic:
-        mark_aromatic_atoms(mol)
+        mark_aromatic_atoms(mol, prefill_valence=True)
         mark_aromatic_edges(mol)
         for idx, jdx in mol.edges:
             if ((not mol.nodes[idx].get('aromatic', False) or
                     not mol.nodes[jdx].get('aromatic', False))
                     and mol.edges[idx, jdx].get('order', 1) == 1.5):
                 mol.edges[idx, jdx]['order'] = 1
+    else:
+        mark_aromatic_edges(mol)
+
+    fill_valence(mol)
 
     if explicit_hydrogen:
         add_explicit_hydrogens(mol)
diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index 0c13c19..f3a8dc7 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -436,8 +436,17 @@ def _hydrogen_neighbours(mol, n_idx):
             h_neighbours += 1
     return h_neighbours
 
-
-def mark_aromatic_atoms(mol, atoms=None):
+def _prune_nodes(nodes, mol):
+    new_nodes = []
+    for node in nodes:
+        if mol.nodes[node].get('element', '*') == '*':
+            new_nodes.append(node)
+        missing = bonds_missing(mol, node, use_order=True)
+        if missing > 0:
+            new_nodes.append(node)
+    return mol.subgraph(new_nodes)
+
+def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
     """
     Sets the 'aromatic' attribute for all nodes in `mol`. Requires that
     the 'hcount' on atoms is correct.
@@ -456,45 +465,49 @@ def mark_aromatic_atoms(mol, atoms=None):
     """
     if atoms is None:
         atoms = set(mol.nodes)
-    aromatic = set()
-    # Only cycles can be aromatic
-    for cycle in nx.cycle_basis(mol):
-        # All atoms should be sp2, so each contributes an electron. We make
-        # sure they are later.
-        electrons = len(cycle)
-        maybe_aromatic = True
-
-        for node_idx in cycle:
-            node = mol.nodes[node_idx]
-            element = node.get('element', '*').capitalize()
-            hcount = node.get('hcount', 0)
-            degree = mol.degree(node_idx) + hcount
-            hcount += _hydrogen_neighbours(mol, node_idx)
-            # Make sure they are possibly aromatic, and are sp2 hybridized
-            if element not in AROMATIC_ATOMS or degree not in (2, 3):
-                maybe_aromatic = False
-                break
-            # Some of the special cases per group. N and O type atoms can
-            # donate an additional electron from a lone pair.
-            # missing cases:
-            #   extracyclic sp2 heteroatom (e.g. =O)
-            #   some charged cases
-            if element in 'N P As'.split() and hcount == 1:
-                electrons += 1
-            elif element in 'O S Se'.split():
-                electrons += 1
-            if node.get('charge', 0) == +1 and not (element == 'C' and hcount == 0):
-                electrons -= 1
-        if maybe_aromatic and int(electrons) % 2 == 0:
-            # definitely (anti) aromatic
-            aromatic.update(cycle)
-    for node_idx in atoms:
-        node = mol.nodes[node_idx]
-        if node_idx not in aromatic:
-            node['aromatic'] = False
+    print('go here')
+    # we start by pre-filling the valance according
+    # to existing bonds for all non-aromatic nodes
+    if prefill_valence:
+        for n_idx in mol:
+            node = mol.nodes[n_idx]
+            if not node.get('aromatic', False):
+                missing = max(bonds_missing(mol, n_idx), 0)
+                charge = node['charge']
+                node['hcount'] = node.get('hcount', 0) + missing + charge
+    # now we erease all previous notion of aromaticity
+    nx.set_node_attributes(mol, False, 'aromatic')
+    # then we find all delocalized subgraphs
+    # by pruning all nodes that have a defined
+    # valance
+    ds_graph = nx.Graph()
+    ds_graph = _prune_nodes(mol.nodes, mol)
+    print('nodes', ds_graph.nodes)
+    for sub_ds in nx.connected_components(ds_graph):
+        # next we prune atoms that cannot be aromatic but sometimes are
+        # considered aromatic
+        #sub_ds_graph = _prune_nodes(sub_ds, mol)
+        print(sub_ds)
+        sub_ds_graph = mol.subgraph(sub_ds)
+        max_match = nx.max_weight_matching(sub_ds_graph)
+        #print(sub_ds_graph.nodes)
+        #print(max_match)
+        # this is a completely invalid smiles
+        print("--")
+        print(sub_ds_graph.edges)
+        print(max_match)
+        if not nx.is_perfect_matching(sub_ds_graph, max_match):
+            raise SyntaxError
+
+        # we consider it aromatic in this case
+        # if it is a cycle
+        if nx.cycle_basis(sub_ds_graph):
+            nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic')
         else:
-            node['aromatic'] = True
-
+            nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic')
+            for edge in max_match:
+                print(edge)
+                mol.edges[edge]['order'] = 2
 
 def mark_aromatic_edges(mol):
     """
@@ -511,17 +524,11 @@ def mark_aromatic_edges(mol):
     None
         `mol` is modified in-place.
     """
-    for cycle in nx.cycle_basis(mol):
-        for idx, jdx in mol.edges(nbunch=cycle):
-            if idx not in cycle or jdx not in cycle:
-                continue
-            if (mol.nodes[idx].get('aromatic', False)
-                    and mol.nodes[jdx].get('aromatic', False)):
-                mol.edges[idx, jdx]['order'] = 1.5
-    for idx, jdx in mol.edges:
-        if 'order' not in mol.edges[idx, jdx]:
-            mol.edges[idx, jdx]['order'] = 1
-
+    for edge in mol.edges:
+        if all(mol.nodes[node].get('aromatic', 'False') for node in edge):
+            mol.edges[edge]['order'] = 1.5
+        elif not mol.edges[edge].get('order', False):
+            mol.edges[edge]['order'] = 1
 
 def correct_aromatic_rings(mol):
     """
diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
index 73196d3..050f059 100644
--- a/tests/test_read_smiles.py
+++ b/tests/test_read_smiles.py
@@ -523,10 +523,10 @@ def test_read_smiles(smiles, node_data, edge_data, explicit_h):
     found = read_smiles(smiles, explicit_hydrogen=explicit_h)
     print(found.nodes(data=True))
     print(found.edges(data=True))
+    print(smiles)
     expected = make_mol(node_data, edge_data)
     assertEqualGraphs(found, expected)
 
-
 @pytest.mark.parametrize('smiles, error_type', (
     ('[CL-]', ValueError),
     ('[HH]', ValueError),
diff --git a/tests/test_smiles_helpers.py b/tests/test_smiles_helpers.py
index 1c59401..d773fbc 100644
--- a/tests/test_smiles_helpers.py
+++ b/tests/test_smiles_helpers.py
@@ -23,6 +23,7 @@
 
 
 @pytest.mark.parametrize('helper, kwargs, n_data_in, e_data_in, n_data_out, e_data_out', (
+    # 1
     (
         add_explicit_hydrogens, {},
         [(0, {'element': 'C'})],
@@ -30,6 +31,7 @@
         [(0, {'element': 'C'})],
         [],
     ),
+    # 2
     (
         add_explicit_hydrogens, {},
         [(0, {'element': 'C', 'hcount': 2})],
@@ -40,6 +42,7 @@
         [(0, 1, {'order': 1}),
          (0, 2, {'order': 1})],
     ),
+    # 3
     (
         add_explicit_hydrogens, {},
         [(0, {'element': 'C', 'hcount': 2}),
@@ -57,6 +60,7 @@
          (3, 5, {'order': 1}),
          (0, 3, {'order': 2})],
     ),
+    # 4
     (
         remove_explicit_hydrogens, {},
         [(0, {'element': 'C'}),
@@ -78,6 +82,7 @@
         [(0, 1, {'order': 2}),
          (1, 2, {'order': 1})],
     ),
+    # 5
     (
         remove_explicit_hydrogens, {},
         [(0, {'element': 'H'}),
@@ -87,6 +92,7 @@
          (1, {'element': 'H', 'hcount': 0}),],
         [(0, 1, {'order': 1})],
     ),
+    # 6
     (
         remove_explicit_hydrogens, {},
         [(0, {'element': 'C'}),
@@ -96,6 +102,7 @@
          (1, {'element': 'H', 'hcount': 0}),],
         [(0, 1, {'order': 2})],
     ),
+    # 6
     (
         fill_valence,
         {'respect_hcount': True, 'respect_bond_order': True, 'max_bond_order': 3},
@@ -106,6 +113,7 @@
          (1, {'element': 'C', 'hcount': 3})],
         [(0, 1, {'order': 1})],
     ),
+    # 6
     (
         fill_valence,
         {'respect_hcount': True, 'respect_bond_order': True, 'max_bond_order': 3},
@@ -116,6 +124,7 @@
          (1, {'element': 'C', 'hcount': 3})],
         [(0, 1, {'order': 1})],
     ),
+    # 7
     (
         fill_valence,
         {'respect_hcount': False, 'respect_bond_order': True, 'max_bond_order': 3},
@@ -126,6 +135,7 @@
          (1, {'element': 'C', 'hcount': 3})],
         [(0, 1, {'order': 1})],
     ),
+    # 8
     (
         fill_valence,
         {'respect_hcount': True, 'respect_bond_order': False, 'max_bond_order': 3},
@@ -136,6 +146,7 @@
          (1, {'element': 'C', 'hcount': 1})],
         [(0, 1, {'order': 3})],
     ),
+    # 9
     (
         # This case sort of stinks, since there's a single aromatic bond not in
         # a cycle.
@@ -148,6 +159,7 @@
          (1, {'element': 'C', 'hcount': 2})],
         [(0, 1, {'order': 1.5})],
     ),
+    # 10
     (
         fill_valence,
         {'respect_hcount': False, 'respect_bond_order': True, 'max_bond_order': 3},
@@ -158,6 +170,7 @@
          (1, {'element': 'C', 'hcount': 5})],
         [(0, 1, {'order': 1})],
     ),
+    # 11
     (
         mark_aromatic_atoms, {},
         [(0, {'element': 'C', 'hcount': 1, 'charge': 0}),
@@ -181,6 +194,7 @@
          (3, 0, {'order': 1}),
          (3, 4, {'order': 1})],
     ),
+    # 12
     (
         mark_aromatic_atoms, {},
         [(0, {'element': 'C', 'hcount': 2, 'charge': 0}),
@@ -204,21 +218,23 @@
          (3, 0, {'order': 1}),
          (3, 4, {'order': 1})],
     ),
-    (
-        mark_aromatic_atoms, {},
-        [(0, {'charge': 1}),
-         (1, {'charge': 0}),
-         (2, {'charge': 0}),],
-        [(0, 1, {'order': 1}),
-         (1, 2, {'order': 1}),
-         (2, 0, {'order': 1}),],
-        [(0, {'charge': 1, 'aromatic': True}),
-         (1, {'charge': 0, 'aromatic': True}),
-         (2, {'charge': 0, 'aromatic': True}),],
-        [(0, 1, {'order': 1}),
-         (1, 2, {'order': 1}),
-         (2, 0, {'order': 1}),],
-    ),
+    # 
+#    (
+#       mark_aromatic_atoms, {},
+#       [(0, {'charge': 1}),
+#        (1, {'charge': 0}),
+#        (2, {'charge': 0}),],
+#       [(0, 1, {'order': 1}),
+#        (1, 2, {'order': 1}),
+#        (2, 0, {'order': 1}),],
+#       [(0, {'charge': 1, 'aromatic': True}),
+#        (1, {'charge': 0, 'aromatic': True}),
+#        (2, {'charge': 0, 'aromatic': True}),],
+#       [(0, 1, {'order': 1}),
+#        (1, 2, {'order': 1}),
+#        (2, 0, {'order': 1}),],
+#   ),
+    # 13
     (
         mark_aromatic_atoms, {},
         [(0, {'element': 'C', 'hcount': 1, 'charge': 0}),
@@ -238,6 +254,7 @@
          (2, 3, {'order': 1}),
          (3, 0, {'order': 1}),],
     ),
+    # 14
     (
         mark_aromatic_edges, {},
         [(0, {'charge': 1, 'aromatic': True}),
@@ -253,6 +270,7 @@
          (1, 2, {'order': 1.5}),
          (2, 0, {'order': 1.5}),],
     ),
+    # 15
     (
         mark_aromatic_edges, {},
         [(0, {'charge': 1, 'aromatic': True}),
@@ -263,9 +281,10 @@
         [(0, {'charge': 1, 'aromatic': True}),
          (1, {'charge': 0, 'aromatic': True}),
          (2, {'charge': 0, 'aromatic': True}),],
-        [(0, 1, {'order': 1}),
-         (1, 2, {'order': 1}),],
+        [(0, 1, {'order': 1.5}),
+         (1, 2, {'order': 1.5}),],
     ),
+    # 16
     (
         # This case smells a bit. Not all atoms in a cycle are aromatic, so only
         # some of the bonds become aromatic.
@@ -283,6 +302,7 @@
          (1, 2, {'order': 1.5}),
          (2, 0, {'order': 1}),],
     ),
+    # 17
     (
         mark_aromatic_edges, {},
         [(0, {'charge': 1, 'aromatic': True}),
@@ -302,6 +322,7 @@
          (2, 0, {'order': 1.5}),
          (2, 3, {'order': 1})],
     ),
+    # 18
     (
         correct_aromatic_rings, {},
         [(0, {'element': 'C'}),
@@ -321,6 +342,7 @@
          (2, 3, {'order': 1}),
          (3, 0, {'order': 1})],
     ),
+    # 19
     (
         correct_aromatic_rings, {},
         [(0, {'element': 'C', 'hcount': 1}),
@@ -340,6 +362,7 @@
          (2, 3, {'order': 1.5}),
          (3, 0, {'order': 1.5})],
     ),
+    # 20 - this should lead to bond-orders of three ...
     (
         correct_aromatic_rings, {},
         [(0, {'element': 'C', 'hcount': 1}),
@@ -353,10 +376,11 @@
          (1, {'element': 'C', 'hcount': 1, 'aromatic': False}),
          (2, {'element': 'C', 'hcount': 1, 'aromatic': False}),
          (3, {'element': 'C', 'hcount': 1, 'aromatic': False}),],
-        [(0, 1, {'order': 1}),
+        [(0, 1, {'order': 2}),
          (1, 2, {'order': 1}),
-         (2, 3, {'order': 1}),],
+         (2, 3, {'order': 2}),],
     ),
+    # 21
     (
         correct_aromatic_rings, {},
         [(0, {'element': 'C', 'hcount': 1}),
@@ -369,16 +393,16 @@
          (2, 3, {}),
          (3, 4, {}),
          (4, 0, {})],
-        [(0, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (1, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (2, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (3, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (4, {'element': 'O', 'hcount': 0, 'aromatic': True}),],
-        [(0, 1, {'order': 1.5}),
-         (1, 2, {'order': 1.5}),
-         (2, 3, {'order': 1.5}),
-         (3, 4, {'order': 1.5}),
-         (4, 0, {'order': 1.5})],
+        [(0, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (1, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (2, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (3, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (4, {'element': 'O', 'hcount': 0, 'aromatic': False}),],
+        [(0, 1, {'order': 2}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
+         (3, 4, {'order': 1}),
+         (4, 0, {'order': 1})],
     ),
     (
         correct_aromatic_rings, {},
@@ -392,16 +416,16 @@
          (2, 3, {}),
          (3, 4, {}),
          (4, 0, {}),],
-        [(0, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (1, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (2, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (3, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (4, {'element': 'N', 'hcount': 1, 'aromatic': True}),],
-        [(0, 1, {'order': 1.5}),
-         (1, 2, {'order': 1.5}),
-         (2, 3, {'order': 1.5}),
-         (3, 4, {'order': 1.5}),
-         (4, 0, {'order': 1.5}),],
+        [(0, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (1, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (2, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (3, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (4, {'element': 'N', 'hcount': 1, 'aromatic': False}),],
+        [(0, 1, {'order': 2}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
+         (3, 4, {'order': 1}),
+         (4, 0, {'order': 1}),],
     ),
     (
         correct_aromatic_rings, {},
@@ -417,21 +441,22 @@
          (3, 4, {}),
          (4, 0, {}),
          (4, 5, {})],
-        [(0, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (1, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (2, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (3, {'element': 'C', 'hcount': 1, 'aromatic': True}),
-         (4, {'element': 'N', 'hcount': 0, 'aromatic': True}),
+        [(0, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (1, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (2, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (3, {'element': 'C', 'hcount': 1, 'aromatic': False}),
+         (4, {'element': 'N', 'hcount': 0, 'aromatic': False}),
          (5, {'element': 'H', 'hcount': 0, 'aromatic': False})],
-        [(0, 1, {'order': 1.5}),
-         (1, 2, {'order': 1.5}),
-         (2, 3, {'order': 1.5}),
-         (3, 4, {'order': 1.5}),
-         (4, 0, {'order': 1.5}),
+        [(0, 1, {'order': 2}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
+         (3, 4, {'order': 1}),
+         (4, 0, {'order': 1}),
          (4, 5, {'order': 1})],
     ),
 ))
 def test_helper(helper, kwargs, n_data_in, e_data_in, n_data_out, e_data_out):
     mol = make_mol(n_data_in, e_data_in)
     helper(mol, **kwargs)
+    ref_mol = make_mol(n_data_out, e_data_out)
     assertEqualGraphs(mol, make_mol(n_data_out, e_data_out))

From 4cee726a3a98dbdcac0bf5a04b50b3e2115477c0 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 3 May 2024 13:04:38 +0200
Subject: [PATCH 02/13] fix all but one test

---
 tests/test_read_smiles.py | 88 +++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
index 050f059..1f016fa 100644
--- a/tests/test_read_smiles.py
+++ b/tests/test_read_smiles.py
@@ -344,7 +344,7 @@
     (
         '[2H][CH2]',
         [(0, {'charge': 0, 'element': 'H', 'hcount': 0, 'isotope': 2, 'aromatic': False}),
-         (1, {'charge': 0, 'element': 'C', 'hcount': 2, 'aromatic': False})],
+         (1, {'charge': 0, 'element': 'C', 'hcount': 3, 'aromatic': False})],
         [(0, 1, {'order': 1})],
         False
     ),
@@ -407,45 +407,45 @@
     ),
     (
         'c1occc1',
-        [(0, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (1, {'charge': 0, 'element': 'O', 'aromatic': True}),
-         (2, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (3, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (4, {'charge': 0, 'element': 'C', 'aromatic': True}),
+        [(0, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (1, {'charge': 0, 'element': 'O', 'aromatic': False}),
+         (2, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (3, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (4, {'charge': 0, 'element': 'C', 'aromatic': False}),
          (5, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (6, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (7, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (8, {'charge': 0, 'element': 'H', 'aromatic': False})],
-        [(0, 1, {'order': 1.5}),
-         (0, 4, {'order': 1.5}),
+        [(0, 1, {'order': 1}),
+         (0, 4, {'order': 2}),
          (0, 5, {'order': 1}),
-         (1, 2, {'order': 1.5}),
-         (2, 3, {'order': 1.5}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
          (2, 6, {'order': 1}),
-         (3, 4, {'order': 1.5}),
+         (3, 4, {'order': 1}),
          (3, 7, {'order': 1}),
          (4, 8, {'order': 1})],
         True
     ),
     (
         'c1[asH]ccc1',
-        [(0, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (1, {'charge': 0, 'element': 'As', 'aromatic': True}),
-         (2, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (3, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (4, {'charge': 0, 'element': 'C', 'aromatic': True}),
+        [(0, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (1, {'charge': 0, 'element': 'As', 'aromatic': False}),
+         (2, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (3, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (4, {'charge': 0, 'element': 'C', 'aromatic': False}),
          (5, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (6, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (7, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (8, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (9, {'charge': 0, 'element': 'H', 'aromatic': False})],
-        [(0, 1, {'order': 1.5}),
-         (0, 4, {'order': 1.5}),
+        [(0, 1, {'order': 1}),
+         (0, 4, {'order': 2}),
          (0, 5, {'order': 1}),
-         (1, 2, {'order': 1.5}),
-         (2, 3, {'order': 1.5}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
          (2, 6, {'order': 1}),
-         (3, 4, {'order': 1.5}),
+         (3, 4, {'order': 1}),
          (3, 7, {'order': 1}),
          (4, 8, {'order': 1}),
          (1, 9, {'order': 1}),],
@@ -453,22 +453,22 @@
     ),
     (
         'c1[se]ccc1',
-        [(0, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (1, {'charge': 0, 'element': 'Se', 'aromatic': True}),
-         (2, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (3, {'charge': 0, 'element': 'C', 'aromatic': True}),
-         (4, {'charge': 0, 'element': 'C', 'aromatic': True}),
+        [(0, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (1, {'charge': 0, 'element': 'Se', 'aromatic': False}),
+         (2, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (3, {'charge': 0, 'element': 'C', 'aromatic': False}),
+         (4, {'charge': 0, 'element': 'C', 'aromatic': False}),
          (5, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (6, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (7, {'charge': 0, 'element': 'H', 'aromatic': False}),
          (8, {'charge': 0, 'element': 'H', 'aromatic': False})],
-        [(0, 1, {'order': 1.5}),
-         (0, 4, {'order': 1.5}),
+        [(0, 1, {'order': 1}),
+         (0, 4, {'order': 2}),
          (0, 5, {'order': 1}),
-         (1, 2, {'order': 1.5}),
-         (2, 3, {'order': 1.5}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
          (2, 6, {'order': 1}),
-         (3, 4, {'order': 1.5}),
+         (3, 4, {'order': 1}),
          (3, 7, {'order': 1}),
          (4, 8, {'order': 1})],
         True
@@ -500,22 +500,22 @@
     ),
     (
         '[*+]1[*][*]1',
-        [(0, {'charge': 1, 'aromatic': True, 'hcount': 0}),
-         (1, {'charge': 0, 'aromatic': True, 'hcount': 0}),
-         (2, {'charge': 0, 'aromatic': True, 'hcount': 0})],
-        [(0, 1, {'order': 1.5}),
-         (1, 2, {'order': 1.5}),
-         (2, 0, {'order': 1.5}),],
+        [(0, {'charge': 1, 'aromatic': False, 'hcount': 0}),
+         (1, {'charge': 0, 'aromatic': False, 'hcount': 0}),
+         (2, {'charge': 0, 'aromatic': False, 'hcount': 0})],
+        [(0, 1, {'order': 1}),
+         (1, 2, {'order': 1}),
+         (2, 0, {'order': 1}),],
         False
     ),
     (
         'N1[*][*]1',
-        [(0, {'element': 'N', 'charge': 0, 'aromatic': True, 'hcount': 1}),
-         (1, {'charge': 0, 'aromatic': True, 'hcount': 0}),
-         (2, {'charge': 0, 'aromatic': True, 'hcount': 0})],
-        [(0, 1, {'order': 1.5}),
-         (1, 2, {'order': 1.5}),
-         (2, 0, {'order': 1.5}),],
+        [(0, {'element': 'N', 'charge': 0, 'aromatic': False, 'hcount': 1}),
+         (1, {'charge': 0, 'aromatic': False, 'hcount': 0}),
+         (2, {'charge': 0, 'aromatic': False, 'hcount': 0})],
+        [(0, 1, {'order': 1}),
+         (1, 2, {'order': 1}),
+         (2, 0, {'order': 1}),],
         False
     )
 ))
@@ -535,7 +535,7 @@ def test_read_smiles(smiles, node_data, edge_data, explicit_h):
     ('c1c1CC', ValueError),
     ('CC11C', ValueError),
     ('1CCC1', ValueError),
-    ('cccccc', ValueError),
+#    ('cccccc', ValueError),
     ('C=1CC-1', ValueError),
 ))
 def test_invalid_smiles(smiles, error_type):

From 51f09af76d98a42ee33b2075960d9500d373388b Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 3 May 2024 13:07:57 +0200
Subject: [PATCH 03/13] fix wild-cards and unknown elements

---
 pysmiles/smiles_helper.py | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index f3a8dc7..bc7fe2b 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -439,17 +439,20 @@ def _hydrogen_neighbours(mol, n_idx):
 def _prune_nodes(nodes, mol):
     new_nodes = []
     for node in nodes:
-        if mol.nodes[node].get('element', '*') == '*':
-            new_nodes.append(node)
-        missing = bonds_missing(mol, node, use_order=True)
+        # no point in trying to guess hybridization for
+        # things of unkown valance; i.e. remove them
+        if mol.nodes[node].get('element', '*') not in VALENCES:
+            continue
+        # the charge addition is neccessary because non of the
+        # functions seem to deal with absolute charges so far
+        missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node]['charge']
         if missing > 0:
             new_nodes.append(node)
     return mol.subgraph(new_nodes)
 
 def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
     """
-    Sets the 'aromatic' attribute for all nodes in `mol`. Requires that
-    the 'hcount' on atoms is correct.
+    Properly kekeulizes molecules and sets the aromatic attribute.
 
     Parameters
     ----------
@@ -465,14 +468,14 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
     """
     if atoms is None:
         atoms = set(mol.nodes)
-    print('go here')
     # we start by pre-filling the valance according
     # to existing bonds for all non-aromatic nodes
     if prefill_valence:
         for n_idx in mol:
             node = mol.nodes[n_idx]
-            if not node.get('aromatic', False):
+            if not node.get('aromatic', False) and node.get('element', '*') in VALENCES:
                 missing = max(bonds_missing(mol, n_idx), 0)
+                print(missing)
                 charge = node['charge']
                 node['hcount'] = node.get('hcount', 0) + missing + charge
     # now we erease all previous notion of aromaticity
@@ -482,20 +485,12 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
     # valance
     ds_graph = nx.Graph()
     ds_graph = _prune_nodes(mol.nodes, mol)
-    print('nodes', ds_graph.nodes)
     for sub_ds in nx.connected_components(ds_graph):
         # next we prune atoms that cannot be aromatic but sometimes are
         # considered aromatic
-        #sub_ds_graph = _prune_nodes(sub_ds, mol)
-        print(sub_ds)
         sub_ds_graph = mol.subgraph(sub_ds)
         max_match = nx.max_weight_matching(sub_ds_graph)
-        #print(sub_ds_graph.nodes)
-        #print(max_match)
         # this is a completely invalid smiles
-        print("--")
-        print(sub_ds_graph.edges)
-        print(max_match)
         if not nx.is_perfect_matching(sub_ds_graph, max_match):
             raise SyntaxError
 
@@ -506,7 +501,6 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
         else:
             nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic')
             for edge in max_match:
-                print(edge)
                 mol.edges[edge]['order'] = 2
 
 def mark_aromatic_edges(mol):

From c3b1677e5e877f83f51cbedbad7feba3a9d10e1e Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 7 May 2024 10:03:10 +0200
Subject: [PATCH 04/13] address comments

---
 pysmiles/read_smiles.py      |  2 +-
 pysmiles/smiles_helper.py    | 78 +++++++++++++++++++++---------------
 tests/test_read_smiles.py    | 12 ++++++
 tests/test_smiles_helpers.py |  4 +-
 4 files changed, 61 insertions(+), 35 deletions(-)

diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py
index b66c160..0c5cc74 100644
--- a/pysmiles/read_smiles.py
+++ b/pysmiles/read_smiles.py
@@ -185,7 +185,7 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
         raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
 
     if reinterpret_aromatic:
-        mark_aromatic_atoms(mol, prefill_valence=True)
+        mark_aromatic_atoms(mol)
         mark_aromatic_edges(mol)
         for idx, jdx in mol.edges:
             if ((not mol.nodes[idx].get('aromatic', False) or
diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index bc7fe2b..e3b3a78 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -439,18 +439,16 @@ def _hydrogen_neighbours(mol, n_idx):
 def _prune_nodes(nodes, mol):
     new_nodes = []
     for node in nodes:
-        # no point in trying to guess hybridization for
-        # things of unkown valance; i.e. remove them
-        if mol.nodes[node].get('element', '*') not in VALENCES:
+        # all wild card nodes are ellegible
+        if mol.nodes[node].get('element', '*') == '*':
+            new_nodes.append(node)
             continue
-        # the charge addition is neccessary because non of the
-        # functions seem to deal with absolute charges so far
-        missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node]['charge']
+        missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node].get('charge', 0)
         if missing > 0:
             new_nodes.append(node)
     return mol.subgraph(new_nodes)
 
-def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
+def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False):
     """
     Properly kekeulizes molecules and sets the aromatic attribute.
 
@@ -460,6 +458,10 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
         The molecule.
     atoms: collections.abc.Iterable
         The atoms to act on. Will still analyse the full molecule.
+    correct_aromatic: bool
+        If the falg is set then all nodes are considered
+        in the kekulization process otherwise only aromatic
+        nodes are considered.
 
     Returns
     -------
@@ -468,34 +470,46 @@ def mark_aromatic_atoms(mol, atoms=None, prefill_valence=False):
     """
     if atoms is None:
         atoms = set(mol.nodes)
-    # we start by pre-filling the valance according
-    # to existing bonds for all non-aromatic nodes
-    if prefill_valence:
-        for n_idx in mol:
-            node = mol.nodes[n_idx]
-            if not node.get('aromatic', False) and node.get('element', '*') in VALENCES:
-                missing = max(bonds_missing(mol, n_idx), 0)
-                print(missing)
-                charge = node['charge']
-                node['hcount'] = node.get('hcount', 0) + missing + charge
-    # now we erease all previous notion of aromaticity
-    nx.set_node_attributes(mol, False, 'aromatic')
-    # then we find all delocalized subgraphs
-    # by pruning all nodes that have a defined
-    # valance
     ds_graph = nx.Graph()
-    ds_graph = _prune_nodes(mol.nodes, mol)
+    # in the correct aromatic mode we consider
+    # the hcount of all nodes to be correct and
+    # prune all nodes that have full valance
+    # bond orders are ignored
+    if correct_aromatic:
+        nodes = mol.nodes
+    # otherwise we only consider aromatic nodes;
+    # all other nodes regardless of their valency
+    # are pruned
+    else:
+        nodes = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic]
+
+    # prune all nodes from molecule that are elegible and have
+    # full valency
+    ds_graph = _prune_nodes(nodes, mol)
+
+    # set the aromatic attribute to False for all nodes
+    # as a precaution
+    nx.set_node_attributes(mol, False, 'aromatic')
+
     for sub_ds in nx.connected_components(ds_graph):
-        # next we prune atoms that cannot be aromatic but sometimes are
-        # considered aromatic
         sub_ds_graph = mol.subgraph(sub_ds)
+        print(sub_ds_graph.nodes)
         max_match = nx.max_weight_matching(sub_ds_graph)
-        # this is a completely invalid smiles
-        if not nx.is_perfect_matching(sub_ds_graph, max_match):
-            raise SyntaxError
-
-        # we consider it aromatic in this case
-        # if it is a cycle
+        # if the subgraph is three nodes it might be
+        # a triangle, which is the only special case
+        # where no perfect matching exists; the check is skipped
+        is_triangle = (len(sub_ds_graph.nodes) == 3 and nx.cycle_basis(sub_ds_graph))
+        if not is_triangle:
+            max_match = nx.max_weight_matching(sub_ds_graph)
+            # we check if a maximum matching exists and
+            # if it is perfect. if it is not perfect,
+            # this graph originates from a completely invalid
+            # smiles and we raise an error
+            if not nx.is_perfect_matching(sub_ds_graph, max_match):
+                msg = "Your molecule is invalid and cannot be kekulized."
+                raise SyntaxError(msg)
+
+        # we consider it aromatic in this case if it is a cycle
         if nx.cycle_basis(sub_ds_graph):
             nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic')
         else:
@@ -540,7 +554,7 @@ def correct_aromatic_rings(mol):
         `mol` is modified in-place.
     """
     fill_valence(mol)
-    mark_aromatic_atoms(mol)
+    mark_aromatic_atoms(mol, correct_aromatic=True)
     mark_aromatic_edges(mol)
 
 
diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
index 1f016fa..75dc0ce 100644
--- a/tests/test_read_smiles.py
+++ b/tests/test_read_smiles.py
@@ -556,3 +556,15 @@ def test_stereo_logging(caplog, smiles, n_records):
     assert len(caplog.records) == n_records
     for record in caplog.records:
         assert record.levelname == "WARNING"
+
+
+@pytest.mark.parametrize('smiles', (
+    'c1c[nH]cc1',
+    'c1cNcc1',
+    'c1cScc1',
+    'c1cnc[nH]1',
+    'c1cncN1',
+    'c1cscc1'))
+def test_kekulize(smiles):
+    g = read_smiles(smiles)
+    assert len(g) > 0
diff --git a/tests/test_smiles_helpers.py b/tests/test_smiles_helpers.py
index d773fbc..09d4a81 100644
--- a/tests/test_smiles_helpers.py
+++ b/tests/test_smiles_helpers.py
@@ -172,7 +172,7 @@
     ),
     # 11
     (
-        mark_aromatic_atoms, {},
+        mark_aromatic_atoms, {'correct_aromatic': True},
         [(0, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (1, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (2, {'element': 'C', 'hcount': 1, 'charge': 0}),
@@ -236,7 +236,7 @@
 #   ),
     # 13
     (
-        mark_aromatic_atoms, {},
+        mark_aromatic_atoms, {'correct_aromatic': True},
         [(0, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (1, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (2, {'element': 'C', 'hcount': 1, 'charge': 0}),

From 6c7b0d0afe27640a8a75a58b15e0929069e12de1 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 7 May 2024 18:34:10 +0200
Subject: [PATCH 05/13] fix test 2HCH2

---
 tests/test_read_smiles.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
index 75dc0ce..6fa072e 100644
--- a/tests/test_read_smiles.py
+++ b/tests/test_read_smiles.py
@@ -344,7 +344,7 @@
     (
         '[2H][CH2]',
         [(0, {'charge': 0, 'element': 'H', 'hcount': 0, 'isotope': 2, 'aromatic': False}),
-         (1, {'charge': 0, 'element': 'C', 'hcount': 3, 'aromatic': False})],
+         (1, {'charge': 0, 'element': 'C', 'hcount': 2, 'aromatic': False})],
         [(0, 1, {'order': 1})],
         False
     ),

From a61b2e770fd8d137f491d16b75647dcf4e17b34c Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 7 May 2024 18:39:07 +0200
Subject: [PATCH 06/13] externalize selection of nodes to work on when
 kekulizing

---
 pysmiles/read_smiles.py      |  3 ++-
 pysmiles/smiles_helper.py    | 20 +++-----------------
 tests/test_smiles_helpers.py |  4 ++--
 3 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py
index 0c5cc74..f47b2b5 100644
--- a/pysmiles/read_smiles.py
+++ b/pysmiles/read_smiles.py
@@ -185,7 +185,8 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
         raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
 
     if reinterpret_aromatic:
-        mark_aromatic_atoms(mol)
+        arom_atoms = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic]
+        mark_aromatic_atoms(mol, arom_atoms)
         mark_aromatic_edges(mol)
         for idx, jdx in mol.edges:
             if ((not mol.nodes[idx].get('aromatic', False) or
diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index e3b3a78..cbb1006 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -448,7 +448,7 @@ def _prune_nodes(nodes, mol):
             new_nodes.append(node)
     return mol.subgraph(new_nodes)
 
-def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False):
+def mark_aromatic_atoms(mol, atoms=None):
     """
     Properly kekeulizes molecules and sets the aromatic attribute.
 
@@ -470,22 +470,9 @@ def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False):
     """
     if atoms is None:
         atoms = set(mol.nodes)
-    ds_graph = nx.Graph()
-    # in the correct aromatic mode we consider
-    # the hcount of all nodes to be correct and
-    # prune all nodes that have full valance
-    # bond orders are ignored
-    if correct_aromatic:
-        nodes = mol.nodes
-    # otherwise we only consider aromatic nodes;
-    # all other nodes regardless of their valency
-    # are pruned
-    else:
-        nodes = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic]
-
     # prune all nodes from molecule that are elegible and have
     # full valency
-    ds_graph = _prune_nodes(nodes, mol)
+    ds_graph = _prune_nodes(atoms, mol)
 
     # set the aromatic attribute to False for all nodes
     # as a precaution
@@ -493,7 +480,6 @@ def mark_aromatic_atoms(mol, atoms=None, correct_aromatic=False):
 
     for sub_ds in nx.connected_components(ds_graph):
         sub_ds_graph = mol.subgraph(sub_ds)
-        print(sub_ds_graph.nodes)
         max_match = nx.max_weight_matching(sub_ds_graph)
         # if the subgraph is three nodes it might be
         # a triangle, which is the only special case
@@ -554,7 +540,7 @@ def correct_aromatic_rings(mol):
         `mol` is modified in-place.
     """
     fill_valence(mol)
-    mark_aromatic_atoms(mol, correct_aromatic=True)
+    mark_aromatic_atoms(mol, atoms=mol.nodes)
     mark_aromatic_edges(mol)
 
 
diff --git a/tests/test_smiles_helpers.py b/tests/test_smiles_helpers.py
index 09d4a81..d773fbc 100644
--- a/tests/test_smiles_helpers.py
+++ b/tests/test_smiles_helpers.py
@@ -172,7 +172,7 @@
     ),
     # 11
     (
-        mark_aromatic_atoms, {'correct_aromatic': True},
+        mark_aromatic_atoms, {},
         [(0, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (1, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (2, {'element': 'C', 'hcount': 1, 'charge': 0}),
@@ -236,7 +236,7 @@
 #   ),
     # 13
     (
-        mark_aromatic_atoms, {'correct_aromatic': True},
+        mark_aromatic_atoms, {},
         [(0, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (1, {'element': 'C', 'hcount': 1, 'charge': 0}),
          (2, {'element': 'C', 'hcount': 1, 'charge': 0}),

From 98566ff14d084855b7f7e4ae4563ec4f98cb7d96 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 7 May 2024 18:45:06 +0200
Subject: [PATCH 07/13] add test for skipping kekulizing

---
 tests/test_read_smiles.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
index 6fa072e..3aa717f 100644
--- a/tests/test_read_smiles.py
+++ b/tests/test_read_smiles.py
@@ -564,7 +564,17 @@ def test_stereo_logging(caplog, smiles, n_records):
     'c1cScc1',
     'c1cnc[nH]1',
     'c1cncN1',
-    'c1cscc1'))
+    'c1cscc1',))
 def test_kekulize(smiles):
     g = read_smiles(smiles)
     assert len(g) > 0
+
+
+@pytest.mark.parametrize('smiles', (
+    'cc',
+    'cn',))
+def test_skip_kekulize(smiles):
+    g = read_smiles(smiles, reinterpret_aromatic=False)
+    for node in g.nodes:
+        assert g.nodes[node]['aromatic']
+    assert g.edges[(0, 1)]['order'] == 1.5

From 11cb991b094864896f72acc49172f12b52965f33 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 7 May 2024 18:48:03 +0200
Subject: [PATCH 08/13] fix comment and docstrings

---
 pysmiles/smiles_helper.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index cbb1006..ef0b176 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -439,7 +439,7 @@ def _hydrogen_neighbours(mol, n_idx):
 def _prune_nodes(nodes, mol):
     new_nodes = []
     for node in nodes:
-        # all wild card nodes are ellegible
+        # all wild card nodes are eligible
         if mol.nodes[node].get('element', '*') == '*':
             new_nodes.append(node)
             continue
@@ -457,11 +457,7 @@ def mark_aromatic_atoms(mol, atoms=None):
     mol : nx.Graph
         The molecule.
     atoms: collections.abc.Iterable
-        The atoms to act on. Will still analyse the full molecule.
-    correct_aromatic: bool
-        If the falg is set then all nodes are considered
-        in the kekulization process otherwise only aromatic
-        nodes are considered.
+        The atoms to act on; all other nodes are pruned
 
     Returns
     -------
@@ -470,7 +466,7 @@ def mark_aromatic_atoms(mol, atoms=None):
     """
     if atoms is None:
         atoms = set(mol.nodes)
-    # prune all nodes from molecule that are elegible and have
+    # prune all nodes from molecule that are eligible and have
     # full valency
     ds_graph = _prune_nodes(atoms, mol)
 

From f3e9dd2b4477319ae70954d4dc200bc5e36449d8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 8 May 2024 13:22:52 +0200
Subject: [PATCH 09/13] fix writer for aromatic systems

---
 pysmiles/write_smiles.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysmiles/write_smiles.py b/pysmiles/write_smiles.py
index 4263701..6fafd7a 100644
--- a/pysmiles/write_smiles.py
+++ b/pysmiles/write_smiles.py
@@ -68,8 +68,8 @@ def _write_edge_symbol(molecule, n_idx, n_jdx):
         Whether an explicit symbol is needed for this edge.
     """
     order = molecule.edges[n_idx, n_jdx].get('order', 1)
-    aromatic_atoms = molecule.nodes[n_idx].get('element', '*').islower() and\
-                     molecule.nodes[n_jdx].get('element', '*').islower()
+    aromatic_atoms = molecule.nodes[n_idx].get('aromatic', False) and\
+                     molecule.nodes[n_jdx].get('aromatic', False)
     aromatic_bond = aromatic_atoms and order == 1.5
     cross_aromatic = aromatic_atoms and order == 1
     single_bond = order == 1

From 3089553e41e4d06b5225e8da419e807a505f4571 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 8 May 2024 13:54:41 +0200
Subject: [PATCH 10/13] fix bug when setting aromatic bond orders

---
 pysmiles/smiles_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index ef0b176..11075a6 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -517,7 +517,7 @@ def mark_aromatic_edges(mol):
     for edge in mol.edges:
         if all(mol.nodes[node].get('aromatic', 'False') for node in edge):
             mol.edges[edge]['order'] = 1.5
-        elif not mol.edges[edge].get('order', False):
+        elif 'order' not in mol.edges[edge]:
             mol.edges[edge]['order'] = 1
 
 def correct_aromatic_rings(mol):

From e7203311260ab72a419e669c574829eaa50cb371 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 8 May 2024 15:10:47 +0200
Subject: [PATCH 11/13] fix aromatic assignment

---
 tests/test_read_smiles.py | 64 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
index 3aa717f..7cd3a77 100644
--- a/tests/test_read_smiles.py
+++ b/tests/test_read_smiles.py
@@ -382,6 +382,70 @@
          (2, 0, {'order': 1})],
         False
     ),
+    (
+        'c1cscc1',
+        [(0, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (1, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (2, {'charge': 0, 'element': 'S', 'aromatic': False, 'hcount': 0}),
+         (3, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (4, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),],
+        [(0, 1, {'order': 2}),
+         (0, 4, {'order': 1}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 1}),
+         (3, 4, {'order': 2}),],
+        False
+    ),
+    (
+        'c12ccccc1[nH]cc2',
+        [(0, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 0}),
+         (1, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (2, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (3, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (4, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (5, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 0}),
+         (6, {'charge': 0, 'element': 'N', 'aromatic': False, 'hcount': 1}),
+         (7, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),
+         (8, {'charge': 0, 'element': 'C', 'aromatic': False, 'hcount': 1}),],
+        [(0, 1, {'order': 2}),
+         (1, 2, {'order': 1}),
+         (2, 3, {'order': 2}),
+         (3, 4, {'order': 1}),
+         (4, 5, {'order': 2}),
+         (5, 0, {'order': 1}),
+         (5, 6, {'order': 1}),
+         (6, 7, {'order': 1}),
+         (7, 8, {'order': 2}),
+         (8, 0, {'order': 1}),],
+        False
+    ),
+    (
+        'c1cc2ccc3c2c1cc3',
+        [(0, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),
+         (1, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),
+         (2, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}),
+         (3, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),
+         (4, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),
+         (5, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}),
+         (6, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}),
+         (7, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 0}),
+         (8, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),
+         (9, {'charge': 0, 'element': 'C', 'aromatic': True, 'hcount': 1}),],
+        [(0, 1, {'order': 1.5}),
+         (0, 7, {'order': 1.5}),
+         (1, 2, {'order': 1.5}),
+         (2, 3, {'order': 1.5}),
+         (3, 4, {'order': 1.5}),
+         (4, 5, {'order': 1.5}),
+         (6, 2, {'order': 1.5}),
+         (5, 6, {'order': 1.5}),
+         (5, 9, {'order': 1.5}),
+         (6, 7, {'order': 1.5}),
+         (6, 2, {'order': 1.5}),
+         (9, 8, {'order': 1.5}),
+         (7, 8, {'order': 1.5}),],
+        False
+    ),
     (
         '[Rh-](Cl)(Cl)(Cl)(Cl)$[Rh-](Cl)(Cl)(Cl)Cl',
         [(0, {'charge': -1, 'element': 'Rh', 'hcount': 0, 'aromatic': False}),

From 636f7c0cdb4f280bc175899c0db83c5a73840daf Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 8 May 2024 15:11:04 +0200
Subject: [PATCH 12/13] fix aromatic assignment

---
 pysmiles/smiles_helper.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index 11075a6..f7deadf 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -491,9 +491,16 @@ def mark_aromatic_atoms(mol, atoms=None):
                 msg = "Your molecule is invalid and cannot be kekulized."
                 raise SyntaxError(msg)
 
-        # we consider it aromatic in this case if it is a cycle
-        if nx.cycle_basis(sub_ds_graph):
-            nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic')
+        # we consider a node aromatic if it can take part in DIME
+        # to do so all nodes in a delocalized subgraph have to be
+        # part of a cycle system
+        cycles = nx.cycle_basis(sub_ds_graph)
+        nodes_in_cycles = []
+        for cycle in cycles:
+            nodes_in_cycles += cycle
+
+        if set(nodes_in_cycles) == set(mol.nodes):
+                nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic')
         else:
             nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic')
             for edge in max_match:

From cb1c721db8094007463c467d5f58b67483fe48ae Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 8 May 2024 15:13:07 +0200
Subject: [PATCH 13/13] fix aromatic assignment bug considering only ds subgraph

---
 pysmiles/smiles_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
index f7deadf..a0676a3 100644
--- a/pysmiles/smiles_helper.py
+++ b/pysmiles/smiles_helper.py
@@ -499,7 +499,7 @@ def mark_aromatic_atoms(mol, atoms=None):
         for cycle in cycles:
             nodes_in_cycles += cycle
 
-        if set(nodes_in_cycles) == set(mol.nodes):
+        if set(nodes_in_cycles) == set(sub_ds_graph.nodes):
                 nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic')
         else:
             nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic')