Skip to content

Commit

Permalink
Merge cb1c721 into 2308e24
Browse files Browse the repository at this point in the history
  • Loading branch information
fgrunewald committed May 8, 2024
2 parents 2308e24 + cb1c721 commit 527a133
Show file tree
Hide file tree
Showing 5 changed files with 269 additions and 165 deletions.
23 changes: 6 additions & 17 deletions pysmiles/read_smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,30 +184,19 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
if ring_nums:
raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))

# Time to deal with aromaticity. This is a mess, because it's not super
# clear what aromaticity information has been provided, and what should be
# inferred. In addition, to what extend do we want to provide a "sane"
# molecule, even if this overrides what the SMILES string specifies?
cycles = nx.cycle_basis(mol)
ring_idxs = set()
for cycle in cycles:
ring_idxs.update(cycle)
non_ring_idxs = set(mol.nodes) - ring_idxs
for n_idx in non_ring_idxs:
if mol.nodes[n_idx].get('aromatic', False):
raise ValueError("You specified an aromatic atom outside of a"
" ring. This is impossible")

mark_aromatic_edges(mol)
fill_valence(mol)
if reinterpret_aromatic:
mark_aromatic_atoms(mol)
arom_atoms = [node for node, aromatic in mol.nodes(data='aromatic') if aromatic]
mark_aromatic_atoms(mol, arom_atoms)
mark_aromatic_edges(mol)
for idx, jdx in mol.edges:
if ((not mol.nodes[idx].get('aromatic', False) or
not mol.nodes[jdx].get('aromatic', False))
and mol.edges[idx, jdx].get('order', 1) == 1.5):
mol.edges[idx, jdx]['order'] = 1
else:
mark_aromatic_edges(mol)

fill_valence(mol)

if explicit_hydrogen:
add_explicit_hydrogens(mol)
Expand Down
110 changes: 57 additions & 53 deletions pysmiles/smiles_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,18 +436,28 @@ def _hydrogen_neighbours(mol, n_idx):
h_neighbours += 1
return h_neighbours

def _prune_nodes(nodes, mol):
new_nodes = []
for node in nodes:
# all wild card nodes are eligible
if mol.nodes[node].get('element', '*') == '*':
new_nodes.append(node)
continue
missing = bonds_missing(mol, node, use_order=True) + mol.nodes[node].get('charge', 0)
if missing > 0:
new_nodes.append(node)
return mol.subgraph(new_nodes)

def mark_aromatic_atoms(mol, atoms=None):
"""
Sets the 'aromatic' attribute for all nodes in `mol`. Requires that
the 'hcount' on atoms is correct.
Properly kekeulizes molecules and sets the aromatic attribute.
Parameters
----------
mol : nx.Graph
The molecule.
atoms: collections.abc.Iterable
The atoms to act on. Will still analyse the full molecule.
The atoms to act on; all other nodes are pruned
Returns
-------
Expand All @@ -456,45 +466,45 @@ def mark_aromatic_atoms(mol, atoms=None):
"""
if atoms is None:
atoms = set(mol.nodes)
aromatic = set()
# Only cycles can be aromatic
for cycle in nx.cycle_basis(mol):
# All atoms should be sp2, so each contributes an electron. We make
# sure they are later.
electrons = len(cycle)
maybe_aromatic = True

for node_idx in cycle:
node = mol.nodes[node_idx]
element = node.get('element', '*').capitalize()
hcount = node.get('hcount', 0)
degree = mol.degree(node_idx) + hcount
hcount += _hydrogen_neighbours(mol, node_idx)
# Make sure they are possibly aromatic, and are sp2 hybridized
if element not in AROMATIC_ATOMS or degree not in (2, 3):
maybe_aromatic = False
break
# Some of the special cases per group. N and O type atoms can
# donate an additional electron from a lone pair.
# missing cases:
# extracyclic sp2 heteroatom (e.g. =O)
# some charged cases
if element in 'N P As'.split() and hcount == 1:
electrons += 1
elif element in 'O S Se'.split():
electrons += 1
if node.get('charge', 0) == +1 and not (element == 'C' and hcount == 0):
electrons -= 1
if maybe_aromatic and int(electrons) % 2 == 0:
# definitely (anti) aromatic
aromatic.update(cycle)
for node_idx in atoms:
node = mol.nodes[node_idx]
if node_idx not in aromatic:
node['aromatic'] = False
# prune all nodes from molecule that are eligible and have
# full valency
ds_graph = _prune_nodes(atoms, mol)

# set the aromatic attribute to False for all nodes
# as a precaution
nx.set_node_attributes(mol, False, 'aromatic')

for sub_ds in nx.connected_components(ds_graph):
sub_ds_graph = mol.subgraph(sub_ds)
max_match = nx.max_weight_matching(sub_ds_graph)
# if the subgraph is three nodes it might be
# a triangle, which is the only special case
# where there is no maximum match but
is_triangle = (len(sub_ds_graph.nodes) == 3 and nx.cycle_basis(sub_ds_graph))
if not is_triangle:
max_match = nx.max_weight_matching(sub_ds_graph)
# we check if a maximum matching exists and
# if it is perfect. if it is not perfect,
# this graph originates from a completely invalid
# smiles and we raise an error
if not nx.is_perfect_matching(sub_ds_graph, max_match):
msg = "Your molecule is invalid and cannot be kekulized."
raise SyntaxError(msg)

# we consider a node aromatic if it can take part in DIME
# to do so all nodes in a delocalized subgraph have to be
# part of a cycle system
cycles = nx.cycle_basis(sub_ds_graph)
nodes_in_cycles = []
for cycle in cycles:
nodes_in_cycles += cycle

if set(nodes_in_cycles) == set(sub_ds_graph.nodes):
nx.set_node_attributes(mol, {node: True for node in sub_ds_graph.nodes}, 'aromatic')
else:
node['aromatic'] = True

nx.set_node_attributes(mol, {node: False for node in sub_ds_graph.nodes}, 'aromatic')
for edge in max_match:
mol.edges[edge]['order'] = 2

def mark_aromatic_edges(mol):
"""
Expand All @@ -511,17 +521,11 @@ def mark_aromatic_edges(mol):
None
`mol` is modified in-place.
"""
for cycle in nx.cycle_basis(mol):
for idx, jdx in mol.edges(nbunch=cycle):
if idx not in cycle or jdx not in cycle:
continue
if (mol.nodes[idx].get('aromatic', False)
and mol.nodes[jdx].get('aromatic', False)):
mol.edges[idx, jdx]['order'] = 1.5
for idx, jdx in mol.edges:
if 'order' not in mol.edges[idx, jdx]:
mol.edges[idx, jdx]['order'] = 1

for edge in mol.edges:
if all(mol.nodes[node].get('aromatic', 'False') for node in edge):
mol.edges[edge]['order'] = 1.5
elif 'order' not in mol.edges[edge]:
mol.edges[edge]['order'] = 1

def correct_aromatic_rings(mol):
"""
Expand All @@ -539,7 +543,7 @@ def correct_aromatic_rings(mol):
`mol` is modified in-place.
"""
fill_valence(mol)
mark_aromatic_atoms(mol)
mark_aromatic_atoms(mol, atoms=mol.nodes)
mark_aromatic_edges(mol)


Expand Down
4 changes: 2 additions & 2 deletions pysmiles/write_smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def _write_edge_symbol(molecule, n_idx, n_jdx):
Whether an explicit symbol is needed for this edge.
"""
order = molecule.edges[n_idx, n_jdx].get('order', 1)
aromatic_atoms = molecule.nodes[n_idx].get('element', '*').islower() and\
molecule.nodes[n_jdx].get('element', '*').islower()
aromatic_atoms = molecule.nodes[n_idx].get('aromatic', False) and\
molecule.nodes[n_jdx].get('aromatic', False)
aromatic_bond = aromatic_atoms and order == 1.5
cross_aromatic = aromatic_atoms and order == 1
single_bond = order == 1
Expand Down

0 comments on commit 527a133

Please sign in to comment.