Skip to content

Commit

Permalink
Add substructure 1:1 node matching
Browse files Browse the repository at this point in the history
When you want to preserve some property indexed to the nodes,
it's helpful to construct the node map.

Documentation here isn't complete because `atommatch` and `bondmatch`
are undocumented, but hopefully it sends users in the right direction.
  • Loading branch information
timholy committed Dec 1, 2020
1 parent 8844432 commit 63c1c20
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
7 changes: 6 additions & 1 deletion src/graph/isomorphism/vf2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,17 @@ issubgraphmatch(G, H; kwargs...) = subgraphmatch(G, H; kwargs...) !== nothing
# Edge induced subgraph isomorphism

"""
edgesubgraphmatch(G::AbstractGraph, H::AbstractGraph; kwargs...) -> Iterator
edgesubgraphmatch(G::AbstractGraph, H::AbstractGraph;
nodematcher=(g,h)->true , edgematcher=(g,h)->true,
kwargs...) -> Iterator
Generate edge induced subgraph isomorphism mappings between `G` and `H`.
The returned iterator has `ig => ih` pairs that correspond to the indices of matching
edges in `G` and `H`, respectively.
`nodematcher` and `edgematcher` control the features needed to be counted as a match.
[`atommatch`](@ref) and [`bondmatch`](@ref) can be used to construct these functions.
See [`MolecularGraph.edgesubgraph`](@ref) to construct the subgraphs that result from the match.
"""
edgesubgraphmatches(G, H; kwargs...
Expand Down
36 changes: 36 additions & 0 deletions src/substructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,42 @@ function querymatch(mol::UndirectedGraph, query::QueryMol; kwargs...)
return
end

"""
nmap = emaptonmap(emap, mol, query)
Convert an edge-based mapping, of the form returned by [`edgesubgraphmatches`](@ref), into
a map between nodes. Commonly, `nmap[i]` is a length-1 vector `[j]`, where `i=>j` is the mapping
from `nodeattr(query, i)` to `nodeattr(mol, j)`. In cases where the mapping is ambiguous,
`nmap[i]` may be multivalued.
"""
function emaptonmap(emap, mol::UndirectedGraph, query::QueryMol)
nmol, nq = nodecount(mol), nodecount(query)
nq <= nmol || throw(ArgumentError("query must be a substructure of mol"))
# Each node in the query edges can map to either of two nodes in mol
qnodeoptions = [Tuple{Int,Int}[] for _ = 1:nq]
for (edgeidmol, edgeidq) in emap
edgemol, edgeq = getedge(mol, edgeidmol), getedge(query, edgeidq)
for nq in edgeq
push!(qnodeoptions[nq], edgemol)
end
end
# For nodes connected to two or more other nodes, intersection results in a unique answer
assignment = [intersect(nodeops...) for nodeops in qnodeoptions]
# For the singly-connected nodes, assign them by eliminating ones already taken
taken = falses(nmol)
for a in assignment
if length(a) == 1
taken[a[1]] = true
end
end
for a in assignment
if length(a) > 1
deleteat!(a, findall(taken[a]))
end
end
return assignment
end

"""
isquerymatch(mol, query; kwargs...)
Expand Down
47 changes: 46 additions & 1 deletion test/substructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
using MolecularGraph.Graph: lgnodematcher, lgedgematcher
using MolecularGraph:
fastidentityfilter, fastsubstrfilter,
atommatch, bondmatch
atommatch, bondmatch, emaptonmap

@testset "substructure" begin

Expand Down Expand Up @@ -108,4 +108,49 @@ end
@test !isquerymatch(hetero4, disconn)
end

@testset "node matching" begin
function nodematch(mol, query, idx=1, len=1)
afunc = atommatch(mol, query)
bfunc = bondmatch(mol, query)
emaps = edgesubgraphmatches(mol, query, nodematcher=afunc, edgematcher=bfunc)
@test length(emaps) == len
emap = emaps[idx]
return emaptonmap(emap, mol, query)
end
function single(v) # like `only` on Julia 1.4+
length(v) == 1 || error("expected a single entry")
return v[1]
end
# 6-membered carbon ring fused to a 5-membered carbon ring with an attached oxygen
query = smartstomol("[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~2~[#8]")
# The carbon numbering looks like this:
# 3 9
# 2 4 8
# 1 5 7
# 6
# 1-Indanone, PubChem CID6735
mol = smilestomol("C1CC(=O)C2=CC=CC=C21")
nmap = nodematch(mol, query)
@test single.(nmap) == [8,7,6,5,10,9,1,2,3,4] # indices in mol corresponding to atoms in query
# 1-Acenaphthenone, PubChem CID75229
mol = smilestomol("C1C2=CC=CC3=C2C(=CC=C3)C1=O")
nmap = nodematch(mol, query)
@test single.(nmap) == [11,10,9,8,7,6,2,1,12,13]
# Ninhydrin, PubChem CID10236 (2 matches)
mol = smilestomol("C1=CC=C2C(=C1)C(=O)C(C2=O)(O)O")
nmaps = ([1,2,3,4,5,6,7,9,10,11], [2,1,6,5,4,3,10,9,7,8])
nmap = nodematch(mol, query, 1, 2)
snmap = single.(nmap)
@test snmap nmaps
nmap = nodematch(mol, query, 2, 2)
snmap = single.(nmap)
@test snmap nmaps

# Ambiguous match
query = smartstomol("[H][H]")
mol = smilestomol("[H][H]")
nmap = nodematch(mol, query)
@test nmap == [[1,2], [1,2]]
end

end # substructure

0 comments on commit 63c1c20

Please sign in to comment.