Skip to content

Commit

Permalink
Merge pull request #45 from timholy/teh/emaptonodemap
Browse files Browse the repository at this point in the history
Add substructure 1:1 node matching
  • Loading branch information
mojaie committed Dec 5, 2020
2 parents 8844432 + 63c1c20 commit be43e65
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
7 changes: 6 additions & 1 deletion src/graph/isomorphism/vf2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,17 @@ issubgraphmatch(G, H; kwargs...) = subgraphmatch(G, H; kwargs...) !== nothing
# Edge induced subgraph isomorphism

"""
edgesubgraphmatch(G::AbstractGraph, H::AbstractGraph; kwargs...) -> Iterator
edgesubgraphmatch(G::AbstractGraph, H::AbstractGraph;
nodematcher=(g,h)->true , edgematcher=(g,h)->true,
kwargs...) -> Iterator
Generate edge induced subgraph isomorphism mappings between `G` and `H`.
The returned iterator has `ig => ih` pairs that correspond to the indices of matching
edges in `G` and `H`, respectively.
`nodematcher` and `edgematcher` control the features needed to be counted as a match.
[`atommatch`](@ref) and [`bondmatch`](@ref) can be used to construct these functions.
See [`MolecularGraph.edgesubgraph`](@ref) to construct the subgraphs that result from the match.
"""
edgesubgraphmatches(G, H; kwargs...
Expand Down
36 changes: 36 additions & 0 deletions src/substructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,42 @@ function querymatch(mol::UndirectedGraph, query::QueryMol; kwargs...)
return
end

"""
nmap = emaptonmap(emap, mol, query)
Convert an edge-based mapping, of the form returned by [`edgesubgraphmatches`](@ref), into
a map between nodes. Commonly, `nmap[i]` is a length-1 vector `[j]`, where `i=>j` is the mapping
from `nodeattr(query, i)` to `nodeattr(mol, j)`. In cases where the mapping is ambiguous,
`nmap[i]` may be multivalued.
"""
function emaptonmap(emap, mol::UndirectedGraph, query::QueryMol)
nmol, nq = nodecount(mol), nodecount(query)
nq <= nmol || throw(ArgumentError("query must be a substructure of mol"))
# Each node in the query edges can map to either of two nodes in mol
qnodeoptions = [Tuple{Int,Int}[] for _ = 1:nq]
for (edgeidmol, edgeidq) in emap
edgemol, edgeq = getedge(mol, edgeidmol), getedge(query, edgeidq)
for nq in edgeq
push!(qnodeoptions[nq], edgemol)
end
end
# For nodes connected to two or more other nodes, intersection results in a unique answer
assignment = [intersect(nodeops...) for nodeops in qnodeoptions]
# For the singly-connected nodes, assign them by eliminating ones already taken
taken = falses(nmol)
for a in assignment
if length(a) == 1
taken[a[1]] = true
end
end
for a in assignment
if length(a) > 1
deleteat!(a, findall(taken[a]))
end
end
return assignment
end

"""
isquerymatch(mol, query; kwargs...)
Expand Down
47 changes: 46 additions & 1 deletion test/substructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
using MolecularGraph.Graph: lgnodematcher, lgedgematcher
using MolecularGraph:
fastidentityfilter, fastsubstrfilter,
atommatch, bondmatch
atommatch, bondmatch, emaptonmap

@testset "substructure" begin

Expand Down Expand Up @@ -108,4 +108,49 @@ end
@test !isquerymatch(hetero4, disconn)
end

@testset "node matching" begin
function nodematch(mol, query, idx=1, len=1)
afunc = atommatch(mol, query)
bfunc = bondmatch(mol, query)
emaps = edgesubgraphmatches(mol, query, nodematcher=afunc, edgematcher=bfunc)
@test length(emaps) == len
emap = emaps[idx]
return emaptonmap(emap, mol, query)
end
function single(v) # like `only` on Julia 1.4+
length(v) == 1 || error("expected a single entry")
return v[1]
end
# 6-membered carbon ring fused to a 5-membered carbon ring with an attached oxygen
query = smartstomol("[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~2~[#8]")
# The carbon numbering looks like this:
# 3 9
# 2 4 8
# 1 5 7
# 6
# 1-Indanone, PubChem CID6735
mol = smilestomol("C1CC(=O)C2=CC=CC=C21")
nmap = nodematch(mol, query)
@test single.(nmap) == [8,7,6,5,10,9,1,2,3,4] # indices in mol corresponding to atoms in query
# 1-Acenaphthenone, PubChem CID75229
mol = smilestomol("C1C2=CC=CC3=C2C(=CC=C3)C1=O")
nmap = nodematch(mol, query)
@test single.(nmap) == [11,10,9,8,7,6,2,1,12,13]
# Ninhydrin, PubChem CID10236 (2 matches)
mol = smilestomol("C1=CC=C2C(=C1)C(=O)C(C2=O)(O)O")
nmaps = ([1,2,3,4,5,6,7,9,10,11], [2,1,6,5,4,3,10,9,7,8])
nmap = nodematch(mol, query, 1, 2)
snmap = single.(nmap)
@test snmap nmaps
nmap = nodematch(mol, query, 2, 2)
snmap = single.(nmap)
@test snmap nmaps

# Ambiguous match
query = smartstomol("[H][H]")
mol = smilestomol("[H][H]")
nmap = nodematch(mol, query)
@test nmap == [[1,2], [1,2]]
end

end # substructure

0 comments on commit be43e65

Please sign in to comment.