Skip to content

Commit

Permalink
Implement datasets through unions and filters over specified graphs b…
Browse files Browse the repository at this point in the history
…y re-writing the query. Still fails dataset-012b
  • Loading branch information
gkellogg committed Feb 7, 2013
1 parent 422cadf commit 6034b57
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 39 deletions.
1 change: 1 addition & 0 deletions Gemfile
Expand Up @@ -9,6 +9,7 @@ gem "rdf-xsd", :git => "git://github.com/ruby-rdf/rdf-xsd.git"

# Gems that are only needed in the development group.
group :development do
gem 'shotgun'
gem 'redcarpet'
end

group :debug do
Expand Down
1 change: 0 additions & 1 deletion examples/issue3.rb
Expand Up @@ -19,7 +19,6 @@ def query(pattern, &block)
:predicate => RDF::URI.new('http://localhost/attribute_types/last_name'),
:object => RDF::Literal.new('smith'))
elsif pattern[:predicate].path == '/attribute_types/middle_name'
require 'debugger'; debugger
statements << RDF::Statement.new(
:subject => RDF::URI.new('http://localhost/people/2'),
:predicate => RDF::URI.new('http://localhost/attribute_types/middle_name'),
Expand Down
7 changes: 7 additions & 0 deletions lib/sparql/algebra/extensions.rb
Expand Up @@ -107,6 +107,13 @@ class RDF::Query
def ==(other)
  # Two queries are equal when `other` is also an RDF::Query and both the
  # patterns and the context match.
  #
  # The context comparison must use `other.context`; comparing `context`
  # with itself is always true and would make queries over different
  # graphs compare equal.
  #
  # @param  [Object] other
  # @return [Boolean]
  other.is_a?(RDF::Query) && patterns == other.patterns && context == other.context
end

##
# Don't do any more rewriting
# @return [SPARQL::Algebra::Expression] `self`
def rewrite(&block)
# The block is accepted but never called here: the receiver is returned
# unchanged, so nothing inside it is rewritten.
self
end

# Transform Query into an Array form of an SSE
#
Expand Down
42 changes: 42 additions & 0 deletions lib/sparql/algebra/operator.rb
Expand Up @@ -322,6 +322,26 @@ def optimize
end
end

##
# Rewrite operands by yielding each operand. Recursively descends
# through operands implementing this method.
#
# @yield operand
# @yieldparam [] operand
# @yieldreturn [SPARQL::Algebra::Expression] the re-written operand
# @return [SPARQL::Algebra::Expression] `self`
def rewrite(&block)
  # Replace each operand with whatever the block returns for it. When the
  # block returns nil or false, the operand is asked to rewrite itself (if
  # it knows how); otherwise it is kept as is. The operand list of this
  # operator is updated in place and the operator itself is returned.
  @operands = @operands.map do |operand|
    replacement = block.call(operand)
    if replacement
      replacement
    elsif operand.respond_to?(:rewrite)
      # Not replaced by the block: descend into the operand
      operand.rewrite(&block)
    else
      operand
    end
  end
  self
end

##
# Returns the SPARQL S-Expression (SSE) representation of this operator.
#
Expand Down Expand Up @@ -362,6 +382,7 @@ def eql?(other)
other.class == self.class && other.operands == self.operands
end
alias_method :==, :eql?

protected

##
Expand Down Expand Up @@ -401,6 +422,27 @@ def boolean(literal)
end
end

##
# Transform an array of expressions into a recursive set
# of binary operations
# e.g.: a || b || c => (|| a (|| b c))
# @param [Class] Binary Operator class
# @param [Array<SPARQL::Algebra::Expression>] *expressions
# @return [SPARQL::Algebra::Expression]
def to_binary(klass, *expressions)
  # Nothing to combine when no expressions were passed at all.
  raise "Operator#to_binary requires two or more expressions" if expressions.empty?

  # Fold from the right so that a, b, c nests as (klass a (klass b c)).
  # A single expression has nothing to be paired with and is returned as is.
  expressions.reverse.inject { |nested, expression| klass.new(expression, nested) }
end

private

@@subclasses = [] # @private
Expand Down
147 changes: 117 additions & 30 deletions lib/sparql/algebra/operator/dataset.rb
Expand Up @@ -15,8 +15,8 @@ class Operator
# either bare, indicating a default dataset, or expressed as an array `\[:named, \<uri\>\]`,
# indicating that it represents a named data source.
#
# This operator loads the document referenced by the URI into the dataset
# using `uri` as the graph name, unless it already exists within the dataset.
# This operator loads from the datasource, unless a graph named by
# the datasource URI already exists in the repository.
#
# The contained BGP queries are then performed against the specified
# default and named graphs. Rather than using the actual default
Expand All @@ -25,23 +25,84 @@ class Operator
# and the results are filtered against those URIs included in
# the default dataset.
#
# @example
# Specifically, each BGP which is not part of a graph pattern
# is replaced with a union of graph patterns with that BGP repeated
# for each graph URI in the default dataset. This requires recursively
# updating the operator.
#
# Each graph pattern containing a variable graph name is replaced
# by a filter on that variable such that the variable must match
# only those named datasets specified.
#
# @example Dataset with one default and one named data source
#
# (prefix ((: <http://example/>))
# (dataset (<data-g1.ttl> (named <data-g2.ttl>))
# (union
# (bgp (triple ?s ?p ?o))
# (graph ?g (bgp (triple ?s ?p ?o))))))
#
# is effectively re-written to the following:
# is effectively re-written to the following:
#
# (prefix ((: <http://example/>))
# (dataset (<data-g1.ttl> (named <data-g2.ttl>))
# (filter (= ??g <data-g1.ttl>)
# (union
# (graph ??g (bgp (triple ?s ?p ?o)))
# (graph ?g (bgp (triple ?s ?p ?o)))))))
# (union
# (graph <data-g1.ttl> (bgp (triple ?s ?p ?o)))
# (filter (= ?g <data-g2.ttl>)
# (graph ?g (bgp (triple ?s ?p ?o))))))
#
# If no default or no named graphs are specified, these queries
# are eliminated.
#
# @example Dataset with one default no named data sources
#
# (prefix ((: <http://example/>))
# (dataset (<data-g1.ttl>)
# (union
# (bgp (triple ?s ?p ?o))
# (graph ?g (bgp (triple ?s ?p ?o))))))
#
# is effectively re-written to the following:
#
# (prefix ((: <http://example/>))
# (union
# (graph <data-g1.ttl> (bgp (triple ?s ?p ?o)))
# (bgp)))
#
# Multiple default graphs union the information from a graph query
# on each default datasource.
#
# @example Dataset with two default data sources
#
# (prefix ((: <http://example/>))
# (dataset (<data-g1.ttl> <data-g2.ttl>)
# (bgp (triple ?s ?p ?o))))
#
# is effectively re-written to the following:
#
# (prefix ((: <http://example/>))
# (union
# (graph <data-g1.ttl> (bgp (triple ?s ?p ?o)))
# (graph <data-g2.ttl> (bgp (triple ?s ?p ?o)))))
#
# Multiple named graphs place a filter on all variables used
# to identify those named graphs so that they are restricted
# to come only from the specified set. Note that this requires
# descending through expressions to find graph patterns using
# variables and placing a filter on each identified variable.
#
# @example Dataset with two named data sources
#
# (prefix ((: <http://example/>))
# (dataset ((named <data-g1.ttl>) (named <data-g2.ttl>))
# (graph ?g (bgp (triple ?s ?p ?o)))))
#
# is effectively re-written to the following:
#
# (prefix ((: <http://example/>))
# (filter (|| (= ?g <data-g1.ttl>) (= ?g <data-g2.ttl>))
# (graph ?g (bgp (triple ?s ?p ?o)))))
#
# @example Dataset with multiple named graphs
# @see http://www.w3.org/TR/rdf-sparql-query/#specifyingDataset
class Dataset < Binary
include Query
Expand Down Expand Up @@ -75,7 +136,8 @@ class Dataset < Binary
# @see http://www.w3.org/TR/rdf-sparql-query/#sparqlAlgebra
def execute(queryable, options = {})
debug(options) {"Dataset"}
default_graphs = []
default_datasets = []
named_datasets = []
operand(0).each do |ds|
load_opts = {
:headers => {"Accept" => ACCEPTS}
Expand All @@ -94,11 +156,12 @@ def execute(queryable, options = {})
uri = self.base_uri ? self.base_uri.join(ds.last) : ds.last
uri.lexical = ds.last
debug(options) {"=> named data source #{uri}"}
named_datasets << uri
else
debug(options) {"=> array: join #{self.base_uri.inspect} to #{ds.inspect}"}
uri = self.base_uri ? self.base_uri.join(ds) : ds
debug(options) {"=> default data source #{uri}"}
default_graphs << uri
default_datasets << uri
end
load_opts[:context] = load_opts[:base_uri] = uri
unless queryable.has_context?(uri)
Expand All @@ -107,27 +170,51 @@ def execute(queryable, options = {})
end
end

# Query binding a non-distinguishded variable to context
default_var = RDF::Query::Variable.new
default_var.distinguished = false

@solutions = operands.last.execute(queryable, options.merge(
:context => default_var,
:depth => options[:depth].to_i + 1)
).filter do |soln|
# Reject solutions with bindings to default_var where the value
# is not a specified default graph
debug(options) {"=> filter: #{soln.inspect}"}
if soln.unbound?(default_var)
true
elsif default_graphs.include?(soln[default_var])
# Remove the variable from the solution and match
# FIXME: this should either go in RDF::Query::Solution,
# or there should be a immutable way of performing this
# as an operation on RDF::Query::Solutions
soln.bindings.delete(default_var.to_sym)
# Re-write the operand:
#require 'debugger'; breakpoint
operator = self.rewrite do |op|
case op
when Operator::Graph
if named_datasets.empty?
# * If there are no named datasets, remove all (graph)
# operations.
debug(options) {"=> #{op.to_sxp} => (bgp)"}
Operator::BGP.new
elsif (name = op.operand(0)).is_a?(RDF::Resource)
# It must match one of the named_datasets
debug(options) {"=> #{op.to_sxp} => (bgp)"}
named_datasets.include?(name) ? op : Operator::BGP.new
else
# Name is a variable, replace op with a filter on that
# variable and op
filter_expressions = named_datasets.map {|u| Operator::Equal.new(name, u)}
debug(options) {"=> #{op.to_sxp} => (filter (...) #{op.to_sxp})"}
filt = to_binary(Operator::Or, *filter_expressions)
Operator::Filter.new(filt, op)
end
when RDF::Query # Operator::BGP
case default_datasets.length
when 0
# No Default Datasets, no query to run
debug(options) {"=> #{op.to_sxp} => (bgp)"}
Operator::BGP.new
when 1
# A single dataset, write as (graph <dataset> (bgp))
debug(options) {"=> #{op.to_sxp} => (graph <#{default_datasets.first}> #{op.to_sxp})"}
Operator::Graph.new(default_datasets.first, op)
else
# Several, rewrite as Union
debug(options) {"=> #{op.to_sxp} => (union ...)"}
to_binary(Operator::Union, *default_datasets.map {|u| Operator::Graph.new(u, op)})
end
else
nil
end
end
executable = operator.operands.last
debug(options) {"=> rewritten: #{executable.to_sxp}"}

@solutions = executable.execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
end

##
Expand Down
2 changes: 1 addition & 1 deletion lib/sparql/algebra/operator/filter.rb
Expand Up @@ -33,7 +33,7 @@ class Filter < Operator::Binary
# @see http://www.w3.org/TR/rdf-sparql-query/#sparqlAlgebra
# @see http://www.w3.org/TR/rdf-sparql-query/#ebv
def execute(queryable, options = {})
debug(options) {"Filter #{operands.first}"}
debug(options) {"Filter #{operands.first.to_sxp}"}
@solutions = operands.last.execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
debug(options) {"=>(before) #{@solutions.map(&:to_hash).inspect}"}
@solutions = @solutions.filter do |solution|
Expand Down
7 changes: 7 additions & 0 deletions lib/sparql/algebra/operator/graph.rb
Expand Up @@ -45,6 +45,13 @@ def execute(queryable, options = {})
def optimize
  # Optimize every operand and store the result on this operator.
  #
  # The assignment has to go through the instance variable: writing
  # `operands = operands.map(...)` declares a local variable named
  # `operands`, which is still nil when the right-hand side is evaluated,
  # so the call would raise NoMethodError instead of optimizing anything.
  #
  # @return [SPARQL::Algebra::Expression] `self`
  @operands = operands.map(&:optimize)
  self
end

##
# Don't do any more rewriting
# @return [SPARQL::Algebra::Expression] `self`
def rewrite(&block)
# The block is intentionally ignored: this operator returns itself as is,
# so the rewriting stops here and does not descend into its operands.
self
end
end # Graph
end # Operator
end; end # SPARQL::Algebra
13 changes: 6 additions & 7 deletions lib/sparql/algebra/operator/union.rb
Expand Up @@ -29,20 +29,19 @@ class Union < Operator::Binary
# @see http://www.w3.org/TR/rdf-sparql-query/#sparqlAlgebra
def execute(queryable, options = {})
  # Executes every operand once against `queryable` and concatenates the
  # solutions, in operand order, into a single RDF::Query::Solutions.
  #
  # Each operand is executed exactly once. Running the first two operands
  # separately and then running all operands again would repeat the work
  # and discard the first result.
  #
  # @param  [Object] queryable passed through to each operand's `execute`
  # @param  [Hash]   options   passed through with `:depth` incremented by one
  # @return [RDF::Query::Solutions] also stored in `@solutions`
  debug(options) {"Union"}
  combined = operands.inject([]) do |memo, op|
    solns = op.execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
    debug(options) {"=> (op) #{solns.inspect}"}
    memo + solns
  end
  @solutions = RDF::Query::Solutions.new(combined)
  debug(options) {"=> #{@solutions.inspect}"}
  @solutions
end

##
# Returns an optimized version of this query.
#
# If optimize operands, and if the first two operands are both Queries, replace
# with the unique sum of the query elements
# Optimize operands and remove any which are empty.
#
# @return [Union, RDF::Query] `self`
def optimize
Expand Down

0 comments on commit 6034b57

Please sign in to comment.