Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Implement datasets through unions and filters over specified graphs b…

…y re-writing the query. Still fails dataset-012b
  • Loading branch information...
commit 6034b573862c0acf06e96eee40505aedd52cd534 1 parent 422cadf
@gkellogg gkellogg authored
View
1  Gemfile
@@ -9,6 +9,7 @@ gem "rdf-xsd", :git => "git://github.com/ruby-rdf/rdf-xsd.git"
group :development do
gem 'shotgun'
+ gem 'redcarpet'
end
group :debug do
View
1  examples/issue3.rb
@@ -19,7 +19,6 @@ def query(pattern, &block)
:predicate => RDF::URI.new('http://localhost/attribute_types/last_name'),
:object => RDF::Literal.new('smith'))
elsif pattern[:predicate].path == '/attribute_types/middle_name'
- require 'debugger'; debugger
statements << RDF::Statement.new(
:subject => RDF::URI.new('http://localhost/people/2'),
:predicate => RDF::URI.new('http://localhost/attribute_types/middle_name'),
View
7 lib/sparql/algebra/extensions.rb
@@ -107,6 +107,13 @@ class RDF::Query
##
# Compares this query with another object for equality.
#
# Two queries are equal when `other` is an RDF::Query and both the
# patterns and the context of the two queries match.
#
# @param  [Object] other the object to compare against
# @return [Boolean] true when `other` is an equivalent query
def ==(other)
  # The context must be compared against other's context; comparing
  # `context` with itself is always true and would treat queries over
  # different contexts as equal.
  other.is_a?(RDF::Query) && patterns == other.patterns && context == other.context
end
+
+ ##
+ # Don't do any more rewriting
+ # @return [SPARQL::Algebra::Expression] `self`
+ def rewrite(&block)
+ self
+ end
# Transform Query into an Array form of an SSE
#
View
42 lib/sparql/algebra/operator.rb
@@ -323,6 +323,26 @@ def optimize
end
##
+ # Rewrite operands by yielding each operand. Recursively descends
+ # through operands implementing this method.
+ #
+ # @yield operand
+ # @yieldparam [Object] operand
+ # @yieldreturn [SPARQL::Algebra::Expression] the re-written operand
+ # @return [SPARQL::Algebra::Expression] `self`
+ def rewrite(&block)
+ @operands = @operands.map do |op|
+ # Rewrite the operand
+ unless new_op = block.call(op)
+ # Not re-written, rewrite
+ new_op = op.respond_to?(:rewrite) ? op.rewrite(&block) : op
+ end
+ new_op
+ end
+ self
+ end
+
+ ##
# Returns the SPARQL S-Expression (SSE) representation of this operator.
#
# @return [Array]
@@ -362,6 +382,7 @@ def eql?(other)
other.class == self.class && other.operands == self.operands
end
alias_method :==, :eql?
+
protected
##
@@ -401,6 +422,27 @@ def boolean(literal)
end
end
+ ##
+ # Transform an array of expressions into a recursive set
+ # of binary operations
+ # e.g.: a || b || c => (|| a (|| b c))
+ # @param [Class] klass Binary Operator class
+ # @param [Array<SPARQL::Algebra::Expression>] *expressions
+ # @return [SPARQL::Algebra::Expression]
+ def to_binary(klass, *expressions)
+ case expressions.length
+ when 0
+ # Oops!
+ raise "Operator#to_binary requires two or more expressions"
+ when 1
+ expressions.first
+ when 2
+ klass.new(*expressions)
+ else
+ klass.new(expressions.shift, to_binary(klass, *expressions))
+ end
+ end
+
private
@@subclasses = [] # @private
View
147 lib/sparql/algebra/operator/dataset.rb
@@ -15,8 +15,8 @@ class Operator
# either bare, indicating a default dataset, or expressed as an array `\[:named, \<uri\>\]`,
# indicating that it represents a named data source.
#
- # This operator loads the document referenced by the URI into the dataset
- # using `uri` as the graph name, unless it already exists within the dataset.
+ # This operator loads from the datasource, unless a graph named by
+ # the datasource URI already exists in the repository.
#
# The contained BGP queries are then performed against the specified
# default and named graphs. Rather than using the actual default
@@ -25,7 +25,16 @@ class Operator
# and the results are filtered against those URIs included in
# the default dataset.
#
- # @example
+ # Specifically, each BGP which is not part of a graph pattern
+ # is replaced with a union of graph patterns with that BGP repeated
+ # for each graph URI in the default dataset. This requires recursively
+ # updating the operator.
+ #
+ # Each graph pattern containing a variable graph name is replaced
+ # by a filter on that variable such that the variable must match
+ # only those named datasets specified.
+ #
+ # @example Dataset with one default and one named data source
#
# (prefix ((: <http://example/>))
# (dataset (<data-g1.ttl> (named <data-g2.ttl>))
@@ -33,15 +42,67 @@ class Operator
# (bgp (triple ?s ?p ?o))
# (graph ?g (bgp (triple ?s ?p ?o))))))
#
- # is effectively re-written to the following:
+ # is effectively re-written to the following:
#
# (prefix ((: <http://example/>))
- # (dataset (<data-g1.ttl> (named <data-g2.ttl>))
- # (filter (= ??g <data-g1.ttl>)
- # (union
- # (graph ??g (bgp (triple ?s ?p ?o)))
- # (graph ?g (bgp (triple ?s ?p ?o)))))))
+ # (union
+ # (graph <data-g1.ttl> (bgp (triple ?s ?p ?o)))
+ # (filter (= ?g <data-g2.ttl>)
+ # (graph ?g (bgp (triple ?s ?p ?o))))))
+ #
+ # If no default or no named graphs are specified, these queries
+ # are eliminated.
+ #
+ # @example Dataset with one default and no named data sources
+ #
+ # (prefix ((: <http://example/>))
+ # (dataset (<data-g1.ttl>)
+ # (union
+ # (bgp (triple ?s ?p ?o))
+ # (graph ?g (bgp (triple ?s ?p ?o))))))
+ #
+ # is effectively re-written to the following:
+ #
+ # (prefix ((: <http://example/>))
+ # (union
+ # (graph <data-g1.ttl> (bgp (triple ?s ?p ?o)))
+ # (bgp)))
+ #
+ # Multiple default graphs union the information from a graph query
+ # on each default datasource.
+ #
+ # @example Dataset with two default data sources
+ #
+ # (prefix ((: <http://example/>))
+ # (dataset (<data-g1.ttl> <data-g2.ttl>)
+ # (bgp (triple ?s ?p ?o))))
#
+ # is effectively re-written to the following:
+ #
+ # (prefix ((: <http://example/>))
+ # (union
+ # (graph <data-g1.ttl> (bgp (triple ?s ?p ?o)))
+ # (graph <data-g2.ttl> (bgp (triple ?s ?p ?o)))))
+ #
+ # Multiple named graphs place a filter on all variables used
+ # to identify those named graphs so that they are restricted
+ # to come only from the specified set. Note that this requires
+ # descending through expressions to find graph patterns using
+ # variables and placing a filter on each identified variable.
+ #
+ # @example Dataset with two named data sources
+ #
+ # (prefix ((: <http://example/>))
+ # (dataset ((named <data-g1.ttl>) (named <data-g2.ttl>))
+ # (graph ?g (bgp (triple ?s ?p ?o)))))
+ #
+ # is effectively re-written to the following:
+ #
+ # (prefix ((: <http://example/>))
+ # (filter (|| (= ?g <data-g1.ttl>) (= ?g <data-g2.ttl>))
+ # (graph ?g (bgp (triple ?s ?p ?o)))))
+ #
+ # @example Dataset with multiple named graphs
# @see http://www.w3.org/TR/rdf-sparql-query/#specifyingDataset
class Dataset < Binary
include Query
@@ -75,7 +136,8 @@ class Dataset < Binary
# @see http://www.w3.org/TR/rdf-sparql-query/#sparqlAlgebra
def execute(queryable, options = {})
debug(options) {"Dataset"}
- default_graphs = []
+ default_datasets = []
+ named_datasets = []
operand(0).each do |ds|
load_opts = {
:headers => {"Accept" => ACCEPTS}
@@ -94,11 +156,12 @@ def execute(queryable, options = {})
uri = self.base_uri ? self.base_uri.join(ds.last) : ds.last
uri.lexical = ds.last
debug(options) {"=> named data source #{uri}"}
+ named_datasets << uri
else
debug(options) {"=> array: join #{self.base_uri.inspect} to #{ds.inspect}"}
uri = self.base_uri ? self.base_uri.join(ds) : ds
debug(options) {"=> default data source #{uri}"}
- default_graphs << uri
+ default_datasets << uri
end
load_opts[:context] = load_opts[:base_uri] = uri
unless queryable.has_context?(uri)
@@ -107,27 +170,51 @@ def execute(queryable, options = {})
end
end
- # Query binding a non-distinguishded variable to context
- default_var = RDF::Query::Variable.new
- default_var.distinguished = false
-
- @solutions = operands.last.execute(queryable, options.merge(
- :context => default_var,
- :depth => options[:depth].to_i + 1)
- ).filter do |soln|
- # Reject solutions with bindings to default_var where the value
- # is not a specified default graph
- debug(options) {"=> filter: #{soln.inspect}"}
- if soln.unbound?(default_var)
- true
- elsif default_graphs.include?(soln[default_var])
- # Remove the variable from the solution and match
- # FIXME: this should either go in RDF::Query::Solution,
- # or there should be a immutable way of performing this
- # as an operation on RDF::Query::Solutions
- soln.bindings.delete(default_var.to_sym)
+ # Re-write the operand:
+ #require 'debugger'; breakpoint
+ operator = self.rewrite do |op|
+ case op
+ when Operator::Graph
+ if named_datasets.empty?
+ # * If there are no named datasets, remove all (graph)
+ # operations.
+ debug(options) {"=> #{op.to_sxp} => (bgp)"}
+ Operator::BGP.new
+ elsif (name = op.operand(0)).is_a?(RDF::Resource)
+ # It must match one of the named_datasets
+ debug(options) {"=> #{op.to_sxp} => (bgp)"}
+ named_datasets.include?(name) ? op : Operator::BGP.new
+ else
+ # Name is a variable, replace op with a filter on that
+ # variable and op
+ filter_expressions = named_datasets.map {|u| Operator::Equal.new(name, u)}
+ debug(options) {"=> #{op.to_sxp} => (filter (...) #{op.to_sxp})"}
+ filt = to_binary(Operator::Or, *filter_expressions)
+ Operator::Filter.new(filt, op)
+ end
+ when RDF::Query # Operator::BGP
+ case default_datasets.length
+ when 0
+ # No Default Datasets, no query to run
+ debug(options) {"=> #{op.to_sxp} => (bgp)"}
+ Operator::BGP.new
+ when 1
+ # A single dataset, write as (graph <dataset> (bgp))
+ debug(options) {"=> #{op.to_sxp} => (graph <#{default_datasets.first}> #{op.to_sxp})"}
+ Operator::Graph.new(default_datasets.first, op)
+ else
+ # Several, rewrite as Union
+ debug(options) {"=> #{op.to_sxp} => (union ...)"}
+ to_binary(Operator::Union, *default_datasets.map {|u| Operator::Graph.new(u, op)})
+ end
+ else
+ nil
end
end
+ executable = operator.operands.last
+ debug(options) {"=> rewritten: #{executable.to_sxp}"}
+
+ @solutions = executable.execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
end
##
View
2  lib/sparql/algebra/operator/filter.rb
@@ -33,7 +33,7 @@ class Filter < Operator::Binary
# @see http://www.w3.org/TR/rdf-sparql-query/#sparqlAlgebra
# @see http://www.w3.org/TR/rdf-sparql-query/#ebv
def execute(queryable, options = {})
- debug(options) {"Filter #{operands.first}"}
+ debug(options) {"Filter #{operands.first.to_sxp}"}
@solutions = operands.last.execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
debug(options) {"=>(before) #{@solutions.map(&:to_hash).inspect}"}
@solutions = @solutions.filter do |solution|
View
7 lib/sparql/algebra/operator/graph.rb
@@ -45,6 +45,13 @@ def execute(queryable, options = {})
##
# Optimizes each operand of this operator in place.
#
# Operands that do not respond to `optimize` are kept unchanged.
#
# @return [Object] `self`, with its operands replaced by their optimized forms
def optimize
  # Assign to the instance variable rather than to a bare `operands`:
  # `operands = operands.map(...)` declares a local variable that shadows
  # the `operands` reader and is still nil when the right-hand side runs.
  # NOTE(review): assumes operands are stored in @operands and that callers
  # expect `self` back — confirm against the base Operator#optimize.
  @operands = operands.map { |op| op.respond_to?(:optimize) ? op.optimize : op }
  self
end
+
+ ##
+ # Don't do any more rewriting
+ # @return [SPARQL::Algebra::Expression] `self`
+ def rewrite(&block)
+ self
+ end
end # Graph
end # Operator
end; end # SPARQL::Algebra
View
13 lib/sparql/algebra/operator/union.rb
@@ -29,11 +29,11 @@ class Union < Operator::Binary
# @see http://www.w3.org/TR/rdf-sparql-query/#sparqlAlgebra
def execute(queryable, options = {})
debug(options) {"Union"}
- solutions1 = operand(0).execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
- debug(options) {"=>(left) #{solutions1.inspect}"}
- solutions2 = operand(1).execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
- debug(options) {"=>(right) #{solutions2.inspect}"}
- @solutions = RDF::Query::Solutions.new(solutions1 + solutions2)
+ @solutions = RDF::Query::Solutions.new(operands.inject([]) do |memo, op|
+ solns = op.execute(queryable, options.merge(:depth => options[:depth].to_i + 1))
+ debug(options) {"=> (op) #{solns.inspect}"}
+ memo + solns
+ end)
debug(options) {"=> #{@solutions.inspect}"}
@solutions
end
@@ -41,8 +41,7 @@ def execute(queryable, options = {})
##
# Returns an optimized version of this query.
#
- # If optimize operands, and if the first two operands are both Queries, replace
- # with the unique sum of the query elements
+ # Optimize operands and remove any which are empty.
#
# @return [Union, RDF::Query] `self`
def optimize
View
78 spec/algebra/rewrite_spec.rb
@@ -0,0 +1,78 @@
+$:.unshift ".."
+require 'spec_helper'
+require 'algebra/algebra_helper'
+require 'sparql/client'
+
+include SPARQL::Algebra
+
+describe SPARQL::Algebra::Operator do
+ let!(:ex) {RDF::Vocabulary.new('http://example.org/')}
+ let!(:op) {Operator.new}
+
+ describe "#to_binary" do
+ it "raises exception if there are no expressions" do
+ lambda {op.send(:to_binary, Operator::Union)}.should raise_error("Operator#to_binary requires two or more expressions")
+ end
+
+ it "raises exception if there is one expressions" do
+ op.send(:to_binary, Operator::Union, Operator::BGP.new).should == Operator::BGP.new
+ end
+
+ context "with two expressions" do
+ subject {op.send(:to_binary, Operator::Union, Operator::BGP.new, Operator::BGP.new)}
+ it "returns a Union" do
+ should be_a(Operator::Union)
+ end
+ its(:operands) do
+ should == [Operator::BGP.new, Operator::BGP.new]
+ end
+ end
+
+ context "with three expressions" do
+ subject {op.send(:to_binary, Operator::Union, Operator::BGP.new, Operator::BGP.new, Operator::BGP.new)}
+ it "returns a Union" do
+ should be_a(Operator::Union)
+ end
+ its(:operands) do
+ should == [Operator::BGP.new, Operator::Union.new(Operator::BGP.new, Operator::BGP.new)]
+ end
+ end
+ end
+
+ describe "#rewrite" do
+ {
+ "Remove BGP" => [
+ %q{(prefix ((ex: <http://example.org/>))
+ (bgp (triple ex:x1 ex:p2 ex:x2)))},
+ %q{(prefix ((ex: <http://example.org/>))
+ (bgp))},
+ ],
+ "Remove Named Graph" => [
+ %q{(prefix ((ex: <http://example.org/>))
+ (graph <a> (bgp (triple ex:x1 ex:p2 ex:x2))))},
+ %q{(prefix ((ex: <http://example.org/>))
+ (bgp))},
+ ]
+ }.each do |name, (given, expected)|
+ it name do
+ query = SPARQL::Algebra::Expression.parse(given)
+ result = SPARQL::Algebra::Expression.parse(expected)
+ rewritten = query.send(:rewrite) do |op|
+ case op
+ when Operator::Graph, RDF::Query
+ Operator::BGP.new
+ else
+ op
+ end
+ end
+ rewritten.to_sxp.should == result.to_sxp
+ end
+ end
+
+ context "with default datasets" do
+ end
+
+ context "with named datasets" do
+ end
+ end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.