Skip to content

Commit

Permalink
Adds option for setting the hash algorithm for the RDFC10 algorithm. …
Browse files Browse the repository at this point in the history
…Supports MD5, SHA1, SHA2, SHA384, and SHA512 in addition to the default SHA256.
  • Loading branch information
gkellogg committed Aug 30, 2023
1 parent ab65d0c commit ddfffda
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ group :debug do
end

# Test-only dependencies (coverage reporting), restricted to MRI.
group :test do
  # simplecov is declared exactly once: listing the same gem twice with
  # different version requirements is rejected by Bundler.
  gem 'simplecov', '~> 0.22', platforms: :mri
  gem 'simplecov-lcov', '~> 0.8', platforms: :mri
end
3 changes: 3 additions & 0 deletions lib/rdf/normalize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def new(enumerable, **options)
algorithm_class.new(enumerable, **options)
end
module_function :new

# Raised by RDFC10's hash_n_degree_quads when the total number of calls
# reaches the configured `max_calls` limit.
# Subclasses RuntimeError, so callers rescuing RuntimeError still catch it.
class MaxCallsExceeded < RuntimeError; end
# Raised by RDFC10#initialize when the `:hash_algorithm` option is not one of
# the supported digest names.
class UnknownHashAlgorithm < RuntimeError; end
end

module Canonicalize
Expand Down
14 changes: 11 additions & 3 deletions lib/rdf/normalize/rdfc10.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,16 @@ class RDFC10 < Base
# @option options [Integer] :max_calls (40)
# Maximum number of calls allowed for recursive blank node labeling,
# as a multiple of the total number of blank nodes in the dataset.
# @option options [:MD5, :SHA1, :SHA2, :SHA256, :SHA384, :SHA512] :hash_algorithm (:SHA256)
# See [Digest Algorithms](https://github.com/ruby/digest#digest-algorithms)
# @return [RDF::Enumerable]
# @raise [RuntimeError] if the maximum number of levels of recursion is exceeded.
def initialize(enumerable, **options)
  @dataset = enumerable
  @options = options

  # Fall back to SHA256 when no (or a falsy) algorithm was requested.
  @options[:hash_algorithm] = :SHA256 unless @options[:hash_algorithm]

  # Accept only the digest names this implementation supports.
  requested = @options[:hash_algorithm]
  return if %i{MD5 SHA1 SHA2 SHA256 SHA384 SHA512}.include?(requested)

  raise UnknownHashAlgorithm, "UnknownHashAlgorithm: #{requested.inspect}. Use one of MD5, SHA1, SHA2, SHA256, SHA384, or SHA512"
end

# Yields each normalized statement
Expand Down Expand Up @@ -158,13 +164,15 @@ class NormalizationState
include RDF::Util::Logger

attr_accessor :bnode_to_statements
attr_accessor :hash_algorithm
attr_accessor :hash_to_bnodes
attr_accessor :canonical_issuer
attr_accessor :max_calls
attr_accessor :total_calls

def initialize(**options)
  @options = options

  # Resolve the digest class by name (e.g. :SHA256 -> Digest::SHA256),
  # using SHA256 when the option is absent.
  algorithm_name = options.fetch(:hash_algorithm, :SHA256)
  @hash_algorithm = Digest.const_get(algorithm_name)

  # Start with empty lookup tables and a fresh issuer using the "c14n" prefix.
  @bnode_to_statements = {}
  @hash_to_bnodes = {}
  @canonical_issuer = IdentifierIssuer.new("c14n")

  # No call limit until one is assigned; the call counter starts at zero.
  @max_calls, @total_calls = nil, 0
end
Expand Down Expand Up @@ -233,15 +241,15 @@ def hash_related_node(related, statement, issuer, position)
# @param [RDF::Node] node
# @param [IdentifierIssuer] issuer
# @return [Array<String,IdentifierIssuer>] the Hash and issuer
# @raise [RuntimeError] If total number of calls has exceeded `max_calls` times the number of blank nodes in the dataset.
# @raise [MaxCallsExceeded] If total number of calls has exceeded `max_calls` times the number of blank nodes in the dataset.
def hash_n_degree_quads(node, issuer)
log_debug("hndq:")
log_debug(" log point", "Hash N-Degree Quads function (4.9.3).")
log_debug(" identifier") {node.id}
log_debug(" issuer") {issuer.inspect}

if max_calls && total_calls >= max_calls
raise "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
raise MaxCallsExceeded, "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
end
@total_calls += 1

Expand Down Expand Up @@ -367,7 +375,7 @@ def inspect_hash_to_bnodes
protected

# Computes the hexadecimal digest of a value using the configured hash algorithm.
#
# @param [String] val the data to hash; passed unchanged to the digest class
# @return [String] the hex-encoded digest returned by `hash_algorithm.hexdigest`
def hexdigest(val)
  # Only the configured algorithm is consulted. A hard-coded SHA256 pass here
  # would be computed and immediately discarded.
  hash_algorithm.hexdigest(val)
end

# Group adjacent bnodes by hash
Expand Down
31 changes: 23 additions & 8 deletions script/run
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ require 'rubygems'
$:.unshift(File.expand_path('../../lib', __FILE__))
require "bundler/setup"
require 'logger'
require 'digest'
require 'rdf/normalize'
begin
require 'linkeddata'
Expand All @@ -23,7 +24,15 @@ def run(input, focus: nil, shape: nil, **options)
raise "Reader not found for #{options[:input_format]}" unless reader_class

dataset = RDF::Repository.new {|r| r << reader_class.new(input)}
output = dataset.dump(:normalize, **options)
output = if options[:map]
dataset.canonicalize.to_hash.inspect
elsif options[:hash]
nquads = dataset.dump(:normalize, **options)
hash_algorithm = Digest.const_get(options.fetch(:hash_algorithm, :SHA256))
hash_algorithm.hexdigest(nquads)
else
dataset.dump(:normalize, **options)
end

options[:output].write output
rescue
Expand All @@ -39,7 +48,10 @@ OPT_ARGS = [
["--base", GetoptLong::REQUIRED_ARGUMENT, "Base URI of target graph, if different from graph location"],
["--debug", GetoptLong::NO_ARGUMENT, "Debug shape matching"],
["--execute", "-e", GetoptLong::REQUIRED_ARGUMENT, "Use option argument as the patch input"],
["--hash", GetoptLong::NO_ARGUMENT, "Show hash result only"],
["--hash-algorithm", GetoptLong::REQUIRED_ARGUMENT,"Hash Algorithm to use, defaults to SHA256"],
["--input-format", GetoptLong::REQUIRED_ARGUMENT, "Format of the input file, defaults to ttl"],
["--map", GetoptLong::NO_ARGUMENT, "Show the issued identifiers map"],
["--max-calls", GetoptLong::REQUIRED_ARGUMENT, "Factor for maximum allowed calls to Hash N-Degree Quads"],
["--output", "-o", GetoptLong::REQUIRED_ARGUMENT, "Save output to file"],
["--progress", GetoptLong::NO_ARGUMENT, "Display parse tree"],
Expand Down Expand Up @@ -73,13 +85,16 @@ input = nil

opts.each do |opt, arg|
case opt
when '--base' then options[:base_uri] = arg
when '--debug' then logger.level = Logger::DEBUG
when '--execute' then input = arg
when '--input-format' then options[:input_format] = arg.to_sym
when '--max-calls' then options[:max_calls] = arg.to_i
when '--output' then options[:output] = File.open(arg, "w")
when '--progress' then logger.level = Logger::INFO
when '--base' then options[:base_uri] = arg
when '--debug' then logger.level = Logger::DEBUG
when '--execute' then input = arg
when '--hash' then options[:hash] = true
when '--hash-algorithm' then options[:hash_algorithm] = arg.to_sym
when '--input-format' then options[:input_format] = arg.to_sym
when '--map' then options[:map] = true
when '--max-calls' then options[:max_calls] = arg.to_i
when '--output' then options[:output] = File.open(arg, "w")
when '--progress' then logger.level = Logger::INFO
when '--yaml'
logger.level = Logger::DEBUG
logger.formatter = lambda {|severity, datetime, progname, msg| "%s\n" % msg}
Expand Down
3 changes: 2 additions & 1 deletion script/tc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def run_tc(tc, **options)
begin
puts "open #{tc.action}" if options[:verbose]
options = {base_uri: tc.base}.merge(options)
options[:hash_algorithm] = options.delete(:algorithm).to_sym if options[:algorithm]

dataset = RDF::Repository.load(tc.action, format: :nquads)
result = if tc.type == 'rdfc:RDFC10MapTest'
Expand All @@ -55,7 +56,7 @@ def run_tc(tc, **options)
begin
RDF::Normalize::RDFC10.new(dataset, **options).to_hash
"failed" # Should raise exception
rescue RuntimeError
rescue ::RDF::Normalize::MaxCallsExceeded
"passed"
end
else
Expand Down
2 changes: 1 addition & 1 deletion spec/suite_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
result_map = JSON.load(t.expected)
expect(input_map).to produce(result_map, t)
elsif t.type == 'rdfc:RDFC10NegativeEvalTest'
expect {RDF::Normalize::RDFC10.new(dataset).to_hash}.to raise_error(RuntimeError)
expect {RDF::Normalize::RDFC10.new(dataset).to_hash}.to raise_error(::RDF::Normalize::MaxCallsExceeded)
else
result = dataset.dump(:normalize, logger: t.logger, **t.writer_options)
expect(result).to produce(t.expected, t)
Expand Down

0 comments on commit ddfffda

Please sign in to comment.