Skip to content

Commit

Permalink
add language processing to ldpath service
Browse files Browse the repository at this point in the history
  • Loading branch information
elrayle committed Nov 21, 2019
1 parent 941dec5 commit 6986cfa
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 55 deletions.
102 changes: 74 additions & 28 deletions app/services/qa/linked_data/ldpath_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,85 @@
module Qa
module LinkedData
class LdpathService
VALUE_ON_ERROR = [].freeze
LANGUAGE_PATTERN = "*LANG*".freeze
PROPERTY_NAME = "property".freeze

class_attribute :predefined_prefixes
self.predefined_prefixes = Ldpath::Transform.default_prefixes.with_indifferent_access

# Create the ldpath program for a given ldpath.
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
# @return [Ldpath::Program] an executable program that will extract a value from a graph
def self.ldpath_program(ldpath:, prefixes: {})
  # Unary plus yields an unfrozen buffer, so the appends below keep working
  # even when string literals in this file are frozen.
  program_code = +""
  # One "@prefix" declaration per entry, followed by the single property statement.
  prefixes.each { |key, url| program_code << "@prefix #{key} : <#{url}> \;\n" }
  program_code << "property = #{ldpath} \;"
  Ldpath::Program.parse program_code
rescue => e
  # Log the generated program next to the parser's message, then re-raise with the translated text.
  Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}")
  raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}"
end
class << self
# Create the ldpath program for a given ldpath.
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
# @param languages [Array<Symbol>] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
# @return [Ldpath::Program] an executable program that will extract a value from a graph
def ldpath_program(ldpath:, prefixes: {}, languages: [])
  # Generate the program text first so it is available to the rescue clause for logging.
  program_code = ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages)
  Ldpath::Program.parse(program_code)
rescue StandardError => error
  # Same suffix is used for the log entry and for the error handed back to the caller.
  cause = "... cause: #{error.message}"
  Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}#{cause}\n ldpath_program=\n#{program_code}")
  raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + cause
end

# Create the program code for a given ldpath.
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
# @param languages [Array<Symbol>] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
# @return [String] the program code string used with Ldpath::Program.parse
def ldpath_program_code(ldpath:, prefixes: {}, languages: [])
  # Unary plus yields an unfrozen buffer, so the appends below keep working
  # even when string literals in this file are frozen.
  program_code = +""
  # One "@prefix" declaration per entry, in the order the hash yields them.
  prefixes.each { |prefix, uri| program_code << "@prefix #{prefix} : <#{uri}> \;\n" }
  # property_explode appends the property statement(s) and returns the finished code.
  property_explode(program_code, ldpath, languages)
end

# Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
# @param program [Ldpath::Program] an executable program that will extract a value from a graph
# @param graph [RDF::Graph] the graph from which the values will be extracted
# @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
# @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
# It will limit results to those found in the context graph.
# @return [Array, nil] the unique extracted values based on the ldpath (string values are returned as RDF::Literal); nil when the evaluation produces no output
def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?)
  if program.blank?
    raise ArgumentError, "You must specify a program when calling ldpath_evaluate"
  end

  # Run the program against the graph and flatten the per-property results into one list.
  property_implode(program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context))
rescue ParseError => parse_error
  cause = parse_error.message
  # NOTE(review): the opening "(" in the log text is never closed; it is left as-is because
  # the spec matches this exact message.
  Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{cause}")
  raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{cause}"
end

private

# create program code with a property per language + untagged
def property_explode(program_code, ldpath, languages)
  # Builds one "<name> = <path> ;" statement with every *LANG* marker replaced by +filter+.
  statement = ->(name, filter) { "#{name} = #{ldpath.gsub(LANGUAGE_PATTERN, filter)} \;\n" }

  # No marker in the ldpath, or no languages requested: emit a single unfiltered property
  # (any marker is simply removed).
  return program_code << statement.call(PROPERTY_NAME, '') unless ldpath.include?(LANGUAGE_PATTERN) && languages.present?

  # One language-specific property per requested language ...
  languages.each { |language| program_code << statement.call(property_name_for(language), "[@#{language}]") }
  # ... plus the plain property restricted to values without a language tag.
  program_code << statement.call(PROPERTY_NAME, '[@none]')
end

# flatten all properties and turn into RDF::Literals with language tagging if appropriate
def property_implode(output)
  return nil if output.blank?

  # Build a fresh list instead of writing converted values back into +output+,
  # so the hash handed in by the caller is left untouched.
  per_property = output.map do |property_name, values|
    # Only the first element is inspected to decide whether a property holds plain strings.
    values.first.is_a?(String) ? remap_string_values(property_name, values) : values
  end
  per_property.flatten.uniq
end

# Name of the program property that holds the values for +language+,
# i.e. the language followed by an underscore and the base property name.
def property_name_for(language)
  base_name = PROPERTY_NAME
  "#{language}_#{base_name}"
end

# Inverse of property_name_for: recover the language from a property name.
# The bare property name (compared case-insensitively) carries no language and yields nil.
def language_from(property_name)
  if property_name.casecmp?(PROPERTY_NAME)
    nil
  else
    # Strip the trailing "_<property name>" suffix, leaving the language part.
    property_name.chomp("_#{PROPERTY_NAME}")
  end
end

# Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
# @param program [Ldpath::Program] an executable program that will extract a value from a graph
# @param graph [RDF::Graph] the graph from which the values will be extracted
# @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
# @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
# It will limit results to those found in the context graph.
## @return [Array<String>] the extracted values based on the ldpath
def self.ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?)
return VALUE_ON_ERROR if program.blank?
output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context)
output.present? ? output['property'].uniq : nil
rescue => e
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}")
raise StandardError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}"
# Wrap each value of a property in an RDF::Literal, passing along the language
# derived from the property name (nil for the untagged property).
def remap_string_values(property_name, values)
  tag = language_from(property_name)
  values.map do |text|
    RDF::Literal.new(text, language: tag)
  end
end
end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ class GraphLdpathMapperService
# @example ldpath map
# {
# uri: :subject_uri,
# id: 'locid:lccn :: xsd::string',
# id: 'locid:lccn',
# label: 'skos:prefLabel :: xsd:string',
# altlabel: 'skos:altLabel :: xsd:string',
# sameas: 'skos:sameAs :: xsd:anyURI',
# sort: 'vivo:rank :: xsd:integer'
# }
# @param subject_uri [RDF::URI] the subject within the graph for which the values are being extracted
Expand Down
6 changes: 3 additions & 3 deletions spec/models/linked_data/config/context_property_map_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@
let(:expanded_id) { '123' }

before do
allow(Ldpath::Program).to receive(:parse).with('property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;').and_return(basic_program)
allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(expanded_label_program)
allow(Ldpath::Program).to receive(:parse).with('property = loc:lccn ::xsd:string ;').and_return(expanded_id_program)
allow(Ldpath::Program).to receive(:parse).with("property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;\n").and_return(basic_program)
allow(Ldpath::Program).to receive(:parse).with("property = skos:prefLabel ::xsd:string ;\n").and_return(expanded_label_program)
allow(Ldpath::Program).to receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program)
allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri])
allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label])
allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id])
Expand Down
157 changes: 148 additions & 9 deletions spec/services/linked_data/ldpath_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@
context 'when ldpath_program gets parse error' do
let(:cause) { "undefined method `ascii_tree' for nil:NilClass" }
let(:warning) { I18n.t('qa.linked_data.ldpath.parse_logger_error') }
let(:program_code) { "@prefix skos : <http://www.w3.org/2004/02/skos/core#> ;\nproperty = skos:prefLabel ::xsd:string ;" }
let(:program_code) { "BAD_PROGRAM ;" }
let(:log_message) { "WARNING: #{warning}... cause: #{cause}\n ldpath_program=\n#{program_code}" }

before { allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause) }
before do
allow(described_class).to receive(:ldpath_program_code).with(anything).and_return(program_code)
allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause)
end

it 'logs error and returns PARSE ERROR as the value' do
expect(Rails.logger).to receive(:warn).with(log_message)
Expand All @@ -29,32 +32,168 @@
end
end

# Specs for the program-code generator: each example compares the generated text
# with a heredoc holding the expected prefix declaration and property statement(s).
describe '.ldpath_program_code' do
subject { described_class.ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages) }

# The ldpath has no *LANG* marker, so the requested language (:fr) does not appear in the output.
context 'for a ldpath without language pattern' do
let(:ldpath) { 'dcterms:identifier' }
let(:languages) { [:fr] }
let(:prefixes) { { "dcterms" => "http://purl.org/dc/terms/" } }
it 'generates the simple program code' do
expected_program = <<-PROGRAM
@prefix dcterms : <http://purl.org/dc/terms/> \;
property = dcterms:identifier \;
PROGRAM
expect(subject).to eq expected_program
end
end

# The ldpath carries the *LANG* marker; the output depends on which languages are requested.
context 'for a ldpath with language pattern' do
let(:ldpath) { 'madsrdf:authoritativeLabel*LANG* ::xsd:string' }
let(:prefixes) { { "madsrdf" => "http://www.loc.gov/mads/rdf/v1#" } }
# With nil languages the marker is removed and a single property is generated.
context 'and no languages specified' do
let(:languages) { nil }
it 'generates the simple program code' do
expected_program = <<-PROGRAM
@prefix madsrdf : <http://www.loc.gov/mads/rdf/v1#> \;
property = madsrdf:authoritativeLabel ::xsd:string \;
PROGRAM
expect(subject).to eq expected_program
end
end

# One language: a language-prefixed property plus the plain property filtered with [@none].
context 'and one language specified' do
let(:languages) { [:en] }
it 'generates a program with the language' do
expected_program = <<-PROGRAM
@prefix madsrdf : <http://www.loc.gov/mads/rdf/v1#> \;
en_property = madsrdf:authoritativeLabel[@en] ::xsd:string \;
property = madsrdf:authoritativeLabel[@none] ::xsd:string \;
PROGRAM
expect(subject).to eq expected_program
end
end

# Several languages: one property per language, in the order given, then the [@none] property.
context 'and multiple languages specified' do
let(:languages) { [:fr, :de] }
it 'generates a program with languages' do
expected_program = <<-PROGRAM
@prefix madsrdf : <http://www.loc.gov/mads/rdf/v1#> \;
fr_property = madsrdf:authoritativeLabel[@fr] ::xsd:string \;
de_property = madsrdf:authoritativeLabel[@de] ::xsd:string \;
property = madsrdf:authoritativeLabel[@none] ::xsd:string \;
PROGRAM
expect(subject).to eq expected_program
end
end
end
end

describe '.ldpath_evaluate' do
subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri) }

let(:program) { instance_double(Ldpath::Program) }
let(:graph) { instance_double(RDF::Graph) }
let(:subject_uri) { instance_double(RDF::URI) }
let(:values) { ['Expanded Label'] }

before do
allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(program)
allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => values)
allow(Ldpath::Program).to receive(:parse).with(anything).and_return(program)
end
it 'returns the extracted label' do
expect(subject).to match_array values

# The program exposes only the plain 'property' key; the nested contexts vary the kind of value returned.
context 'when program does not contain languages' do
  # The stub is identical for every nested context, so it is declared once here;
  # +values+ is resolved lazily from whichever nested context is running.
  before do
    allow(program).to receive(:evaluate)
      .with(subject_uri, context: graph, limit_to_context: true)
      .and_return('property' => values)
  end

  context 'and value is a string' do
    let(:values) { ['value'] }

    # Plain strings come back wrapped in RDF::Literal without a language.
    it 'returns the string values as untagged literals' do
      expected_values = values.map { |v| RDF::Literal.new(v) }
      expect(subject).to match_array expected_values
    end
  end

  context 'and value is a URI' do
    let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] }

    # Non-string values are passed through unchanged.
    it 'returns the URIs' do
      expected_values = values
      expect(subject).to match_array expected_values
    end
  end

  context 'and value is numeric' do
    let(:values) { [23, 14, 55] }

    # Non-string values are passed through unchanged.
    it 'returns the numeric values' do
      expected_values = values
      expect(subject).to match_array expected_values
    end
  end
end

# The program exposes one '<language>_property' key per language plus the plain 'property' key.
context 'when program has languages' do
  context 'and one language specified' do
    let(:en_values) { ['en_value'] }
    let(:untagged_values) { ['untagged_value'] }

    before do
      allow(program).to receive(:evaluate)
        .with(subject_uri, context: graph, limit_to_context: true)
        .and_return('en_property' => en_values, 'property' => untagged_values)
    end

    # Values under 'en_property' get the :en tag; values under 'property' stay untagged.
    it 'returns the language-tagged values plus the untagged values' do
      expected_values =
        en_values.map { |v| RDF::Literal.new(v, language: :en) } +
        untagged_values.map { |v| RDF::Literal.new(v) }
      expect(subject).to match_array expected_values
    end
  end

  context 'and multiple languages specified' do
    # 'fr_value1' is listed twice on purpose to exercise de-duplication.
    let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] }
    let(:de_values) { ['de_value'] }
    let(:untagged_values) { ['untagged_value'] }

    before do
      allow(program).to receive(:evaluate)
        .with(subject_uri, context: graph, limit_to_context: true)
        .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values)
    end

    it 'returns unique values tagged per language plus the untagged values' do
      expected_values =
        (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } +
         de_values.map { |v| RDF::Literal.new(v, language: :de) } +
         untagged_values.map { |v| RDF::Literal.new(v) }).uniq
      expect(subject).to match_array expected_values
    end
  end
end

context 'when ldpath_evaluate gets parse error' do
let(:cause) { "unknown cause" }
let(:warning) { I18n.t('qa.linked_data.ldpath.evaluate_logger_error') }
let(:log_message) { "WARNING: #{warning} (cause: #{cause}" }

before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(cause) }
before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(ParseError, cause) }

it 'logs error and returns PARSE ERROR as the value' do
expect(Rails.logger).to receive(:warn).with(log_message)
expect { subject.values(graph, subject_uri) }.to raise_error StandardError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}"
expect { subject }.to raise_error ParseError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}"
end
end

# A missing program is rejected up front instead of being evaluated.
context 'when program is empty' do
  let(:program) { nil }

  it 'raises an ArgumentError' do
    expect { subject }.to raise_error ArgumentError, "You must specify a program when calling ldpath_evaluate"
  end
end
end
Expand Down
Loading

0 comments on commit 6986cfa

Please sign in to comment.