# Post-patch contents of app/services/qa/linked_data/ldpath_service.rb, rebuilt from the
# added and context lines of the "add language processing to ldpath service" patch.
# NOTE(review): the hunk starts at line 4 of that file, so anything on its first three
# lines (header comment, requires) is not visible here -- confirm before replacing the file.
# NOTE(review): the patch text had its whitespace collapsed; runs of spaces inside the
# log/error message strings below may have been longer originally -- confirm against the repo.
module Qa
  module LinkedData
    # Builds LDPath programs from an ldpath expression and evaluates them against a graph.
    # An ldpath may contain the LANGUAGE_PATTERN marker; when languages are requested, the
    # marker is expanded into one property per language plus one for untagged values.
    class LdpathService
      # Marker inside an ldpath that is replaced by a language filter (or removed).
      LANGUAGE_PATTERN = "*LANG*".freeze
      # Name of the property holding untagged values; language properties are "<lang>_property".
      PROPERTY_NAME = "property".freeze

      class_attribute :predefined_prefixes
      self.predefined_prefixes = Ldpath::Transform.default_prefixes.with_indifferent_access

      class << self
        # Create the ldpath program for a given ldpath.
        # @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
        # @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
        # @param languages [Array] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
        # @return [Ldpath::Program] an executable program that will extract a value from a graph
        # @raise [StandardError] when the program code cannot be generated or parsed; the cause is logged as a warning first
        def ldpath_program(ldpath:, prefixes: {}, languages: [])
          program_code = ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages)
          Ldpath::Program.parse program_code
        rescue => e
          # program_code is nil here if generating the code itself failed
          Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}")
          raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}"
        end

        # Create the program code for a given ldpath.
        # @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
        # @param prefixes [Hash, nil] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
        # @param languages [Array, Symbol, String, nil] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
        # @return [String] the program code string used with Ldpath::Program.parse
        def ldpath_program_code(ldpath:, prefixes: {}, languages: [])
          prefix_lines = (prefixes || {}).map { |key, url| "@prefix #{key} : <#{url}> ;\n" }
          # Joined from an array so that no string literal is mutated in place.
          (prefix_lines + property_lines(ldpath, languages)).join
        end

        # Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
        # @param program [Ldpath::Program] an executable program that will extract a value from a graph
        # @param graph [RDF::Graph] the graph from which the values will be extracted
        # @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
        # @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
        #        It will limit results to those found in the context graph.
        # @return [Array, nil] the unique extracted values based on the ldpath, with String values
        #         wrapped as RDF::Literal (language tagged when they came from a language property);
        #         nil when the evaluation produced no output
        # @raise [ArgumentError] when program is blank
        # @raise [ParseError] when evaluation raises ParseError; the cause is logged as a warning first
        def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?)
          raise ArgumentError, "You must specify a program when calling ldpath_evaluate" if program.blank?
          output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context)
          property_implode(output)
        rescue ParseError => e
          # NOTE(review): ParseError is not defined in the visible code; presumably a top-level
          # constant supplied by a dependency -- confirm it is loaded wherever this runs.
          Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}")
          raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}"
        end

        private

        # Build the property statements of the program: a single property when the ldpath has no
        # language marker or no languages are requested, otherwise one property per requested
        # language followed by the property for untagged values.
        # @return [Array<String>] program lines, each terminated with a newline
        def property_lines(ldpath, languages)
          return [property_line(PROPERTY_NAME, ldpath)] unless ldpath.include?(LANGUAGE_PATTERN)

          # Accept a single language as well as a list; blank and repeated entries would
          # otherwise produce malformed or duplicate properties.
          requested = Array(languages).reject(&:blank?).uniq
          return [property_line(PROPERTY_NAME, ldpath.gsub(LANGUAGE_PATTERN, ''))] if requested.empty?

          tagged = requested.map do |language|
            property_line(property_name_for(language), ldpath.gsub(LANGUAGE_PATTERN, "[@#{language}]"))
          end
          tagged << property_line(PROPERTY_NAME, ldpath.gsub(LANGUAGE_PATTERN, '[@none]'))
        end

        # Format one "name = path ;" program statement.
        def property_line(name, path)
          "#{name} = #{path} ;\n"
        end

        # Flatten all properties and turn String values into RDF::Literals with language tagging
        # if appropriate. The hash returned by the program is left untouched.
        def property_implode(output)
          return nil if output.blank?
          output.map { |property_name, values| remap_string_values(property_name, values) }.flatten.uniq
        end

        def property_name_for(language)
          "#{language}_#{PROPERTY_NAME}"
        end

        # @return [String, nil] the language encoded in a "<lang>_property" name; nil for the untagged property
        def language_from(property_name)
          name = property_name.to_s
          return nil if name.casecmp?(PROPERTY_NAME)
          name.chomp("_#{PROPERTY_NAME}")
        end

        # Wrap values as RDF::Literal when the first value is a String; any other values
        # (e.g. URIs, numbers) are returned unchanged.
        def remap_string_values(property_name, values)
          return values unless values.first.is_a? String
          language = language_from(property_name)
          values.map { |v| RDF::Literal.new(v, language: language) }
        end
      end
    end
  end
end
receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program) allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri]) allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label]) allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id]) diff --git a/spec/services/linked_data/ldpath_service_spec.rb b/spec/services/linked_data/ldpath_service_spec.rb index 17d8d5c3..877d321f 100644 --- a/spec/services/linked_data/ldpath_service_spec.rb +++ b/spec/services/linked_data/ldpath_service_spec.rb @@ -17,10 +17,13 @@ context 'when ldpath_program gets parse error' do let(:cause) { "undefined method `ascii_tree' for nil:NilClass" } let(:warning) { I18n.t('qa.linked_data.ldpath.parse_logger_error') } - let(:program_code) { "@prefix skos : ;\nproperty = skos:prefLabel ::xsd:string ;" } + let(:program_code) { "BAD_PROGRAM ;" } let(:log_message) { "WARNING: #{warning}... 
cause: #{cause}\n ldpath_program=\n#{program_code}" } - before { allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause) } + before do + allow(described_class).to receive(:ldpath_program_code).with(anything).and_return(program_code) + allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause) + end it 'logs error and returns PARSE ERROR as the value' do expect(Rails.logger).to receive(:warn).with(log_message) @@ -29,20 +32,149 @@ end end + describe '.ldpath_program_code' do + subject { described_class.ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages) } + + context 'for a ldpath without language pattern' do + let(:ldpath) { 'dcterms:identifier' } + let(:languages) { [:fr] } + let(:prefixes) { { "dcterms" => "http://purl.org/dc/terms/" } } + it 'generates the simple program code' do + expected_program = <<-PROGRAM +@prefix dcterms : \; +property = dcterms:identifier \; +PROGRAM + expect(subject).to eq expected_program + end + end + + context 'for a ldpath with language pattern' do + let(:ldpath) { 'madsrdf:authoritativeLabel*LANG* ::xsd:string' } + let(:prefixes) { { "madsrdf" => "http://www.loc.gov/mads/rdf/v1#" } } + context 'and no languages specified' do + let(:languages) { nil } + it 'generates the simple program code' do + expected_program = <<-PROGRAM +@prefix madsrdf : \; +property = madsrdf:authoritativeLabel ::xsd:string \; +PROGRAM + expect(subject).to eq expected_program + end + end + + context 'and one language specified' do + let(:languages) { [:en] } + it 'generates a program with the language' do + expected_program = <<-PROGRAM +@prefix madsrdf : \; +en_property = madsrdf:authoritativeLabel[@en] ::xsd:string \; +property = madsrdf:authoritativeLabel[@none] ::xsd:string \; +PROGRAM + expect(subject).to eq expected_program + end + end + + context 'and multiple languages specified' do + let(:languages) { [:fr, :de] } + it 'generates a program with languages' do + expected_program = <<-PROGRAM 
+@prefix madsrdf : \; +fr_property = madsrdf:authoritativeLabel[@fr] ::xsd:string \; +de_property = madsrdf:authoritativeLabel[@de] ::xsd:string \; +property = madsrdf:authoritativeLabel[@none] ::xsd:string \; +PROGRAM + expect(subject).to eq expected_program + end + end + end + end + describe '.ldpath_evaluate' do subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri) } let(:program) { instance_double(Ldpath::Program) } let(:graph) { instance_double(RDF::Graph) } let(:subject_uri) { instance_double(RDF::URI) } - let(:values) { ['Expanded Label'] } before do - allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(program) - allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => values) + allow(Ldpath::Program).to receive(:parse).with(anything).and_return(program) end - it 'returns the extracted label' do - expect(subject).to match_array values + + context 'when program does not contain languages' do + context 'and value is a string' do + let(:values) { ['value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('property' => values) + end + it 'returns the string values as is' do + expected_values = values.map { |v| RDF::Literal.new(v) } + expect(subject).to match_array expected_values + end + end + + context 'and value is a URI' do + let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('property' => values) + end + it 'returns the URIs' do + expected_values = values + expect(subject).to match_array expected_values + end + end + + context 'and value is numeric' do + let(:values) { [23, 14, 55] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, 
context: graph, limit_to_context: true) + .and_return('property' => values) + end + it 'returns the URIs' do + expected_values = values + expect(subject).to match_array expected_values + end + end + end + + context 'when program has languages' do + context 'and one language specified' do + let(:en_values) { ['en_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('en_property' => en_values, 'property' => untagged_values) + end + it 'generates a program with the language' do + expected_values = + en_values.map { |v| RDF::Literal.new(v, language: :en) } + + untagged_values.map { |v| RDF::Literal.new(v) } + expect(subject).to match_array expected_values + end + end + + context 'and multiple languages specified' do + let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } + let(:de_values) { ['de_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + end + it 'returns the extracted label' do + expected_values = + (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } + + de_values.map { |v| RDF::Literal.new(v, language: :de) } + + untagged_values.map { |v| RDF::Literal.new(v) }).uniq + expect(subject).to match_array expected_values + end + end end context 'when ldpath_evaluate gets parse error' do @@ -50,11 +182,18 @@ let(:warning) { I18n.t('qa.linked_data.ldpath.evaluate_logger_error') } let(:log_message) { "WARNING: #{warning} (cause: #{cause}" } - before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(cause) } + before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(ParseError, cause) } it 'logs 
error and returns PARSE ERROR as the value' do expect(Rails.logger).to receive(:warn).with(log_message) - expect { subject.values(graph, subject_uri) }.to raise_error StandardError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}" + expect { subject }.to raise_error ParseError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}" + end + end + + context 'when program is empty' do + let(:program) { nil } + it 'returns empty array' do + expect { subject }.to raise_error ArgumentError, "You must specify a program when calling ldpath_evaluate" end end end diff --git a/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb b/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb index ee1f6057..6b6c8686 100644 --- a/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb +++ b/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb @@ -38,11 +38,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['530369'], String) - validate_entry(subject, :label, ['Cornell University'], String) - validate_entry(subject, :altlabel, ['Ithaca (N.Y.). Cornell University'], String) + validate_entry(subject, :id, ['530369'], RDF::Literal) + validate_entry(subject, :label, ['Cornell University'], RDF::Literal) + validate_entry(subject, :altlabel, ['Ithaca (N.Y.). Cornell University'], RDF::Literal) validate_entry(subject, :sameas, ['http://id.loc.gov/authorities/names/n79021621'], RDF::URI) - validate_entry(subject, :sort, ['1'], String) + validate_entry(subject, :sort, ['1'], RDF::Literal) end end @@ -55,11 +55,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['510103'], String) - validate_entry(subject, :label, ['Cornell University. 
Libraries'], String) - validate_entry(subject, :altlabel, ['Cornell University. Central Libraries', 'Cornell University. John M. Olin Library', 'Cornell University. White Library'], String) + validate_entry(subject, :id, ['510103'], RDF::Literal) + validate_entry(subject, :label, ['Cornell University. Libraries'], RDF::Literal) + validate_entry(subject, :altlabel, ['Cornell University. Central Libraries', 'Cornell University. John M. Olin Library', 'Cornell University. White Library'], RDF::Literal) validate_entry(subject, :sameas, ['http://id.loc.gov/authorities/names/n50000040', 'https://viaf.org/viaf/147713418'], RDF::URI) - validate_entry(subject, :sort, ['2'], String) + validate_entry(subject, :sort, ['2'], RDF::Literal) end end @@ -72,11 +72,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['5140'], String) - validate_entry(subject, :label, ['Cornell, Joseph'], String) + validate_entry(subject, :id, ['5140'], RDF::Literal) + validate_entry(subject, :label, ['Cornell, Joseph'], RDF::Literal) validate_entry(subject, :altlabel, [], NilClass) validate_entry(subject, :sameas, [], NilClass) - validate_entry(subject, :sort, ['3'], String) + validate_entry(subject, :sort, ['3'], RDF::Literal) end end @@ -98,11 +98,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort, :context] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['5140'], String) - validate_entry(subject, :label, ['Cornell, Joseph'], String) + validate_entry(subject, :id, ['5140'], RDF::Literal) + validate_entry(subject, :label, ['Cornell, Joseph'], RDF::Literal) validate_entry(subject, :altlabel, [], NilClass) validate_entry(subject, :sameas, [], NilClass) - validate_entry(subject, :sort, ['3'], String) + validate_entry(subject, :sort, ['3'], RDF::Literal) expect(subject[:context]).to be_kind_of 
Hash expect(subject[:context]).to include(context)