From fce0947a0f18230ead75d8f92e9b113c1a63c622 Mon Sep 17 00:00:00 2001 From: "E. Lynette Rayle" Date: Wed, 20 Nov 2019 19:26:17 -0500 Subject: [PATCH] use maintain_literals in ldpath to support language sorting NOTE: This commit references a branch in ldpath. It needs to be removed when PR #18 is merged into the ldpath gem. --- .circleci/config.yml | 2 +- Gemfile | 2 + app/services/qa/linked_data/ldpath_service.rb | 6 +- .../mapper/graph_ldpath_mapper_service.rb | 2 +- qa.gemspec | 2 +- .../linked_data/lod_lang_defaults.json | 17 +- .../linked_data/lod_lang_multi_defaults.json | 17 +- .../linked_data/lod_lang_no_defaults.json | 15 +- .../linked_data/lod_lang_param.json | 18 +- .../linked_data/lod_min_config.json | 9 +- .../config/context_property_map_spec.rb | 6 +- .../linked_data/ldpath_service_spec.rb | 206 +++++++++++++----- 12 files changed, 204 insertions(+), 98 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dce96104..73cb0b40 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,7 @@ jobs: project: qa - samvera/engine_cart_generate: - cache_key: v7-internal-test-app-{{ checksum "qa.gemspec" }}-{{ checksum "spec/test_app_templates/lib/generators/test_app_generator.rb" }}-{{ checksum "lib/generators/qa/install/install_generator.rb" }}-<< parameters.rails_version >>-<< parameters.ruby_version >> + cache_key: v10-internal-test-app-{{ checksum "qa.gemspec" }}-{{ checksum "spec/test_app_templates/lib/generators/test_app_generator.rb" }}-{{ checksum "lib/generators/qa/install/install_generator.rb" }}-<< parameters.rails_version >>-<< parameters.ruby_version >> - samvera/bundle_for_gem: ruby_version: << parameters.ruby_version >> diff --git a/Gemfile b/Gemfile index 4855aecd..9343a95e 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,8 @@ group :development, :test do gem 'simplecov', require: false end +gem 'ldpath', github: 'samvera-labs/ldpath', branch: 'maintain_literals' + # BEGIN ENGINE_CART BLOCK # engine_cart: 0.10.0 # engine_cart stanza: 0.10.0 diff --git a/app/services/qa/linked_data/ldpath_service.rb b/app/services/qa/linked_data/ldpath_service.rb index 7a7499eb..67bf1296 100644 --- a/app/services/qa/linked_data/ldpath_service.rb +++ b/app/services/qa/linked_data/ldpath_service.rb @@ -42,10 +42,10 @@ def ldpath_program_code(ldpath:, prefixes: {}, languages: []) # @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls. # It will limit results to those found in the context graph. ## @return [Array] the extracted values based on the ldpath - def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?) + def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?, maintain_literals: false) raise ArgumentError, "You must specify a program when calling ldpath_evaluate" if program.blank? - output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context) - property_implode(output) + output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context, maintain_literals: maintain_literals) + maintain_literals ? property_implode(output) : output.values.flatten.uniq rescue ParseError => e Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}") raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}" diff --git a/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb b/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb index 1fa36d2d..32acc4c4 100644 --- a/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb +++ b/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb @@ -38,7 +38,7 @@ def self.map_values(graph:, ldpath_map:, subject_uri:, prefixes: {}) ldpath_map.each do |key, ldpath| next value_map[key] = [subject_uri] if ldpath == :subject_uri ldpath_program = ldpath_service.ldpath_program(ldpath: ldpath, prefixes: prefixes) - values = ldpath_service.ldpath_evaluate(program: ldpath_program, graph: graph, subject_uri: subject_uri) + values = ldpath_service.ldpath_evaluate(program: ldpath_program, graph: graph, subject_uri: subject_uri, maintain_literals: true) value_map[key] = values end value_map = yield value_map if block_given? diff --git a/qa.gemspec b/qa.gemspec index 65af7b34..b5029991 100644 --- a/qa.gemspec +++ b/qa.gemspec @@ -19,7 +19,7 @@ Gem::Specification.new do |s| s.add_dependency 'activerecord-import' s.add_dependency 'deprecation' s.add_dependency 'faraday' - s.add_dependency 'ldpath' + # s.add_dependency 'ldpath' s.add_dependency 'nokogiri', '~> 1.6' s.add_dependency 'rails', '~> 5.0' s.add_dependency 'rdf' diff --git a/spec/fixtures/authorities/linked_data/lod_lang_defaults.json b/spec/fixtures/authorities/linked_data/lod_lang_defaults.json index e8ce281b..22456b87 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_defaults.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_defaults.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -21,9 +24,9 @@ "term_id": "URI", "language": [ "fr" ], "results": { - "id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "id_ldpath": "loc:lccn", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -46,10 +49,10 @@ }, "language": [ "fr" ], "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json b/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json index 91264cb8..d7660b5d 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -21,9 +24,9 @@ "term_id": "URI", "language": [ "en", "fr" ], "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -46,10 +49,10 @@ }, "language": [ "en", "fr" ], "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json b/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json index 3f729c40..517abd56 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -20,8 +23,8 @@ }, "term_id": "URI", "results": { - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -43,10 +46,10 @@ "query": "query" }, "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_lang_param.json b/spec/fixtures/authorities/linked_data/lod_lang_param.json index 7049e4b3..8b0a9ea7 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_param.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_param.json @@ -1,5 +1,9 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/", + "loc": "http://id.loc.gov/vocabulary/identifiers/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -27,9 +31,9 @@ }, "term_id": "URI", "results": { - "id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "id_ldpath": "loc:lccn", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -58,10 +62,10 @@ "query": "query" }, "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_min_config.json b/spec/fixtures/authorities/linked_data/lod_min_config.json index 257bb371..78624a13 100644 --- a/spec/fixtures/authorities/linked_data/lod_min_config.json +++ b/spec/fixtures/authorities/linked_data/lod_min_config.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "loc": "http://id.loc.gov/vocabulary/identifiers/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -20,8 +23,8 @@ }, "term_id": "URI", "results": { - "id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "loc:lccn", + "label_ldpath": "skos:prefLabel" } }, "search": { @@ -43,7 +46,7 @@ "query": "query" }, "results": { - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "label_ldpath": "skos:prefLabel" } } } diff --git a/spec/models/linked_data/config/context_property_map_spec.rb b/spec/models/linked_data/config/context_property_map_spec.rb index c990dc38..5380d897 100644 --- a/spec/models/linked_data/config/context_property_map_spec.rb +++ b/spec/models/linked_data/config/context_property_map_spec.rb @@ -280,9 +280,9 @@ allow(Ldpath::Program).to receive(:parse).with("property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;\n").and_return(basic_program) allow(Ldpath::Program).to receive(:parse).with("property = skos:prefLabel ::xsd:string ;\n").and_return(expanded_label_program) allow(Ldpath::Program).to receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program) - allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri]) - allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label]) - allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id]) + allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_uri]) + allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_label]) + allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_id]) end it 'returns the uri, id, label for the expanded uri value' do expanded_values = subject.expanded_values(graph, subject_uri).first diff --git a/spec/services/linked_data/ldpath_service_spec.rb b/spec/services/linked_data/ldpath_service_spec.rb index 877d321f..268da57f 100644 --- a/spec/services/linked_data/ldpath_service_spec.rb +++ b/spec/services/linked_data/ldpath_service_spec.rb @@ -90,7 +90,7 @@ end describe '.ldpath_evaluate' do - subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri) } + subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri, maintain_literals: maintain_literals) } let(:program) { instance_double(Ldpath::Program) } let(:graph) { instance_double(RDF::Graph) } @@ -100,79 +100,161 @@ allow(Ldpath::Program).to receive(:parse).with(anything).and_return(program) end - context 'when program does not contain languages' do - context 'and value is a string' do - let(:values) { ['value'] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('property' => values) - end - it 'returns the string values as is' do - expected_values = values.map { |v| RDF::Literal.new(v) } - expect(subject).to match_array expected_values + context 'when program does not request languages' do + context 'and not maintaining literals' do + let(:maintain_literals) { false } + + context 'and value is a string' do + let(:values) { ['value', 'value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the string values as is' do + expected_values = ['value'] + expect(subject).to match_array expected_values + end end - end - context 'and value is a URI' do - let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('property' => values) + context 'and value is a URI' do + let(:values) { ['http://example.com/1', 'http://example.com/2'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the URIs' do + expected_values = values + expect(subject).to match_array expected_values + end end - it 'returns the URIs' do - expected_values = values - expect(subject).to match_array expected_values + + context 'and value is numeric' do + let(:values) { [23, 14, 55] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the numeric values' do + expected_values = values + expect(subject).to match_array expected_values + end end end - context 'and value is numeric' do - let(:values) { [23, 14, 55] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('property' => values) + context 'and maintaining literals' do + let(:maintain_literals) { true } + + context 'and value is a string' do + let(:values) { [RDF::Literal.new('value'), RDF::Literal.new('value')] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the string values as is' do + expected_values = [RDF::Literal.new('value')] + expect(subject).to match_array expected_values + end end - it 'returns the URIs' do - expected_values = values - expect(subject).to match_array expected_values + + context 'and value is a URI' do + let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the URIs' do + expect(subject).to match_array values + end + end + + context 'and value is numeric' do + let(:values) { [RDF::Literal.new(23), RDF::Literal.new(14), RDF::Literal.new(55)] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the numeric values' do + expect(subject).to match_array values + end end end end context 'when program has languages' do - context 'and one language specified' do - let(:en_values) { ['en_value'] } - let(:untagged_values) { ['untagged_value'] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('en_property' => en_values, 'property' => untagged_values) + context 'and not maintaining literals' do + let(:maintain_literals) { false } + + context 'and one language specified' do + let(:en_values) { ['en_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('en_property' => en_values, 'property' => untagged_values) + end + it 'generates a program with the language' do + expected_values = en_values + untagged_values + expect(subject).to match_array expected_values + end end - it 'generates a program with the language' do - expected_values = - en_values.map { |v| RDF::Literal.new(v, language: :en) } + - untagged_values.map { |v| RDF::Literal.new(v) } - expect(subject).to match_array expected_values + + context 'and multiple languages specified' do + let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } + let(:de_values) { ['de_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + end + it 'returns the extracted label' do + expected_values = fr_values.uniq + de_values + untagged_values + expect(subject).to match_array expected_values + end end end - context 'and multiple languages specified' do - let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } - let(:de_values) { ['de_value'] } - let(:untagged_values) { ['untagged_value'] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + context 'and maintaining literals' do + let(:maintain_literals) { true } + + context 'and one language specified' do + let(:en_values) { ['en_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('en_property' => en_values, 'property' => untagged_values) + end + it 'generates a program with the language' do + expected_values = + en_values.map { |v| RDF::Literal.new(v, language: :en) } + + untagged_values.map { |v| RDF::Literal.new(v) } + expect(subject).to match_array expected_values + end end - it 'returns the extracted label' do - expected_values = - (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } + - de_values.map { |v| RDF::Literal.new(v, language: :de) } + - untagged_values.map { |v| RDF::Literal.new(v) }).uniq - expect(subject).to match_array expected_values + + context 'and multiple languages specified' do + let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } + let(:de_values) { ['de_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + end + it 'returns the extracted label' do + expected_values = + (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } + + de_values.map { |v| RDF::Literal.new(v, language: :de) } + + untagged_values.map { |v| RDF::Literal.new(v) }).uniq + expect(subject).to match_array expected_values + end end end end @@ -181,8 +263,13 @@ let(:cause) { "unknown cause" } let(:warning) { I18n.t('qa.linked_data.ldpath.evaluate_logger_error') } let(:log_message) { "WARNING: #{warning} (cause: #{cause}" } + let(:maintain_literals) { false } - before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(ParseError, cause) } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: false) + .and_raise(ParseError, cause) + end it 'logs error and returns PARSE ERROR as the value' do expect(Rails.logger).to receive(:warn).with(log_message) @@ -192,7 +279,8 @@ context 'when program is empty' do let(:program) { nil } - it 'returns empty array' do + let(:maintain_literals) { false } + it 'raise ArgumentError' do expect { subject }.to raise_error ArgumentError, "You must specify a program when calling ldpath_evaluate" end end