Skip to content

Commit

Permalink
Merge pull request #249 from samvera/fix/247_predicates_nil
Browse files Browse the repository at this point in the history
Raise an exception if data can’t be normalized
  • Loading branch information
Julie Allinson committed Jun 4, 2019
2 parents 7c24d4b + 55dbc05 commit 13f1051
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 79 deletions.
4 changes: 4 additions & 0 deletions app/controllers/qa/linked_data_terms_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def show # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
"was not identified as a valid RDF format. You may need to include the linkeddata gem."
logger.warn msg
render json: { errors: msg }, status: :internal_server_error
rescue Qa::DataNormalizationError => e
msg = "Data Normalization Error - #{e.message}"
logger.warn msg
render json: { errors: msg }, status: :internal_server_error
end

# Return all the information for a given term given a URI
Expand Down
3 changes: 3 additions & 0 deletions lib/qa.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,7 @@ class TermNotFound < ArgumentError; end
module IriTemplate
  # Error type for IRI template problems; a plain StandardError subclass.
  # NOTE(review): presumably raised when a required template parameter is
  # absent -- confirm against the raise sites, which are not visible here.
  class MissingParameter < StandardError
  end
end

# Signals that data came back from the source but could not be normalized.
# Subclasses StandardError, so a bare `rescue` will catch it.
class DataNormalizationError < StandardError
end
end
33 changes: 22 additions & 11 deletions lib/qa/authorities/linked_data/find_term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,26 @@ def expects_uri?

# Determine the URI of the requested term and store it in @uri.
#
# When the authority takes the id as a URI (expects_uri?), the id is wrapped
# in an RDF::URI as-is. Otherwise the lookup is delegated entirely to
# extract_uri_by_id, which searches the filtered graph and raises when no
# subject matches.
#
# The graph lookup is deliberately NOT repeated here: doing it inline first
# returned a blank @uri for non-LOC authorities instead of raising, and for
# LOC ids needing the fallback it ran the lookups and the deprecation
# warning twice.
#
# @return the URI assigned to @uri
# @raise [Qa::DataNormalizationError] when no URI can be found for the id
def extract_uri
  return @uri = RDF::URI.new(id) if expects_uri?
  extract_uri_by_id
end

# Find the term's URI in the graph by its id and remember it in @uri.
#
# The id is tried exactly as given first. If that lookup raises
# Qa::DataNormalizationError and this is an LOC authority (loc?), the lookup
# is retried with loc_id for backward compatibility, and a deprecation
# warning is issued once the retry succeeds. For any other authority the
# original error is re-raised; an error from the retry also propagates.
#
# @return the URI found for the id
# @raise [Qa::DataNormalizationError] when no lookup yields a URI
def extract_uri_by_id
  @uri = begin
    get_uri_from_graph_by_id(id)
  rescue Qa::DataNormalizationError
    raise unless loc?
    legacy_match = get_uri_from_graph_by_id(loc_id)
    Qa.deprecation_warning(
      in_msg: 'Qa::Authorities::LinkedData::FindTerm',
      msg: 'Special processing of LOC ids is deprecated; id should be an exact match of the id in the graph'
    )
    legacy_match
  end
end

# Search the filtered graph for the subject whose id predicate
# (term_config.term_results_id_predicate) has the given id as its value,
# and store the first match in @uri.
#
# The id is percent-decoded before matching. URI.unescape is obsolete and was
# removed in Ruby 3.0, so the equivalent RFC 2396 parser is called directly;
# like URI.unescape it decodes %XX sequences only and leaves '+' untouched.
#
# @param gid [String] the id to look for, possibly percent-encoded
# @return the first subject URI matching the id
# @raise [Qa::DataNormalizationError] when no subject matches; the message
#   reports the id originally requested (id), not necessarily gid
def get_uri_from_graph_by_id(gid)
  decoded_id = URI::RFC2396_Parser.new.unescape(gid)
  @uri = graph_service.subjects_for_object_value(graph: @filtered_graph,
                                                 predicate: RDF::URI.new(term_config.term_results_id_predicate),
                                                 object_value: decoded_id).first
  raise Qa::DataNormalizationError, "Unable to extract URI based on ID: #{id}" if @uri.blank?
  @uri
end

Expand Down Expand Up @@ -240,7 +250,8 @@ def predicates_with_subject_uri(expected_uri) # rubocop:disable Metrics/MethodLe
end

def append_performance_data(results)
performance = { predicate_count: results['predicates'].size,
pred_count = results['predicates'].present? ? results['predicates'].size : 0
performance = { predicate_count: pred_count,
fetch_time_s: access_time_s,
normalization_time_s: normalize_time_s,
total_time_s: (access_time_s + normalize_time_s) }
Expand Down
12 changes: 12 additions & 0 deletions spec/controllers/linked_data_terms_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,18 @@
end
end

context 'when data normalization error' do
  # The stubbed response body comes from the lod_oclc_term_bad_id.nt fixture,
  # so the id lookup for '530369' is expected to fail during normalization.
  before do
    bad_id_response = {
      status: 200,
      body: webmock_fixture('lod_oclc_term_bad_id.nt'),
      headers: { 'Content-Type' => 'application/ntriples' }
    }
    stub_request(:get, 'http://id.worldcat.org/fast/530369').to_return(bad_id_response)
  end

  # The controller should log the normalization failure and answer with a 500.
  it 'returns 500' do
    expect(Rails.logger).to receive(:warn).with("Data Normalization Error - Unable to extract URI based on ID: 530369")
    get :show, params: { id: '530369', vocab: 'OCLC_FAST' }
    expect(response.code).to eq('500')
  end
end

context 'when rdf format error' do
before do
stub_request(:get, 'http://id.worldcat.org/fast/530369').to_return(status: 200)
Expand Down
68 changes: 68 additions & 0 deletions spec/fixtures/lod_loc_term_bad_id.rdf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<madsrdf:Topic rdf:about="http://id.loc.gov/authorities/subjects/sh1234" xmlns:madsrdf="http://www.loc.gov/mads/rdf/v1#">
<rdf:type rdf:resource="http://www.loc.gov/mads/rdf/v1#Authority"/>
<madsrdf:authoritativeLabel xml:lang="en">More Science</madsrdf:authoritativeLabel>
<madsrdf:elementList rdf:parseType="Collection">
<madsrdf:TopicElement>
<madsrdf:elementValue xml:lang="en">More Science</madsrdf:elementValue>
</madsrdf:TopicElement>
</madsrdf:elementList>
<madsrdf:hasVariant>
<madsrdf:Topic>
<rdf:type rdf:resource="http://www.loc.gov/mads/rdf/v1#Variant"/>
<madsrdf:variantLabel xml:lang="en">More Natural science</madsrdf:variantLabel>
<madsrdf:elementList rdf:parseType="Collection">
<madsrdf:TopicElement>
<madsrdf:elementValue xml:lang="en">More Natural science</madsrdf:elementValue>
</madsrdf:TopicElement>
</madsrdf:elementList>
</madsrdf:Topic>
</madsrdf:hasVariant>
<madsrdf:hasVariant>
<madsrdf:Topic>
<rdf:type rdf:resource="http://www.loc.gov/mads/rdf/v1#Variant"/>
<madsrdf:variantLabel xml:lang="en">More Science of science</madsrdf:variantLabel>
<madsrdf:elementList rdf:parseType="Collection">
<madsrdf:TopicElement>
<madsrdf:elementValue xml:lang="en">More Science of science</madsrdf:elementValue>
</madsrdf:TopicElement>
</madsrdf:elementList>
</madsrdf:Topic>
</madsrdf:hasVariant>
<madsrdf:hasVariant>
<madsrdf:Topic>
<rdf:type rdf:resource="http://www.loc.gov/mads/rdf/v1#Variant"/>
<madsrdf:variantLabel xml:lang="en">More Sciences</madsrdf:variantLabel>
<madsrdf:elementList rdf:parseType="Collection">
<madsrdf:TopicElement>
<madsrdf:elementValue xml:lang="en">More Sciences</madsrdf:elementValue>
</madsrdf:TopicElement>
</madsrdf:elementList>
</madsrdf:Topic>
</madsrdf:hasVariant>
<identifiers:lccn xmlns:identifiers="http://id.loc.gov/vocabulary/identifiers/">BAD ID sh 1234</identifiers:lccn>
<rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept"/>
<skos:prefLabel xml:lang="en" xmlns:skos="http://www.w3.org/2004/02/skos/core#">More Science</skos:prefLabel>
<skosxl:altLabel xmlns:skosxl="http://www.w3.org/2008/05/skos-xl#">
<rdf:Description>
<rdf:type rdf:resource="http://www.w3.org/2008/05/skos-xl#Label"/>
<skosxl:literalForm xml:lang="en">More Natural science</skosxl:literalForm>
</rdf:Description>
</skosxl:altLabel>
<skosxl:altLabel xmlns:skosxl="http://www.w3.org/2008/05/skos-xl#">
<rdf:Description>
<rdf:type rdf:resource="http://www.w3.org/2008/05/skos-xl#Label"/>
<skosxl:literalForm xml:lang="en">More Science of science</skosxl:literalForm>
</rdf:Description>
</skosxl:altLabel>
<skosxl:altLabel xmlns:skosxl="http://www.w3.org/2008/05/skos-xl#">
<rdf:Description>
<rdf:type rdf:resource="http://www.w3.org/2008/05/skos-xl#Label"/>
<skosxl:literalForm xml:lang="en">More Sciences</skosxl:literalForm>
</rdf:Description>
</skosxl:altLabel>
<skos:altLabel xml:lang="en" xmlns:skos="http://www.w3.org/2004/02/skos/core#">More Natural science</skos:altLabel>
<skos:altLabel xml:lang="en" xmlns:skos="http://www.w3.org/2004/02/skos/core#">More Science of science</skos:altLabel>
<skos:altLabel xml:lang="en" xmlns:skos="http://www.w3.org/2004/02/skos/core#">More Sciences</skos:altLabel>
</madsrdf:Topic>
</rdf:RDF>
4 changes: 4 additions & 0 deletions spec/fixtures/lod_oclc_term_bad_id.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<http://id.worldcat.org/fast/530369> <http://purl.org/dc/terms/identifier> "BAD_ID 530369" .
<http://id.worldcat.org/fast/530369> <http://www.w3.org/2004/02/skos/core#prefLabel> "Cornell University" .
<http://id.worldcat.org/fast/530369> <http://www.w3.org/2004/02/skos/core#altLabel> "Ithaca (N.Y.). Cornell University" .
<http://id.worldcat.org/fast/530369> <http://www.w3.org/2004/02/skos/core#sameAs> <http://id.loc.gov/authorities/names/n79021621> .
Loading

0 comments on commit 13f1051

Please sign in to comment.