Skip to content

Commit

Permalink
Merge pull request #185 from samvera/ld_refactor_qa_graph
Browse files Browse the repository at this point in the history
Move rdf_helper methods to graph_service
  • Loading branch information
elrayle committed Nov 30, 2018
2 parents c1f2333 + ae216a7 commit 7080c73
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 82 deletions.
77 changes: 77 additions & 0 deletions app/services/qa/linked_data/graph_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Wrap an RDF graph loaded from a url and provide additional processing methods (e.g. filtering).
module Qa
  module LinkedData
    class GraphService
      # @return [RDF::Graph] the graph loaded by #initialize, reflecting any filters applied since
      attr_reader :graph

      # Retrieve linked data from the specified url and hold it in {#graph}.
      # @param url [String] url from which to retrieve linked data
      # @raise [RDF::FormatError] if the load fails with an RDF::FormatError
      # @raise [Qa::TermNotFound] if the load fails with an IOError reporting a 404
      # @raise [Qa::ServiceUnavailable] if the load fails with an IOError reporting a 503
      # @raise [Qa::ServiceError] if the load fails with any other IOError (including a 500)
      def initialize(url:)
        @graph = RDF::Graph.load(url)
      rescue IOError => e
        process_error(e, url)
      end

      # Apply filters to the graph. Statements that are filtered out are deleted from {#graph} in place.
      # @param language [String | Symbol | Array<String|Symbol>] will keep any statement whose object's language matches the language filter
      #   (only applies to statements that respond to language) (e.g. "en" or :en or ["en", "fr"] or [:en, :fr])
      # @param remove_blanknode_subjects [Boolean] will remove any statement whose subject is a blanknode, if true
      # @return [void]
      def filter(language: nil, remove_blanknode_subjects: false)
        return unless @graph.present?
        return unless language.present? || remove_blanknode_subjects
        language = normalize_language(language)
        # Walk a snapshot of the statements; deleting from the graph while enumerating the graph
        # itself can cause statements to be skipped.
        @graph.to_a.each do |st|
          @graph.delete(st) if filter_out_blanknode(remove_blanknode_subjects, st.subject) || filter_out_language(language, st.object)
        end
      end

      private

      # @param remove [Boolean] whether blanknode subjects are being filtered out
      # @param subj [#anonymous?] subject of the statement being checked
      # @return [Boolean] truthy when the statement should be removed because its subject is a blanknode
      def filter_out_blanknode(remove, subj)
        remove && subj.anonymous?
      end

      # @param language [Array<Symbol>, nil] normalized language filter (see #normalize_language)
      # @param obj [Object] object of the statement being checked
      # @return [Boolean] true when the object has a language and that language is not in the filter;
      #   objects that do not respond to language, or have none, are never filtered out
      def filter_out_language(language, obj)
        return false if language.blank?
        return false unless obj.respond_to?(:language)
        return false if obj.language.blank?
        !language.include?(obj.language)
      end

      # Translate a failure to load the graph into the error the rest of QA expects. Always raises.
      # @param e [IOError] error raised while loading the graph
      # @param url [String] url that was being loaded
      # @raise [RDF::FormatError, Qa::TermNotFound, Qa::ServiceUnavailable, Qa::ServiceError]
      def process_error(e, url)
        uri = URI(url)
        raise RDF::FormatError, "Unknown RDF format of results returned by #{uri}. (RDF::FormatError) You may need to include gem 'linkeddata'." if e.is_a? RDF::FormatError
        response_code = ioerror_code(e)
        case response_code
        when '404'
          raise Qa::TermNotFound, "#{uri} Not Found - Term may not exist at LOD Authority. (HTTPNotFound - 404)"
        when '500'
          raise Qa::ServiceError, "#{uri.hostname} on port #{uri.port} is not responding. Try again later. (HTTPServerError - 500)"
        when '503'
          raise Qa::ServiceUnavailable, "#{uri.hostname} on port #{uri.port} is not responding. Try again later. (HTTPServiceUnavailable - 503)"
        else
          raise Qa::ServiceError, "Unknown error for #{uri.hostname} on port #{uri.port}. Try again later. (Cause - #{e.message})"
        end
      end

      # Extract the response code from the message of an error raised while loading the graph.
      # @param e [Exception] error raised while loading the graph
      # @return [String, nil] 'format' for an unknown RDF format message; otherwise the three characters
      #   that precede the final character of the message, or nil when the message is shorter than four characters
      def ioerror_code(e)
        msg = e.message
        return 'format' if msg.start_with? "Unknown RDF format"
        # NOTE(review): presumably the loader ends its message with "(<code>)", e.g. "...(404)", which
        # is why the last character is dropped -- confirm against the RDF loader in use.
        msg[-4..-2]
      end

      # Normalize language
      # @param language [String | Symbol | Array] language for filtering graph (e.g. "en" OR :en OR ["en", "fr"] OR [:en, :fr])
      # @return [Array<Symbol>] an array of languages encoded as symbols (e.g. [:en] OR [:en, :fr]);
      #   a blank language is returned unchanged
      def normalize_language(language)
        return language if language.blank?
        language = [language] unless language.is_a? Array
        language.map(&:to_sym)
      end
    end
  end
end
21 changes: 13 additions & 8 deletions lib/qa/authorities/linked_data/find_term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,21 @@ def find(id, language: nil, replacements: {}, subauth: nil, jsonld: false)
language ||= term_config.term_language
url = term_config.term_url_with_replacements(id, subauth, replacements)
Rails.logger.info "QA Linked Data term url: #{url}"
graph = get_linked_data(url)
graph = load_graph(url: url, language: language)
return "{}" unless graph.size.positive?
return graph.dump(:jsonld, standard_prefixes: true) if jsonld
parse_term_authority_response(id, graph, language)
parse_term_authority_response(id, graph)
end

private

def parse_term_authority_response(id, graph, language)
graph = filter_language(graph, language) unless language.nil?
def load_graph(url:, language:)
graph_service = Qa::LinkedData::GraphService.new(url: url)
graph_service.filter(language: language)
graph_service.graph
end

def parse_term_authority_response(id, graph)
results = extract_preds(graph, preds_for_term)
consolidated_results = consolidate_term_results(results)
json_results = convert_term_to_json(consolidated_results)
Expand Down Expand Up @@ -130,16 +135,16 @@ def predicates_with_subject_uri(graph, expected_uri) # rubocop:disable Metrics/M
subj = st.subject.to_s
next unless subj == expected_uri
pred = st.predicate.to_s
obj = st.object.to_s
next if blank_node? obj
obj = st.object
next if obj.anonymous?
if predicates_hash.key?(pred)
objs = predicates_hash[pred]
objs = [] unless objs.is_a?(Array)
objs << predicates_hash[pred] unless objs.length.positive?
objs << obj
objs << obj.to_s
predicates_hash[pred] = objs
else
predicates_hash[pred] = [obj]
predicates_hash[pred] = [obj.to_s]
end
end
predicates_hash
Expand Down
72 changes: 3 additions & 69 deletions lib/qa/authorities/linked_data/rdf_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ module LinkedData
module RdfHelper
private

# TODO: elr - The bulk of the methods in this module moved to app/services/qa/linked_data/graph_service.rb. The remaining
# methods are expected to move in a later refactor.

def object_value(stmt_hash, consolidated_hash, name, as_string = true)
new_object_value = stmt_hash[name]
new_object_value = new_object_value.to_s if as_string
Expand All @@ -24,70 +27,6 @@ def init_consolidated_hash(consolidated_results, uri, id)
consolidated_hash
end

def get_linked_data(url)
begin
graph = RDF::Graph.load(url)
rescue IOError => e
process_error(e, url)
end
graph
end

def process_error(e, url)
uri = URI(url)
raise RDF::FormatError, "Unknown RDF format of results returned by #{uri}. (RDF::FormatError) You may need to include gem 'linkeddata'." if e.is_a? RDF::FormatError
response_code = ioerror_code(e)
case response_code
when '404'
raise Qa::TermNotFound, "#{uri} Not Found - Term may not exist at LOD Authority. (HTTPNotFound - 404)"
when '500'
raise Qa::ServiceError, "#{uri.hostname} on port #{uri.port} is not responding. Try again later. (HTTPServerError - 500)"
when '503'
raise Qa::ServiceUnavailable, "#{uri.hostname} on port #{uri.port} is not responding. Try again later. (HTTPServiceUnavailable - 503)"
else
raise Qa::ServiceError, "Unknown error for #{uri.hostname} on port #{uri.port}. Try again later. (Cause - #{e.message})"
end
end

def ioerror_code(e)
msg = e.message
return 'format' if msg.start_with? "Unknown RDF format"
a = msg.size - 4
z = msg.size - 2
msg[a..z]
end

# Filter a graph to the specified languages
# @param [RDF::Graph] the graph to be filtered.
# @param [String | Symbol | Array<String|Symbol>] language for filtering graph (e.g. "en" or :en or ["en", "fr"] or [:en, :fr])
# @returns [RDF::Graph] graph of linked data filtered on the specified languages
def filter_language(graph, language)
language = normalize_language(language)
return graph if language.nil?
graph.each do |st|
graph.delete(st) unless !st.object.respond_to?(:language) || st.object.language.nil? || language.include?(st.object.language)
end
graph
end

# Filter a graph to remove any statement with a blanknode for the subject
# @param [RDF::Graph] the graph to be filtered.
# @returns [RDF::Graph] graph of linked data with blanknodes removed
def filter_out_blanknodes(graph)
return graph if graph.subjects.blank?
graph.each do |st|
graph.delete(st) if st.subject.anonymous?
end
graph
end

def normalize_language(language)
language = [language.to_sym] if language.is_a? String
language = [language] if language.is_a? Symbol
return nil unless language.is_a? Array
language
end

def extract_preds(graph, preds)
RDF::Query.execute(graph) do
preds[:required].each do |key, pred|
Expand All @@ -106,11 +45,6 @@ def sort_string_by_language(str_literals)
str_literals.uniq!
str_literals.delete_if { |s| s.nil? || s.length <= 0 }
end

def blank_node?(obj)
return true if obj.to_s.starts_with? "_:g"
false
end
end
end
end
14 changes: 9 additions & 5 deletions lib/qa/authorities/linked_data/search_query.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,19 @@ def search(query, language: nil, replacements: {}, subauth: nil)
language ||= search_config.language
url = search_config.url_with_replacements(query, subauth, replacements)
Rails.logger.info "QA Linked Data search url: #{url}"
graph = get_linked_data(url)
parse_search_authority_response(graph, language)
graph = load_graph(url: url, language: language)
parse_search_authority_response(graph)
end

private

def parse_search_authority_response(graph, language)
graph = filter_language(graph, language) unless language.nil?
graph = filter_out_blanknodes(graph)
def load_graph(url:, language:)
graph_service = Qa::LinkedData::GraphService.new(url: url)
graph_service.filter(language: language, remove_blanknode_subjects: true)
graph_service.graph
end

def parse_search_authority_response(graph)
results = extract_preds(graph, preds_for_search)
consolidated_results = consolidate_search_results(results)
json_results = convert_search_to_json(consolidated_results)
Expand Down
11 changes: 11 additions & 0 deletions spec/controllers/linked_data_terms_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,17 @@
expect(JSON.parse(response.body).keys).to match_array ["@context", "@graph"]
end
end

context 'blank nodes not included in predicates list' do
before do
stub_request(:get, 'http://localhost/test_default/term?uri=http://test.org/530369wbn')
.to_return(status: 200, body: webmock_fixture('lod_term_with_blanknode_objects.nt'), headers: { 'Content-Type' => 'application/n-triples' })
end
it 'succeeds' do
get :fetch, params: { uri: 'http://test.org/530369wbn', vocab: 'LOD_TERM_URI_PARAM_CONFIG' }
expect(response).to be_successful
end
end
end

context 'when cors headers are enabled' do
Expand Down
12 changes: 12 additions & 0 deletions spec/fixtures/lod_search_with_blanknode_subjects.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<http://id.worldcat.org/fast/530369> <http://purl.org/dc/terms/identifier> "530369" .
<http://id.worldcat.org/fast/530369> <http://schema.org/name> "Cornell University" .
<http://id.worldcat.org/fast/530369> <http://schema.org/sameAs> _:b0 .
_:b0 <http://www.w3.org/2000/01/rdf-schema#label> "Cornell University" .
<http://id.worldcat.org/fast/5140> <http://purl.org/dc/terms/identifier> "5140" .
<http://id.worldcat.org/fast/5140> <http://schema.org/name> "Cornell, Joseph" .
<http://id.worldcat.org/fast/5140> <http://schema.org/sameAs> _:b1 .
_:b1 <http://www.w3.org/2000/01/rdf-schema#label> "Cornell, Joseph" .
<http://id.worldcat.org/fast/557490> <http://purl.org/dc/terms/identifier> "557490" .
<http://id.worldcat.org/fast/557490> <http://schema.org/name> "New York State School of Industrial and Labor Relations" .
<http://id.worldcat.org/fast/557490> <http://schema.org/sameAs> _:b2 .
_:b2 <http://www.w3.org/2000/01/rdf-schema#label> "New York State School of Industrial and Labor Relations" .
8 changes: 8 additions & 0 deletions spec/fixtures/lod_term_with_blanknode_objects.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<http://test.org/530369wbn> <http://purl.org/dc/terms/identifier> "530369" .
<http://test.org/530369wbn> <http://www.w3.org/2004/02/skos/core#inScheme> <http://id.worldcat.org/fast/ontology/1.0/#fast> .
<http://test.org/530369wbn> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Organization> .
<http://test.org/530369wbn> <http://www.w3.org/2004/02/skos/core#prefLabel> "Cornell University" .
<http://test.org/530369wbn> <http://schema.org/name> "Cornell University" .
<http://test.org/530369wbn> <http://www.w3.org/2004/02/skos/core#altLabel> "Ithaca (N.Y.). Cornell University" .
<http://test.org/530369wbn> <http://schema.org/sameAs> _:b0 .
_:b0 <http://www.w3.org/2000/01/rdf-schema#label> "Cornell University" .
Loading

0 comments on commit 7080c73

Please sign in to comment.