-
Notifications
You must be signed in to change notification settings - Fork 121
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a new indexer for 'linked_data_attributes', i.e. those whose values are RDF::URIs from which we want to fetch a label to index in Solr.
- Loading branch information
Julie Allinson
committed
Oct 26, 2017
1 parent
b38aec1
commit 79a19df
Showing
18 changed files
with
251 additions
and
165 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# frozen_string_literal: true | ||
|
||
module Hyrax
  # Builds the solr fields that hold the labels of linked_data_attributes,
  # i.e. attributes whose values are URIs from which a label is fetched.
  #
  # @example Configure which attributes get their labels indexed (as symbols)
  #   self.linked_data_attributes = [:based_near]
  #
  # @todo This replicates current Hyrax DeepIndexingService behaviour of fetching from the URI
  #   whenever #to_solr is called; ideally avoid this by getting data from the solr_document
  #   where labels are already indexed; this would require a combined form of label and uri in solr
  class LinkedDataAttributesIndexer
    class_attribute :linked_data_attributes
    self.linked_data_attributes = %i[based_near]

    attr_reader :resource, :solr_hash

    # @param resource [Hash] a hash carrying the object to index under the :resource key
    def initialize(resource)
      @resource = resource.fetch(:resource)
      @solr_hash = {}
    end

    # Adds label fields for every configured attribute that the resource
    # responds to with a truthy value.
    #
    # @return [Hash] the accumulated solr fields
    def to_solr
      linked_data_attributes.each do |attribute|
        stored_searchable_and_facetable(attribute) if resource.try(attribute)
      end
      solr_hash
    end

    private

    # Fetches the labels once and writes them to solr_hash under both the
    # stored_searchable (_label_tesim) and the facetable (_label_sim) field.
    #
    # @param ld_attribute_name [Symbol] the attribute name
    # @return [Hash] the solr_hash
    def stored_searchable_and_facetable(ld_attribute_name)
      labels = fetch_labels(ld_attribute_name)
      solr_hash[:"#{ld_attribute_name}_label_tesim"] = labels
      solr_hash[:"#{ld_attribute_name}_label_sim"] = labels
      solr_hash
    end

    # Collects one label per URI held by the given attribute.
    #
    # @param ld_attribute_name [Symbol] the attribute name
    # @return [Array<String>] an array of labels
    def fetch_labels(ld_attribute_name)
      uris = resource.send(ld_attribute_name)
      uris.map { |uri| fetch_label(ld_attribute_name, uri) }
    end

    # Looks up the label at the external source, using the resource class that
    # Hyrax::LinkedDataResourceFactory picks for the attribute.
    #
    # @param ld_attribute_name [Symbol] the attribute name
    # @param ld_uri [RDF::URI] the uri
    # @return [String] single label
    def fetch_label(ld_attribute_name, ld_uri)
      Hyrax::LinkedDataResourceFactory.for(ld_attribute_name, ld_uri).fetch_external
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
module Hyrax
  # Chooses and instantiates the LinkedDataResources class used to fetch a label for a URI.
  class LinkedDataResourceFactory
    # Instantiate the resource class registered for the attribute, falling back
    # to Hyrax::LinkedDataResources::BaseResource when none is registered.
    #
    # @param ld_attribute [Symbol] attribute used to look up the LinkedDataResources class
    # @param ld_uri [RDF::URI] uri from which to retrieve a label
    # @return [Hyrax::LinkedDataResources::BaseResource] or a more specific class
    def self.for(ld_attribute, ld_uri)
      registry = Hyrax.config.registered_linked_data_resources
      resource_class = registry.fetch(ld_attribute, Hyrax::LinkedDataResources::BaseResource)
      resource_class.new(ld_uri)
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
module Hyrax
  # Namespace for the classes that fetch a label for a linked data URI.
  # The resource classes are registered through ActiveSupport::Autoload inside
  # an eager_autoload block.
  module LinkedDataResources
    extend ActiveSupport::Autoload

    eager_autoload do
      %i[BaseResource GeonamesResource].each { |resource_class| autoload resource_class }
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
require 'linkeddata' # we need all the linked data types, because we don't know what types a service might return.I | ||
module Hyrax
  module LinkedDataResources
    # LinkedDataResources fetch an RDF::URI and retrieve its rdf_label.
    # They must be initialized with an RDF::URI or URI String and must
    # implement #fetch_external. They may extend BaseResource (which extends
    # ActiveTriples::Resource).
    #
    # Extend BaseResource where the rdf_label needs to be configured, i.e. where
    # it has to come from properties other than those in
    # ActiveTriples::RDFSource#default_labels.
    # @example we might have a LocationResource which expects a Geonames URI.
    #   In the RDF returned by Geonames, the closest match to a 'label' is 'name'
    #     class LocationResource < BaseResource
    #       configure rdf_label: ::RDF::Vocab::GEONAMES.name
    #     end
    #
    # BaseResource itself is used where the RDF::URI is expected to include one
    # or more ActiveTriples::RDFSource#default_labels and thus to respond to rdf_label.
    class BaseResource < ActiveTriples::Resource
      # Fetches the remote data, then reads the first rdf_label.
      #
      # @return [String] rdf_label
      def fetch_external
        fetch_value
        first_label = rdf_label.first
        first_label.to_s
      end

      private

      # Get the RDF data from the URL by calling #fetch. Failures are logged
      # rather than raised.
      def fetch_value
        Rails.logger.info "Fetching #{rdf_subject} from the authorative source. (this is slow)"
        request_headers = { 'Accept'.freeze => default_accept_header }
        fetch(headers: request_headers)
      rescue IOError, SocketError, ArgumentError => e
        # IOError could result from a 500 error on the remote server
        # SocketError results if there is no server to connect to
        Rails.logger.error "Unable to fetch #{rdf_subject} from the authorative source.\n#{e.message}"
      end

      # Strip off the */* to work around https://github.com/rails/rails/issues/9940
      #
      # @return [String] accept headers string
      def default_accept_header
        adapter_default = RDF::Util::File::HttpAdapter.default_accept_header
        adapter_default.sub(%r{, \*/\*;q=0\.1\Z}, '')
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
module Hyrax
  module LinkedDataResources
    # Fetches a label for a Geonames URI from the Geonames getJSON API.
    # Do not extend BaseResource for Geonames: we can get richer information
    # from the JSON API, so that is queried instead.
    #
    # Whenever a label cannot be obtained (non-200 response, connection failure,
    # unparseable body, or a payload without any name fields) the URI itself is
    # returned as the label.
    class GeonamesResource
      attr_reader :rdf_subject

      # @param rdf_subject [RDF::URI, String] the Geonames URI, e.g. http://sws.geonames.org/2638077/
      def initialize(rdf_subject)
        @rdf_subject = rdf_subject
      end

      # @return [String] the label, or the URI as a string when no label could be fetched
      def fetch_external
        fetch_value
      end

      private

      # Get the data from the URL by calling Faraday#get
      #
      # @return [String] the label, or rdf_subject as a string on any failure
      def fetch_value
        Rails.logger.info "Fetching #{rdf_subject} from the authorative source."
        response = Faraday.get build_json_uri(rdf_subject)
        return rdf_subject.to_s if response.status != 200
        fetched = label(response.body)
        # A 200 response without any name fields (e.g. an error payload) yields
        # an empty label; index the URI rather than an empty string.
        fetched.empty? ? rdf_subject.to_s : fetched
      rescue IOError, Faraday::ConnectionFailed, ArgumentError, JSON::ParserError => e
        # IOError could result from a 500 error on the remote server
        # Faraday::ConnectionFailed results if there is no server to connect to
        # JSON::ParserError results if the body of a 200 response is not JSON
        Rails.logger.error "Unable to fetch #{rdf_subject} from the authorative source.\n#{e.message}"
        rdf_subject.to_s
      end

      # Construct a URI for the Geonames getJSON API
      #
      # @param uri [RDF::URI, String] the Geonames URI
      # @return [String] uri
      def build_json_uri(uri)
        "http://www.geonames.org/getJSON?geonameId=#{find_id(uri)}&username=#{Qa::Authorities::Geonames.username}"
      end

      # Extract the Geonames id from a URI in the form http://sws.geonames.org/2638077/
      #
      # @param uri [RDF::URI] the Geonames URI
      # @return [String] the Geonames id
      def find_id(uri)
        uri.to_s.split('/')[3]
      end

      # Construct a disambiguable label from the JSON response
      #
      # @param json_string [String] the Faraday::Response#body
      # @return [String] the comma-separated label; empty when the payload is not
      #   a JSON object or has none of the name fields
      # @raise [JSON::ParserError] when json_string is not valid JSON
      def label(json_string)
        item = JSON.parse(json_string)
        # Valid JSON that is not an object (null, array, ...) carries no label.
        return '' unless item.is_a?(Hash)
        [item['name'], item['adminName1'], item['countryName']].compact.join(', ')
      end
    end
  end
end
Oops, something went wrong.