Skip to content

Commit

Permalink
add a new indexer for 'linked_data_attributes', ie. those whose value…
Browse files Browse the repository at this point in the history
…s are RDF::URIs from which we want to fetch a label to index in solr
  • Loading branch information
Julie Allinson committed Oct 26, 2017
1 parent b38aec1 commit 79a19df
Show file tree
Hide file tree
Showing 18 changed files with 251 additions and 165 deletions.
9 changes: 0 additions & 9 deletions app/indexers/concerns/hyrax/indexes_linked_metadata.rb

This file was deleted.

102 changes: 0 additions & 102 deletions app/indexers/hyrax/deep_indexing_service.rb

This file was deleted.

65 changes: 65 additions & 0 deletions app/indexers/hyrax/linked_data_attributes_indexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# frozen_string_literal: true

module Hyrax
  # Adds label fields to the solr document for every linked data attribute,
  # i.e. an attribute whose values are RDF::URIs from which a label is fetched.
  #
  # @example Configure which attributes are indexed (given as symbols)
  #   self.linked_data_attributes = [:based_near]
  #
  # @todo This replicates current Hyrax DeepIndexingService behaviour of fetching from the URI
  #   whenever #to_solr is called; ideally avoid this by getting data from the solr_document where
  #   labels are already indexed; this would require a combined form of label and uri in solr
  class LinkedDataAttributesIndexer
    class_attribute :linked_data_attributes
    self.linked_data_attributes = %i[based_near]

    attr_reader :resource, :solr_hash

    # @param resource [Hash] options hash; the object to index is read from its :resource key
    def initialize(resource)
      @resource = resource.fetch(:resource)
      @solr_hash = {}
    end

    # Build the label fields for each configured attribute the resource has a value for.
    #
    # @return [Hash] the accumulated solr fields
    def to_solr
      linked_data_attributes.each do |ld_attribute_name|
        stored_searchable_and_facetable(ld_attribute_name) if resource.try(ld_attribute_name)
      end
      solr_hash
    end

    private

    # Fetch the labels once and store them in solr_hash under both the
    # stored_searchable (_tesim) and the facetable (_sim) field names.
    #
    # @param ld_attribute_name [Symbol] the attribute name
    # @return [Hash] the solr_hash
    def stored_searchable_and_facetable(ld_attribute_name)
      labels = fetch_labels(ld_attribute_name)
      %w[_label_tesim _label_sim].each do |suffix|
        solr_hash[:"#{ld_attribute_name}#{suffix}"] = labels
      end
      solr_hash
    end

    # Fetch a label for each RDF::URI held by the given attribute.
    #
    # @param ld_attribute_name [Symbol] the attribute name
    # @return [Array<String>] an array of labels
    def fetch_labels(ld_attribute_name)
      uris = resource.send(ld_attribute_name)
      uris.map { |ld_uri| fetch_label(ld_attribute_name, ld_uri) }
    end

    # Fetch a single label from the external source via Hyrax::LinkedDataResourceFactory.
    #
    # @param ld_attribute_name [Symbol] the attribute name
    # @param ld_uri [RDF::URI] the uri
    # @return [String] single label
    def fetch_label(ld_attribute_name, ld_uri)
      Hyrax::LinkedDataResourceFactory.for(ld_attribute_name, ld_uri).fetch_external
    end
  end
end
5 changes: 5 additions & 0 deletions lib/generators/hyrax/templates/config/initializers/hyrax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@
# Stream realtime notifications to users in the browser
# config.realtime_notifications = true

# Register new linked_data_resource classes; used to define special behavior for retrieving a label from an
# external service for the given attribute.
# @example :based_near is already registered to use Hyrax::LinkedDataResources::GeonamesResource
# config.registered_linked_data_resources[:my_new_attribute] = MyClass

# Location autocomplete uses geonames to search for named regions
# Username for connecting to geonames
# config.geonames_username = ''
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,13 @@
)

Valkyrie::MetadataAdapter.register(
Valkyrie::Persistence::Solr::MetadataAdapter.new(connection: Blacklight.default_index.connection,
resource_indexer: Valkyrie::Indexers::AccessControlsIndexer),
Valkyrie::Persistence::Solr::MetadataAdapter.new(
connection: Blacklight.default_index.connection,
resource_indexer: Valkyrie::Persistence::Solr::CompositeIndexer.new(
Valkyrie::Indexers::AccessControlsIndexer,
Hyrax::LinkedDataAttributesIndexer
)
),
:index_solr
)

Expand Down
5 changes: 0 additions & 5 deletions lib/generators/hyrax/work/templates/indexer.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@ class <%= class_name %>Indexer < Hyrax::WorkIndexer
# provide your own metadata and indexing.
include Hyrax::IndexesBasicMetadata

# Fetch remote labels for based_near. You can remove this if you don't want
# this behavior
include Hyrax::IndexesLinkedMetadata


# Uncomment this block if you want to add custom indexing behavior:
# def generate_solr_document
# super.tap do |solr_doc|
Expand Down
3 changes: 2 additions & 1 deletion lib/hyrax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ module Hyrax
autoload :Arkivo
autoload :Collections
autoload :Configuration
autoload :ControlledVocabularies
autoload :LinkedDataResourceFactory
autoload :LinkedDataResources
autoload :RedisEventStore
autoload :ResourceSync
autoload :Zotero
Expand Down
7 changes: 7 additions & 0 deletions lib/hyrax/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,13 @@ def google_analytics_id
end
alias google_analytics_id? google_analytics_id

attr_writer :registered_linked_data_resources
# Registry mapping an attribute name to the class used to fetch its label.
# Built on first access with :based_near pointing at the Geonames resource,
# then memoized, so entries added by callers persist.
#
# @return [Hash{Symbol => Class}]
def registered_linked_data_resources
  return @registered_linked_data_resources if @registered_linked_data_resources

  @registered_linked_data_resources = { based_near: Hyrax::LinkedDataResources::GeonamesResource }
end

# Defaulting analytic start date to whenever the file was uploaded by leaving it blank
attr_writer :analytic_start_date
attr_reader :analytic_start_date
Expand Down
9 changes: 0 additions & 9 deletions lib/hyrax/controlled_vocabularies.rb

This file was deleted.

13 changes: 0 additions & 13 deletions lib/hyrax/controlled_vocabularies/location.rb

This file was deleted.

12 changes: 12 additions & 0 deletions lib/hyrax/linked_data_resource_factory.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
module Hyrax
  # Chooses and instantiates the LinkedDataResources class registered for an attribute.
  class LinkedDataResourceFactory
    # Instantiate a LinkedDataResources class.
    #
    # Looks the attribute up in Hyrax.config.registered_linked_data_resources and
    # falls back to BaseResource when nothing is registered for it.
    #
    # @param ld_attribute [Symbol] attribute used to select the LinkedDataResources class
    # @param ld_uri [RDF::URI] uri from which to retrieve a label
    # @return [Hyrax::LinkedDataResources::BaseResource] or a more specific class
    def self.for(ld_attribute, ld_uri)
      registry = Hyrax.config.registered_linked_data_resources
      resource_class = registry.fetch(ld_attribute, Hyrax::LinkedDataResources::BaseResource)
      resource_class.new(ld_uri)
    end
  end
end
10 changes: 10 additions & 0 deletions lib/hyrax/linked_data_resources.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module Hyrax
  # Namespace for the classes that fetch a label for a linked data URI.
  module LinkedDataResources
    extend ActiveSupport::Autoload

    # Both resource classes are declared inside the eager_autoload block.
    eager_autoload do
      %i[BaseResource GeonamesResource].each { |const_name| autoload const_name }
    end
  end
end
47 changes: 47 additions & 0 deletions lib/hyrax/linked_data_resources/base_resource.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
require 'linkeddata' # we need all the linked data types, because we don't know what types a service might return.
module Hyrax
  module LinkedDataResources
    # LinkedDataResources are used for fetching a RDF::URI and retrieving the rdf_label.
    # They must be initialized with an RDF::URI or URI String.
    # They must implement #fetch_external.
    # They may extend BaseResource (which extends ActiveTriples::Resource).

    # Extend BaseResource where the rdf_label needs to be configured,
    # i.e. where the rdf_label has to come from properties
    # other than those in ActiveTriples::RDFSource#default_labels
    # @example we might have a LocationResource which expects a Geonames URI.
    #   In the RDF returned by Geonames, the closest match to a 'label' is 'name'
    #   class LocationResource < BaseResource
    #     configure rdf_label: ::RDF::Vocab::GEONAMES.name
    #   end

    # BaseResource is used where the RDF::URI is expected to include one or more
    # ActiveTriples::RDFSource#default_labels and thus to respond to rdf_label
    class BaseResource < ActiveTriples::Resource
      # Fetch the remote data, then return the first rdf_label.
      #
      # @return [String] rdf_label
      def fetch_external
        fetch_value
        first_label = rdf_label.first
        first_label.to_s
      end

      private

      # Get the RDF data from the URL by calling #fetch.
      # The rescued errors are logged rather than re-raised.
      def fetch_value
        Rails.logger.info "Fetching #{rdf_subject} from the authorative source. (this is slow)"
        request_headers = { 'Accept'.freeze => default_accept_header }
        fetch(headers: request_headers)
      rescue IOError, SocketError, ArgumentError => e
        # IOError could result from a 500 error on the remote server
        # SocketError results if there is no server to connect to
        Rails.logger.error "Unable to fetch #{rdf_subject} from the authorative source.\n#{e.message}"
      end

      # Strip off the */* to work around https://github.com/rails/rails/issues/9940
      #
      # @return [String] accept headers string
      def default_accept_header
        adapter_header = RDF::Util::File::HttpAdapter.default_accept_header
        adapter_header.sub(%r{, \*/\*;q=0\.1\Z}, '')
      end
    end
  end
end
56 changes: 56 additions & 0 deletions lib/hyrax/linked_data_resources/geonames_resource.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
module Hyrax
  module LinkedDataResources
    # Resolves a Geonames URI to a label using the Geonames getJSON API.
    #
    # Does not extend BaseResource: we can get richer information from the
    # JSON API, so we use that instead of the RDF.
    # Whenever a label cannot be built, the URI itself (as a String) is returned.
    class GeonamesResource
      attr_reader :rdf_subject

      # @param rdf_subject [RDF::URI, String] the Geonames URI
      def initialize(rdf_subject)
        @rdf_subject = rdf_subject
      end

      # @return [String] the label, or the URI string when no label could be retrieved
      def fetch_external
        fetch_value
      end

      private

      # Get the data from the URL by calling Faraday#get and turn it into a label.
      #
      # @return [String] the label, or the URI string on any failure
      def fetch_value
        Rails.logger.info "Fetching #{rdf_subject} from the authorative source."
        response = Faraday.get build_json_uri(rdf_subject)
        return rdf_subject.to_s if response.status != 200

        text = label(response.body)
        # A 200 response may still carry none of the place fields (e.g. an error
        # payload); index the URI rather than an empty label in that case.
        text.empty? ? rdf_subject.to_s : text
      rescue IOError, Faraday::ConnectionFailed, ArgumentError, JSON::ParserError => e
        # IOError could result from a 500 error on the remote server
        # Faraday::ConnectionFailed results if there is no server to connect to
        # JSON::ParserError results if the body is not JSON (it is not an ArgumentError)
        Rails.logger.error "Unable to fetch #{rdf_subject} from the authorative source.\n#{e.message}"
        rdf_subject.to_s
      end

      # Construct a URI for the Geonames getJSON API
      #
      # @param uri [RDF::URI, String] the Geonames URI
      # @return [String] uri
      def build_json_uri(uri)
        "http://www.geonames.org/getJSON?geonameId=#{find_id(uri)}&username=#{Qa::Authorities::Geonames.username}"
      end

      # Extract the Geonames id from a URI in the form http://sws.geonames.org/2638077/
      #
      # @param uri [RDF::URI] the Geonames URI
      # @return [String] the Geonames id
      def find_id(uri)
        uri.to_s.split('/')[3]
      end

      # Construct a disambiguable label from the JSON response.
      # Missing and blank fields are skipped so the label never contains empty
      # segments such as "Name, , Country".
      #
      # @param json_string [String] the Faraday::Response#body
      # @return [String] the joined label; empty when no field is usable
      # @raise [JSON::ParserError] when json_string is not valid JSON
      def label(json_string)
        item = JSON.parse(json_string)
        return '' unless item.is_a?(Hash)

        item.values_at('name', 'adminName1', 'countryName')
            .reject { |part| part.to_s.strip.empty? }
            .join(', ')
      end
    end
  end
end

0 comments on commit 79a19df

Please sign in to comment.