Skip to content

Commit

Permalink
Refactor sort process into a service
Browse files Browse the repository at this point in the history
  • Loading branch information
elrayle committed Dec 10, 2018
1 parent 571c8da commit 46ad37d
Show file tree
Hide file tree
Showing 11 changed files with 670 additions and 66 deletions.
1 change: 1 addition & 0 deletions .rubocop_todo.yml
Expand Up @@ -11,6 +11,7 @@ RSpec/ExampleLength:
Metrics/ClassLength:
Exclude:
- 'app/controllers/qa/linked_data_terms_controller.rb'
- 'app/services/qa/linked_data/deep_sort_service.rb'
- 'lib/qa/authorities/linked_data/find_term.rb'
- 'lib/qa/authorities/linked_data/search_query.rb'
- 'lib/qa/authorities/linked_data/config/term_config.rb'
Expand Down
2 changes: 1 addition & 1 deletion app/models/qa/iri_template/url_config.rb
Expand Up @@ -22,7 +22,7 @@ def initialize(url_config)

# Selective extract substitution variable-value pairs from the provided substitutions.
# @param [Hash, ActionController::Parameters] full set of passed in substitution values
# @returns [HashWithIndifferentAccess] Only variable-value pairs for variables defined in the variable mapping.
# @return [HashWithIndifferentAccess] Only variable-value pairs for variables defined in the variable mapping.
def extract_substitutions(substitutions)
selected_substitutions = HashWithIndifferentAccess.new
mapping.each do |m|
Expand Down
2 changes: 1 addition & 1 deletion app/services/qa/linked_data/authority_url_service.rb
Expand Up @@ -8,7 +8,7 @@ class AuthorityUrlService
# @param action [Symbol] action with valid values :search or :term
# @param action_request [String] the request the user is making of the authority (e.g. query text or term id/uri)
# @param substitutions [Hash] variable-value pairs to substitute into the URL template
# @returns a valid URL the submits the action request to the external authority
# @return a valid URL the submits the action request to the external authority
def self.build_url(action_config:, action:, action_request:, substitutions: {}, subauthority: nil)
action_validation(action)
url_config = Qa::IriTemplate::UrlConfig.new(action_url(action_config, action))
Expand Down
244 changes: 244 additions & 0 deletions app/services/qa/linked_data/deep_sort_service.rb
@@ -0,0 +1,244 @@
# Provide a service for sorting an array of hashes based on the values at a specified key in each hash.
module Qa
module LinkedData
class DeepSortService
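# @param the_array [Array<Hash<Symbol,Array<RDF::Literal>>>] the array of hashes to sort
# @param sort_key [Symbol] the key in the hash on whose value the array will be sorted
# @param preferred_language [Symbol] language whose literals sort ahead of other languages (defaults to :en)
# @return instance of this class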
# @example the_array parameter
# [
# {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n2010043281>],
# :id=>[#<RDF::Literal:0x3fcff4a367b4("n 2010043281")>],
# :label=>[#<RDF::Literal:0x3fcff54a9a98("Valli, Sabrina"@en)>],
# :altlabel=>[],
# :sort=>[#<RDF::Literal:0x3fcff54b4c18("2")>]},
# {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n201002344>],
# :id=>[#<RDF::Literal:0x3fcff4a367b4("n 201002344")>],
# :label=>[#<RDF::Literal:0x3fcff54a9a98("Cornell, Joseph"@en)>],
# :altlabel=>[],
# :sort=>[#<RDF::Literal:0x3fcff54b4c18("1")>]}
# ]
def initialize(the_array, sort_key, preferred_language = :en)
  # Wrap every hash in a comparable DeepSortElement so Array#sort can order them.
  @sortable_elements = the_array.map do |hash|
    DeepSortElement.new(hash, sort_key, preferred_language)
  end
end

# Sort an array of hash on the specified sort key. The value in the hash at sort key is expected to be an array
# with one or more values that are RDF::Literals that translate to a number (e.g. 2), a string number (e.g. "3"),
# a string (e.g. "hello"), or a language qualified string (e.g. "hello"@en).
# The sort occurs in the following precedence.
# * preference for numeric sort (if only one value each and both are integers or a string that can be converted to an integer)
# * single value sort (if only one value each and at least one is not an integer)
# * multiple values sort (if either has multiple values)
# @return [Array<Hash>] the sorted array
# @example returned sorted array
# [
# {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n201002344>],
# :id=>[#<RDF::Literal:0x3fcff4a367b4("n 201002344")>],
# :label=>[#<RDF::Literal:0x3fcff54a9a98("Cornell, Joseph"@en)>],
# :altlabel=>[],
# :sort=>[#<RDF::Literal:0x3fcff54b4c18("1")>]},
# {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n2010043281>],
# :id=>[#<RDF::Literal:0x3fcff4a367b4("n 2010043281")>],
# :label=>[#<RDF::Literal:0x3fcff54a9a98("Valli, Sabrina"@en)>],
# :altlabel=>[],
# :sort=>[#<RDF::Literal:0x3fcff54b4c18("2")>]}
# ]
def sort
  # Order the wrappers with their <=>, then unwrap back to the original hashes.
  ordered = @sortable_elements.sort
  ordered.map { |sortable| sortable.element }
end

class DeepSortElement
attr_reader :element, :literals, :preferred_language
private :preferred_language

delegate :size, to: :@literals

# Wrap one hash from the array being sorted.
# The literals found at sort_key are ordered by Qa::LinkedData::LanguageSortService and written
# back into element, so the hash passed in is modified.
# @param element [Hash] the hash being wrapped
# @param sort_key the key in element whose literals drive the comparison
# @param preferred_language [Symbol] language given precedence when comparing
def initialize(element, sort_key, preferred_language)
  sorted = Qa::LinkedData::LanguageSortService.new(element[sort_key], preferred_language).sort
  element[sort_key] = sorted
  @element = element
  # LanguageSortService#sort returns a nil input unchanged; fall back to an empty list so that
  # size (delegated to @literals) and the comparators keep working when the sort key is missing.
  @literals = sorted || []
  @preferred_language = preferred_language
  @has_preferred_language = includes_preferred_language?
  @all_same_language = all_same_language?
end

# Compare with another element, choosing the comparison strategy in order of precedence:
# numeric, then single value, then multiple values.
# @param other [DeepSortElement] the element to compare against
# @return the result of the selected comparator
def <=>(other)
  if integer? && other.integer?
    numeric_comparator(other)
  elsif single? && other.single?
    single_value_comparator(other)
  else
    multiple_value_comparator(other)
  end
end

# @return [Boolean] true if there is a single literal that is an integer or a string that can be
#   converted to an integer; otherwise, false
def integer?
  single? ? literal.to_s.match?(/\A[-+]?\d+\z/) : false
end

# Convert the literal at idx to an Integer.
# @param idx [Integer] position of the literal to convert (defaults to the first)
# @return [Integer] the base-10 value of the literal's string form
# @raise [ArgumentError] if the string form is not a base-10 integer
def integer(idx = 0)
  # Force base 10: without it a leading zero selects octal, so "010" becomes 8 and "08" raises,
  # even though integer? accepts both strings.
  Integer(literal(idx).to_s, 10)
end

# @return [Boolean] true if there is only one value; otherwise, false
def single?
  # Computed directly: a `||=` cache cannot hold a false answer, and the size check is cheap.
  literals.size == 1
end

# Fetch one of the literals held for the sort key.
# @param idx [Integer] position within literals (defaults to the first)
# @return the literal at idx
def literal(idx = 0)
literals[idx]
end

# Lowercased string form of one literal, used for case-insensitive comparison.
# @param idx [Integer] position within literals (defaults to the first)
# @return [String] the literal at idx passed through to_downcase
def downcase_string(idx = 0)
to_downcase(literal(idx))
end

# Language marker of one entry in a list of literals.
# @param idx [Integer] position within list (defaults to the first)
# @param list [Array] the literals to look in (defaults to this element's literals)
# @return the entry's language, or nil when the entry does not respond to language
def language(idx = 0, list = literals)
  entry = list[idx]
  entry.respond_to?(:language) ? entry.language : nil
end

# @return [Boolean] true if at least one literal is in the preferred language; otherwise, false
def includes_preferred_language?
  # nil means "not computed yet"; a cached false is returned as-is instead of being recomputed.
  return @has_preferred_language unless @has_preferred_language.nil?
  @has_preferred_language = !filtered_literals(preferred_language).empty?
end

# @return [Boolean] true if every literal reports the same language as the first one (also true
#   for zero or one literal); otherwise, false
def all_same_language?
  # nil means "not computed yet"; a cached false is returned as-is instead of being recomputed.
  return @all_same_language unless @all_same_language.nil?
  @all_same_language = (1...size).all? { |idx| language(idx) == language(0) }
end

# @return [Array] the keys of the language bins, i.e. each distinct value that language returns
#   for this element's literals
def languages
filtered_literals_by_language.keys
end

# Literals of this element that carry a given language marker.
# @param filter_language the language marker to select
# @return [Array] the matching literals, or an empty array when none match
def filtered_literals(filter_language)
  filtered_literals_by_language.fetch(filter_language) { [] }
end

private

# If both test values are single value and both are integers, do a numeric sort
# @param other [DeepSortElement] the element to compare against
# @return the result of <=> between the two integer values
def numeric_comparator(other)
integer <=> other.integer
end

# Used when both elements hold a single value and at least one is not numeric.
# * when neither literal has a language marker, or both have the same one, compare the lowercased strings
# * otherwise, compare the language markers themselves
# @param other [DeepSortElement] the element to compare against
def single_value_comparator(other)
  if same_language?(literal, other.literal)
    downcase_string <=> other.downcase_string
  else
    compare_languages(language, other.language)
  end
end

# Order two language markers: the preferred language sorts first, a missing language sorts
# last, and any other pair falls back to comparing the markers themselves.
# @param lang language marker of this element's literal
# @param other_lang language marker of the other element's literal
# @return -1 or 1 for the precedence rules, otherwise the result of lang <=> other_lang
def compare_languages(lang, other_lang)
  if preferred_language?(lang)
    -1
  elsif preferred_language?(other_lang)
    1
  elsif no_language?(other_lang)
    -1
  elsif no_language?(lang)
    1
  else
    lang <=> other_lang
  end
end

# Used when at least one of the elements holds multiple values; picks a list comparison strategy.
# * both lists are each in a single language (or have no language markers): compare the lists element by element
# * both lists include the preferred language: compare only the literals in the preferred language
# * otherwise: compare language by language and combine the results
# @param other [DeepSortElement] the element to compare against
def multiple_value_comparator(other)
  if all_same_language? && other.all_same_language?
    single_language_list_comparator(other)
  elsif includes_preferred_language? && other.includes_preferred_language?
    specified_language_list_comparator(other, preferred_language)
  else
    multi_language_list_comparator(other)
  end
end

# Compare the complete literal lists of both elements using list_comparator.
# @param other [DeepSortElement] the element to compare against
# @return the result of list_comparator
def single_language_list_comparator(other)
list_comparator(literals, other.literals)
end

# Compare two elements using only their literals in one language.
# An element with literals in that language sorts ahead of one without any.
# @param other [DeepSortElement] the element to compare against
# @param language the language marker to filter both lists by
# @return -1 or 1 when only one side has literals in the language, otherwise the result of list_comparator
def specified_language_list_comparator(other, language)
  mine = filtered_literals(language)
  theirs = other.filtered_literals(language)
  return (mine.empty? ? 1 : -1) if mine.empty? != theirs.empty?
  list_comparator(mine, theirs)
end

# Compare two elements language by language.
# * for every language found in either element, compare the two filtered lists with list_comparator
# * add up the per-language results; the sign of the total decides the order
# @param other [DeepSortElement] the element to compare against
# @return [Integer] 1, -1 or 0
def multi_language_list_comparator(other)
  every_language = (languages + other.languages).uniq
  total = every_language.sum do |lang|
    list_comparator(filtered_literals(lang), other.filtered_literals(lang))
  end
  total <=> 0
end

# Compare two lists of literals position by position, ignoring case.
# @param list [Array] literals of this element
# @param other_list [Array] literals of the other element
# @return [Integer] -1, 0 or 1
def list_comparator(list, other_list)
  # Two empty lists are equal. Returning -1 here would make a <=> b and b <=> a both -1,
  # which gives the sort an inconsistent ordering.
  return 0 if list.empty? && other_list.empty?
  # if an element doesn't have any terms in a language, the other element sorts lower
  return -1 if other_list.empty?
  return 1 if list.empty?
  shared_size = [list.size, other_list.size].min
  shared_size.times do |idx|
    cmp = to_downcase(list[idx]) <=> to_downcase(other_list[idx])
    return cmp unless cmp.zero?
  end
  # didn't find any diffs, shorter list is considered lower
  list.size <=> other_list.size
end

# @param lit the first literal
# @param other_lit the second literal
# @return true if neither literal has a language marker or both have the same one; otherwise, false
def same_language?(lit, other_lit)
  marked = language?(lit)
  return false if marked != language?(other_lit)
  return true unless marked
  lit.language == other_lit.language
end

# @return [Boolean] true if neither literal has a language marker; otherwise, false
def neither_have_language_markers?(lit, other_lit)
  [lit, other_lit].none? { |candidate| language?(candidate) }
end

# @return [Boolean] true if exactly one of the two literals has a language marker; otherwise, false
def only_one_has_language_marker?(lit, other_lit)
  language?(lit) ^ language?(other_lit)
end

# @param lit the literal to inspect
# @return [Boolean] true if lit responds to language and that language is present; otherwise, false
def language?(lit)
  return false unless lit.respond_to?(:language)
  lit.language.present?
end

# @param language the language marker to test
# @return [Boolean] true if a preferred language is set and language equals it; otherwise, false
def preferred_language?(language)
  preferred_language.present? && language == preferred_language
end

# @param language the language marker to test
# @return [Boolean] true if language is blank; otherwise, false
def no_language?(language)
language.blank?
end

# @param lit the literal to convert
# @return [String] the string form of lit, lowercased so comparisons ignore case
def to_downcase(lit)
lit.to_s.downcase
end

# Literals grouped by language marker; built by create_all_filters on first use and then reused.
# @return [Hash] language marker => array of literals
def filtered_literals_by_language
@filtered_literals_by_language ||= create_all_filters
end

# Group this element's literals by the language marker that language reports for each one.
# Literals keep their relative order inside each group.
# @return [Hash] language marker => array of literals
def create_all_filters
  (0...size).each_with_object({}) do |idx, groups|
    (groups[language(idx, literals)] ||= []) << literal(idx)
  end
end
end
private_constant :DeepSortElement
end
end
end
8 changes: 4 additions & 4 deletions app/services/qa/linked_data/graph_service.rb
Expand Up @@ -5,7 +5,7 @@ class GraphService
class << self
# Retrieve linked data from specified url
# @param [String] url from which to retrieve linked data
# @returns [RDF::Graph] graph of linked data
# @return [RDF::Graph] graph of linked data
def load_graph(url:)
RDF::Graph.load(url)
rescue IOError => e
Expand All @@ -17,7 +17,7 @@ def load_graph(url:)
# @param language [String | Symbol | Array<String|Symbol>] will keep any statement whose object's language matches the language filter
# (only applies to statements that respond to language) (e.g. "en" or :en or ["en", "fr"] or [:en, :fr])
# @param remove_blanknode_subjects [Boolean] will remove any statement whose subject is a blanknode, if true
# @returns [RDF::Graph] a new instance of graph with statements not matching the filters removed
# @return [RDF::Graph] a new instance of graph with statements not matching the filters removed
def filter(graph:, language: nil, remove_blanknode_subjects: false)
return graph unless graph.present?
return graph unless language.present? || remove_blanknode_subjects
Expand All @@ -33,7 +33,7 @@ def filter(graph:, language: nil, remove_blanknode_subjects: false)
# @param graph [RDF::Graph] the graph to search
# @param subject [RDF::URI] the URI of the subject
# @param predicate [RDF::URI] the URI of the predicate
# @returns [Array] all object values for the subject-predicate pair
# @return [Array] all object values for the subject-predicate pair
def object_values(graph:, subject:, predicate:)
values = []
graph.query([subject, predicate, :object]) do |statement|
Expand Down Expand Up @@ -81,7 +81,7 @@ def ioerror_code(e)

# Normalize language
# @param [String | Symbol | Array] language for filtering graph (e.g. "en" OR :en OR ["en", "fr"] OR [:en, :fr])
# @returns [Array<Symbol>] an array of languages encoded as symbols (e.g. [:en] OR [:en, :fr])
# @return [Array<Symbol>] an array of languages encoded as symbols (e.g. [:en] OR [:en, :fr])
def normalize_language(language)
return language if language.blank?
language = [language] unless language.is_a? Array
Expand Down
81 changes: 81 additions & 0 deletions app/services/qa/linked_data/language_sort_service.rb
@@ -0,0 +1,81 @@
# Service to sort an array of literals by language and within language.
module Qa
  module LinkedData
    # Orders a list of literals first by language and then, within each language, by the
    # lowercased string form of the literal. The preferred language (when given and present in the
    # list) comes first, the remaining languages follow in sorted order, and literals without a
    # language come last.
    class LanguageSortService
      # Bin key used for literals that have no language.
      LANGUAGE_LOCALE_KEY_FOR_NO_LANGUAGE = :NO_LANGUAGE

      attr_reader :literals, :preferred_language
      attr_reader :languages, :bins
      private :literals, :preferred_language, :languages, :bins

      # @param literals [Array<RDF::Literal>] string literals to sort
      # @param preferred_language [Symbol] preferred language to appear first in the list; defaults to no preference
      # @return instance of this class
      def initialize(literals, preferred_language = nil)
        @literals = literals
        @preferred_language = preferred_language
        @languages = []
        @bins = {}
      end

      # Sort the literals stored in this instance of the service. The list passed to the
      # constructor is not reordered; a new array is built.
      # @return sorted version of literals; a nil or empty input is returned unchanged
      def sort
        return literals unless literals.present?
        return @sorted_literals if @sorted_literals.present?
        parse_into_language_bins
        sort_languages
        sort_language_bins
        @sorted_literals = construct_sorted_literals
      end

      private

      # Concatenate the bins in the order established by sort_languages.
      def construct_sorted_literals
        languages.flat_map { |lang| bins[lang] }
      end

      # @return the literal's language, or LANGUAGE_LOCALE_KEY_FOR_NO_LANGUAGE when it has none
      def language(literal)
        lang = literal.language if literal.respond_to?(:language)
        lang.present? ? lang : LANGUAGE_LOCALE_KEY_FOR_NO_LANGUAGE
      end

      def move_no_language_to_end
        return unless languages.include?(LANGUAGE_LOCALE_KEY_FOR_NO_LANGUAGE)
        languages.delete(LANGUAGE_LOCALE_KEY_FOR_NO_LANGUAGE)
        languages << LANGUAGE_LOCALE_KEY_FOR_NO_LANGUAGE
      end

      def move_preferred_language_to_front
        return unless preferred_language.present? && languages.include?(preferred_language)
        languages.delete(preferred_language)
        languages.unshift(preferred_language)
      end

      # Record each literal's language and place the literal in the bin for that language.
      # languages and bins are filled in place, so no instance variable is reassigned here.
      def parse_into_language_bins
        literals.each do |literal|
          lang = language(literal)
          languages << lang
          (bins[lang] ||= []) << literal
        end
      end

      # Deduplicate and sort the recorded languages, then apply the two placement rules.
      def sort_languages
        languages.uniq!
        languages.sort!
        move_preferred_language_to_front
        move_no_language_to_end
      end

      def sort_language_bins
        bins.each_value { |bin| bin.sort_by! { |literal| literal.to_s.downcase } }
      end
    end
  end
end

0 comments on commit 46ad37d

Please sign in to comment.