/
deep_sort_service.rb
238 lines (206 loc) · 10.1 KB
/
deep_sort_service.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# Provides a service for sorting an array of hashes based on the values at a specified key in each hash.
module Qa
module LinkedData
class DeepSortService
# Wrap every hash of the incoming array in a DeepSortElement so the array can be sorted later.
# Constructing a DeepSortElement replaces the hash's value at sort_key with its language-sorted form.
#
# @param the_array [Array<Hash<Symbol,Array<RDF::Literal>>>] the array of hashes to sort
# @param sort_key the key in the hash on whose value the array will be sorted (:sort in the example below)
# @param preferred_language the language handed to every DeepSortElement; defaults to :en
# @return instance of this class
# @example the_array parameter
#   [
#     {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n2010043281>],
#      :id=>[#<RDF::Literal:0x3fcff4a367b4("n 2010043281")>],
#      :label=>[#<RDF::Literal:0x3fcff54a9a98("Valli, Sabrina"@en)>],
#      :altlabel=>[],
#      :sort=>[#<RDF::Literal:0x3fcff54b4c18("2")>]},
#     {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n201002344>],
#      :id=>[#<RDF::Literal:0x3fcff4a367b4("n 201002344")>],
#      :label=>[#<RDF::Literal:0x3fcff54a9a98("Cornell, Joseph"@en)>],
#      :altlabel=>[],
#      :sort=>[#<RDF::Literal:0x3fcff54b4c18("1")>]}
#   ]
def initialize(the_array, sort_key, preferred_language = :en)
  @sortable_elements = the_array.map do |hash|
    DeepSortElement.new(hash, sort_key, preferred_language)
  end
end
# Sort the array of hashes on the sort key given to the constructor. The value in each hash at the sort key is
# expected to be an array with one or more RDF::Literals that translate to a number (e.g. 2), a string number
# (e.g. "3"), a string (e.g. "hello"), or a language qualified string (e.g. "hello"@en).
# The comparison is chosen in the following precedence:
# * numeric sort (if only one value each and both are integers or strings that can be converted to an integer)
# * single value sort (if only one value each and at least one is not an integer)
# * multiple values sort (if either has multiple values)
# @return the sorted array of the original hashes
# @example returned sorted array
#   [
#     {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n201002344>],
#      :id=>[#<RDF::Literal:0x3fcff4a367b4("n 201002344")>],
#      :label=>[#<RDF::Literal:0x3fcff54a9a98("Cornell, Joseph"@en)>],
#      :altlabel=>[],
#      :sort=>[#<RDF::Literal:0x3fcff54b4c18("1")>]},
#     {:uri=>[#<RDF::URI:0x3fcff54a829c URI:http://id.loc.gov/authorities/names/n2010043281>],
#      :id=>[#<RDF::Literal:0x3fcff4a367b4("n 2010043281")>],
#      :label=>[#<RDF::Literal:0x3fcff54a9a98("Valli, Sabrina"@en)>],
#      :altlabel=>[],
#      :sort=>[#<RDF::Literal:0x3fcff54b4c18("2")>]}
#   ]
def sort
  ordered = @sortable_elements.sort
  ordered.map { |sortable| sortable.element }
end
class DeepSortElement
attr_reader :element, :literals, :preferred_language
private :preferred_language
delegate :size, to: :@literals
# Build the sortable wrapper around one hash.
# NOTE: the incoming hash is modified -- its value at sort_key is replaced by the result of
# Qa::LinkedData::LanguageSortService#sort for that value and the preferred language.
# @param element [Hash] the hash being wrapped
# @param sort_key the key whose value supplies the literals used for comparison
# @param preferred_language the language that the comparators treat as preferred
def initialize(element, sort_key, preferred_language)
  language_sorter = Qa::LinkedData::LanguageSortService.new(element[sort_key], preferred_language)
  element[sort_key] = language_sorter.sort
  @element = element
  @literals = element[sort_key]
  @preferred_language = preferred_language
  # Both predicates read @literals and @preferred_language, so they must run after the assignments above.
  @includes_preferred_language = includes_preferred_language?
  @all_same_language = all_same_language?
end
# Three-way comparison against another sortable element.
# * both hold a single integer-like literal -> numeric comparison
# * both hold a single literal              -> single value comparison
# * anything else                           -> multiple value comparison
# @param other the element to compare with; must answer integer? and single?
# @return the result of the selected comparator
def <=>(other)
  if integer? && other.integer?
    numeric_comparator(other)
  elsif single? && other.single?
    single_value_comparator(other)
  else
    multiple_value_comparator(other)
  end
end
# @return true if there is a single literal whose string form is an optionally signed run of digits
#   (an integer or a string that can be converted to an integer); otherwise, false
def integer?
  if single?
    literal.to_s.match?(/\A[-+]?\d+\z/)
  else
    false
  end
end
# Convert the literal at idx to an Integer.
# The string is always parsed as decimal: without an explicit base, Kernel#Integer treats a leading zero as an
# octal prefix, so "010" would become 8 and "08" would raise ArgumentError even though integer? accepts both.
# @param idx [Integer] position of the literal to convert; defaults to 0
# @return [Integer] the decimal value of the literal's string form
# @raise [ArgumentError] if the literal's string form is not a valid decimal integer
def integer(idx = 0)
  Integer(literal(idx).to_s, 10)
end
# @return true if there is only one value; otherwise, false
# The answer is computed once. A defined? guard is used instead of ||= because ||= re-evaluates
# the right-hand side every time the cached answer is false.
def single?
  return @single if defined?(@single)
  @single = (literals.size == 1)
end
# Read one entry from this element's literals.
# @param idx position handed to literals[]; defaults to 0 (the first literal)
# @return whatever literals[idx] yields (presumably nil when nothing is stored at idx -- literals is
#   expected to be an Array, TODO confirm against LanguageSortService#sort)
def literal(idx = 0)
literals[idx]
end
# Lower-cased string form of the literal at idx, used for case-insensitive comparison.
# @param idx position of the literal; defaults to 0
# @return the result of to_downcase for that literal
def downcase_string(idx = 0)
  chosen = literal(idx)
  to_downcase(chosen)
end
# Language of a literal, or nil when the LanguageService reports that it carries no language marker.
# @param lit the literal to inspect; defaults to the first of this element's literals
# @return lit.language when a marker is present; otherwise, nil
def language(lit = literals.first)
  Qa::LinkedData::LanguageService.literal_has_language_marker?(lit) ? lit.language : nil
end
# @return true if the first literal's language equals the preferred language; otherwise, false
# The answer is computed once. The guard checks whether the instance variable has been assigned rather than
# whether it is present?, because a cached false is never present? and would be recomputed on every call.
def includes_preferred_language?
  return @includes_preferred_language if defined?(@includes_preferred_language)
  # literals are sorted by language with preferred language first in the list
  @includes_preferred_language = (language == preferred_language)
end
# @return true if the first and last literal have the same language (or both have none); otherwise, false
# The answer is computed once. The guard checks whether the instance variable has been assigned rather than
# whether it is present?, because a cached false is never present? and would be recomputed on every call.
def all_same_language?
  return @all_same_language if defined?(@all_same_language)
  # literals are sorted by language, so if first = last, then all are the same
  @all_same_language = (language(literals.first) == language(literals.last))
end
# @return [Array] the distinct languages found among this element's literals (the keys of the
#   per-language bins); a new array on every call
def languages
  bins = filtered_literals_by_language
  bins.keys
end
# Literals of this element that are in the given language.
# @param filter_language the language to select (nil selects literals whose language is nil)
# @return [Array] the matching literals, or an empty array when there are none
def filtered_literals(filter_language)
  filtered_literals_by_language.fetch(filter_language) { [] }
end
private
# Numeric sort, used when both elements hold a single value and both values are integers.
# @param other the element to compare with
# @return the result of comparing the two integer values with <=>
def numeric_comparator(other)
  own_value = integer
  other_value = other.integer
  own_value <=> other_value
end
# String sort that takes language into consideration, used when both elements hold a single value and at
# least one of the values is not numeric.
# * if neither literal has a language marker, or both have the same one, compare the lower-cased values
# * otherwise, compare the languages
# @param other the element to compare with
# @return the result of the selected comparison
def single_value_comparator(other)
  if same_language?(literal, other.literal)
    downcase_string <=> other.downcase_string
  else
    compare_languages(language, other.language)
  end
end
# Order two languages. The checks are applied in this order:
# * the preferred language sorts first
# * a language sorts before a blank (missing) language
# * otherwise the two languages are compared with <=>
# @param lang language of this element's literal
# @param other_lang language of the other element's literal
# @return -1 or 1 from the rules above, or the result of lang <=> other_lang
def compare_languages(lang, other_lang)
  if preferred_language?(lang)
    -1
  elsif preferred_language?(other_lang)
    1
  elsif other_lang.blank?
    -1
  elsif lang.blank?
    1
  else
    lang <=> other_lang
  end
end
# Sort that takes language into consideration, used when at least one of the elements has multiple values.
# * if both lists have all the same language or no language markers at all, compare the lists element by element
# * if both lists include the preferred language, compare the two lists after filtering for the preferred language
# * otherwise, compare language by language
# @param other the element to compare with
# @return the result of the selected list comparator
def multiple_value_comparator(other)
  if all_same_language? && other.all_same_language?
    single_language_list_comparator(other)
  elsif includes_preferred_language? && other.includes_preferred_language?
    specified_language_list_comparator(other, preferred_language)
  else
    multi_language_list_comparator(other)
  end
end
# Compare the complete literal lists of the two elements.
# @param other the element to compare with
# @return the result of list_comparator for this element's literals and the other element's literals
def single_language_list_comparator(other)
  own_literals = literals
  other_literals = other.literals
  list_comparator(own_literals, other_literals)
end
# Compare the two elements using only the literals that are in the given language.
# An element that has literals in that language sorts before one that has none; when both or neither
# have such literals, the filtered lists are handed to list_comparator.
# @param other the element to compare with
# @param lang the language to filter both elements by
# @return -1, 1, or the result of list_comparator
def specified_language_list_comparator(other, lang)
  own_matches = filtered_literals(lang)
  other_matches = other.filtered_literals(lang)
  if own_matches.empty? != other_matches.empty?
    return own_matches.empty? ? 1 : -1
  end
  list_comparator(own_matches, other_matches)
end
# Walk through every language that appears in either element.
# * for each language, compare the two elements' literals in that language with list_comparator
# * add up the per-language results; the sign of the total decides the order
# @param other the element to compare with
# @return 1 if the total is positive, -1 if it is negative, 0 otherwise
def multi_language_list_comparator(other)
  all_languages = (languages + other.languages).uniq
  per_language = all_languages.map do |lang|
    list_comparator(filtered_literals(lang), other.filtered_literals(lang))
  end
  per_language.sum <=> 0
end
# Compare two lists of literals position by position, ignoring case.
# * two empty lists are equal (previously this returned -1 whichever way round the lists were passed,
#   which gave Array#sort contradictory answers for two elements that have no values)
# * if only one list is empty, the non-empty list sorts lower
# * the first position whose lower-cased strings differ decides the order
# * if one list is a prefix of the other, the shorter list sorts lower
# @param list [Array] literals of this element
# @param other_list [Array] literals of the other element
# @return [Integer] -1, 0 or 1
def list_comparator(list, other_list)
  return 0 if list.empty? && other_list.empty?
  # if an element doesn't have any terms in a language, the other element sorts lower
  return -1 if other_list.empty?
  return 1 if list.empty?

  shared_size = [list.size, other_list.size].min
  shared_size.times do |idx|
    cmp = to_downcase(list[idx]) <=> to_downcase(other_list[idx])
    return cmp unless cmp.zero?
  end
  # didn't find any diffs, shorter list is considered lower
  list.size <=> other_list.size
end
# Decide whether two literals can be compared by value.
# @param lit literal of this element
# @param other_lit literal of the other element
# @return false if exactly one literal has a language marker, true if neither has one; otherwise,
#   whether the two languages are equal
def same_language?(lit, other_lit)
  if only_one_has_language_marker?(lit, other_lit)
    false
  elsif neither_have_language_markers?(lit, other_lit)
    true
  else
    lit.language == other_lit.language
  end
end
# @param lit literal of this element
# @param other_lit literal of the other element
# @return true if language? is false for both literals; otherwise, false
def neither_have_language_markers?(lit, other_lit)
  !(language?(lit) || language?(other_lit))
end
# @param lit literal of this element
# @param other_lit literal of the other element
# @return true if language? holds for exactly one of the two literals; otherwise, false
def only_one_has_language_marker?(lit, other_lit)
  if language?(lit)
    !language?(other_lit)
  else
    language?(other_lit)
  end
end
# Ask the LanguageService whether a literal carries a language marker.
# @param lit the literal to inspect
# @return the answer of Qa::LinkedData::LanguageService.literal_has_language_marker? for lit
def language?(lit)
  Qa::LinkedData::LanguageService.literal_has_language_marker?(lit)
end
# @param lang the language to test
# @return true if a preferred language is set and lang equals it; false when no preferred language is
#   present or the languages differ
def preferred_language?(lang)
  return false unless preferred_language.present?
  lang == preferred_language
end
# Lower-cased string form of a literal, so comparisons ignore case.
# @param lit any object; it is converted with to_s first
# @return [String] the lower-cased string
def to_downcase(lit)
  text = lit.to_s
  text.downcase
end
# Per-language bins of this element's literals, built by create_all_filters on first use and reused afterwards.
# @return [Hash] language => array of literals in that language
def filtered_literals_by_language
  @filtered_literals_by_language = create_all_filters unless @filtered_literals_by_language
  @filtered_literals_by_language
end
# Group this element's literals by their language, keeping the literals in their existing order.
# Literals for which language returns nil are collected under the nil key.
# @return [Hash] language => array of literals in that language
def create_all_filters
  literals.group_by { |lit| language(lit) }
end
end
private_constant :DeepSortElement
end
end
end