Skip to content
This repository has been archived by the owner on May 28, 2024. It is now read-only.

Commit

Permalink
Merge pull request #327 from sul-dlss/indexers
Browse files Browse the repository at this point in the history
Move indexing code into the app (from dor-services)
  • Loading branch information
justinlittman committed Jan 29, 2020
2 parents a04dc46 + 369e2f3 commit 39f00c6
Show file tree
Hide file tree
Showing 30 changed files with 1,682 additions and 9 deletions.
4 changes: 4 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

require:
- rubocop-performance
- rubocop-rails
Expand Down Expand Up @@ -25,3 +26,6 @@ Metrics/AbcSize:
Metrics/MethodLength:
Exclude:
- 'app/controllers/dor_controller.rb'

Naming/PredicateName:
NamePrefixBlacklist: is_
86 changes: 80 additions & 6 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,48 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2019-09-24 11:52:09 -0700 using RuboCop version 0.74.0.
# on 2020-01-08 23:14:02 -0800 using RuboCop version 0.74.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 1
Lint/UselessAssignment:
Exclude:
- 'app/controllers/dor_controller.rb'

# Offense count: 5
Metrics/AbcSize:
Max: 35

# Offense count: 1
Metrics/CyclomaticComplexity:
Max: 7

# Offense count: 3
# Configuration parameters: CountComments, ExcludedMethods.
Metrics/MethodLength:
Max: 31

# Offense count: 1
# Configuration parameters: CountKeywordArgs.
Metrics/ParameterLists:
Max: 6

# Offense count: 2
RSpec/AnyInstance:
Exclude:
- 'spec/indexers/composite_indexer_spec.rb'
- 'spec/indexers/processable_indexer_spec.rb'

# Offense count: 9
# Configuration parameters: Max.
RSpec/ExampleLength:
Exclude:
- 'spec/indexers/composite_indexer_spec.rb'
- 'spec/indexers/describable_indexer_spec.rb'
- 'spec/indexers/identifiable_indexer_spec.rb'
- 'spec/indexers/processable_indexer_spec.rb'
- 'spec/routing/dor_spec.rb'

# Offense count: 1
Expand All @@ -24,26 +57,67 @@ RSpec/MessageSpies:
Exclude:
- 'spec/controllers/dor_controller_spec.rb'

# Offense count: 8
# Offense count: 18
# Configuration parameters: AggregateFailuresByDefault.
RSpec/MultipleExpectations:
Max: 4
Max: 10

# Offense count: 2
# Configuration parameters: IgnoreSharedExamples.
RSpec/NamedSubject:
Exclude:
- 'spec/models/queue_status_spec.rb'

# Offense count: 1
# Offense count: 3
RSpec/NestedGroups:
Max: 4

# Offense count: 3
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
RSpec/VerifiedDoubles:
Exclude:
- 'spec/indexers/composite_indexer_spec.rb'
- 'spec/models/queue_status_spec.rb'

# Offense count: 65
# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: EnforcedStyle.
# SupportedStyles: strict, flexible
Rails/TimeZone:
Exclude:
- 'app/indexers/process_indexer.rb'

# Offense count: 4
Style/ClassVars:
Exclude:
- 'app/indexers/identifiable_indexer.rb'

# Offense count: 9
Style/Documentation:
Exclude:
- 'spec/**/*'
- 'test/**/*'
- 'app/indexers/composite_indexer.rb'
- 'app/indexers/describable_indexer.rb'
- 'app/indexers/editable_indexer.rb'
- 'app/indexers/identifiable_indexer.rb'
- 'app/indexers/processable_indexer.rb'
- 'app/indexers/releasable_indexer.rb'
- 'app/indexers/solr_doc_helper.rb'
- 'app/services/indexer.rb'

# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: AutoCorrect, EnforcedStyle, IgnoredMethods.
# SupportedStyles: predicate, comparison
Style/NumericPredicate:
Exclude:
- 'spec/**/*'
- 'app/indexers/processable_indexer.rb'

# Offense count: 215
# Cop supports --auto-correct.
# Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
# URISchemes: http, https
Metrics/LineLength:
Max: 189
Max: 192
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ group :production do
end

group :development, :test do
gem 'byebug'
gem 'coveralls', require: false
gem 'rspec-rails', '~> 3.0'
gem 'simplecov', require: false
gem 'webmock'
end

group :development do
Expand Down
14 changes: 14 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ GEM
i18n (>= 0.7, < 2)
minitest (~> 5.1)
tzinfo (~> 1.1)
addressable (2.7.0)
public_suffix (>= 2.0.2, < 5.0)
airbrussh (1.4.0)
sshkit (>= 1.6.1, != 1.7.0)
arel (9.0.0)
Expand All @@ -67,6 +69,7 @@ GEM
bundler-audit (0.6.1)
bundler (>= 1.2.0, < 3)
thor (~> 0.18)
byebug (11.0.1)
capistrano (3.11.2)
airbrussh (>= 1.0.0)
i18n
Expand Down Expand Up @@ -96,6 +99,8 @@ GEM
term-ansicolor (~> 1.3)
thor (>= 0.19.4, < 2.0)
tins (~> 1.6)
crack (0.4.3)
safe_yaml (~> 1.0.0)
crass (1.0.6)
daemons (1.3.1)
deep_merge (1.2.1)
Expand Down Expand Up @@ -191,6 +196,7 @@ GEM
haml (5.1.2)
temple (>= 0.8.0)
tilt
hashdiff (1.0.0)
honeybadger (3.3.1)
hooks (0.4.1)
uber (~> 0.0.14)
Expand Down Expand Up @@ -247,6 +253,7 @@ GEM
parallel (1.19.1)
parser (2.7.0.2)
ast (~> 2.4.0)
public_suffix (4.0.3)
puma (3.12.2)
rack (2.1.1)
rack-test (1.1.0)
Expand Down Expand Up @@ -347,6 +354,7 @@ GEM
mime-types
nokogiri
rest-client
safe_yaml (1.0.5)
simplecov (0.16.1)
docile (~> 1.1)
json (>= 1.8, < 3)
Expand Down Expand Up @@ -390,6 +398,10 @@ GEM
unf_ext
unf_ext (0.0.7.6)
unicode-display_width (1.6.1)
webmock (3.7.6)
addressable (>= 2.3.6)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
websocket-driver (0.7.1)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.4)
Expand All @@ -401,6 +413,7 @@ PLATFORMS

DEPENDENCIES
bootsnap (>= 1.1.0)
byebug
capistrano (~> 3.0)
capistrano-bundler
capistrano-passenger
Expand All @@ -425,6 +438,7 @@ DEPENDENCIES
rubocop-rails
rubocop-rspec
simplecov
webmock

BUNDLED WITH
2.1.4
3 changes: 2 additions & 1 deletion app/controllers/dor_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def reindex_pid(pid, logger:, add_attributes:)

# benchmark how long it takes to convert the object to a Solr document
to_solr_stats = Benchmark.measure('to_solr') do
solr_doc = obj.to_solr
indexer = Indexer.for(obj)
solr_doc = indexer.to_solr
solr.add(solr_doc, add_attributes: add_attributes)
end.format('%n realtime %rs total CPU %ts').gsub(/[\(\)]/, '')

Expand Down
26 changes: 26 additions & 0 deletions app/indexers/composite_indexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# frozen_string_literal: true

# Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
# Groups several indexer classes so they can be instantiated together for one
# resource and produce a single merged Solr document.
class CompositeIndexer
  attr_reader :indexers

  # @param indexers [Array<#new>] indexer classes; each must accept `new(resource:)`
  def initialize(*indexers)
    @indexers = indexers
  end

  # Instance-level `new`, mirroring the `new(resource:)` signature that the
  # wrapped indexer classes are themselves called with.
  # @return [CompositeIndexer::Instance]
  def new(resource:)
    Instance.new(indexers, resource: resource)
  end

  # Holds one instantiated sub-indexer per configured class, all bound to the
  # same resource.
  class Instance
    attr_reader :indexers, :resource

    def initialize(indexers, resource:)
      @resource = resource
      @indexers = indexers.map { |indexer_class| indexer_class.new(resource: resource) }
    end

    # @return [Hash] the merged solr document for all the sub-indexers
    #   (on duplicate keys the later indexer's value wins)
    def to_solr
      partial_docs = indexers.map(&:to_solr)
      partial_docs.reduce({}) { |merged, partial| merged.merge(partial) }
    end
  end
end
22 changes: 22 additions & 0 deletions app/indexers/data_indexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true

# Indexing provided by ActiveFedora
# Runs the ActiveFedora::Indexing behaviour against a wrapped resource by
# delegating the listed attributes to that resource.
class DataIndexer
  include ActiveFedora::Indexing

  delegate :create_date, :modified_date, :state, :pid, :inner_object,
           :datastreams, :relationships, :has_model, to: :resource

  attr_reader :resource

  def initialize(resource:)
    @resource = resource
  end

  # we need to override this until https://github.com/samvera/active_fedora/pull/1371
  # has been released
  #
  # Both arguments are forwarded unchanged to the included implementation by the
  # bare `super`; the resulting document then gets the model field set.
  def to_solr(solr_doc = {}, opts = {})
    doc = super
    doc['active_fedora_model_ssi'] = has_model
    doc
  end
end
58 changes: 58 additions & 0 deletions app/indexers/describable_indexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# frozen_string_literal: true

# Builds the descriptive-metadata part of a Solr document from the values
# returned by the resource's `stanford_mods` object.
class DescribableIndexer
  # Maps a `stanford_mods` call to the Solr fields that receive its result.
  # A Symbol key names a method called without arguments; an Array key is a
  # method name followed by the arguments to pass to it.
  MODS_SOURCES = {
    sw_title_display: %w[sw_display_title_tesim],
    main_author_w_date: %w[sw_author_ssim sw_author_tesim],
    sw_sort_author: %w[sw_author_sort_ssi],
    sw_language_facet: %w[sw_language_ssim sw_language_tesim],
    sw_genre: %w[sw_genre_ssim sw_genre_tesim],
    format_main: %w[sw_format_ssim sw_format_tesim],
    topic_facet: %w[sw_topic_ssim sw_topic_tesim],
    era_facet: %w[sw_subject_temporal_ssim sw_subject_temporal_tesim],
    geographic_facet: %w[sw_subject_geographic_ssim sw_subject_geographic_tesim],
    %i[term_values typeOfResource] => %w[mods_typeOfResource_ssim mods_typeOfResource_tesim],
    pub_year_sort_str: %w[sw_pub_date_sort_ssi],
    pub_year_int: %w[sw_pub_date_sort_isi],
    pub_year_display_str: %w[sw_pub_date_facet_ssi]
  }.freeze

  # Fields that are reduced to their first collected value.
  SINGLE_VALUED_KEYS = %w[sw_pub_date_sort_ssi sw_pub_date_sort_isi sw_pub_date_facet_ssi].freeze

  # Fields that get an explicit "(none)" placeholder when nothing was collected.
  PLACEHOLDER_KEYS = %w[sw_language_tesim sw_genre_tesim sw_format_tesim].freeze

  attr_reader :resource

  # @param resource [#stanford_mods] the object being indexed
  def initialize(resource:)
    @resource = resource
  end

  # @return [Hash] the partial solr document for describable concerns
  def to_solr
    add_metadata_format_to_solr_doc.merge(add_mods_to_solr_doc)
  end

  # @return [Hash] the fixed metadata-format field
  def add_metadata_format_to_solr_doc
    { 'metadata_format_ssim' => 'mods' }
  end

  # @return [Hash] Solr fields populated from the MODS_SOURCES mapping
  def add_mods_to_solr_doc
    solr_doc = {}

    MODS_SOURCES.each_pair do |meth, solr_keys|
      vals = mods_values(meth)
      next if vals.nil? || (vals.respond_to?(:empty?) && vals.empty?)

      solr_keys.each do |key|
        # asterisk to avoid multi-dimensional array: push values, not the array
        (solr_doc[key] ||= []).push(*vals)
      end
    end

    # convert multivalued fields to single value
    SINGLE_VALUED_KEYS.each do |key|
      solr_doc[key] = solr_doc[key].first unless solr_doc[key].nil?
    end
    # some fields get explicit "(none)" placeholder values, mostly for faceting
    PLACEHOLDER_KEYS.each do |key|
      solr_doc[key] = ['(none)'] if solr_doc[key].blank?
    end
    solr_doc
  end

  private

  # Calls `stanford_mods` with a MODS_SOURCES key without mutating that key, so
  # the shared, frozen mapping stays intact across calls.
  def mods_values(meth)
    resource.stanford_mods.send(*Array(meth))
  end
end
23 changes: 23 additions & 0 deletions app/indexers/editable_indexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# frozen_string_literal: true

# Produces the partial Solr document carrying the resource's default rights,
# agreement and default use-license fields.
class EditableIndexer
  include SolrDocHelper

  attr_reader :resource

  def initialize(resource:)
    @resource = resource
  end

  # @return [Hash] the solr document populated through `add_solr_value`
  def to_solr
    solr_doc = {}
    add_solr_value(solr_doc, 'default_rights', default_rights_for_indexing, :string, [:symbol])
    # the agreement is only indexed when the resource has an agreement object
    if resource.agreement_object
      add_solr_value(solr_doc, 'agreement', resource.agreement, :string, [:symbol])
    end
    add_solr_value(solr_doc, 'default_use_license_machine', resource.use_license, :string, [:stored_sortable])
    solr_doc
  end

  # @return [String] A description of the rights defined in the default object rights datastream. Can be 'Stanford', 'World', 'Dark' or 'None'
  #   (a fixed fallback message is returned when the code is not in the lookup table)
  def default_rights_for_indexing
    rights_type_codes = Dor::RightsMetadataDS::RIGHTS_TYPE_CODES
    rights_type_codes.fetch(resource.default_rights, 'Unrecognized default rights value')
  end
end
Loading

0 comments on commit 39f00c6

Please sign in to comment.