Skip to content
This repository has been archived by the owner on Nov 26, 2019. It is now read-only.

improve work indexing of representative data #736

Merged
merged 5 commits into from Jul 28, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 9 additions & 8 deletions app/indexers/generic_work_indexer.rb
Expand Up @@ -28,9 +28,11 @@ def generate_solr_document
license_service.authority.find(id).fetch('term', nil)
end.compact

if object.representative_id && object.representative
representative = ultimate_representative(object)
if representative
# need to index these for when it's a child work on a parent's show page
representative = ultimate_representative(object.representative)
# Note corresponding code in GenericWork#update_index makes sure works using
# this work as a representative also get updated.
doc[ActiveFedora.index_field_mapper.solr_name('representative_width', type: :integer)] = representative.width.first if representative.width.present?
doc[ActiveFedora.index_field_mapper.solr_name('representative_height', type: :integer)] = representative.height.first if representative.height.present?
doc[ActiveFedora.index_field_mapper.solr_name('representative_original_file_id')] = representative.original_file.id if representative.original_file
Expand All @@ -47,18 +49,17 @@ def remove_duplicates(field)

# If works representative is another work, find IT's representative,
# recursively, until you get a terminal node, presumably fileset.
# Return nil if there is no terminal representative.
def ultimate_representative(work)
return work unless work.respond_to?(:representative) && work.representative.present?
return nil unless work.representative_id && work.representative

candidate = work.representative
if candidate.respond_to?(:representative) &&
candidate.representative_id.present? &&
candidate.representative.present? &&
! candidate.reprsentative.equal?(candidate)
return nil if candidate.equal?(work) # recursive self-pointing representative, bah

if candidate.respond_to?(:representative)
ultimate_representative(candidate)
else
candidate
end
end

end
24 changes: 24 additions & 0 deletions app/models/generic_work.rb
Expand Up @@ -14,6 +14,30 @@ class GenericWork < ActiveFedora::Base

validate :legal_representative_id, :legal_thumbnail_id

# If this work is a representative for some OTHER work, changes to it's
# representative requires reindexing that parent, so some expensive work
# is sadly needed.
#
# We want to try to do this expensive thing only do this if representative has actually
# changed, but hard to count on getting changes in various circumstances, forwards-compatibly.
# We try, but if both changes and previous_changes are blank, we figure
# we better do it anyway. Not totally sure if this catching the right things,
# but it seems to work not missing anything.
#
# This code goes with custom code in indexer to index representative_width,
# representative_height, and representative_original_file_id.
def update_index(*args)
super.tap do
if self.changes.keys.include?("representative_id") ||
self.previous_changes.keys.include?("representative_id") ||
(self.changes.blank? && self.previous_changes.blank?)
GenericWork.where(GenericWork.reflections[:representative_id].solr_key => self.id).each do |parent_work|
parent_work.update_index
end
end
end
end

protected

def legal_representative_id
Expand Down
116 changes: 116 additions & 0 deletions spec/indexers/generic_work_indexer_spec.rb
Expand Up @@ -41,4 +41,120 @@
expect(solr_document[mapper.solr_name('maker_facet', :facetable)].size).to eq 3
end

# These are slow, was hard to get them to work reliably and be reliably testing at all
describe "representative fields" do
let(:width) { 100 }
let(:height) { 200 }
let(:width_field) { Solrizer.solr_name("representative_width", type: :integer) }
let(:height_field) { Solrizer.solr_name("representative_height", type: :integer) }
let(:file_id_field) { Solrizer.solr_name("representative_original_file_id") }

describe "standard work with representative fileset" do
let(:work) do
FactoryGirl.create(:work, :real_public_image) do |work|
work.representative.original_file.width = [width]
work.representative.original_file.height = [height]
end
end
it "indexes representative fields" do
expect(solr_document[file_id_field]).to eq(work.representative.original_file.id)
expect(solr_document[width_field]).to eq(width)
expect(solr_document[height_field]).to eq(height)
end
end

describe "work with no representative" do
let(:work) { FactoryGirl.create(:work) }
before do
# precondition, make sure we set things up right
expect(work.representative).to eq(nil)
end
it "indexes without those fields without raising" do
expect(solr_document[file_id_field]).to be nil
expect(solr_document[width_field]).to be nil
expect(solr_document[height_field]).to be nil
end
end

# This is a mess and very slow, better way to test?
describe "work with representative child work" do
let(:child_work) do
FactoryGirl.create(:work, :real_public_image) do |w|
w.representative.original_file.width = [width]
w.representative.original_file.height = [height]
end
end
let(:work) do
FactoryGirl.create(:work) do |w|
w.representative_id = child_work.id
w.representative = child_work
end
end
it "indexes representative from child work" do
expect(solr_document[file_id_field]).to eq(work.representative.representative.original_file.id)
expect(solr_document[width_field]).to eq(width)
expect(solr_document[height_field]).to eq(height)
end

describe "when child work representative is updated" do
let(:new_width) { 1000 }
let(:new_height) { 2000 }
let(:new_file_set) { FactoryGirl.create(:file_set, :public) }

before do
# have to get original work in index, so child work can find it
# to update it.
work.ordered_members << work.representative
work.save!
end

it "updates parent work in index" do
child_work.ordered_members << new_file_set
IngestFileJob.perform_now(new_file_set, (Rails.root + "spec/fixtures/sample.jpg").to_s, nil)
child_work.representative = new_file_set

indexed_parent = SolrDocument.find(work.id)
expect(indexed_parent["representative_original_file_id_tesim"]).not_to include(new_file_set.original_file.id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want contain_exactly instead of include?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not_to contain_exactly does not mean the same thing as not_to include. Like it could include those things, but not exactly, and still pass.

I guess I could say I expect it to eq [] if that's what I expect. I guess I'll look more into what I expect.


child_work.save!

indexed_parent = SolrDocument.find(work.id)
expect(indexed_parent["representative_original_file_id_tesim"]).to include(new_file_set.original_file.id)
end
end
end

describe "with with representative child work with no representative" do
let(:work) do
FactoryGirl.create(:work) do |w|
w.representative = FactoryGirl.create(:work)
end
end
before do
# precondition, make sure we set things up right
expect(work.representative.representative).to eq(nil)
end
it "indexes without those fields without raising" do
expect(solr_document[file_id_field]).to be nil
expect(solr_document[width_field]).to be nil
expect(solr_document[height_field]).to be nil
end
end

describe "with self-pointing representative" do
# pathological, but since the model can do it, we want to make sure
# we don't infinite loop on it.
let(:work) do
FactoryGirl.create(:work) do |w|
w.representative_id = w.id
w.representative = w
end
end
it "finishes with blank values" do
expect(solr_document[file_id_field]).to be nil
expect(solr_document[width_field]).to be nil
expect(solr_document[height_field]).to be nil
end
end
end
end