From 8194d37f8ee9fff887b0fdfa3d338508ae262997 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 17 Nov 2016 14:56:38 -0800 Subject: [PATCH] Reindex the entire collection when you hit 'reindex' --- .rubocop.yml | 1 + app/controllers/exhibits_controller.rb | 4 +-- app/jobs/iiif_ingest_job.rb | 2 +- app/jobs/spotlight/reindex_job.rb | 27 ++++++++++++++++ app/models/exhibit_proxy.rb | 18 +++++++++++ app/models/iiif_resource.rb | 6 ---- .../plum_event_processor/create_processor.rb | 2 +- .../plum_event_processor/update_processor.rb | 2 +- spec/controllers/catalog_controller_spec.rb | 4 +-- spec/jobs/iiif_ingest_job_spec.rb | 8 ++--- spec/jobs/spotlight/reindex_job_spec.rb | 31 +++++++++++++++++++ spec/models/iiif_resource_spec.rb | 6 ++-- spec/services/plum_event_processor_spec.rb | 12 +++---- 13 files changed, 96 insertions(+), 27 deletions(-) create mode 100644 app/jobs/spotlight/reindex_job.rb create mode 100644 app/models/exhibit_proxy.rb create mode 100644 spec/jobs/spotlight/reindex_job_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index 19dd9ca6..a0dd1ce9 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -107,6 +107,7 @@ Rails/TimeZone: Rails/DynamicFindBy: Exclude: - 'app/controllers/exhibits_controller.rb' + - 'app/models/exhibit_proxy.rb' - 'app/decorators/applies_title_from_slug.rb' RSpec/ExampleWording: diff --git a/app/controllers/exhibits_controller.rb b/app/controllers/exhibits_controller.rb index b5005ca7..89060895 100644 --- a/app/controllers/exhibits_controller.rb +++ b/app/controllers/exhibits_controller.rb @@ -4,9 +4,7 @@ class ExhibitsController < Spotlight::ExhibitsController def ingest_members return unless @exhibit.persisted? 
- collection_manifest = CollectionManifest.find_by_slug(@exhibit.slug) - members = collection_manifest.manifests.map { |x| x['@id'] } - IIIFIngestJob.new.perform members, @exhibit + ExhibitProxy.new(@exhibit).reindex end private diff --git a/app/jobs/iiif_ingest_job.rb b/app/jobs/iiif_ingest_job.rb index 76d74659..693280db 100644 --- a/app/jobs/iiif_ingest_job.rb +++ b/app/jobs/iiif_ingest_job.rb @@ -9,6 +9,6 @@ def perform(urls, exhibit) # Ingest a single IIIF manifest URL as a resource. def ingest(url, exhibit) - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.find_or_initialize_by(url: url, exhibit_id: exhibit.id).save_and_index end end diff --git a/app/jobs/spotlight/reindex_job.rb b/app/jobs/spotlight/reindex_job.rb new file mode 100644 index 00000000..6edfe172 --- /dev/null +++ b/app/jobs/spotlight/reindex_job.rb @@ -0,0 +1,27 @@ +module Spotlight + ## + # Reindex the given resources or exhibits + class ReindexJob < ActiveJob::Base + queue_as :default + + before_enqueue do |job| + resource_list(job.arguments.first).each(&:waiting!) 
+ end + + def perform(exhibit_or_resources) + resource_list(exhibit_or_resources).each(&:reindex) + end + + private + + def resource_list(exhibit_or_resources) + if exhibit_or_resources.is_a?(Spotlight::Exhibit) + [ExhibitProxy.new(exhibit_or_resources)] + elsif exhibit_or_resources.is_a?(Enumerable) + exhibit_or_resources + else + Array(exhibit_or_resources) + end + end + end +end diff --git a/app/models/exhibit_proxy.rb b/app/models/exhibit_proxy.rb new file mode 100644 index 00000000..a6822d02 --- /dev/null +++ b/app/models/exhibit_proxy.rb @@ -0,0 +1,18 @@ +class ExhibitProxy + attr_reader :exhibit + def initialize(exhibit) + @exhibit = exhibit + end + + def reindex + IIIFIngestJob.perform_now members, exhibit + end + + def collection_manifest + CollectionManifest.find_by_slug(exhibit.slug) + end + + def members + collection_manifest.manifests.map { |x| x['@id'] } + end +end diff --git a/app/models/iiif_resource.rb b/app/models/iiif_resource.rb index 50566960..97b306a3 100644 --- a/app/models/iiif_resource.rb +++ b/app/models/iiif_resource.rb @@ -1,9 +1,3 @@ class IIIFResource < Spotlight::Resources::IiifHarvester belongs_to :exhibit, class_name: 'Spotlight::Exhibit' - - def initialize(manifest_url: nil, exhibit: nil) - super() - self.url = manifest_url - self.exhibit_id = exhibit.id if exhibit - end end diff --git a/app/services/plum_event_processor/create_processor.rb b/app/services/plum_event_processor/create_processor.rb index d44bf992..a8a3965a 100644 --- a/app/services/plum_event_processor/create_processor.rb +++ b/app/services/plum_event_processor/create_processor.rb @@ -2,7 +2,7 @@ class PlumEventProcessor class CreateProcessor < Processor def process exhibits.map do |exhibit| - resource = IIIFResource.new(manifest_url: manifest_url, exhibit: exhibit) + resource = IIIFResource.new(url: manifest_url, exhibit: exhibit) resource.save_and_index end.all?(&:present?) 
end diff --git a/app/services/plum_event_processor/update_processor.rb b/app/services/plum_event_processor/update_processor.rb index 99b88c40..8f0dc854 100644 --- a/app/services/plum_event_processor/update_processor.rb +++ b/app/services/plum_event_processor/update_processor.rb @@ -37,7 +37,7 @@ def update_existing_resources def create_new_resources new_exhibits.each do |exhibit| - IIIFResource.new(manifest_url: manifest_url, exhibit: exhibit).save_and_index + IIIFResource.new(url: manifest_url, exhibit: exhibit).save_and_index end end diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 67c6951e..1388cb34 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -5,7 +5,7 @@ let(:url) { "https://hydra-dev.princeton.edu/concern/multi_volume_works/f4752g76q/manifest" } it "hides scanned resources with parents" do exhibit = Spotlight::Exhibit.create title: 'Exhibit A' - resource = IIIFResource.new manifest_url: url, exhibit: exhibit + resource = IIIFResource.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy get :index, params: { q: "", exhibit_id: exhibit.id } @@ -14,7 +14,7 @@ end it "returns MVW from metadata found in volume" do exhibit = Spotlight::Exhibit.create title: 'Exhibit A' - resource = IIIFResource.new manifest_url: url, exhibit: exhibit + resource = IIIFResource.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy get :index, params: { q: "SR1", exhibit_id: exhibit.id } diff --git a/spec/jobs/iiif_ingest_job_spec.rb b/spec/jobs/iiif_ingest_job_spec.rb index 03cc9fbf..d48fbd1e 100644 --- a/spec/jobs/iiif_ingest_job_spec.rb +++ b/spec/jobs/iiif_ingest_job_spec.rb @@ -4,7 +4,7 @@ let(:url1) { 'http://example.com/1/manifest' } let(:url2) { 'http://example.com/2/manifest' } let(:exhibit) { Spotlight::Exhibit.new } - let(:resource) { IIIFResource.new manifest_url: nil, exhibit: exhibit } + let(:resource) { 
IIIFResource.new url: nil, exhibit: exhibit } before do allow(exhibit).to receive(:id).and_return('exhibit1') @@ -12,14 +12,14 @@ end it 'ingests a single url' do - expect(IIIFResource).to receive(:new).with(manifest_url: url1, exhibit: exhibit).and_return(resource) + expect(IIIFResource).to receive(:new).with(url: url1, exhibit_id: exhibit.id).and_return(resource) described_class.new.perform(url1, exhibit) end it 'ingests each of an array of urls' do - expect(IIIFResource).to receive(:new).with(manifest_url: url1, exhibit: exhibit).and_return(resource) - expect(IIIFResource).to receive(:new).with(manifest_url: url2, exhibit: exhibit).and_return(resource) + expect(IIIFResource).to receive(:new).with(url: url1, exhibit_id: exhibit.id).and_return(resource) + expect(IIIFResource).to receive(:new).with(url: url2, exhibit_id: exhibit.id).and_return(resource) described_class.new.perform([url1, url2], exhibit) end diff --git a/spec/jobs/spotlight/reindex_job_spec.rb b/spec/jobs/spotlight/reindex_job_spec.rb new file mode 100644 index 00000000..032496ad --- /dev/null +++ b/spec/jobs/spotlight/reindex_job_spec.rb @@ -0,0 +1,31 @@ +require 'rails_helper' + +RSpec.describe Spotlight::ReindexJob do + let(:url1) { 'http://example.com/1/manifest' } + let(:exhibit) { Spotlight::Exhibit.new } + let(:resource) { IIIFResource.new url: nil, exhibit: exhibit } + let(:manifest) { object_double(CollectionManifest.new, manifests: [{ "@id" => url1 }]) } + + before do + allow(exhibit).to receive(:id).and_return('exhibit1') + allow(CollectionManifest).to receive(:find_by_slug).and_return(manifest) + allow(resource).to receive(:save_and_index) + end + + it 'reindexes an exhibit' do + allow(IIIFResource).to receive(:new).and_return(resource) + + described_class.perform_now(exhibit) + + expect(IIIFResource).to have_received(:new).with(url: url1, exhibit_id: exhibit.id) + end + + it 'can reindex multiple IIIF Resources' do + resources = [instance_double(IIIFResource, reindex: true), 
instance_double(IIIFResource, reindex: true)] + + described_class.perform_now(resources) + + expect(resources.first).to have_received(:reindex) + expect(resources.last).to have_received(:reindex) + end +end diff --git a/spec/models/iiif_resource_spec.rb b/spec/models/iiif_resource_spec.rb index b5d5a9ec..4f415d0f 100644 --- a/spec/models/iiif_resource_spec.rb +++ b/spec/models/iiif_resource_spec.rb @@ -5,7 +5,7 @@ let(:url) { 'https://hydra-dev.princeton.edu/concern/scanned_resources/1r66j1149/manifest' } it 'ingests a iiif manifest' do exhibit = Spotlight::Exhibit.create title: 'Exhibit A' - resource = described_class.new manifest_url: url, exhibit: exhibit + resource = described_class.new url: url, exhibit: exhibit expect(resource.save).to be true solr_doc = nil @@ -18,7 +18,7 @@ let(:url) { "https://hydra-dev.princeton.edu/concern/multi_volume_works/f4752g76q/manifest" } it "ingests both items as individual solr records, marking the child" do exhibit = Spotlight::Exhibit.create title: 'Exhibit A' - resource = described_class.new manifest_url: url, exhibit: exhibit + resource = described_class.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy docs = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"] @@ -33,7 +33,7 @@ let(:url) { "https://hydra-dev.princeton.edu/concern/scanned_resources/s9w032300r/manifest" } it "ingests a iiif manifest using the metadata pool, excludes range labels when missing" do exhibit = Spotlight::Exhibit.create title: 'Exhibit A' - resource = described_class.new manifest_url: url, exhibit: exhibit + resource = described_class.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy docs = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"] scanned_resource_doc = docs.find { |x| x["full_title_ssim"] == ["Christopher and his kind, 1929-1939"] } diff --git a/spec/services/plum_event_processor_spec.rb 
b/spec/services/plum_event_processor_spec.rb index 0ad5a94b..b0d13243 100644 --- a/spec/services/plum_event_processor_spec.rb +++ b/spec/services/plum_event_processor_spec.rb @@ -39,7 +39,7 @@ end it "deletes that resource" do exhibit = FactoryGirl.create(:exhibit, slug: "first") - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.new(url: url, exhibit: exhibit).save_and_index expect(processor.process).to eq true @@ -51,7 +51,7 @@ let(:type) { "UPDATED" } it "updates that resource" do exhibit = FactoryGirl.create(:exhibit, slug: "first") - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.new(url: url, exhibit: exhibit).save_and_index expect(processor.process).to eq true resource = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"].first @@ -61,7 +61,7 @@ context "when it's no longer accessible" do it "marks it as non-public" do exhibit = FactoryGirl.create(:exhibit, slug: "first") - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.new(url: url, exhibit: exhibit).save_and_index # swap casseette to make the resource inaccessible VCR.use_cassette('plum_events_no_permission') do @@ -75,7 +75,7 @@ context "when it's private and then is made accessible" do it "marks it as public" do exhibit = FactoryGirl.create(:exhibit, slug: "first") - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.new(url: url, exhibit: exhibit).save_and_index resource_id = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"].first["id"] document = SolrDocument.find(resource_id) document.make_private!(exhibit) @@ -93,7 +93,7 @@ let(:collection_slugs) { [] } it "removes old ones" do exhibit = FactoryGirl.create(:exhibit, slug: "first") - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.new(url: url, exhibit: exhibit).save_and_index 
expect(processor.process).to eq true @@ -106,7 +106,7 @@ it "moves it to a new one" do exhibit = FactoryGirl.create(:exhibit, slug: "first") FactoryGirl.create(:exhibit, slug: "banana") - IIIFResource.new(manifest_url: url, exhibit: exhibit).save_and_index + IIIFResource.new(url: url, exhibit: exhibit).save_and_index expect(processor.process).to eq true