From bbde10d93a8e2c575552de2b86f088fcf88fa1dd Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 18 Oct 2018 11:28:09 -0700 Subject: [PATCH] Never commit from code, only from Solr This is the recommended way to handle indexing from Solr's documentation (https://lucene.apache.org/solr/guide/6_6/near-real-time-searching.html#near-real-time-searching). Confirmed to work in development. Synced changes will now take up to 5 minutes to show up in search, but reindexes won't result in a ton of hard commits. --- app/models/iiif_resource.rb | 11 +++++++++++ .../figgy_event_processor/delete_processor.rb | 1 - .../figgy_event_processor/update_processor.rb | 1 - solr/config/solrconfig.xml | 10 ++++++++++ spec/controllers/catalog_controller_spec.rb | 3 +++ spec/models/iiif_resource_spec.rb | 6 ++++++ spec/repositories/friendly_id_repository_spec.rb | 1 + spec/services/figgy_event_processor_spec.rb | 3 +++ 8 files changed, 34 insertions(+), 2 deletions(-) diff --git a/app/models/iiif_resource.rb b/app/models/iiif_resource.rb index 80432f78..759040c0 100644 --- a/app/models/iiif_resource.rb +++ b/app/models/iiif_resource.rb @@ -49,4 +49,15 @@ def solr def document_ids document_builder.documents_to_index.to_a.map { |y| y[:id] } end + + def write_to_index(batch) + documents = documents_that_have_ids(batch) + return unless write? && documents.present? + + blacklight_solr.update data: documents.to_json, + headers: { 'Content-Type' => 'application/json' } + end + + # Override hard commit after indexing every document, for performance. 
+ def commit; end end diff --git a/app/services/figgy_event_processor/delete_processor.rb b/app/services/figgy_event_processor/delete_processor.rb index a5573811..8337a06d 100644 --- a/app/services/figgy_event_processor/delete_processor.rb +++ b/app/services/figgy_event_processor/delete_processor.rb @@ -6,7 +6,6 @@ def process docs["response"]["docs"].each do |doc| index.connection.delete_by_id doc["id"] end - index.connection.commit resource.destroy end true diff --git a/app/services/figgy_event_processor/update_processor.rb b/app/services/figgy_event_processor/update_processor.rb index ab9de6fe..d5e4da10 100644 --- a/app/services/figgy_event_processor/update_processor.rb +++ b/app/services/figgy_event_processor/update_processor.rb @@ -11,7 +11,6 @@ def delete_old_resources delete_resources.each do |resource| resource.document_builder.to_solr.map { |x| x[:id] }.each do |id| index.delete_by_id id.to_s - index.commit end resource.destroy end diff --git a/solr/config/solrconfig.xml b/solr/config/solrconfig.xml index f8ab0bf4..29254867 100644 --- a/solr/config/solrconfig.xml +++ b/solr/config/solrconfig.xml @@ -14,6 +14,16 @@ ${solr.core0.data.dir:} + + + 10000 + 36000000 + false + + + + 300000 + diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 274b8da9..8d3e48c7 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -8,6 +8,7 @@ exhibit = Spotlight::Exhibit.create title: 'Exhibit A', published: true resource = IIIFResource.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy + Blacklight.default_index.connection.commit get :index, params: { q: "", exhibit_id: exhibit.id } @@ -43,6 +44,7 @@ exhibit = Spotlight::Exhibit.create title: 'Exhibit A', published: false resource = IIIFResource.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy + Blacklight.default_index.connection.commit sign_in user get :index, 
params: { q: "", exhibit_id: exhibit.id } @@ -85,6 +87,7 @@ exhibit = Spotlight::Exhibit.create title: 'Exhibit A', published: true resource = IIIFResource.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy + Blacklight.default_index.connection.commit get :index, params: { q: "Scanned Resource", exhibit_id: exhibit.id } diff --git a/spec/models/iiif_resource_spec.rb b/spec/models/iiif_resource_spec.rb index 456cbde9..43cfd4da 100644 --- a/spec/models/iiif_resource_spec.rb +++ b/spec/models/iiif_resource_spec.rb @@ -9,6 +9,7 @@ expect(resource.save).to be true solr_doc = nil + Blacklight.default_index.connection.commit resource.document_builder.to_solr { |x| solr_doc = x } expect(solr_doc["full_title_tesim"]).to eq ['Christopher and his kind, 1929-1939'] expect(solr_doc["readonly_created_tesim"]).to eq ["1976-01-01T00:00:00Z"] @@ -23,6 +24,7 @@ expect(resource.save).to be true solr_doc = nil + Blacklight.default_index.connection.commit resource.document_builder.to_solr { |x| solr_doc = x } expect(solr_doc["readonly_collections_tesim"]).to eq ["East Asian Library Digital Bookshelf"] end @@ -33,6 +35,7 @@ resource = described_class.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy + Blacklight.default_index.connection.commit docs = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"] expect(docs.length).to eq 2 scanned_resource_doc = docs.find { |x| x["full_title_tesim"] == ["Scanned Resource 1"] } @@ -45,6 +48,7 @@ resource = described_class.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy + Blacklight.default_index.connection.commit docs = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"] scanned_resource_doc = docs.find { |x| x["full_title_tesim"] == ["Scanned Resource 1"] } expect(scanned_resource_doc["full_image_url_ssm"]).to eq 
["https://libimages1.princeton.edu/loris/plum/hq%2F37%2Fvn%2F61%2F6-intermediate_file.jp2/full/!600,600/0/default.jpg"] @@ -56,6 +60,8 @@ exhibit = Spotlight::Exhibit.create title: 'Exhibit A' resource = described_class.new url: url, exhibit: exhibit expect(resource.save_and_index).to be_truthy + + Blacklight.default_index.connection.commit docs = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"] scanned_resource_doc = docs.find { |x| x["full_title_tesim"] == ["Christopher and his kind, 1929-1939"] } expect(scanned_resource_doc["readonly_date-created_tesim"]).to eq ['1976-01-01T00:00:00Z'] diff --git a/spec/repositories/friendly_id_repository_spec.rb b/spec/repositories/friendly_id_repository_spec.rb index 6c76821d..4846283b 100644 --- a/spec/repositories/friendly_id_repository_spec.rb +++ b/spec/repositories/friendly_id_repository_spec.rb @@ -14,6 +14,7 @@ end before do resource.reindex + Blacklight.default_index.connection.commit end context "when an exhibit isn't passed" do diff --git a/spec/services/figgy_event_processor_spec.rb b/spec/services/figgy_event_processor_spec.rb index 107122b2..94cee33c 100644 --- a/spec/services/figgy_event_processor_spec.rb +++ b/spec/services/figgy_event_processor_spec.rb @@ -54,6 +54,7 @@ IIIFResource.new(url: url, exhibit: exhibit).save_and_index expect(processor.process).to eq true + Blacklight.default_index.connection.commit resource = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"].first expect(resource["full_title_tesim"]).to eq ["Updated Record"] @@ -95,6 +96,7 @@ it "marks it as public" do exhibit = FactoryBot.create(:exhibit, slug: "first") IIIFResource.new(url: url, exhibit: exhibit).save_and_index + Blacklight.default_index.connection.commit resource_id = Blacklight.default_index.connection.get("select", params: { q: "*:*" })["response"]["docs"].first["access_identifier_ssim"].first document = SolrDocument.find(resource_id) 
document.make_private!(exhibit) @@ -128,6 +130,7 @@ IIIFResource.new(url: url, exhibit: exhibit).save_and_index expect(processor.process).to eq true + Blacklight.default_index.connection.commit expect(IIIFResource.joins(:exhibit).where("spotlight_exhibits.slug" => "banana").length).to eq 1 expect(Blacklight.default_index.connection.get("select")["response"]["docs"].length).to eq 1