From b14203a6d15dd4451b546bb3da270ab836fb1875 Mon Sep 17 00:00:00 2001 From: Chris Beer Date: Mon, 17 Nov 2025 11:22:10 -0800 Subject: [PATCH] Add an OAI set for seamless harvesting from an organization. --- app/controllers/concerns/oai_concern.rb | 15 +++- app/controllers/oai_controller.rb | 72 ++++++++++++++----- .../oai_marc_record_writer_service.rb | 14 ++-- spec/features/oai_spec.rb | 14 ++-- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/app/controllers/concerns/oai_concern.rb b/app/controllers/concerns/oai_concern.rb index 549de4ce..5000f0df 100644 --- a/app/controllers/concerns/oai_concern.rb +++ b/app/controllers/concerns/oai_concern.rb @@ -104,14 +104,25 @@ def date_range # rubocop:disable Metrics/AbcSize # valid iff all values can be parsed and set/page are nonnegative integers def valid? - Integer(set) if @set.present? + assert_set_valid! + Integer(page) if @page.present? Date.parse(from_date) if @from_date.present? Date.parse(until_date) if @until_date.present? !set.to_i.negative? && !page.to_i.negative? - rescue ArgumentError + rescue ArgumentError, ActiveRecord::RecordNotFound false end + + def assert_set_valid! + return if @set.blank? + + if @set.start_with?('organization/') + Organization.find_by!(slug: @set.delete_prefix('organization/')) + else + Integer(set) + end + end # rubocop:enable Metrics/AbcSize end diff --git a/app/controllers/oai_controller.rb b/app/controllers/oai_controller.rb index 344e864d..b5a934d1 100644 --- a/app/controllers/oai_controller.rb +++ b/app/controllers/oai_controller.rb @@ -110,8 +110,16 @@ def error_params end # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity - def next_record_page(token, use_interstream_deltas: false) - streams = if token.set.present? 
+ def next_record_page(token, use_interstream_deltas: nil) + streams = if token.set&.start_with?('organization/') + org_slug = token.set.delete_prefix('organization/') + stream = Organization.find_by!(slug: org_slug).default_stream + use_interstream_deltas = true if use_interstream_deltas.nil? + + authorize! :read, stream + + [stream] + elsif token.set.present? Stream.accessible_by(current_ability).where(id: token.set) else Stream.accessible_by(current_ability).where(status: 'default') @@ -205,31 +213,57 @@ def build_list_records_response(page, token = nil) end # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets - # rubocop:disable Metrics/AbcSize - # rubocop:disable Metrics/MethodLength def build_list_sets_response(streams) Nokogiri::XML::Builder.new do |xml| build_oai_response xml, list_sets_params do xml.ListSets do - streams.each do |stream| - xml.set do - xml.setSpec stream.id - xml.setName stream.display_name - xml.setDescription do - xml[:oai_dc].dc(oai_dc_xmlns) do - xml[:dc].description oai_dc_description(stream) - xml[:dc].contributor stream.organization.slug - xml[:dc].type oai_dc_type(stream) - oai_dc_dates(stream).each do |date| - xml[:dc].date date - end - end - end + build_organizations_list_sets_response(xml, streams.map(&:organization).uniq) + build_streams_list_sets_response(xml, streams) + end + end + end.to_xml + end + + # rubocop:disable Metrics/AbcSize + # rubocop:disable Metrics/MethodLength + def build_streams_list_sets_response(xml, streams) + streams.each do |stream| + xml.set do + xml.setSpec stream.id + xml.setName stream.display_name + xml.setDescription do + xml[:oai_dc].dc(oai_dc_xmlns) do + xml[:dc].description oai_dc_description(stream) + xml[:dc].contributor stream.organization.slug + xml[:dc].type oai_dc_type(stream) + oai_dc_dates(stream).each do |date| + xml[:dc].date date end end end end - end.to_xml + end + end + + def build_organizations_list_sets_response(xml, organizations) + organizations.each do |organization| + 
xml.set do + xml.setSpec "organization/#{organization.slug}" + xml.setName organization.name + xml.setDescription do + xml[:oai_dc].dc(oai_dc_xmlns) do + xml[:dc].description "Seamless harvesting for #{organization.name}" + xml[:dc].contributor organization.slug + xml[:dc].type 'organization' + xml[:dc].source "stream #{organization.default_stream.id}" + + oai_dc_dates(organization.default_stream).each do |date| + xml[:dc].date date + end + end + end + end + end end # rubocop:enable Metrics/AbcSize # rubocop:enable Metrics/MethodLength diff --git a/app/services/oai_marc_record_writer_service.rb b/app/services/oai_marc_record_writer_service.rb index 42e2225d..70506644 100644 --- a/app/services/oai_marc_record_writer_service.rb +++ b/app/services/oai_marc_record_writer_service.rb @@ -9,7 +9,8 @@ def initialize(base_name = nil) end def write_marc_record(record, dump_created_at) - oai_writer.write(record.augmented_marc, oai_id(record), record.stream.id, dump_created_at) + oai_writer.write(record.augmented_marc, oai_id(record), sets: ["organization/#{record.organization.slug}", record.stream.id], + datestamp: dump_created_at) rescue StandardError => e error = "Error writing MARC OAI file #{base_name} id #{record.id}: #{e}" Rails.logger.info(error) @@ -17,7 +18,8 @@ def write_marc_record(record, dump_created_at) end def write_delete(record, dump_created_at) - oai_writer.write_delete(oai_id(record), record.stream.id, dump_created_at) + oai_writer.write_delete(oai_id(record), sets: ["organization/#{record.organization.slug}", record.stream.id], + datestamp: dump_created_at) end def finalize @@ -60,13 +62,13 @@ def initialize(io) @bytes_written = 0 end - def write(record, identifier, set, datestamp = Time.zone.now) + def write(record, identifier, sets: [], datestamp: Time.zone.now) @bytes_written += @io.write <<-EOXML
#{identifier} #{datestamp.strftime('%F')} - #{set} + #{sets.map { |s| "#{s}" }.join("\n")}
#{Ox.dump(OxMarcXmlWriter.encode(record, include_namespace: true))} @@ -75,13 +77,13 @@ def write(record, identifier, set, datestamp = Time.zone.now) EOXML end - def write_delete(identifier, set, datestamp = Time.zone.now) + def write_delete(identifier, sets: [], datestamp: Time.zone.now) @bytes_written += @io.write <<-EOXML
#{identifier} #{datestamp.strftime('%F')} - #{set} + #{sets.map { |s| "#{s}" }.join("\n")}
EOXML diff --git a/spec/features/oai_spec.rb b/spec/features/oai_spec.rb index fe84369e..8c8a0962 100644 --- a/spec/features/oai_spec.rb +++ b/spec/features/oai_spec.rb @@ -67,13 +67,14 @@ it 'renders a name for each set' do visit oai_path(verb: 'ListSets') doc = Nokogiri::XML(page.body) - expect(doc.at_css('ListSets > set > setName').text).to eq('2020-05-06 - ') + expect(doc.at_css('ListSets > set[1] > setName').text).to eq('My Org') + expect(doc.at_css('ListSets > set[2] > setName').text).to eq('2020-05-06 - ') end it 'renders an identifier (setSpec) for each set' do visit oai_path(verb: 'ListSets') doc = Nokogiri::XML(page.body) - expect(doc.at_css('ListSets > set > setSpec').text).to eq(organization.default_stream.id.to_s) + expect(doc.at_css('ListSets > set[2] > setSpec').text).to eq(organization.default_stream.id.to_s) end it 'renders a description for each set' do @@ -184,14 +185,17 @@ it 'renders the set membership of each item' do visit oai_path(verb: 'ListRecords', metadataPrefix: 'marc21') doc = Nokogiri::XML(page.body) - expect(doc.at_css('ListRecords > record > header > setSpec').text).to eq(organization.default_stream.id.to_s) + record_set = doc.css('ListRecords > record > header').first + expect(record_set.css('setSpec').map(&:text)).to contain_exactly("organization/#{organization.slug}", + organization.default_stream.id.to_s) end it 'renders records in the requested set' do visit oai_path(verb: 'ListRecords', metadataPrefix: 'marc21', set: organization.default_stream.id.to_s) doc = Nokogiri::XML(page.body) - doc.css('ListRecords > record > header > setSpec').each do |record_set| - expect(record_set.text).to eq(organization.default_stream.id.to_s) + doc.css('ListRecords > record > header').each do |record_set| + expect(record_set.css('setSpec').map(&:text)).to contain_exactly("organization/#{organization.slug}", + organization.default_stream.id.to_s) end end