diff --git a/app/models/mets_document/factory.rb b/app/models/mets_document/factory.rb
index cbfb6d60c5..1421ecdc63 100644
--- a/app/models/mets_document/factory.rb
+++ b/app/models/mets_document/factory.rb
@@ -7,7 +7,24 @@ def initialize(mets)
     end
 
+    # Returns the document to ingest from: a Pudl3MVWMetsDocument built from
+    # the same source file when pudl3_mvw? holds, otherwise +mets+ itself,
+    # unchanged.
     def new
-      mets
+      if pudl3_mvw?
+        Pudl3MVWMetsDocument.new(mets.source_file)
+      else
+        mets
+      end
     end
+
+    private
+
+      # True when collection_slugs equals "pudl0003" and the document has no
+      # Physical structMap. METS spells the attribute TYPE in upper case and
+      # XPath attribute names are case-sensitive, so the predicate must use
+      # @TYPE; a lower-case @type never matches, making the clause always true.
+      def pudl3_mvw?
+        mets.collection_slugs == "pudl0003" && mets.mets.xpath("/mets:mets/mets:structMap[@TYPE='Physical']").empty?
+      end
   end
 end
diff --git a/app/models/pudl3_mvw_mets_document.rb b/app/models/pudl3_mvw_mets_document.rb
new file mode 100644
index 0000000000..05e858880d
--- /dev/null
+++ b/app/models/pudl3_mvw_mets_document.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+# METSDocument variant that treats the parent directory of each file path as
+# a volume and always reports itself as a multi-volume, structureless work.
+class Pudl3MVWMetsDocument < METSDocument
+  def multi_volume?
+    true
+  end
+
+  # Unique names of the directories that directly contain the files.
+  def volume_ids
+    files.map { |file| file[:path].split("/")[-2] }.uniq
+  end
+
+  # Volume label: the volume id with every "vol" removed.
+  def label_for_volume(volume_id)
+    volume_id.gsub("vol", "")
+  end
+
+  # Files whose containing directory is exactly volume_id. Comparing the
+  # directory name (as volume_ids does) instead of any substring of the path
+  # keeps e.g. "vol1" from also collecting the files of "vol10".
+  def files_for_volume(volume_id)
+    files.select { |file| file[:path].split("/")[-2] == volume_id }
+  end
+
+  def structureless?
+    true
+  end
+end
diff --git a/spec/jobs/ingest_mets_job_spec.rb b/spec/jobs/ingest_mets_job_spec.rb
index d102219f80..c17e5ac13a 100644
--- a/spec/jobs/ingest_mets_job_spec.rb
+++ b/spec/jobs/ingest_mets_job_spec.rb
@@ -61,5 +61,26 @@
         expect(child_books[1].title).to eq ["second volume"]
       end
     end
+    context "when given a pudl0003 MVW with no structmap" do
+      let(:mets_file) { Rails.root.join("spec", "fixtures", "mets", "pudl0003-tc85_2621.mets") }
+      before do
+        allow(File).to receive(:open).with("/mnt/diglibdata/pudl/pudl0003/tc85_2621/vol01/00000001.tif").and_return(File.open(tiff_file))
+        allow(File).to receive(:open).with("/mnt/diglibdata/pudl/pudl0003/tc85_2621/vol01/00000002.tif").and_return(File.open(tiff_file))
+        allow(File).to receive(:open).with("/mnt/diglibdata/pudl/pudl0003/tc85_2621/vol02/00000001.tif").and_return(File.open(tiff_file))
+      end
+      it "hacks together a MVW from the path" do
+        described_class.perform_now(mets_file, user)
+
+        books = adapter.query_service.find_all_of_model(model: ScannedResource)
+        parent_book = books.find { |x| x.source_metadata_identifier.present? }
+        expect(parent_book).not_to be_nil
+        expect(parent_book.member_ids).not_to be_blank
+        children = adapter.query_service.find_members(resource: parent_book).to_a
+
+        expect(children.map(&:class)).to eq [ScannedResource, ScannedResource]
+        expect(children[0].member_ids.length).to eq 2
+        expect(children[1].member_ids.length).to eq 1
+      end
+    end
   end
 end