Skip to content

Commit

Permalink
Merge 3bb602e into 4819ae2
Browse files Browse the repository at this point in the history
  • Loading branch information
ndushay committed Sep 20, 2018
2 parents 4819ae2 + 3bb602e commit 5a719c6
Show file tree
Hide file tree
Showing 39 changed files with 175 additions and 828 deletions.
2 changes: 1 addition & 1 deletion app/lib/pre_assembly/bundle.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def digital_object_base_params
# Builds the list of object container paths from the manifest: one
# path_in_bundle result per manifest row, taken from the column named by
# manifest_cols[:object_container].
# Raises RuntimeError when manifest_cols is unset, or when any row has no
# value in that column.
def discover_containers_via_manifest
raise RuntimeError, ':manifest_cols must be specified' unless manifest_cols
# NOTE(review): .to_sym is called before the col_name guard below. If
# manifest_cols[:object_container] is nil this line raises NoMethodError and
# the guard is never reached -- confirm the key is always populated.
col_name = manifest_cols[:object_container].to_sym
# The two raise lines that follow are the old and new wording of the same
# guard; this hunk is recorded as "1 addition & 1 deletion".
raise RuntimeError, "object_container must be specified in manifest_cols: #{manifest_cols}" unless col_name
raise RuntimeError, "object must be specified in manifest_cols: #{manifest_cols}" unless col_name
# Stop at the first row lacking a value for col_name, reporting its index.
manifest_rows.each_with_index { |r, i| raise "Missing #{col_name} in row #{i}: #{r}" unless r[col_name] }
manifest_rows.map { |r| path_in_bundle r[col_name] }
end
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/csv_importer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
it "loads a CSV as a hash with indifferent access" do
expect(manifest).to be_an(Array)
expect(manifest.size).to eq(3)
headers = %w{format sourceid filename label year inst_notes prod_notes has_more_metadata description}
headers = %w{format sourceid object label year inst_notes prod_notes has_more_metadata description}
expect(manifest).to all(be_an(ActiveSupport::HashWithIndifferentAccess)) # accessible w/ string and symbols
expect(manifest).to all(include(*headers))
expect(manifest[0][:description]).to be_nil
Expand Down
108 changes: 40 additions & 68 deletions spec/lib/pre_assembly/bundle_spec.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
RSpec.describe PreAssembly::Bundle do
let(:md5_regex) { /^[0-9a-f]{32}$/ }
let(:revs_context) { bundle_context_from_hash(:proj_revs)}
let(:revs) { described_class.new(revs_context) }
let(:img_context) { bundle_context_from_hash(:images_jp2_tif)}
let(:images_jp2_tif) { described_class.new(img_context) }
let(:smpl_multimedia_context) do
bundle_context_from_hash(:smpl_multimedia).tap do |c|
c.manifest_cols[:object_container] = 'folder'
allow(c).to receive(:path_in_bundle).with(any_args).and_call_original
allow(c).to receive(:path_in_bundle).with("manifest.csv").and_return('spec/test_data/smpl_multimedia/manifest_of_3.csv')
end
end
let(:smpl_multimedia) { described_class.new(smpl_multimedia_context) }
let(:flat_dir_images) { bundle_setup(:flat_dir_images) }
let(:images_jp2_tif) { bundle_setup(:images_jp2_tif) }
let(:smpl_multimedia) { bundle_setup(:smpl_multimedia) }

describe '#run_pre_assembly' do
let(:exp_workflow_svc_url) { Regexp.new("^#{Dor::Config.dor_services.url}/objects/.*/apo_workflows/assemblyWF$") }
Expand All @@ -25,12 +16,7 @@
bc.save

b = PreAssembly::Bundle.new bc
b.manifest_rows.each {|row| row.merge!("object" => row["folder"]) }
pids = []
dobj = b.digital_objects
dobj.each do |obj|
allow(obj).to receive(:dor_object).and_return(nil)
end
expect {
pids = b.run_pre_assembly
}.not_to raise_error
Expand All @@ -49,44 +35,41 @@

# Type-only check: run_log_msg must return a String.
# The two expect lines appear to be the before (revs) and after
# (flat_dir_images) versions of the same assertion in this diff view.
describe '#run_log_msg' do
it 'returns a string' do
expect(revs.run_log_msg).to be_a(String)
expect(flat_dir_images.run_log_msg).to be_a(String)
end
end

# processed_pids should return the pid of each entry in digital_objects,
# in order. The digital objects are replaced with doubles that respond
# only to #pid, so nothing is discovered from disk here.
# Lines using revs and lines using flat_dir_images appear to be the before
# and after versions of the same statements in this diff view.
describe '#processed_pids' do
it 'pulls pids from digital_objects' do
exp_pids = [11, 22, 33]
revs.digital_objects = exp_pids.map { |p| double('dobj', :pid => p) }
expect(revs.processed_pids).to eq(exp_pids)
flat_dir_images.digital_objects = exp_pids.map { |p| double('dobj', :pid => p) }
expect(flat_dir_images.processed_pids).to eq(exp_pids)
end
end

# Checks object discovery through #digital_objects.
describe '#digital_objects' do
# Expects exactly three digital objects for the :folder_manifest bundle.
it "finds the correct number of objects" do
b = bundle_setup(:folder_manifest)
# Copies each row's "folder" value into an "object" key before discovery.
b.manifest_rows.each {|row| row.merge!("object" => row["folder"]) }

expect(b.digital_objects.size).to eq(3)
end

# Compares sizes only: the first object's container must be larger (by
# #size) than the bundle_dir value.
it "handles containers correctly" do
smpl_multimedia.manifest_rows.each {|row| row.merge!("object" => row["folder"]) }
expect(smpl_multimedia.digital_objects.first.container.size).to be > smpl_multimedia.bundle_dir.size
end
end

# Covers the two discovery paths: via manifest rows and via directory crawl.
# Lines using revs and lines using flat_dir_images appear to be the before
# and after versions of the same statements in this diff view.
describe '#object_discovery: discovery via manifest and crawl' do
it "discover_containers_via_manifest() should return expected information" do
vals = %w(123.tif 456.tif 789.tif)
# NOTE(review): the container column is set to :col_foo, yet the stubbed
# rows below are keyed by :object -- confirm this assignment actually
# influences discover_containers_via_manifest.
revs.manifest_cols[:object_container] = :col_foo
allow(revs).to receive(:manifest_rows).and_return(vals.map { |v| { object: v } })
expect(revs.discover_containers_via_manifest).to eq(vals.map { |v| revs.path_in_bundle v })
flat_dir_images.manifest_cols[:object_container] = :col_foo
allow(flat_dir_images).to receive(:manifest_rows).and_return(vals.map { |v| { object: v } })
expect(flat_dir_images.discover_containers_via_manifest).to eq(vals.map { |v| flat_dir_images.path_in_bundle v })
end

# dir_glob is stubbed, so the only behaviour asserted is that the crawl
# returns the globbed items in sorted order.
it '#discover_items_via_crawl should return expected information' do
items = %w[abc.txt def.txt ghi.txt 123.tif 456.tif 456.TIF].map { |i| revs.path_in_bundle i }
allow(revs).to receive(:dir_glob).and_return(items)
expect(revs.discover_items_via_crawl(revs.bundle_dir)).to eq(items.sort)
items = %w[abc.txt def.txt ghi.txt 123.tif 456.tif 456.TIF].map { |i| flat_dir_images.path_in_bundle i }
allow(flat_dir_images).to receive(:dir_glob).and_return(items)
expect(flat_dir_images.discover_items_via_crawl(flat_dir_images.bundle_dir)).to eq(items.sort)
end
end

Expand Down Expand Up @@ -128,12 +111,12 @@
# all_object_files should concatenate the object_files of every digital
# object into one flat list. Doubles stand in for the digital objects and
# plain integers stand in for the files.
# Lines using revs and lines using flat_dir_images appear to be the before
# and after versions of the same statements in this diff view.
it "is able to exercise all_object_files()" do
fake_files = [[1, 2], [3, 4], [5, 6]]
fake_dobjs = fake_files.map { |fs| double('dobj', :object_files => fs) }
revs.digital_objects = fake_dobjs
expect(revs.all_object_files).to eq(fake_files.flatten)
flat_dir_images.digital_objects = fake_dobjs
expect(flat_dir_images.all_object_files).to eq(fake_files.flatten)
end

it "new_object_file() should return an ObjectFile with expected path values" do
allow(revs).to receive(:exclude_from_content).and_return(false)
allow(flat_dir_images).to receive(:exclude_from_content).and_return(false)
tests = [
# Stageable is a file:
# - immediately in bundle dir.
Expand Down Expand Up @@ -163,7 +146,7 @@
:exp_rel_path => 'b/c/d/x.tif' },
]
tests.each do |t|
ofile = revs.new_object_file t[:stageable], t[:file_path]
ofile = flat_dir_images.new_object_file t[:stageable], t[:file_path]
expect(ofile).to be_a(PreAssembly::ObjectFile)
expect(ofile.path).to eq(t[:file_path])
expect(ofile.relative_path).to eq(t[:exp_rel_path])
Expand All @@ -179,18 +162,16 @@

# Every object file starts with a nil checksum; after load_checksums has
# been called for each digital object, every checksum must match md5_regex.
# The three-line sequence appears twice because this diff view shows both
# the before and after versions of those lines.
describe '#load_checksums' do
it "loads checksums and attach them to the ObjectFiles" do
smpl_multimedia.manifest_rows.each {|row| row.merge!("object" => row["folder"]) }
smpl_multimedia.all_object_files.each { |f| expect(f.checksum).to be_nil }
smpl_multimedia.digital_objects.each { |dobj| smpl_multimedia.load_checksums(dobj) }
smpl_multimedia.all_object_files.each { |f| expect(f.checksum).to match(md5_regex) }
smpl_multimedia.all_object_files.each { |f| expect(f.checksum).to be_nil }
smpl_multimedia.digital_objects.each { |dobj| smpl_multimedia.load_checksums(dobj) }
smpl_multimedia.all_object_files.each { |f| expect(f.checksum).to match(md5_regex) }
end
end

describe '#digital_objects' do
it "augments the digital objects with additional information" do
revs.manifest_rows.each {|row| row.merge!("object" => row["filename"]) }
expect(revs.digital_objects.size).to eq(3)
revs.digital_objects.each do |dobj|
expect(flat_dir_images.digital_objects.size).to eq(3)
flat_dir_images.digital_objects.each do |dobj|
expect(dobj.label).to be_a(String)
expect(dobj.label).not_to eq('Unknown') # hardcoded in class
expect(dobj.source_id).to be_a(String)
Expand All @@ -208,12 +189,8 @@
end

it "raises exception if one of the object files is an invalid image" do
smpl_multimedia.manifest_rows.each {|row| row.merge!("object" => row["folder"]) }
# Create a double that will simulate an invalid image.
img_params = { :image? => true, :valid_image? => false, :path => 'bad/image.tif' }
bad_image = double 'bad_image', img_params
# Check for exceptions.
exp_msg = /^File validation failed/
bad_image = instance_double('bad_image', image?: true, valid_image?: false, path: 'bad/image.tif')
exp_msg = /^File validation failed/
smpl_multimedia.digital_objects.each do |dobj|
dobj.object_files = [bad_image]
expect { smpl_multimedia.validate_files(dobj) }.to raise_error(exp_msg)
Expand All @@ -223,60 +200,57 @@

# objects_to_process should return digital_objects minus any object whose
# unadjusted_container is a key in skippables.
# Lines using revs and lines using flat_dir_images appear to be the before
# and after versions of the same statements in this diff view.
describe '#objects_to_process' do
# With an empty skippables hash nothing is filtered out.
it "returns all objects if there are no skippables" do
revs.manifest_rows.each {|row| row.merge!("object" => row["filename"]) }
revs.skippables = {}
expect(revs.objects_to_process).to eq(revs.digital_objects)
flat_dir_images.skippables = {}
expect(flat_dir_images.objects_to_process).to eq(flat_dir_images.digital_objects)
end

# Marks only the last digital object as skippable and expects every other
# object back, in the original order.
it "returns a filtered list of digital objects" do
revs.manifest_rows.each {|row| row.merge!("object" => row["filename"]) }
revs.skippables = {}
revs.skippables[revs.digital_objects[-1].unadjusted_container] = true
o2p = revs.objects_to_process
expect(o2p.size).to eq(revs.digital_objects.size - 1)
expect(o2p).to eq(revs.digital_objects[0..-2])
flat_dir_images.skippables = {}
flat_dir_images.skippables[flat_dir_images.digital_objects[-1].unadjusted_container] = true
o2p = flat_dir_images.objects_to_process
expect(o2p.size).to eq(flat_dir_images.digital_objects.size - 1)
expect(o2p).to eq(flat_dir_images.digital_objects[0..-2])
end
end

# log_progress_info(dobj) should return a hash holding the object's
# unadjusted_container, pid, pre_assem_finished flag and a formatted
# timestamp.
# Lines using revs and lines using flat_dir_images appear to be the before
# and after versions of the same statements in this diff view.
describe "#log_progress_info" do
it "returns expected info about a digital object" do
revs.manifest_rows.each {|row| row.merge!("object" => row["filename"]) }
dobj = revs.digital_objects[0]
dobj = flat_dir_images.digital_objects[0]
exp = {
:unadjusted_container => dobj.unadjusted_container,
:pid => dobj.pid,
:pre_assem_finished => dobj.pre_assem_finished,
# NOTE(review): %I is strftime's 12-hour-clock hour, not minutes (%M), so
# this renders hour:hour:second. Presumably it mirrors the format string
# used by log_progress_info itself -- confirm, and fix both together.
# NOTE(review): the expected value is taken from Time.now at second
# precision, so the comparison can fail if the clock ticks over between
# this line and the call under test.
:timestamp => Time.now.strftime('%Y-%m-%d %H:%I:%S')
}
expect(revs.log_progress_info(dobj)).to eq(exp)
expect(flat_dir_images.log_progress_info(dobj)).to eq(exp)
end
end

describe "file and directory utilities" do
let(:relative) { 'abc/def.jpg' }
let(:full) { revs.path_in_bundle(relative) }
let(:full) { flat_dir_images.path_in_bundle(relative) }

it "#path_in_bundle returns expected value" do
expect(revs.path_in_bundle(relative)).to eq('spec/test_data/flat_dir_images/abc/def.jpg')
expect(flat_dir_images.path_in_bundle(relative)).to eq('spec/test_data/flat_dir_images/abc/def.jpg')
end
it "#relative_path returns expected value" do
expect(revs.relative_path(revs.bundle_dir, full)).to eq(relative)
expect(flat_dir_images.relative_path(flat_dir_images.bundle_dir, full)).to eq(relative)
end
it "#get_base_dir returns expected value" do
expect(revs.get_base_dir('foo/bar/fubb.txt')).to eq('foo/bar')
expect(flat_dir_images.get_base_dir('foo/bar/fubb.txt')).to eq('foo/bar')
end

it "#get_base_dir raises error if given bogus arguments" do
exp_msg = /^Bad arg to get_base_dir/
bad_args = ['foo.txt', '', 'x\y\foo.txt']
bad_args.each do |arg|
expect { revs.get_base_dir(arg) }.to raise_error(ArgumentError, exp_msg)
expect { flat_dir_images.get_base_dir(arg) }.to raise_error(ArgumentError, exp_msg)
end
end

it "#dir_glob returns expected information" do
exp = [1, 2, 3].map { |n| revs.path_in_bundle "image#{n}.tif" }
expect(revs.dir_glob(revs.path_in_bundle "*.tif")).to eq(exp)
exp = [1, 2, 3].map { |n| flat_dir_images.path_in_bundle "image#{n}.tif" }
expect(flat_dir_images.dir_glob(flat_dir_images.path_in_bundle "*.tif")).to eq(exp)
end

it "#find_files_recursively returns expected information" do
Expand All @@ -287,8 +261,7 @@
"image2.tif",
"image3.tif",
"manifest.csv",
"manifest_badsourceid_column.csv",
"mods_template.xml",
"manifest_badsourceid_column.csv"
],
:images_jp2_tif => [
"gn330dv6119/image1.jp2",
Expand All @@ -299,7 +272,6 @@
"jy812bp9403/00/image2.tif",
"jy812bp9403/05/image1.jp2",
"manifest.csv",
"mods_template.xml",
"tz250tk7584/00/image1.tif",
"tz250tk7584/00/image2.tif"
],
Expand Down
6 changes: 3 additions & 3 deletions spec/models/bundle_context_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@
# Pins the exact parsed content of bc.manifest_rows: three rows with
# string keys.
# The six hashes are three before/after pairs in this diff view; within
# each pair only the "folder" key is renamed to "object" (this hunk is
# recorded as "3 additions & 3 deletions").
it "expect the content of manifest rows" do
expect(bc.manifest_rows).to eq(
[
{"druid"=>"druid:jy812bp9403", "sourceid"=>"bar-1.0", "folder"=>"jy812bp9403", "label"=>"Label 1", "description"=>"This is a description for label 1"},
{"druid"=>"druid:tz250tk7584", "sourceid"=>"bar-2.1", "folder"=>"tz250tk7584", "label"=>"Label 2", "description"=>"This is a description for label 2"},
{"druid"=>"druid:gn330dv6119", "sourceid"=>"bar-3.1", "folder"=>"gn330dv6119", "label"=>"Label 3", "description"=>"This is a description for label 3"}
{"druid"=>"druid:jy812bp9403", "sourceid"=>"bar-1.0", "object"=>"jy812bp9403", "label"=>"Label 1", "description"=>"This is a description for label 1"},
{"druid"=>"druid:tz250tk7584", "sourceid"=>"bar-2.1", "object"=>"tz250tk7584", "label"=>"Label 2", "description"=>"This is a description for label 2"},
{"druid"=>"druid:gn330dv6119", "sourceid"=>"bar-3.1", "object"=>"gn330dv6119", "label"=>"Label 3", "description"=>"This is a description for label 3"}
]
)
end
Expand Down
6 changes: 1 addition & 5 deletions spec/services/discovery_report_spec.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
require 'rails_helper'

RSpec.describe DiscoveryReport do
let(:bundle) { bundle_setup(:proj_revs) }
let(:bundle) { bundle_setup(:flat_dir_images) }
subject(:report) { described_class.new(bundle) }

before do
bundle.manifest_rows.each {|row| row.merge!("object" => row["filename"]) }
end

describe '#initialize' do
it 'raises if PreAssembly::Bundle not received' do
expect { described_class.new }.to raise_error(ArgumentError)
Expand Down
2 changes: 1 addition & 1 deletion spec/test_data/flat_dir_images/manifest.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
druid,format,sourceid,filename,label,year,inst_notes,prod_notes,has_more_metadata,description
druid,format,sourceid,object,label,year,inst_notes,prod_notes,has_more_metadata,description
"druid:oo000oo0000","BW film","foo-1.0","image1.tif","Avus 1937","1937","","",""
"druid:oo111oo1111","BW film","foo-2.1","image2.tif","Avus 1938","1938-1939","frames 6,7,8 glass slide broken","","X",""
"druid:oo222oo2222","BW film","foo-2.2","image3.tif","Avus 1938, 1956","1938, 1956","strip 2 is duplicate; don't scan","","","yo, this is a description"
34 changes: 0 additions & 34 deletions spec/test_data/flat_dir_images/mods_template.xml

This file was deleted.

4 changes: 2 additions & 2 deletions spec/test_data/images_jp2_tif/manifest.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
druid,sourceid,folder,label,description
druid,sourceid,object,label,description
"druid:jy812bp9403","bar-1.0","jy812bp9403","Label 1","This is a description for label 1"
"druid:tz250tk7584","bar-2.1","tz250tk7584","Label 2","This is a description for label 2"
"druid:gn330dv6119","bar-3.1","gn330dv6119","Label 3","This is a description for label 3"
"druid:gn330dv6119","bar-3.1","gn330dv6119","Label 3","This is a description for label 3"
21 changes: 0 additions & 21 deletions spec/test_data/images_jp2_tif/mods_template.xml

This file was deleted.

2 changes: 1 addition & 1 deletion spec/test_data/obj_dirs_images/manifest.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
druid,sourceid,folder,label,description
druid,sourceid,object,label,description
"druid:oo000oo0000","bar-1.0","obj1","Label 1","This is a description for label 1"
"druid:oo111oo1111","bar-2.1","obj2","Label 2","This is a description for label 1"
"druid:oo222oo2222","bar-2.2","obj3","Label 3","This is a description for label 1"
21 changes: 0 additions & 21 deletions spec/test_data/obj_dirs_images/mods_template.xml

This file was deleted.

0 comments on commit 5a719c6

Please sign in to comment.