Skip to content

Commit

Permalink
Merge pull request #173 from sul-dlss/desc-md-removal
Browse files Browse the repository at this point in the history
Starting to remove desc_md_template and methods that come along
  • Loading branch information
atz authored Sep 4, 2018
2 parents f4d1a16 + dee8047 commit 7f31760
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 237 deletions.
22 changes: 4 additions & 18 deletions app/lib/pre_assembly/bundle.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ class BundleContext
:accession_items,
:manifest,
:checksums_file,
:desc_md_template,
:progress_log_file,
:project_name,
:file_attr,
Expand Down Expand Up @@ -85,9 +84,6 @@ def setup_paths
bundle_dir.chomp!('/') # get rid of any trailing slash on the bundle directory
self.manifest &&= path_in_bundle(manifest)
self.checksums_file &&= path_in_bundle(checksums_file)
if !desc_md_template.nil? && !(Pathname.new desc_md_template).absolute? # check for a desc MD template being defined and not being absolute
self.desc_md_template = path_in_bundle(desc_md_template) # set it relative to the bundle
end
self.staging_dir = Assembly::ASSEMBLY_WORKSPACE if staging_dir.nil? # if the user didn't supply a staging_dir, use the default
self.progress_log_file = File.join(File.dirname(config_filename), File.basename(config_filename, '.yaml') + '_progress.yaml') unless progress_log_file # if the user didn't supply a progress log file, use the yaml config file as a base, and add '_progress'
end
Expand Down Expand Up @@ -130,8 +126,7 @@ def required_dirs
def required_files
[
manifest,
checksums_file,
desc_md_template
checksums_file
].compact
end

Expand Down Expand Up @@ -194,7 +189,6 @@ def validate_usage
end
else # if we are not using a manifest, check some stuff
validation_errors << "The glob for object_discovery must be set if object_discovery:use_manifest=false." if object_discovery[:glob].blank? # glob must be set
validation_errors << "Manifest and desc_md_template files should be set to nil if object_discovery:use_manifest=false." unless manifest.blank? && desc_md_template.blank?
end

if stageable_discovery[:use_container] # if we are staging the whole container, check some stuff
Expand Down Expand Up @@ -230,11 +224,9 @@ class Bundle
:provider_checksums,
:digital_objects,
:skippables,
:smpl_manifest,
:desc_md_template_xml
:smpl_manifest

delegate :desc_md_template,
:progress_log_file,
delegate :progress_log_file,
:content_md_creation,
:stageable_discovery,
:bundle_dir,
Expand Down Expand Up @@ -265,15 +257,9 @@ def initialize(bundle_context)
self.digital_objects = []
self.skippables = {}

load_desc_md_template
load_skippables
end

def load_desc_md_template
return nil unless desc_md_template && File.readable?(desc_md_template)
self.desc_md_template_xml = IO.read(desc_md_template)
end

def load_skippables
docs = YAML.load_stream(Assembly::Utils.read_file(progress_log_file))
docs = docs.documents if docs.respond_to? :documents
Expand Down Expand Up @@ -590,7 +576,7 @@ def process_digital_objects
# Try to pre_assemble the digital object.
load_checksums(dobj)
validate_files(dobj) if validate_files
dobj.pre_assemble(desc_md_template_xml)
dobj.pre_assemble
# Indicate that we finished.
dobj.pre_assem_finished = true
log_and_show "Completed #{dobj.druid.druid}"
Expand Down
50 changes: 5 additions & 45 deletions app/lib/pre_assembly/digital_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ class DigitalObject
:file_attr,
:bundle_dir,
:staging_dir,
:desc_md_template_xml,
:content_md_creation,
:staging_style,
:smpl_manifest
Expand All @@ -27,15 +26,15 @@ class DigitalObject
:label,
:content_md_file,
:technical_md_file,
:desc_md_file,
:content_md_xml,
:technical_md_xml,
:desc_md_xml,
:pre_assem_finished,
:content_structure
:content_structure,
:druid,
:source_id,
:manifest_row

attr_writer :dor_object, :druid_tree_dir
attr_accessor :druid, :source_id, :manifest_row

INIT_PARAMS.each { |p| attr_accessor p }

Expand All @@ -54,10 +53,8 @@ def setup
self.label = Dor::Config.dor.default_label
self.content_md_file = Assembly::CONTENT_MD_FILE
self.technical_md_file = Assembly::TECHNICAL_MD_FILE
self.desc_md_file = Assembly::DESC_MD_FILE
self.content_md_xml = ''
self.technical_md_xml = ''
self.desc_md_xml = ''
self.content_structure = (project_style ? project_style[:content_structure] : 'file')
end

Expand Down Expand Up @@ -112,15 +109,12 @@ def metadata_dir
# The main process.
####

def pre_assemble(desc_md_xml = nil)
self.desc_md_template_xml = desc_md_xml

def pre_assemble
log " - pre_assemble(#{source_id}) started"
determine_druid
stage_files
generate_content_metadata unless content_md_creation[:style].to_s == 'none'
generate_technical_metadata
generate_desc_metadata
initialize_assembly_workflow
log " - pre_assemble(#{pid}) finished"
end
Expand Down Expand Up @@ -298,40 +292,6 @@ def object_files_exist?
# Descriptive metadata.
####

def generate_desc_metadata
# Do nothing for bundles that don't supply a template.
return unless desc_md_template_xml
create_desc_metadata_xml
write_desc_metadata
end

def create_desc_metadata_xml
log " - create_desc_metadata_xml()"

# XML escape all of the entries in the manifest row so they won't break the XML
manifest_row.each { |k, v| manifest_row[k] = Nokogiri::XML::Text.new(v, Nokogiri::XML('')).to_s if v }

# ensure access with symbol or string keys
self.manifest_row = manifest_row.with_indifferent_access

# Run the XML template through ERB.
self.desc_md_xml = ERB.new(desc_md_template_xml, nil, '>').result(binding)

# The manifest_row is a hash, with column names as the key.
# In the template, as a convenience we allow users to put specific column placeholders inside
# double brackets: "blah [[column_name]] blah".
# Here we replace those placeholders with the corresponding value from the manifest row.
manifest_row.each { |k, v| desc_md_xml.gsub! "[[#{k}]]", v.to_s.strip }
true
end

def write_desc_metadata
file_name = File.join(metadata_dir, desc_md_file)
log " - write_desc_metadata_xml(#{file_name})"
create_object_directories
File.open(file_name, 'w') { |fh| fh.puts desc_md_xml }
end

def create_object_directories
FileUtils.mkdir_p druid_tree_dir unless File.directory?(druid_tree_dir)
FileUtils.mkdir_p metadata_dir unless File.directory?(metadata_dir)
Expand Down
17 changes: 2 additions & 15 deletions spec/lib/pre_assembly/bundle_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,6 @@
expect(revs.bundle_dir).to eq('spec/test_data/bundle_input_a')
end

it '#load_desc_md_template should return nil or String' do
# Return nil if no template.
revs_context.desc_md_template = nil
expect(revs.load_desc_md_template).to be_nil
# Otherwise, read the template and return its content.
revs_context.desc_md_template = revs_context.path_in_bundle('mods_template.xml')
template = revs.load_desc_md_template
expect(template).to be_a(String)
expect(template.size).to be > 0
end

# TODO: this one's really more about BundleContext now
it '#setup_other should prune @file_attr' do
# All keys are present.
Expand Down Expand Up @@ -79,12 +68,10 @@
before { revs_context.user_params = Hash[revs_context.required_user_params.map { |p| [p, ''] }] }

it '#required_files should return expected N of items' do
expect(revs_context.required_files.size).to eq(3)
revs_context.manifest = nil
expect(revs_context.required_files.size).to eq(2)
revs_context.checksums_file = nil
revs_context.manifest = nil
expect(revs_context.required_files.size).to eq(1)
revs_context.desc_md_template = nil
revs_context.checksums_file = nil
expect(revs_context.required_files.size).to eq(0)
end

Expand Down
159 changes: 0 additions & 159 deletions spec/lib/pre_assembly/digital_object_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -412,165 +412,6 @@ def add_object_files(extension = 'tif')
end
end

describe "descriptive metadata" do
before do
dobj.druid = druid
dobj.manifest_row = {
:sourceid => 'foo-1',
:label => 'this is < a label with an & that will break XML unless it is escaped',
:year => '2012',
:description => 'this is a description > another description < other stuff',
:format => 'film',
:foo => '123',
:bar => '456',
}
dobj.desc_md_template_xml = <<-END.gsub(/^ {8}/, '')
<?xml version="1.0"?>
<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/mods/v3" version="3.3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd">
<typeOfResource>still image</typeOfResource>
<genre authority="att">digital image</genre>
<subject authority="lcsh">
<topic>Automobile</topic>
<topic>History</topic>
</subject>
<relatedItem type="host">
<titleInfo>
<title>The Collier Collection of the Revs Institute for Automotive Research</title>
</titleInfo>
<typeOfResource collection="yes"/>
</relatedItem>
<relatedItem type="original">
<physicalDescription>
<form authority="att">[[format]]</form>
</physicalDescription>
</relatedItem>
<originInfo>
<dateCreated>[[year]]</dateCreated>
</originInfo>
<titleInfo>
<title>'[[label]]' is the label!</title>
</titleInfo>
<note>[[description]]</note>
<note>ERB Test: <%=manifest_row[:description]%></note>
<identifier type="local" displayLabel="Revs ID">[[sourceid]]</identifier>
<note type="source note" ID="foo">[[foo]]</note>
<note type="source note" ID="bar">[[bar]]</note>
</mods>
END
@exp_xml = noko_doc <<-END.gsub(/^ {8}/, '')
<?xml version="1.0"?>
<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/mods/v3" version="3.3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd">
<typeOfResource>still image</typeOfResource>
<genre authority="att">digital image</genre>
<subject authority="lcsh">
<topic>Automobile</topic>
<topic>History</topic>
</subject>
<relatedItem type="host">
<titleInfo>
<title>The Collier Collection of the Revs Institute for Automotive Research</title>
</titleInfo>
<typeOfResource collection="yes"/>
</relatedItem>
<relatedItem type="original">
<physicalDescription>
<form authority="att">film</form>
</physicalDescription>
</relatedItem>
<originInfo>
<dateCreated>2012</dateCreated>
</originInfo>
<titleInfo>
<title>'this is &lt; a label with an &amp; that will break XML unless it is escaped' is the label!</title>
</titleInfo>
<note>this is a description &gt; another description &lt; other stuff</note>
<identifier type="local" displayLabel="Revs ID">foo-1</identifier>
<note>ERB Test: this is a description &gt; another description &lt; other stuff</note>
<note type="source note" ID="foo">123</note>
<note type="source note" ID="bar">456</note>
</mods>
END
end

it "generate_desc_metadata() should do nothing if there is no template" do
dobj.desc_md_template_xml = nil
expect(dobj).not_to receive :create_desc_metadata_xml
dobj.generate_desc_metadata
end

it "create_desc_metadata_xml() should generate the expected xml text with the manifest row having a hash with keys as symbols" do
dobj.create_desc_metadata_xml
expect(noko_doc(dobj.desc_md_xml)).to be_equivalent_to @exp_xml
end

it "create_desc_metadata_xml() should generate the expected xml text with the manifest row having a hash with keys as strings" do
dobj.manifest_row = {
'sourceid' => 'foo-1',
'label' => 'this is < a label with an & that will break XML unless it is escaped',
'year' => '2012',
'description' => 'this is a description > another description < other stuff',
'format' => 'film',
'foo' => '123',
'bar' => '456',
}
dobj.create_desc_metadata_xml
expect(noko_doc(dobj.desc_md_xml)).to be_equivalent_to @exp_xml
end

it "is able to write the desc_metadata XML to a file" do
dobj.create_desc_metadata_xml
Dir.mktmpdir(*tmp_dir_args) do |tmp_area|
dobj.druid_tree_dir = tmp_area
file_name = File.join(tmp_area, "metadata", dobj.desc_md_file)
expect(File.exist?(file_name)).to eq(false)
dobj.write_desc_metadata
expect(noko_doc(File.read file_name)).to be_equivalent_to @exp_xml
end
end

it "generates descMetadata correctly given a manifest row as loaded from the csv" do
manifest = PreAssembly::Bundle.import_csv("#{PRE_ASSEMBLY_ROOT}/spec/test_data/bundle_input_a/manifest.csv")
dobj.manifest_row = Hash[manifest[2].each_pair.to_a]

dobj.desc_md_template_xml = IO.read("#{PRE_ASSEMBLY_ROOT}/spec/test_data/bundle_input_a/mods_template.xml")
dobj.create_desc_metadata_xml
exp_xml = <<-END.gsub(/^ {8}/, '')
<?xml version="1.0"?>
<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/mods/v3" version="3.3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd">
<typeOfResource>still image</typeOfResource>
<genre authority="att">digital image</genre>
<subject authority="lcsh">
<topic>Automobile</topic>
<topic>History</topic>
</subject>
<relatedItem type="host">
<titleInfo>
<title>The Collier Collection of the Revs Institute for Automotive Research</title>
</titleInfo>
<typeOfResource collection="yes"/>
</relatedItem>
<relatedItem type="original">
<physicalDescription>
<form authority="att">BW film</form>
</physicalDescription>
</relatedItem>
<originInfo>
<dateCreated>1938, 1956</dateCreated>
</originInfo>
<titleInfo>a
<title>Avus 1938, 1956</title>
</titleInfo>
<note>yo, this is a description</note>
<identifier type="local" displayLabel="Revs ID">foo-2.2</identifier>
<note type="source note" ID="inst_notes">strip 2 is duplicate; don't scan</note>
<note type="source note" ID="inst_notes2">strip 2 is duplicate; don't scan</note>
</mods>
END

expect(noko_doc(dobj.desc_md_xml)).to be_equivalent_to exp_xml
end
end

describe '#assembly_workflow_url' do
it 'returns expected value' do
dobj.pid = pid
Expand Down

0 comments on commit 7f31760

Please sign in to comment.