Skip to content
This repository has been archived by the owner on May 11, 2022. It is now read-only.

Commit

Permalink
Merge 0f48efe into 010e8c3
Browse files Browse the repository at this point in the history
  • Loading branch information
jcoyne committed Dec 20, 2018
2 parents 010e8c3 + 0f48efe commit 004b2d7
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 137 deletions.
1 change: 1 addition & 0 deletions lib/dor-services.rb
Expand Up @@ -156,6 +156,7 @@ def logger
autoload :RegistrationService
autoload :SuriService
autoload :WorkflowService
autoload :DatastreamBuilder
autoload :DigitalStacksService
autoload :SdrIngestService
autoload :CleanupService
Expand Down
59 changes: 19 additions & 40 deletions lib/dor/models/concerns/processable.rb
Expand Up @@ -5,6 +5,8 @@
module Dor
module Processable
extend ActiveSupport::Concern
extend Deprecation
self.deprecation_horizon = 'dor-services version 7.0.0'

included do
has_metadata name: 'workflows',
Expand Down Expand Up @@ -41,54 +43,31 @@ module Processable
'opened' => 9
}.freeze

def empty_datastream?(datastream)
return true if datastream.new?

if datastream.class.respond_to?(:xml_template)
datastream.content.to_s.empty? || EquivalentXml.equivalent?(datastream.content, datastream.class.xml_template)
else
datastream.content.to_s.empty?
end
end

# Tries to find a file for the datastream.
# @param [String] datastream name of a datastream
# @return [String, nil] path to datastream or nil
def find_metadata_file(datastream)
druid = DruidTools::Druid.new(pid, Dor::Config.stacks.local_workspace_root)
druid.find_metadata("#{datastream}.xml")
end

# Builds that datastream using the content of a file if such a file
# exists and is newer than the object's current datastream; otherwise,
# The ContentMetadata and DescMetadata robots are allowed to build the
# datastream by reading a file from the /dor/workspace that matches the
# datastream name. This allows assembly or pre-assembly to prebuild the
# datastreams from templates or using other means
# (like the assembly-objectfile gem) and then have those datastreams picked
# up and added to the object during accessionWF.
#
# This method builds that datastream using the content of a file if such a file
# exists and is newer than the object's current datastream (see above); otherwise,
# builds the datastream by calling build_fooMetadata_datastream.
# @param [String] datastream name of a datastream (e.g. "fooMetadata")
# @param [Boolean] force overwrite existing datastream
# @param [Boolean] is_required
# @return [SomeDatastream]
# @return [ActiveFedora::Datastream]
def build_datastream(datastream, force = false, is_required = false)
# See if datastream exists as a file and if the file's timestamp is newer than datastream's timestamp.
ds = datastreams[datastream]
filename = find_metadata_file(datastream)
use_file = filename && (ds.createDate.nil? || File.mtime(filename) >= ds.createDate)
# Build datastream.
if use_file
content = File.read(filename)
ds.content = content
ds.ng_xml = Nokogiri::XML(content) if ds.respond_to?(:ng_xml)
ds.save unless ds.digital_object.new?
elsif force || empty_datastream?(ds)
meth = "build_#{datastream}_datastream".to_sym
if respond_to?(meth)
send(meth, ds)
ds.save unless ds.digital_object.new?
end
end
# Check for success.
raise "Required datastream #{datastream} could not be populated!" if is_required && empty_datastream?(ds)
ds = datastreams[datastream]
builder = Dor::DatastreamBuilder.new(object: self,
datastream: ds,
force: force,
required: is_required)
builder.build

ds
end
deprecation_deprecate build_datastream: 'Use Dor::DatastreamBuilder instead'

def cleanup
CleanupService.cleanup(self)
Expand Down
90 changes: 90 additions & 0 deletions lib/dor/services/datastream_builder.rb
@@ -0,0 +1,90 @@
# frozen_string_literal: true

module Dor
  # Populates one datastream of a DOR object.
  #
  # The ContentMetadata and DescMetadata robots may build a datastream from a
  # file in /dor/workspace whose name matches the datastream. That lets assembly
  # or pre-assembly prepare datastreams ahead of time (from templates, the
  # assembly-objectfile gem, etc.) and have them picked up during accessionWF.
  #
  # Strategy used by #build:
  # 1. if a "<dsid>.xml" metadata file exists and is newer than the datastream
  #    (or the datastream has no createDate), load the file's content;
  # 2. otherwise, when forced or when the datastream is empty, call the owning
  #    object's build_<dsid>_datastream method, if it has one.
  class DatastreamBuilder
    # @param object [ActiveFedora::Base] the object that contains the datastream
    # @param datastream [ActiveFedora::Datastream] the datastream to populate
    # @param force [Boolean] rebuild even if the datastream already has content
    # @param required [Boolean] make #build raise if the datastream is still empty afterwards
    def initialize(object:, datastream:, force: false, required: false)
      @object = object
      @datastream = datastream
      @force = force
      @required = required
      # Resolved once, up front; needs @object and @datastream to be set already.
      @filename = find_metadata_file
    end

    # Populates the datastream from the metadata file or the object's builder method.
    # @return [nil]
    # @raise [RuntimeError] if +required+ was set and the datastream is still empty
    def build
      if file_newer_than_datastream?
        create_from_file(filename)
      elsif force || empty_datastream?
        create_default
      end

      # Check for success.
      return unless required && empty_datastream?

      raise "Required datastream #{datastream_name} could not be populated!"
    end

    private

    attr_reader :datastream, :force, :object, :required, :filename

    # @return [String] the datastream id, e.g. "descMetadata"
    def datastream_name
      datastream.dsid
    end

    # True when a metadata file was found and the datastream either has no
    # creation date or was created strictly before the file was last modified.
    def file_newer_than_datastream?
      return false unless filename

      created_at = datastream_date
      !created_at || file_date > created_at
    end

    # @return [Time] modification time of the metadata file
    def file_date
      File.mtime(filename)
    end

    # @return [Object, nil] the datastream's createDate, if any
    def datastream_date
      datastream.createDate
    end

    # Replaces the datastream content with the content of the given file.
    # @param path [String] path of the file to read
    def create_from_file(path)
      xml = File.read(path)
      datastream.content = xml
      datastream.ng_xml = Nokogiri::XML(xml) if datastream.respond_to?(:ng_xml)
      persist_datastream
    end

    # Delegates to the object's build_<dsid>_datastream method; does nothing
    # when the object does not expose one.
    def create_default
      builder_method = :"build_#{datastream_name}_datastream"
      return unless object.respond_to?(builder_method)

      object.public_send(builder_method, datastream)
      persist_datastream
    end

    # Saves the datastream, unless its digital object is itself still new.
    def persist_datastream
      datastream.save unless datastream.digital_object.new?
    end

    # Tries to find a "<dsid>.xml" metadata file for the datastream under the
    # configured local workspace root.
    # @return [String, nil] path to the file, or nil
    def find_metadata_file
      workspace_druid = DruidTools::Druid.new(object.pid, Dor::Config.stacks.local_workspace_root)
      workspace_druid.find_metadata("#{datastream_name}.xml")
    end

    # A datastream counts as empty when it is new, has blank content, or (for
    # classes that define an xml_template) still matches that template.
    def empty_datastream?
      return true if datastream.new?
      return true if datastream.content.to_s.empty?

      ds_class = datastream.class
      ds_class.respond_to?(:xml_template) && EquivalentXml.equivalent?(datastream.content, ds_class.xml_template)
    end
  end
end
31 changes: 19 additions & 12 deletions lib/dor/services/registration_service.rb
Expand Up @@ -107,21 +107,11 @@ def register_object(params = {})
new_item.set_read_rights(rights) unless rights == 'default' # already defaulted to default!
end
# create basic mods from the label
if metadata_source == 'label'
ds = new_item.build_datastream('descMetadata')
builder = Nokogiri::XML::Builder.new do |xml|
xml.mods(Dor::DescMetadataDS::MODS_HEADER_CONFIG) do
xml.titleInfo do
xml.title label
end
end
end
ds.content = builder.to_xml
end
build_desc_metadata_from_label(new_item, label) if metadata_source == 'label'

workflow_priority = params[:workflow_priority] ? params[:workflow_priority].to_i : 0

Array(params[:seed_datastream]).each { |datastream_name| new_item.build_datastream(datastream_name) }
seed_datastreams(Array(params[:seed_datastream]), new_item)
Array(params[:initiate_workflow]).each { |workflow_id| new_item.create_workflow(workflow_id, !new_item.new_record?, workflow_priority) }

new_item.class.ancestors.select { |x| x.respond_to?(:to_class_uri) && x != ActiveFedora::Base }.each do |parent_class|
Expand Down Expand Up @@ -182,6 +172,23 @@ def ids_to_hash(ids)

Hash[Array(ids).map { |id| id.split(':', 2) }]
end

# Builds each of the named seed datastreams on the item.
#
# Uses Dor::DatastreamBuilder directly instead of Processable#build_datastream:
# that method is deprecated and merely wraps the builder, so calling it here
# would emit a deprecation warning for every seeded datastream. The builder is
# invoked with its defaults (force: false, required: false), which is what
# build_datastream(datastream_name) passed.
# @param names [Array<String>] names (dsids) of the datastreams to build
# @param item [#datastreams] the object whose datastreams are being seeded
# @return [Array<String>] the names that were given
def seed_datastreams(names, item)
  names.each do |datastream_name|
    Dor::DatastreamBuilder.new(object: item, datastream: item.datastreams[datastream_name]).build
  end
end

# Sets the item's descMetadata content to a minimal MODS record whose only
# element is a titleInfo/title holding the label.
# @param new_item [#descMetadata] the item being registered
# @param label [String] text to use as the MODS title
# @return [String] the generated XML
def build_desc_metadata_from_label(new_item, label)
  mods = Nokogiri::XML::Builder.new do |doc|
    doc.mods(Dor::DescMetadataDS::MODS_HEADER_CONFIG) do
      doc.titleInfo { doc.title label }
    end
  end

  new_item.descMetadata.content = mods.to_xml
end
end
end
end
34 changes: 19 additions & 15 deletions spec/models/concerns/describable_spec.rb
Expand Up @@ -28,21 +28,25 @@ class SimpleItem < ActiveFedora::Base
expect(@item.datastreams['descMetadata']).to be_a(Dor::DescMetadataDS)
end

it 'should provide a descMetadata datastream builder' do
stub_request(:get, "#{Dor::Config.metadata.catalog.url}/?barcode=36105049267078").to_return(body: read_fixture('ab123cd4567_descMetadata.xml'))
allow(@item).to receive(:find_metadata_file).and_return(nil)
expect(Dor::MetadataService).to receive(:fetch).with('barcode:36105049267078').and_call_original
xml = <<-END_OF_XML
<?xml version="1.0"?>
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="3.6" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
<titleInfo>
<title/>
</titleInfo>
</mods>
END_OF_XML
expect(@item.datastreams['descMetadata'].ng_xml.to_s).to be_equivalent_to(xml)
@item.build_datastream('descMetadata')
expect(@item.datastreams['descMetadata'].ng_xml.to_s).not_to be_equivalent_to(xml)
describe '#build_descMetadata_datastream' do
before do
stub_request(:get, "#{Dor::Config.metadata.catalog.url}/?barcode=36105049267078").to_return(body: read_fixture('ab123cd4567_descMetadata.xml'))
end

it 'calls the catalog service' do
expect(Dor::MetadataService).to receive(:fetch).with('barcode:36105049267078').and_call_original
xml = <<-END_OF_XML
<?xml version="1.0"?>
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="3.6" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
<titleInfo>
<title/>
</titleInfo>
</mods>
END_OF_XML
expect(@item.datastreams['descMetadata'].ng_xml.to_s).to be_equivalent_to(xml)
@item.build_descMetadata_datastream(@item.descMetadata)
expect(@item.datastreams['descMetadata'].ng_xml.to_s).not_to be_equivalent_to(xml)
end
end

it 'produces dublin core from the MODS in the descMetadata datastream' do
Expand Down
73 changes: 13 additions & 60 deletions spec/models/concerns/processable_spec.rb
Expand Up @@ -19,11 +19,11 @@ class ProcessableWithApoItem < ActiveFedora::Base
include Dor::Processable
end

describe Dor::Processable do
before(:each) { stub_config }
after(:each) { unstub_config }
RSpec.describe Dor::Processable do
after(:each) { unstub_config }

before :each do
before do
stub_config
@item = instantiate_fixture('druid:ab123cd4567', ProcessableItem)
@item.contentMetadata.content = '<contentMetadata/>'
end
Expand All @@ -48,63 +48,16 @@ class ProcessableWithApoItem < ActiveFedora::Base
expect(@item.workflows.content).to eq('<workflows>with some data</workflows>')
end

context 'build_datastream()' do
before(:each) do
# Paths to two files with the same content.
f1 = 'workspace/ab/123/cd/4567/ab123cd4567/metadata/descMetadata.xml'
f2 = 'workspace/ab/123/cd/4567/desc_metadata.xml'
@dm_filename = File.join(@fixture_dir, f1) # Path used inside build_datastream().
@dm_fixture_xml = read_fixture(f2) # Path to fixture.
@dm_builder_xml = @dm_fixture_xml.sub(/FROM_FILE/, 'FROM_BUILDER')
end

context 'datastream exists as a file' do
before(:each) do
allow(@item).to receive(:find_metadata_file).and_return(@dm_filename)
allow(File).to receive(:read).and_return(@dm_fixture_xml)
@t = Time.now.utc
end

it 'file newer than datastream: should read content from file' do
allow(File).to receive(:mtime).and_return(@t)
allow(@item.descMetadata).to receive(:createDate).and_return(@t - 99)
xml = @dm_fixture_xml
expect(@item.descMetadata.ng_xml).not_to be_equivalent_to(xml)
@item.build_datastream('descMetadata', true)
expect(@item.descMetadata.ng_xml).to be_equivalent_to(xml)
expect(@item.descMetadata.ng_xml).not_to be_equivalent_to(@dm_builder_xml)
end

it 'file older than datastream: should use the builder' do
allow(File).to receive(:mtime).and_return(@t - 99)
allow(@item.descMetadata).to receive(:createDate).and_return(@t)
xml = @dm_builder_xml
allow(@item).to receive(:fetch_descMetadata_datastream).and_return(xml)
expect(@item.descMetadata.ng_xml).not_to be_equivalent_to(xml)
@item.build_datastream('descMetadata', true)
expect(@item.descMetadata.ng_xml).to be_equivalent_to(xml)
expect(@item.descMetadata.ng_xml).not_to be_equivalent_to(@dm_fixture_xml)
end
end

context 'datastream does not exist as a file' do
before(:each) do
allow(@item).to receive(:find_metadata_file).and_return(nil)
end

it 'should use the datastream builder' do
xml = @dm_builder_xml
allow(@item).to receive(:fetch_descMetadata_datastream).and_return(xml)
expect(@item.descMetadata.ng_xml).not_to be_equivalent_to(xml)
@item.build_datastream('descMetadata')
expect(@item.descMetadata.ng_xml).to be_equivalent_to(xml)
expect(@item.descMetadata.ng_xml).not_to be_equivalent_to(@dm_fixture_xml)
end
describe '#build_datastream' do
let(:builder) { instance_double(Dor::DatastreamBuilder, build: true) }

it 'should raise an exception if required datastream cannot be generated' do
# Fails because there is no build_contentMetadata_datastream() method.
expect { @item.build_datastream('contentMetadata', false, true) }.to raise_error(RuntimeError)
end
it 'Calls the datastream builder' do
expect(Deprecation).to receive(:warn)
expect(Dor::DatastreamBuilder).to receive(:new)
.with(datastream: Dor::DescMetadataDS, force: true, object: @item, required: false)
.and_return(builder)
@item.build_datastream('descMetadata', true)
expect(builder).to have_received(:build)
end
end

Expand Down
12 changes: 7 additions & 5 deletions spec/models/concerns/publishable_spec.rb
Expand Up @@ -92,11 +92,13 @@ class ItemizableItem < ActiveFedora::Base
expect(@item.datastreams['rightsMetadata']).to be_a(ActiveFedora::OmDatastream)
end

it 'should provide a rightsMetadata datastream builder' do
rights_md = @apo.defaultObjectRights.content
expect(@item.datastreams['rightsMetadata'].ng_xml.to_s).not_to be_equivalent_to(rights_md)
@item.build_datastream('rightsMetadata', true)
expect(@item.datastreams['rightsMetadata'].ng_xml.to_s).to be_equivalent_to(rights_md)
describe '#build_rightsMetadata_datastream' do
it 'copies the default object rights' do
rights_md = @apo.defaultObjectRights.content
expect(@item.datastreams['rightsMetadata'].ng_xml.to_s).not_to be_equivalent_to(rights_md)
@item.build_rightsMetadata_datastream(@item.rightsMetadata)
expect(@item.datastreams['rightsMetadata'].ng_xml.to_s).to be_equivalent_to(rights_md)
end
end

describe '#thumb' do
Expand Down

0 comments on commit 004b2d7

Please sign in to comment.