Skip to content
This repository has been archived by the owner on May 11, 2022. It is now read-only.

Commit

Permalink
Merge 9e72fb3 into 1918bef
Browse files Browse the repository at this point in the history
  • Loading branch information
jcoyne committed Dec 20, 2018
2 parents 1918bef + 9e72fb3 commit 8d3049b
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 96 deletions.
6 changes: 5 additions & 1 deletion lib/dor/models/concerns/itemizable.rb
Expand Up @@ -6,6 +6,8 @@
module Dor
module Itemizable
extend ActiveSupport::Concern
extend Deprecation
self.deprecation_horizon = 'dor-services version 7.0.0'

included do
has_metadata name: 'contentMetadata', type: Dor::ContentMetadataDS, label: 'Content Metadata', control_group: 'M'
Expand All @@ -21,6 +23,7 @@ def clear_diff_cache
diff_pattern = File.join(druid.temp_dir, DIFF_FILENAME + '.*')
FileUtils.rm_f Dir.glob(diff_pattern)
end
deprecation_deprecate clear_diff_cache: 'No longer used by any DLSS code and will be removed without replacement'

# Retrieves file difference manifest for contentMetadata from SDR
#
Expand All @@ -32,7 +35,8 @@ def get_content_diff(subset = :all, version = nil)

raise Dor::Exception, 'Missing contentMetadata datastream' if !respond_to?(:contentMetadata) || contentMetadata.nil?

Sdr::Client.get_content_diff(pid, contentMetadata.content, subset, version)
Sdr::Client.get_content_diff(pid, contentMetadata.content, subset.to_s, version)
end
deprecation_deprecate get_content_diff: 'Use Sdr::Client.get_content_diff instead'
end
end
5 changes: 4 additions & 1 deletion lib/dor/services/shelving_service.rb
Expand Up @@ -34,7 +34,10 @@ def shelve
# retrieve the differences between the current contentMetadata and the previously ingested version
# (filtering to select only the files that should be shelved to stacks)
def shelve_diff
inventory_diff = work.get_content_diff(:shelve)
raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
raise Dor::Exception, 'Missing contentMetadata datastream' if work.contentMetadata.nil?

inventory_diff = Sdr::Client.get_content_diff(work.pid, work.contentMetadata.content, 'shelve')
inventory_diff.group_difference('content')
end

Expand Down
7 changes: 4 additions & 3 deletions lib/dor/services/technical_metadata_service.rb
Expand Up @@ -49,10 +49,11 @@ def self.test_jhove_service
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [FileGroupDifference] The differences between two versions of a group of files
def self.get_content_group_diff(dor_item)
inventory_diff = dor_item.get_content_diff('all')
return Moab::FileGroupDifference.new if dor_item.contentMetadata.nil?
raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?

inventory_diff = Sdr::Client.get_content_diff(dor_item.pid, dor_item.contentMetadata.content, 'all')
inventory_diff.group_difference('content')
rescue Dor::Exception # no contentMetadata
Moab::FileGroupDifference.new
end

# @param [FileGroupDifference] content_group_diff
Expand Down
19 changes: 15 additions & 4 deletions lib/dor/utils/sdr_client.rb
Expand Up @@ -36,18 +36,29 @@ def get_signature_catalog(druid)
Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0)
end

# @return [Moab::FileInventoryDifference] the differences for the given content and subset
def get_content_diff(druid, current_content, subset = :all, version = nil)
raise Dor::ParameterError, "Invalid subset value: #{subset}" unless %w(all shelve preserve publish).include?(subset.to_s)
# Retrieves file difference manifest for contentMetadata from SDR
#
# @param [String] druid The object identifier
# @param [String] current_content The contentMetadata xml
# @param [String] subset ('all') Which files to compare, selected by file attribute: one of 'all', 'shelve', 'preserve' or 'publish'.
# @param [Integer, NilClass] version (nil)
# @return [Moab::FileInventoryDifference] the differences for the given content and subset (i.e.: cm_inv_diff manifest)
def get_content_diff(druid, current_content, subset = 'all', version = nil)
unless subset.is_a? String
Deprecation.warn(self, "subset parameter must be a string. You provided '#{subset.inspect}'. This will be an error in version 7")
subset = subset.to_s
end
raise Dor::ParameterError, "Invalid subset value: #{subset}" unless %w(all shelve preserve publish).include?(subset)

query_string = { subset: subset.to_s }
query_string = { subset: subset }
query_string[:version] = version.to_s unless version.nil?
query_string = URI.encode_www_form(query_string)
sdr_query = "objects/#{druid}/cm-inv-diff?#{query_string}"
response = client[sdr_query].post(current_content, content_type: 'application/xml')
Moab::FileInventoryDifference.parse(response)
end

# Issues a GET for one preserved file of an object through the SDR client.
# This is used by Argo.
#
# @param [String] druid The object identifier
# @param [String] filename The file name; characters that are not legal in a URI are percent-escaped
# @param [Integer, String] version The version requested (interpolated as-is into the query string)
# @return whatever the client resource's #get returns (presumably the HTTP response carrying the file content -- verify against `client`)
def get_preserved_file_content(druid, filename, version)
  # URI.encode was removed in Ruby 3.0; the parser's #escape applies the same escaping rules.
  escaped_filename = URI::DEFAULT_PARSER.escape(filename)
  client["objects/#{druid}/content/#{escaped_filename}?version=#{version}"].get
end
Expand Down
14 changes: 8 additions & 6 deletions spec/models/concerns/itemizable_spec.rb
Expand Up @@ -7,25 +7,27 @@ class ItemizableItem < ActiveFedora::Base
include Dor::Processable
end

describe Dor::Itemizable do
before(:each) { stub_config }
after(:each) { unstub_config }

before :each do
RSpec.describe Dor::Itemizable do
before do
stub_config
@item = instantiate_fixture('druid:bb046xn0881', ItemizableItem)
end

after { unstub_config }

it 'has a contentMetadata datastream' do
expect(@item.contentMetadata).to be_a(Dor::ContentMetadataDS)
end

it 'will run get_content_diff' do
expect(Deprecation).to receive(:warn)
expect(Sdr::Client).to receive(:get_content_diff)
.with(@item.pid, @item.contentMetadata.content, :all, nil)
.with(@item.pid, @item.contentMetadata.content, 'all', nil)
expect { @item.get_content_diff }.not_to raise_error
end

it 'will run get_content_diff without contentMetadata' do
expect(Deprecation).to receive(:warn)
@item.datastreams.delete 'contentMetadata'
expect { @item.get_content_diff }.to raise_error(Dor::Exception)
end
Expand Down
86 changes: 48 additions & 38 deletions spec/services/shelving_service_spec.rb
Expand Up @@ -49,44 +49,54 @@
describe '#shelve_diff' do
let(:druid) { 'druid:jq937jp0017' }
subject(:result) { service.send(:shelve_diff) }
it 'retrieves the differences between the current contentMetadata and the previously ingested version' do
# read in a FileInventoryDifference manifest from the fixtures area
xml_pathname = Pathname('spec').join('fixtures', 'content_diff_reports', 'jq937jp0017-v1-v2.xml')
expect(work).to receive(:get_content_diff).with(:shelve).and_return(Moab::FileInventoryDifference.parse(xml_pathname.read))
expect(result.to_xml).to be_equivalent_to(<<-XML
<fileGroupDifference groupId="content" differenceCount="3" identical="3" copyadded="0" copydeleted="0" renamed="0" modified="1" added="0" deleted="2">
<subset change="identical" count="3">
<file change="identical" basisPath="page-2.jpg" otherPath="same">
<fileSignature size="39450" md5="82fc107c88446a3119a51a8663d1e955" sha1="d0857baa307a2e9efff42467b5abd4e1cf40fcd5" sha256="235de16df4804858aefb7690baf593fb572d64bb6875ec522a4eea1f4189b5f0"/>
</file>
<file change="identical" basisPath="page-3.jpg" otherPath="same">
<fileSignature size="19125" md5="a5099878de7e2e064432d6df44ca8827" sha1="c0ccac433cf02a6cee89c14f9ba6072a184447a2" sha256="7bd120459eff0ecd21df94271e5c14771bfca5137d1dd74117b6a37123dfe271"/>
</file>
<file change="identical" basisPath="title.jpg" otherPath="same">
<fileSignature size="40873" md5="1a726cd7963bd6d3ceb10a8c353ec166" sha1="583220e0572640abcd3ddd97393d224e8053a6ad" sha256="8b0cee693a3cf93cf85220dd67c5dc017a7edcdb59cde8fa7b7f697be162b0c5"/>
</file>
</subset>
<subset change="renamed" count="0"/>
<subset change="modified" count="1">
<file change="modified" basisPath="page-1.jpg" otherPath="same">
<fileSignature size="25153" md5="3dee12fb4f1c28351c7482b76ff76ae4" sha1="906c1314f3ab344563acbbbe2c7930f08429e35b" sha256="41aaf8598c9d8e3ee5d55efb9be11c542099d9f994b5935995d0abea231b8bad"/>
<fileSignature size="32915" md5="c1c34634e2f18a354cd3e3e1574c3194" sha1="0616a0bd7927328c364b2ea0b4a79c507ce915ed" sha256="b78cc53b7b8d9ed86d5e3bab3b699c7ed0db958d4a111e56b6936c8397137de0"/>
</file>
</subset>
<subset change="deleted" count="2">
<file change="deleted" basisPath="intro-1.jpg" otherPath="">
<fileSignature size="41981" md5="915c0305bf50c55143f1506295dc122c" sha1="60448956fbe069979fce6a6e55dba4ce1f915178" sha256="4943c6ffdea7e33b74fd7918de900de60e9073148302b0ad1bf5df0e6cec032a"/>
</file>
<file change="deleted" basisPath="intro-2.jpg" otherPath="">
<fileSignature size="39850" md5="77f1a4efdcea6a476505df9b9fba82a7" sha1="a49ae3f3771d99ceea13ec825c9c2b73fc1a9915" sha256="3a28718a8867e4329cd0363a84aee1c614d0f11229a82e87c6c5072a6e1b15e7"/>
</file>
</subset>
<subset change="added" count="0"/>
<subset change="copyadded" count="0"/>
<subset change="copydeleted" count="0"/>
</fileGroupDifference>
XML
)

context 'when contentMetadata exists' do
it 'retrieves the differences between the current contentMetadata and the previously ingested version' do
# read in a FileInventoryDifference manifest from the fixtures area
xml_pathname = Pathname('spec').join('fixtures', 'content_diff_reports', 'jq937jp0017-v1-v2.xml')
expect(Sdr::Client).to receive(:get_content_diff).with(druid, nil, 'shelve').and_return(Moab::FileInventoryDifference.parse(xml_pathname.read))
expect(result.to_xml).to be_equivalent_to(<<-XML
<fileGroupDifference groupId="content" differenceCount="3" identical="3" copyadded="0" copydeleted="0" renamed="0" modified="1" added="0" deleted="2">
<subset change="identical" count="3">
<file change="identical" basisPath="page-2.jpg" otherPath="same">
<fileSignature size="39450" md5="82fc107c88446a3119a51a8663d1e955" sha1="d0857baa307a2e9efff42467b5abd4e1cf40fcd5" sha256="235de16df4804858aefb7690baf593fb572d64bb6875ec522a4eea1f4189b5f0"/>
</file>
<file change="identical" basisPath="page-3.jpg" otherPath="same">
<fileSignature size="19125" md5="a5099878de7e2e064432d6df44ca8827" sha1="c0ccac433cf02a6cee89c14f9ba6072a184447a2" sha256="7bd120459eff0ecd21df94271e5c14771bfca5137d1dd74117b6a37123dfe271"/>
</file>
<file change="identical" basisPath="title.jpg" otherPath="same">
<fileSignature size="40873" md5="1a726cd7963bd6d3ceb10a8c353ec166" sha1="583220e0572640abcd3ddd97393d224e8053a6ad" sha256="8b0cee693a3cf93cf85220dd67c5dc017a7edcdb59cde8fa7b7f697be162b0c5"/>
</file>
</subset>
<subset change="renamed" count="0"/>
<subset change="modified" count="1">
<file change="modified" basisPath="page-1.jpg" otherPath="same">
<fileSignature size="25153" md5="3dee12fb4f1c28351c7482b76ff76ae4" sha1="906c1314f3ab344563acbbbe2c7930f08429e35b" sha256="41aaf8598c9d8e3ee5d55efb9be11c542099d9f994b5935995d0abea231b8bad"/>
<fileSignature size="32915" md5="c1c34634e2f18a354cd3e3e1574c3194" sha1="0616a0bd7927328c364b2ea0b4a79c507ce915ed" sha256="b78cc53b7b8d9ed86d5e3bab3b699c7ed0db958d4a111e56b6936c8397137de0"/>
</file>
</subset>
<subset change="deleted" count="2">
<file change="deleted" basisPath="intro-1.jpg" otherPath="">
<fileSignature size="41981" md5="915c0305bf50c55143f1506295dc122c" sha1="60448956fbe069979fce6a6e55dba4ce1f915178" sha256="4943c6ffdea7e33b74fd7918de900de60e9073148302b0ad1bf5df0e6cec032a"/>
</file>
<file change="deleted" basisPath="intro-2.jpg" otherPath="">
<fileSignature size="39850" md5="77f1a4efdcea6a476505df9b9fba82a7" sha1="a49ae3f3771d99ceea13ec825c9c2b73fc1a9915" sha256="3a28718a8867e4329cd0363a84aee1c614d0f11229a82e87c6c5072a6e1b15e7"/>
</file>
</subset>
<subset change="added" count="0"/>
<subset change="copyadded" count="0"/>
<subset change="copydeleted" count="0"/>
</fileGroupDifference>
XML
)
end
end

context 'when contentMetadata does not exist' do
it 'raises an error' do
work.datastreams.delete 'contentMetadata'
expect { result }.to raise_error(Dor::Exception)
end
end
end

Expand Down
84 changes: 41 additions & 43 deletions spec/services/technical_metadata_service_spec.rb
Expand Up @@ -3,43 +3,38 @@
require 'spec_helper'
require 'moab/stanford'

describe Dor::TechnicalMetadataService do
before(:all) do
@object_ids = %w(dd116zh0343 du000ps9999 jq937jp0017)
@druid_tool = {}
end

before(:each) do
@fixtures = fixtures = Pathname(File.dirname(__FILE__)).join('../fixtures')
RSpec.describe Dor::TechnicalMetadataService do
let(:object_ids) { %w(dd116zh0343 du000ps9999 jq937jp0017) }
let(:druid_tool) { {} }
before do
fixtures = Pathname(File.dirname(__FILE__)).join('../fixtures')
wsfixtures = fixtures.join('workspace').to_s
Dor::Config.push! do
sdr.local_workspace_root wsfixtures
end

@sdr_repo = @fixtures.join('sdr_repo')
@workspace_pathname = Pathname(wsfixtures)

@sdr_repo = fixtures.join('sdr_repo')
@inventory_differences = {}
@deltas = {}
@new_files = {}
@repo_techmd = {}
@new_file_techmd = {}
@expected_techmd = {}

@object_ids.each do |id|
object_ids.each do |id|
druid = "druid:#{id}"
@druid_tool[id] = DruidTools::Druid.new(druid, @workspace_pathname.to_s)
repo_content_pathname = @fixtures.join('sdr_repo', id, 'v0001', 'data', 'content')
work_content_pathname = Pathname(@druid_tool[id].content_dir)
druid_tool[id] = DruidTools::Druid.new(druid, Pathname(wsfixtures).to_s)
repo_content_pathname = fixtures.join('sdr_repo', id, 'v0001', 'data', 'content')
work_content_pathname = Pathname(druid_tool[id].content_dir)
repo_content_inventory = Moab::FileGroup.new(group_id: 'content').group_from_directory(repo_content_pathname)
work_content_inventory = Moab::FileGroup.new.group_from_directory(work_content_pathname)
@inventory_differences[id] = Moab::FileGroupDifference.new
@inventory_differences[id].compare_file_groups(repo_content_inventory, work_content_inventory)
@deltas[id] = @inventory_differences[id].file_deltas
@new_files[id] = Dor::TechnicalMetadataService.get_new_files(@deltas[id])
@repo_techmd[id] = @fixtures.join('sdr_repo', id, 'v0001', 'data', 'metadata', 'technicalMetadata.xml').read
@repo_techmd[id] = fixtures.join('sdr_repo', id, 'v0001', 'data', 'metadata', 'technicalMetadata.xml').read
@new_file_techmd[id] = Dor::TechnicalMetadataService.get_new_technical_metadata(druid, @new_files[id])
@expected_techmd[id] = Pathname(@druid_tool[id].metadata_dir).join('technicalMetadata.xml').read
@expected_techmd[id] = Pathname(druid_tool[id].metadata_dir).join('technicalMetadata.xml').read
end
end

Expand All @@ -48,15 +43,15 @@
end

after(:all) do
@object_ids = [] if @object_ids.nil?
@object_ids.each do |id|
temp_pathname = Pathname(@druid_tool[id].temp_dir(false))
object_ids = [] if object_ids.nil?
object_ids.each do |id|
temp_pathname = Pathname(druid_tool[id].temp_dir(false))
temp_pathname.rmtree if temp_pathname.exist?
end
end

specify 'Dor::TechnicalMetadataService.add_update_technical_metadata' do
@object_ids.each do |id|
object_ids.each do |id|
dor_item = double(Dor::Item)
allow(dor_item).to receive(:pid).and_return("druid:#{id}")
expect(Dor::TechnicalMetadataService).to receive(:get_content_group_diff).with(dor_item).and_return(@inventory_differences[id])
Expand All @@ -75,32 +70,35 @@
end
end

specify 'Dor::TechnicalMetadataService.get_content_group_diff(dor_item)' do
@object_ids.each do |id|
group_diff = @inventory_differences[id]
inventory_diff = Moab::FileInventoryDifference.new(
digital_object_id: "druid:#{id}",
basis: 'old_content_metadata',
other: 'new_content_metadata',
report_datetime: Time.now.utc.to_s
)
inventory_diff.group_differences << group_diff
dor_item = double(Dor::Item)
allow(dor_item).to receive(:get_content_diff).with('all').and_return(inventory_diff)
content_group_diff = Dor::TechnicalMetadataService.get_content_group_diff(dor_item)
expect(content_group_diff.to_xml).to eq(group_diff.to_xml)
describe 'Dor::TechnicalMetadataService.get_content_group_diff(dor_item)' do
let(:contentMetadata) { instance_double(Dor::ContentMetadataDS, content: 'foo') }
it 'calculates the differences' do
object_ids.each do |id|
group_diff = @inventory_differences[id]
druid = "druid:#{id}"
inventory_diff = Moab::FileInventoryDifference.new(
digital_object_id: druid,
basis: 'old_content_metadata',
other: 'new_content_metadata',
report_datetime: Time.now.utc.to_s
)
inventory_diff.group_differences << group_diff
dor_item = instance_double(Dor::Item, contentMetadata: contentMetadata, pid: druid)
allow(Sdr::Client).to receive(:get_content_diff).with(druid, 'foo', 'all').and_return(inventory_diff)
content_group_diff = Dor::TechnicalMetadataService.get_content_group_diff(dor_item)
expect(content_group_diff.to_xml).to eq(group_diff.to_xml)
end
end
end

specify 'Dor::TechnicalMetadataService.get_content_group_diff(dor_item) without contentMetadata' do
dor_item = double(Dor::Item)
allow(dor_item).to receive(:get_content_diff).with('all').and_raise(Dor::Exception)
dor_item = instance_double(Dor::Item, contentMetadata: nil)
content_group_diff = Dor::TechnicalMetadataService.get_content_group_diff(dor_item)
expect(content_group_diff.difference_count).to be_zero
end

specify 'Dor::TechnicalMetadataService.get_file_deltas(content_group_diff)' do
@object_ids.each do |id|
object_ids.each do |id|
group_diff = @inventory_differences[id]
expect(Dor::TechnicalMetadataService.get_file_deltas(group_diff)).to eq(@deltas[id])
end
Expand Down Expand Up @@ -166,7 +164,7 @@
end

specify 'Dor::TechnicalMetadataService.get_new_technical_metadata' do
@object_ids.each do |id|
object_ids.each do |id|
new_techmd = Dor::TechnicalMetadataService.get_new_technical_metadata("druid:#{id}", @new_files[id])
file_nodes = Nokogiri::XML(new_techmd).xpath('//file')
case id
Expand All @@ -181,8 +179,8 @@
end

specify 'Dor::TechnicalMetadataService.write_fileset' do
@object_ids.each do |id|
temp_dir = @druid_tool[id].temp_dir
object_ids.each do |id|
temp_dir = druid_tool[id].temp_dir
new_files = @new_files[id]
filename = Dor::TechnicalMetadataService.write_fileset(temp_dir, new_files)
if new_files.size > 0
Expand All @@ -194,7 +192,7 @@
end

specify 'Dor::TechnicalMetadataService.merge_file_nodes' do
@object_ids.each do |id|
object_ids.each do |id|
old_techmd = @repo_techmd[id]
new_techmd = @new_file_techmd[id]
new_nodes = Dor::TechnicalMetadataService.get_file_nodes(new_techmd)
Expand Down Expand Up @@ -295,7 +293,7 @@
end

specify 'Dor::TechnicalMetadataService.build_technical_metadata(druid,merged_nodes)' do
@object_ids.each do |id|
object_ids.each do |id|
old_techmd = @repo_techmd[id]
new_techmd = @new_file_techmd[id]
deltas = @deltas[id]
Expand Down
1 change: 1 addition & 0 deletions spec/utils/sdr_client_spec.rb
Expand Up @@ -108,6 +108,7 @@
end

it 'is configured to use SDR' do
expect(Deprecation).to receive(:warn)
expect(Sdr::Client.client.url).to eq Dor::Config.dor_services.rest_client['v1/sdr'].url
end
end
Expand Down

0 comments on commit 8d3049b

Please sign in to comment.