Skip to content

Commit

Permalink
Move SdrIngestTransfer to dor-services-app (#483)
Browse files Browse the repository at this point in the history
Move SdrIngestTransfer to dor-services-app
  • Loading branch information
mjgiarlo committed Oct 29, 2019
2 parents 9322b65 + 80900d7 commit 4ce5c46
Show file tree
Hide file tree
Showing 82 changed files with 10,497 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .rubocop.yml
Expand Up @@ -36,7 +36,7 @@ RSpec/ExampleLength:
Enabled: false

RSpec/MultipleExpectations:
Max: 11
Max: 19

RSpec/ExpectActual:
Exclude:
Expand Down
28 changes: 20 additions & 8 deletions .rubocop_todo.yml
@@ -1,6 +1,6 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2019-09-18 07:25:01 -0500 using RuboCop version 0.74.0.
# on 2019-10-29 09:31:22 -0500 using RuboCop version 0.74.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
Expand All @@ -24,20 +24,25 @@ Lint/MissingCopEnableDirective:
Exclude:
- 'spec/dor/update_marc_record_service_spec.rb'

# Offense count: 30
# Offense count: 1
Lint/RescueException:
Exclude:
- 'app/services/sdr_ingest_service.rb'

# Offense count: 35
Metrics/AbcSize:
Max: 87

# Offense count: 3
# Offense count: 4
# Configuration parameters: CountComments.
Metrics/ClassLength:
Max: 124
Max: 125

# Offense count: 5
Metrics/CyclomaticComplexity:
Max: 11

# Offense count: 30
# Offense count: 36
# Configuration parameters: CountComments, ExcludedMethods.
Metrics/MethodLength:
Max: 43
Expand Down Expand Up @@ -142,13 +147,14 @@ RSpec/EmptyLineAfterSubject:
Exclude:
- 'spec/models/symphony_reader_spec.rb'

# Offense count: 23
# Offense count: 26
RSpec/ExpectInHook:
Exclude:
- 'spec/dor/update_marc_record_service_spec.rb'
- 'spec/services/public_desc_metadata_service_spec.rb'
- 'spec/services/publish_metadata_service_spec.rb'
- 'spec/services/registration_service_spec.rb'
- 'spec/services/sdr_ingest_service_spec.rb'
- 'spec/services/shelving_service_spec.rb'

# Offense count: 259
Expand All @@ -162,7 +168,7 @@ RSpec/InstanceVariable:
- 'spec/services/digital_stacks_service_spec.rb'
- 'spec/services/registration_service_spec.rb'

# Offense count: 51
# Offense count: 81
# Configuration parameters: EnforcedStyle.
# SupportedStyles: have_received, receive
RSpec/MessageSpies:
Expand All @@ -171,17 +177,23 @@ RSpec/MessageSpies:
- 'spec/services/digital_stacks_service_spec.rb'
- 'spec/services/public_desc_metadata_service_spec.rb'
- 'spec/services/registration_service_spec.rb'
- 'spec/services/sdr_ingest_service_spec.rb'
- 'spec/services/shelving_service_spec.rb'
- 'spec/services/version_service_spec.rb'

# Offense count: 1
# Configuration parameters: AggregateFailuresByDefault.
RSpec/MultipleExpectations:
Max: 19

# Offense count: 15
# Configuration parameters: IgnoreSharedExamples.
RSpec/NamedSubject:
Exclude:
- 'spec/services/shelving_service_spec.rb'
- 'spec/services/thumbnail_service_spec.rb'

# Offense count: 28
# Offense count: 29
RSpec/NestedGroups:
Max: 5

Expand Down
6 changes: 6 additions & 0 deletions app/controllers/objects_controller.rb
Expand Up @@ -46,6 +46,12 @@ def publish
head :created, location: result
end

def preserve
result = BackgroundJobResult.create
PreserveJob.perform_later(druid: params[:id], background_job_result: result)
head :created, location: result
end

def update_marc_record
Dor::UpdateMarcRecordService.new(@item).update
head :created
Expand Down
2 changes: 2 additions & 0 deletions app/controllers/sdr_controller.rb
Expand Up @@ -17,7 +17,9 @@ def cm_inv_diff
proxy_faraday_response(sdr_response)
end

# Deprecated
def ds_manifest
Honeybadger.notify('dor-services-app deprecated API endpoint `sdr#ds_manifest` called.')
sdr_response = sdr_client.manifest(ds_name: params[:dsname])
proxy_faraday_response(sdr_response)
end
Expand Down
23 changes: 23 additions & 0 deletions app/jobs/preserve_job.rb
@@ -0,0 +1,23 @@
# frozen_string_literal: true

# Move an object to SDR in the background
class PreserveJob < ApplicationJob
queue_as :default

# @param [String] druid the identifier of the item to be published
# @param [BackgroundJobResult] background_job_result identifier of a background job result to store status info
def perform(druid:, background_job_result:)
background_job_result.processing!

begin
item = Dor.find(druid)
SdrIngestService.transfer(item)
rescue StandardError => e
background_job_result.output = { errors: [{ title: 'Preservation error', detail: e.message }] }
background_job_result.complete!
raise e
end

background_job_result.complete!
end
end
192 changes: 192 additions & 0 deletions app/services/sdr_ingest_service.rb
@@ -0,0 +1,192 @@
# frozen_string_literal: true

require 'moab/stanford'

# Transfers files from the workspace to SDR
class SdrIngestService
# @param [Dor::Item] dor_item The representation of the digital object
# @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
def self.transfer(dor_item)
druid = dor_item.pid
workspace = DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root)
signature_catalog = get_signature_catalog(druid)
new_version_id = signature_catalog.version_id + 1
metadata_dir = extract_datastreams(dor_item, workspace)
verify_version_metadata(metadata_dir, new_version_id)
version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)
version_addtions = signature_catalog.version_additions(version_inventory)
content_addtions = version_addtions.group('content')
if content_addtions.nil? || content_addtions.files.empty?
content_dir = nil
else
new_file_list = content_addtions.path_list
content_dir = workspace.find_filelist_parent('content', new_file_list)
end
content_group = version_inventory.group('content')
signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
# export the bag (in tar format)
bag_dir = Pathname(Settings.sdr.local_export_home).join(druid.sub('druid:', ''))
bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
bagger.reset_bag
bagger.create_bag_inventory(:depositor)
bagger.deposit_group('content', content_dir)
bagger.deposit_group('metadata', metadata_dir)
bagger.create_tagfiles
verify_bag_structure(bag_dir)

# start SDR preservation workflow
Dor::Config.workflow.client.create_workflow_by_name(druid, 'preservationIngestWF')
rescue Exception => e
raise Dor::Exception, "Error exporting new object version to bag for #{dor_item.pid}: #{e.message}"
end

# Note: the following methods should probably all be private

# @param [String] druid The object identifier
# @return [Moab::SignatureCatalog] the catalog of all files previously ingested
def self.get_signature_catalog(druid)
response = SdrClient.new(druid).manifest(ds_name: 'signatureCatalog.xml')
return Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0) if response.status == 404

raise "Problem retrieving signatureCatalog for #{druid} from SDR: #{response}" unless response.success?

Moab::SignatureCatalog.parse response.body
end

# @param [Dor::Item] dor_item The representation of the digital object
# @param [DruidTools::Druid] workspace The representation of the item's work area
# @return [Pathname] Pull all the datastreams specified in the configuration file
# into the workspace's metadata directory, overwriting existing file if present
def self.extract_datastreams(dor_item, workspace)
metadata_dir = Pathname.new(workspace.path('metadata', true))
datastream_config.each do |ds_name, required|
metadata_file = metadata_dir.join("#{ds_name}.xml")
metadata_string = datastream_content(dor_item, ds_name, required)
metadata_file.open('w') { |f| f << metadata_string } if metadata_string
end
metadata_dir
end

# @return[Hash<Symbol,Boolean>] a hash of datastreams and whether they are required
def self.datastream_config
{
administrativeMetadata: false,
contentMetadata: false,
descMetadata: true,
defaultObjectRights: false,
events: false,
embargoMetadata: false,
identityMetadata: true,
provenanceMetadata: true,
relationshipMetadata: true,
rightsMetadata: false,
roleMetadata: false,
sourceMetadata: false,
technicalMetadata: false,
versionMetadata: true,
workflows: false,
geoMetadata: false
}
end
private_class_method :datastream_config

# @param [Dor::Item] dor_item The representation of the digital object
# @param [Symbol] ds_name The name of the desired Fedora datastream
# @param [Boolean] required is the datastream required
# @return [String] return the xml text of the specified datastream if it exists.
# If not found, return nil unless it is a required datastream in which case raise exception
def self.datastream_content(dor_item, ds_name, required)
ds = (ds_name == :relationshipMetadata ? 'RELS-EXT' : ds_name.to_s)
return dor_item.datastreams[ds].content if dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?

raise "required datastream #{ds_name} for #{dor_item.pid} not found in DOR" if required
end

# @param [Pathname] metadata_dir the location of the metadata directory in the workspace
# @param [Integer] expected the version identifer expected to be used in the versionMetadata
def self.verify_version_metadata(metadata_dir, expected)
vmfile = metadata_dir.join('versionMetadata.xml')
verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
true
end

# @param [Pathname] pathname The location of the file containing a version number
# @param [Integer] expected The version number that should be in the file
# @param [Integer] found The version number that is actually in the file
def self.verify_version_id(pathname, expected, found)
raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}" unless expected == found

true
end

# @param [Pathname] pathname the location of the versionMetadata file
# @return [Integer] the versionId found in the last version element, or nil if missing
def self.vmfile_version_id(pathname)
verify_pathname(pathname)
doc = Nokogiri::XML(File.open(pathname.to_s))
nodeset = doc.xpath('/versionMetadata/version')
version_id = nodeset.last['versionId']
version_id.nil? ? nil : version_id.to_i
end

# @param [Pathname] metadata_dir The location of the the object's metadata files
# @param [String] druid The object identifier
# @param [Integer] version_id The version number
# @return [Moab::FileInventory] Generate and return a version inventory for the object
def self.get_version_inventory(metadata_dir, druid, version_id)
version_inventory = get_content_inventory(metadata_dir, druid, version_id)
version_inventory.groups << get_metadata_file_group(metadata_dir)
version_inventory
end

# @param [Pathname] metadata_dir The location of the the object's metadata files
# @param [String] druid The object identifier
# @param [Integer] version_id The version number
# @return [Moab::FileInventory] Parse the contentMetadata
# and generate a new version inventory object containing a content group
def self.get_content_inventory(metadata_dir, druid, version_id)
content_metadata = get_content_metadata(metadata_dir)
if content_metadata
Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
else
Moab::FileInventory.new(type: 'version', digital_object_id: druid, version_id: version_id)
end
end

# @param [Pathname] metadata_dir The location of the the object's metadata files
# @return [String] Return the contents of the contentMetadata.xml file from the content directory
def self.get_content_metadata(metadata_dir)
content_metadata_pathname = metadata_dir.join('contentMetadata.xml')
content_metadata_pathname.read if content_metadata_pathname.exist?
end

# @param [Pathname] metadata_dir The location of the the object's metadata files
# @return [Moab::FileGroup] Traverse the metadata directory and generate a metadata group
def self.get_metadata_file_group(metadata_dir)
file_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(metadata_dir)
file_group
end

# @param [Pathname] bag_dir the location of the bag to be verified
# @return [Boolean] true if all required files exist, raises exception if not
def self.verify_bag_structure(bag_dir)
verify_pathname(bag_dir)
verify_pathname(bag_dir.join('data'))
verify_pathname(bag_dir.join('bagit.txt'))
verify_pathname(bag_dir.join('bag-info.txt'))
verify_pathname(bag_dir.join('manifest-sha256.txt'))
verify_pathname(bag_dir.join('tagmanifest-sha256.txt'))
verify_pathname(bag_dir.join('versionAdditions.xml'))
verify_pathname(bag_dir.join('versionInventory.xml'))
verify_pathname(bag_dir.join('data', 'metadata', 'versionMetadata.xml'))
true
end

# @param [Pathname] pathname The file whose existence should be verified
# @return [Boolean] true if file exists, raises exception if not
def self.verify_pathname(pathname)
raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?

true
end
end
3 changes: 2 additions & 1 deletion config/routes.rb
Expand Up @@ -13,7 +13,7 @@
scope '/sdr/objects/:druid' do
post 'cm-inv-diff', to: 'sdr#cm_inv_diff'
get 'current_version', to: 'sdr#current_version' # deprecated; caller should use preservation-client
get 'manifest/:dsname', to: 'sdr#ds_manifest', format: false, constraints: { dsname: /.+/ }
get 'manifest/:dsname', to: 'sdr#ds_manifest', format: false, constraints: { dsname: /.+/ } # deprecated
get 'metadata/:dsname', to: 'sdr#ds_metadata', format: false, constraints: { dsname: /.+/ }
get 'content/:filename', to: 'sdr#file_content', format: false, constraints: { filename: /.+/ }
end
Expand All @@ -29,6 +29,7 @@
resources :objects, only: [:create, :show] do
member do
post 'publish'
post 'preserve'
post 'update_marc_record'
post 'notify_goobi'
post 'release_tags'
Expand Down
35 changes: 35 additions & 0 deletions openapi.json
Expand Up @@ -383,6 +383,41 @@
]
}
},
"/objects/{id}/preserve": {
"post": {
"tags": [
"objects"
],
"summary": "Move an object to preservation",
"description": "",
"operationId": "objects#preserve",
"responses": {
"201": {
"description": "Preservation action started",
"headers": {
"Location": {
"description": "the status of the action is found at this URI",
"schema": {
"type": "string",
"format": "uri"
}
}
}
}
},
"parameters": [
{
"name": "id",
"in": "path",
"description": "ID of object",
"required": true,
"schema": {
"$ref": "#/components/schemas/Druid"
}
}
]
}
},
"/objects/{id}/update_marc_record": {
"post": {
"tags": [
Expand Down

0 comments on commit 4ce5c46

Please sign in to comment.