Skip to content

Commit

Permalink
Remove DiscoverReport user params, add Enumeration, basic specs
Browse files Browse the repository at this point in the history
Updated YARD
  • Loading branch information
atz committed Aug 28, 2018
1 parent c932d13 commit ccf167d
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 63 deletions.
109 changes: 46 additions & 63 deletions app/services/discovery_report.rb
Original file line number Diff line number Diff line change
@@ -1,42 +1,45 @@
# May not be pretty, but you should see where this logic came from.
# Previously a single untested 200-line method from ./lib/pre_assembly/reporting.rb
# Takes a Bundle, enumerates report data via #each_row
class DiscoveryReport
attr_accessor :bundle
attr_accessor :confirm_checksums, :check_sourceids, :no_check_reg, :show_other, :show_smpl_cm, :show_staged # user params
attr_accessor :start_time
attr_reader :bundle, :start_time

delegate :bundle_dir, :checksums_file, :content_md_creation, :manifest, :object_discovery, :project_style, to: :bundle
delegate :error_count, :report_error_message, to: :bundle
delegate :bundle_dir, :content_md_creation, :manifest, :object_discovery, :project_style, to: :bundle
delegate :checksums_file, :confirm_checksums, :error_count, :object_filenames_unique?, to: :bundle

# @param [PreAssembly::Bundle] bundle
# @param [Hash<Symbol => Boolean>] params
def initialize(bundle, params = {})
def initialize(bundle)
raise ArgumentError unless bundle.is_a?(PreAssembly::Bundle)
self.bundle = bundle
[:confirm_checksums, :check_sourceids, :no_check_reg, :show_other, :show_smpl_cm, :show_staged].each do |attrib|
self.public_send("#{attrib}=", params[attrib])
end
end

def run
@start_time = Time.now
@bundle = bundle
bundle.discover_objects
bundle.process_manifest
objects_to_process.each { |dobj| per_dobj(dobj) }
end

# @return [Enumerable<Hash<Symbol => Object>>]
# @yield [Hash<Symbol => Object>] data structure about a DigitalObject
def each_row
return enum_for(:each_row) unless block_given?
bundle.objects_to_process.each { |dobj| yield per_dobj(dobj) }
end

# @return [PreAssembly::Smpl]
def smpl_manifest
@smpl_manifest ||= PreAssembly::Smpl.new(csv_filename: content_md_creation[:smpl_manifest], bundle_dir: bundle_dir)
end

# @param [PreAssembly::DigitalObject]
# @return [Hash<Symbol => Object>]
def per_dobj(dobj)
bundle_id = File.basename(dobj.unadjusted_container)
errors = {}
counts = Hash.new(Hash.new(0))
counts[:total_size] = dobj.object_files.map(&:filesize).sum
counts = {
total_size: dobj.object_files.map(&:filesize).sum,
mimetypes: Hash.new(0)
}
dobj.object_files.each { |obj| counts[:mimetypes][obj.mimetype] += 1 } # number of files by mimetype
filenames_with_no_extension = dobj.object_files.any? { |obj| File.extname(obj.path).empty? }
errors[:filename_no_extension] = true if dobj.object_files.any? { |obj| File.extname(obj.path).empty? }
counts[:empty_files] = dobj.object_files.count { |obj| obj.filesize == 0 }
if using_smpl_manifest? # if we are using a SMPL manifest, let's add how many files were found
bundle_id = File.basename(dobj.unadjusted_container)
cm_files = smpl_manifest.manifest[bundle_id].fetch(:files, [])
counts[:files_in_manifest] = cm_files.count
relative_paths = dobj.object_files.map(&:relative_path)
Expand All @@ -48,42 +51,42 @@ def per_dobj(dobj)
errors[:empty_files] = true if counts[:empty_files] > 0
errors[:empty_object] = true if counts[:total_size] > 0
errors[:missing_files] = true unless dobj.object_files_exist?
errors[:checksum_mismatch] = true unless !confirming_checksums? || confirm_checksums(dobj)
errors[:checksum_mismatch] = true unless !checksums_file || confirm_checksums(dobj)
errors[:dupes] = true unless object_filenames_unique?(dobj)
counts[:source_ids][dobj.source_id] += 1

if confirming_registration? # objects should already be registered, let's confirm that
dobj.determine_druid
druid = dobj.pid.include?('druid') ? dobj.pid : "druid:#{dobj.pid}"
begin
obj = Dor::Item.find(druid)
rescue ActiveFedora::ObjectNotFoundError
errors[:item_not_registered] = true
end
begin
obj.admin_policy_object || errors[:apo_empty] = true
rescue ActiveFedora::ObjectNotFoundError
errors[:apo_not_registered] = true
end
errors.merge!(registration_check(dobj.determine_druid))
if using_manifest? # check global uniqueness
errors[:source_id_dup] = true if dobj.source_id.any? { |id| Dor::SearchService.query_by_id(id) }
end
return { errors: errors, counts: counts }
end

if checking_sourceids? # check global uniqueness
errors[:source_id_dup] = true if dobj.source_id.any? { |id| Dor::SearchService.query_by_id(id) }
# @param [String] druid
# @return [Hash<Symbol => Boolean>] errors
def registration_check(druid)
begin
obj = Dor::Item.find(druid)
rescue ActiveFedora::ObjectNotFoundError
return { item_not_registered: true }
end
return { apo_empty: true } unless obj.admin_policy_object
{}
rescue ActiveFedora::ObjectNotFoundError
return { apo_not_registered: true }
end

# @return [String] primitive version
def header
fields = ['Object Container', 'Number of Items', 'Files with no ext', 'Files with 0 Size', 'Total Size', 'Files Readable']
fields.concat ['Label', 'Source ID'] if using_manifest?
fields.concat ['Num Files in CM Manifest', 'All CM files found'] if using_smpl_manifest?
fields << 'Checksums' if confirming_checksums?
fields << 'Duplicate Filenames?'
fields.concat ['DRUID', 'Registered?', 'APO exists?'] if confirming_registration?
fields << 'SourceID unique in DOR?' if checking_sourceids?
fields << 'Checksums' if checksums_file
fields.concat ['Duplicate Filenames?', 'DRUID', 'Registered?', 'APO exists?']
fields << 'SourceID unique in DOR?' if using_manifest?
fields.join(' , ')
end

# For use by template
def skipped_files
files = ['Thumbs.db', '.DS_Store'] # if these files are in the bundle directory but not in the manifest, they will be ignorned and not reported as missing
files << File.basename(content_md_creation[:smpl_manifest]) if using_smpl_manifest?
Expand All @@ -92,32 +95,12 @@ def skipped_files
files
end

# Interrogative methods returning Boolean

def barcode_project?
project_style[:get_druid_from] == :container_barcode
end

def checking_sourceids?
check_sourceids && using_manifest
end

def confirming_checksums?
checksums_file && confirm_checksums
end

def confirming_registration?
!no_check_reg # && !project_style[:should_register]
end

def show_smpl_cm?
show_smpl_cm && using_smpl_manifest?
end

# @return [Boolean]
def using_manifest?
manifest && object_discovery[:use_manifest]
end

# @return [Boolean]
def using_smpl_manifest?
content_md_creation[:style] == :smpl && File.exist?(File.join(bundle_dir, content_md_creation[:smpl_manifest]))
end
Expand Down
1 change: 1 addition & 0 deletions lib/pre_assembly/digital_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def pre_assemble(desc_md_xml = nil)
# Determining the druid.
####

# @return [DruidTools::Druid]
def determine_druid
k = project_style[:get_druid_from]
log " - determine_druid(#{k})"
Expand Down
32 changes: 32 additions & 0 deletions spec/services/discover_report_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
require 'rails_helper'

describe DiscoveryReport do
let(:bundle) { bundle_setup(:proj_revs)}
let(:params) { {} }
subject(:report) { described_class.new(bundle) }

describe '#initialize' do
it 'raises if PreAssembly::Bundle not received' do
expect { described_class.new }.to raise_error(ArgumentError)
expect { described_class.new({}) }.to raise_error(ArgumentError)
end
it 'accepts PreAssembly::Bundle, performs setup' do
expect(bundle).to receive(:discover_objects)
expect(bundle).to receive(:process_manifest)
expect { described_class.new(bundle) }.not_to raise_error
end
end

describe '#each_row' do
it 'returns an Enumerable of Hashes' do
expect(report.each_row).to be_an(Enumerable)
end
it 'yields per objects_to_process' do
expect(report).to receive(:per_dobj).with(1).and_return(fake: 1)
expect(report).to receive(:per_dobj).with(2).and_return(fake: 2)
expect(report).to receive(:per_dobj).with(3).and_return(fake: 3)
expect(bundle).to receive(:objects_to_process).and_return([1, 2, 3])
report.each_row { |_r| } # no-op
end
end
end

0 comments on commit ccf167d

Please sign in to comment.