Skip to content

Commit

Permalink
Merge 4b5c471 into 1cdd043
Browse files Browse the repository at this point in the history
  • Loading branch information
atz committed Aug 7, 2018
2 parents 1cdd043 + 4b5c471 commit d68c673
Show file tree
Hide file tree
Showing 12 changed files with 130 additions and 245 deletions.
2 changes: 1 addition & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ RSpec/NamedSubject:
RSpec/NestedGroups:
Max: 4 # default: 3
Exclude:
- 'spec/lib/audit/catalog_to_moab_instance_spec.rb'
- 'spec/lib/audit/catalog_to_moab_spec.rb'
- 'spec/lib/audit/moab_to_catalog_spec.rb'
- 'spec/services/checksum_validator_spec.rb'
- 'spec/services/preserved_object_handler_check_exist_spec.rb'
Expand Down
16 changes: 16 additions & 0 deletions app/jobs/catalog_to_moab_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Checks the filesystem based on the catalog, updating the database.
# Jobs are placed on the :c2m queue.
# @see Audit::CatalogToMoab
class CatalogToMoabJob < ApplicationJob
  queue_as :c2m

  # Refuse to enqueue unless the first job argument is a CompleteMoab.
  before_enqueue do |job|
    moab_arg = job.arguments.first
    raise ArgumentError, 'CompleteMoab param required' unless moab_arg.is_a?(CompleteMoab)
  end

  # @param [CompleteMoab] complete_moab the record whose catalog version is checked
  # @param [String] storage_dir handed through to Audit::CatalogToMoab.new
  # @see Audit::CatalogToMoab#initialize
  def perform(complete_moab, storage_dir)
    auditor = Audit::CatalogToMoab.new(complete_moab, storage_dir)
    auditor.check_catalog_version
  end
end
56 changes: 9 additions & 47 deletions app/lib/audit/catalog_to_moab.rb
Original file line number Diff line number Diff line change
@@ -1,49 +1,7 @@
module Audit
# Catalog to Moab existence check code
class CatalogToMoab

# Memoized class-level logger: a STDOUT logger that also broadcasts to log/c2m.log under Rails.root.
def self.logger
  return @logger if @logger

  stdout_logger = Logger.new(STDOUT)
  file_logger = Logger.new(Rails.root.join('log', 'c2m.log'))
  @logger = stdout_logger.extend(ActiveSupport::Logger.broadcast(file_logger))
end

# Runs check_catalog_version for each CompleteMoab on storage_dir selected by
# CompleteMoab.least_recent_version_audit(last_checked_b4_date), logging the start and end.
# @param last_checked_b4_date passed to CompleteMoab.least_recent_version_audit
# @param storage_dir passed to by_storage_location and to CatalogToMoab.new
# @param limit passed to ActiveRecordUtils.process_in_batches (presumably the batch size -- confirm)
def self.check_version_on_dir(last_checked_b4_date, storage_dir, limit=Settings.c2m_sql_limit)
  logger.info "#{Time.now.utc.iso8601} C2M check_version starting for #{storage_dir}"

  # The relation could return a lot of rows, so it is processed in batches. ActiveRecord's
  # .find_each is deliberately not used: it would disregard any order set on the relation,
  # whereas our own batch helper respects Relation order.
  stale_moabs = CompleteMoab.least_recent_version_audit(last_checked_b4_date).by_storage_location(storage_dir)
  ActiveRecordUtils.process_in_batches(stale_moabs, limit) do |cm|
    CatalogToMoab.new(cm, storage_dir).check_catalog_version
  end
ensure
  # Logged even when the audit raises.
  logger.info "#{Time.now.utc.iso8601} C2M check_version ended for #{storage_dir}"
end

# Same as check_version_on_dir, but wrapped in Profiler.print_profile
# under the label 'C2M_check_version_on_dir'.
def self.check_version_on_dir_profiled(last_checked_b4_date, storage_dir)
  Profiler.print_profile('C2M_check_version_on_dir') do
    check_version_on_dir(last_checked_b4_date, storage_dir)
  end
end

# Calls check_version_on_dir once per value in HostSettings.storage_roots, using
# "<root>/<Settings.moab.storage_trunk>" as the storage dir, and logs the start and end.
# @param last_checked_b4_date forwarded unchanged to check_version_on_dir
def self.check_version_all_dirs(last_checked_b4_date)
  logger.info "#{Time.now.utc.iso8601} C2M check_version_all_dirs starting"
  HostSettings.storage_roots.to_h.each_value do |root_location|
    storage_dir = "#{root_location}/#{Settings.moab.storage_trunk}"
    check_version_on_dir(last_checked_b4_date, storage_dir)
  end
ensure
  # Logged even when one of the per-directory checks raises.
  logger.info "#{Time.now.utc.iso8601} C2M check_version_all_dirs ended"
end

# Same as check_version_all_dirs, but wrapped in Profiler.print_profile
# under the label 'C2M_check_version_all_dirs'.
def self.check_version_all_dirs_profiled(last_checked_b4_date)
  Profiler.print_profile('C2M_check_version_all_dirs') do
    check_version_all_dirs(last_checked_b4_date)
  end
end

# ---- INSTANCE code below this line ---------------------------

include ::MoabValidationHandler

attr_reader :complete_moab, :storage_dir, :druid, :results

def initialize(complete_moab, storage_dir)
Expand All @@ -53,14 +11,18 @@ def initialize(complete_moab, storage_dir)
@results = AuditResults.new(druid, nil, complete_moab.moab_storage_root)
end

# Memoized per-instance logger writing to log/c2m.log under Rails.root.
def logger
  return @logger if @logger

  @logger = Logger.new(Rails.root.join('log', 'c2m.log'))
end

# shameless green implementation
def check_catalog_version
results.check_name = 'check_catalog_version'
unless complete_moab.matches_po_current_version?
results.add_result(AuditResults::CM_PO_VERSION_MISMATCH,
cm_version: complete_moab.version,
po_version: complete_moab.preserved_object.current_version)
return results.report_results(Audit::CatalogToMoab.logger)
return results.report_results(logger)
end

unless online_moab_found?
Expand All @@ -73,10 +35,10 @@ def check_catalog_version
results.add_result(AuditResults::MOAB_NOT_FOUND,
db_created_at: complete_moab.created_at.iso8601,
db_updated_at: complete_moab.updated_at.iso8601)
return results.report_results(Audit::CatalogToMoab.logger)
return results.report_results(logger)
end

return results.report_results(Audit::CatalogToMoab.logger) unless can_validate_current_comp_moab_status?
return results.report_results(logger) unless can_validate_current_comp_moab_status?

compare_version_and_take_action
end
Expand All @@ -100,7 +62,7 @@ def compare_version_and_take_action
if catalog_version == moab_version
set_status_as_seen_on_disk(true) unless complete_moab.ok?
results.add_result(AuditResults::VERSION_MATCHES, 'CompleteMoab')
results.report_results(Audit::CatalogToMoab.logger)
results.report_results(logger)
elsif catalog_version < moab_version
set_status_as_seen_on_disk(true)
pohandler = PreservedObjectHandler.new(druid, moab_version, moab.size, complete_moab.moab_storage_root)
Expand All @@ -110,7 +72,7 @@ def compare_version_and_take_action
results.add_result(
AuditResults::UNEXPECTED_VERSION, db_obj_name: 'CompleteMoab', db_obj_version: complete_moab.version
)
results.report_results(Audit::CatalogToMoab.logger)
results.report_results(logger)
end

complete_moab.update_audit_timestamps(ran_moab_validation?, true)
Expand Down
5 changes: 0 additions & 5 deletions app/models/complete_moab.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@ class CompleteMoab < ApplicationRecord

# Rows whose last_version_audit is NULL, or earlier than last_checked_b4_date
# (after it has been passed through normalize_date).
scope :least_recent_version_audit, lambda { |last_checked_b4_date|
where('last_version_audit IS NULL or last_version_audit < ?', normalize_date(last_checked_b4_date))

# NOTE(review): an earlier comment here described an .order clause that sorted NULLs first,
# but no .order is applied in this scope as shown; do not rely on result ordering -- confirm.
}

scope :fixity_check_expired, lambda {
Expand All @@ -56,8 +53,6 @@ class CompleteMoab < ApplicationRecord
' AND (last_checksum_validation + (fixity_ttl * INTERVAL \'1 SECOND\')) < CURRENT_TIMESTAMP'\
' OR last_checksum_validation IS NULL'
)
# possibly counter-intuitive: the .order sorts so that null values come first (because IS NOT NULL evaluates
# to 0 for nulls, which sorts before 1 for non-nulls, which are then sorted by last_checksum_validation)
}

# This is where we make sure we have ZMV rows for all needed ZipEndpoints and versions.
Expand Down
7 changes: 7 additions & 0 deletions app/models/moab_storage_root.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ def validate_expired_checksums!
cms.find_each { |cm| ChecksumValidationJob.perform_later(cm) }
end

# Queues a CatalogToMoabJob (C2M) for each associated CompleteMoab selected by
# least_recent_version_audit(last_checked_b4_date).
# @param last_checked_b4_date cutoff handed to the scope; defaults to Time.current
def c2m_check!(last_checked_b4_date = Time.current)
  stale_moabs = complete_moabs.least_recent_version_audit(last_checked_b4_date)
  stale_moabs.find_each { |cm| CatalogToMoabJob.perform_later(cm, storage_location) }
end

# Iterates over the storage roots enumerated in settings, creating a MoabStorageRoot for
# each if it doesn't already exist.
# @param preservation_policies [Enumerable<PreservationPolicy>] the list of preservation policies
Expand Down
34 changes: 7 additions & 27 deletions lib/tasks/c2m_tasks.rake
Original file line number Diff line number Diff line change
@@ -1,43 +1,23 @@
namespace :c2m do

desc "Run C2M version checks on a single storage root"
task :one_root, [:last_checked_b4_date, :storage_root, :profile] => [:environment] do |_t, args|
unless args[:profile] == 'profile' || args[:profile].nil?
p "usage: rake c2m:one_root[last_checked_b4_date, fixture_sr1] || rake c2m:one_root[last_checked_b4_date,fixture_sr1,profile]"
exit 1
end
root = args[:storage_root].to_sym
storage_dir = "#{HostSettings.storage_roots[root]}/#{Settings.moab.storage_trunk}"
task :one_root, [:last_checked_b4_date, :storage_root] => [:environment] do |_t, args|
root_key = args[:storage_root].to_sym
last_checked = args[:last_checked_b4_date].to_s
storage_dir = "#{HostSettings.storage_roots[root_key]}/#{Settings.moab.storage_trunk}"
root = MoabStorageRoot.find_by!(storage_location: storage_dir)
begin
if args[:profile] == 'profile'
puts "When done, check log/profile_C2M_check_version_on_dir[TIMESTAMP].txt for profiling details"
Audit::CatalogToMoab.check_version_on_dir_profiled(last_checked, storage_dir)
elsif args[:profile].nil?
Audit::CatalogToMoab.check_version_on_dir(last_checked, storage_dir)
end
puts "#{Time.now.utc.iso8601} Catalog to Moab version check on #{storage_dir} is done."
# last_checked holds the stringified :last_checked_b4_date task argument; there is no
# local named last_checked_b4_date in this task, so using it would raise NameError.
root.c2m_check!(last_checked)
rescue TypeError, ArgumentError
p "You've entered an incorrect timestamp format #{last_checked}."
p "Please enter correct timestamp format (UTC) (2018-02-01T18:54:48Z)"
end
end

desc "Run C2M version checks on all storage roots"
task :all_roots, [:last_checked_b4_date, :profile] => [:environment] do |_t, args|
unless args[:profile] == 'profile' || args[:profile].nil?
p "usage: rake c2m:all_roots[last_checked_b4_date] || rake c2m:all_roots[last_checked_b4_date,profile]"
exit 1
end
task :all_roots, [:last_checked_b4_date] => [:environment] do |_t, args|
last_checked = args[:last_checked_b4_date].to_s
begin
if args[:profile] == 'profile'
puts "When done, check log/profile_C2M_check_version_all_roots[TIMESTAMP].txt for profiling details"
Audit::CatalogToMoab.check_version_all_dirs_profiled(last_checked)
elsif args[:profile].nil?
Audit::CatalogToMoab.check_version_all_dirs(last_checked)
end
puts "#{Time.now.utc.iso8601} Catalog to Moab version check on all roots is done."
MoabStorageRoot.find_each { |root| root.c2m_check!(last_checked) }
rescue TypeError, ArgumentError
p "You've entered an incorrect timestamp format #{last_checked}."
p "Please enter correct timestamp format (UTC) (2018-02-01T18:54:48Z)"
Expand Down
22 changes: 22 additions & 0 deletions spec/factories/preserved_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,26 @@
current_version 1
preservation_policy { PreservationPolicy.default_policy }
end

# Looks through the 'default' fixture storage dirs for the PO's druid, then creates one
# complete moab for the PO on the storage root where the druid was found.
factory :preserved_object_fixture, parent: :preserved_object do
  current_version { Stanford::StorageServices.current_version(druid) }

  after(:create) do |po|
    candidate_dirs = Settings.storage_root_map['default'].to_h.values.map do |root_path|
      File.join(root_path, Settings.moab.storage_trunk)
    end

    # First storage dir in which find_moab_paths yields this PO's druid (nil when none does).
    root_dir = candidate_dirs.find do |dir|
      druid_present = false
      Stanford::MoabStorageDirectory.find_moab_paths(dir) do |druid, _path, _match|
        druid_present = true if druid && druid == po.druid
      end
      druid_present
    end

    storage_root = MoabStorageRoot.find_by!(storage_location: root_dir)
    create_list(
      :complete_moab, 1,
      preserved_object: po,
      moab_storage_root: storage_root,
      version: po.current_version,
      size: Stanford::StorageServices.object_size(po.druid),
      status: 'validity_unknown'
    )
  end
end
end
17 changes: 17 additions & 0 deletions spec/jobs/catalog_to_moab_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require 'rails_helper'

describe CatalogToMoabJob, type: :job do
  let(:storage_dir) { 'foobar' }
  let(:cm) { create(:complete_moab) }
  let(:job) { described_class.new(cm) }

  describe '#perform' do
    let(:auditor) { instance_double(Audit::CatalogToMoab) }

    # perform should build an Audit::CatalogToMoab from its two arguments and run the version check
    it 'calls Audit::CatalogToMoab#check_catalog_version' do
      expect(Audit::CatalogToMoab).to receive(:new).with(cm, storage_dir).and_return(auditor)
      expect(auditor).to receive(:check_catalog_version)
      job.perform(cm, storage_dir)
    end
  end
end
108 changes: 0 additions & 108 deletions spec/lib/audit/catalog_to_moab_class_spec.rb

This file was deleted.

0 comments on commit d68c673

Please sign in to comment.