Skip to content

Commit

Permalink
Merge pull request #72 from sul-dlss/storage-object-validator-aggregate
Browse files Browse the repository at this point in the history
(MV) StorageObjectValidator Class
  • Loading branch information
jmartin-sul committed Nov 2, 2017
2 parents 4695dcc + 066efe5 commit 9d711b1
Show file tree
Hide file tree
Showing 45 changed files with 641 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .rubocop.yml
Expand Up @@ -27,6 +27,11 @@ Metrics/LineLength:
- 'lib/moab/file_manifestation.rb'
- 'spec/unit_tests/moab/bagger_spec.rb' # remove after PR #52 merged
- 'spec/unit_tests/moab/file_group_difference_spec.rb' # remove after PR #50 merged
- 'spec/unit_tests/moab/storage_object_validator_spec.rb'

# Code stayed readable when allowing a higher perceived complexity
Metrics/PerceivedComplexity:
Max: 9

# --- Naming ---

Expand All @@ -41,6 +46,14 @@ Naming/FileName:

# --- Style ---

# Pointless change
Style/FormatStringToken:
Enabled: false

# Loops were easier to understand without using 'next' statement
Style/Next:
Enabled: false

# because ' vs " isn't a big deal for readability or maintainability or execution time
Style/StringLiterals:
Enabled: false
1 change: 1 addition & 0 deletions lib/moab.rb
Expand Up @@ -56,3 +56,4 @@ module Moab
require 'moab/storage_services'
require 'moab/exceptions'
require 'moab/verification_result'
require 'moab/storage_object_validator'
180 changes: 180 additions & 0 deletions lib/moab/storage_object_validator.rb
@@ -0,0 +1,180 @@
require 'moab'

module Moab
# Given a druid path, are the contents actually a well-formed Moab?
# Shameless green: repetitious code included.
class StorageObjectValidator

EXPECTED_DATA_SUB_DIRS = ["content", "metadata"].freeze
IMPLICIT_DIRS = ['.', '..'].freeze # unlike Find.find, Dir.entries returns these
DATA_DIR_NAME = "data".freeze
EXPECTED_VERSION_SUB_DIRS = [DATA_DIR_NAME, "manifests"].freeze
MANIFEST_INVENTORY_PATH = 'manifests/manifestInventory.xml'.freeze
SIGNATURE_CATALOG_PATH = 'manifests/signatureCatalog.xml'.freeze

# error codes
INCORRECT_DIR = 0
MISSING_DIR = 1
EXTRA_CHILD_DETECTED = 2
VERSION_DIR_BAD_FORMAT = 3
NO_SIGNATURE_CATALOG = 4
NO_MANIFEST_INVENTORY = 5
NO_XML_FILES = 6
VERSIONS_NOT_IN_ORDER = 7
FILES_IN_VERSION_DIR = 8

ERROR_CODE_TO_MESSAGES = {
INCORRECT_DIR=> "Incorrect items in path",
MISSING_DIR => "Missing directory: %{addl}",
EXTRA_CHILD_DETECTED => "Unexpected item in path: %{addl}",
VERSION_DIR_BAD_FORMAT => "Version directory name not in 'v00xx' format",
FILES_IN_VERSION_DIR => "Top level should contain only sequential version directories. Also contains files: %{addl}",
NO_SIGNATURE_CATALOG => "Version: %{addl} Missing signatureCatalog.xml",
NO_MANIFEST_INVENTORY => "Version: %{addl} Missing manifestInventory.xml",
NO_XML_FILES => "Version: %{addl} Missing all required metadata files",
VERSIONS_NOT_IN_ORDER => "Should contain only sequential version directories. Current directories: %{addl}"
}.freeze

attr_reader :storage_obj_path

def initialize(storage_object)
@storage_obj_path = storage_object.object_pathname
@directory_entries_hash = {}
end

def validation_errors
errors = []
errors.concat check_correctly_named_version_dirs
errors.concat check_sequential_version_dirs if errors.empty?
errors.concat check_correctly_formed_moabs if errors.empty?
errors
end

# TODO: Figure out which methods should be public

private

def version_directories
@vdirs ||= sub_dirs(storage_obj_path)
end

def check_correctly_named_version_dirs
errors = []
version_directories.each do |version_dir|
errors << result_hash(VERSION_DIR_BAD_FORMAT) unless version_dir =~ /^[v]\d{4}$/
end
errors
end

# This method will be called only if the version directories are correctly named
def check_sequential_version_dirs
errors = []
version_directories.each_with_index do |dir_name, index|
expected_vers_num = index + 1 # version numbering starts at 1, array indexing starts at 0
if dir_name[1..-1].to_i != expected_vers_num
errors << result_hash(VERSIONS_NOT_IN_ORDER, version_directories)
break
end
end

errors
end

def check_correctly_formed_moabs
errors = []
version_directories.each do |version_dir|
version_path = "#{storage_obj_path}/#{version_dir}"
version_sub_dirs = sub_dirs(version_path)
before_result_size = errors.size
errors.concat check_sub_dirs(version_sub_dirs, version_dir, EXPECTED_VERSION_SUB_DIRS)
after_result_size = errors.size
# run the following checks if this version dir passes check_sub_dirs, even if some prior version dirs didn't
if before_result_size == after_result_size
data_dir_path = "#{version_path}/#{DATA_DIR_NAME}"
data_sub_dirs = sub_dirs(data_dir_path)
errors.concat check_sub_dirs(data_sub_dirs, version_dir, EXPECTED_DATA_SUB_DIRS)
errors.concat check_required_manifest_files(version_path, version_dir)
end
end

errors
end

def check_sub_dirs(sub_dirs, version, required_sub_dirs)
errors = []
sub_dir_count = sub_dirs.size
if sub_dir_count == required_sub_dirs.size
errors.concat expected_dirs(sub_dirs, version, required_sub_dirs)
elsif sub_dir_count > required_sub_dirs.size
errors.concat found_unexpected(sub_dirs, version, required_sub_dirs)
elsif sub_dir_count < required_sub_dirs.size
errors.concat missing_dir(sub_dirs, version, required_sub_dirs)
end
errors
end

# This method removes the implicit '.' and '..' directories.
# Returns an array of strings.
def directory_entries(path)
@directory_entries_hash[path] ||=
begin
dirs = []
(Dir.entries(path).sort - IMPLICIT_DIRS).each do |child|
dirs << child
end
dirs
end
end

def sub_dirs(path)
directory_entries(path)
end

def found_unexpected(array, version, required_sub_dirs)
errors = []
unexpected = (array - required_sub_dirs)
unexpected = "#{unexpected} Version: #{version}"
errors << result_hash(EXTRA_CHILD_DETECTED, unexpected)
errors
end

def missing_dir(array, version, required_sub_dirs)
errors = []
missing = (required_sub_dirs - array)
missing ="#{missing} Version: #{version}"
errors << result_hash(MISSING_DIR, missing)
errors
end

def expected_dirs(array, _version, required_sub_dirs)
errors = []
errors << result_hash(INCORRECT_DIR) unless array == required_sub_dirs
errors
end

def result_hash(response_code, addl=nil)
{ response_code => error_code_msg(response_code, addl) }
end

def error_code_msg(response_code, addl=nil)
format(ERROR_CODE_TO_MESSAGES[response_code], addl: addl)
end

def check_required_manifest_files(dir, version)
errors = []
has_manifest_inventory = File.exist?("#{dir}/#{MANIFEST_INVENTORY_PATH}")
has_signature_catalog = File.exist?("#{dir}/#{SIGNATURE_CATALOG_PATH}")
result = if has_manifest_inventory && has_signature_catalog
nil
elsif has_manifest_inventory && !has_signature_catalog
result_hash(NO_SIGNATURE_CATALOG, version)
elsif !has_manifest_inventory && has_signature_catalog
result_hash(NO_MANIFEST_INVENTORY, version)
else
result_hash(NO_XML_FILES, version)
end
errors << result if result
errors
end
end
end
Empty file.
Empty file.
@@ -0,0 +1 @@
.keep
@@ -0,0 +1 @@
.keep
@@ -0,0 +1 @@
.keep
Empty file.
Empty file.
Empty file.
Empty file.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,20 @@
<?xml version="1.0"?>
<contentMetadata type="etd" objectId="druid:bj102hs9687">
<resource type="main-original" data="content" id="main">
<attr name="label">Body of dissertation (as submitted)</attr>
<file deliver="no" preserve="yes" size="1000217" mimetype="application/pdf" id="eric-smith-dissertation.pdf" shelve="yes">
<checksum type="MD5">aead2f6f734355c59af2d5b2689e4fb3</checksum>
<checksum type="SHA-1">22dc6464e25dc9a7d600b1de6e3848bf63970595</checksum>
<checksum type="SHA-256">e49957d53fb2a46e3652f4d399bd14d019600cf496b98d11ebcdf2d10a8ffd2f</checksum>
</file>
</resource>
<resource type="main-augmented" objectId="druid:kw095zh6093" data="content" id="main">
<attr name="label">Body of dissertation</attr>
<file deliver="yes" preserve="yes" size="905566" mimetype="application/pdf" id="eric-smith-dissertation-augmented.pdf" shelve="yes">
<location type="url">https://stacks.stanford.edu/file/druid:bj102hs9687/eric-smith-dissertation-augmented.pdf</location>
<checksum type="MD5">93802f1a639bc9215c6336ff5575ee22</checksum>
<checksum type="SHA-1">32f7129a81830004f0360424525f066972865221</checksum>
<checksum type="SHA-256">a67276820853ddd839ba614133f1acd7330ece13f1082315d40219bed10009de</checksum>
</file>
</resource>
</contentMetadata>
@@ -0,0 +1,74 @@

<mods xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="3.3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd">
<titleInfo>
<title>Axe, an automated formal equivalence checking tool for programs</title>
</titleInfo>
<name type="personal">
<namePart>Smith, Eric Whitman.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">creator</roleTerm>
</role>
</name>
<name type="personal">
<namePart>Dill, David</namePart>
<role>
<roleTerm type="text">primary advisor.</roleTerm>
</role>
<role>
<roleTerm authority="marcrelator" type="code">ths</roleTerm>
</role>
</name>
<name type="personal">
<namePart>Engler, Dawson R.</namePart>
<role>
<roleTerm type="text">advisor.</roleTerm>
</role>
<role>
<roleTerm authority="marcrelator" type="code">ths</roleTerm>
</role>
</name>
<name type="personal">
<namePart>Mitchell, John</namePart>
<role>
<roleTerm type="text">advisor.</roleTerm>
</role>
<role>
<roleTerm authority="marcrelator" type="code">ths</roleTerm>
</role>
</name>
<name type="corporate">
<namePart>Stanford University</namePart>
<namePart>Computer Science Dept.</namePart>
</name>
<typeOfResource>text</typeOfResource>
<genre authority="marcgt">theses</genre>
<originInfo>
<place>
<placeTerm authority="marccountry" type="code">xx</placeTerm>
</place>
<dateIssued>2011</dateIssued>
<dateIssued encoding="marc" keyDate="yes">2011</dateIssued>
<issuance>monographic</issuance>
</originInfo>
<language>
<languageTerm authority="iso639-2b" type="code">eng</languageTerm>
</language>
<physicalDescription>
<form authority="marcform">electronic</form>
<form authority="gmd">electronic resource</form>
<extent>1 online resource.</extent>
</physicalDescription>
<abstract>This dissertation describes Axe, an automated formal verification tool for proving equivalence of programs. Axe has been used to verify real-world Java implementations of cryptographic operations, including block ciphers, stream ciphers, and cryptographic hash functions. Axe proves the bit-for-bit equivalence of the outputs of two programs, one of which may be a formal, mathematical specification. To do so, Axe relies on a novel combination of techniques from combinational equivalence checking and inductive theorem proving. First, the loops in some programs can be completely unrolled, creating large loop-free terms. Axe proves the equivalence of such terms using a phased approach, including aggressive word-level simplifications, bit-blasting, test-case-based identification of internal equivalences, and ``sweeping and merging.&apos;&apos; For loops that cannot be unrolled, Axe uses execution traces to detect loop properties, including loop invariants for single loops and connection relationships between corresponding loops. Axe proves such properties inductively. In many cases, synchronizing transformations must be performed to align the loop structures of the programs being compared; Axe can perform and verify a variety of these transformations.</abstract>
<note displayLabel="statement of responsibility">Eric Whitman Smith.</note>
<note>Submitted to the Department of Computer Science.</note>
<note>Thesis (Ph.D.)--Stanford University, 2011.</note>
<identifier type="uri">http://purl.stanford.edu/bj102hs9687</identifier>
<location>
<url usage="primary display">http://purl.stanford.edu/bj102hs9687</url>
</location>
<recordInfo>
<recordContentSource authority="marcorg">CSt</recordContentSource>
<recordCreationDate encoding="marc">110721</recordCreationDate>
<recordIdentifier source="SIRSI">a9238371</recordIdentifier>
</recordInfo>
</mods>
@@ -0,0 +1,15 @@

<identityMetadata>
<objectId>druid:bj102hs9687</objectId>
<objectType>item</objectType>
<objectLabel></objectLabel>
<objectCreator>DOR</objectCreator>
<citationTitle>Axe: An Automated Formal Equivalence Checking Tool for Programs</citationTitle>
<citationCreator>Smith, Eric Whitman</citationCreator>
<otherId name="dissertationid">0000001024</otherId>
<otherId name="catkey">9238371</otherId>
<otherId name="uuid"></otherId>
<agreementId>druid:ct692vv3660</agreementId>
<objectAdminClass>ETDs</objectAdminClass>
<tag>ETD : Dissertation</tag>
</identityMetadata>
@@ -0,0 +1,8 @@

<provenanceMetadata objectId="druid:bj102hs9687">
<agent name="DOR">
<what object="druid:bj102hs9687">
<event when="2011-10-28T01:24:34-07:00" who="DOR-accessionWF">DOR Common Accessioning completed</event>
</what>
</agent>
</provenanceMetadata>
@@ -0,0 +1,6 @@

<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="info:fedora/druid:bj102hs9687">
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:Etd"></hasModel>
</rdf:Description>
</rdf:RDF>
@@ -0,0 +1,21 @@
<?xml version="1.0"?>
<rightsMetadata objectId="druid:bj102hs9687">
<copyright>
<human>(c) Copyright 2011 by Eric Whitman Smith</human>
</copyright>
<access type="discover">
<machine>
<world/>
</machine>
</access>
<access type="read">
<machine>
<group>stanford</group>
<embargoReleaseDate>2013-06-02</embargoReleaseDate>
</machine>
</access>
<use>
<machine type="creativeCommons">none</machine>
<human type="creativeCommons">no Creative Commons (CC) license</human>
</use>
</rightsMetadata>
@@ -0,0 +1,25 @@

<jhove xmlns="http://hul.harvard.edu/ois/xml/ns/jhove" xmlns:mix="http://www.loc.gov/mix/v10" xmlns:textmd="info:lc/xmlns/textMD-v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" date="2009-08-06" name="JhoveToolkit" release="1.0" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove http://cosimo.stanford.edu/standards/jhove/v1/jhove.xsd">
<date>2011-10-28T01:15:16-07:00</date>
<repInfo uri="eric-smith-dissertation-augmented.pdf">
<reportingModule date="2009-05-22" release="1.8">PDF-hul</reportingModule>
<format>PDF</format>
<version>1.4</version>
<status>Well-Formed and valid</status>
<sigMatch>
<module>PDF-hul</module>
</sigMatch>
<mimeType>application/pdf</mimeType>
<checksums></checksums>
</repInfo>
<repInfo uri="eric-smith-dissertation.pdf">
<reportingModule date="2009-05-22" release="1.8">PDF-hul</reportingModule>
<format>PDF</format>
<status>Not well-formed</status>
<sigMatch>
<module>PDF-hul</module>
</sigMatch>
<mimeType>application/pdf</mimeType>
<checksums></checksums>
</repInfo>
</jhove>

0 comments on commit 9d711b1

Please sign in to comment.