Skip to content
This repository has been archived by the owner on May 14, 2022. It is now read-only.

Commit

Permalink
Merge pull request #513 from pulibrary/mets-multi
Browse files Browse the repository at this point in the history
Ingest MultiVolumeSet METS files as MultiVolumeWorks
  • Loading branch information
Trey Pendragon committed Mar 29, 2016
2 parents 488c09d + 9fd7791 commit e6d7f02
Show file tree
Hide file tree
Showing 8 changed files with 3,932 additions and 35 deletions.
1 change: 0 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,6 @@ GEM
unf
method_source (0.8.2)
mime-types (2.99.1)
mime-types-data (3.2016.0221)
mimemagic (0.3.1)
mini_magick (4.5.1)
mini_portile (0.6.2)
Expand Down
80 changes: 57 additions & 23 deletions app/jobs/ingest_mets_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,64 @@ class IngestMETSJob < ActiveJob::Base
# @param [String] user User to ingest as
def perform(mets_file, user)
logger.info "Ingesting METS #{mets_file}"
mets = METSDocument.new mets_file

r = ScannedResource.new
r.identifier = mets.ark_id
r.replaces = mets.pudl_id
r.source_metadata_identifier = mets.bib_id
r.apply_depositor_metadata user
r.rights_statement = 'http://rightsstatements.org/vocab/NKC/1.0/'
r.viewing_direction = mets.viewing_direction
r.apply_remote_metadata
r.save!
logger.info "Created ScannedResource: #{r.id}"

mets.files.each do |f|
logger.info "Ingesting file #{f[:path]}"
file_set = FileSet.new
actor = ::CurationConcerns::FileSetActor.new(file_set, user)
actor.create_metadata(r, mets.file_opts(f))
actor.create_content(mets.decorated_file(f))

if f[:path] == mets.thumbnail_path
r.thumbnail_id = file_set.id
@mets = METSDocument.new mets_file
@user = user

ingest
end

private

# Creates the top-level record for the METS document and ingests its content.
#
# The record is a MultiVolumeWork when the METS document reports multiple
# volumes, otherwise a ScannedResource. After the identifiers are copied from
# the METS document and the record is saved, either its volumes or its files
# are ingested.
def ingest
  multi_volume = @mets.multi_volume?
  record_class = multi_volume ? MultiVolumeWork : ScannedResource

  resource = minimal_record(record_class)
  resource.identifier = @mets.ark_id
  resource.replaces = @mets.pudl_id
  resource.source_metadata_identifier = @mets.bib_id
  resource.apply_remote_metadata
  resource.save!
  logger.info "Created #{resource.class}: #{resource.id}"

  # Multi-volume works delegate file ingest to one child record per volume.
  return ingest_volumes(resource) if multi_volume

  ingest_files(resource: resource, files: @mets.files)
end

# Creates one FileSet per entry in +files+ and attaches it to +resource+.
#
# @param parent   optional record that also receives the thumbnail id; it is
#                 assigned but not saved here
# @param resource record the file sets are created under
# @param files    file entries; each is indexed with +:path+
#
# When an entry's path equals the METS thumbnail path, the new file set's id
# is stored as the thumbnail of +resource+ (which is saved) and of +parent+.
def ingest_files(parent: nil, resource: nil, files: [])
  files.each do |entry|
    logger.info "Ingesting file #{entry[:path]}"
    file_set = FileSet.new
    ::CurationConcerns::FileSetActor.new(file_set, @user).tap do |actor|
      actor.create_metadata(resource, @mets.file_opts(entry))
      actor.create_content(@mets.decorated_file(entry))
    end

    if entry[:path] == @mets.thumbnail_path
      resource.thumbnail_id = file_set.id
      resource.save!
      parent.thumbnail_id = file_set.id if parent
    end
  end
end

# Creates one ScannedResource per volume listed in the METS document and
# appends it to +parent+.
#
# Each volume is titled with its METS label, saved, populated with that
# volume's files, then added to +parent.ordered_members+. The parent is saved
# after every volume.
#
# @param parent record whose ordered_members receive the volume records
def ingest_volumes(parent)
  @mets.volume_ids.each do |volume_id|
    volume = minimal_record(ScannedResource).tap do |record|
      record.title = [@mets.label_for_volume(volume_id)]
      record.save!
    end
    logger.info "Created ScannedResource: #{volume.id}"

    volume_files = @mets.files_for_volume(volume_id)
    ingest_files(parent: parent, resource: volume, files: volume_files)

    parent.ordered_members << volume
    parent.save!
  end
end
end

# Instantiates +klass+ with the fields common to every ingested record:
# the METS viewing direction, a fixed rights statement, and depositor
# metadata for the ingesting user. The record is returned unsaved.
#
# @param klass class to instantiate (called with no arguments)
# @return the new, unsaved instance of +klass+
def minimal_record(klass)
  klass.new.tap do |record|
    record.viewing_direction = @mets.viewing_direction
    record.rights_statement = 'http://rightsstatements.org/vocab/NKC/1.0/'
    record.apply_depositor_metadata @user
  end
end
end
31 changes: 30 additions & 1 deletion app/models/mets_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def pudl_id
end

def thumbnail_path
@mets.xpath("/mets:mets/mets:fileSec/mets:fileGrp[@USE='thumbnail']/mets:file/mets:FLocat/@xlink:href").to_s.gsub(/file:\/\//, '')
xp = "/mets:mets/mets:fileSec/mets:fileGrp[@USE='thumbnail']/mets:file/mets:FLocat/@xlink:href"
@mets.xpath(xp).to_s.gsub(/file:\/\//, '')
end

def viewing_direction
Expand All @@ -27,6 +28,27 @@ def right_to_left
@mets.xpath("/mets:mets/mets:structMap[@TYPE='Physical']/mets:div/@TYPE").to_s.start_with? 'RTL'
end

# Whether the METS document describes more than one volume, i.e. whether
# volume_nodes yields at least two nodes.
#
# @return [Boolean]
def multi_volume?
  volume_count = volume_nodes.length
  volume_count > 1
end

# Lists the ID attribute value of every volume node, in node order.
#
# @return the collected ID values
def volume_ids
  volume_nodes.map { |node| node.attribute("ID").value }
end

# Looks up the LABEL attribute of the volume node whose ID equals +volume_id+.
#
# Volume nodes without an ID attribute are skipped while searching, so one
# ID-less node cannot abort the lookup for the others.
#
# @param volume_id ID attribute value of the volume to look up
# @return the LABEL attribute value, or nil when no volume has that ID or the
#   matching volume has no LABEL attribute
def label_for_volume(volume_id)
  volume_node = volume_nodes.find do |vol|
    id_attr = vol.attribute("ID")
    id_attr && id_attr.value == volume_id
  end
  return unless volume_node

  # A volume without a LABEL is reported as nil, like an unknown volume.
  label_attr = volume_node.attribute("LABEL")
  label_attr && label_attr.value
end

# Collects file information for every file pointer found beneath the div
# whose ID equals +volume_id+.
#
# Each FILEID attribute found is used to query the matching mets:file, and
# the query result is handed to file_info.
#
# @param volume_id ID attribute value of the volume div
# @return one file_info result per file pointer, in document order
def files_for_volume(volume_id)
  file_id_attrs = @mets.xpath("//mets:div[@ID='#{volume_id}']//mets:fptr/@FILEID")
  file_id_attrs.map do |id_attr|
    file_nodes = @mets.xpath("//mets:file[@ID='#{id_attr.value}']")
    file_info(file_nodes)
  end
end

def files
@mets.xpath("/mets:mets/mets:fileSec/mets:fileGrp[@USE='masters']/mets:file").map do |f|
file_info(f)
Expand All @@ -50,4 +72,11 @@ def file_opts(file)
# Opens the file at +f[:path]+ and wraps the handle in an IoDecorator along
# with the entry's MIME type and the file's base name.
#
# The handle is not closed here; it is handed to the IoDecorator.
#
# @param f file entry indexed with +:path+ and +:mime_type+
# @return [IoDecorator]
def decorated_file(f)
  path = f[:path]
  handle = File.open(path)
  IoDecorator.new(handle, f[:mime_type], File.basename(path))
end

private

# Child divs of the MultiVolumeSet div in the Physical structMap.
# The XPath result is memoized in @volume_nodes after the first lookup.
def volume_nodes
  @volume_nodes ||= @mets.xpath(
    "/mets:mets/mets:structMap[@TYPE='Physical']/mets:div[@TYPE='MultiVolumeSet']/mets:div"
  )
end
end
7 changes: 4 additions & 3 deletions lib/tasks/ingest_mets.rake
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ task ingest_mets: :environment do
user = User.find_by_user_key( ENV['USER'] ) if ENV['USER']
user = User.all.select{ |u| u.admin? }.first unless user

Rails.logger = Logger.new(STDOUT)
Rails.logger.info "ingesting mets files from: #{ARGV[1]}"
Rails.logger.info "ingesting as: #{user.user_key} (override with USER=foo)"
logger = Logger.new(STDOUT)
IngestMETSJob.logger = logger
logger.info "ingesting mets files from: #{ARGV[1]}"
logger.info "ingesting as: #{user.user_key} (override with USER=foo)"
abort "usage: rake ingest_mets /path/to/mets/files" unless ARGV[1] && Dir.exist?(ARGV[1])
Dir["#{ARGV[1]}/**/*.mets"].each do |file|
IngestMETSJob.perform_now(file, user)
Expand Down
Loading

0 comments on commit e6d7f02

Please sign in to comment.