Skip to content
This repository has been archived by the owner on May 14, 2022. It is now read-only.

Commit

Permalink
Task, job, and service for ingesting MapSets
Browse files Browse the repository at this point in the history
  • Loading branch information
eliotjordan committed May 12, 2017
1 parent 61fd71b commit 74a321e
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 0 deletions.
12 changes: 12 additions & 0 deletions app/jobs/ingest_map_set_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Background job that ingests a single map set on the :ingest queue by
# delegating to IngestMapSetService.
class IngestMapSetJob < ApplicationJob
  queue_as :ingest

  # @param map_record [Hash] map set record; must contain a 'bibid' key
  #   (a missing key raises KeyError from Hash#fetch)
  # @param base_file_path [String] passed through to the service as the
  #   base path for the member files
  # @param user the user to ingest as, passed through to the service
  def perform(map_record, base_file_path, user)
    logger.info "Ingesting Map Set #{map_record.fetch('bibid')}"
    ::IngestMapSetService.new(logger).ingest_map_set(map_record, base_file_path, user)
  end
end
105 changes: 105 additions & 0 deletions app/services/ingest_map_set_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
class IngestMapSetService < IngestScannedMapsService
attr_reader :record, :base_file_path, :user

# Ingests one map set record: creates the parent map set, creates its
# members, attaches them in order and saves the result.
#
# When the record carries a 'bibid', a Solr query on
# source_metadata_identifier_tesim for that id is handed to
# delete_duplicates! before anything is created.
#
# @param record [Hash] map set record; 'bibid' is read here
# @param base_file_path [String] base path later used to locate member TIFFs
# @param user the user to ingest as
# @return the result of the final save! on the map set
def ingest_map_set(record, base_file_path, user)
  @record = record
  @base_file_path = base_file_path
  @user = user

  if record['bibid']
    escaped_bibid = RSolr.solr_escape(record['bibid'])
    delete_duplicates!("source_metadata_identifier_tesim:\"#{escaped_bibid}\"")
  end

  # The parent is created before its members, matching the original order.
  map_set = create_map_set
  map_set.ordered_members = create_map_set_members
  map_set.save!
end

# Builds, saves and returns a new record of the given class.
#
# The supplied attributes are merged over two defaults (a rights statement
# and public visibility). Remote metadata is applied only when the record
# has a source_metadata_identifier. A freshly minted noid becomes the id,
# and after saving the workflow state is initialized using
# workflow_state(klass).
#
# @param klass [Class] the work class to instantiate
# @param user the depositor handed to apply_depositor_metadata
# @param attributes [Hash] attributes that override the defaults
# @return the saved record
def minimal_record(klass, user, attributes)
  defaults = {
    rights_statement: ['http://rightsstatements.org/vocab/NKC/1.0/'],
    visibility: Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC
  }

  klass.new.tap do |work|
    work.attributes = defaults.merge(attributes)
    work.apply_depositor_metadata(user)
    work.apply_remote_metadata if work.source_metadata_identifier
    work.id = ActiveFedora::Noid::Service.new.mint
    work.save!
    Workflow::InitializeState.call(work, workflow_name, workflow_state(klass))
    @logger.info "Created #{klass}: #{work.id} #{attributes}"
  end
end

# Picks the initial workflow state for a newly created record.
#
# Callers pass the work *class* (e.g. MapSet, ImageWork). Asking a class
# `is_a?(MapSet)` is always false, so the class hierarchy is compared
# instead. An instance is still accepted for backward compatibility.
#
# @param klass [Class, Object] the work class (or an instance of it)
# @return [String] 'final_review' for MapSet (or a subclass), otherwise 'complete'
def workflow_state(klass)
  # Module#<= yields nil for unrelated classes, which is falsy here.
  map_set = klass.is_a?(Class) ? klass <= MapSet : klass.is_a?(MapSet)
  map_set ? 'final_review' : 'complete'
end

# Creates an ImageWork for one sheet, attaches its file and sets its title.
#
# The work is created with the record's bibid as source metadata identifier
# and an ark-based identifier. The source metadata identifier is then
# emptied before the file is attached and the work is saved again.
# NOTE(review): presumably this keeps the sheet from staying linked to the
# parent's bib record after remote metadata was applied — confirm.
#
# @param file_path [String] path of the file to ingest
# @param ark [String] ark noid, used to build the "ark:/88435/..." identifier
# @param title [String] title given to the work after saving
# @param sheet_title [String] title given to the attached file set
# @return the value of update_image_work_title (the re-fetched, retitled work)
def ingest_work(file_path, ark, title, sheet_title)
  work_attributes = {
    source_metadata_identifier: [record['bibid']],
    identifier: ["ark:/88435/#{ark}"]
  }
  work = minimal_record(ImageWork, user, work_attributes)
  work.source_metadata_identifier = []

  file_attributes = file_set_attributes.merge(title: [sheet_title])
  work.ordered_members = [ingest_file(work, file_path, user, {}, file_attributes)]
  work.save!

  update_image_work_title(work.id, title)
end

# Looks up an ImageWork by id, replaces its title and saves it.
#
# @param id the id of the ImageWork to find
# @param title the new title; stored as a single-element array
# @return the found work, after saving
def update_image_work_title(id, title)
  ImageWork.find(id).tap do |work|
    work.title = [title]
    work.save!
  end
end

# Creates one work per entry in record['members'] and returns them in order.
#
# A member is skipped (with a warning) when it has no 'ark' or when its TIFF
# is not found at path_to_tiff(ark). The duplicate-removal query for the
# member's ark identifier is only issued once the TIFF is known to exist, so
# a missing file never removes an existing work without replacing it.
#
# @return [Array] the works returned by ingest_work, in record order;
#   empty when the record has no 'members'
def create_map_set_members
  (record['members'] || []).each_with_object([]) do |member_record, members|
    ark = member_record['ark']
    unless ark
      # path_to_tiff cannot build a path without an ark.
      @logger.warn "Skipping map set member without an ark: #{member_record}"
      next
    end

    file_path = path_to_tiff(ark)
    unless File.exist?(file_path)
      @logger.warn "Skipping #{ark}: no file found at #{file_path}"
      next
    end

    delete_duplicates!("identifier_tesim:\"#{RSolr.solr_escape("ark:/88435/#{ark}")}\"")
    sheet_title = generate_title(member_record['page'])
    work = ingest_work(file_path, ark, member_record['title'], sheet_title)
    members << work if work
  end
end

# Creates the parent MapSet for the current record, using the record's
# bibid as its source metadata identifier, saves it once more and returns it.
#
# @return the saved MapSet
def create_map_set
  set_attributes = { source_metadata_identifier: [record['bibid']] }
  minimal_record(MapSet, user, set_attributes).tap(&:save!)
end

# Maps a page number to a sheet title: pages 0, 1 and 2 are the fixed
# 'Title', 'Overview' and 'Index' sheets; every other page is named by
# generate_sheet_num.
#
# @param page [Integer] page number of the member
# @return [String] the title for that page
def generate_title(page)
  case page
  when 0 then 'Title'
  when 1 then 'Overview'
  when 2 then 'Index'
  else generate_sheet_num(page)
  end
end

# Builds the "Sheet N" title for a page, where N is the page number minus 2.
#
# @param page [Integer] page number of the member
# @return [String] e.g. "Sheet 1" for page 3
def generate_sheet_num(page)
  "Sheet #{page - 2}"
end

# Builds the TIFF path for a noid: the noid is cut into chunks of up to two
# characters, each chunk becomes a directory level under base_file_path, and
# the file itself is named "<noid>.tif".
#
# @param noid [String] identifier of the member
# @return [String] e.g. "<base>/ab/cd/e/abcde.tif" for "abcde"
def path_to_tiff(noid)
  directories = noid.scan(/.{1,2}/).map { |chunk| "/#{chunk}" }.join
  "#{base_file_path}#{directories}/#{noid}.tif"
end
end
28 changes: 28 additions & 0 deletions lib/tasks/map_sets.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace :bulk do
  # Ingests every map set record found in a JSON file.
  #
  # Environment variables:
  #   JSON       - path to the JSON file of map set records (required)
  #   TIFF       - base directory of the TIFF files (required)
  #   USER       - user key to ingest as; falls back to the first admin user
  #   BACKGROUND - when set to any value, enqueue IngestMapSetJob per record
  #                instead of ingesting inline
  desc "Ingest maps sets from JSON file"
  task ingest_map_sets: :environment do
    json_file = ENV['JSON']
    tiff_dir = ENV['TIFF']
    background = ENV['BACKGROUND']
    # Validate the arguments before touching the database; without TIFF the
    # service would build file paths relative to the filesystem root.
    usage = "usage: rake bulk:ingest_map_sets JSON=/path/to/json TIFF=/path/to/tiffs/"
    abort usage unless json_file && File.exist?(json_file) && tiff_dir

    user = User.find_by_user_key(ENV['USER']) if ENV['USER']
    user ||= User.all.detect(&:admin?)
    abort "No user to ingest as: USER did not match and no admin user exists" unless user

    map_set_records = JSON.parse(File.read(json_file))

    logger = Logger.new(STDOUT)
    logger.info "ingesting as: #{user.user_key} (override with USER=foo)"
    map_set_records.each do |record|
      begin
        if background
          IngestMapSetJob.perform_later(record, tiff_dir, user)
        else
          IngestMapSetService.new(logger).ingest_map_set(record, tiff_dir, user)
        end
      rescue => e
        # One failing record must not stop the rest of the batch.
        logger.error "Error: #{e.message}"
        logger.error Array(e.backtrace).join("\n")
      end
    end
  end
end

0 comments on commit 74a321e

Please sign in to comment.