Skip to content

Commit

Permalink
Job for importing an object from Purl (Stanford)
Browse files Browse the repository at this point in the history
Still need to wrap this with webmock or similar and invoke it via a
script
  • Loading branch information
jcoyne committed Jan 13, 2017
1 parent 8823a1b commit b9eb23d
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 19 deletions.
77 changes: 77 additions & 0 deletions app/jobs/import_work_from_purl_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
require 'stanford'
# Import works from Purl/stacks services at Stanford
#
# Example usage:
# log = Hyrax::Operation.create!(user: current_user,
# operation_type: "Import Purl Metadata")
# ImportWorkFromPurlJob.perform_later(current_user,
# 'abcd1234xxxx',
# log)
class ImportWorkFromPurlJob < ActiveJob::Base
  queue_as :ingest

  # Attribute keys retained from the parsed Purl metadata. Everything else
  # (:form_of_work, :record_origin, :created_attributes, :identifiers) is
  # pruned before the work is created.
  class_attribute :attributes_to_keep
  self.attributes_to_keep = %i[title
                               description
                               subject
                               language
                               resource_type
                               location
                               rights
                               visibility
                               id
                               collection
                               files]

  # The operation log is always the last job argument; mark this job as
  # pending on it before the job is enqueued.
  before_enqueue do |job|
    log = job.arguments.last
    log.pending_job(self)
  end

  # Retrieves the Purl XML for the given druid, converts it into work
  # attributes and creates the work synchronously via CreateWorkJob.
  # @param [User] user
  # @param [String] druid
  # @param [Hyrax::Operation] log
  def perform(user, druid, log)
    xml = Stanford::Importer::PurlRetriever.get(druid)
    parser = Stanford::Importer::PurlParser.new(xml)
    attributes = process_attributes(parser.attributes)
    model = model_to_create(attributes)

    CreateWorkJob.perform_now(user, model, attributes, log)
  end

  private

  # Reduce the parsed attributes to the keys in attributes_to_keep and
  # reshape them into the form handed to CreateWorkJob.
  # @param [Hash] attributes the attributes produced by the Purl parser
  # @return [Hash] the processed attributes
  def process_attributes(attributes)
    attributes = attributes.slice(*attributes_to_keep)
    # rename :location to :based_near (only when a location was supplied, so
    # we never write an explicit nil)
    attributes[:based_near] = attributes.delete(:location) if attributes.key?(:location)

    process_collection(attributes)
    # Array() tolerates a record that has no :files entry
    filenames = Array(attributes.delete(:files))
    attributes[:remote_files] = filenames.map do |name|
      { url: "https://stacks.stanford.edu/file/druid:#{attributes[:id]}/#{name}",
        file_name: name }
    end

    attributes
  end

  # Replace :collection with :member_of_collection_ids, creating the
  # collection first if it does not exist yet.
  # @param [Hash] attributes modified in place
  def process_collection(attributes)
    collection = attributes.delete(:collection)
    # A record without collection metadata simply joins no collection
    return unless collection

    Collection.create!(collection) unless Collection.exists?(collection[:id])
    attributes[:member_of_collection_ids] = [collection[:id]]
  end

  # Override this method if you have a different rubric for choosing the model
  # @param [Hash] attributes
  # @return String the model to create
  def model_to_create(attributes)
    Hyrax.config.model_to_create.call(attributes)
  end
end
9 changes: 7 additions & 2 deletions app/middleware/account_elevator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ def parse_tenant_name(request)

# Switch the Apartment tenant to the account registered for the given cname.
#
# When no account matches, this raises if any accounts exist, and otherwise
# only logs a message (treated as single tenant mode).
# @param cname [String] passed through Account.canonical_cname for the lookup
# @raise [RuntimeError] when accounts exist but none matches cname
def self.switch!(cname)
  account = Account.find_by(cname: Account.canonical_cname(cname))

  if account
    Apartment::Tenant.switch!(account.tenant)
  elsif Account.any?
    raise "No tenant found for #{cname}"
  else
    # NOTE(review): assumes a class-level `logger` is available here - confirm
    logger.info "It looks like we're in single tenant mode. No tenant found for #{cname}"
  end
end
end
27 changes: 15 additions & 12 deletions bin/import_from_purl
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,37 @@ def validate_druids!(druids)
exit(1)
end

# Look up the user the import should run as; print usage and exit with
# status 1 when no such user can be found.
# @param username [String, nil] the user key given on the command line
# @return [User] the matching user
def validate_username!(username)
  blank = username.to_s.empty?
  user = User.find_by_user_key(username) unless blank
  return user if user
  usage
  if blank
    $stderr.puts 'username was left blank.'
  else
    # A username was given but matched nobody; say so instead of "left blank"
    $stderr.puts "No user found for username #{username}"
  end
  exit(1)
end

# Load the Rails environment and the Stanford importer code, then switch to
# the tenant for the given hostname.
# @param hostname [String] passed to AccountElevator.switch!
def load_rails(hostname)
  puts 'Loading environment...'
  require File.expand_path('../../config/environment', __FILE__)
  require 'stanford'
  puts 'Starting import...'
  AccountElevator.switch!(hostname)
end

# Validate the command-line arguments, load Rails for the given tenant and
# enqueue one ImportWorkFromPurlJob per druid.
# @param hostname [String] tenant hostname
# @param username [String] user key of the user the import runs as
# @param druids [Array<String>] druids to import
def main(hostname, username, druids)
  validate_hostname!(hostname)
  validate_druids!(druids)
  load_rails(hostname)
  # The user lookup needs the Rails environment, so it runs after load_rails
  user = validate_username!(username)

  druids.each do |druid|
    log = Hyrax::Operation.create!(user: user, operation_type: "Import Purl Metadata")
    ImportWorkFromPurlJob.perform_later(user, druid, log)
  end

  puts "Enqueued #{druids.size} import jobs."
end

# Print the command-line synopsis to standard error.
def usage
  $stderr.puts "Usage: #{$PROGRAM_NAME} <hostname> <username> <druids to import>"
end

# Script entry point: the first two command-line arguments are passed to main
# as-is and every remaining argument is treated as a druid to import.
main(ARGV[0], ARGV[1], ARGV.drop(2))
2 changes: 2 additions & 0 deletions config/environments/development.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
config.cache_store = :null_store
end

config.active_job.queue_adapter = :sidekiq

# Don't care if the mailer can't send.
config.action_mailer.raise_delivery_errors = false

Expand Down
1 change: 1 addition & 0 deletions lib/stanford/importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ module Importer
autoload :ModsParser
autoload :PurlImporter
autoload :PurlParser
autoload :PurlRetriever
end
end
6 changes: 1 addition & 5 deletions lib/stanford/importer/purl_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,7 @@ def files(attributes)
private

# Fetch the Purl document for the given druid by delegating to PurlRetriever.
# @param druid [String]
# @return whatever PurlRetriever.get returns for the druid
def retrieve(druid)
  PurlRetriever.get(druid)
end
end
end
Expand Down
21 changes: 21 additions & 0 deletions lib/stanford/importer/purl_retriever.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module Stanford
  module Importer
    # Retrieves the XML document published for a druid on purl.stanford.edu
    class PurlRetriever
      class << self
        # Shortcut for PurlRetriever.new(druid).get
        # @param druid [String]
        # @return the response body for the druid's XML document
        def get(druid)
          retriever = new(druid)
          retriever.get
        end
      end

      # @param druid [String] identifier of the object to retrieve
      def initialize(druid)
        @druid = druid
      end

      # Request "/<druid>.xml" over the connection and return the response body
      def get
        path = "/#{@druid}.xml"
        response = conn.get(path)
        response.body
      end

      # Memoized Faraday connection to the Purl service
      def conn
        @conn ||= Faraday.new(url: 'https://purl.stanford.edu')
      end
    end
  end
end
20 changes: 20 additions & 0 deletions spec/jobs/import_work_from_purl_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
require 'rails_helper'

RSpec.describe ImportWorkFromPurlJob do
  let(:druid) { 'bc390xk2647' }
  let(:user) { create(:user) }
  let(:log) { Hyrax::Operation.create!(user: user, operation_type: "Import Purl Metadata") }

  before do
    # Remove any object left under this id so the import starts from scratch
    ActiveFedora::Base.find(druid).destroy(eradicate: true) if ActiveFedora::Base.exists?(druid)

    Hyrax::Workflow::WorkflowImporter.load_workflows
    Hyrax::PermissionTemplate.create!(admin_set_id: Hyrax::DefaultAdminSetActor::DEFAULT_ID,
                                      workflow_name: 'default')
  end

  it "works" do
    # One work and two file sets are expected from importing this druid
    expect { described_class.perform_now(user, druid, log) }
      .to change { GenericWork.count }.by(1)
      .and change { FileSet.count }.by(2)
  end
end
3 changes: 3 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@
# --seed 1234
config.order = :random

# Give us a full backtrace on Travis-CI
config.backtrace_exclusion_patterns = [] if ENV['CI']

# Seed global randomization in this process using the `--seed` CLI option.
# Setting this allows you to use `--seed` to deterministically reproduce
# test failures related to randomization by passing the same `--seed` value
Expand Down

0 comments on commit b9eb23d

Please sign in to comment.