Skip to content
This repository has been archived by the owner on May 14, 2022. It is now read-only.

Commit

Permalink
Merge pull request #19 from pulibrary/export-job
Browse files Browse the repository at this point in the history
Adding background export job
  • Loading branch information
eliotjordan committed Jan 20, 2017
2 parents a993c73 + c94d3d1 commit 12513f1
Show file tree
Hide file tree
Showing 10 changed files with 226 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ Metrics/BlockLength:
Metrics/LineLength:
Max: 100

RSpec/MessageChain:
Exclude:
- 'spec/jobs/grocer/export_job_spec.rb'

RSpec/MultipleExpectations:
Enabled: false

Expand Down
70 changes: 70 additions & 0 deletions app/jobs/grocer/export_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
require 'open3'

module Grocer
  # Background job that exports a Fedora resource to a Bag on disk by running
  # the Fedora Import/Export JAR as a child process and recording the outcome
  # on the matching Export record.
  #
  # Config options (read from Grocer.configuration)
  # * baseurl: Fedora base url (optional)
  # * export_dir: Directory to export objects to
  # * metadata_template: YAML file containing default metadata
  # * jar: Path to the Fedora Import/Export JAR file
  # * predicates: Comma-separated list of predicates to use for resource membership (optional)
  class ExportJob < ActiveJob::Base
    # Export a Fedora object to a Bag on disk
    # @param pid [String] Resource id
    # @param ark [String] Resource ARK (if assigned)
    def perform(pid, ark = nil)
      # Path is the first four two-character segments of the id, then the id itself.
      pair_path = pid.scan(/..?/).first(4).push(pid).join('/')
      url = "#{Grocer.configuration.baseurl}/#{pair_path}"
      dir = File.join(Grocer.configuration.export_dir, pair_path)
      export(pid, url, dir, ark)
    end

    private

    # Run the export process and record its outcome on the Export record.
    # @param pid [String] Resource id
    # @param url [String] Fedora URL of the resource
    # @param dir [String] Directory the bag is written to
    # @param ark [String] Resource ARK (if assigned)
    def export(pid, url, dir, ark = nil)
      record = Export.find_or_create_by(pid: pid)
      meta = bag_metadata url, ark
      # The command is passed as an argument vector, so no shell is involved and
      # ids or paths containing spaces or shell metacharacters are passed through
      # verbatim. A missing executable therefore raises instead of exiting non-zero.
      Open3.popen2e(*export_command(url, dir, meta.path)) do |_stdin, out, wait_thread|
        # update the Export record so it can be tracked/killed
        record.running!(wait_thread.pid)

        # monitor the thread
        log = monitor_process(out, wait_thread)
        log ? record.error!(log.path) : record.success!
      end
    ensure
      # meta is nil when the failure happened before the metadata file existed
      File.delete(meta.path) if meta
    end

    # Buffer the process output to a temporary log file and wait for it to exit.
    # @param out [IO] Combined stdout/stderr of the process
    # @param wait_thread [#value] Thread whose value is the process exit status
    # @return [File, nil] the closed log file when the process failed, nil
    #   (with the log removed) when it succeeded
    def monitor_process(out, wait_thread)
      log = Tempfile.create('export')
      begin
        while (line = out.gets)
          log.write line
        end
      ensure
        # always flush and release the descriptor; the path stays readable
        log.close
      end

      return log unless wait_thread.value.success?

      File.delete(log.path)

      nil
    end

    # Write the bag metadata file: the configured template followed by the
    # identifiers for this resource.
    # @param url [String] Fedora URL of the resource
    # @param ark [String] Resource ARK (if assigned)
    # @return [File] the closed metadata file; the caller deletes it
    def bag_metadata(url, ark = nil)
      # Read the template first so a missing template leaves no temp file behind.
      template = File.read(Grocer.configuration.metadata_template)
      meta = Tempfile.create('bagmeta')
      # puts only appends a newline when the template does not already end with one
      meta.puts template
      meta.puts " External-Identifier: #{ark}" if ark
      meta.puts " Internal-Sender-Identifier: #{url}"
      meta.close

      meta
    end

    # Build the argument vector for the Fedora Import/Export JAR.
    # @param url [String] Fedora URL of the resource
    # @param dir [String] Directory the bag is written to
    # @param meta [String] Path of the bag metadata file
    # @return [Array<String>] program name followed by its arguments
    def export_command(url, dir, meta)
      config = Grocer.configuration
      cmd = %W(java -jar #{config.jar} -m export -b -x -r #{url} -d #{dir})
      cmd.concat %W(-g default -G #{meta})
      cmd.concat %W(-p #{config.predicates}) if config.predicates
      cmd
    end
  end
end
36 changes: 36 additions & 0 deletions app/models/grocer/config.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
module Grocer
  # Settings used by the export job. Every reader falls back to a default the
  # first time it is called with no value assigned through the writers.
  class Config
    attr_writer :baseurl, :export_dir, :jar, :metadata_template, :predicates

    # Fedora baseurl, defaults to ActiveFedora settings
    #
    # The url and base_path are joined with exactly one slash, so a base_path
    # that already starts with '/' does not yield a double slash.
    def baseurl
      @baseurl ||= begin
        url = af_config[:url].to_s.sub(%r{/+\z}, '')
        base_path = af_config[:base_path].to_s.sub(%r{\A/+}, '')
        "#{url}/#{base_path}"
      end
    end

    # Directory to export to, defaults to '/pub/export'
    def export_dir
      @export_dir ||= '/pub/export'
    end

    # Bag metadata template, defaults to '[export_dir]/metadata.yml'
    def metadata_template
      @metadata_template ||= "#{export_dir}/metadata.yml"
    end

    # Fedora Import/Export JAR file, defaults to '[export_dir]/export.jar'
    def jar
      @jar ||= "#{export_dir}/export.jar"
    end

    # Predicates that define resource membership, defaults to the full URIs of
    # pcdm:hasMember and ldp:contains, comma-separated
    def predicates
      @predicates ||= 'http://pcdm.org/models#hasMember,http://www.w3.org/ns/ldp#contains'
    end

    private

    # Connection settings taken from the ActiveFedora configuration
    def af_config
      ActiveFedora.config.credentials
    end
  end
end
21 changes: 21 additions & 0 deletions app/models/grocer/export.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
module Grocer
  # Persistent record of the export state of one resource, keyed by its pid.
  class Export < ApplicationRecord
    validates :pid, presence: true, uniqueness: true

    # Mark the export as in progress.
    # @param job_pid [Integer] value stored in the job attribute
    def running!(job_pid)
      update!(job: job_pid, status: 'running')
    end

    # Mark the export as finished successfully and stamp the time.
    def success!
      update!(job: nil, status: 'success', last_success: Time.current)
    end

    # Mark the export as failed, stamp the time and remember the log.
    # @param log [String] value stored in the logfile attribute
    def error!(log)
      update!(job: nil, status: 'error', last_error: Time.current, logfile: log)
    end
  end
end
1 change: 1 addition & 0 deletions grocer.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
spec.require_paths = ["lib"]

spec.add_dependency 'active-fedora'
spec.add_development_dependency 'rake'
spec.add_development_dependency 'sqlite3'
spec.add_development_dependency 'bundler', '~> 1.6'
Expand Down
7 changes: 7 additions & 0 deletions lib/grocer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,11 @@
require 'grocer/engine'

module Grocer
  class << self
    # The shared Config instance, built on first use.
    def configuration
      @configuration ||= Config.new
    end

    # Yield the shared Config instance to the given block so settings can be
    # assigned on it.
    def configure
      yield configuration
    end
  end
end
6 changes: 6 additions & 0 deletions spec/fixtures/files/metadata_template.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bag-info.txt:
Source-Organization: Princeton University Library
Organization-Address: One Washington Road, Princeton, NJ 08544-2098 USA
Contact-Name: Esme Cowles
Contact-Phone: +16092581470
Contact-Email: escowles@princeton.edu
46 changes: 46 additions & 0 deletions spec/jobs/grocer/export_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
require 'spec_helper'
require 'open3'
require 'active-fedora'

RSpec.describe Grocer::ExportJob do
  # Stand-ins for what Open3.popen2e yields: the wait thread and the output stream
  let(:thread) { instance_double('Process::Waiter') }
  let(:stdout) { StringIO.new("foo\nbar\nbaz\n") }
  # Files handed out, in this order, by the stubbed Tempfile.create
  let(:meta) { Tempfile.create 'export_job_templ' }
  let(:logfile) { Tempfile.create 'export_job_log' }
  let(:af_config) { { url: 'http://example.org/rest', base_path: '/test' } }
  let(:pid) { 1234 }

  before do
    # No real process is started; the block receives the doubles above
    allow(Open3).to receive(:popen2e).and_yield(nil, stdout, thread)
    allow(Tempfile).to receive(:create).and_return(meta, logfile)
    allow(thread).to receive(:pid).and_return(pid)

    allow(ActiveFedora).to receive_message_chain(:config, :credentials).and_return(af_config)
    Grocer.configure do |conf|
      conf.metadata_template = file_fixture('metadata_template.yml')
    end
  end

  context 'a successful export' do
    before do
      allow(thread).to receive_message_chain(:value, :success?).and_return(true)
    end

    it 'reports success' do
      described_class.perform_now('obj1')
      expect(Grocer::Export.find_by(pid: 'obj1').status).to eq('success')
    end
  end

  context 'an unsuccessful export' do
    before do
      allow(thread).to receive_message_chain(:value, :success?).and_return(false)
    end

    it 'reports error' do
      described_class.perform_now('obj1')
      expect(Grocer::Export.find_by(pid: 'obj1').status).to eq('error')
    end
  end
end
31 changes: 31 additions & 0 deletions spec/models/grocer/export_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,35 @@
expect { valid_export.save! }.not_to raise_error
end
end

# Each example calls one transition helper and checks the attributes it writes.
context 'status transitions' do
  subject(:export) { described_class.new pid: 'obj1' }

  describe '#running!' do
    it 'sets the job and status' do
      export.running!(1234)
      expect(export.job).to eq(1234)
      expect(export.status).to eq('running')
    end
  end

  describe '#success!' do
    it 'clears the job and sets the status and last_success' do
      export.success!
      expect(export.job).to be nil
      expect(export.last_success).not_to be nil
      expect(export.status).to eq('success')
    end
  end

  describe '#error!' do
    it 'clears the job and sets the status, last_error and logfile' do
      export.error!('/tmp/export.log')
      expect(export.job).to be nil
      expect(export.last_error).not_to be nil
      expect(export.logfile).not_to be nil
      expect(export.status).to eq('error')
    end
  end
end
end
4 changes: 4 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
# clean database
# Select the truncation strategy once, in the suite-level hook.
config.before(:suite) do
DatabaseCleaner.strategy = :truncation
end

# Run the cleaner in the per-example hook so each example starts from a cleaned database.
config.before(:each) do
DatabaseCleaner.clean
end

Expand All @@ -48,6 +51,7 @@
# a real object. This is generally recommended, and will default to
# `true` in RSpec 4.
mocks.verify_partial_doubles = true
mocks.verify_doubled_constant_names = true
end

# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
Expand Down

0 comments on commit 12513f1

Please sign in to comment.