Skip to content

Commit

Permalink
Adding IIIF resource and ingest job
Browse files Browse the repository at this point in the history
  • Loading branch information
escowles committed Jan 14, 2016
1 parent b0e0a08 commit 6a0abc0
Show file tree
Hide file tree
Showing 11 changed files with 261 additions and 4 deletions.
7 changes: 5 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ group :development, :test do
gem 'pry-rails'
gem 'simplecov', '~> 0.9', require: false
gem 'coveralls', require: false
gem 'rubocop', require: false
gem 'rubocop-rspec', require: false
gem 'vcr'
gem 'webmock', require: false
end

group :development do
Expand All @@ -62,5 +66,4 @@ gem 'blacklight-gallery', '>= 0.3.0'
gem 'blacklight-oembed'
gem 'social-share-button'
gem 'devise_invitable'
gem 'rubocop', require: false
gem 'rubocop-rspec', require: false
gem 'faraday'
12 changes: 12 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ GEM
term-ansicolor (~> 1.3)
thor (~> 0.19.1)
tins (~> 1.6.0)
crack (0.4.2)
safe_yaml (~> 1.0.0)
debug_inspector (0.0.2)
deprecation (0.2.2)
activesupport
Expand Down Expand Up @@ -194,6 +196,7 @@ GEM
multi_json (~> 1.11)
os (~> 0.9)
signet (~> 0.7)
hashdiff (0.2.2)
http-cookie (1.0.2)
domain_name (~> 0.5)
i18n (0.7.0)
Expand Down Expand Up @@ -349,6 +352,7 @@ GEM
ruby-oembed (0.9.0)
ruby-progressbar (1.7.5)
rubyzip (1.1.7)
safe_yaml (1.0.4)
sass (3.4.20)
sass-rails (5.0.4)
railties (>= 4.0.0, < 5.0)
Expand Down Expand Up @@ -413,13 +417,18 @@ GEM
unf (0.1.4)
unf_ext
unf_ext (0.0.7.1)
vcr (2.9.3)
warden (1.2.4)
rack (>= 1.0)
web-console (2.2.1)
activemodel (>= 4.0)
binding_of_caller (>= 0.7.2)
railties (>= 4.0)
sprockets-rails (>= 2.0, < 4.0)
webmock (1.22.1)
addressable (>= 2.3.6)
crack (>= 0.3.2)
hashdiff

PLATFORMS
ruby
Expand All @@ -435,6 +444,7 @@ DEPENDENCIES
devise
devise-guests (~> 0.3)
devise_invitable
faraday
friendly_id
jbuilder (~> 2.0)
jettywrapper (>= 2.0)
Expand All @@ -454,7 +464,9 @@ DEPENDENCIES
sqlite3
turbolinks
uglifier (>= 1.3.0)
vcr
web-console (~> 2.0)
webmock

BUNDLED WITH
1.11.2
15 changes: 15 additions & 0 deletions app/jobs/iiif_ingest_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
class IIIFIngestJob < ActiveJob::Base
# Ingest one or more IIIF manifest URLs. Each manifest is ingested as its
# own resource.
#
# urls - a single manifest URL or an Array of manifest URLs. An Array is used
#        as given; anything else is coerced with Kernel#Array first.
#
# Calls #ingest once per element, in order.
def perform(urls)
arr = urls.is_a?(Array) ? urls : Array(urls)
arr.each do |url|

This comment has been minimized.

Copy link
@tpendragon

tpendragon Jan 14, 2016

Contributor

You can just change this to Array.wrap(arr), or (if there's no chance hashes are being passed in) Array(arr)

ingest url
end
end

# Build an IIIFResource for one manifest URL and persist it.
# Returns whatever IIIFResource#save returns.
def ingest(url)
  resource = IIIFResource.new(manifest_url: url)
  resource.save
end
end
42 changes: 42 additions & 0 deletions app/models/iiif_resource.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
class IIIFResource < Spotlight::Resource
# If a manifest_url is provided, it is retrieved, parsed and used to populate
# this resource.
#
# manifest_url - URL of a IIIF manifest (keyword argument, defaults to nil).
#                When blank, only the parent initializer runs.
#
# Otherwise the manifest is fetched through IIIFResource.parse_manifest (so
# constructing the object performs an HTTP request) and:
#   * @title is set from the manifest's 'label'
#   * url is set to manifest_url
#   * data['thumbnail'] is set to the thumbnail's '@id', when a thumbnail exists
#   * each 'metadata' entry is stored in data under its label, parameterized
#     with '_' as the separator, holding the list of the entry's '@value's
#
# NOTE(review): 'metadata' is not guarded the way 'thumbnail' is, so a
# manifest without a 'metadata' key would raise when #each is called on nil.
def initialize(manifest_url: nil)

This comment has been minimized.

Copy link
@tpendragon

tpendragon Jan 14, 2016

Contributor

A LOT is happening in initialize, here. Can you just store @manifest_url, give it an accessor, then do something like...

require 'open-uri'
def iiif_resource
  @iiif_resource ||= IIIF::Service.parse(open(manifest_url).read)
end

and use iiif_resource to populate data? If data needs populated at instantiation, at least move that bit to its own method?

super()
return if manifest_url.blank?
manifest = IIIFResource.parse_manifest(manifest_url)
@title = manifest['label']
self.url = manifest_url

self.data ||= {}
self.data['thumbnail'] = manifest['thumbnail']['@id'] if manifest['thumbnail']
manifest['metadata'].each do |h|
self.data[h['label'].parameterize('_')] = h['value'].map { |v| v["@value"] }
end
end

# Name of the Solr field that holds the title, as a Symbol: the configured
# prefix, then "spotlight_title", then the configured string suffix.
def title_field
  fields = solr_fields
  "#{fields.prefix}spotlight_title#{fields.string_suffix}".to_sym
end

# Extend the parent's Solr document with the title (under #title_field) and
# one field per entry in data, named by the data key plus the configured
# string suffix. Returns the extended document.
def to_solr
  super.tap do |doc|
    doc[title_field] = @title
    data.each do |key, values|
      doc[(key + solr_fields.string_suffix).to_sym] = values
    end
  end
end

# Solr field naming settings taken from the Spotlight engine configuration.
def solr_fields
  engine_config = Spotlight::Engine.config
  engine_config.solr_fields
end

# Fetch a IIIF manifest over HTTP, asking for JSON, and return the parsed body.
def self.parse_manifest(manifest_url)
  connection = Faraday.new(manifest_url).tap do |http|
    http.headers['Accept'] = 'application/json'
  end
  response = connection.get(manifest_url)
  JSON.parse(response.body)
end
end
2 changes: 1 addition & 1 deletion circle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies:
- sudo sh bin/ci_kakadu_install.sh
- RAILS_ENV=development bundle exec rake jetty:unzip
- bundle exec rake jetty:configure_solr
- cd jetty && java -Djetty.port=8983 -Dsolr.solr.home=/home/ubuntu/plum/jetty/solr -XX:MaxPermSize=256m -Xmx512m -jar start.jar:
- cd jetty && java -Djetty.port=8983 -Dsolr.solr.home=/home/ubuntu/pomegranate/jetty/solr -XX:MaxPermSize=256m -Xmx512m -jar start.jar:
background: true
- bin/jetty_wait
test:
Expand Down
2 changes: 1 addition & 1 deletion config/blacklight.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ development:
url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %>
test: &test
adapter: solr
url: <%= "http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8888}/solr/blacklight-core" %>
url: <%= "http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8983}/solr/blacklight-core" %>
production:
adapter: solr
url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %>
67 changes: 67 additions & 0 deletions spec/cassettes/iiif_manifest.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions spec/jobs/iiif_ingest_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
require 'rails_helper'

# Specs for IIIFIngestJob#perform. IIIFResource.new and #save are stubbed, so
# no manifest is fetched and nothing is persisted.
describe IIIFIngestJob do
let(:url1) { 'http://example.com/1/manifest' }
let(:url2) { 'http://example.com/2/manifest' }
# Built without a manifest_url, so it can stand in for the resource the job
# would otherwise create.
let(:resource) { IIIFResource.new }

# NOTE(review): #save is only allowed, not expected, in the examples below, so
# they verify how IIIFResource.new is called but not that the result is saved.
it 'ingests a single url' do
allow_any_instance_of(IIIFResource).to receive(:save)
# `resource` is evaluated while this expectation is being built, before the
# stub on .new is installed, so the real constructor still creates it.
expect(IIIFResource).to receive(:new).with(manifest_url: url1).and_return(resource)

described_class.new.perform(url1)
end

it 'ingests each of an array of urls' do
allow_any_instance_of(IIIFResource).to receive(:save)
# One expectation per URL; both return the same memoized `resource`.
expect(IIIFResource).to receive(:new).with(manifest_url: url1).and_return(resource)
expect(IIIFResource).to receive(:new).with(manifest_url: url2).and_return(resource)

described_class.new.perform([url1, url2])
end
end
79 changes: 79 additions & 0 deletions spec/models/iiif_resource_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
require 'rails_helper'

# Specs for IIIFResource: manifest retrieval, population of url/data from the
# manifest, and the Solr document built from them.
describe IIIFResource do
# In this context Faraday::Connection#get is stubbed, so the manifest comes
# from the inline JSON fixture below rather than from the network.
context 'with mock http interactions' do
let(:url) { 'http://example.com/1/manifest' }
# Minimal manifest fixture: a label, a thumbnail and two metadata entries.
let(:json) { '{
"@context":"http://iiif.io/api/presentation/2/context.json",
"@id":"http://example.com/1/manifest",
"@type":"sc:Manifest",
"label":"Sample Manifest",
"thumbnail":{
"@id":"http://example.com/loris/1.jp2/full/100,/0/default.jpg",
"service":{
"@context":"http://iiif.io/api/image/2/context.json",
"@id":"https://example.com/loris/1.jp2",
"profile":"http://iiif.io/api/image/2/level2.json" }},
"metadata":[
{ "label": "Creator", "value": [{ "@value": "Author, Alice, 1954-" }] },
{ "label": "Date created", "value": [{ "@value": "1985" }] }
]}'
}

# Any Faraday connection returns a double whose body is the fixture JSON.
before do
resp = double('response')
allow_any_instance_of(Faraday::Connection).to receive(:get).and_return(resp)
allow(resp).to receive(:body).and_return(json)
end

describe '#initialize' do
it 'loads metadata from the IIIF manifest' do
resource = described_class.new(manifest_url: url)
expect(resource.url).to eq(url)
expect(resource.data['thumbnail']).to eq('http://example.com/loris/1.jp2/full/100,/0/default.jpg')
expect(resource.data['creator']).to eq(['Author, Alice, 1954-'])
end
end

# NOTE(review): parse_manifest is called on the class here, so the RSpec
# naming convention would be '.parse_manifest' rather than '#parse_manifest'.
describe '#parse_manifest' do
it 'retrieves and parses an IIIF manifest' do
# Verifies the GET is issued with the manifest URL itself.
expect_any_instance_of(Faraday::Connection).to receive(:get).with(url)
manifest = described_class.parse_manifest(url)
expect(manifest['@id']).to eq(url)
expect(manifest['label']).to eq('Sample Manifest')
expect(manifest['thumbnail']['@id']).to eq('http://example.com/loris/1.jp2/full/100,/0/default.jpg')
end
end

describe '#to_solr' do
subject { described_class.new(manifest_url: url) }
# to_solr is exercised with an unsaved exhibit whose blacklight_config is
# stubbed to a fresh Blacklight::Configuration.
before do
exhibit = Spotlight::Exhibit.new
allow(exhibit).to receive(:blacklight_config).and_return(Blacklight::Configuration.new)
subject.exhibit = exhibit
end

it 'indexes iiif metadata' do
solr_doc = subject.to_solr
# Title and thumbnail are single strings; metadata values are arrays.
expect(solr_doc[:spotlight_title_ssim]).to eq('Sample Manifest')
expect(solr_doc[:thumbnail_ssim]).to eq('http://example.com/loris/1.jp2/full/100,/0/default.jpg')
expect(solr_doc[:creator_ssim]).to eq(['Author, Alice, 1954-'])
expect(solr_doc[:date_created_ssim]).to eq(['1985'])
end
end
end

# Uses the VCR cassette named 'iiif_manifest' via RSpec metadata; presumably
# the HTTP response is replayed from spec/cassettes — confirm against the
# VCR configuration.
context 'with recorded http interactions', vcr: { cassette_name: 'iiif_manifest' } do
let(:url) { 'https://hydra-dev.princeton.edu/concern/scanned_resources/1r66j1149/manifest' }
it 'ingests a iiif manifest' do
exhibit = Spotlight::Exhibit.create title: 'Exhibit A'
resource = described_class.new manifest_url: url
resource.exhibit = exhibit
expect(resource.save).to be true

# Read the record back to check that url and data were persisted.
reloaded = described_class.last
expect(reloaded.url).to eq url
expect(reloaded.data['date_created']).to eq ['1976']
end
end
end
9 changes: 9 additions & 0 deletions spec/models/user_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
require 'rails_helper'

# A User built with an email address renders as that address via #to_s.
describe User do
  subject(:user) { described_class.new(email: email) }

  let(:email) { 'user@example.com' }

  it 'has an email address' do
    expect(user.to_s).to eq(email)
  end
end
8 changes: 8 additions & 0 deletions spec/support/vcr.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
require 'vcr'

# VCR setup for the spec suite.
VCR.configure do |config|
  # Where cassette files are stored.
  config.cassette_library_dir = 'spec/cassettes'
  # Use WebMock as the HTTP hooking library.
  config.hook_into :webmock
  # Leave requests to localhost alone.
  config.ignore_localhost = true
  # Let examples select cassettes through `vcr:` metadata.
  config.configure_rspec_metadata!
end

0 comments on commit 6a0abc0

Please sign in to comment.