diff --git a/Gemfile b/Gemfile index 7d1fc2e5..6e8ac36c 100644 --- a/Gemfile +++ b/Gemfile @@ -39,6 +39,10 @@ group :development, :test do gem 'pry-rails' gem 'simplecov', '~> 0.9', require: false gem 'coveralls', require: false + gem 'rubocop', require: false + gem 'rubocop-rspec', require: false + gem 'vcr' + gem 'webmock', require: false end group :development do @@ -62,5 +66,4 @@ gem 'blacklight-gallery', '>= 0.3.0' gem 'blacklight-oembed' gem 'social-share-button' gem 'devise_invitable' -gem 'rubocop', require: false -gem 'rubocop-rspec', require: false +gem 'faraday' diff --git a/Gemfile.lock b/Gemfile.lock index 36d85f9e..e11bc171 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -142,6 +142,8 @@ GEM term-ansicolor (~> 1.3) thor (~> 0.19.1) tins (~> 1.6.0) + crack (0.4.2) + safe_yaml (~> 1.0.0) debug_inspector (0.0.2) deprecation (0.2.2) activesupport @@ -194,6 +196,7 @@ GEM multi_json (~> 1.11) os (~> 0.9) signet (~> 0.7) + hashdiff (0.2.2) http-cookie (1.0.2) domain_name (~> 0.5) i18n (0.7.0) @@ -349,6 +352,7 @@ GEM ruby-oembed (0.9.0) ruby-progressbar (1.7.5) rubyzip (1.1.7) + safe_yaml (1.0.4) sass (3.4.20) sass-rails (5.0.4) railties (>= 4.0.0, < 5.0) @@ -413,6 +417,7 @@ GEM unf (0.1.4) unf_ext unf_ext (0.0.7.1) + vcr (2.9.3) warden (1.2.4) rack (>= 1.0) web-console (2.2.1) @@ -420,6 +425,10 @@ GEM binding_of_caller (>= 0.7.2) railties (>= 4.0) sprockets-rails (>= 2.0, < 4.0) + webmock (1.22.1) + addressable (>= 2.3.6) + crack (>= 0.3.2) + hashdiff PLATFORMS ruby @@ -435,6 +444,7 @@ DEPENDENCIES devise devise-guests (~> 0.3) devise_invitable + faraday friendly_id jbuilder (~> 2.0) jettywrapper (>= 2.0) @@ -454,7 +464,9 @@ DEPENDENCIES sqlite3 turbolinks uglifier (>= 1.3.0) + vcr web-console (~> 2.0) + webmock BUNDLED WITH 1.11.2 diff --git a/app/jobs/iiif_ingest_job.rb b/app/jobs/iiif_ingest_job.rb new file mode 100644 index 00000000..3cf0e72c --- /dev/null +++ b/app/jobs/iiif_ingest_job.rb @@ -0,0 +1,15 @@ +class IIIFIngestJob < ActiveJob::Base + # 
Ingest one or more IIIF manifest URLs. Each manifest is ingested as its + # own resource. + def perform(urls) + arr = urls.is_a?(Array) ? urls : Array(urls) + arr.each do |url| + ingest url + end + end + + # Ingest a single IIIF manifest URL as a resource. + def ingest(url) + IIIFResource.new(manifest_url: url).save + end +end diff --git a/app/models/iiif_resource.rb b/app/models/iiif_resource.rb new file mode 100644 index 00000000..e69d45f8 --- /dev/null +++ b/app/models/iiif_resource.rb @@ -0,0 +1,42 @@ +class IIIFResource < Spotlight::Resource + # If a manifest_url is provided, it is retrieved, parsed and indexed + def initialize(manifest_url: nil) + super() + return if manifest_url.blank? + manifest = IIIFResource.parse_manifest(manifest_url) + @title = manifest['label'] + self.url = manifest_url + + self.data ||= {} + self.data['thumbnail'] = manifest['thumbnail']['@id'] if manifest['thumbnail'] + manifest['metadata'].each do |h| + self.data[h['label'].parameterize('_')] = h['value'].map { |v| v["@value"] } + end + end + + def title_field + :"#{solr_fields.prefix}spotlight_title#{solr_fields.string_suffix}" + end + + def to_solr + solr_doc = super + solr_doc[title_field] = @title + + data.each do |k, v| + solr_doc[(k + solr_fields.string_suffix).to_sym] = v + end + + solr_doc + end + + def solr_fields + Spotlight::Engine.config.solr_fields + end + + # Retrieve a IIIF manifest and parse the resulting JSON + def self.parse_manifest(manifest_url) + conn = Faraday.new(manifest_url) + conn.headers['Accept'] = 'application/json' + JSON.parse conn.get(manifest_url).body + end +end diff --git a/config/blacklight.yml b/config/blacklight.yml index e38d79e3..ead72b2a 100644 --- a/config/blacklight.yml +++ b/config/blacklight.yml @@ -15,7 +15,7 @@ development: url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %> test: &test adapter: solr - url: <%= "http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8888}/solr/blacklight-core" %> + url: <%= 
"http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8983}/solr/blacklight-core" %> production: adapter: solr url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %> diff --git a/spec/cassettes/iiif_manifest.yml b/spec/cassettes/iiif_manifest.yml new file mode 100644 index 00000000..e4f8ff46 --- /dev/null +++ b/spec/cassettes/iiif_manifest.yml @@ -0,0 +1,67 @@ +--- +http_interactions: +- request: + method: get + uri: https://hydra-dev.princeton.edu/concern/scanned_resources/1r66j1149/manifest + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Faraday v0.9.2 + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Status: + - 200 OK + Cache-Control: + - max-age=0, private, must-revalidate + Vary: + - Accept + X-Xss-Protection: + - 1; mode=block + X-Request-Id: + - 387b8cd9-892b-4524-88e5-9e6459b820f9 + Etag: + - W/"c8f3976d4d4eeadb8deae907589ab584" + X-Frame-Options: + - SAMEORIGIN + X-Runtime: + - '0.042380' + X-Content-Type-Options: + - nosniff + Date: + - Thu, 14 Jan 2016 14:39:33 GMT + X-Powered-By: + - Phusion Passenger 5.0.23 + Server: + - nginx/1.8.0 + Phusion Passenger 5.0.23 + Access-Control-Allow-Origin: + - "*" + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Allow-Methods: + - GET, POST, OPTIONS + Access-Control-Allow-Headers: + - DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type + body: + encoding: UTF-8 + string: '{"@context":"http://iiif.io/api/presentation/2/context.json","@id":"https://hydra-dev.princeton.edu/concern/scanned_resources/1r66j1149/manifest","@type":"sc:Manifest","label":"Christopher + and his kind, 
1929-1939","viewingHint":"individuals","viewingDirection":"left-to-right","service":{"@context":"http://iiif.io/api/auth/0/context.json","@id":"https://hydra-dev.princeton.edu/users/auth/cas","label":"Login + to Plum using CAS","profile":"http://iiif.io/api/auth/0/login","service":{"hsh":null}},"structures":[{"@id":"https://hydra-dev.princeton.edu/concern/scanned_resources/1r66j1149/manifest/range/g70037437469480","@type":"sc:Range","label":"Logical","viewingHint":"top"}],"metadata":[{"label":"Date + created","value":[{"@value":"1976"}]}]}' + http_version: + recorded_at: Thu, 14 Jan 2016 14:39:33 GMT +recorded_with: VCR 2.9.3 diff --git a/spec/jobs/iiif_ingest_job_spec.rb b/spec/jobs/iiif_ingest_job_spec.rb new file mode 100644 index 00000000..0c87174e --- /dev/null +++ b/spec/jobs/iiif_ingest_job_spec.rb @@ -0,0 +1,22 @@ +require 'rails_helper' + +describe IIIFIngestJob do + let(:url1) { 'http://example.com/1/manifest' } + let(:url2) { 'http://example.com/2/manifest' } + let(:resource) { IIIFResource.new } + + it 'ingests a single url' do + allow_any_instance_of(IIIFResource).to receive(:save) + expect(IIIFResource).to receive(:new).with(manifest_url: url1).and_return(resource) + + described_class.new.perform(url1) + end + + it 'ingests each of an array of urls' do + allow_any_instance_of(IIIFResource).to receive(:save) + expect(IIIFResource).to receive(:new).with(manifest_url: url1).and_return(resource) + expect(IIIFResource).to receive(:new).with(manifest_url: url2).and_return(resource) + + described_class.new.perform([url1, url2]) + end +end diff --git a/spec/models/iiif_resource_spec.rb b/spec/models/iiif_resource_spec.rb new file mode 100644 index 00000000..434d3d92 --- /dev/null +++ b/spec/models/iiif_resource_spec.rb @@ -0,0 +1,79 @@ +require 'rails_helper' + +describe IIIFResource do + context 'with mock http interactions' do + let(:url) { 'http://example.com/1/manifest' } + let(:json) { '{ + "@context":"http://iiif.io/api/presentation/2/context.json", + 
"@id":"http://example.com/1/manifest", + "@type":"sc:Manifest", + "label":"Sample Manifest", + "thumbnail":{ + "@id":"http://example.com/loris/1.jp2/full/100,/0/default.jpg", + "service":{ + "@context":"http://iiif.io/api/image/2/context.json", + "@id":"https://example.com/loris/1.jp2", + "profile":"http://iiif.io/api/image/2/level2.json" }}, + "metadata":[ + { "label": "Creator", "value": [{ "@value": "Author, Alice, 1954-" }] }, + { "label": "Date created", "value": [{ "@value": "1985" }] } + ]}' + } + + before do + resp = double('response') + allow_any_instance_of(Faraday::Connection).to receive(:get).and_return(resp) + allow(resp).to receive(:body).and_return(json) + end + + describe '#initialize' do + it 'loads metadata from the IIIF manifest' do + resource = described_class.new(manifest_url: url) + expect(resource.url).to eq(url) + expect(resource.data['thumbnail']).to eq('http://example.com/loris/1.jp2/full/100,/0/default.jpg') + expect(resource.data['creator']).to eq(['Author, Alice, 1954-']) + end + end + + describe '#parse_manifest' do + it 'retrieves and parses an IIIF manifest' do + expect_any_instance_of(Faraday::Connection).to receive(:get).with(url) + manifest = described_class.parse_manifest(url) + expect(manifest['@id']).to eq(url) + expect(manifest['label']).to eq('Sample Manifest') + expect(manifest['thumbnail']['@id']).to eq('http://example.com/loris/1.jp2/full/100,/0/default.jpg') + end + end + + describe '#to_solr' do + subject { described_class.new(manifest_url: url) } + before do + exhibit = Spotlight::Exhibit.new + allow(exhibit).to receive(:blacklight_config).and_return(Blacklight::Configuration.new) + subject.exhibit = exhibit + end + + it 'indexes iiif metadata' do + solr_doc = subject.to_solr + expect(solr_doc[:spotlight_title_ssim]).to eq('Sample Manifest') + expect(solr_doc[:thumbnail_ssim]).to eq('http://example.com/loris/1.jp2/full/100,/0/default.jpg') + expect(solr_doc[:creator_ssim]).to eq(['Author, Alice, 1954-']) + 
expect(solr_doc[:date_created_ssim]).to eq(['1985']) + end + end + end + + context 'with recorded http interactions', vcr: { cassette_name: 'iiif_manifest' } do + let(:url) { 'https://hydra-dev.princeton.edu/concern/scanned_resources/1r66j1149/manifest' } + it 'ingests a iiif manifest' do + exhibit = Spotlight::Exhibit.create title: 'Exhibit A' + resource = IIIFResource.new manifest_url: url + resource.exhibit = exhibit + expect(resource.save).to be true + + reloaded = IIIFResource.last + expect(reloaded.url).to eq url + expect(reloaded.data['date_created']).to eq ['1976'] + end + end +end diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb new file mode 100644 index 00000000..2a2467d3 --- /dev/null +++ b/spec/models/user_spec.rb @@ -0,0 +1,9 @@ +require 'rails_helper' + +describe User do + let(:email) { 'user@example.com' } + subject { User.new email: email } + it 'has an email address' do + expect(subject.to_s).to eq email + end +end diff --git a/spec/support/vcr.rb b/spec/support/vcr.rb new file mode 100644 index 00000000..697b05e3 --- /dev/null +++ b/spec/support/vcr.rb @@ -0,0 +1,8 @@ +require 'vcr' + +VCR.configure do |c| + c.cassette_library_dir = 'spec/cassettes' + c.hook_into :webmock + c.ignore_localhost = true + c.configure_rspec_metadata! +end