diff --git a/.rubocop.yml b/.rubocop.yml
index 2a7c3d6d4e..9df75e7be5 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -1,6 +1,7 @@
 inherit_from: .rubocop_todo.yml
 
 AllCops:
+  TargetRubyVersion: 2.1
   DisplayCopNames: true
   Include:
     - '**/Rakefile'
diff --git a/app/controllers/sufia/resource_sync_controller.rb b/app/controllers/sufia/resource_sync_controller.rb
new file mode 100644
index 0000000000..7ffccabbca
--- /dev/null
+++ b/app/controllers/sufia/resource_sync_controller.rb
@@ -0,0 +1,43 @@
+# Serves the ResourceSync source description, capability list and resource
+# list documents as XML.
+class Sufia::ResourceSyncController < ApplicationController
+  # Renders the document built by SourceDescriptionWriter.
+  def source_description
+    render_cached_xml('source_description') { build_source_description }
+  end
+
+  # Renders the document built by CapabilityListWriter.
+  def capability_list
+    render_cached_xml('capability_list') { build_capability_list }
+  end
+
+  # Renders the document built by ResourceListWriter.
+  def resource_list
+    render_cached_xml('resource_list') { build_resource_list }
+  end
+
+  private
+
+  # Renders the XML string returned by the block, cached for one week.
+  # The cache key is "<document_name>_<request host>": the document name keeps
+  # the three documents from sharing one cache entry, and the host keeps
+  # tenants apart (multitenancy support).
+  def render_cached_xml(document_name)
+    body = Rails.cache.fetch("#{document_name}_#{request.host}", expires_in: 1.week) { yield }
+    render body: body, content_type: 'application/xml'
+  end
+
+  def build_resource_list
+    Sufia::ResourceSync::ResourceListWriter.new(capability_list_url: sufia.capability_list_url,
+                                                resource_host: request.host).write
+  end
+
+  def build_capability_list
+    Sufia::ResourceSync::CapabilityListWriter.new(resource_list_url: sufia.resource_list_url,
+                                                  description_url: sufia.source_description_url).write
+  end
+
+  def build_source_description
+    Sufia::ResourceSync::SourceDescriptionWriter.new(capability_list_url: sufia.capability_list_url).write
+  end
+end
diff --git a/app/views/layouts/_head_tag_content.html.erb b/app/views/layouts/_head_tag_content.html.erb
index 92df457c0d..86b6ffd6bf 100644
--- 
a/app/views/layouts/_head_tag_content.html.erb
+++ b/app/views/layouts/_head_tag_content.html.erb
@@ -3,6 +3,7 @@
+ <%= yield :twitter_meta %>
diff --git a/config/routes.rb b/config/routes.rb
index 6f8481325f..fff26790cd 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -9,6 +9,11 @@
   # e.g. https://scholarsphere.psu.edu/files/gm80hv36p
   get '/files/:id', to: redirect('/concern/generic_works/%{id}')
 
+  # ResourceSync routes
+  get '/.well-known/resourcesync' => 'sufia/resource_sync#source_description', as: :source_description
+  get '/capabilitylist' => 'sufia/resource_sync#capability_list', as: :capability_list
+  get '/resourcelist' => 'sufia/resource_sync#resource_list', as: :resource_list
+
   delete '/uploads/:id', to: 'sufia/uploads#destroy', as: :sufia_uploaded_file
   post '/uploads', to: 'sufia/uploads#create'
   # This is a hack that is required because the rails form the uploader is on
diff --git a/lib/sufia.rb b/lib/sufia.rb
index 2c286c71f6..6ed0ba4520 100644
--- a/lib/sufia.rb
+++ b/lib/sufia.rb
@@ -30,6 +30,7 @@ module Sufia
     autoload :Arkivo
     autoload :Configuration
     autoload :RedisEventStore
+    autoload :ResourceSync
     autoload :Zotero
   end
 
diff --git a/lib/sufia/resource_sync.rb b/lib/sufia/resource_sync.rb
new file mode 100644
index 0000000000..49d3c4287d
--- /dev/null
+++ b/lib/sufia/resource_sync.rb
@@ -0,0 +1,11 @@
+module Sufia
+  module ResourceSync
+    extend ActiveSupport::Autoload
+
+    eager_autoload do
+      autoload :CapabilityListWriter
+      autoload :ResourceListWriter
+      autoload :SourceDescriptionWriter
+    end
+  end
+end
diff --git a/lib/sufia/resource_sync/capability_list_writer.rb b/lib/sufia/resource_sync/capability_list_writer.rb
new file mode 100644
index 0000000000..c1c61db017
--- /dev/null
+++ b/lib/sufia/resource_sync/capability_list_writer.rb
@@ -0,0 +1,37 @@
+module Sufia
+  module ResourceSync
+    # Builds the capability list: a sitemap-style XML document with a rel="up"
+    # link to the source description and one <url> entry for the resource list.
+    class CapabilityListWriter
+      attr_reader :resource_list_url, :description_url
+
+      # @param resource_list_url [String] written as the <loc> of the resource list entry
+      # @param description_url [String] written as the href of the rel="up" link
+      def initialize(resource_list_url:, description_url:)
+        @resource_list_url = resource_list_url
+        @description_url = description_url
+      end
+
+      # @return [String] the capability list serialized as XML
+      def write
+        builder.to_xml
+      end
+
+      private
+
+      def builder
+        Nokogiri::XML::Builder.new do |xml|
+          xml.urlset('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
+                     'xmlns:rs' => 'http://www.openarchives.org/rs/terms/') do
+            xml['rs'].ln(rel: "up", href: description_url)
+            xml['rs'].md(capability: "capabilitylist")
+            xml.url do
+              xml.loc resource_list_url
+              xml['rs'].md(capability: 'resourcelist')
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/sufia/resource_sync/resource_list_writer.rb b/lib/sufia/resource_sync/resource_list_writer.rb
new file mode 100644
index 0000000000..59b15c4404
--- /dev/null
+++ b/lib/sufia/resource_sync/resource_list_writer.rb
@@ -0,0 +1,85 @@
+module Sufia
+  module ResourceSync
+    # TODO: the big assumption I'm making here is that the repository has fewer
+    # than 50,000 resources to list. The Sitemap protocol is limited at 50,000
+    # items, so if we require more than that, we must have multiple Resource
+    # lists and add a Resource List Index to point to all of them.
+    class ResourceListWriter
+      attr_reader :resource_host, :capability_list_url
+
+      # @param resource_host [String] host passed to the URL helper for each resource
+      # @param capability_list_url [String] written as the href of the rel="up" link
+      def initialize(resource_host:, capability_list_url:)
+        @resource_host = resource_host
+        @capability_list_url = capability_list_url
+      end
+
+      # @return [String] the resource list serialized as XML
+      def write
+        builder.to_xml
+      end
+
+      private
+
+      # Uses the capability_list_url reader set in #initialize. Do not add a
+      # capability_list_url parameter here: a local of that name shadows the
+      # reader, so the configured URL would never reach the document.
+      def builder
+        Nokogiri::XML::Builder.new do |xml|
+          xml.urlset('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
+                     'xmlns:rs' => 'http://www.openarchives.org/rs/terms/') do
+            xml['rs'].ln(rel: "up", href: capability_list_url)
+            xml['rs'].md(capability: "resourcelist", at: Time.now.utc.iso8601)
+            build_collections(xml)
+            build_works(xml)
+            build_files(xml)
+          end
+        end
+      end
+
+      def build_collections(xml)
+        Collection.search_in_batches(public_access) do |doc_set|
+          build_resources(xml, doc_set)
+        end
+      end
+
+      def build_works(xml)
+        CurationConcerns::WorkRelation.new.search_in_batches(public_access) do |doc_set|
+          build_resources(xml, doc_set)
+        end
+      end
+
+      def build_files(xml)
+        FileSet.search_in_batches(public_access) do |doc_set|
+          build_resources(xml, doc_set)
+        end
+      end
+
+      def build_resources(xml, doc_set)
+        doc_set.each do |doc|
+          build_resource(xml, doc)
+        end
+      end
+
+      # Writes one <url> entry. The route helper name is derived from the
+      # model class named in the document's has_model_ssim field.
+      def build_resource(xml, doc)
+        xml.url do
+          key = doc.fetch('has_model_ssim', []).first.constantize.model_name.singular_route_key
+          xml.loc routes.send(key + "_url", doc['id'], host: resource_host)
+          xml.lastmod doc['system_modified_dtsi']
+        end
+      end
+
+      def routes
+        Rails.application.routes.url_helpers
+      end
+
+      delegate :collection_url, to: :routes
+
+      def public_access
+        { Hydra.config.permissions.read.group => 'public' }
+      end
+    end
+  end
+end
diff --git a/lib/sufia/resource_sync/source_description_writer.rb b/lib/sufia/resource_sync/source_description_writer.rb
new file mode 100644
index 0000000000..9a5486a29c
--- /dev/null
+++ b/lib/sufia/resource_sync/source_description_writer.rb
@@ -0,0 +1,30 @@
+module Sufia
+  module ResourceSync
+
class SourceDescriptionWriter
+      attr_reader :capability_list_url
+      def initialize(capability_list_url: 'http://example.com/dataset1/capabilitylist.xml')
+        @capability_list_url = capability_list_url
+      end
+
+      def write
+        builder.to_xml
+      end
+
+      private
+
+      def builder
+        Nokogiri::XML::Builder.new do |xml|
+          xml.urlset('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
+                     'xmlns:rs' => 'http://www.openarchives.org/rs/terms/') do
+            xml['rs'].ln(rel: "up", href: capability_list_url)
+            xml['rs'].md(capability: "description")
+            xml.url do
+              xml.loc capability_list_url
+              xml['rs'].md(capability: 'capabilitylist')
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/controllers/sufia/resource_sync_controller_spec.rb b/spec/controllers/sufia/resource_sync_controller_spec.rb
new file mode 100644
index 0000000000..66f35d916c
--- /dev/null
+++ b/spec/controllers/sufia/resource_sync_controller_spec.rb
@@ -0,0 +1,48 @@
+RSpec.describe Sufia::ResourceSyncController do
+  # Every action caches its response body, so each example starts with an empty cache.
+  before do
+    Rails.cache.clear
+  end
+
+  describe "source_description" do
+    let(:writer) { double }
+    let(:document) { '' }
+    let(:capability_list) { Sufia::Engine.routes.url_helpers.capability_list_url(host: 'test.host') }
+
+    it "is successful" do
+      allow(Sufia::ResourceSync::SourceDescriptionWriter).to receive(:new).with(capability_list_url: capability_list).and_return(writer)
+      expect(writer).to receive(:write).and_return(document)
+      get :source_description
+      expect(response.content_type).to eq 'application/xml'
+      expect(response.body).to eq document
+    end
+  end
+
+  describe "capability_list" do
+    let(:writer) { double }
+    let(:document) { '' }
+
+    it "is successful" do
+      allow(Sufia::ResourceSync::CapabilityListWriter).to receive(:new).with(resource_list_url: "http://test.host/resourcelist",
+                                                                             description_url: "http://test.host/.well-known/resourcesync").and_return(writer)
+      expect(writer).to receive(:write).and_return(document)
+      get :capability_list
+      expect(response.content_type).to eq 'application/xml'
+      expect(response.body).to eq document
+    end
+  end
+
+  describe "resource_list" do
+    let(:writer) { double }
+    let(:document) { '' }
+    let(:capability_list) { Sufia::Engine.routes.url_helpers.capability_list_url(host: 'test.host') }
+
+    it "is successful" do
+      allow(Sufia::ResourceSync::ResourceListWriter).to receive(:new).with(capability_list_url: capability_list, resource_host: "test.host").and_return(writer)
+      expect(writer).to receive(:write).and_return(document)
+      get :resource_list
+      expect(response.content_type).to eq 'application/xml'
+      expect(response.body).to eq document
+    end
+  end
+end
diff --git a/spec/lib/sufia/capability_list_writer_spec.rb b/spec/lib/sufia/capability_list_writer_spec.rb
new file mode 100644
index 0000000000..95c1a26bc2
--- /dev/null
+++ b/spec/lib/sufia/capability_list_writer_spec.rb
@@ -0,0 +1,26 @@
+require 'spec_helper'
+
+RSpec.describe Sufia::ResourceSync::CapabilityListWriter do
+  let(:sitemap) { 'http://www.sitemaps.org/schemas/sitemap/0.9' }
+  let(:rs) { 'http://www.openarchives.org/rs/terms/' }
+
+  let(:resource_list) { 'http://example.com/resourcelist.xml' }
+  let(:description) { 'http://example.com/resourcesync_description.xml' }
+
+  subject { described_class.new(resource_list_url: resource_list,
+                                description_url: description).write }
+  let(:xml) { Nokogiri::XML.parse(subject) }
+
+  it "has url to the resource list" do
+    description_href = xml.xpath('/x:urlset/rs:ln[@rel="up"]/@href', 'x' => sitemap, 'rs' => rs).map(&:value)
+    expect(description_href).to eq [description]
+
+    capability = xml.xpath('/x:urlset/rs:md/@capability', 'x' => sitemap, 'rs' => rs).map(&:value)
+    expect(capability).to eq ["capabilitylist"]
+
+    url = xml.xpath('//x:url[1]/x:loc', 'x' => sitemap).text
+    expect(url).to eq resource_list
+    capability = xml.xpath('//x:url[1]/rs:md/@capability', 'x' => sitemap, 'rs' => rs).map(&:value)
+    expect(capability).to eq ["resourcelist"]
+  end
+end
diff --git a/spec/lib/sufia/resource_list_writer_spec.rb b/spec/lib/sufia/resource_list_writer_spec.rb
new file mode 100644
index 0000000000..0819ce303f
--- /dev/null
+++ b/spec/lib/sufia/resource_list_writer_spec.rb
@@ -0,0 +1,24 @@
+require 'spec_helper'
+
+RSpec.describe Sufia::ResourceSync::ResourceListWriter do
+  let(:sitemap) { 'http://www.sitemaps.org/schemas/sitemap/0.9' }
+  let!(:private_collection) { create(:private_collection) }
+  let!(:public_collection) { create(:public_collection) }
+  let!(:public_work) { create(:public_generic_work) }
+  let!(:private_work) { create(:work) }
+  let!(:file_set) { create(:file_set, :public) }
+  let(:capability_list) { 'http://example.com/capabilityList.xml' }
+
+  subject { described_class.new(resource_host: 'example.com', capability_list_url: capability_list).write }
+  let(:xml) { Nokogiri::XML.parse(subject) }
+
+  it "lists only the three public resources" do
+    first_url = xml.xpath('//x:url[1]/x:loc', 'x' => sitemap).text
+    second_url = xml.xpath('//x:url[2]/x:loc', 'x' => sitemap).text
+    third_url = xml.xpath('//x:url[3]/x:loc', 'x' => sitemap).text
+    expect(first_url).to eq "http://example.com/collections/#{public_collection.id}"
+    expect(second_url).to eq "http://example.com/concern/generic_works/#{public_work.id}"
+    expect(third_url).to eq "http://example.com/concern/file_sets/#{file_set.id}"
+    expect(xml.xpath('//x:url', 'x' => sitemap).count).to eq 3
+  end
+end
diff --git a/spec/lib/sufia/source_description_writer_spec.rb b/spec/lib/sufia/source_description_writer_spec.rb
new file mode 100644
index 0000000000..611d902eff
--- /dev/null
+++ b/spec/lib/sufia/source_description_writer_spec.rb
@@ -0,0 +1,21 @@
+require 'spec_helper'
+
+RSpec.describe Sufia::ResourceSync::SourceDescriptionWriter do
+  let(:sitemap) { 'http://www.sitemaps.org/schemas/sitemap/0.9' }
+  let(:rs) { 'http://www.openarchives.org/rs/terms/' }
+
+  let(:capability_list) { 'http://example.com/capabilityList.xml' }
+
+  subject { described_class.new(capability_list_url: capability_list).write }
+  let(:xml) { Nokogiri::XML.parse(subject) }
+
+  it "has url to the capability list" do
+    capability = xml.xpath('/x:urlset/rs:md/@capability', 'x' => sitemap, 'rs' => rs).map(&:value)
+    expect(capability).to eq ["description"]
+
+    url = xml.xpath('//x:url[1]/x:loc', 'x' => sitemap).text
+    expect(url).to eq capability_list
+    capability = xml.xpath('//x:url[1]/rs:md/@capability', 'x' => sitemap, 'rs' => rs).map(&:value)
+    expect(capability).to eq ["capabilitylist"]
+  end
+end
diff --git a/spec/routing/route_spec.rb b/spec/routing/route_spec.rb
index c982fa9c3b..0e33654184 100644
--- a/spec/routing/route_spec.rb
+++ b/spec/routing/route_spec.rb
@@ -1,6 +1,20 @@
 describe 'Routes', type: :routing do
   routes { Sufia::Engine.routes }
 
+  describe "ResourceSync" do
+    it 'routes the well-known uri' do
+      expect(get: '/.well-known/resourcesync').to route_to(controller: 'sufia/resource_sync', action: 'source_description')
+    end
+
+    it 'routes the capability list' do
+      expect(get: '/capabilitylist').to route_to(controller: 'sufia/resource_sync', action: 'capability_list')
+    end
+
+    it 'routes the resource list' do
+      expect(get: '/resourcelist').to route_to(controller: 'sufia/resource_sync', action: 'resource_list')
+    end
+  end
+
+  describe 'Homepage' do
     it 'routes the root url to the homepage controller' do
       expect(get: '/').to route_to(controller: 'sufia/homepage', action: 'index')