Skip to content

Commit

Permalink
Store derivatives on local file system
Browse files Browse the repository at this point in the history
rather than in Fedora. This is much faster than Fedora and avoids the
problem of differentiating files when there are multiple access copies
for different purposes (e.g. webm vs mp4, mp3 vs ogg) sharing a use predicate.
Fixes #188
  • Loading branch information
val99erie authored and jcoyne committed Aug 24, 2015
1 parent 0d3362f commit c0d8f85
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 17 deletions.
12 changes: 12 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ AllCops:
- 'spec/internal/**/*'
- 'curation_concerns-models/app/models/concerns/curation_concerns/generic_file/export.rb'

Lint/AssignmentInCondition:
Exclude:
- 'curation_concerns-models/app/services/curation_concerns/persist_derivatives.rb'

Metrics/LineLength:
Enabled: false

Expand Down Expand Up @@ -68,6 +72,14 @@ Style/MultilineBlockLayout:
Exclude:
- 'spec/**/*'

Style/Semicolon:
Exclude:
- 'spec/**/*'

Style/Lambda:
Exclude:
- 'spec/**/*'

Style/IndentationConsistency:
EnforcedStyle: rails

Expand Down
20 changes: 15 additions & 5 deletions app/controllers/concerns/curation_concerns/download_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ def default_content_path
# Render the 404 page if the file doesn't exist.
# Otherwise renders the file.
def show
if file
case file
when ActiveFedora::File
# For original files that are stored in fedora
super
when String
# For derivatives stored on the local file system
send_file file, type: mime_type_for(file), disposition: 'inline'
else
render_404
end
Expand All @@ -28,17 +33,18 @@ def authorize_download!
authorize! :read, asset
end

# Overrides Hydra::Controller::DownloadBehavior#load_file, which is hard-coded to assume files are in BasicContainer (PCDM Objects use direct containment)
# Overrides Hydra::Controller::DownloadBehavior#load_file, which is hard-coded to assume files are in BasicContainer.
# Override this method to change which file is shown.
# Loads the file specified by the HTTP parameter `:file`.
# If this object does not have a file by that name, return the default file
# as returned by {#default_file}
# @return [ActiveFedora::File] the file
# @return [ActiveFedora::File, String, NilClass] Returns the file from the repository or a path to a file on the local file system, if it exists.
def load_file
# The HTTP parameter `:file` names the file being requested.
file_reference = params[:file]
# Nothing was requested explicitly: fall back to the default file.
return default_file unless file_reference
# NOTE(review): the next two statements look like the removed side of this diff hunk
# (nothing below uses their result) — confirm against the committed file.
association = dereference_file(file_reference)
association.reader if association

# Build the derivative path for the requested reference and return it only
# when a file actually exists there; otherwise return nil.
file_path = CurationConcerns::DerivativePath.derivative_path_for_reference(asset, file_reference)
File.exist?(file_path) ? file_path : nil
end

def default_file
Expand All @@ -53,6 +59,10 @@ def default_file

private

# Looks up the MIME content type for a file from its extension.
#
# @param file [String] path (or name) of the file being sent
# @return [String] the content type of the first MIME type registered for the
#   extension, or 'application/octet-stream' when none is registered
def mime_type_for(file)
  # The lookup can come back empty for an unrecognised extension, in which case
  # `first` is nil; fall back to a generic binary type instead of raising.
  mime_type = MIME::Types.type_for(File.extname(file)).first
  mime_type ? mime_type.content_type : 'application/octet-stream'
end

def dereference_file(file_reference)
return false if file_reference.nil?
association = asset.association(file_reference.to_sym)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,7 @@ module Derivatives
extend ActiveSupport::Concern

included do
# Using File and ServiceFile so that we can have two alternative
# sound encoding formats and two alternative video formats
# with unique RDF URIs.
# TODO: Is there a mime type ontology we should be using instead?
directly_contains_one :ogg, through: :files, type: ::RDF::URI("http://pcdm.org/use#ServiceFile"), class_name: "Hydra::PCDM::File"
directly_contains_one :mp3, through: :files, type: ::RDF::URI("http://pcdm.org/use#File"), class_name: "Hydra::PCDM::File"
directly_contains_one :mp4, through: :files, type: ::RDF::URI("http://pcdm.org/use#ServiceFile"), class_name: "Hydra::PCDM::File"
directly_contains_one :webm, through: :files, type: ::RDF::URI("http://pcdm.org/use#File"), class_name: "Hydra::PCDM::File"
Hydra::Derivatives.output_file_service = CurationConcerns::PersistDerivatives

makes_derivatives do |obj|
case obj.original_file.mime_type
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module CurationConcerns
  # Computes where a derivative of an object is stored on the local file system.
  class DerivativePath
    # Path on file system where derivative file is stored.
    #
    # @param object [#id] the object the derivative belongs to; its id names the directory
    # @param destination_name [String] derivative name (e.g. 'thumbnail'), optionally
    #   prefixed with 'original_file_'
    # @return [String] <derivatives_path>/<object.id>/<name><extension>
    def self.derivative_path_for_reference(object, destination_name)
      # Anchor with \A (not ^) so only a prefix of the whole string is stripped.
      name = destination_name.sub(/\Aoriginal_file_/, '')
      # The name may come from outside the application (e.g. a request parameter);
      # keep only its last path component so it cannot escape the object's directory.
      name = File.basename(name)
      derivative_path(object, extension_for(name), name)
    end

    # Joins the configured derivatives root, the object id and the file name.
    def self.derivative_path(object, extension, destination_name)
      file_name = destination_name + extension
      File.join(CurationConcerns.config.derivatives_path, object.id, file_name)
    end

    # Thumbnails use the first extension registered for 'jpg'; every other
    # derivative uses its own name as the extension.
    def self.extension_for(destination_name)
      case destination_name
      when 'thumbnail'
        ".#{MIME::Types.type_for('jpg').first.extensions.first}"
      else
        ".#{destination_name}"
      end
    end

    # A bare `private_class_method` hides nothing; the method names must be passed.
    private_class_method :derivative_path, :extension_for
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
module CurationConcerns
  # Output-file service that writes derivatives to the local file system.
  # It follows the call signature of `Hydra::Derivatives::PersistOutputFileService`
  # and is meant as an alternative to that default service. The target file is
  # opened in 'wb' mode, so existing content is replaced; no versions are kept.
  class PersistDerivatives < Hydra::Derivatives::PersistOutputFileService
    # Copies the derivative stream to its location on disk, 4096 bytes per read.
    #
    # @param [Hydra::Works::GenericFile::Base] object the file will be added to
    # @param [Hydra::Derivatives::IoDecorator] file the derivative filestream
    # @param [String] destination_name the name Hydra::Derivatives created for the
    #   derivative; the file type (e.g. 'thumbnail') is extracted from it
    def self.call(object, file, destination_name)
      output_file(object, destination_name) do |sink|
        loop do
          chunk = file.read(4096)
          # A falsy read result marks the end of the stream.
          break unless chunk
          sink.write chunk
        end
      end
    end

    # Opens the destination file for binary writing and hands it to the given
    # block, creating the containing directory first when it is missing.
    def self.output_file(object, destination_name, &blk)
      target = DerivativePath.derivative_path_for_reference(object, destination_name)
      File.dirname(target).tap do |dir|
        FileUtils.mkdir_p(dir) unless File.directory?(dir)
      end
      File.open(target, 'wb', &blk)
    end
  end
end
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def application_root_url
@application_root_url || (fail 'Make sure to set your CurationConcerns.config.application_root_url')
end

# Path on the local file system where derivatives will be stored
attr_writer :derivatives_path
# Returns the assigned derivatives directory, or <Rails.root>/tmp/derivatives
# when none has been assigned; the default is remembered after the first call.
def derivatives_path
  return @derivatives_path if @derivatives_path
  @derivatives_path = File.join(Rails.root, 'tmp', 'derivatives')
end

# When was this last built/deployed
attr_writer :build_identifier
def build_identifier
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@
# Specify the form of hostpath to be used in Endnote exports
# config.persistent_hostpath = 'http://localhost/files/'

# Location on local file system where derivatives will be stored.
# config.derivatives_path = File.join(Rails.root, 'tmp', 'derivatives')

# If you have ffmpeg installed and want to transcode audio and video uncomment this line
# config.enable_ffmpeg = true

Expand Down
10 changes: 6 additions & 4 deletions spec/controllers/downloads_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
end

context "when user isn't logged in" do
# before { generic_file }
it 'redirects to sign in' do
get :show, id: generic_file.to_param
expect(response).to redirect_to new_user_session_path
Expand All @@ -44,14 +43,17 @@

context "with an alternative file" do
context "that is persisted" do
let(:file) { File.open(fixture_file_path('world.png'), 'rb') }

let(:content) { file.rewind; file.read }

before do
content = File.open(fixture_file_path('world.png'))
Hydra::Works::AddFileToGenericFile.call(generic_file, content, :thumbnail)
CurationConcerns::PersistDerivatives.call(generic_file, file, 'thumbnail')
end

it 'sends requested file content' do
get :show, id: generic_file, file: 'thumbnail'
expect(response.body).to eq generic_file.thumbnail.content
expect(response.body).to eq content
end
end

Expand Down
16 changes: 16 additions & 0 deletions spec/services/derivative_path_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require 'spec_helper'

# Verifies the on-disk location computed for a derivative.
describe CurationConcerns::DerivativePath do
  before do
    # Pin the configured root so the expected paths are predictable.
    allow(CurationConcerns.config).to receive(:derivatives_path).and_return('tmp')
  end

  describe '.derivative_path_for_reference' do
    subject { described_class.derivative_path_for_reference(object, destination_name) }

    let(:object) { double(id: '123') }
    let(:destination_name) { 'thumbnail' }

    it { is_expected.to eq 'tmp/123/thumbnail.jpeg' }

    # The 'original_file_' prefix is stripped before the path is built.
    context 'when the name carries the original_file_ prefix' do
      let(:destination_name) { 'original_file_thumbnail' }

      it { is_expected.to eq 'tmp/123/thumbnail.jpeg' }
    end

    # Names other than 'thumbnail' are used as their own extension.
    context 'when the name is not a thumbnail' do
      let(:destination_name) { 'webm' }

      it { is_expected.to eq 'tmp/123/webm.webm' }
    end
  end
end
24 changes: 24 additions & 0 deletions spec/services/persist_derivatives_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
require 'spec_helper'

# Verifies that the output file is opened at the derivative path for writing.
describe CurationConcerns::PersistDerivatives do
  before do
    # Pin the configured root so the expected paths are predictable.
    allow(CurationConcerns.config).to receive(:derivatives_path).and_return('tmp')
  end

  describe '.output_file' do
    subject { described_class.output_file(object, destination_name, &block) }

    let(:object) { double(id: '123') }
    let(:destination_name) { 'thumbnail' }

    let(:block) { lambda { true } }

    before do
      # mkdir_p is only called when the directory is absent; pin that answer so
      # the example does not depend on whether tmp/123 happens to exist on disk.
      allow(File).to receive(:directory?).and_call_original
      allow(File).to receive(:directory?).with('tmp/123').and_return(false)
    end

    it 'yields to the file' do
      expect(FileUtils).to receive(:mkdir_p).with('tmp/123')
      expect(File).to receive(:open).with('tmp/123/thumbnail.jpeg', 'wb') do |*_, &blk|
        # The caller's block must be forwarded to File.open unchanged.
        expect(blk).to be(block)
      end
      subject
    end
  end
end

0 comments on commit c0d8f85

Please sign in to comment.