Skip to content

Commit

Permalink
(incomplete) refactors Processors to use services for retrieving and …
Browse files Browse the repository at this point in the history
…persisting files
  • Loading branch information
flyingzumwalt committed Jun 16, 2015
1 parent d116bfa commit 946f581
Show file tree
Hide file tree
Showing 17 changed files with 240 additions and 32 deletions.
3 changes: 2 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ group :development, :test do

# Not-Yet-Released Development dependencies
gem 'hydra-works', github: 'projecthydra-labs/hydra-works', ref: '013d817'
gem 'hydra-pcdm', github: 'projecthydra-labs/hydra-pcdm', ref: '30a9643'
# gem 'hydra-pcdm', github: 'projecthydra-labs/hydra-pcdm', ref: '30a9643'
gem 'hydra-pcdm', path: '../hydra-pcdm'
gem 'active-fedora', github: 'projecthydra/active_fedora', ref:'57ac754'
gem 'activefedora-aggregation', github: 'projecthydra-labs/activefedora-aggregation', ref: 'eef02b0'

Expand Down
25 changes: 22 additions & 3 deletions lib/hydra/derivatives.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ module Derivatives
autoload :Logger
autoload :TempfileService

# services
autoload :RetrieveSourceFileService, 'hydra/derivatives/services/retrieve_source_file_service'
autoload :PersistOutputFileService, 'hydra/derivatives/services/persist_output_file_service'
autoload :PersistIndirectlyContainedOutputFile, 'hydra/derivatives/services/persist_indirectly_contained_file'
autoload :TempfileService, 'hydra/derivatives/services/tempfile_service'


# Raised if the timeout elapses
class TimeoutError < ::Timeout::Error; end

Expand All @@ -33,7 +40,7 @@ def self.reset_config!
end

[:ffmpeg_path, :libreoffice_path, :temp_file_base, :fits_path, :kdu_compress_path,
:kdu_compress_recipes, :enable_ffmpeg].each do |method|
:kdu_compress_recipes, :enable_ffmpeg, :source_file_service, :output_file_service].each do |method|
module_eval <<-RUBY
def self.#{method.to_s}
config.#{method.to_s}
Expand Down Expand Up @@ -68,6 +75,9 @@ def create_derivatives
# @param file_name
# @param [Hash] transform_directives - each key corresponds to a desired derivative. Associated values vary according to processor being used.
# @param [Hash] opts for specifying things like choice of :processor (processor defaults to :image)
# @option opts [Symbol] :processor (:image) Processor to use
# @option opts [Class] :source_file_service (Hydra::Derivatives::RetrieveSourceFileService) service to use when retrieving the source. The default for this can be set in your config file.
# @option opts [Class] :output_file_service (Hydra::Derivatives::PersistIndirectlyContainedOutputFile) service to use when persisting generated derivatives. The default for this can be set in your config file.
#
# @example This will create content_thumb
# transform_file :content, { :thumb => "100x100>" }
Expand All @@ -82,9 +92,18 @@ def create_derivatives
# transform_file :content, { :mp3 => {format: 'mp3'}, :ogg => {format: 'ogg'} }, processor: :audio
# transform_file :content, { :mp4 => {format: 'mp4'}, :webm => {format: 'webm'} }, processor: :video
#
# @example Specify an output file service to use when persisting generated derivatives
# obj.transform_file :content, { mp4: { format: 'mp4' } }, processor: :video, output_file_service: My::System::PersistOutputFileToTapeStorage
#
# @example Specify a source file service to use when retrieving the source
# obj.transform_file :content, { mp4: { format: 'mp4' } }, processor: :video, source_file_service: My::System::RetrieveSourceFileFromTapeStorage

def transform_file(file_name, transform_directives, opts={})
processor = processor_class(opts[:processor] || :image)
processor.new(self, file_name, transform_directives).process
initialize_processor(file_name, transform_directives, opts).process
end

# Builds (but does not run) the processor for a derivative transformation.
#
# @param file_name name of the source file, passed through to the processor
# @param [Hash] transform_directives passed through to the processor
# @param [Hash] opts passed through to the processor; opts[:processor] selects
#   the processor class and falls back to :image when absent
# @return a new instance of the class returned by processor_class
def initialize_processor(file_name, transform_directives, opts={})
  processor_name = opts[:processor] || :image
  klass = processor_class(processor_name)
  klass.new(self, file_name, transform_directives, opts)
end

def processor_class(processor)
Expand Down
8 changes: 8 additions & 0 deletions lib/hydra/derivatives/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ def temp_file_base
@temp_file_base ||= '/tmp'
end

# Service used to retrieve source files. Returns the configured value when one
# has been set; otherwise stores and returns
# Hydra::Derivatives::RetrieveSourceFileService.
def source_file_service
  return @source_file_service if @source_file_service
  @source_file_service = Hydra::Derivatives::RetrieveSourceFileService
end

# Service used to persist generated output files. Returns the configured value
# when one has been set; otherwise stores and returns
# Hydra::Derivatives::PersistIndirectlyContainedOutputFile.
def output_file_service
  return @output_file_service if @output_file_service
  @output_file_service = Hydra::Derivatives::PersistIndirectlyContainedOutputFile
end

# Path to the FITS executable; 'fits.sh' is stored and returned when no value
# has been set.
def fits_path
  @fits_path = 'fits.sh' unless @fits_path
  @fits_path
end
Expand Down
5 changes: 3 additions & 2 deletions lib/hydra/derivatives/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def self.encode(path, options, output_file)
execute "#{Hydra::Derivatives.libreoffice_path} --invisible --headless --convert-to #{format} --outdir #{outdir} #{path}"
end

def encode_file(dest_path, file_suffix, mime_type, options = { })
def encode_file(destination_name, file_suffix, mime_type, options = { })
new_output = ''
Hydra::Derivatives::TempfileService.create(source_file) do |f|
if mime_type == 'image/jpeg'
Expand All @@ -24,7 +24,8 @@ def encode_file(dest_path, file_suffix, mime_type, options = { })
end
end
out_file = File.open(new_output, "rb")
object.add_file(out_file.read, path: dest_path, mime_type: mime_type)
# object.add_file(out_file.read, path: destination_name, mime_type: mime_type)
output_file_service.call(object, out_file.read, destination_name, mime_type: mime_type)
File.unlink(out_file)
end

Expand Down
5 changes: 3 additions & 2 deletions lib/hydra/derivatives/image.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def process_without_timeout
directives.each do |name, args|
opts = args.kind_of?(Hash) ? args : {size: args}
format = opts.fetch(:format, 'png')
output_file_name = opts.fetch(:datastream, output_file_id(name))
create_resized_image(output_file(output_file_name), opts[:size], format)
end
end
Expand Down Expand Up @@ -50,7 +49,9 @@ def write_image(output_file, xfrm)
stream = StringIO.new
xfrm.write(stream)
stream.rewind
output_file.content = stream
# output_file.content = stream
output_file_service.call(object, stream, destination_name, mime_type: mime_type)

end

# Override this method if you want a different transformer, or need to load the
Expand Down
6 changes: 4 additions & 2 deletions lib/hydra/derivatives/jpeg2k_image.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def process
end
end

def encode_file(dest_path, recipe, opts={})
def encode_file(destination_name, recipe, opts={})
output_file = self.class.tmp_file('.jp2')
if opts[:file_path]
self.class.encode(opts[:file_path], recipe, output_file)
Expand All @@ -35,7 +35,9 @@ def encode_file(dest_path, recipe, opts={})
end
end
out_file = File.open(output_file, "rb")
object.add_file(out_file.read, path: dest_path, mime_type: 'image/jp2')
# object.add_file(out_file.read, path: destination_name, mime_type: 'image/jp2')
output_file_service.call(object, out_file.read, destination_name, mime_type: 'image/jp2')

File.unlink(output_file)
end

Expand Down
30 changes: 16 additions & 14 deletions lib/hydra/derivatives/processor.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
module Hydra
module Derivatives
class Processor
attr_accessor :object, :source_name, :directives
attr_accessor :object, :source_name, :directives, :source_file_service, :output_file_service

def initialize(obj, source_name, directives)
def initialize(obj, source_name, directives, opts={})
self.object = obj
self.source_name = source_name
self.directives = directives
self.source_file_service = opts.fetch(:source_file_service, Hydra::Derivatives.source_file_service)
self.output_file_service = opts.fetch(:output_file_service, Hydra::Derivatives.output_file_service)
end

def process
Expand All @@ -17,19 +19,19 @@ def output_file_id(name)
[source_name, name].join('_')
end

def output_file(path)
# first, check for a defined file
output_file = if object.attached_files[path]
object.attached_files[path]
else
ActiveFedora::File.new("#{object.uri}/#{path}").tap do |file|
object.attach_file(file, path)
end
end
end

# def output_file(path)
# # first, check for a defined file
# output_file = if object.attached_files[path]
# object.attached_files[path]
# else
# ActiveFedora::File.new("#{object.uri}/#{path}").tap do |file|
# object.attach_file(file, path)
# end
# end
# end
#
def source_file
object.attached_files[source_name.to_s]
@source_file ||= source_file_service.call(object, source_name)
end

end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
module Hydra::Derivatives
# This Service is an implementation of the Hydra::Derivatives::PersistOutputFileService
# It supports indirectly contained files, which is the behavior associated with Fedora 3 file datastreams that were migrated to Fedora 4
# and, at the time that this class was authored, corresponds to the behavior of ActiveFedora::Base.attach_file and ActiveFedora::Base.attached_files
class PersistIndirectlyContainedOutputFile < PersistOutputFileService

  class << self
    # Conforms to the signature of Hydra::Derivatives::PersistOutputFileService.call.
    # Adds the file to the object at destination_name, then saves the object.
    #
    # NOTE: Uses indirect containment. If you want to use direct containment (ie. with PCDM)
    # you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
    #
    # @param object the object that receives the file; must respond to add_file and save
    # @param file the content handed to object.add_file
    # @param destination_name passed to add_file as :path
    # @param [Hash] opts
    # @option opts :mime_type passed to add_file as :mime_type (nil when not given)
    # @return the result of object.save
    def call(object, file, destination_name, opts={})
      # Alternative implementation, currently disabled: reuse an already-attached
      # file when one exists, otherwise attach a new ActiveFedora::File.
      #
      # if object.attached_files[destination_name]
      #   output_file = object.attached_files[destination_name]
      #   output_file.content = file
      # else
      #   output_file = ActiveFedora::File.new("#{object.uri}/#{destination_name}").tap do |file|
      #     object.attach_file(file, destination_name)
      #   end
      # end
      # output_file.mime_type = opts[:mime_type] if opts[:mime_type]

      mime_type = opts[:mime_type]
      object.add_file(file, path: destination_name, mime_type: mime_type)
      object.save
    end
  end
end
end
10 changes: 10 additions & 0 deletions lib/hydra/derivatives/services/persist_output_file_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module Hydra::Derivatives
# Abstract base class for services that persist a generated output file.
# Concrete services (e.g. PersistIndirectlyContainedOutputFile, which uses
# indirect containment) must override .call.
class PersistOutputFileService

  # Persists the file within the object at destination_name.
  # If you want to use direct containment (ie. with PCDM) you must use a service that
  # supports it (ie. Hydra::Works::AddFileToGenericFile Service)
  #
  # @param object the object the file should be persisted within
  # @param file the content to persist
  # @param destination_name where within the object the file should be persisted
  # @param [Hash] opts service-specific options
  # @raise [NotImplementedError] always; the message names the class that was called
  def self.call(object, file, destination_name, opts={})
    # Inside a class method `self` is the class itself, so `name` reports the
    # class that still needs to implement `call` (`self.class.name` is always "Class").
    raise NotImplementedError, "PersistOutputFileService is an abstract class. Implement `call' on #{name}"
  end
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module Hydra::Derivatives
# Default service for retrieving the source file of a derivative.
class RetrieveSourceFileService

  class << self
    # Retrieves the source by invoking the method named source_name on object.
    #
    # @param object the object holding the source file
    # @param source_name [String, Symbol] name of the method to invoke on object
    # @return whatever that method returns
    def call(object, source_name)
      object.send(source_name)
    end
  end
end
end
File renamed without changes.
5 changes: 3 additions & 2 deletions lib/hydra/derivatives/shell_based_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ def options_for(format)
{}
end

def encode_file(dest_path, file_suffix, mime_type, options)
def encode_file(destination_name, file_suffix, mime_type, options)
out_file = nil
output_file = Dir::Tmpname.create(['sufia', ".#{file_suffix}"], Hydra::Derivatives.temp_file_base){}
Hydra::Derivatives::TempfileService.create(source_file) do |f|
self.class.encode(f.path, options, output_file)
end
out_file = File.open(output_file, "rb")
object.add_file(out_file.read, path: dest_path, mime_type: mime_type)
# object.add_file(out_file.read, path: dest_path, mime_type: mime_type)
output_file_service.call(object, out_file.read, destination_name, mime_type: mime_type)
File.unlink(output_file)
end

Expand Down
44 changes: 44 additions & 0 deletions spec/services/persist_indirectly_contained_output_file_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
require 'spec_helper'
require 'hydra/works'

describe Hydra::Derivatives::PersistIndirectlyContainedOutputFile do

before(:all) do
class IndirectContainerObject < ActiveFedora::Base
contains "the_derivative_name"
end

# This uses directly_contains (inherited from Hydra::PCDM::ObjectBehavior)
class DirectContainerObject < Hydra::Works::GenericFile::Base
end
# If you manually built DirectContainerObject, it would look like this:
# class DirectContainerObject < ActiveFedora::Base
#
# directly_contains :files, has_member_relation: RDFVocabularies::PCDMTerms.hasFile,
# class_name: "Hydra::PCDM::File"
#
# def original_file
# file_of_type(::RDF::URI("http://pcdm.org/OriginalFile"))
# end
#
# def thumbnail
# file_of_type(::RDF::URI("http://pcdm.org/ThumbnailImage"))
# end
# end
end

let(:object) { IndirectContainerObject.new }
let(:file_path) { File.join(fixture_path, 'test.tif') }
let(:file) { File.new(file_path)}
let(:destination_name) { 'the_derivative_name' }

context "when file is indirectly contained (default assumption)" do # alas, we have to support this as the default because all legacy code (and fedora 3 systems) created indirectly contained files
let(:object) { IndirectContainerObject.new }
it "persists the file to the specified destination on the given object" do
described_class.call(object, "fake file content", destination_name)
expect(object.send(destination_name.to_sym).content).to eq("fake file content")
expect(object.send(destination_name.to_sym).content_changed?).to eq false
end
end

end
61 changes: 61 additions & 0 deletions spec/services/retrieve_source_file_service_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
require 'spec_helper'
require 'hydra/works'

describe Hydra::Derivatives::RetrieveSourceFileService do

before(:all) do
class IndirectContainerObject < ActiveFedora::Base
contains "the_source_name"
end

# This uses directly_contains (inherited from Hydra::PCDM::ObjectBehavior)
class DirectContainerObject < Hydra::Works::GenericFile::Base
end
# If you manually built DirectContainerObject, it would look like this:
# class DirectContainerObject < ActiveFedora::Base
#
# directly_contains :files, has_member_relation: RDFVocabularies::PCDMTerms.hasFile,
# class_name: "Hydra::PCDM::File"
#
# def original_file
# file_of_type(::RDF::URI("http://pcdm.org/OriginalFile"))
# end
#
# def thumbnail
# file_of_type(::RDF::URI("http://pcdm.org/ThumbnailImage"))
# end
# end
end

let(:object) { IndirectContainerObject.new }
let(:file_path) { File.join(fixture_path, 'test.pdf') }
let(:file) { File.new(file_path)}
let(:type_uri) { ::RDF::URI("http://sample.org/SourceFile") }

let(:source_name) { 'the_source_name' }

context "when file is indirectly contained (default assumption)" do # alas, we have to support this as the default because all legacy code (and fedora 3 systems) created indirectly contained files
let(:object) { IndirectContainerObject.new }
before do
# attaches the file as an indirectly contained object
object.the_source_name.content = "fake file content"
end
# Assert on the value returned by the service itself; checking only the object's
# accessor would pass even if the service returned nothing.
it "retrieves the file from the specified location on the given object" do
  retrieved = described_class.call(object, source_name)
  expect(retrieved.content).to eq("fake file content")
end
end

context "when file is directly contained" do # direct containers are more efficient, but most legacy code will have indirect containers
let(:object) { DirectContainerObject.create }
before do
Hydra::Works::AddFileToGenericFile.call(object, file_path, type_uri) # attaches the file as a directly contained object
end
it "retrieves the file from the specified location on the given object" do
expect(object.file_of_type(type_uri).content).to start_with("%PDF-1.4")
end
end



end
4 changes: 4 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@
end

$in_travis = !ENV['TRAVIS'].nil? && ENV['TRAVIS'] == 'true'

# Absolute path of the "fixtures" directory that sits next to this file.
def fixture_path
  this_dir = File.expand_path(File.dirname(__FILE__))
  File.join(this_dir, "fixtures")
end
Loading

0 comments on commit 946f581

Please sign in to comment.