From 33c309543113b28dc76966b83939d402129863f5 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Thu, 22 Jan 2015 11:30:15 -0600 Subject: [PATCH] Add caching for lookups from LDP. Also added Instrumentation and a log subscriber If you run ActiveFedora in Rails, it injects a middleware that enables the cache at the beginning of the request. Then if you ever ask for an object from LDP, it caches the response and returns the cached result. At the end of the request the cache is cleared. If you have a list of N objects and you iterate over each to get an associated object (e.g. collection), you'd have N loads. This prevents extra loads if the associated object has already been loaded. --- active-fedora.gemspec | 2 +- lib/active_fedora.rb | 2 + lib/active_fedora/base.rb | 2 + lib/active_fedora/caching_connection.rb | 80 ++++++++++++++++++++ lib/active_fedora/core.rb | 2 +- lib/active_fedora/fedora.rb | 2 +- lib/active_fedora/ldp_cache.rb | 46 +++++++++++ lib/active_fedora/ldp_resource_service.rb | 9 ++- lib/active_fedora/log_subscriber.rb | 38 ++++++++++ lib/active_fedora/persistence.rb | 4 +- lib/active_fedora/railtie.rb | 3 + lib/active_fedora/relation/finder_methods.rb | 5 +- spec/integration/caching_spec.rb | 59 +++++++++++++++ spec/spec_helper.rb | 1 + spec/unit/ldp_resource_spec.rb | 17 +++++ spec/unit/logger_spec.rb | 2 +- 16 files changed, 263 insertions(+), 11 deletions(-) create mode 100644 lib/active_fedora/caching_connection.rb create mode 100644 lib/active_fedora/ldp_cache.rb create mode 100644 lib/active_fedora/log_subscriber.rb create mode 100644 spec/integration/caching_spec.rb create mode 100644 spec/unit/ldp_resource_spec.rb diff --git a/active-fedora.gemspec b/active-fedora.gemspec index a3fa9cf12..d057b47e3 100644 --- a/active-fedora.gemspec +++ b/active-fedora.gemspec @@ -22,7 +22,7 @@ Gem::Specification.new do |s| s.add_dependency "rdf-rdfxml", '~> 1.1.0' s.add_dependency "linkeddata" s.add_dependency "deprecation" - s.add_dependency "ldp", '~> 0.2.0' + s.add_dependency "ldp", '~> 0.2.1' s.add_dependency "rdf-ldp" s.add_development_dependency "rdoc" diff --git a/lib/active_fedora.rb b/lib/active_fedora.rb index ec7d472f7..bef934e49 100644 --- a/lib/active_fedora.rb +++ b/lib/active_fedora.rb @@ -46,6 +46,7 @@ module ActiveFedora #:nodoc: autoload :Attributes autoload :AutosaveAssociation autoload :Base + autoload :CachingConnection autoload :Callbacks autoload :ChangeSet autoload :Config @@ -62,6 +63,7 @@ module ActiveFedora #:nodoc: autoload :FixityService autoload :Indexing autoload :IndexingService + autoload :LdpCache autoload :LdpResource autoload :LdpResourceService autoload :LoadableFromJson diff --git a/lib/active_fedora/base.rb b/lib/active_fedora/base.rb index 27681e1ae..d312497c4 100644 --- a/lib/active_fedora/base.rb +++ b/lib/active_fedora/base.rb @@ -2,6 +2,7 @@ ENABLE_SOLR_UPDATES = true unless defined?(ENABLE_SOLR_UPDATES) require 'active_support/descendants_tracker' require 'active_fedora/errors' +require 'active_fedora/log_subscriber' module ActiveFedora @@ -26,6 +27,7 @@ module ActiveFedora class Base extend ActiveModel::Naming extend ActiveSupport::DescendantsTracker + extend LdpCache::ClassMethods include Core include Persistence diff --git a/lib/active_fedora/caching_connection.rb b/lib/active_fedora/caching_connection.rb new file mode 100644 index 000000000..508834dac --- /dev/null +++ b/lib/active_fedora/caching_connection.rb @@ -0,0 +1,80 @@ +module ActiveFedora + class CachingConnection < Ldp::Client + def initialize(host) + super + @cache = {} + @cache_enabled = false + end + + def get(url, options = {}) + if @cache_enabled + cache_resource(url) { super } + else + super + end + end + + def post(*) + clear_cache if @cache_enabled + super + end + + def put(*) + clear_cache if @cache_enabled + super + end + + def patch(*) + clear_cache if @cache_enabled + super + end + + # Enable the cache within the block. + def cache + old, @cache_enabled = @cache_enabled, true + yield + ensure + @cache_enabled = old + clear_cache unless @cache_enabled + end + + def enable_cache! + @cache_enabled = true + end + + def disable_cache! + @cache_enabled = false + end + + # Disable the query cache within the block. + def uncached + old, @cache_enabled = @cache_enabled, false + yield + ensure + @cache_enabled = old + end + + def clear_cache + @cache.clear + end + + private + + def log(url) + ActiveSupport::Notifications.instrument("ldp.active_fedora", + id: url, name: "Load LDP", ldp_service: object_id) { yield } + end + + def cache_resource(url, &block) + result = + if @cache.key?(url) + ActiveSupport::Notifications.instrument("ldp.active_fedora", + id: url, name: "CACHE", ldp_service: object_id) + @cache[url] + else + @cache[url] = log(url) { yield } + end + result.dup + end + end +end diff --git a/lib/active_fedora/core.rb b/lib/active_fedora/core.rb index 6f09b76aa..5abff1ee9 100644 --- a/lib/active_fedora/core.rb +++ b/lib/active_fedora/core.rb @@ -177,7 +177,7 @@ def init_internals end def build_ldp_resource(id=nil) - ActiveFedora.fedora.ldp_resource_service.get(self.class, id) + ActiveFedora.fedora.ldp_resource_service.build(self.class, id) end def check_persistence diff --git a/lib/active_fedora/fedora.rb b/lib/active_fedora/fedora.rb index 6f7b6531e..02487c98c 100644 --- a/lib/active_fedora/fedora.rb +++ b/lib/active_fedora/fedora.rb @@ -14,7 +14,7 @@ def base_path end def connection - @connection ||= Ldp::Client.new(host) + @connection ||= CachingConnection.new(host) end def ldp_resource_service diff --git a/lib/active_fedora/ldp_cache.rb b/lib/active_fedora/ldp_cache.rb new file mode 100644 index 000000000..8b420b058 --- /dev/null +++ b/lib/active_fedora/ldp_cache.rb @@ -0,0 +1,46 @@ +module ActiveFedora + # = Active Fedora Ldp Cache + class LdpCache + module ClassMethods + # Enable the query cache within the block if Active Fedora is configured. + # If it's not, it will execute the given block. + def cache(&block) + connection = ActiveFedora.fedora.connection + connection.cache(&block) + end + + # Disable the query cache within the block if Active Fedora is configured. + # If it's not, it will execute the given block. + def uncached(&block) + ActiveFedora.fedora.connection.uncached(&block) + end + end + + def initialize(app) + @app = app + end + + def call(env) + ActiveFedora.fedora.connection.enable_cache! + + response = @app.call(env) + response[2] = Rack::BodyProxy.new(response[2]) do + reset_cache_settings + end + + response + rescue Exception => e + reset_cache_settings + raise e + end + + private + + def reset_cache_settings + ActiveFedora.fedora.connection.clear_cache + ActiveFedora.fedora.connection.disable_cache! + end + + end +end + diff --git a/lib/active_fedora/ldp_resource_service.rb b/lib/active_fedora/ldp_resource_service.rb index feff15b08..89b1219ba 100644 --- a/lib/active_fedora/ldp_resource_service.rb +++ b/lib/active_fedora/ldp_resource_service.rb @@ -6,7 +6,7 @@ def initialize(conn) @connection = conn end - def get(klass, id) + def build(klass, id) if id LdpResource.new(connection, to_uri(klass, id)) else @@ -14,9 +14,16 @@ def get(klass, id) end end + # TODO break the cache. + def update(change_set, klass, id) + SparqlInsert.new(change_set.changes).execute(to_uri(klass, id)) + end + + private def to_uri(klass, id) klass.id_to_uri(id) end + end end diff --git a/lib/active_fedora/log_subscriber.rb b/lib/active_fedora/log_subscriber.rb new file mode 100644 index 000000000..7e1b85ea6 --- /dev/null +++ b/lib/active_fedora/log_subscriber.rb @@ -0,0 +1,38 @@ +module ActiveFedora + class LogSubscriber < ActiveSupport::LogSubscriber + + def initialize + super + @odd = false + end + + def ldp(event) + return unless logger.debug? + + payload = event.payload + + name = "#{payload[:name]} (#{event.duration.round(1)}ms)" + id = payload[:id] || "[no id]" + + if odd? + name = color(name, CYAN, true) + id = color(id, nil, true) + else + name = color(name, MAGENTA, true) + end + + debug " #{name} #{id} Service: #{payload[:ldp_service]}" + end + + def odd? + @odd = !@odd + end + + def logger + ActiveFedora::Base.logger + end + end +end + +ActiveFedora::LogSubscriber.attach_to :active_fedora + diff --git a/lib/active_fedora/persistence.rb b/lib/active_fedora/persistence.rb index c2c3aa79e..f9c4333ef 100644 --- a/lib/active_fedora/persistence.rb +++ b/lib/active_fedora/persistence.rb @@ -152,16 +152,14 @@ def update_record(options = {}) end def refresh - # TODO break the cache @ldp_source = build_ldp_resource(id) @resource = nil end - # TODO break the cache. def execute_sparql_update change_set = ChangeSet.new(self, self.resource, self.changed_attributes.keys) return true if change_set.empty? - SparqlInsert.new(change_set.changes).execute(uri) + ActiveFedora.fedora.ldp_resource_service.update(change_set, self.class, id) end # Override to tie in an ID minting service diff --git a/lib/active_fedora/railtie.rb b/lib/active_fedora/railtie.rb index 1f09d8cfd..463b47dad 100644 --- a/lib/active_fedora/railtie.rb +++ b/lib/active_fedora/railtie.rb @@ -1,6 +1,9 @@ module ActiveFedora class Railtie < Rails::Railtie + config.app_middleware.insert_after "::ActionDispatch::Callbacks", + "ActiveFedora::LdpCache" + initializer 'active_fedora.autoload', before: :set_autoload_paths do |app| app.config.autoload_paths << 'app/models/datastreams' end diff --git a/lib/active_fedora/relation/finder_methods.rb b/lib/active_fedora/relation/finder_methods.rb index 10c059675..854897c5a 100644 --- a/lib/active_fedora/relation/finder_methods.rb +++ b/lib/active_fedora/relation/finder_methods.rb @@ -188,7 +188,7 @@ def find_one(id, cast=nil) def load_from_fedora(id, cast) raise ActiveFedora::ObjectNotFoundError if id.empty? - resource = ActiveFedora.fedora.ldp_resource_service.get(klass, id) + resource = ActiveFedora.fedora.ldp_resource_service.build(klass, id) raise ActiveFedora::ObjectNotFoundError if resource.new? class_to_load(resource, cast).allocate.init_with_resource(resource) # Triggers the find callback end @@ -202,9 +202,8 @@ def class_to_load(resource, cast) end end - # TODO just use has_model def has_model_value(resource) - Ldp::Orm.new(resource).value(ActiveFedora::RDF::Fcrepo::Model.hasModel).first.to_s + resource.graph.query([nil, ActiveFedora::RDF::Fcrepo::Model.hasModel, nil]).first.object.to_s end def find_with_ids(ids, cast) diff --git a/spec/integration/caching_spec.rb b/spec/integration/caching_spec.rb new file mode 100644 index 000000000..c0f5862d6 --- /dev/null +++ b/spec/integration/caching_spec.rb @@ -0,0 +1,59 @@ +require 'spec_helper' + +describe "Caching" do + before do + class TestClass < ActiveFedora::Base + property :title, predicate: ::RDF::DC.title + end + end + + after { Object.send(:remove_const, :TestClass) } + + let!(:object) { TestClass.create(id: '123') } + + describe "#cache" do + it "should find records in the cache" do + expect_any_instance_of(Faraday::Connection).to receive(:get).once.and_call_original + ActiveFedora::Base.cache do + o1 = TestClass.find(object.id) + o2 = TestClass.find(object.id) + expect(o1.ldp_source.get.body.object_id).to eq o2.ldp_source.get.body.object_id + end + end + + it "should clear the cache at the end of the block" do + expect_any_instance_of(Faraday::Connection).to receive(:get).twice.and_call_original + ActiveFedora::Base.cache do + TestClass.find(object.id) + end + ActiveFedora::Base.cache do + TestClass.find(object.id) + end + end + + context "an update" do + it "should flush the cache" do + expect_any_instance_of(Faraday::Connection).to receive(:get).twice.and_call_original + ActiveFedora::Base.cache do + TestClass.find(object.id) + object.title= ['foo'] + object.save! + TestClass.find(object.id) + end + end + end + end + + describe "#uncached" do + it "should not use the cache" do + expect_any_instance_of(Faraday::Connection).to receive(:get).twice.and_call_original + ActiveFedora::Base.cache do + TestClass.find(object.id) + ActiveFedora::Base.uncached do + TestClass.find(object.id) + end + TestClass.find(object.id) + end + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 33f9b95d2..6aaa09e02 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -20,6 +20,7 @@ ActiveFedora::Base.logger = Logger.new(STDERR) ActiveFedora::Base.logger.level = Logger::WARN +# require 'http_logger' # HttpLogger.logger = Logger.new(STDOUT) # HttpLogger.ignore = [/localhost:8983\/solr/] # HttpLogger.colorize = false diff --git a/spec/unit/ldp_resource_spec.rb b/spec/unit/ldp_resource_spec.rb new file mode 100644 index 000000000..fd48981da --- /dev/null +++ b/spec/unit/ldp_resource_spec.rb @@ -0,0 +1,17 @@ +require 'spec_helper' + +describe ActiveFedora::LdpResource do + let(:obj) { ActiveFedora::Base.create! } + let!(:r1) { ActiveFedora::LdpResource.new(ActiveFedora.fedora.connection, obj.uri) } + let!(:r2) { ActiveFedora::LdpResource.new(ActiveFedora.fedora.connection, obj.uri) } + + it "should cache requests" do + expect_any_instance_of(Faraday::Connection).to receive(:get).once.and_call_original + ActiveFedora::Base.cache do + r1.get + r2.get + end + end + + +end diff --git a/spec/unit/logger_spec.rb b/spec/unit/logger_spec.rb index 885fe5a3e..463799da3 100644 --- a/spec/unit/logger_spec.rb +++ b/spec/unit/logger_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe ActiveFedora::Base do - let(:logger1) { double } + let(:logger1) { double(debug?: false) } before do @initial_logger = ActiveFedora::Base.logger