Permalink
Browse files

Merge pull request #66 from fanhattan/connection-handling-improvements

Connection handling improvements.
  • Loading branch information...
2 parents a95bb65 + df4ff01 commit 50496110efbb27225b49e0a584034142df8bd405 @arohter arohter committed Jan 15, 2014
Showing with 125 additions and 39 deletions.
  1. +1 −0 .gitignore
  2. +13 −12 README.md
  3. +6 −0 lib/redis_failover.rb
  4. +47 −23 lib/redis_failover/client.rb
  5. +1 −1 redis_failover.gemspec
  6. +57 −3 spec/client_spec.rb
View
1 .gitignore
@@ -17,3 +17,4 @@ test/version_tmp
tmp
tags
.DS_Store
+vendor/
View
25 README.md
@@ -140,18 +140,19 @@ a drop-in replacement for your existing pure redis client usage.
The full set of options that can be passed to RedisFailover::Client are:
- :zk - an existing ZK client instance
- :zkservers - comma-separated ZooKeeper host:port pairs
- :znode_path - the Znode path override for redis server list (optional)
- :password - password for redis nodes (optional)
- :db - db to use for redis nodes (optional)
- :namespace - namespace for redis nodes (optional)
- :logger - logger override (optional)
- :retry_failure - indicate if failures should be retried (default true)
- :max_retries - max retries for a failure (default 3)
- :safe_mode - indicates if safe mode is used or not (default true)
- :master_only - indicates if only redis master is used (default false)
- :verify_role - verify the actual role of a redis node before every command (default true)
+ :zk - an existing ZK client instance
+ :zkservers - comma-separated ZooKeeper host:port pairs
+ :znode_path - the Znode path override for redis server list (optional)
+ :password - password for redis nodes (optional)
+ :db - db to use for redis nodes (optional)
+ :namespace - namespace for redis nodes (optional)
+ :trace_id - trace string tag logged for client debugging (optional)
+ :logger - logger override (optional)
+ :retry_failure - indicate if failures should be retried (default true)
+ :max_retries - max retries for a failure (default 3)
+ :safe_mode - indicates if safe mode is used or not (default true)
+ :master_only - indicates if only redis master is used (default false)
+ :verify_role - verify the actual role of a redis node before every command (default true)
The RedisFailover::Client also supports a custom callback that will be invoked whenever the list of redis clients changes. Example usage:
View
6 lib/redis_failover.rb
@@ -1,4 +1,10 @@
require 'zk'
+
+#NOTE: We've found that using the 'recommended' zk fork-hook would trigger
+#kernel mutex deadlocks in forking env (unicorn & resque) [ruby 1.9]
+#https://github.com/zk-ruby/zk/wiki/Forking & https://github.com/zk-ruby/zk/blob/master/RELEASES.markdown#v150
+#ZK.install_fork_hook
+
require 'set'
require 'yaml'
require 'redis'
View
70 lib/redis_failover/client.rb
@@ -51,6 +51,7 @@ def call(command, &block)
# @option options [String] :password password for redis nodes
# @option options [String] :db database to use for redis nodes
# @option options [String] :namespace namespace for redis nodes
+ # @option options [String] :trace_id trace string tag logged for client debugging
# @option options [Logger] :logger logger override
# @option options [Boolean] :retry_failure indicates if failures are retried
# @option options [Integer] :max_retries max retries for a failure
@@ -61,6 +62,7 @@ def call(command, &block)
# @return [RedisFailover::Client]
def initialize(options = {})
Util.logger = options[:logger] if options[:logger]
+ @trace_id = options[:trace_id]
@master = nil
@slaves = []
@node_addresses = {}
@@ -130,7 +132,7 @@ def respond_to_missing?(method, include_private)
# @return [String] a string representation of the client
def inspect
- "#<RedisFailover::Client (db: #{@db.to_i}, master: #{master_name}, slaves: #{slave_names})>"
+ "#<RedisFailover::Client [#{@trace_id}] (db: #{@db.to_i}, master: #{master_name}, slaves: #{slave_names})>"
end
alias_method :to_s, :inspect
@@ -157,13 +159,12 @@ def shutdown
purge_clients
end
- # Reconnect will first perform a shutdown of the underlying redis clients.
- # Next, it attempts to reopen the ZooKeeper client and re-create the redis
- # clients after it fetches the most up-to-date list from ZooKeeper.
+ # Reconnect method needed for compatibility with 3rd party libs that expect this for redis client objects.
def reconnect
- purge_clients
- @zk ? @zk.reopen : setup_zk
- build_clients
+ #NOTE: Explicit/manual reconnects are no longer needed or desired, and
+ #triggered kernel mutex deadlocks in forking env (unicorn & resque) [ruby 1.9]
+ #Resque automatically calls this method on job fork.
+ #We now auto-detect underlying zk & redis client InheritedError's and reconnect automatically as needed.
end
# Retrieves the current redis master.
@@ -235,16 +236,21 @@ def dispatch(method, *args, &block)
verify_supported!(method)
tries = 0
begin
- client_for(method).send(method, *args, &block)
+ redis = client_for(method)
+ redis.send(method, *args, &block)
+ rescue ::Redis::InheritedError => ex
+ logger.debug( "Caught #{ex.class} - reconnecting [#{@trace_id}] #{redis.inspect}" )
+ redis.client.reconnect
+ retry
rescue *CONNECTIVITY_ERRORS => ex
logger.error("Error while handling `#{method}` - #{ex.inspect}")
logger.error(ex.backtrace.join("\n"))
if tries < @max_retries
tries += 1
free_client
- build_clients
sleep(RETRY_WAIT_TIME)
+ build_clients
retry
end
raise
@@ -288,7 +294,7 @@ def build_clients
return unless nodes_changed?(nodes)
purge_clients
- logger.info("Building new clients for nodes #{nodes.inspect}")
+ logger.info("Building new clients for nodes [#{@trace_id}] #{nodes.inspect}")
new_master = new_clients_for(nodes[:master]).first if nodes[:master]
new_slaves = new_clients_for(*nodes[:slaves])
@master = new_master
@@ -320,19 +326,37 @@ def should_notify?
#
# @return [Hash] the known master/slave redis servers
def fetch_nodes
- data = @zk.get(redis_nodes_path, :watch => true).first
- nodes = symbolize_keys(decode(data))
- logger.debug("Fetched nodes: #{nodes.inspect}")
+ tries = 0
+ begin
+ data = @zk.get(redis_nodes_path, :watch => true).first
+ nodes = symbolize_keys(decode(data))
+ logger.debug("Fetched nodes: #{nodes.inspect}")
+ nodes
+ rescue Zookeeper::Exceptions::InheritedConnectionError, ZK::Exceptions::InterruptedSession => ex
+ logger.debug { "Caught #{ex.class} '#{ex.message}' - reopening ZK client [#{@trace_id}]" }
+ sleep 1 if ex.kind_of?(ZK::Exceptions::InterruptedSession)
+ @zk.reopen
+ retry
+ rescue *ZK_ERRORS => ex
+ logger.error { "Caught #{ex.class} '#{ex.message}' - retrying ... [#{@trace_id}]" }
+ sleep(RETRY_WAIT_TIME)
- nodes
- rescue Zookeeper::Exceptions::InheritedConnectionError, ZK::Exceptions::InterruptedSession => ex
- logger.debug { "Caught #{ex.class} '#{ex.message}' - reopening ZK client" }
- @zk.reopen
- retry
- rescue *ZK_ERRORS => ex
- logger.warn { "Caught #{ex.class} '#{ex.message}' - retrying" }
- sleep(RETRY_WAIT_TIME)
- retry
+ if tries < @max_retries
+ tries += 1
+ retry
+ elsif tries < (@max_retries * 2)
+ tries += 1
+ logger.error { "Hmmm, more than [#{@max_retries}] retries: reopening ZK client [#{@trace_id}]" }
+ @zk.reopen
+ retry
+ else
+ tries = 0
+ logger.error { "Oops, more than [#{@max_retries * 2}] retries: establishing fresh ZK client [#{@trace_id}]" }
+ @zk.close!
+ setup_zk
+ retry
+ end
+ end
end
# Builds new Redis clients for the specified nodes.
@@ -434,7 +458,7 @@ def disconnect(*redis_clients)
# Disconnects current redis clients.
def purge_clients
@lock.synchronize do
- logger.info("Purging current redis clients")
+ logger.info("Purging current redis clients [#{@trace_id}]")
disconnect(@master, *@slaves)
@master = nil
@slaves = []
View
2 redis_failover.gemspec
@@ -18,7 +18,7 @@ Gem::Specification.new do |gem|
gem.add_dependency('redis', ['>= 2.2', '< 4'])
gem.add_dependency('redis-namespace')
gem.add_dependency('multi_json', '~> 1')
- gem.add_dependency('zk', ['>= 1.7.4', '< 1.8'])
+ gem.add_dependency('zk', ['>= 1.7.4', '< 2.0'])
gem.add_development_dependency('rake')
gem.add_development_dependency('rspec')
View
60 spec/client_spec.rb
@@ -54,16 +54,23 @@ def setup_zk
client.del('foo')
called.should be_true
end
+ end
describe '#inspect' do
it 'should always include db' do
opts = {:zkservers => 'localhost:1234'}
client = ClientStub.new(opts)
- client.inspect.should match('<RedisFailover::Client \(db: 0,')
+ client.inspect.should match('<RedisFailover::Client \\[.*\\] \(db: 0,')
db = '5'
opts.merge!(:db => db)
client = ClientStub.new(opts)
- client.inspect.should match("<RedisFailover::Client \\(db: #{db},")
+ client.inspect.should match("<RedisFailover::Client \\[.*\\] \\(db: #{db},")
+ end
+
+ it 'should include trace id' do
+ tid = 'tracer'
+ client = ClientStub.new(:zkservers => 'localhost:1234', :trace_id => tid)
+ client.inspect.should match("<RedisFailover::Client \\[#{tid}\\] ")
end
end
@@ -121,6 +128,53 @@ def fetch_nodes
client.reconnected.should be_true
end
+
+ describe 'redis connectivity failure handling' do
+ before(:each) do
+ class << client
+ attr_reader :tries
+
+ def client_for(method)
+ @tries ||= 0
+ @tries += 1
+ super
+ end
+ end
+ end
+
+ it 'retries without client rebuild when redis throws inherited error' do
+ loops = 0
+ client.current_master.stub(:send) {
+ loops += 1
+ loops < 2 ? (raise ::Redis::InheritedError) : nil
+ }
+
+ client.should_not_receive(:build_clients)
+ client.persist('foo')
+ client.tries.should be == 2
+ end
+
+ it 'retries with client rebuild when redis throws connectivity error' do
+ loops = 0
+ client.current_master.stub(:send) {
+ loops += 1
+ loops < 2 ? (raise InvalidNodeError) : nil
+ }
+
+ client.should_receive(:build_clients)
+ client.persist('foo')
+ client.tries.should be == 2
+ end
+
+ it 'throws exception when too many redis connectivity errors' do
+ client.current_master.stub(:send) { raise InvalidNodeError }
+ client.instance_variable_set(:@max_retries, 2)
+ expect { client.persist('foo') }.to raise_error(InvalidNodeError)
+ end
+
+ end
+
+
context 'with :verify_role true' do
it 'properly detects when a node has changed roles' do
client.current_master.change_role_to('slave')
@@ -148,6 +202,6 @@ def fetch_nodes
client.del('foo')
end
end
- end
end
end
+

0 comments on commit 5049611

Please sign in to comment.