From c66ac507f3ebe29c65db332bf735209fbaca13bf Mon Sep 17 00:00:00 2001 From: Reid Morrison Date: Mon, 3 Mar 2014 13:42:56 -0500 Subject: [PATCH 1/5] Retry on SocketError when calling resolv() --- lib/moped/address.rb | 11 ++++++++++- lib/moped/node.rb | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/moped/address.rb b/lib/moped/address.rb index da4598a..5e239f8 100644 --- a/lib/moped/address.rb +++ b/lib/moped/address.rb @@ -45,6 +45,7 @@ def initialize(address, timeout) # # @since 2.0.0 def resolve(node) + attempt = 1 begin Timeout::timeout(@timeout) do Resolv.each_address(host) do |ip| @@ -56,7 +57,15 @@ def resolve(node) raise Resolv::ResolvError unless @ip end @resolved ||= "#{ip}:#{port}" - rescue Timeout::Error, Resolv::ResolvError, SocketError + rescue Timeout::Error, Resolv::ResolvError + Loggable.warn(" MOPED:", "Could not resolve IP for: #{original}", "n/a") + node.down! and false + rescue SocketError + if attempt <= 3 + Loggable.warn(" MOPED:", "Retrying DNS Resolv for: #{original}, Retry: #{attempt}", "n/a") + attempt += 1 + retry + end Loggable.warn(" MOPED:", "Could not resolve IP for: #{original}", "n/a") node.down! and false end diff --git a/lib/moped/node.rb b/lib/moped/node.rb index 0ede953..f8268ea 100644 --- a/lib/moped/node.rb +++ b/lib/moped/node.rb @@ -287,7 +287,7 @@ def kill_cursors(cursor_ids) process(Protocol::KillCursors.new(cursor_ids)) end - # Can we send messages to this node in normal cirucmstances? This is true + # Can we send messages to this node in normal circumstances? This is true # only if the node is a primary or secondary node - arbiters or passives # cannot be sent anything. # @@ -434,7 +434,7 @@ def refresh raise Errors::ReplicaSetReconfigured.new("#{inspect} is no longer the primary node.", {}) elsif !messagable? # not primary or secondary so mark it as down, since it's probably - # a recovering node withing the replica set + # a recovering node within the replica set down! end rescue Timeout::Error @@ -511,7 +511,7 @@ def update(database, collection, selector, change, concern, options = {}) # # @since 1.0.0 def inspect - "<#{self.class.name} resolved_address=#{address.resolved.inspect}>" + "<#{self.class.name} resolved_address=#{address.resolved.inspect}, address=#{address.original.inspect}, arbiter=#{arbiter?}, passive=#{passive?}, primary=#{primary?}, secondary=#{secondary?}, down_at=#{down_at.inspect}, latency=#{latency.inspect}>" end private From 69d133e0bb4409802db71acc8d3163fa136b2a7a Mon Sep 17 00:00:00 2001 From: Reid Morrison Date: Tue, 8 Apr 2014 14:01:36 -0400 Subject: [PATCH 2/5] Fixes 265, allow nearest to work in a multi-threaded environment --- lib/moped/read_preference/nearest.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/moped/read_preference/nearest.rb b/lib/moped/read_preference/nearest.rb index 663c550..4b30ff2 100644 --- a/lib/moped/read_preference/nearest.rb +++ b/lib/moped/read_preference/nearest.rb @@ -42,7 +42,8 @@ def name # @since 2.0.0 def with_node(cluster, &block) with_retry(cluster) do - nearest = cluster.nodes.sort_by(&:latency).first + # Exclude nodes without a latency. If none have a latency yet use the first node in the list + nearest = cluster.nodes.select(&:latency).sort_by(&:latency).first || cluster.nodes.first if nearest block.call(nearest) else From 1641876b6c8b8255da34809fa301f4eff4976178 Mon Sep 17 00:00:00 2001 From: Reid Morrison Date: Wed, 9 Apr 2014 17:13:11 -0400 Subject: [PATCH 3/5] Fixes #258 Latest needs to handle condition where nodes may not yet have a latency. For example when a node is removed and then added back later. --- lib/moped/errors.rb | 2 +- lib/moped/read_preference/nearest.rb | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/moped/errors.rb b/lib/moped/errors.rb index 702892d..97e8a84 100644 --- a/lib/moped/errors.rb +++ b/lib/moped/errors.rb @@ -112,7 +112,7 @@ class DoNotDisconnect < MongoError; end class PotentialReconfiguration < MongoError # Not master error codes. - NOT_MASTER = [ 13435, 13436, 10009 ] + NOT_MASTER = [ 13435, 13436, 10009, 10054, 10058, 10056 ] # Error codes received around reconfiguration CONNECTION_ERRORS_RECONFIGURATION = [ 15988, 10276, 11600, 9001 ] diff --git a/lib/moped/read_preference/nearest.rb b/lib/moped/read_preference/nearest.rb index 4b30ff2..11c213b 100644 --- a/lib/moped/read_preference/nearest.rb +++ b/lib/moped/read_preference/nearest.rb @@ -42,12 +42,11 @@ def name # @since 2.0.0 def with_node(cluster, &block) with_retry(cluster) do - # Exclude nodes without a latency. If none have a latency yet use the first node in the list - nearest = cluster.nodes.select(&:latency).sort_by(&:latency).first || cluster.nodes.first - if nearest + # Find node with lowest latency, if not calculated yet use :primary + if nearest = cluster.nodes.select(&:latency).sort_by(&:latency).first block.call(nearest) else - raise Errors::ConnectionFailure, "No nodes available to select in the cluster" + cluster.with_primary(&block) end end end From 432d618d4c9d714746af907743cc0f09b48fbb48 Mon Sep 17 00:00:00 2001 From: Reid Morrison Date: Thu, 10 Apr 2014 14:14:58 -0400 Subject: [PATCH 4/5] Fixes #267 Retry write operations on connection failure and master change --- lib/moped/cluster.rb | 61 ++++++++++++++----- lib/moped/collection.rb | 6 +- lib/moped/query.rb | 54 ++++++++-------- lib/moped/read_preference/nearest.rb | 2 +- lib/moped/read_preference/primary.rb | 2 +- .../read_preference/primary_preferred.rb | 2 +- lib/moped/read_preference/secondary.rb | 2 +- .../read_preference/secondary_preferred.rb | 2 +- lib/moped/read_preference/selectable.rb | 36 +---------- 9 files changed, 86 insertions(+), 81 deletions(-) diff --git a/lib/moped/cluster.rb b/lib/moped/cluster.rb index 2bb17cb..057418f 100644 --- a/lib/moped/cluster.rb +++ b/lib/moped/cluster.rb @@ -220,17 +220,13 @@ def retry_interval @retry_interval ||= options[:retry_interval] || RETRY_INTERVAL end - # Yields the replica set's primary node to the provided block. This method - # will retry the block in case of connection errors or replica set - # reconfiguration. + # Yields the replica set's primary node to the provided block. # # @example Yield the primary to the block. # cluster.with_primary do |node| # # ... # end # - # @param [ Integer ] retries The number of times to retry. - # # @raises [ ConnectionFailure ] When no primary node can be found # # @return [ Object ] The result of the yield. @@ -238,27 +234,22 @@ def retry_interval # @since 1.0.0 def with_primary(&block) if node = nodes.find(&:primary?) - begin - node.ensure_primary do - return yield(node) - end - rescue Errors::ConnectionFailure, Errors::ReplicaSetReconfigured + node.ensure_primary do + return yield(node) end end raise Errors::ConnectionFailure, "Could not connect to a primary node for replica set #{inspect}" end - # Yields a secondary node if available, otherwise the primary node. This - # method will retry the block in case of connection errors. + # Yields a secondary node + # When multiple secondary nodes are present, one is yielded at random # # @example Yield the secondary to the block. # cluster.with_secondary do |node| # # ... # end # - # @param [ Integer ] retries The number of times to retry. - # - # @raises [ ConnectionFailure ] When no primary node can be found + # @raises [ ConnectionFailure ] When no secondary node can be found # # @return [ Object ] The result of the yield. # @@ -275,6 +266,46 @@ def with_secondary(&block) raise Errors::ConnectionFailure, "Could not connect to a secondary node for replica set #{inspect}" end + # Execute the provided block on the cluster and retry if the execution + # fails. + # + # @example Execute with retry. + # cluster.with_retry(retries) do + # cluster.with_primary do |node| + # node.refresh + # end + # end + # + # @param [ Cluster ] cluster The cluster. + # @param [ Integer ] retries The number of times to retry. + # + # @return [ Object ] The result of the block. + # + # @since 2.0.0 + def with_retry(retries = max_retries, &block) + begin + block.call + rescue Errors::OperationFailure => e + if retries > 0 && e.reconfiguring_replica_set? + Loggable.warn(" MOPED:", "Operation Failure, Retrying connection attempt #{retries} more time(s).", "n/a") + sleep(retry_interval) + refresh + retries -= 1 + retry + end + raise e + rescue Errors::ConnectionFailure => e + if retries > 0 + Loggable.warn(" MOPED:", "ConnectionFailure, Retrying connection attempt #{retries} more time(s).", "n/a") + sleep(retry_interval) + refresh + retries -= 1 + retry + end + raise e + end + end + private # Apply the credentials on all nodes diff --git a/lib/moped/collection.rb b/lib/moped/collection.rb index d68f8da..9776df0 100644 --- a/lib/moped/collection.rb +++ b/lib/moped/collection.rb @@ -121,8 +121,10 @@ def initialize(database, name) # @since 1.0.0 def insert(documents, flags = nil) docs = documents.is_a?(Array) ? documents : [ documents ] - cluster.with_primary do |node| - node.insert(database.name, name, docs, write_concern, flags: flags || []) + cluster.with_retry do + cluster.with_primary do |node| + node.insert(database.name, name, docs, write_concern, flags: flags || []) + end end end diff --git a/lib/moped/query.rb b/lib/moped/query.rb index f308010..d89c6cf 100644 --- a/lib/moped/query.rb +++ b/lib/moped/query.rb @@ -321,14 +321,16 @@ def modify(change, options = {}) # # @since 1.0.0 def remove - cluster.with_primary do |node| - node.remove( - operation.database, - operation.collection, - operation.basic_selector, - write_concern, - flags: [ :remove_first ] - ) + cluster.with_retry do + cluster.with_primary do |node| + node.remove( + operation.database, + operation.collection, + operation.basic_selector, + write_concern, + flags: [ :remove_first ] + ) + end end end @@ -341,13 +343,15 @@ def remove # # @since 1.0.0 def remove_all - cluster.with_primary do |node| - node.remove( - operation.database, - operation.collection, - operation.basic_selector, - write_concern - ) + cluster.with_retry do + cluster.with_primary do |node| + node.remove( + operation.database, + operation.collection, + operation.basic_selector, + write_concern + ) + end end end @@ -423,15 +427,17 @@ def tailable # # @since 1.0.0 def update(change, flags = nil) - cluster.with_primary do |node| - node.update( - operation.database, - operation.collection, - operation.selector["$query"] || operation.selector, - change, - write_concern, - flags: flags - ) + cluster.with_retry do + cluster.with_primary do |node| + node.update( + operation.database, + operation.collection, + operation.selector["$query"] || operation.selector, + change, + write_concern, + flags: flags + ) + end end end diff --git a/lib/moped/read_preference/nearest.rb b/lib/moped/read_preference/nearest.rb index 11c213b..53203da 100644 --- a/lib/moped/read_preference/nearest.rb +++ b/lib/moped/read_preference/nearest.rb @@ -41,7 +41,7 @@ def name # # @since 2.0.0 def with_node(cluster, &block) - with_retry(cluster) do + cluster.with_retry do # Find node with lowest latency, if not calculated yet use :primary if nearest = cluster.nodes.select(&:latency).sort_by(&:latency).first block.call(nearest) diff --git a/lib/moped/read_preference/primary.rb b/lib/moped/read_preference/primary.rb index 68124aa..2675484 100644 --- a/lib/moped/read_preference/primary.rb +++ b/lib/moped/read_preference/primary.rb @@ -51,7 +51,7 @@ def query_options(options) # # @since 2.0.0 def with_node(cluster, &block) - with_retry(cluster) do + cluster.with_retry do cluster.with_primary(&block) end end diff --git a/lib/moped/read_preference/primary_preferred.rb b/lib/moped/read_preference/primary_preferred.rb index 3c35001..cf780ad 100644 --- a/lib/moped/read_preference/primary_preferred.rb +++ b/lib/moped/read_preference/primary_preferred.rb @@ -42,7 +42,7 @@ def name # # @since 2.0.0 def with_node(cluster, &block) - with_retry(cluster) do + cluster.with_retry do begin cluster.with_primary(&block) rescue Errors::ConnectionFailure diff --git a/lib/moped/read_preference/secondary.rb b/lib/moped/read_preference/secondary.rb index 44dfc1c..8cc338e 100644 --- a/lib/moped/read_preference/secondary.rb +++ b/lib/moped/read_preference/secondary.rb @@ -41,7 +41,7 @@ def name # # @since 2.0.0 def with_node(cluster, &block) - with_retry(cluster) do + cluster.with_retry do cluster.with_secondary(&block) end end diff --git a/lib/moped/read_preference/secondary_preferred.rb b/lib/moped/read_preference/secondary_preferred.rb index fd502a9..25de5b3 100644 --- a/lib/moped/read_preference/secondary_preferred.rb +++ b/lib/moped/read_preference/secondary_preferred.rb @@ -40,7 +40,7 @@ def name # # @since 2.0.0 def with_node(cluster, &block) - with_retry(cluster) do + cluster.with_retry do begin cluster.with_secondary(&block) rescue Errors::ConnectionFailure diff --git a/lib/moped/read_preference/selectable.rb b/lib/moped/read_preference/selectable.rb index 73686de..a56b86e 100644 --- a/lib/moped/read_preference/selectable.rb +++ b/lib/moped/read_preference/selectable.rb @@ -39,41 +39,7 @@ def query_options(options) options[:flags] |= [ :slave_ok ] options end - - private - - # Execute the provided block on the cluster and retry if the execution - # fails. - # - # @api private - # - # @example Execute with retry. - # preference.with_retry(cluster) do - # cluster.with_primary do |node| - # node.refresh - # end - # end - # - # @param [ Cluster ] cluster The cluster. - # @param [ Integer ] retries The number of times to retry. - # - # @return [ Object ] The result of the block. - # - # @since 2.0.0 - def with_retry(cluster, retries = cluster.max_retries, &block) - begin - block.call - rescue Errors::ConnectionFailure => e - if retries > 0 - Loggable.warn(" MOPED:", "Retrying connection attempt #{retries} more time(s).", "n/a") - sleep(cluster.retry_interval) - cluster.refresh - with_retry(cluster, retries - 1, &block) - else - raise e - end - end - end + end end end From 1fddac2ad406075ca5058da1a21f01cca3f7103d Mon Sep 17 00:00:00 2001 From: Reid Morrison Date: Fri, 18 Apr 2014 12:28:51 -0400 Subject: [PATCH 5/5] #267 When read preference is primary it failed to move reads to the new primary Fails immediately with Moped::Errors::QueryFailure: failed with error 13435: "not master and slaveOk=false" --- lib/moped/cluster.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/moped/cluster.rb b/lib/moped/cluster.rb index 057418f..57db398 100644 --- a/lib/moped/cluster.rb +++ b/lib/moped/cluster.rb @@ -285,7 +285,7 @@ def with_secondary(&block) def with_retry(retries = max_retries, &block) begin block.call - rescue Errors::OperationFailure => e + rescue Errors::OperationFailure, Moped::Errors::QueryFailure => e if retries > 0 && e.reconfiguring_replica_set? Loggable.warn(" MOPED:", "Operation Failure, Retrying connection attempt #{retries} more time(s).", "n/a") sleep(retry_interval)