Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

CBQE-2320 :: Add more tests for graceful failover with dynamic nodes

Change-Id: I518a3e18c92bee48ec29f684cbe449790e96081d
Reviewed-on: http://review.couchbase.org/39290
Tested-by: buildbot <build@couchbase.com>
Reviewed-by: Parag Agarwal <agarwal.parag@gmail.com>
Tested-by: Parag Agarwal <agarwal.parag@gmail.com>
  • Loading branch information...
commit d874faa79568a3d420cf7c81fcc3267c99e5a772 1 parent aa1aa4e
@paragagarwal paragagarwal authored karma2ns committed
View
4 conf/py-newfailover.conf
@@ -41,3 +41,7 @@ failover.failovertests.FailoverTests:
test_failover_then_add_back,replicas=2,num_failed_nodes=1,items=100000,standard_buckets=1,recoveryType=delta,deltaRecoveryBuckets=default:standard_buckets0,graceful=True,GROUP=P1;GRACEFUL
test_failover_then_add_back,replicas=1,num_failed_nodes=1,items=100000,sasl_buckets=1,upr_check=False,recoveryType=full,graceful=True,GROUP=P0;GRACEFUL
test_failover_then_add_back,replicas=2,num_failed_nodes=1,items=100000,recoveryType=delta,standard_buckets=1,upr_check=False,withQueries=True,numViews=5,runViews=True,graceful=True,GROUP=P0;GRACEFUL
+ test_failover_normal,replicas=1,graceful=True,check_verify_failover_type=True,num_failed_nodes=1,items=100,dgm_run=True,failoverMaster=True,runRebalanceAfterFailover=False,GROUP=P1;GRACEFUL
+ test_failover_normal,replicas=2,graceful=True,check_verify_failover_type=True,num_failed_nodes=3,items=100,dgm_run=True,failoverMaster=True,runRebalanceAfterFailover=False,GROUP=P1;GRACEFUL
+ test_failover_normal,replicas=3,graceful=True,check_verify_failover_type=True,num_failed_nodes=4,items=100,dgm_run=True,failoverMaster=True,runRebalanceAfterFailover=False,GROUP=P1;GRACEFUL
+ test_failover_normal,replicas=0,graceful=True,check_verify_failover_type=True,num_failed_nodes=2,items=100,dgm_run=True,failoverMaster=True,runRebalanceAfterFailover=False,GROUP=P1;GRACEFUL
View
5 lib/membase/api/rest_client.py
@@ -1253,6 +1253,10 @@ def node_statuses(self, timeout=120):
node.ip = self.ip
node.port = int(key[key.rfind(":") + 1:])
node.replication = value['replication']
+ if 'gracefulFailoverPossible' in value.keys():
+ node.gracefulFailoverPossible = value['gracefulFailoverPossible']
+ else:
+ node.gracefulFailoverPossible = False
nodes.append(node)
return nodes
@@ -2306,6 +2310,7 @@ def __init__(self, id='', status=''):
self.ip = ''
self.replication = ''
self.port = 8091
+ self.gracefulFailoverPossible = 'true'
#extract ns ip from the otpNode string
#its normally ns_1@10.20.30.40
if id.find('@') >= 0:
View
2  pytests/failover/failoverbasetests.py
@@ -23,7 +23,9 @@ def setUp(self):
self.withQueries = self.input.param("withQueries", False)
self.numberViews = self.input.param("numberViews", False)
self.gracefulFailoverFail = self.input.param("gracefulFailoverFail", False)
+ self.runRebalanceAfterFailover = self.input.param("runRebalanceAfterFailover", True)
self.failoverMaster = self.input.param("failoverMaster", False)
+ self.check_verify_failover_type = self.input.param("check_verify_failover_type", True)
self.recoveryType = self.input.param("recoveryType", "delta")
self.bidirectional = self.input.param("bidirectional", False)
self._value_size = self.input.param("value_size", 256)
View
57 pytests/failover/failovertests.py
@@ -94,7 +94,7 @@ def common_test_body(self, failover_reason):
self.run_failover_operations(self.chosen, failover_reason)
# Perform Add Back Operation with Rebalance Or only Rebalance with Verifications
- if not self.gracefulFailoverFail:
+ if not self.gracefulFailoverFail and self.runRebalanceAfterFailover:
if self.add_back_flag:
self.run_add_back_operation_and_verify(self.chosen, prev_vbucket_stats, record_static_data_set, prev_failover_stats)
else:
@@ -223,15 +223,20 @@ def print_test_params(self, failover_reason):
def run_failover_operations(self, chosen, failover_reason):
""" Method to run fail over operations used in the test scenario based on failover reason """
# Perform Operations related to failover
+ graceful_count = 0
+ graceful_failover = True
failed_over = True
for node in chosen:
+ unreachable = False
if failover_reason == 'stop_server':
+ unreachable=True
self.stop_server(node)
self.log.info("10 seconds delay to wait for membase-server to shutdown")
# wait for 5 minutes until node is down
self.assertTrue(RestHelper(self.rest).wait_for_node_status(node, "unhealthy", 300),
msg="node status is not unhealthy even after waiting for 5 minutes")
elif failover_reason == "firewall":
+ unreachable=True
self.filter_list.append (node.ip)
server = [srv for srv in self.servers if node.ip == srv.ip][0]
RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
@@ -257,10 +262,12 @@ def run_failover_operations(self, chosen, failover_reason):
json_parsed = json.loads(content)
self.log.info("nodeStatuses: {0}".format(json_parsed))
self.fail("node status is not unhealthy even after waiting for 5 minutes")
-
+ # verify the failover type
+ if self.check_verify_failover_type:
+ graceful_count, graceful_failover = self.verify_failover_type(node, graceful_count, self.num_replicas, unreachable)
# define precondition check for failover
- success_failed_over = self.rest.fail_over(node.id, graceful=self.graceful)
- if self.graceful and not self.gracefulFailoverFail:
+ success_failed_over = self.rest.fail_over(node.id, graceful=(self.graceful and graceful_failover))
+ if self.graceful and graceful_failover:
msg = "rebalance failed while removing failover nodes {0}".format(node.id)
self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg=msg)
failed_over = failed_over and success_failed_over
@@ -272,9 +279,9 @@ def run_failover_operations(self, chosen, failover_reason):
self.rest.print_UI_logs()
self.assertFalse(failed_over, "Graceful Falover was started for unhealthy node!!! ")
return
- elif self.gracefulFailoverFail and failed_over:
+ elif self.gracefulFailoverFail and not failed_over:
""" Check if the fail_over fails as expected """
- self.assertTrue(not failed_over,""" Graceful failover should fail due to not enough replicas """)
+ self.assertFalse(failed_over,""" Graceful failover should fail due to not enough replicas """)
return
# Check if failover happened as expected or re-try one more time
@@ -438,6 +445,44 @@ def create_file(self,chosen,buckets,serverMap):
shell.disconnect()
return fileMap
+ def verify_failover_type(self, chosen = None, graceful_count = 0, replica_count = 0, unreachable = False):  # Compare the chosen node's reported gracefulFailoverPossible flag with the failover type expected here; returns (updated graceful count, graceful_failover)
+ logic = True  # set to False whenever a reported failover type differs from the expected one
+ summary = ""  # collects one line per mismatch; passed as the assertion message below
+ nodes = self.rest.node_statuses()
+ node_count = len(nodes)
+ change_graceful_count = graceful_count  # copy that is returned; graceful_count itself is still used unchanged in the check below
+ graceful_failover = True  # stays True unless a node whose ip matches chosen.ip is found
+ if unreachable:
+ node_count -= 1  # an unreachable node is left out of the node count
+ else:
+ change_graceful_count += 1  # a reachable node increments the count handed back to the caller
+ if replica_count != 0:
+ for node in nodes:
+ if unreachable and node.ip == chosen.ip:  # NOTE(review): chosen.ip is read for every node, so the None default raises AttributeError when nodes is non-empty
+ graceful_failover = node.gracefulFailoverPossible
+ if node.gracefulFailoverPossible:  # unreachable node reported as gracefully fail-over-able -> recorded as a mismatch
+ logic = False
+ summary += "\n failover type for unreachable node {0} Expected :: Hard, Actual :: Graceful".format(node.ip)
+ elif node.ip == chosen.ip:
+ graceful_failover = node.gracefulFailoverPossible
+ if replica_count > graceful_count and (node_count - 1)+ graceful_count >= replica_count:  # condition under which the message below expects Graceful; uses the incoming graceful_count, not the incremented copy
+ if not node.gracefulFailoverPossible:
+ logic = False
+ summary += "\n failover type for node {0} Expected :: Graceful, Actual :: Hard".format(node.ip)
+ else:  # NOTE(review): indentation is not visible in this view; presumably pairs with the replica/node-count condition above -- confirm
+ if node.gracefulFailoverPossible:
+ logic = False
+ summary += "\n failover type for {0} Expected :: Hard, Actual :: Graceful".format(node.ip)
+ else:  # NOTE(review): presumably the replica_count == 0 case (nesting not visible here) -- confirm
+ for node in nodes:
+ if node.ip == chosen.ip:
+ graceful_failover = node.gracefulFailoverPossible
+ if node.gracefulFailoverPossible:  # per the message below, Hard is the expected type in this branch
+ logic = False
+ summary += "\n failover type for node {0} Expected :: Hard, Actual :: Graceful".format(node.ip)
+ self.assertTrue(logic,summary)  # asserts that no mismatch was recorded, with the collected summary as the message
+ return change_graceful_count,graceful_failover  # second element is the chosen node's gracefulFailoverPossible value, or True if no node matched
+
def get_server_map(self,node):
""" Map of ips and server information """
map = {}
Please sign in to comment.
Something went wrong with that request. Please try again.