Skip to content

Commit

Permalink
Add node level chaos scenarios for bastion node
Browse files: browse the repository at this point in the history
Signed-off-by: Pravin Dsilva <pravin.d-silva@ibm.com>
  • Loading branch information
Pravin Dsilva committed Feb 15, 2021
1 parent bb6429a commit 0149dd1
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 14 deletions.
11 changes: 11 additions & 0 deletions docs/node_scenarios.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,15 @@ node_scenarios:
label_selector: node-role.kubernetes.io/infra
instance_kill_count: 1
timeout: 120
- actions:
- stop_start_helper_node_scenario # node chaos scenario for helper node
instance_kill_count: 1
timeout: 120
helper_node_ip: # ip address of the helper node
service: # check status of the services on the helper node
- haproxy
- dhcpd
- named
ssh_private_key: /root/.ssh/id_rsa # ssh key to access the helper node
cloud_type: openstack
```
18 changes: 11 additions & 7 deletions run_kraken.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,17 @@ def inject_node_scenario(action, node_scenario, node_scenario_object):
elif action == "node_crash_scenario":
node_scenario_object.node_crash_scenario(instance_kill_count, node, timeout)
elif action == "stop_start_helper_node_scenario":
if not node_scenario['helper_node_ip']:
logging.error("Helper node IP address is not provided")
sys.exit(1)
node_scenario_object.helper_node_stop_start_scenario(
instance_kill_count, node_scenario['helper_node_ip'], timeout)
node_scenario_object.helper_node_service_status(
node_scenario['helper_node_ip'], service, ssh_private_key, timeout)
if node_scenario['cloud_type'] != "openstack":
logging.error("Scenario: " + action + " is not supported for "
"cloud type " + node_scenario['cloud_type'] + ", skipping action")
else:
if not node_scenario['helper_node_ip']:
logging.error("Helper node IP address is not provided")
sys.exit(1)
node_scenario_object.helper_node_stop_start_scenario(
instance_kill_count, node_scenario['helper_node_ip'], timeout)
node_scenario_object.helper_node_service_status(
node_scenario['helper_node_ip'], service, ssh_private_key, timeout)


# Get cerberus status
Expand Down
7 changes: 0 additions & 7 deletions scenarios/node_scenarios_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,11 @@ node_scenarios:
- node_stop_start_scenario
- stop_start_kubelet_scenario
- node_crash_scenario
#- stop_start_helper_node_scenario
node_name: # node on which scenario has to be injected
label_selector: node-role.kubernetes.io/worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
instance_kill_count: 1 # number of times to inject each scenario under actions
timeout: 120 # duration to wait for completion of node scenario injection
cloud_type: aws # cloud type on which Kubernetes/OpenShift runs
helper_node_ip: # ip address of the helper node
service: # check status of the services on the helper node
- haproxy
- dhcpd
- named
ssh_private_key: ~/.ssh/id_rsa # path to the private key file to access the helper node
- actions:
- node_reboot_scenario
node_name:
Expand Down

0 comments on commit 0149dd1

Please sign in to comment.