Skip to content

Commit

Permalink
Merge pull request #61 from arcprabh/arc_openstack_cloud
Browse files Browse the repository at this point in the history
Enable support for Openstack cloud.
  • Loading branch information
mffiedler committed Feb 2, 2021
2 parents 1833138 + 8dd18af commit ca44f53
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 2 deletions.
7 changes: 6 additions & 1 deletion docs/node_scenarios.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ A google service account is required to give proper authentication to GCP for no

After creating the service account you'll need to enable the account using the following: ```export GOOGLE_APPLICATION_CREDENTIALS="<serviceaccount.json>"```

#### OPENSTACK

**NOTE**: For clusters running on an OPENSTACK cloud, create and source the [OPENSTACK RC file](https://docs.openstack.org/newton/user-guide/common/cli-set-environment-variables-using-openstack-rc.html) on the server where Kraken runs so that the OPENSTACK environment variables are set.

The supported node level chaos scenarios on an OPENSTACK cloud are `node_stop_start_scenario`, `stop_start_kubelet_scenario` and `node_reboot_scenario`.

**NOTE**: The `node_crash_scenario` and `stop_kubelet_scenario` scenarios are supported independent of the cloud platform.

Use 'generic' or do not add the 'cloud_type' key to your scenario if your cluster is not set up using one of the currently supported cloud types.


Node scenarios can be injected by placing the node scenarios config files under node_scenarios option in the kraken config. Refer to [node_scenarios_example](https://github.com/openshift-scale/kraken/blob/master/scenarios/node_scenarios_example.yml) config file.

```
Expand Down
4 changes: 4 additions & 0 deletions kraken/node_actions/common_node_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ def wait_for_unknown_status(node, timeout):
time.sleep(1)
if kubecli.get_node_status(node) != "Unknown":
raise Exception("Node condition status isn't Unknown")

def get_node_ip(node):
    """Return kubectl's output for the InternalIP address of a cluster node.

    Args:
        node: Name of the Kubernetes node to look up.

    Returns:
        Whatever ``runcommand.invoke`` returns for the kubectl JSONPath query
        that selects the node's address entry of type ``InternalIP``.
    """
    command_template = (
        "kubectl get node %s -o "
        "jsonpath='{.status.addresses[?(@.type==\"InternalIP\")].address}'"
    )
    return runcommand.invoke(command_template % (node))
133 changes: 133 additions & 0 deletions kraken/node_actions/openstack_node_scenarios.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import sys
import time
import logging
import subprocess
import requests
import kraken.kubernetes.client as kubecli
import kraken.invoke.command as runcommand
import kraken.node_actions.common_node_functions as nodeaction
from kraken.node_actions.abstract_node_scenarios import abstract_node_scenarios


class OPENSTACKCLOUD:
    """Drive OpenStack server instances through the ``openstack`` CLI.

    Every action shells out via ``runcommand.invoke``; the OpenStack
    environment variables (for example from a sourced RC file) are expected
    to be set already in the environment Kraken runs in.
    """

    def __init__(self):
        # Polling budget handed to get_instance_status by the wait_until_*
        # helpers; the loop sleeps one second between status checks.
        self.Wait = 30

    def start_instances(self, node):
        """Start the server instance called ``node``."""
        runcommand.invoke("openstack server start %s" % (node))
        logging.info("OPENSTACKCLOUD CLI INFO: Completed instance start action for node %s" % (node))

    def stop_instances(self, node):
        """Stop the server instance called ``node``."""
        runcommand.invoke("openstack server stop %s" % (node))
        logging.info("OPENSTACKCLOUD CLI INFO: Completed instance stop action for node %s" % (node))

    def reboot_instances(self, node):
        """Soft-reboot the server instance called ``node``."""
        runcommand.invoke("openstack server reboot --soft %s" % (node))
        logging.info("OPENSTACKCLOUD CLI INFO: Completed instance reboot action for node %s" % (node))

    def wait_until_running(self, node):
        """Wait for ``node`` to report ACTIVE.

        Returns:
            True if the status was reached, False if polling timed out.
        """
        return self.get_instance_status(node, "ACTIVE", self.Wait)

    def wait_until_stopped(self, node):
        """Wait for ``node`` to report SHUTOFF.

        Returns:
            True if the status was reached, False if polling timed out.
        """
        return self.get_instance_status(node, "SHUTOFF", self.Wait)

    def get_instance_status(self, node, expected_status, timeout):
        """Poll ``openstack server show`` until the instance reaches a status.

        Args:
            node: OpenStack server name to query.
            expected_status: Status string to wait for (e.g. ``"ACTIVE"``).
            timeout: Polling budget; one unit is spent per one-second sleep.

        Returns:
            True as soon as the reported status equals ``expected_status``,
            False if the budget is exhausted first.
        """
        elapsed = 0
        sleeper = 1
        while elapsed <= timeout:
            instStatus = runcommand.invoke(
                "openstack server show %s | tr -d ' ' | grep '^|status' | cut -d '|' -f3 | tr -d '\n'" % (node)
            )
            logging.info("instance status is %s" % (instStatus))
            logging.info("expected status is %s" % (expected_status))
            if instStatus.strip() == expected_status:
                logging.info("instance status has reached desired status %s" % (instStatus))
                return True
            time.sleep(sleeper)
            elapsed += sleeper
        # Make the timeout visible instead of silently falling off the end.
        logging.error(
            "instance %s did not reach status %s within the timeout of %s" % (node, expected_status, timeout)
        )
        return False

    def get_openstack_nodename(self, os_node_ip):
        """Return the OpenStack server name that owns ``os_node_ip``.

        Args:
            os_node_ip: IP address of the cluster node.

        Returns:
            The server name, or None when no listed server has that address.
        """
        server_list = runcommand.invoke("openstack server list | grep %s" % (os_node_ip))
        node_name = self._find_server_name(server_list, os_node_ip)
        if node_name is None:
            logging.error("No Openstack server found with IP %s" % (os_node_ip))
        else:
            logging.info("Openstack node name is %s " % (node_name))
        return node_name

    @staticmethod
    def _find_server_name(server_list, os_node_ip):
        """Find the server name in ``openstack server list`` table rows.

        Rows are expected as ``| ID | Name | Status | Networks | ...`` where
        the Networks cell holds ``network=addr[, addr...]`` entries, several
        networks being separated by ``;``.  Only an exact address match
        counts, so ``10.0.0.5`` never selects a server owning ``10.0.0.50``
        even though ``grep`` lets that row through.
        """
        wanted_ip = (os_node_ip or "").strip()
        if not wanted_ip:
            return None
        for row in server_list.splitlines():
            cells = [cell.strip() for cell in row.split("|")]
            # cells[0] is the text before the leading '|', cells[1] the ID
            # and cells[2] the server name.
            if len(cells) < 3 or not cells[2]:
                continue
            for cell in cells[3:]:
                for network in cell.split(";"):
                    _, separator, addresses = network.partition("=")
                    if not separator:
                        continue
                    if wanted_ip in (address.strip() for address in addresses.split(",")):
                        return cells[2]
        return None

class openstack_node_scenarios(abstract_node_scenarios):
    """Node chaos scenarios for clusters whose nodes are OpenStack servers.

    Each scenario maps the Kubernetes node to its OpenStack server name via
    the node's IP, performs the instance action through ``OPENSTACKCLOUD``
    and then waits for the matching Kubernetes node condition.  Any failure
    is logged and ends the process with exit status 1.
    """

    def __init__(self):
        self.openstackcloud = OPENSTACKCLOUD()

    def _get_instance_name(self, node):
        """Return the OpenStack server name backing the cluster node ``node``.

        Raises:
            Exception: if no server name could be resolved, so that a missing
                name is never interpolated into an ``openstack`` command.
        """
        openstack_node_ip = nodeaction.get_node_ip(node)
        openstack_node_name = self.openstackcloud.get_openstack_nodename(openstack_node_ip)
        if not openstack_node_name:
            raise Exception(
                "Unable to find an Openstack instance with IP %s for node %s" % (openstack_node_ip, node)
            )
        return openstack_node_name

    def node_start_scenario(self, instance_kill_count, node, timeout):
        """Start the instance behind ``node`` and wait for the node to be Ready.

        Args:
            instance_kill_count: Number of times to run the injection.
            node: Kubernetes node name.
            timeout: Timeout passed to the node-condition wait helper.
        """
        for _ in range(instance_kill_count):
            try:
                logging.info("Starting node_start_scenario injection")
                logging.info("Starting the node %s" % (node))
                openstack_node_name = self._get_instance_name(node)
                self.openstackcloud.start_instances(openstack_node_name)
                self.openstackcloud.wait_until_running(openstack_node_name)
                nodeaction.wait_for_ready_status(node, timeout)
                logging.info("Node with instance ID: %s is in running state" % (node))
                logging.info("node_start_scenario has been successfully injected!")
            except Exception as e:
                logging.error("Failed to start node instance. Encountered following "
                              "exception: %s. Test Failed" % (e))
                logging.error("node_start_scenario injection failed!")
                sys.exit(1)

    def node_stop_scenario(self, instance_kill_count, node, timeout):
        """Stop the instance behind ``node`` and wait for the node to go Unknown.

        Args:
            instance_kill_count: Number of times to run the injection.
            node: Kubernetes node name.
            timeout: Timeout passed to the node-condition wait helper.
        """
        for _ in range(instance_kill_count):
            try:
                logging.info("Starting node_stop_scenario injection")
                logging.info("Stopping the node %s " % (node))
                openstack_node_name = self._get_instance_name(node)
                self.openstackcloud.stop_instances(openstack_node_name)
                self.openstackcloud.wait_until_stopped(openstack_node_name)
                logging.info("Node with instance name: %s is in stopped state" % (node))
                # A powered-off node stops reporting, so the condition to wait
                # for is Unknown (as in the reboot scenario), not Ready.
                nodeaction.wait_for_unknown_status(node, timeout)
                logging.info("node_stop_scenario has been successfully injected!")
            except Exception as e:
                logging.error("Failed to stop node instance. Encountered following exception: %s. "
                              "Test Failed" % (e))
                logging.error("node_stop_scenario injection failed!")
                sys.exit(1)

    def node_reboot_scenario(self, instance_kill_count, node, timeout):
        """Soft-reboot the instance behind ``node`` and wait for it to recover.

        The node is expected to pass through the Unknown condition before
        becoming Ready again.

        Args:
            instance_kill_count: Number of times to run the injection.
            node: Kubernetes node name.
            timeout: Timeout passed to each node-condition wait helper.
        """
        for _ in range(instance_kill_count):
            try:
                logging.info("Starting node_reboot_scenario injection")
                logging.info("Rebooting the node %s" % (node))
                openstack_node_name = self._get_instance_name(node)
                self.openstackcloud.reboot_instances(openstack_node_name)
                nodeaction.wait_for_unknown_status(node, timeout)
                nodeaction.wait_for_ready_status(node, timeout)
                logging.info("Node with instance name: %s has been rebooted" % (node))
                logging.info("node_reboot_scenario has been successfuly injected!")
            except Exception as e:
                logging.error("Failed to reboot node instance. Encountered following exception:"
                              " %s. Test Failed" % (e))
                logging.error("node_reboot_scenario injection failed!")
                sys.exit(1)

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ requests
boto3
google-api-python-client
kubernetes==12.0.0a1
oauth2client>=4.1.3
oauth2client>=4.1.3
python-openstackclient
3 changes: 3 additions & 0 deletions run_kraken.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from kraken.node_actions.aws_node_scenarios import aws_node_scenarios
from kraken.node_actions.general_cloud_node_scenarios import general_node_scenarios
from kraken.node_actions.gcp_node_scenarios import gcp_node_scenarios
from kraken.node_actions.openstack_node_scenarios import openstack_node_scenarios
import kraken.time_actions.common_time_functions as time_actions


Expand All @@ -31,6 +32,8 @@ def get_node_scenario_object(node_scenario):
return aws_node_scenarios()
elif node_scenario['cloud_type'] == 'gcp':
return gcp_node_scenarios()
elif node_scenario['cloud_type'] == 'openstack':
return openstack_node_scenarios()
else:
logging.error("Cloud type " + node_scenario['cloud_type'] + " is not currently supported; "
"try using 'generic' if wanting to stop/start kubelet or fork bomb on any "
Expand Down

0 comments on commit ca44f53

Please sign in to comment.