Skip to content

Commit

Permalink
adding time scenario
Browse files Browse the repository at this point in the history
  • Loading branch information
paigerube14 committed Oct 21, 2020
1 parent 8274323 commit c71be0b
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 5 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ Instructions on how to setup the config and the options supported can be found a


### Kubernetes/OpenShift chaos scenarios supported
Kraken supports pod and node based scenarios.
Kraken supports pod, node and time/date based scenarios.

- [Pod Scenarios](docs/pod_scenarios.md)

- [Node Scenarios](docs/node_scenarios.md)

- [Time Scenarios](docs/time_skew.rd)

### Kraken scenario pass/fail criteria and report
It's important to make sure to check if the targeted component recovered from the chaos injection and also if the Kubernetes/OpenShift cluster is healthy as failures in one component can have an adverse impact on other components. Kraken does this by:
Expand Down
3 changes: 2 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ kraken:
- scenarios/post_action_regex.py
node_scenarios: # List of chaos node scenarios to load
- scenarios/node_scenarios_example.yml

time_scenarios: # List of chaos time scenarios to load
- scenarios/time_scenarios_example.yml
cerberus:
cerberus_enabled: False # Enable it when cerberus is previously installed
cerberus_url: # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal
Expand Down
29 changes: 29 additions & 0 deletions docs/time_skew.rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
### Time/Date Skew Scenarios

This type of scenario changes (skews) the system time and/or date on the selected pods or nodes.

Configuration Options:

**action:** skew_time or skew_date

**object_type:** pod or node

**namespace:** namespace of the pods you want to skew; it must be set when specific pod names are given in object_name

**label_selector:** label on the nodes or pods you want to skew

**object_name:** list of the names of pods or nodes you want to skew

Refer to [time_scenarios_example](https://github.com/openshift-scale/kraken/blob/master/scenarios/time_scenarios_example.yml) config file.

```
time_scenarios:
  - action: skew_time
    object_type: pod
    object_name:
      - apiserver-868595fcbb-6qnsc
      - apiserver-868595fcbb-mb9j5
    namespace: openshift-apiserver
  - action: skew_date
    object_type: node
    label_selector: node-role.kubernetes.io/worker
```
7 changes: 5 additions & 2 deletions kraken/kubernetes/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,12 @@ def list_pods(namespace):
return pods


def get_all_pods(label_selector=None):
    """List the pods of every namespace, optionally filtered by label.

    Args:
        label_selector: label selector string forwarded to the API call.
            When it is empty or None no selector is sent, so every pod is
            listed.

    Returns:
        A list of ``[pod_name, namespace]`` pairs.
    """
    # Only forward the selector when one was given, so the unfiltered call
    # stays exactly what it was before the parameter existed.
    query = {"pretty": True}
    if label_selector:
        query["label_selector"] = label_selector
    ret = cli.list_pod_for_all_namespaces(**query)
    return [[pod.metadata.name, pod.metadata.namespace] for pod in ret.items]
Expand Down
Empty file added kraken/time_actions/__init__.py
Empty file.
138 changes: 138 additions & 0 deletions kraken/time_actions/common_time_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import datetime
import time
import logging
import kraken.invoke.command as runcommand
import kraken.kubernetes.client as kubecli
import re
import sys


def pod_exec(pod_name, command, namespace):
    """Run ``command`` inside a pod through ``kubectl exec`` and return the output.

    The command is attempted at most five times.  Another attempt is made only
    when the response text mentions "unauthorized" or "authorization"; the
    response of the last attempt is returned either way.
    """
    exec_command = 'kubectl exec %s -n %s -- %s' % (pod_name, namespace, command)
    for _attempt in range(5):
        response = runcommand.invoke(exec_command)
        lowered = response.lower()
        if "unauthorized" not in lowered and "authorization" not in lowered:
            break
    return response


def node_debug(node_name, command):
    """Run ``command`` on a node via ``oc debug`` with the root changed to /host.

    Returns whatever ``runcommand.invoke`` returns for the assembled command.
    """
    debug_command = "".join(["oc debug node/", node_name, ' -- chroot /host ', command])
    return runcommand.invoke(debug_command)


def skew_time(scenario):
    """Skew the date or time on the nodes or pods selected by ``scenario``.

    Args:
        scenario: dict describing one time scenario. Keys read here:
            ``action`` ("skew_date" or "skew_time"), ``object_type``
            (contains "node" or "pod"), and the optional selectors
            ``object_name`` (a list of names, or a single name),
            ``label_selector`` and ``namespace``.

    Returns:
        ``("node", node_names)`` or ``("pod", pod_names)`` where ``pod_names``
        is a list of ``[name, namespace]`` pairs.  The list is empty when the
        scenario selects nothing.

    Exits the process with status 1 (after logging an error) when the action
    or object type is not supported, or when pods are addressed by name
    without a namespace.
    """
    # Fixed values handed to `date -s` for each supported action.
    skew_values = {"skew_date": "00-01-01", "skew_time": "01:01:01"}
    action = scenario.get("action")
    if action not in skew_values:
        # Without this check an unknown action would run a bare `date -s `.
        logging.error("Unsupported time scenario action '%s', expected "
                      "skew_date or skew_time" % action)
        sys.exit(1)
    skew_command = "date -s " + skew_values[action]

    object_type = str(scenario.get("object_type") or "")
    object_names = scenario.get("object_name") or []
    if isinstance(object_names, str):
        # A single name given as a plain string must not be iterated
        # character by character.
        object_names = [object_names]
    label_selector = scenario.get("label_selector")
    namespace = scenario.get("namespace")

    if "node" in object_type:
        if object_names:
            node_names = list(object_names)
        elif label_selector:
            node_names = kubecli.list_nodes(label_selector)
        else:
            node_names = []
        for node in node_names:
            node_debug(node, skew_command)
            logging.info("Reset date/time on node " + str(node))
        return "node", node_names

    if "pod" in object_type:
        if object_names:
            if not namespace:
                logging.error("Need to set namespace when using pod name")
                sys.exit(1)
            pod_names = [[name, namespace] for name in object_names]
        elif label_selector:
            # get_all_pods already returns [name, namespace] pairs.
            pod_names = kubecli.get_all_pods(label_selector)
        elif namespace:
            # list_pods is assumed to return bare pod names (the previous
            # code wrapped every entry the same way) -- pair them up.
            pod_names = [[name, namespace]
                         for name in kubecli.list_pods(namespace)]
        else:
            pod_names = []
        for pod in pod_names:
            pod_exec(pod[0], skew_command, pod[1])
            logging.info("Reset date/time on pod " + str(pod[0]))
        return "pod", pod_names

    # Falling through used to return None, which made the caller's tuple
    # unpacking fail with an unrelated TypeError.
    logging.error("Unsupported object_type '%s' in time scenario, expected "
                  "pod or node" % object_type)
    sys.exit(1)


# From kubectl/oc command get time output
def parse_string_date(obj_datetime):
    """Extract the ``date`` timestamp from the output of a kubectl/oc command.

    Looks for text shaped like ``Wed Oct 21 14:03:22 UTC 2020``.  The day of
    the month may be one or two digits and may be preceded by extra spaces,
    because ``date`` pads single-digit days with a space
    (``Thu Oct  1 ...``).

    Args:
        obj_datetime: raw command output.

    Returns:
        The matched timestamp only, without any text around it, or ``""``
        when no timestamp is found or the input is not a string.
    """
    try:
        match = re.search(
            r'\w{3} \w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \w{3} \d{4}',
            obj_datetime)
    except TypeError:
        # Not a string (e.g. None from a failed command): nothing to parse.
        return ""
    return match.group() if match else ""


# Get date and time from string returned from OC
def string_to_date(obj_datetime):
    """Convert kubectl/oc ``date`` output into a ``datetime`` object.

    The timestamp is first isolated with ``parse_string_date`` and then parsed
    with the format ``%a %b %d %H:%M:%S %Z %Y``.  When parsing fails, the
    earliest representable date (year ``datetime.MINYEAR``, January 1st) is
    returned instead of raising.
    """
    fallback = datetime.datetime(datetime.MINYEAR, 1, 1)
    date_text = parse_string_date(obj_datetime)
    try:
        parsed = datetime.datetime.strptime(date_text, '%a %b %d %H:%M:%S %Z %Y')
    except Exception:
        return fallback
    return parsed


def _wait_for_clock_reset(kind, name, read_clock, max_retries=30, retry_interval=5):
    """Poll one object's clock until it is back in sync; return True on success.

    ``read_clock`` is a zero-argument callable returning the raw output of
    ``date`` on the object.  The clock counts as reset once the parsed time
    lies between the moment polling started and the current time.
    """
    # `date` prints whole seconds, so drop the microseconds from the lower
    # bound; otherwise a correct clock read within the same second as the
    # start would compare as "too early".
    started = datetime.datetime.utcnow().replace(microsecond=0)
    for attempt in range(max_retries + 1):
        observed = string_to_date(read_clock())
        if started <= observed <= datetime.datetime.utcnow():
            logging.info("Date in " + kind + " " + str(name) + " reset properly")
            return True
        if attempt == max_retries:
            break
        logging.info("Date/time on %s %s still not reset, waiting %s seconds and retrying"
                     % (kind, name, retry_interval))
        time.sleep(retry_interval)
    logging.error("Date and time in %s %s didn't reset properly" % (kind, name))
    return False


def check_date_time(object_type, names):
    """Wait for the clocks of skewed nodes or pods to return to the current time.

    Args:
        object_type: "node" or "pod"; any other value checks nothing.
        names: node names when ``object_type`` is "node", or
            ``[pod_name, namespace]`` pairs when it is "pod".

    Returns:
        The names of the nodes or pods whose clock was still wrong after all
        retries (an empty list when everything recovered).
    """
    skew_command = "date"
    not_reset = []
    if object_type == "node":
        for node_name in names:
            recovered = _wait_for_clock_reset(
                "node", node_name,
                lambda node=node_name: node_debug(node, skew_command))
            if not recovered:
                not_reset.append(node_name)
    elif object_type == "pod":
        for pod_name in names:
            recovered = _wait_for_clock_reset(
                "pod", pod_name[0],
                lambda pod=pod_name: pod_exec(pod[0], skew_command, pod[1]))
            if not recovered:
                not_reset.append(pod_name[0])
    return not_reset
18 changes: 17 additions & 1 deletion run_kraken.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import kraken.invoke.command as runcommand
import kraken.node_actions.common_node_functions as nodeaction
from kraken.node_actions.aws_node_scenarios import aws_node_scenarios
import kraken.time_actions.common_time_functions as time_actions


# Get the node scenarios object of specified cloud type
Expand Down Expand Up @@ -83,7 +84,6 @@ def publish_kraken_status(config, failed_post_scenarios):
logging.info("Cerberus status is not healthy and post action scenarios "
"are still failing")
else:

if failed_post_scenarios:
if config['kraken']['exit_on_failure']:
logging.info("Cerberus status is healthy but post action scenarios "
Expand Down Expand Up @@ -165,6 +165,7 @@ def main(cfg):
kubeconfig_path = config["kraken"].get("kubeconfig_path", "")
scenarios = config["kraken"].get("scenarios", [])
node_scenarios = config["kraken"].get("node_scenarios", [])
time_scenarios = config['kraken'].get("time_scenarios", [])
wait_duration = config["tunings"].get("wait_duration", 60)
iterations = config["tunings"].get("iterations", 1)
daemon_mode = config["tunings"].get("daemon_mode", False)
Expand Down Expand Up @@ -247,6 +248,21 @@ def main(cfg):
cerberus_integration(config)
logging.info("")

# Inject time skew chaos scenarios specified in the config
if time_scenarios:
for time_scenario_config in time_scenarios:
with open(time_scenario_config, 'r') as f:
scenario_config = yaml.full_load(f)
for time_scenario in scenario_config['time_scenarios']:
object_type, object_names = time_actions.skew_time(time_scenario)
not_reset = time_actions.check_date_time(object_type, object_names)
if len(not_reset) > 0:
logging.info('Object times were not reset')
logging.info("Waiting for the specified duration: %s"
% wait_duration)
time.sleep(wait_duration)
publish_kraken_status(config, not_reset)

iteration += 1
logging.info("")
if failed_post_scenarios:
Expand Down
7 changes: 7 additions & 0 deletions scenarios/time_scenarios_example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
time_scenarios:
- action: skew_time
object_type: pod
label_selector: app=multus
- action: skew_date
object_type: node
label_selector: node-role.kubernetes.io/worker

0 comments on commit c71be0b

Please sign in to comment.