From c71be0b10872f4db6600f6526a5ad0ee1cb5214c Mon Sep 17 00:00:00 2001 From: prubenda Date: Thu, 8 Oct 2020 16:41:01 -0400 Subject: [PATCH] adding time scenario --- README.md | 3 +- config/config.yaml | 3 +- docs/time_skew.rd | 29 ++++ kraken/kubernetes/client.py | 7 +- kraken/time_actions/__init__.py | 0 kraken/time_actions/common_time_functions.py | 138 +++++++++++++++++++ run_kraken.py | 18 ++- scenarios/time_scenarios_example.yml | 7 + 8 files changed, 200 insertions(+), 5 deletions(-) create mode 100644 docs/time_skew.rd create mode 100644 kraken/time_actions/__init__.py create mode 100644 kraken/time_actions/common_time_functions.py create mode 100644 scenarios/time_scenarios_example.yml diff --git a/README.md b/README.md index 44ff2e77..489fa363 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,13 @@ Instructions on how to setup the config and the options supported can be found a ### Kubernetes/OpenShift chaos scenarios supported -Kraken supports pod and node based scenarios. +Kraken supports pod, node and time/date based scenarios. - [Pod Scenarios](docs/pod_scenarios.md) - [Node Scenarios](docs/node_scenarios.md) +- [Time Scenarios](docs/time_skew.rd) ### Kraken scenario pass/fail criteria and report It's important to make sure to check if the targeted component recovered from the chaos injection and also if the Kubernetes/OpenShift cluster is healthy as failures in one component can have an adverse impact on other components. 
Kraken does this by: diff --git a/config/config.yaml b/config/config.yaml index 1e1f58af..75a4c0e9 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -9,7 +9,8 @@ kraken: - scenarios/post_action_regex.py node_scenarios: # List of chaos node scenarios to load - scenarios/node_scenarios_example.yml - + time_scenarios: # List of chaos time scenarios to load + - scenarios/time_scenarios_example.yml cerberus: cerberus_enabled: False # Enable it when cerberus is previously installed cerberus_url: # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal diff --git a/docs/time_skew.rd b/docs/time_skew.rd new file mode 100644 index 00000000..4f1344d3 --- /dev/null +++ b/docs/time_skew.rd @@ -0,0 +1,29 @@ +### Time/Date Skew Scenarios + +Using this type of scenario configuration, one is able to change the time and/or date of the system for pods or nodes + +Configuration Options: + +**action:** skew_time or skew_date + +**object_type:** pod or node + +**namespace:** namespace of the pods you want to skew, needs to be set if setting a specific pod name + +**label_selector:** label on the nodes or pods you want to skew + +**object_name:** list of the names of pods or nodes you want to skew + +Refer to [time_scenarios_example](https://github.com/openshift-scale/kraken/blob/master/scenarios/time_scenarios_example.yml) config file. 
+ +``` +time_scenarios: + - action: skew_time + object_type: pod + object_name: + - apiserver-868595fcbb-6qnsc + - apiserver-868595fcbb-mb9j5 + namespace: openshift-apiserver + - action: skew_date + object_type: node + label_selector: node-role.kubernetes.io/worker``` \ No newline at end of file diff --git a/kraken/kubernetes/client.py b/kraken/kubernetes/client.py index 7aece66f..25e22a13 100644 --- a/kraken/kubernetes/client.py +++ b/kraken/kubernetes/client.py @@ -61,9 +61,12 @@ def list_pods(namespace): return pods -def get_all_pods(): +def get_all_pods(label_selector=None): pods = [] - ret = cli.list_pod_for_all_namespaces(pretty=True) + if label_selector: + ret = cli.list_pod_for_all_namespaces(pretty=True, label_selector=label_selector) + else: + ret = cli.list_pod_for_all_namespaces(pretty=True) for pod in ret.items: pods.append([pod.metadata.name, pod.metadata.namespace]) return pods diff --git a/kraken/time_actions/__init__.py b/kraken/time_actions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/kraken/time_actions/common_time_functions.py b/kraken/time_actions/common_time_functions.py new file mode 100644 index 00000000..67b0366b --- /dev/null +++ b/kraken/time_actions/common_time_functions.py @@ -0,0 +1,138 @@ +import datetime +import time +import logging +import kraken.invoke.command as runcommand +import kraken.kubernetes.client as kubecli +import re +import sys + + +def pod_exec(pod_name, command, namespace): + i = 0 + for i in range(5): + response = runcommand.invoke('kubectl exec %s -n %s -- %s' % (pod_name, namespace, command)) + if "unauthorized" in response.lower() or "authorization" in response.lower(): + continue + else: + break + return response + + +def node_debug(node_name, command): + + response = runcommand.invoke("oc debug node/" + node_name + ' -- chroot /host ' + command) + return response + + +def skew_time(scenario): + skew_command = "date -s " + if scenario['action'] == "skew_date": + skewed_date = "00-01-01" 
+ skew_command += skewed_date + elif scenario['action'] == "skew_time": + skewed_time = "01:01:01" + skew_command += skewed_time + if "node" in scenario["object_type"]: + node_names = [] + if "object_name" in scenario.keys() and scenario['object_name']: + node_names = scenario['object_name'] + elif "label_selector" in scenario.keys() and scenario['label_selector']: + node_names = kubecli.list_nodes(scenario['label_selector']) + + for node in node_names: + node_debug(node, skew_command) + logging.info("Reset date/time on node " + str(node)) + return "node", node_names + + elif "pod" in scenario['object_type']: + pod_names = [] + if "object_name" in scenario.keys() and scenario['object_name']: + for name in scenario['object_name']: + if "namespace" not in scenario.keys(): + logging.error("Need to set namespace when using pod name") + sys.exit(1) + pod_names.append([name, scenario['namespace']]) + elif "label_selector" in scenario.keys() and scenario['label_selector']: + pod_names = kubecli.get_all_pods(scenario['label_selector']) + elif "namespace" in scenario.keys() and scenario['namespace']: + pod_names = kubecli.list_pods(scenario['namespace']) + counter = 0 + for pod_name in pod_names: + pod_names[counter] = [pod_name, scenario['namespace']] + counter += 1 + + for pod in pod_names: + if len(pod) > 1: + pod_exec(pod[0], skew_command, pod[1]) + else: + pod_exec(pod, skew_command, scenario['namespace']) + logging.info("Reset date/time on pod " + str(pod[0])) + return "pod", pod_names + + +# From kubectl/oc command get time output +def parse_string_date(obj_datetime): + try: + date_line = re.search(r'[a-zA-Z0-9_() .]*\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \w{3} ' + r'\d{4}\W*', obj_datetime) + return date_line.group().strip() + except Exception: + return "" + + +# Get date and time from string returned from OC +def string_to_date(obj_datetime): + obj_datetime = parse_string_date(obj_datetime) + try: + date_time_obj = datetime.datetime.strptime(obj_datetime, '%a %b %d 
%H:%M:%S %Z %Y') + return date_time_obj + except Exception: + return datetime.datetime(datetime.MINYEAR, 1, 1) + + +def check_date_time(object_type, names): + skew_command = "date" + not_reset = [] + max_retries = 30 + if object_type == "node": + for node_name in names: + first_date_time = datetime.datetime.utcnow() + node_datetime_string = node_debug(node_name, skew_command) + node_datetime = string_to_date(node_datetime_string) + counter = 0 + while not first_date_time < node_datetime < datetime.datetime.utcnow(): + time.sleep(5) + logging.info("Date/time on node %s still not reset, waiting 5 seconds and retrying" + % node_name) + node_datetime_string = node_debug(node_name, skew_command) + node_datetime = string_to_date(node_datetime_string) + counter += 1 + if counter > max_retries: + logging.error("Date and time in node %s didn't reset properly" + % node_name) + not_reset.append(node_name) + break + if counter < max_retries: + logging.info("Date in node " + str(node_name) + " reset properly") + elif object_type == "pod": + for pod_name in names: + first_date_time = datetime.datetime.utcnow() + counter = 0 + pod_datetime_string = pod_exec(pod_name[0], skew_command, pod_name[1]) + pod_datetime = string_to_date(pod_datetime_string) + while not first_date_time < pod_datetime < datetime.datetime.utcnow(): + time.sleep(5) + logging.info("Date/time on pod %s still not reset, waiting 5 seconds and retrying" + % pod_name[0]) + first_date_time = datetime.datetime.utcnow() + pod_datetime = pod_exec(pod_name[0], skew_command, pod_name[1]) + pod_datetime = string_to_date(pod_datetime) + counter += 1 + if counter > max_retries: + logging.error("Date and time in pod %s didn't reset properly" + % pod_name[0]) + not_reset.append(pod_name[0]) + break + if counter < max_retries: + logging.info("Date in pod " + str(pod_name[0]) + " reset properly") + return not_reset diff --git a/run_kraken.py b/run_kraken.py index 54c6a59d..2888f517 100644 --- a/run_kraken.py +++ b/run_kraken.py 
@@ -12,6 +12,7 @@ import kraken.invoke.command as runcommand import kraken.node_actions.common_node_functions as nodeaction from kraken.node_actions.aws_node_scenarios import aws_node_scenarios +import kraken.time_actions.common_time_functions as time_actions # Get the node scenarios object of specfied cloud type @@ -83,7 +84,6 @@ def publish_kraken_status(config, failed_post_scenarios): logging.info("Cerberus status is not healthy and post action scenarios " "are still failing") else: - if failed_post_scenarios: if config['kraken']['exit_on_failure']: logging.info("Cerberus status is healthy but post action scenarios " @@ -165,6 +165,7 @@ def main(cfg): kubeconfig_path = config["kraken"].get("kubeconfig_path", "") scenarios = config["kraken"].get("scenarios", []) node_scenarios = config["kraken"].get("node_scenarios", []) + time_scenarios = config['kraken'].get("time_scenarios", []) wait_duration = config["tunings"].get("wait_duration", 60) iterations = config["tunings"].get("iterations", 1) daemon_mode = config["tunings"].get("daemon_mode", False) @@ -247,6 +248,21 @@ def main(cfg): cerberus_integration(config) logging.info("") + # Inject time skew chaos scenarios specified in the config + if time_scenarios: + for time_scenario_config in time_scenarios: + with open(time_scenario_config, 'r') as f: + scenario_config = yaml.full_load(f) + for time_scenario in scenario_config['time_scenarios']: + object_type, object_names = time_actions.skew_time(time_scenario) + not_reset = time_actions.check_date_time(object_type, object_names) + if len(not_reset) > 0: + logging.info('Object times were not reset') + logging.info("Waiting for the specified duration: %s" + % wait_duration) + time.sleep(wait_duration) + publish_kraken_status(config, not_reset) + iteration += 1 logging.info("") if failed_post_scenarios: diff --git a/scenarios/time_scenarios_example.yml b/scenarios/time_scenarios_example.yml new file mode 100644 index 00000000..9b190b65 --- /dev/null +++ 
b/scenarios/time_scenarios_example.yml @@ -0,0 +1,7 @@ +time_scenarios: + - action: skew_time + object_type: pod + label_selector: app=multus + - action: skew_date + object_type: node + label_selector: node-role.kubernetes.io/worker