Skip to content

Commit

Permalink
Adding Kraken to PerfScale Pipeline
Browse files Browse the repository at this point in the history
This commit adds Kraken to the CI pipeline, thereby enabling chaos
scenarios to be injected on the specified jump host.
  • Loading branch information
yashashreesuresh committed Aug 23, 2020
1 parent 3a78cdf commit 8ab0686
Show file tree
Hide file tree
Showing 19 changed files with 404 additions and 0 deletions.
20 changes: 20 additions & 0 deletions CI/scenarios/etcd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Chaos scenario: force-kill one randomly sampled etcd pod.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'delete etcd pods'
    steps:
      - podAction:
          # Candidates: pods labelled k8s-app=etcd in openshift-etcd.
          matches:
            - labels:
                namespace: 'openshift-etcd'
                selector: 'k8s-app=etcd'
          # Narrow the candidates to a random sample of one.
          filters:
            - randomSample:
                size: 1
          actions:
            - kill:
                probability: 1
                force: true
23 changes: 23 additions & 0 deletions CI/scenarios/openshift-apiserver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Chaos scenario: force-kill one randomly sampled openshift-apiserver pod.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'delete openshift-apiserver pods'
    steps:
      - podAction:
          # Candidates: pods labelled app=openshift-apiserver in the
          # openshift-apiserver namespace.
          matches:
            - labels:
                namespace: 'openshift-apiserver'
                selector: 'app=openshift-apiserver'

          # Narrow the candidates to a random sample of one.
          filters:
            - randomSample:
                size: 1

          # Actions run in the order listed.
          actions:
            - kill:
                probability: 1
                force: true
22 changes: 22 additions & 0 deletions CI/scenarios/openshift-kube-apiserver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Chaos scenario: force-kill one randomly sampled openshift-kube-apiserver pod.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'delete openshift-kube-apiserver pods'
    steps:
      - podAction:
          # Candidates: pods labelled app=openshift-kube-apiserver in the
          # openshift-kube-apiserver namespace.
          matches:
            - labels:
                namespace: 'openshift-kube-apiserver'
                selector: 'app=openshift-kube-apiserver'
          # Narrow the candidates to a random sample of one.
          filters:
            - randomSample:
                size: 1

          # Actions run in the order listed.
          actions:
            - kill:
                probability: 1
                force: true
21 changes: 21 additions & 0 deletions CI/scenarios/post_action_etcd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Post-action check: expect 3 etcd pods in the Running state.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 10
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'check 3 pods are in namespace with selector: etcd'
    steps:
      - podAction:
          # Candidates: pods labelled k8s-app=etcd in openshift-etcd.
          matches:
            - labels:
                namespace: 'openshift-etcd'
                selector: 'k8s-app=etcd'
          # Keep only the pods whose state is Running.
          filters:
            - property:
                name: 'state'
                value: 'Running'
          # Actions run in the order listed.
          actions:
            - checkPodCount:
                count: 3
3 changes: 3 additions & 0 deletions CI/scenarios/post_action_etcd_example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Count the lines of the openshift-etcd pod listing that contain "Running"
# and print that count.
pods=$(oc get pods -n openshift-etcd | grep -c Running)
echo "${pods}"
23 changes: 23 additions & 0 deletions CI/scenarios/post_action_etcd_example_py.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env python3
import subprocess
import logging


def run(cmd):
    """Run ``cmd`` through the shell and return its combined stdout/stderr.

    Args:
        cmd: Shell command line to execute.

    Returns:
        The text the command wrote to stdout and stderr (stderr is merged
        into stdout). An empty string is returned when the command could
        not be started.
    """
    # Pre-bind the result so a failure inside the try block cannot leave it
    # undefined at the return statement.
    out = ""
    try:
        output = subprocess.Popen(cmd, shell=True,
                                  universal_newlines=True, stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT)
        # stderr is redirected into stdout, so the second element is unused.
        out, _ = output.communicate()
        logging.info("out " + str(out))
    except Exception as e:
        logging.error("Failed to run %s, error: %s" % (cmd, e))
    return out


# Number of lines in the openshift-etcd pod listing that contain "Running",
# kept as the stripped text printed by grep -c.
pods_running = run("oc get pods -n openshift-etcd | grep -c Running").rstrip()

# Anything other than exactly "3" is reported with an ERROR prefix.
if pods_running != str(3):
    print("ERROR there were " + str(pods_running) + " pods running instead of 3")
else:
    print("There were 3 pods running properly")
23 changes: 23 additions & 0 deletions CI/scenarios/post_action_openshift-apiserver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Post-action check: expect 3 openshift-apiserver pods in the Running state.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'check 3 pods are in namespace with selector: openshift-apiserver'
    steps:
      - podAction:
          # Candidates: pods labelled app=openshift-apiserver in the
          # openshift-apiserver namespace.
          matches:
            - labels:
                namespace: 'openshift-apiserver'
                selector: 'app=openshift-apiserver'

          # Keep only the pods whose state is Running.
          filters:
            - property:
                name: 'state'
                value: 'Running'

          # Actions run in the order listed.
          actions:
            - checkPodCount:
                count: 3
21 changes: 21 additions & 0 deletions CI/scenarios/post_action_openshift-kube-apiserver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Post-action check: expect 3 openshift-kube-apiserver pods in the Running
# state.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'check 3 pods are in namespace with selector: openshift-kube-apiserver'
    steps:
      - podAction:
          # Candidates: pods labelled app=openshift-kube-apiserver in the
          # openshift-kube-apiserver namespace.
          matches:
            - labels:
                namespace: 'openshift-kube-apiserver'
                selector: 'app=openshift-kube-apiserver'
          # Keep only the pods whose state is Running.
          filters:
            - property:
                name: 'state'
                value: 'Running'
          # Actions run in the order listed.
          actions:
            - checkPodCount:
                count: 3
22 changes: 22 additions & 0 deletions CI/scenarios/post_action_prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Post-action check: expect 2 prometheus pods in the Running state.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 10
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'check 2 pods are in namespace with selector: prometheus'
    steps:
      - podAction:
          # Candidates: pods labelled app=prometheus in openshift-monitoring.
          matches:
            - labels:
                namespace: 'openshift-monitoring'
                selector: 'app=prometheus'
          # Keep only the pods whose state is Running.
          filters:
            - property:
                name: 'state'
                value: 'Running'
          # Actions run in the order listed.
          actions:
            - checkPodCount:
                count: 2

68 changes: 68 additions & 0 deletions CI/scenarios/post_action_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
import subprocess
import re
import sys
from kubernetes import client, config
from kubernetes.client.rest import ApiException
import logging


# List all namespaces
def list_namespaces():
    """Return the names of all namespaces reported by the cluster.

    The kubeconfig is loaded via ``config.load_kube_config()`` and the
    namespaces are fetched with ``CoreV1Api.list_namespace``.

    Returns:
        A list with the ``metadata.name`` of every returned namespace.

    Raises:
        ApiException: logged and re-raised when the listing call fails, so
            the caller sees the real API error instead of an unbound-variable
            error from the code that follows.
    """
    try:
        config.load_kube_config()
        cli = client.CoreV1Api()
        ret = cli.list_namespace(pretty=True)
    except ApiException as e:
        logging.error("Exception when calling CoreV1Api->list_namespace: %s\n" % e)
        raise
    return [namespace.metadata.name for namespace in ret.items]


# Check if all the watch_namespaces are valid
def check_namespaces(namespaces):
    """Resolve namespace names and regexes against the cluster's namespaces.

    Entries of ``namespaces`` that equal an existing namespace are kept as
    they are. Every other entry is treated as a regular expression and is
    expanded to all existing namespaces it matches (``re.search``).

    Args:
        namespaces: Iterable of namespace names and/or regex patterns.

    Returns:
        A list (unordered) of the existing namespaces selected.

    Exits:
        Logs the error and calls ``sys.exit(1)`` when a pattern matches no
        namespace or any other exception occurs while resolving.
    """
    try:
        valid_namespaces = list_namespaces()
        regex_namespaces = set(namespaces) - set(valid_namespaces)
        final_namespaces = set(namespaces) - regex_namespaces
        valid_regex = set()
        # Evaluate every pattern independently: stopping at the first pattern
        # that matches a namespace would leave other patterns matching only
        # that same namespace wrongly flagged as invalid.
        for regex_namespace in regex_namespaces:
            matched = [namespace for namespace in valid_namespaces
                       if re.search(regex_namespace, namespace)]
            if matched:
                final_namespaces.update(matched)
                valid_regex.add(regex_namespace)
        invalid_namespaces = regex_namespaces - valid_regex
        if invalid_namespaces:
            raise Exception("There exists no namespaces matching: %s" % (invalid_namespaces))
        return list(final_namespaces)
    except Exception as e:
        logging.error("%s" % (e))
        sys.exit(1)


def run(cmd):
    """Run ``cmd`` through the shell and return its combined stdout/stderr.

    Args:
        cmd: Shell command line to execute.

    Returns:
        The text the command wrote to stdout and stderr (stderr is merged
        into stdout). An empty string is returned when the command could
        not be started.
    """
    # Pre-bind the result so a failure inside the try block cannot leave it
    # undefined at the return statement.
    out = ""
    try:
        output = subprocess.Popen(cmd, shell=True,
                                  universal_newlines=True, stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT)
        # stderr is redirected into stdout, so the second element is unused.
        out, _ = output.communicate()
    except Exception as e:
        logging.error("Failed to run %s, error: %s" % (cmd, e))
    return out


# Namespace patterns to inspect; expanded to existing namespaces below.
regex_namespace = ["openshift-.*"]
namespaces = check_namespaces(regex_namespace)

# Add up, across the selected namespaces, the grep -c counts of pod-listing
# lines that contain "Running".
pods_running = 0
for namespace in namespaces:
    new_pods_running = run(
        "oc get pods -n " + namespace + " | grep -c Running"
    ).rstrip()
    try:
        pods_running = pods_running + int(new_pods_running)
    except Exception:
        # Output that is not an integer is skipped.
        continue

print(pods_running)
11 changes: 11 additions & 0 deletions CI/scenarios/post_action_regex.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Print the number of "Running" lines in the openshift-etcd pod listing, then
# report whether that number equals the 3 expected pods.
pods=$(oc get pods -n openshift-etcd | grep -c Running)
echo "${pods}"

if [ "${pods}" -eq 3 ]; then
  echo "Pods Pass"
else
  # need capital error for proper error catching in run_kraken
  echo "ERROR pod count $pods doesnt match 3 expected pods"
fi
18 changes: 18 additions & 0 deletions CI/scenarios/post_action_regex_openshift_pod_kill.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Post-action check: count the Running pods in every namespace matching
# "openshift-.*" and compare against the expected total. Nothing is killed
# by this scenario.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  # The name describes the check performed here; the previous name was copied
  # from the kill scenario and did not match the checkPodCount action.
  - name: "check running pod count in any openshift namespace"
    steps:
      - podAction:
          matches:
            - namespace: "openshift-.*"
          # Keep only the pods whose state is Running.
          filters:
            - property:
                name: "state"
                value: "Running"
          actions:
            # NOTE(review): 146 is presumably the Running-pod total of the
            # cluster this was written against - confirm for the target
            # cluster before relying on this check.
            - checkPodCount:
                count: 146
23 changes: 23 additions & 0 deletions CI/scenarios/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Chaos scenario: force-kill one randomly sampled prometheus pod.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'delete prometheus pods'
    steps:
      - podAction:
          # Candidates: pods labelled app=prometheus in openshift-monitoring.
          matches:
            - labels:
                namespace: 'openshift-monitoring'
                selector: 'app=prometheus'

          # Narrow the candidates to a random sample of one.
          filters:
            - randomSample:
                size: 1

          # Actions run in the order listed.
          actions:
            - kill:
                probability: 1
                force: true
20 changes: 20 additions & 0 deletions CI/scenarios/regex_openshift_pod_kill.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Chaos scenario: sample up to 3 Running pods from namespaces matching
# "openshift-.*" and kill each with probability 0.7.
config:
  runStrategy:
    runs: 1
    maxSecondsBetweenRuns: 30
    minSecondsBetweenRuns: 1
scenarios:
  - name: 'kill up to 3 pods in any openshift namespace'
    steps:
      - podAction:
          matches:
            - namespace: 'openshift-.*'
          # Filters are listed as: Running pods first, then a random sample
          # of three.
          filters:
            - property:
                name: 'state'
                value: 'Running'
            - randomSample:
                size: 3
          actions:
            - kill:
                probability: 0.7
11 changes: 11 additions & 0 deletions ansible/ansible.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[defaults]
# Enable the profile_tasks callback (configured in its own section below).
callback_whitelist = profile_tasks
host_key_checking = False
retry_files_enabled = False
log_path = ~/ansible.log
# work around privilege escalation timeouts in ansible:
timeout = 30

[callback_profile_tasks]
sort_order = none
task_output_limit = 10000
1 change: 1 addition & 0 deletions ansible/inventory
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Group targeted by ansible/kraken.yml (hosts: orchestration); no hosts are listed in this file.
[orchestration]
26 changes: 26 additions & 0 deletions ansible/kraken.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# Clone the kraken repository on the orchestration host, render its config
# file from kraken.j2, copy the scenarios in, and run run_kraken.py.
- name: Run kraken chaos scenarios
  hosts: orchestration
  gather_facts: true
  remote_user: "{{ orchestration_user }}"
  vars_files:
    - vars/kraken_vars.yml

  tasks:
    - name: Git clone kraken repository
      git:
        repo: "{{ kraken_repository }}"
        dest: "{{ kraken_dir }}"
        force: true

    - name: Generate kraken config file
      template:
        src: kraken.j2
        dest: "{{ kraken_config }}"

    - name: Start injecting failures
      shell: |
        cp -r "{{ scenarios_folder_path }}"* scenarios/
        unset CONFIG
        python3 run_kraken.py
      args:
        # Let Ansible change directory so a missing kraken_dir fails the task
        # up front instead of running cp/python3 in the wrong directory.
        chdir: "{{ kraken_dir }}"
      # A failing chaos run is tolerated and does not abort the play.
      ignore_errors: true
13 changes: 13 additions & 0 deletions ansible/templates/kraken.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
kraken:
  kubeconfig_path: "{{ kubeconfig_path }}"  # Path to kubeconfig
  exit_on_failure: {{ exit_on_failure }}  # Exit when a post action scenario fails
  scenarios: {{ scenarios }}  # List of policies/chaos scenarios to load

cerberus:
  cerberus_enabled: {{ cerberus_enabled }}  # Enable it when cerberus is previously installed
  cerberus_url: "{{ cerberus_url }}"  # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal

tunings:
  wait_duration: {{ wait_duration }}  # Duration to wait between each chaos scenario
  iterations: {{ iterations }}  # Number of times to execute the scenarios
  daemon_mode: {{ daemon_mode }}  # When enabled, iterations are set to infinity, which means that kraken will keep injecting the scenarios forever
Loading

0 comments on commit 8ab0686

Please sign in to comment.