Skip to content

Commit

Permalink
Merge branches 'w/2.8/improvement/3218-enrich-sos-report-content' and…
Browse files Browse the repository at this point in the history
… 'q/3222/2.7/improvement/3218-enrich-sos-report-content' into tmp/octopus/q/2.8
  • Loading branch information
bert-e committed Apr 7, 2021
3 parents f7150c6 + 8d53398 + b7d7177 commit 8395c4f
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 23 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,16 @@ version to 1.6.5 (PR [#2582](https://github.com/scality/metalk8s/pull/2582))

## Release 2.5.3 (in development)

### Enhancements
- [#3218](https://github.com/scality/metalk8s/issues/3218) - Enrich sosreport
plugins:
- Add a Prometheus snapshot
- Add Salt configuration
- Add salt-minion journal
- Add kubectl top nodes & pods
- Add bootstrap and solutions configuration files
(PR [#3222](https://github.com/scality/metalk8s/pull/3222))

## Release 2.5.2

### Enhancements
Expand Down
2 changes: 2 additions & 0 deletions charts/kube-prometheus-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ prometheus:
## Maximum size of metrics
retentionSize: '__escape__({{ prometheus.spec.config.retention_size | string }})'

enableAdminAPI: '__var__(prometheus.spec.config.enable_admin_api)'

grafana:
adminPassword: admin

Expand Down
2 changes: 1 addition & 1 deletion packages/common/metalk8s-sosreport/containerd.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def setup(self):

def _get_crio_list(self, cmd):
ret = []
result = self.get_command_output(cmd)
result = self.exec_cmd(cmd)
if result["status"] == 0:
for entry in result["output"].splitlines():
if "deprecated" not in entry[0]:
Expand Down
126 changes: 105 additions & 21 deletions packages/common/metalk8s-sosreport/metalk8s.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#! /bin/env python3

from sos.plugins import Plugin, RedHatPlugin, UbuntuPlugin
from os import path

import requests
from sos.plugins import Plugin, RedHatPlugin, UbuntuPlugin


class metalk8s(Plugin, RedHatPlugin, UbuntuPlugin):

Expand All @@ -17,18 +19,96 @@ class metalk8s(Plugin, RedHatPlugin, UbuntuPlugin):
("all", "also collect all namespaces output separately", "slow", False),
("describe", "capture descriptions of all kube resources", "fast", False),
("podlogs", "capture logs for pods", "slow", False),
("prometheus-snapshot", "generate a Prometheus snapshot", "slow", False),
]

def check_is_master(self):
    """Tell whether this host looks like a Kubernetes master node.

    Returns True when the cluster admin kubeconfig file exists on the
    local filesystem, False otherwise.
    """
    admin_kubeconfig = "/etc/kubernetes/admin.conf"
    return path.exists(admin_kubeconfig)

def prometheus_snapshot(self, timeout=120):
    """Generate a Prometheus TSDB snapshot and copy it into the archive.

    The Prometheus pod name and endpoint are looked up with kubectl, a
    snapshot is requested through the Prometheus admin API, the resulting
    directory is copied next to the other collected files and then removed
    from the Prometheus pod.

    Any failure is logged with ``self._log_error`` and aborts the snapshot
    without raising, so the rest of the report is still collected.

    :param timeout: number of seconds to wait for the Prometheus API
        before giving up (passed to ``requests.post``).
    :return: None
    """
    kube_cmd = (
        "kubectl "
        "--kubeconfig=/etc/kubernetes/admin.conf "
        "--namespace metalk8s-monitoring"
    )

    # Retrieve Prometheus endpoint: the command prints "<pod name> <ip>:<port>"
    prom_endpoint_cmd = (
        "{0} get endpoints "
        "prometheus-operator-prometheus --output "
        "jsonpath='{{ .subsets[0].addresses[0].targetRef.name }} "
        "{{ .subsets[0].addresses[0].ip }}:"
        "{{ .subsets[0].ports[0].port }}'".format(kube_cmd)
    )
    prom_endpoint_res = self.exec_cmd(prom_endpoint_cmd)
    if prom_endpoint_res["status"] != 0:
        self._log_error(
            "Unable to retrieve Prometheus endpoint: {0}".format(
                prom_endpoint_res["output"]
            )
        )
        return

    try:
        prom_instance, prom_endpoint = prom_endpoint_res["output"].split()
    except ValueError:
        # Empty output (no endpoint registered yet) or unexpected format
        self._log_error(
            "Unexpected Prometheus endpoint description: {0!r}".format(
                prom_endpoint_res["output"]
            )
        )
        return

    # Generate snapshot
    # return a JSON object as follows:
    # {"status":"success","data":{"name":"20210322T164646Z-7d0b9ca8be8e9981"}}
    # or in case of error:
    # {"status":"error","errorType":"unavailable","error":"admin APIs disabled"}
    prom_snapshot_url = "http://{0}/api/v1/admin/tsdb/snapshot".format(
        prom_endpoint
    )
    try:
        # The timeout keeps an unresponsive Prometheus from hanging the
        # whole report; RequestException also covers connection errors
        # and timeouts, not only HTTP error statuses.
        res = requests.post(prom_snapshot_url, timeout=timeout)
        res.raise_for_status()
    except requests.exceptions.RequestException as exc:
        self._log_error(
            "An error occurred while querying Prometheus API: {0}".format(str(exc))
        )
        return

    try:
        res_json = res.json()
    except ValueError:
        self._log_error(
            "Invalid JSON returned by Prometheus API: {0}".format(res.text)
        )
        return

    try:
        snapshot_name = res_json["data"]["name"]
    except (KeyError, TypeError):
        # Do not assume an "error" key is present in the payload
        if isinstance(res_json, dict):
            reason = res_json.get("error", res_json)
        else:
            reason = res_json
        self._log_error(
            "Unable to generate Prometheus snapshot: {0}".format(reason)
        )
        return

    # Copy snapshot locally
    snapshot_archive_dir = "{0}/prometheus-snapshot".format(
        self.archive.get_archive_path()
    )

    copy_snapshot_cmd = (
        "{0} cp -c prometheus {1}:/prometheus/snapshots/{2} {3}".format(
            kube_cmd, prom_instance, snapshot_name, snapshot_archive_dir
        )
    )
    copy_snapshot_res = self.exec_cmd(copy_snapshot_cmd)
    if copy_snapshot_res["status"] != 0:
        self._log_error(
            "Unable to copy Prometheus snapshot {0}: {1}".format(
                snapshot_name, copy_snapshot_res["output"]
            )
        )

    # Remove snapshot from Prometheus pod, even when the copy failed, so
    # that it does not keep consuming space on the Prometheus volume
    delete_snapshot_cmd = (
        "{0} exec -c prometheus {1} -- "
        "rm -rf /prometheus/snapshots/{2}".format(
            kube_cmd, prom_instance, snapshot_name
        )
    )
    self.exec_cmd(delete_snapshot_cmd)

def setup(self):
self.add_copy_spec("/etc/kubernetes/manifests")
self.add_copy_spec("/etc/metalk8s/bootstrap.yaml")
self.add_copy_spec("/etc/metalk8s/solutions.yaml")
self.add_copy_spec("/etc/salt")
self.add_forbidden_path("/etc/salt/pki")
self.add_copy_spec("/var/log/pods")
self.add_copy_spec("/var/log/metalk8s")

services = [
"kubelet",
"salt-minion",
]

for service in services:
Expand All @@ -41,16 +121,15 @@ def setup(self):
kube_cmd += "--kubeconfig=/etc/kubernetes/admin.conf"

kube_get_cmd = "get -o json "
for subcmd in ["version", "config view"]:
for subcmd in ["version", "config view", "top nodes"]:
self.add_cmd_output("{0} {1}".format(kube_cmd, subcmd))

# get all namespaces in use
namespaces_result = self.get_command_output(
"{0} get namespaces".format(kube_cmd)
# List namespace names only, one per line and without a header row.
# The trailing space after "--no-headers" is required: adjacent string
# literals are concatenated verbatim, and without it kubectl would receive
# the single unknown flag "--no-headers--output".
namespaces_result = self.exec_cmd(
    "{0} get namespaces --no-headers "
    "--output custom-columns=':metadata.name'".format(kube_cmd)
)
kube_namespaces = [
n.split()[0] for n in namespaces_result["output"].splitlines()[1:] if n
]
kube_namespaces = namespaces_result["output"].splitlines()

resources = [
"pods",
Expand Down Expand Up @@ -78,23 +157,25 @@ def setup(self):
kube_cmd, kube_get_cmd, kube_namespace
)

self.add_cmd_output("{} events".format(kube_namespaced_cmd))

for res in resources:
self.add_cmd_output("{0} {1}".format(kube_namespaced_cmd, res))
for subcmd in ["events", "top pods"] + resources:
self.add_cmd_output(
"{0} {1}".format(kube_namespaced_cmd, subcmd)
)

if self.get_option("describe"):
# need to drop json formatting for this
kube_namespaced_cmd = "{0} get {1}".format(kube_cmd, kube_namespace)
for res in resources:
r = self.get_command_output(
"{0} {1}".format(kube_namespaced_cmd, res)
self.add_cmd_output("{0} {1}".format(kube_namespaced_cmd, res))

# List the names of the resources of this kind, one per line and without
# a header row ("custom-columns" is the output format name kubectl expects).
r = self.exec_cmd(
    "{0} {1} --no-headers "
    "--output custom-columns=':metadata.name'".format(
        kube_namespaced_cmd, res
    )
)
if r["status"] == 0:
kube_cmd_result = [
k.split()[0] for k in r["output"].splitlines()[1:]
]
for k in kube_cmd_result:
for k in r["output"].splitlines():
kube_namespaced_cmd = "{0} {1}".format(
kube_cmd, kube_namespace
)
Expand All @@ -106,12 +187,12 @@ def setup(self):

if self.get_option("podlogs"):
kube_namespaced_cmd = "{0} {1}".format(kube_cmd, kube_namespace)
r = self.get_command_output(
"{} get pods".format(kube_namespaced_cmd)
r = self.exec_cmd(
"{} get pods --no-headers --output "
"custom-columns=':metadata.name'".format(kube_namespaced_cmd)
)
if r["status"] == 0:
pods = [p.split()[0] for p in r["output"].splitlines()[1:]]
for pod in pods:
for pod in r["output"].splitlines():
self.add_cmd_output(
"{0} logs {1} --all-containers".format(
kube_namespaced_cmd, pod
Expand All @@ -123,6 +204,9 @@ def setup(self):
for res in resources:
self.add_cmd_output("{0} {1}".format(kube_namespaced_cmd, res))

if self.get_option("prometheus-snapshot"):
self.prometheus_snapshot()

def postproc(self):
# First, clear sensitive data from the json output collected.
# This will mask values when the 'name' looks susceptible of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ spec:
config:
retention_time: "10d"
retention_size: "0" # "0" to disable size-based retention
enable_admin_api: false
rules:
node_exporter:
node_filesystem_space_filling_up:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53907,7 +53907,7 @@ spec:
namespace: metalk8s-monitoring
pathPrefix: /
port: web
enableAdminAPI: false
enableAdminAPI: {% endraw -%}{{ prometheus.spec.config.enable_admin_api }}{%- raw %}
externalUrl: http://prometheus-operator-prometheus.metalk8s-monitoring:9090
image: {% endraw -%}{{ build_image_name("prometheus", False) }}{%- raw %}:v2.22.1
listenLocal: false
Expand Down

0 comments on commit 8395c4f

Please sign in to comment.