Skip to content

Commit

Permalink
[kubernetes] new data, namespace support, and options
Browse files Browse the repository at this point in the history
This patch collects additional data from kubernetes masters, changes the
available plugin options, and makes collection namespace aware.

By default, json output will be collected for events, limitranges, pods, pvcs,
replicationcontrollers, resourcequotas and services on a per-namespace basis.

Note that the 'serviceaccount' and 'secrets' resources are not collected due to
the high risk of exposing sensitive information.

Version and 'config view' output is now collected.

The 'all' option (default enabled) will collect a non-json listing of each
resource across all namespaces for ease of reference.

The 'describe' option (default enabled) will collect 'kubectl describe' output,
non-json, for each object of each resource in each namespace, if present.

The 'podslog' option has been renamed to 'podlogs' and is disabled by default.

Note that while this will run on OpenShift v3 masters, only resources shared by
OpenShift and 'plain' Kubernetes are collected by this plugin. OpenShift routes,
for example, are not collected.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
  • Loading branch information
TurboTurtle authored and bmr-cymru committed Jun 29, 2016
1 parent d3cb541 commit 047a207
Showing 1 changed file with 110 additions and 30 deletions.
140 changes: 110 additions & 30 deletions sos/plugins/kubernetes.py
Expand Up @@ -15,48 +15,128 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

from sos.plugins import Plugin, RedHatPlugin
from os import path


class kubernetes(Plugin, RedHatPlugin):

    """Kubernetes plugin
    """

    # Red Hat Atomic Platform and OpenShift Enterprise use the
    # atomic-openshift-master package to provide kubernetes
    packages = ('kubernetes', 'kubernetes-master', 'atomic-openshift-master')
    files = ("/etc/origin/master/master-config.yaml",)

    # Each entry is (name, description, speed hint, default value).
    option_list = [
        ("all", "also collect --all-namespaces output separately",
            'fast', True),
        ("describe", "capture descriptions of all kube resources",
            'fast', True),
        ("podlogs", "capture logs for pods", 'slow', False),
    ]

    def check_is_master(self):
        """Return True if either known master marker file exists.

        The markers checked are the apiserver key under /var/run/kubernetes
        and the OpenShift master configuration file.
        """
        return any(path.exists(marker) for marker in (
            "/var/run/kubernetes/apiserver.key",
            "/etc/origin/master/master-config.yaml",
        ))

    @staticmethod
    def _first_column(output):
        """Return the first field of every data row of tabular output.

        The first line is treated as a header and skipped; blank rows are
        ignored so that they cannot raise IndexError.
        """
        return [row.split()[0] for row in output.splitlines()[1:]
                if row.strip()]

    def setup(self):
        """Register the files, journals and kubectl output to collect.

        Configuration directories and service journals are always
        collected. kubectl output is only collected when check_is_master()
        is true, and is gathered per namespace. The 'describe', 'podlogs'
        and 'all' plugin options enable the additional collections.
        """
        self.add_copy_spec("/etc/kubernetes")
        self.add_copy_spec("/var/run/flannel")

        svcs = [
            'kubelet',
            'kube-apiserver',
            'kube-proxy',
            'kube-scheduler',
            'kube-controller-manager'
        ]

        # One call per unit rather than a single call with every unit, so
        # that the units are not merged into a single text stream.
        for svc in svcs:
            self.add_journal(units=svc)

        # We can only grab kubectl output from the master
        if not self.check_is_master():
            return

        kube_cmd = "kubectl"
        if path.exists('/etc/origin/master/admin.kubeconfig'):
            kube_cmd += " --config=/etc/origin/master/admin.kubeconfig"

        for subcmd in ['version', 'config view']:
            self.add_cmd_output('%s %s' % (kube_cmd, subcmd))

        # get all namespaces in use; if the listing itself failed, its
        # output is an error message and must not be parsed as namespaces
        kn = self.get_command_output('%s get namespaces' % kube_cmd)
        if kn['status'] == 0:
            knsps = self._first_column(kn['output'])
        else:
            knsps = []

        resources = [
            'limitrange',
            'pods',
            'pvc',
            'rc',
            'resourcequota',
            'services'
        ]

        # nodes and pvs are not namespaced, must pull separately
        self.add_cmd_output([
            "%s get -o json nodes" % kube_cmd,
            "%s get -o json pv" % kube_cmd
        ])

        for n in knsps:
            knsp = '--namespace=%s' % n
            # Separate prefixes for each kind of call: reusing a single
            # variable for all of them lets one loop corrupt the next.
            ns_cmd = '%s %s' % (kube_cmd, knsp)
            json_cmd = '%s get -o json %s' % (kube_cmd, knsp)
            list_cmd = '%s get %s' % (kube_cmd, knsp)

            self.add_cmd_output('%s events' % json_cmd)

            for res in resources:
                self.add_cmd_output('%s %s' % (json_cmd, res))

            if self.get_option('describe'):
                # need to drop json formatting for this
                for res in resources:
                    r = self.get_command_output('%s %s' % (list_cmd, res))
                    if r['status'] == 0:
                        for k in self._first_column(r['output']):
                            self.add_cmd_output(
                                '%s describe %s %s' % (ns_cmd, res, k))

            if self.get_option('podlogs'):
                r = self.get_command_output('%s get pods' % ns_cmd)
                if r['status'] == 0:
                    for pod in self._first_column(r['output']):
                        self.add_cmd_output('%s logs %s' % (ns_cmd, pod))

        if self.get_option('all'):
            k_cmd = '%s get --all-namespaces=true' % kube_cmd
            for res in resources:
                self.add_cmd_output('%s %s' % (k_cmd, res))

    def postproc(self):
        """Mask sensitive values in the collected kubectl output."""
        # First, clear sensitive data from the json output collected.
        # This will mask values when the "name" looks susceptible of
        # values worth obfuscating, i.e. if the name contains strings
        # like "pass", "pwd", "key", "token" or "cred"
        env_regexp = r'(?P<var>{\s*"name":\s*[^,]*' \
            r'(pass|pwd|key|token|cred|PASS|PWD|KEY)[^,]*,\s*"value":)[^}]*'
        self.do_cmd_output_sub('kubectl', env_regexp,
                               r'\g<var> "********"')

        # Next, we need to handle the private keys and certs in some
        # output that is not hit by the previous iteration.
        self.do_cmd_private_sub('kubectl')

# vim: et ts=5 sw=4

0 comments on commit 047a207

Please sign in to comment.