[kubernetes] new data, namespace support, and options

This patch collects additional data from kubernetes masters, changes the available plugin options, and makes collection namespace aware. By default, json output will be collected for events, limitranges, pods, pvcs, replicationcontrollers, resourcequotas and services on a per-namespace basis. Note that the 'serviceaccount' and 'secrets' resources are not collected due to the high risk of exposing secure information. Version and 'config view' output is now collected. The 'all' option (default enabled) will collect a non-json listing of each resource across all namespaces for ease of reference. The 'describe' option (default enabled) will collect 'kubectl describe' output, non-json, for each object of each resource in each namespace, if present. The 'podslog' option has been renamed to 'podlogs' and is disabled by default. Note that while this will run on OpenShift v3 masters, only resources shared by OpenShift and 'plain' Kubernetes are collected by this plugin. OpenShift routes for example are not collected. Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
sosreport · Jun 29, 2016 · 047a207 · 047a207
1 parent d3cb541
commit 047a207
Showing 1 changed file with 110 additions and 30 deletions.
diff --git a/sos/plugins/kubernetes.py b/sos/plugins/kubernetes.py
@@ -15,48 +15,128 @@
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
 from sos.plugins import Plugin, RedHatPlugin
+from os import path
 
 
 class kubernetes(Plugin, RedHatPlugin):
 
     """Kubernetes plugin
     """
 
-    option_list = [("podslog", "capture logs for pods", 'slow', False)]
+    # Red Hat Atomic Platform and OpenShift Enterprise use the
+    # atomic-openshift-master package to provide kubernetes
+    packages = ('kubernetes', 'kubernetes-master', 'atomic-openshift-master')
+    files = ("/etc/origin/master/master-config.yaml",)
+
+    option_list = [
+        ("all", "also collect --all-namespaces output separately",
+            'fast', True),
+        ("describe", "capture descriptions of all kube resources",
+            'fast', True),
+        ("podlogs", "capture logs for pods", 'slow', False),
+    ]
+
+    def check_is_master(self):
+        """Return True if either master marker file exists on this host.
+
+        The files checked are the apiserver key under /var/run/kubernetes
+        and the master-config.yaml under /etc/origin/master.
+        """
+        return any([
+            path.exists("/var/run/kubernetes/apiserver.key"),
+            path.exists("/etc/origin/master/master-config.yaml")
+        ])
 
     def setup(self):
+        """Collect kubernetes configuration, journals and kubectl output.
+
+        The configuration directories and the journals of the kubernetes
+        services are always requested. kubectl output is only gathered
+        when check_is_master() returns True.
+        """
         self.add_copy_spec("/etc/kubernetes")
         self.add_copy_spec("/var/run/flannel")
 
-        # Kubernetes master info
-        self.add_cmd_output("kubectl version")
-        self.add_cmd_output("kubectl get -o json pods")
-        self.add_cmd_output("kubectl get -o json nodes")
-        self.add_cmd_output("kubectl get -o json services")
-        self.add_cmd_output("kubectl get -o json replicationController")
-        self.add_cmd_output("kubectl get -o json events")
-
-        # This could use a single call:
-        #
-        #   add_journal(units=["kubelet", "kube-apiserver", ... ])
-        #
-        # But this would merge all units into a single text stream - to
-        # preserve existing file layout in archives keep these as
-        # separate journalctl calls.
-        self.add_journal(units="kubelet")
-        self.add_journal(units="kube-apiserver")
-        self.add_journal(units="kube-controller-manager")
-        self.add_journal(units="kube-scheduler")
-        self.add_journal(units="kube-proxy")
-
-        if self.get_option('podslog'):
-            result = self.get_command_output("kubectl get pods")
-            if result['status'] == 0:
-                for line in result['output'].splitlines()[1:]:
-                    pod_name = line.split(" ")[0]
-                    self.add_cmd_output([
-                        "{0} log {1}".format("kubectl", pod_name)
-                    ])
+        svcs = [
+            'kubelet',
+            'kube-apiserver',
+            'kube-proxy',
+            'kube-scheduler',
+            'kube-controller-manager'
+        ]
+
+        # One journal call per unit keeps each service in its own file
+        for svc in svcs:
+            self.add_journal(units=svc)
+
+        # We can only grab kubectl output from the master
+        if not self.check_is_master():
+            return
+
+        kube_cmd = "kubectl"
+        # NOTE(review): upstream kubectl documents --kubeconfig; confirm
+        # that --config is accepted by the kubectl shipped on these hosts
+        if path.exists('/etc/origin/master/admin.kubeconfig'):
+            kube_cmd += " --config=/etc/origin/master/admin.kubeconfig"
+
+        for subcmd in ['version', 'config view']:
+            self.add_cmd_output('%s %s' % (kube_cmd, subcmd))
+
+        # get all namespaces in use; empty if the command fails
+        knsps = self._get_kube_names('%s get namespaces' % kube_cmd)
+
+        resources = [
+            'limitrange',
+            'pods',
+            'pvc',
+            'rc',
+            'resourcequota',
+            'services'
+        ]
+
+        # nodes and pvs are not namespaced, must pull separately
+        self.add_cmd_output([
+            "%s get -o json nodes" % kube_cmd,
+            "%s get -o json pv" % kube_cmd
+        ])
+
+        for n in knsps:
+            knsp = '--namespace=%s' % n
+            # Keep one prefix per purpose. Reusing a single variable
+            # previously dropped 'get' from later listings and added a
+            # stray 'get' to the logs command.
+            get_json_cmd = '%s get -o json %s' % (kube_cmd, knsp)
+            get_cmd = '%s get %s' % (kube_cmd, knsp)
+            nsp_cmd = '%s %s' % (kube_cmd, knsp)
+
+            self.add_cmd_output('%s events' % get_json_cmd)
+
+            for res in resources:
+                self.add_cmd_output('%s %s' % (get_json_cmd, res))
+
+            if self.get_option('describe'):
+                for res in resources:
+                    # need to drop json formatting to list object names
+                    names = self._get_kube_names('%s %s' % (get_cmd, res))
+                    for name in names:
+                        self.add_cmd_output(
+                            '%s describe %s %s' % (nsp_cmd, res, name))
+
+            if self.get_option('podlogs'):
+                for pod in self._get_kube_names('%s pods' % get_cmd):
+                    self.add_cmd_output('%s logs %s' % (nsp_cmd, pod))
+
+        if self.get_option('all'):
+            k_cmd = '%s get --all-namespaces=true' % kube_cmd
+            for res in resources:
+                self.add_cmd_output('%s %s' % (k_cmd, res))
+
+    def _get_kube_names(self, cmd):
+        """Run cmd and return the first column of every non-header row.
+
+        The first output line is treated as a header and skipped, as are
+        blank lines. An empty list is returned when the command exits
+        with a non-zero status.
+        """
+        result = self.get_command_output(cmd)
+        if result['status'] != 0:
+            return []
+        names = []
+        for line in result['output'].splitlines()[1:]:
+            fields = line.split()
+            if fields:
+                names.append(fields[0])
+        return names
+
+    def postproc(self):
+        """Mask sensitive values in the collected kubectl output."""
+        # First, clear sensitive data from the json output collected.
+        # This masks the "value" of any entry whose "name" contains one
+        # of the keywords in the pattern below, e.g. "pass", "pwd",
+        # "key", "token" or "cred".
+        env_regexp = r'(?P<var>{\s*"name":\s*[^,]*' \
+            r'(pass|pwd|key|token|cred|PASS|PWD|KEY)[^,]*,\s*"value":)[^}]*'
+        self.do_cmd_output_sub('kubectl', env_regexp,
+                               r'\g<var> "********"')
 
+        # Next, we need to handle the private keys and certs in some
+        # output that is not hit by the previous iteration.
+        self.do_cmd_private_sub('kubectl')
 
 # vim: et ts=5 sw=4