Skip to content

Commit

Permalink
KEP-22: Diagnostic Bundle Collection (#1506)
Browse files Browse the repository at this point in the history
Implementation for Diagnostics Bundle collection

Signed-off-by: Vasilii Emelin <vasilii_emelin@epam.com>

Co-authored-by: Andreas Neumann <aneumann@mesosphere.com>
Co-authored-by: Aleksey Dukhovniy <adukhovniy@mesosphere.io>
Co-authored-by: Jan Schlicht <jan@d2iq.com>
  • Loading branch information
4 people committed Jun 9, 2020
1 parent 1e51529 commit a0a7e34
Show file tree
Hide file tree
Showing 36 changed files with 4,020 additions and 8 deletions.
1 change: 1 addition & 0 deletions go.mod
Expand Up @@ -32,6 +32,7 @@ require (
k8s.io/api v0.17.3
k8s.io/apiextensions-apiserver v0.17.2
k8s.io/apimachinery v0.17.3
k8s.io/cli-runtime v0.17.3
k8s.io/client-go v0.17.3
k8s.io/code-generator v0.17.3
k8s.io/component-base v0.17.3
Expand Down
50 changes: 50 additions & 0 deletions pkg/kudoctl/cmd/diagnostics.go
@@ -0,0 +1,50 @@
package cmd

import (
"fmt"
"time"

"github.com/spf13/afero"
"github.com/spf13/cobra"

"github.com/kudobuilder/kudo/pkg/kudoctl/cmd/diagnostics"
"github.com/kudobuilder/kudo/pkg/kudoctl/util/kudo"
)

const (
diagCollectExample = ` # collect diagnostics example
kubectl kudo diagnostics collect --instance flink
`
)

func newDiagnosticsCmd(fs afero.Fs) *cobra.Command {
cmd := &cobra.Command{
Use: "diagnostics",
Short: "collect diagnostics",
Long: "diagnostics provides functionality to collect and analyze diagnostics data",
}
cmd.AddCommand(newDiagnosticsCollectCmd(fs))
return cmd
}

func newDiagnosticsCollectCmd(fs afero.Fs) *cobra.Command {
var logSince time.Duration
var instance string
cmd := &cobra.Command{
Use: "collect",
Short: "collect diagnostics",
Long: "collect data relevant for diagnostics of the provided instance's state",
Example: diagCollectExample,
RunE: func(cmd *cobra.Command, args []string) error {
c, err := kudo.NewClient(Settings.KubeConfig, Settings.RequestTimeout, Settings.Validate)
if err != nil {
return fmt.Errorf("failed to create kudo client: %v", err)
}
return diagnostics.Collect(fs, instance, diagnostics.NewOptions(logSince), c, &Settings)
},
}
cmd.Flags().StringVar(&instance, "instance", "", "The instance name.")
cmd.Flags().DurationVar(&logSince, "log-since", 0, "Only return logs newer than a relative duration like 5s, 2m, or 3h. Defaults to all logs.")

return cmd
}
132 changes: 132 additions & 0 deletions pkg/kudoctl/cmd/diagnostics/collectors.go
@@ -0,0 +1,132 @@
package diagnostics

import (
"fmt"
"io"
"path/filepath"
"reflect"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"

"github.com/kudobuilder/kudo/pkg/kudoctl/clog"
)

// Ensure collector is implemented
var _ collector = &resourceCollector{}

// resourceCollector - collector interface implementation for Kubernetes resources (runtime objects)
type resourceCollector struct {
loadResourceFn func() (runtime.Object, error)
name string // object kind used to describe the error
parentDir stringGetter // parent dir to attach the printer's output
failOnError bool // define whether the collector should return the error
callback func(runtime.Object) // will be called with the retrieved resource after collection to update shared context
printMode printMode
}

// collect - load a resource and send either the resource or collection error to printer
// return error if failOnError field is set to true
// if failOnError is true, finding no object(s) is treated as an error
func (c *resourceCollector) collect(printer *nonFailingPrinter) error {
clog.V(4).Printf("Collect Resource %s in parent dir %s", c.name, c.parentDir())
obj, err := c._collect(c.failOnError)
if err != nil {
printer.printError(err, c.parentDir(), c.name)
if c.failOnError {
return err
}
}
if obj != nil {
printer.printObject(obj, c.parentDir(), c.printMode)
}
return nil
}

func emptyResult(obj runtime.Object) bool {
return obj == nil || reflect.ValueOf(obj).IsNil() || (meta.IsListType(obj) && meta.LenList(obj) == 0)
}

func (c *resourceCollector) _collect(failOnError bool) (runtime.Object, error) {
obj, err := c.loadResourceFn()
if err != nil {
return nil, fmt.Errorf("failed to retrieve object(s) of kind %s: %v", c.name, err)
}
if emptyResult(obj) {
if failOnError {
return nil, fmt.Errorf("no object(s) of kind %s retrieved", c.name)
}
return nil, nil
}
if c.callback != nil {
c.callback(obj)
}
return obj, nil
}

// Ensure collector is implemented
var _ collector = &resourceCollectorGroup{}

// resourceCollectorGroup - a composite collector for Kubernetes runtime objects whose loading and printing depend on
// each other's side-effects on the shared context
type resourceCollectorGroup struct {
collectors []resourceCollector
}

// collect - collect resource and run callback for each collector, print all afterwards
// collection failures are treated as fatal regardless of the collectors failOnError flag setting
func (g resourceCollectorGroup) collect(printer *nonFailingPrinter) error {
clog.V(0).Printf("Collect ResourceGroup for %d collectors", len(g.collectors))
objs := make([]runtime.Object, len(g.collectors))
for i, c := range g.collectors {
obj, err := c._collect(true)
if err != nil {
printer.printError(err, c.parentDir(), c.name)
return err
}
objs[i] = obj
}
for i, c := range g.collectors {
printer.printObject(objs[i], c.parentDir(), c.printMode)
}
return nil
}

// Ensure collector is implemented
var _ collector = &logsCollector{}

type logsCollector struct {
loadLogFn func(string, string) (io.ReadCloser, error)
pods func() []v1.Pod
parentDir stringGetter
}

func (c *logsCollector) collect(printer *nonFailingPrinter) error {
clog.V(0).Printf("Collect Logs for %d pods", len(c.pods()))
for _, pod := range c.pods() {
for _, container := range pod.Spec.Containers {
log, err := c.loadLogFn(pod.Name, container.Name)
if err != nil {
printer.printError(err, filepath.Join(c.parentDir(), fmt.Sprintf("pod_%s", pod.Name)), fmt.Sprintf("%s.log", container.Name))
} else {
printer.printLog(log, filepath.Join(c.parentDir(), fmt.Sprintf("pod_%s", pod.Name)), container.Name)
_ = log.Close()
}
}
}
return nil
}

var _ collector = &objCollector{}

type objCollector struct {
obj interface{}
parentDir stringGetter
name string
}

func (c *objCollector) collect(printer *nonFailingPrinter) error {
printer.printYaml(c.obj, c.parentDir(), c.name)
return nil
}
55 changes: 55 additions & 0 deletions pkg/kudoctl/cmd/diagnostics/diagnostics.go
@@ -0,0 +1,55 @@
package diagnostics

import (
"fmt"
"strings"
"time"

"github.com/spf13/afero"

"github.com/kudobuilder/kudo/pkg/kudoctl/env"
"github.com/kudobuilder/kudo/pkg/kudoctl/util/kudo"
"github.com/kudobuilder/kudo/pkg/version"
)

type Options struct {
LogSince *int64
}

func NewOptions(logSince time.Duration) *Options {
opts := Options{}
if logSince > 0 {
sec := int64(logSince.Round(time.Second).Seconds())
opts.LogSince = &sec
}
return &opts
}

func Collect(fs afero.Fs, instance string, options *Options, c *kudo.Client, s *env.Settings) error {
if err := verifyDiagDirNotExists(fs); err != nil {
return err
}
p := &nonFailingPrinter{fs: fs}

if err := diagForInstance(instance, options, c, version.Get(), s, p); err != nil {
p.errors = append(p.errors, err.Error())
}
if err := diagForKudoManager(options, c, p); err != nil {
p.errors = append(p.errors, err.Error())
}
if len(p.errors) > 0 {
return fmt.Errorf(strings.Join(p.errors, "\n"))
}
return nil
}

func verifyDiagDirNotExists(fs afero.Fs) error {
exists, err := afero.Exists(fs, DiagDir)
if err != nil {
return fmt.Errorf("failed to verify that target directory %s doesn't exist: %v", DiagDir, err)
}
if exists {
return fmt.Errorf("target directory %s already exists", DiagDir)
}
return nil
}

0 comments on commit a0a7e34

Please sign in to comment.