Document telemetry, backtrace only on crash, ignore submit errors (#388)
* Document telemetry in README and improve crash message redaction

* Update README.md

Co-authored-by: Tom Wieczorek <twz123@users.noreply.github.com>

* Only send a cleaned up backtrace upon crash

* Suppress analytics error messages and ignore any failures

* The test panic was not meant to be committed

Co-authored-by: Tom Wieczorek <twz123@users.noreply.github.com>
kke and twz123 committed May 24, 2022
1 parent 4dbda8f commit a2d5fc2
Showing 10 changed files with 60 additions and 31 deletions.
19 changes: 18 additions & 1 deletion README.md
@@ -88,13 +88,30 @@ k0sctl completion > /usr/local/share/zsh/site-functions/_k0sctl
k0sctl completion > ~/.config/fish/completions/k0sctl.fish
```

## Anonymous telemetry

K0sctl sends anonymized telemetry data when it is used. This can be disabled via the `--disable-telemetry` flag or by setting the environment variable `DISABLE_TELEMETRY=true`.

The telemetry data includes:

- K0sctl version
- Operating system + CPU architecture ("linux x86", "darwin arm64", ...)
- An anonymous machine ID generated by [denisbrodbeck/machineid](https://github.com/denisbrodbeck/machineid) or, if that fails, an MD5 sum of the hostname (see the sketch below)
- Event information:
* Phase name ("Connecting to hosts", "Gathering facts", ...) and how long it took to finish
* Cluster UUID (`kubectl get -n kube-system namespace kube-system -o template={{.metadata.uid}}`)
* Was k0s dynamic config enabled (true/false)
* Was a custom or the default k0s configuration used (true/false)
* In case of a crash, a backtrace with source filenames and line numbers only

The data is used to estimate the number of users and to identify failure hotspots.
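
For illustration, the anonymous machine ID described above can be derived roughly as follows. This is a minimal sketch that assumes the plain `machineid.ID()` call from the linked library and a hypothetical `anonymousID` helper; the exact call and fallback k0sctl uses may differ:

```go
package main

import (
	"crypto/md5"
	"fmt"
	"os"

	"github.com/denisbrodbeck/machineid"
)

// anonymousID returns a stable but anonymous machine identifier: the ID
// reported by the machineid library, or an MD5 sum of the hostname when
// that lookup fails.
func anonymousID() string {
	if id, err := machineid.ID(); err == nil {
		return id
	}
	hostname, err := os.Hostname()
	if err != nil {
		hostname = "unknown" // hypothetical fallback for this sketch
	}
	return fmt.Sprintf("%x", md5.Sum([]byte(hostname)))
}

func main() {
	fmt.Println(anonymousID())
}
```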

## Development status

K0sctl is ready for use and in continuous development. It is still at a stage where maintaining backwards compatibility is not a high-priority goal.

Missing major features include at least:

* Windows targets are not yet supported
* The released binaries have not been signed
* Nodes can't be removed
* The configuration specification and command-line interface options are still evolving
5 changes: 2 additions & 3 deletions analytics/analytics.go
@@ -5,7 +5,7 @@ import (
)

type publisher interface {
Publish(string, map[string]interface{}) error
Publish(string, map[string]interface{})
Close()
}

@@ -21,9 +21,8 @@ func (c *NullClient) Initialize() error {
}

// Publish would send a tracking event
func (c *NullClient) Publish(event string, props map[string]interface{}) error {
func (c *NullClient) Publish(event string, props map[string]interface{}) {
log.Tracef("analytics event %s - properties: %+v", event, props)
return nil
}

// Close the analytics connection
4 changes: 3 additions & 1 deletion analytics/phase.go
@@ -54,5 +54,7 @@ func (p *Phase) After(result error) error {
event = "phase-failure"
}

return Client.Publish(event, p.props)
Client.Publish(event, p.props)

return nil
}
8 changes: 3 additions & 5 deletions cmd/apply.go
@@ -78,12 +78,10 @@ var applyCommand = &cli.Command{
&phase.Disconnect{},
)

if err := analytics.Client.Publish("apply-start", map[string]interface{}{}); err != nil {
return err
}
analytics.Client.Publish("apply-start", map[string]interface{}{})

if err := manager.Run(); err != nil {
_ = analytics.Client.Publish("apply-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
analytics.Client.Publish("apply-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
if lf, err := LogFile(); err == nil {
if ln, ok := lf.(interface{ Name() string }); ok {
log.Errorf("apply failed - log file saved to %s", ln.Name())
@@ -92,7 +90,7 @@ var applyCommand = &cli.Command{
return err
}

_ = analytics.Client.Publish("apply-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
analytics.Client.Publish("apply-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})

duration := time.Since(start).Truncate(time.Second)
text := fmt.Sprintf("==> Finished in %s", duration)
8 changes: 3 additions & 5 deletions cmd/backup.go
@@ -39,12 +39,10 @@ var backupCommand = &cli.Command{
&phase.Disconnect{},
)

if err := analytics.Client.Publish("backup-start", map[string]interface{}{}); err != nil {
return err
}
analytics.Client.Publish("backup-start", map[string]interface{}{})

if err := manager.Run(); err != nil {
_ = analytics.Client.Publish("backup-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
analytics.Client.Publish("backup-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
if lf, err := LogFile(); err == nil {
if ln, ok := lf.(interface{ Name() string }); ok {
log.Errorf("backup failed - log file saved to %s", ln.Name())
@@ -53,7 +51,7 @@ var backupCommand = &cli.Command{
return err
}

_ = analytics.Client.Publish("backup-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
analytics.Client.Publish("backup-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})

duration := time.Since(start).Truncate(time.Second)
text := fmt.Sprintf("==> Finished in %s", duration)
4 changes: 1 addition & 3 deletions cmd/config_edit.go
@@ -46,9 +46,7 @@ var configEditCommand = &cli.Command{
return fmt.Errorf("output is not a terminal")
}

if err := analytics.Client.Publish("config-edit-start", map[string]interface{}{}); err != nil {
return err
}
analytics.Client.Publish("config-edit-start", map[string]interface{}{})

editor, err := shellEditor()
if err != nil {
4 changes: 1 addition & 3 deletions cmd/config_status.go
@@ -29,9 +29,7 @@ var configStatusCommand = &cli.Command{
Before: actions(initLogging, startCheckUpgrade, initConfig, initAnalytics),
After: actions(reportCheckUpgrade, closeAnalytics),
Action: func(ctx *cli.Context) error {
if err := analytics.Client.Publish("config-status-start", map[string]interface{}{}); err != nil {
return err
}
analytics.Client.Publish("config-status-start", map[string]interface{}{})

c := ctx.Context.Value(ctxConfigKey{}).(*v1beta1.Cluster)
h := c.Spec.K0sLeader()
8 changes: 3 additions & 5 deletions cmd/reset.go
@@ -63,16 +63,14 @@ var resetCommand = &cli.Command{
&phase.Disconnect{},
)

if err := analytics.Client.Publish("reset-start", map[string]interface{}{}); err != nil {
return err
}
analytics.Client.Publish("reset-start", map[string]interface{}{})

if err := manager.Run(); err != nil {
_ = analytics.Client.Publish("reset-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
analytics.Client.Publish("reset-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
return err
}

_ = analytics.Client.Publish("reset-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})
analytics.Client.Publish("reset-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID})

duration := time.Since(start).Truncate(time.Second)
text := fmt.Sprintf("==> Finished in %s", duration)
7 changes: 5 additions & 2 deletions integration/segment/segment.go
@@ -51,14 +51,17 @@ func NewClient() (*Client, error) {
}

// Publish enqueues the sending of a tracking event
func (c Client) Publish(event string, props map[string]interface{}) error {
func (c Client) Publish(event string, props map[string]interface{}) {
log.Tracef("segment event %s - properties: %+v", event, props)
return c.client.Enqueue(segment.Track{
err := c.client.Enqueue(segment.Track{
Context: ctx,
AnonymousId: c.machineID,
Event: event,
Properties: props,
})
if err != nil {
log.Debugf("failed to submit telemetry: %s", err)
}
}

// Close the analytics connection
24 changes: 21 additions & 3 deletions main.go
@@ -1,8 +1,9 @@
package main

import (
"fmt"
"os"
"runtime"
"strings"

"github.com/k0sproject/k0sctl/analytics"
"github.com/k0sproject/k0sctl/cmd"
@@ -11,8 +12,25 @@ import (

func handlepanic() {
if err := recover(); err != nil {
_ = analytics.Client.Publish("panic", map[string]interface{}{"error": fmt.Sprint(err)})
log.Fatalf("PANIC: %s", err)
buf := make([]byte, 1<<16)
ss := runtime.Stack(buf, true)
msg := string(buf[:ss])
var bt []string
for _, row := range strings.Split(msg, "\n") {
if !strings.HasPrefix(row, "\t") {
continue
}
if strings.Contains(row, "main.") {
continue
}
if strings.Contains(row, "panic") {
continue
}
bt = append(bt, strings.TrimSpace(row))
}

analytics.Client.Publish("panic", map[string]interface{}{"backtrace": strings.Join(bt, "\n")})
log.Fatalf("PANIC: %v\n", err)
}
}
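
Since `recover` only has an effect inside a deferred function, a handler like `handlepanic` has to be installed with `defer` before the rest of the program runs. Below is a minimal sketch of that wiring, with a hypothetical `run` function standing in for the real CLI entry point; the actual `main` in this repository is not shown in the diff and may differ:

```go
package main

import "log"

// handlepanic recovers from a panic and aborts; the real version in this
// commit also publishes a cleaned-up backtrace before exiting.
func handlepanic() {
	if err := recover(); err != nil {
		log.Fatalf("PANIC: %v", err)
	}
}

func run() {
	// ... application logic that may panic ...
}

func main() {
	// The handler must be deferred before any code that might panic runs.
	defer handlepanic()
	run()
}
```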

