Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cli): A: Add ability to dump pprof data to logs ... #3454

Merged
merged 14 commits into from
Nov 28, 2023
Merged
383 changes: 383 additions & 0 deletions debug/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,383 @@
// Package debug for debug helper functions.
package debug
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved

import (
"bufio"
"bytes"
"context"
"encoding/pem"
"errors"
"fmt"
"io"
"os"
"runtime"
"runtime/pprof"
"strconv"
"strings"
"sync"

"github.com/kopia/kopia/repo/logging"
)

var log = logging.Module("kopia/debug")

// ProfileName the name of the profile (see: runtime/pprof/Lookup).
type ProfileName string

const (
pair = 2
)

const (
// DefaultDebugProfileRate default sample/data fraction for profile sample collection rates (1/x, where x is the
// data fraction sample rate).
DefaultDebugProfileRate = 100
// DefaultDebugProfileDumpBufferSizeB default size of the pprof output buffer.
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
DefaultDebugProfileDumpBufferSizeB = 1 << 17
)

const (
// EnvVarKopiaDebugPprof environment variable that contains the pprof dump configuration.
EnvVarKopiaDebugPprof = "KOPIA_DEBUG_PPROF"
)

// flags used to configure profiling in EnvVarKopiaDebugPprof.
const (
// KopiaDebugFlagForceGc force garbage collection before dumping heap data.
KopiaDebugFlagForceGc = "forcegc"
// KopiaDebugFlagDebug value of the profiles `debug` parameter.
KopiaDebugFlagDebug = "debug"
// KopiaDebugFlagRate rate setting for the named profile (if available). always an integer.
KopiaDebugFlagRate = "rate"
)

const (
// ProfileNameBlock block profile key.
ProfileNameBlock ProfileName = "block"
// ProfileNameMutex mutex profile key.
ProfileNameMutex = "mutex"
// ProfileNameCPU cpu profile key.
ProfileNameCPU = "cpu"
)

// ProfileConfig configuration flags for a profile.
type ProfileConfig struct {
flags []string
buf *bytes.Buffer
}

// ProfileConfigs configuration flags for all requested profiles.
type ProfileConfigs struct {
mu sync.Mutex

// +checklocks:mu
pcm map[ProfileName]*ProfileConfig
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
}

//nolint:gochecknoglobals
var pprofConfigs = &ProfileConfigs{}

type pprofSetRate struct {
fn func(int)
def int
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
}

//nolint:gochecknoglobals
var pprofProfileRates = map[ProfileName]pprofSetRate{
ProfileNameBlock: {
fn: func(x int) { runtime.SetBlockProfileRate(x) },
def: DefaultDebugProfileRate,
},
ProfileNameMutex: {
fn: func(x int) { runtime.SetMutexProfileFraction(x) },
def: DefaultDebugProfileRate,
},
}

// GetValue get the value of the named flag, `s`. False will be returned
// if the flag does not exist. True will be returned if flag exists without
// a value.
func (p ProfileConfig) GetValue(s string) (string, bool) {
for _, f := range p.flags {
kvs := strings.SplitN(f, "=", pair)
if kvs[0] != s {
continue
}

if len(kvs) == 1 {
return "", true
}

return kvs[1], true
}

return "", false
}

func parseProfileConfigs(bufSizeB int, ppconfigs string) map[ProfileName]*ProfileConfig {
pbs := map[ProfileName]*ProfileConfig{}
allProfileOptions := strings.Split(ppconfigs, ":")

for _, profileOptionWithFlags := range allProfileOptions {
// of those, see if any have profile specific settings
profileFlagNameValuePairs := strings.SplitN(profileOptionWithFlags, "=", pair)
flagValue := ""

if len(profileFlagNameValuePairs) > 1 {
flagValue = profileFlagNameValuePairs[1]
}

flagKey := ProfileName(strings.ToLower(profileFlagNameValuePairs[0]))
pbs[flagKey] = newProfileConfig(bufSizeB, flagValue)
}

return pbs
}

// newProfileConfig create a new profiling configuration.
func newProfileConfig(bufSizeB int, ppconfig string) *ProfileConfig {
q := &ProfileConfig{
buf: bytes.NewBuffer(make([]byte, 0, bufSizeB)),
}

flgs := strings.Split(ppconfig, ",")
if len(flgs) > 0 && flgs[0] != "" { // len(flgs) > 1 && flgs[0] == "" should never happen
q.flags = flgs
}

return q
}

func setupProfileFractions(ctx context.Context, profileBuffers map[ProfileName]*ProfileConfig) {
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
for k, pprofset := range pprofProfileRates {
v, ok := profileBuffers[k]
if !ok {
// profile not configured - leave it alone
continue
}

if v == nil {
// profile configured, but no rate - set to default
pprofset.fn(pprofset.def)
continue
}

s, _ := v.GetValue(KopiaDebugFlagRate)
if s == "" {
// flag without an argument - set to default
pprofset.fn(pprofset.def)
continue
}

n1, err := strconv.Atoi(s)
if err != nil {
log(ctx).With("cause", err).Warnf("invalid PPROF rate, %q, for %s: %v", s, k)
continue
}

log(ctx).Debugf("setting PPROF rate, %d, for %s", n1, k)
pprofset.fn(n1)
}
}

// ClearProfileFractions set the profile fractions to their zero values.
func ClearProfileFractions(profileBuffers map[ProfileName]*ProfileConfig) {
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
for k, pprofset := range pprofProfileRates {
v := profileBuffers[k]
if v == nil { // fold missing values and empty values
continue
}

_, ok := v.GetValue(KopiaDebugFlagRate)
if !ok { // only care if a value might have been set before
continue
}

pprofset.fn(0)
}
}

// StartProfileBuffers start profile buffers for enabled profiles/trace. Buffers
// are returned in an slice of buffers: CPU, Heap and trace respectively. class is used to distinguish profiles
// external to kopia.
func StartProfileBuffers(ctx context.Context) {
ppconfigs := os.Getenv(EnvVarKopiaDebugPprof)
// if empty, then don't bother configuring but emit a log message - use might be expecting them to be configured
if ppconfigs == "" {
log(ctx).Debug("no profile buffers enabled")
return
}

bufSizeB := DefaultDebugProfileDumpBufferSizeB

// look for matching services. "*" signals all services for profiling
log(ctx).Debug("configuring profile buffers")

// acquire global lock when performing operations with global side-effects
pprofConfigs.mu.Lock()
defer pprofConfigs.mu.Unlock()

pprofConfigs.pcm = parseProfileConfigs(bufSizeB, ppconfigs)

// profiling rates need to be set before starting profiling
setupProfileFractions(ctx, pprofConfigs.pcm)

// cpu has special initialization
v, ok := pprofConfigs.pcm[ProfileNameCPU]
if ok {
err := pprof.StartCPUProfile(v.buf)
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log(ctx).With("cause", err).Warn("cannot start cpu PPROF")
delete(pprofConfigs.pcm, ProfileNameCPU)
}
}
}

// DumpPem dump a PEM version of the byte slice, bs, into writer, wrt.
func DumpPem(bs []byte, types string, wrt *os.File) error {
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
// err0 for background process
var err0 error

blk := &pem.Block{
Type: types,
Bytes: bs,
}
// wrt is likely a line oriented writer, so writing individual lines
// will make best use of output buffer and help prevent overflows or
// stalls in the output path.
pr, pw := io.Pipe()
// encode PEM in the background and output in a line oriented
// fashion - this prevents the need for a large buffer to hold
// the encoded PEM.
go func() {
aaron-kasten marked this conversation as resolved.
Show resolved Hide resolved
// writer close on exit of background process
//nolint:errcheck
defer pw.Close()
// do the encoding
err0 = pem.Encode(pw, blk)
if err0 != nil {
return
}
}()

// connect rdr to pipe reader
rdr := bufio.NewReader(pr)

// err1 for reading
// err2 for writing
var err1, err2 error
for err1 == nil && err2 == nil {
var ln []byte
ln, err1 = rdr.ReadBytes('\n')
// err1 can return ln and non-nil err1, so always call write
_, err2 = wrt.Write(ln)
}

// got a write error. this has precedent
if err2 != nil {
return fmt.Errorf("could not write PEM: %w", err2)
}

// did not get a read error. file ends in newline
if err1 == nil {
return nil
}

// if file does not end in newline, then output one
if errors.Is(err1, io.EOF) {
_, err2 = wrt.WriteString("\n")
if err2 != nil {
return fmt.Errorf("could not write PEM: %w", err2)
}

return io.EOF
}

return fmt.Errorf("error reading bytes: %w", err1)
}

func parseDebugNumber(v *ProfileConfig) (int, error) {
debugs, ok := v.GetValue(KopiaDebugFlagDebug)
if !ok {
return 0, nil
}

debug, err := strconv.Atoi(debugs)
if err != nil {
return 0, fmt.Errorf("could not parse number %q: %w", debugs, err)
}

return debug, nil
}

// StopProfileBuffers stop and dump the contents of the buffers to the log as PEMs. Buffers
// supplied here are from StartProfileBuffers.
func StopProfileBuffers(ctx context.Context) {
pprofConfigs.mu.Lock()
defer pprofConfigs.mu.Unlock()

if pprofConfigs == nil {
log(ctx).Debug("profile buffers not configured")
return
}

log(ctx).Debug("saving PEM buffers for output")
// cpu and heap profiles requires special handling
for k, v := range pprofConfigs.pcm {
log(ctx).Debugf("stopping PPROF profile %q", k)

if v == nil {
continue
}

if k == ProfileNameCPU {
pprof.StopCPUProfile()
continue
}

_, ok := v.GetValue(KopiaDebugFlagForceGc)
if ok {
log(ctx).Debug("performing GC before PPROF dump ...")
runtime.GC()
}
Comment on lines +341 to +345
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should KopiaDebugFlagForceGc be a "global" flag that applies to all profiles?
or

Should runtime.GC() be only called once even if multiple profiles specify the flag?

Thoughts?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to ensure that GC was run immediately before any memory profiling. This will allow users to check for memory leaks. There's a chance that other operations in the loop will allocate additional memory that I did not want to show up on the profile.

I did not want to special case which profiles allowed GC because PPROF dumps can be added by users - and there's no determining which profiles will benefit from an immediate GC. I could not conceive a scenario where only one GC would be required to produce accurate results.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Asking because it is not clear to me whether a GC is needed before dumping each profile, or what difference it makes. To be clear, I don't have a good answer.

When stopping and dumping the profiles, it seems that a possible sequence would be

GC -> dump profile 1 -> GC -> dump profile 2 -> GC -> dump profile 3

In this case profile 2 would contain some information (allocations or side effects) coming from performing dump profile 1. Similarly, profile 3 would contain information coming from dumping profiles 1 and 2.

An alternative sequence would be.

GC -> dump profile 1, dump profile 2 -> dump profile 3

Not sure if it would make a difference, or if it matters.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anyway 🤷🏼‍♂️


debug, err := parseDebugNumber(v)
if err != nil {
log(ctx).With("cause", err).Warn("invalid PPROF configuration debug number")
continue
}

pent := pprof.Lookup(string(k))
if pent == nil {
log(ctx).Warnf("no system PPROF entry for %q", k)
delete(pprofConfigs.pcm, k)

continue
}

err = pent.WriteTo(v.buf, debug)
if err != nil {
log(ctx).With("cause", err).Warn("error writing PPROF buffer")

continue
}
}
// dump the profiles out into their respective PEMs
for k, v := range pprofConfigs.pcm {
if v == nil {
continue
}

unm := strings.ToUpper(string(k))
log(ctx).Infof("dumping PEM for %q", unm)

err := DumpPem(v.buf.Bytes(), unm, os.Stderr)
if err != nil {
log(ctx).With("cause", err).Error("cannot write PEM")
}
}

// clear the profile rates and fractions to effectively stop profiling
ClearProfileFractions(pprofConfigs.pcm)
pprofConfigs.pcm = map[ProfileName]*ProfileConfig{}
}