forked from DataDog/dd-trace-go
/
options.go
558 lines (511 loc) · 18.8 KB
/
options.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016 Datadog, Inc.
package profiler
import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"unicode"
"github.com/lannguyen-c0x12c/dd-trace-go/internal"
"github.com/lannguyen-c0x12c/dd-trace-go/internal/globalconfig"
"github.com/lannguyen-c0x12c/dd-trace-go/internal/log"
"github.com/lannguyen-c0x12c/dd-trace-go/internal/osinfo"
"github.com/lannguyen-c0x12c/dd-trace-go/internal/traceprof"
"github.com/lannguyen-c0x12c/dd-trace-go/internal/version"
"github.com/lannguyen-c0x12c/dd-trace-go/profiler/internal/immutable"
"github.com/DataDog/datadog-go/v5/statsd"
)
// Exported default values. defaultConfig uses DefaultMutexFraction,
// DefaultBlockRate, DefaultPeriod, DefaultDuration and DefaultUploadTimeout as
// the starting values of the corresponding config fields.
const (
// DefaultMutexFraction specifies the mutex profile fraction to be used with the mutex profiler.
// For more information or for changing this value, check MutexProfileFraction.
DefaultMutexFraction = 10
// DefaultBlockRate specifies the default block profiling rate (in ns) used
// by the block profiler. For more information or for changing this value,
// check BlockProfileRate(). The default value of 100ms is somewhat
// arbitrary. There is no provably safe value that will guarantee low
// overhead for this profile type for all workloads. We don't recommend
// enabling it under normal circumstances. See the link below for more
// information: https://github.com/DataDog/go-profiler-notes/pull/15/files
DefaultBlockRate = 100000000 // 100ms expressed in nanoseconds
// DefaultPeriod specifies the default period at which profiles will be collected.
DefaultPeriod = time.Minute
// DefaultDuration specifies the default length of the CPU profile snapshot.
DefaultDuration = time.Minute
// DefaultUploadTimeout specifies the default timeout for uploading profiles.
// It can be overwritten using the DD_PROFILING_UPLOAD_TIMEOUT env variable
// or the WithUploadTimeout option.
DefaultUploadTimeout = 10 * time.Second
)
// Default upload destinations used by defaultConfig.
const (
// defaultAPIURL is the initial value of config.apiURL.
defaultAPIURL = "https://intake.profile.datadoghq.com/v1/input"
// defaultAgentHost is the agent host used unless DD_AGENT_HOST is set.
defaultAgentHost = "localhost"
// defaultAgentPort is the agent port used unless DD_TRACE_AGENT_PORT is set.
defaultAgentPort = "8126"
)
// defaultClient is the HTTP client that defaultConfig installs as
// config.httpClient. WithHTTPClient and WithUDS replace it.
//
// No http.Client.Timeout is set here.
// NOTE(review): uploads are presumably bounded through config.uploadTimeout
// instead; confirm in the upload code.
var defaultClient = &http.Client{
// We copy the transport to avoid using the default one, as it might be
// augmented with tracing and we don't want these calls to be recorded.
// See https://golang.org/pkg/net/http/#DefaultTransport .
Transport: &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
DualStack: true,
}).DialContext,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
},
}
// defaultProfileTypes lists the profile types that defaultConfig enables.
// WithProfileTypes replaces this selection (it always keeps MetricsProfile).
var defaultProfileTypes = []ProfileType{MetricsProfile, CPUProfile, HeapProfile}
// config holds the profiler settings. defaultConfig creates it from built-in
// defaults and environment variables, and every Option in this file mutates
// one or more of its fields.
type config struct {
// apiKey is the Datadog API key (WithAPIKey or DD_API_KEY).
apiKey string
// agentless is set by WithAgentlessUpload or DD_PROFILING_AGENTLESS.
agentless bool
// targetURL is the upload destination URL. It will be set by the profiler on start to either apiURL or agentURL
// based on the other options.
targetURL string
apiURL string // apiURL is the Datadog intake API URL
agentURL string // agentURL is the Datadog agent profiling URL
// service defaults to the base name of os.Args[0]; env is empty unless
// WithEnv or DD_ENV provides a value.
service, env string
// hostname is set by WithHostname.
hostname string
// statsd is the metrics client; defaultConfig installs a no-op client.
statsd StatsdClient
// httpClient is the client used when submitting profiles.
httpClient *http.Client
// tags are the tags attached to the profiler (see WithTags).
tags immutable.StringSlice
// types is the set of enabled profile types.
types map[ProfileType]struct{}
// period is the interval at which profiles are collected.
period time.Duration
// cpuDuration is the length of each CPU profile.
cpuDuration time.Duration
// cpuProfileRate is the CPU sampling rate in hz; it stays 0 unless
// CPUProfileRate is used.
cpuProfileRate int
// uploadTimeout is the timeout for uploading profiles.
uploadTimeout time.Duration
// maxGoroutinesWait defaults to 1000 and can be overridden through
// DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES.
maxGoroutinesWait int
// mutexFraction is the mutex profile fraction (see MutexProfileFraction).
mutexFraction int
// blockRate is the block profile rate in nanoseconds (see BlockProfileRate).
blockRate int
// outputDir, when set through withOutputDir, is the directory that
// receives a copy of all uploaded profiles.
outputDir string
// deltaProfiles reports whether delta profiles are enabled.
deltaProfiles bool
// deltaMethod holds the raw value of DD_PROFILING_DELTA_METHOD.
deltaMethod string
// logStartup reports whether the configuration is logged at start.
logStartup bool
// traceEnabled reports whether execution trace recording is enabled.
traceEnabled bool
// traceConfig holds the execution trace period and size limit.
traceConfig executionTraceConfig
// endpointCountEnabled is read from the environment variable named by
// traceprof.EndpointCountEnvVar.
endpointCountEnabled bool
}
// logStartup serializes the effective profiler configuration, together with
// basic OS and runtime details, to JSON and writes it to the info log. If the
// serialization fails, an error is logged instead and nothing else is emitted.
func logStartup(c *config) {
	// startupInfo defines the JSON layout of the startup log entry.
	type startupInfo struct {
		Date                 string   `json:"date"`         // start time, RFC 3339
		OSName               string   `json:"os_name"`      // as reported by osinfo.OSName
		OSVersion            string   `json:"os_version"`   // as reported by osinfo.OSVersion
		Version              string   `json:"version"`      // profiler version tag
		Lang                 string   `json:"lang"`         // always "Go"
		LangVersion          string   `json:"lang_version"` // runtime.Version()
		Hostname             string   `json:"hostname"`
		DeltaProfiles        bool     `json:"delta_profiles"`
		DeltaMethod          string   `json:"delta_method"`
		Service              string   `json:"service"`
		Env                  string   `json:"env"`
		TargetURL            string   `json:"target_url"`
		Agentless            bool     `json:"agentless"`
		Tags                 []string `json:"tags"`
		ProfilePeriod        string   `json:"profile_period"`
		EnabledProfiles      []string `json:"enabled_profiles"`
		CPUDuration          string   `json:"cpu_duration"`
		CPUProfileRate       int      `json:"cpu_profile_rate"`
		BlockProfileRate     int      `json:"block_profile_rate"`
		MutexProfileFraction int      `json:"mutex_profile_fraction"`
		MaxGoroutinesWait    int      `json:"max_goroutines_wait"`
		UploadTimeout        string   `json:"upload_timeout"`
		TraceEnabled         bool     `json:"execution_trace_enabled"`
		TracePeriod          string   `json:"execution_trace_period"`
		TraceSizeLimit       int      `json:"execution_trace_size_limit"`
		EndpointCountEnabled bool     `json:"endpoint_count_enabled"`
	}

	// The enabled types live in a map, so their order in the log entry
	// follows Go's unspecified map iteration order.
	var enabled []string
	for pt := range c.types {
		enabled = append(enabled, pt.String())
	}

	payload, err := json.Marshal(startupInfo{
		Date:                 time.Now().Format(time.RFC3339),
		OSName:               osinfo.OSName(),
		OSVersion:            osinfo.OSVersion(),
		Version:              version.Tag,
		Lang:                 "Go",
		LangVersion:          runtime.Version(),
		Hostname:             c.hostname,
		DeltaProfiles:        c.deltaProfiles,
		DeltaMethod:          c.deltaMethod,
		Service:              c.service,
		Env:                  c.env,
		TargetURL:            c.targetURL,
		Agentless:            c.agentless,
		Tags:                 c.tags.Slice(),
		ProfilePeriod:        c.period.String(),
		EnabledProfiles:      enabled,
		CPUDuration:          c.cpuDuration.String(),
		CPUProfileRate:       c.cpuProfileRate,
		BlockProfileRate:     c.blockRate,
		MutexProfileFraction: c.mutexFraction,
		MaxGoroutinesWait:    c.maxGoroutinesWait,
		UploadTimeout:        c.uploadTimeout.String(),
		TraceEnabled:         c.traceEnabled,
		TracePeriod:          c.traceConfig.Period.String(),
		TraceSizeLimit:       c.traceConfig.Limit,
		EndpointCountEnabled: c.endpointCountEnabled,
	})
	if err != nil {
		log.Error("Marshaling profiler configuration: %s", err)
		return
	}
	log.Info("Profiler configuration: %s\n", payload)
}
// urlForSite builds the profile intake URL for the given Datadog site (for
// example "datadoghq.eu"). The formatted URL is always returned; the error is
// non-nil when the result cannot be parsed as a URL.
func urlForSite(site string) (string, error) {
	endpoint := fmt.Sprintf("https://intake.profile.%s/v1/input", site)
	if _, parseErr := url.Parse(endpoint); parseErr != nil {
		return endpoint, parseErr
	}
	return endpoint, nil
}
// isAPIKeyValid reports whether key has the structure of an API key: exactly
// 32 bytes long, each of them an ASCII lowercase letter or digit.
func isAPIKeyValid(key string) bool {
	const wantLen = 32
	if len(key) != wantLen {
		return false
	}
	for _, r := range key {
		switch {
		case r > unicode.MaxASCII:
			return false
		case unicode.IsLower(r), unicode.IsNumber(r):
			// acceptable character, keep scanning
		default:
			return false
		}
	}
	return true
}
// addProfileType marks t as enabled, allocating the set of profile types on
// first use.
func (c *config) addProfileType(t ProfileType) {
	if c.types != nil {
		c.types[t] = struct{}{}
		return
	}
	c.types = map[ProfileType]struct{}{t: {}}
}
// defaultConfig returns the baseline profiler configuration: built-in defaults
// overlaid with values read from the environment.
//
// It returns an error when DD_PROFILING_UPLOAD_TIMEOUT is not a valid duration
// or DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES is not a valid integer. The
// underlying parse error is wrapped, so callers can inspect it with errors.Is
// or errors.As.
func defaultConfig() (*config, error) {
	c := config{
		apiURL:               defaultAPIURL,
		service:              filepath.Base(os.Args[0]),
		statsd:               &statsd.NoOpClient{},
		httpClient:           defaultClient,
		period:               DefaultPeriod,
		cpuDuration:          DefaultDuration,
		blockRate:            DefaultBlockRate,
		mutexFraction:        DefaultMutexFraction,
		uploadTimeout:        DefaultUploadTimeout,
		maxGoroutinesWait:    1000, // arbitrary value, should limit STW to ~30ms
		deltaProfiles:        internal.BoolEnv("DD_PROFILING_DELTA", true),
		deltaMethod:          os.Getenv("DD_PROFILING_DELTA_METHOD"),
		logStartup:           internal.BoolEnv("DD_TRACE_STARTUP_LOGS", true),
		endpointCountEnabled: internal.BoolEnv(traceprof.EndpointCountEnvVar, false),
	}
	c.tags = c.tags.Append(fmt.Sprintf("process_id:%d", os.Getpid()))
	for _, t := range defaultProfileTypes {
		c.addProfileType(t)
	}

	// Agent address: start from the default host/port, let DD_AGENT_HOST and
	// DD_TRACE_AGENT_PORT override each part individually.
	agentHost, agentPort := defaultAgentHost, defaultAgentPort
	if v := os.Getenv("DD_AGENT_HOST"); v != "" {
		agentHost = v
	}
	if v := os.Getenv("DD_TRACE_AGENT_PORT"); v != "" {
		agentPort = v
	}
	WithAgentAddr(net.JoinHostPort(agentHost, agentPort))(&c)
	// A complete agent URL from the environment is applied after the
	// host/port pair and therefore wins over it. A "unix" scheme switches the
	// HTTP client to a Unix domain socket instead of changing agentURL.
	if agentURL := internal.AgentURLFromEnv(); agentURL != nil {
		if agentURL.Scheme == "unix" {
			WithUDS(agentURL.Path)(&c)
		} else {
			c.agentURL = agentURL.String() + "/profiling/v1/input"
		}
	}

	if v := os.Getenv("DD_PROFILING_UPLOAD_TIMEOUT"); v != "" {
		d, err := time.ParseDuration(v)
		if err != nil {
			return nil, fmt.Errorf("DD_PROFILING_UPLOAD_TIMEOUT: %w", err)
		}
		WithUploadTimeout(d)(&c)
	}
	if v := os.Getenv("DD_API_KEY"); v != "" {
		WithAPIKey(v)(&c)
	}
	if internal.BoolEnv("DD_PROFILING_AGENTLESS", false) {
		WithAgentlessUpload()(&c)
	}
	if v := os.Getenv("DD_SITE"); v != "" {
		WithSite(v)(&c)
	}
	if v := os.Getenv("DD_ENV"); v != "" {
		WithEnv(v)(&c)
	}
	if v := os.Getenv("DD_SERVICE"); v != "" {
		WithService(v)(&c)
	}
	if v := os.Getenv("DD_VERSION"); v != "" {
		WithVersion(v)(&c)
	}

	// Tags from DD_TAGS are merged with the git metadata tags; on a key
	// collision the git metadata value replaces the DD_TAGS one. The tags are
	// appended while ranging over a map, so their relative order is not
	// deterministic.
	tags := make(map[string]string)
	if v := os.Getenv("DD_TAGS"); v != "" {
		tags = internal.ParseTagString(v)
		internal.CleanGitMetadataTags(tags)
	}
	for key, val := range internal.GetGitMetadataTags() {
		tags[key] = val
	}
	for key, val := range tags {
		if val != "" {
			WithTags(key + ":" + val)(&c)
		} else {
			WithTags(key)(&c)
		}
	}
	WithTags(
		"profiler_version:"+version.Tag,
		"runtime_version:"+strings.TrimPrefix(runtime.Version(), "go"),
		"runtime_compiler:"+runtime.Compiler,
		"runtime_arch:"+runtime.GOARCH,
		"runtime_os:"+runtime.GOOS,
		"runtime-id:"+globalconfig.RuntimeID(),
	)(&c)

	// not for public use; applied after DD_SITE, so it overrides the apiURL
	// derived from the site.
	if v := os.Getenv("DD_PROFILING_URL"); v != "" {
		WithURL(v)(&c)
	}
	// not for public use
	if v := os.Getenv("DD_PROFILING_OUTPUT_DIR"); v != "" {
		withOutputDir(v)(&c)
	}
	if v := os.Getenv("DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES"); v != "" {
		n, err := strconv.Atoi(v)
		if err != nil {
			return nil, fmt.Errorf("DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES: %w", err)
		}
		c.maxGoroutinesWait = n
	}

	// Experimental feature: Go execution trace (runtime/trace) recording.
	c.traceEnabled = internal.BoolEnv("DD_PROFILING_EXECUTION_TRACE_ENABLED", false)
	c.traceConfig.Period = internal.DurationEnv("DD_PROFILING_EXECUTION_TRACE_PERIOD", 5000*time.Second)
	c.traceConfig.Limit = internal.IntEnv("DD_PROFILING_EXECUTION_TRACE_LIMIT_BYTES", defaultExecutionTraceSizeLimit)
	// A zero period or size limit cannot produce a usable trace, so the
	// feature is switched off rather than started with such a configuration.
	if c.traceEnabled && (c.traceConfig.Period == 0 || c.traceConfig.Limit == 0) {
		log.Warn("Invalid execution trace config, enabled is true but size limit or frequency is 0. Disabling execution trace.")
		c.traceEnabled = false
	}
	return &c, nil
}
// An Option is used to configure the profiler's behaviour. Each Option is a
// function that modifies the profiler's config in place.
type Option func(*config)
// WithAgentAddr sets the "host:port" address of the Datadog Agent. Profiles
// are sent to the /profiling/v1/input endpoint at that address over HTTP.
func WithAgentAddr(hostport string) Option {
	endpoint := "http://" + hostport + "/profiling/v1/input"
	return func(c *config) {
		c.agentURL = endpoint
	}
}
// WithAPIKey sets the Datadog API key, taking precedence over the DD_API_KEY
// environment variable.
//
// This option used to enable agentless uploading. Since dd-trace-go v1.30.0
// the profiler always defaults to agent based uploading, which does not need
// an API key. If no agent is reachable on the default localhost:8126 address,
// either set one up or point the profiler at it with WithAgentAddr. See
// WithAgentlessUpload for more information.
func WithAPIKey(key string) Option {
	return func(c *config) {
		c.apiKey = key
	}
}
// WithAgentlessUpload bypasses the agent and talks to the Datadog API directly
// using the configured API key. It is for internal usage only and not
// officially supported: do not enable it unless somebody at Datadog instructed
// you to do so.
func WithAgentlessUpload() Option {
	return func(c *config) {
		c.agentless = true
	}
}
// WithDeltaProfiles turns delta profiles on or off. They are enabled by
// default. The option takes precedence over the DD_PROFILING_DELTA environment
// variable, which accepts "true" or "false". See
// https://dtdg.co/go-delta-profile-docs for more information.
func WithDeltaProfiles(enabled bool) Option {
	return func(c *config) {
		c.deltaProfiles = enabled
	}
}
// WithURL sets the HTTP URL of the Datadog Profiling API, replacing the
// currently configured intake API URL.
func WithURL(url string) Option {
	return func(c *config) {
		c.apiURL = url
	}
}
// WithPeriod sets the interval at which profiles are collected.
func WithPeriod(d time.Duration) Option {
	return func(c *config) {
		c.period = d
	}
}
// CPUDuration sets how long each CPU profile is recorded for.
func CPUDuration(d time.Duration) Option {
	return func(c *config) {
		c.cpuDuration = d
	}
}
// CPUProfileRate sets the CPU profiling sampling frequency: one sample is
// taken for every (1 / hz) seconds of on-CPU time. When the option is not
// used, profiling relies on the default rate of runtime/pprof.StartCPUProfile,
// which is 100 as of Go 1.0.
//
// A non-default rate causes a spurious warning each time CPU profiling is
// started, such as "cannot set cpu profile rate until previous profile has
// finished". This is a known issue; the rate is still applied correctly and
// CPU profiling works.
func CPUProfileRate(hz int) Option {
	return func(c *config) {
		c.cpuProfileRate = hz
	}
}
// MutexProfileFraction enables the mutex profile and sets the fraction of
// mutex contention events it reports: on average 1/rate events are recorded.
// An aggressive rate can hurt performance. See runtime.SetMutexProfileFraction
// for details on this value.
func MutexProfileFraction(rate int) Option {
	return func(c *config) {
		c.mutexFraction = rate
		c.addProfileType(MutexProfile)
	}
}
// BlockProfileRate enables the block profile with the given rate, expressed in
// nanoseconds. A block event of a given duration has a min(duration/rate, 1)
// chance of being sampled. Enabling this profile type is not recommended; see
// DefaultBlockRate for more information.
func BlockProfileRate(rate int) Option {
	return func(c *config) {
		c.blockRate = rate
		c.addProfileType(BlockProfile)
	}
}
// WithProfileTypes replaces the set of collected profile types with the given
// ones. MetricsProfile is always included, whether or not it is listed.
func WithProfileTypes(types ...ProfileType) Option {
	return func(c *config) {
		// Drop whatever was enabled before; only the caller's selection counts.
		for existing := range c.types {
			delete(c.types, existing)
		}
		// Metrics are always reported, so they lead the requested list.
		requested := append([]ProfileType{MetricsProfile}, types...)
		for _, pt := range requested {
			c.addProfileType(pt)
		}
	}
}
// WithService sets the service name attached to profiles.
func WithService(name string) Option {
	return func(c *config) {
		c.service = name
	}
}
// WithEnv sets the environment under which profiles are registered.
func WithEnv(env string) Option {
	return func(c *config) {
		c.env = env
	}
}
// WithVersion attaches the service version to profiles by adding a
// "version:<version>" tag.
func WithVersion(version string) Option {
	versionTag := "version:" + version
	return WithTags(versionTag)
}
// WithTags appends the given tags to those attached to the profiler. Tags can
// be used to filter the profiling view based on various information.
func WithTags(tags ...string) Option {
	return func(c *config) {
		c.tags = c.tags.Append(tags...)
	}
}
// WithStatsd sets the statsd client used for metrics. It is optional: by
// default no metrics are sent.
func WithStatsd(client StatsdClient) Option {
	return func(c *config) {
		c.statsd = client
	}
}
// WithUploadTimeout sets the timeout used when uploading profiles. Without
// this option the timeout comes from DefaultUploadTimeout or the
// DD_PROFILING_UPLOAD_TIMEOUT environment variable. A zero or negative value
// causes an error when the profiler is started.
func WithUploadTimeout(d time.Duration) Option {
	return func(c *config) {
		c.uploadTimeout = d
	}
}
// WithSite specifies the datadog site (datadoghq.com, datadoghq.eu, etc.)
// which profiles will be sent to. The intake API URL is derived from the site.
//
// If no valid URL can be built from site, an error is logged and the intake
// API URL already present in the configuration is left unchanged.
func WithSite(site string) Option {
	return func(cfg *config) {
		u, err := urlForSite(site)
		if err != nil {
			// Report the URL that actually stays in effect: it is not
			// necessarily defaultAPIURL, since an earlier option may have
			// replaced it.
			log.Error("profiler: invalid site provided, using %s (%s)", cfg.apiURL, err)
			return
		}
		cfg.apiURL = u
	}
}
// WithHTTPClient sets the HTTP client used when submitting profiles. This is
// generally only needed to customize the transport layer, for instance to use
// a unix domain socket.
func WithHTTPClient(client *http.Client) Option {
	return func(c *config) {
		c.httpClient = client
	}
}
// WithUDS configures the HTTP client to dial the Datadog Agent via the
// specified Unix Domain Socket path. It replaces the configured HTTP client
// with one whose transport ignores the network and address of the request and
// always connects to socketPath.
func WithUDS(socketPath string) Option {
	return WithHTTPClient(&http.Client{
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				// Dial through a Dialer so that cancellation and deadlines
				// carried by ctx also apply while connecting to the socket.
				var d net.Dialer
				return d.DialContext(ctx, "unix", socketPath)
			},
		},
	})
}
// withOutputDir makes the profiler write a copy of every uploaded profile to
// dir. It is meant for local development and for debugging upload issues. No
// cleanup is performed, so the directory keeps growing.
func withOutputDir(dir string) Option {
	return func(c *config) {
		c.outputDir = dir
	}
}
// WithLogStartup controls whether the profiler configuration is logged, in
// JSON format, to standard error when profiling starts. Logging is enabled by
// default.
func WithLogStartup(enabled bool) Option {
	return func(c *config) {
		c.logStartup = enabled
	}
}
// WithHostname sets the hostname reported with uploaded profiles through the
// "host:<hostname>" tag. When no hostname is given, the output of
// os.Hostname() is used instead.
func WithHostname(hostname string) Option {
	return func(c *config) {
		c.hostname = hostname
	}
}
// executionTraceConfig controls how often runtime execution traces are
// collected and how large a single trace may grow. defaultConfig fills it from
// the DD_PROFILING_EXECUTION_TRACE_PERIOD and
// DD_PROFILING_EXECUTION_TRACE_LIMIT_BYTES environment variables, and disables
// execution tracing when either value is 0.
type executionTraceConfig struct {
// Period is the amount of time between traces.
Period time.Duration
// Limit is the desired upper bound, in bytes, of a collected trace.
// Traces may be slightly larger than this limit due to flushing pending
// buffers at the end of tracing.
//
// We attempt to record for a full profiling period. The size limit of
// the trace is a better proxy for overhead (it scales with the number
// of events recorded) than duration, so we use that to decide when to
// stop tracing.
Limit int
}