forked from DataDog/datadog-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
start.go
286 lines (248 loc) · 8.23 KB
/
start.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2018 Datadog, Inc.
package app
import (
"fmt"
"syscall"
"time"
_ "expvar" // Blank import used because this isn't directly used in this file
"net/http"
_ "net/http/pprof" // Blank import used because this isn't directly used in this file
"os"
"os/signal"
"github.com/DataDog/datadog-agent/cmd/agent/api"
"github.com/DataDog/datadog-agent/cmd/agent/common"
"github.com/DataDog/datadog-agent/cmd/agent/common/signals"
"github.com/DataDog/datadog-agent/cmd/agent/gui"
"github.com/DataDog/datadog-agent/pkg/aggregator"
"github.com/DataDog/datadog-agent/pkg/config"
"github.com/DataDog/datadog-agent/pkg/dogstatsd"
"github.com/DataDog/datadog-agent/pkg/forwarder"
"github.com/DataDog/datadog-agent/pkg/logs"
"github.com/DataDog/datadog-agent/pkg/metadata"
"github.com/DataDog/datadog-agent/pkg/pidfile"
"github.com/DataDog/datadog-agent/pkg/serializer"
"github.com/DataDog/datadog-agent/pkg/util"
"github.com/DataDog/datadog-agent/pkg/version"
log "github.com/cihub/seelog"
"github.com/spf13/cobra"
// register core checks
_ "github.com/DataDog/datadog-agent/pkg/collector/corechecks/containers"
_ "github.com/DataDog/datadog-agent/pkg/collector/corechecks/embed"
_ "github.com/DataDog/datadog-agent/pkg/collector/corechecks/network"
_ "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system"
// register metadata providers
_ "github.com/DataDog/datadog-agent/pkg/collector/metadata"
_ "github.com/DataDog/datadog-agent/pkg/metadata"
)
// startCmd describes the "start" sub-command; cobra calls start when the
// command is run.
var startCmd = &cobra.Command{
	Use:   "start",
	Short: "Start the Agent",
	Long:  "Runs the agent in the foreground",
	RunE:  start,
}
// Variables backing command-line flags.
var (
	// pidfilePath is bound to the --pidfile/-p flag of the start command;
	// an empty value means no pidfile is written.
	pidfilePath string

	// runForeground is not referenced in this file.
	// NOTE(review): presumably consumed elsewhere in the package — confirm.
	runForeground bool
)
// Collection periods, expressed in seconds, for the metadata collectors that
// are always scheduled when metadata collection is enabled.
const (
	// hostMetadataCollectorInterval: the host metadata collector runs
	// every 4 hours (14400 seconds).
	hostMetadataCollectorInterval = 4 * 60 * 60

	// agentChecksMetadataCollectorInterval: the agent checks metadata
	// collector runs every 10 minutes (600 seconds).
	agentChecksMetadataCollectorInterval = 10 * 60
)
func init() {
// attach the command to the root
AgentCmd.AddCommand(startCmd)
// local flags
startCmd.Flags().StringVarP(&pidfilePath, "pidfile", "p", "", "path to the pidfile")
}
// start is the RunE handler of the start command: it boots the Agent through
// StartAgent and then blocks until a shutdown is requested.
//
// A shutdown is triggered by whichever of these happens first:
//   - a value received on signals.Stopper (returns nil),
//   - a value received on signals.ErrorStopper (returns a non-nil error),
//   - an os.Interrupt or SIGTERM signal (returns nil).
//
// If StartAgent fails, its error is returned immediately. StopAgent runs on
// every exit path. The cmd and args parameters are required by the cobra
// RunE signature and are not used.
func start(cmd *cobra.Command, args []string) error {
	// Tear the Agent down however we leave, including a failed StartAgent.
	defer StopAgent()

	// Setup a channel to catch OS signals
	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM)

	// Channel carrying the exit status of this function. It is buffered so
	// the watcher goroutine below can always deliver its single value and
	// terminate, even when start has already returned because StartAgent
	// failed and nobody is receiving anymore.
	stopCh := make(chan error, 1)

	go func() {
		// Watch for stop requests asynchronously so StartAgent can run.
		select {
		case <-signals.Stopper:
			log.Info("Received stop command, shutting down...")
			stopCh <- nil
		case <-signals.ErrorStopper:
			log.Critical("The Agent has encountered an error, shutting down...")
			stopCh <- fmt.Errorf("shutting down because of an error")
		case sig := <-signalCh:
			log.Infof("Received signal '%s', shutting down...", sig)
			stopCh <- nil
		}
	}()

	if err := StartAgent(); err != nil {
		return err
	}

	// Block until one of the stop conditions fires.
	return <-stopCh
}
// StartAgent initializes the agent process.
//
// In order, it: loads the global configuration, sets up logging, starts the
// expvar/pprof HTTP server on localhost, writes the pidfile when one was
// requested, resolves the hostname, starts the command API server and the
// GUI server, starts the forwarder, initializes the aggregator, optionally
// starts dogstatsd and the logs-agent, starts autoconfig, schedules the
// metadata collectors and finally starts the dependent services.
//
// An error is returned (and startup aborted) when the configuration, logger,
// pidfile, hostname, API server, or the mandatory "host"/"agent_checks"
// metadata collectors fail. Failures of the GUI server, endpoint
// configuration, dogstatsd, logs-agent and optional metadata providers are
// only logged.
func StartAgent() error {
	// Global Agent configuration
	err := common.SetupConfig(confFilePath)
	if err != nil {
		return fmt.Errorf("unable to set up global agent configuration: %v", err)
	}

	// Setup logger
	syslogURI := config.GetSyslogURI()
	logFile := config.Datadog.GetString("log_file")
	if logFile == "" {
		logFile = common.DefaultLogFile
	}
	if config.Datadog.GetBool("disable_file_logging") {
		// this will prevent any logging on file
		logFile = ""
	}

	err = config.SetupLogger(
		config.Datadog.GetString("log_level"),
		logFile,
		syslogURI,
		config.Datadog.GetBool("syslog_rfc"),
		config.Datadog.GetBool("syslog_tls"),
		config.Datadog.GetString("syslog_pem"),
		config.Datadog.GetBool("log_to_console"),
	)
	if err != nil {
		return log.Errorf("Error while setting up logging, exiting: %v", err)
	}
	log.Infof("Starting Datadog Agent v%v", version.AgentVersion)

	// Setup expvar server. It serves http.DefaultServeMux, on which the
	// blank-imported expvar and net/http/pprof packages register handlers.
	port := config.Datadog.GetString("expvar_port")
	go func() {
		// ListenAndServe only returns on failure (e.g. port already in
		// use); surface it instead of silently losing the endpoint.
		srvErr := http.ListenAndServe("127.0.0.1:"+port, http.DefaultServeMux)
		if srvErr != nil && srvErr != http.ErrServerClosed {
			log.Errorf("Error creating expvar server on port %v: %v", port, srvErr)
		}
	}()

	if pidfilePath != "" {
		err = pidfile.WritePID(pidfilePath)
		if err != nil {
			return log.Errorf("Error while writing PID file, exiting: %v", err)
		}
		log.Infof("pid '%d' written to pid file '%s'", os.Getpid(), pidfilePath)
	}

	hostname, err := util.GetHostname()
	if err != nil {
		return log.Errorf("Error while getting hostname, exiting: %v", err)
	}
	log.Infof("Hostname is: %s", hostname)

	// start the cmd HTTP server
	if err = api.StartServer(); err != nil {
		return log.Errorf("Error while starting api server, exiting: %v", err)
	}

	// start the GUI server; a port of "-1" disables it, and a start failure
	// is not fatal for the Agent
	guiPort := config.Datadog.GetString("GUI_port")
	if guiPort == "-1" {
		log.Infof("GUI server port -1 specified: not starting the GUI.")
	} else if err = gui.StartGUIServer(guiPort); err != nil {
		log.Errorf("Error while starting GUI: %v", err)
	}

	// setup the forwarder
	keysPerDomain, err := config.GetMultipleEndpoints()
	if err != nil {
		log.Error("Misconfiguration of agent endpoints: ", err)
	}
	common.Forwarder = forwarder.NewDefaultForwarder(keysPerDomain)
	log.Debugf("Starting forwarder")
	common.Forwarder.Start()
	log.Debugf("Forwarder started")

	// setup the aggregator
	s := &serializer.Serializer{Forwarder: common.Forwarder}
	agg := aggregator.InitAggregator(s, hostname)
	agg.AddAgentStartupEvent(version.AgentVersion)

	// start dogstatsd
	if config.Datadog.GetBool("use_dogstatsd") {
		common.DSD, err = dogstatsd.NewServer(agg.GetChannels())
		if err != nil {
			log.Errorf("Could not start dogstatsd: %s", err)
		} else {
			// only claim success when the server was actually created
			log.Debugf("statsd started")
		}
	}

	// start logs-agent
	if config.Datadog.GetBool("log_enabled") {
		// logs-agent does not provide any Stop method yet
		// data loss may happen when stopping the agent
		if logsErr := logs.Start(); logsErr != nil {
			log.Error("Could not start logs-agent: ", logsErr)
		} else {
			log.Info("Starting logs-agent")
		}
	} else {
		log.Info("logs-agent disabled")
	}

	// create and setup the Autoconfig instance
	common.SetupAutoConfig(config.Datadog.GetString("confd_path"))
	// start the autoconfig, this will immediately run any configured check
	common.StartAutoConfig()

	// setup the metadata collector, this needs a working Python env to function
	if config.Datadog.GetBool("enable_metadata_collection") {
		common.MetadataScheduler = metadata.NewScheduler(s, hostname)

		var C []config.MetadataProviders
		err = config.Datadog.UnmarshalKey("metadata_providers", &C)
		if err == nil {
			log.Debugf("Adding configured providers to the metadata collector")
			for _, c := range C {
				// "host" and "agent_checks" are always scheduled below
				// with their fixed intervals, so skip user overrides.
				if c.Name == "host" || c.Name == "agent_checks" {
					continue
				}
				// the configured interval is a number of seconds
				intl := c.Interval * time.Second
				err = common.MetadataScheduler.AddCollector(c.Name, intl)
				if err != nil {
					log.Errorf("Unable to add '%s' metadata provider: %v", c.Name, err)
				} else {
					log.Infof("Scheduled metadata provider '%v' to run every %v", c.Name, intl)
				}
			}
		} else {
			log.Errorf("Unable to parse metadata_providers config: %v", err)
		}

		// Should be always true, except in some edge cases (multiple agents per host)
		err = common.MetadataScheduler.AddCollector("host", hostMetadataCollectorInterval*time.Second)
		if err != nil {
			return log.Error("Host metadata is supposed to be always available in the catalog!")
		}
		err = common.MetadataScheduler.AddCollector("agent_checks", agentChecksMetadataCollectorInterval*time.Second)
		if err != nil {
			return log.Error("Agent Checks metadata is supposed to be always available in the catalog!")
		}
	} else {
		log.Warnf("Metadata collection disabled, only do that if another agent/dogstatsd is running on this host")
	}

	// start dependent services
	startDependentServices()

	return nil
}
// StopAgent tears down the agent process.
//
// It stops, in this order and only when they were created: dogstatsd,
// autoconfig, the metadata scheduler, the command API server, the forwarder
// and the GUI server. It then removes the pidfile (if one was configured),
// logs a goodbye message and flushes the logger.
func StopAgent() {
	// gracefully shut down any component
	if common.DSD != nil {
		common.DSD.Stop()
	}
	if common.AC != nil {
		common.AC.Stop()
	}
	if common.MetadataScheduler != nil {
		common.MetadataScheduler.Stop()
	}
	api.StopServer()
	if common.Forwarder != nil {
		common.Forwarder.Stop()
	}
	gui.StopGUIServer()

	// Only touch the filesystem when a pidfile was requested. A file that
	// does not exist (e.g. startup failed before it was written) is not
	// worth reporting; any other failure is.
	if pidfilePath != "" {
		if err := os.Remove(pidfilePath); err != nil && !os.IsNotExist(err) {
			log.Warnf("Unable to remove pid file '%s': %v", pidfilePath, err)
		}
	}

	log.Info("See ya!")
	// flush last so the messages above are not lost on exit
	log.Flush()
}