This repository has been archived by the owner on Oct 24, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add telegraf setup for per-container stat reporting
Creates InfluxDB database and sets up all the steps to have telegraf running when influxdb is enabled. Closes #33
- Loading branch information
Showing
8 changed files
with
257 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
futures==3.0.5 | ||
typing==3.5.3.0 | ||
toml==0.9.2 | ||
marshmallow==2.13.4 | ||
marshmallow==2.13.4 | ||
influxdb==4.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
# Telegraf Configuration | ||
# | ||
# Telegraf is entirely plugin driven. All metrics are gathered from the | ||
# declared inputs, and sent to the declared outputs. | ||
# | ||
# Plugins must be declared in here to be active. | ||
# To deactivate a plugin, comment out the name and any variables. | ||
# | ||
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config | ||
# file would generate. | ||
# | ||
# Environment variables can be used anywhere in this config file, simply prepend | ||
# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), | ||
# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) | ||
|
||
|
||
# Global tags can be specified here in key="value" format. | ||
[global_tags] | ||
# dc = "us-east-1" # will tag all metrics with dc=us-east-1 | ||
# rack = "1a" | ||
## Environment variables can be used as tags, and throughout the config file | ||
# user = "$USER" | ||
step = "$__ARDERE_TELEGRAF_STEP__" | ||
## type is the old "docker_series" | ||
type = "$__ARDERE_TELEGRAF_TYPE__" | ||
|
||
|
||
# Configuration for telegraf agent | ||
[agent] | ||
## Default data collection interval for all inputs | ||
interval = "10s" | ||
## Rounds collection interval to 'interval' | ||
## ie, if interval="10s" then always collect on :00, :10, :20, etc. | ||
round_interval = true | ||
|
||
## Telegraf will send metrics to outputs in batches of at most | ||
## metric_batch_size metrics. | ||
## This controls the size of writes that Telegraf sends to output plugins. | ||
metric_batch_size = 1000 | ||
|
||
## For failed writes, telegraf will cache metric_buffer_limit metrics for each | ||
## output, and will flush this buffer on a successful write. Oldest metrics | ||
## are dropped first when this buffer fills. | ||
## This buffer only fills when writes fail to output plugin(s). | ||
metric_buffer_limit = 10000 | ||
|
||
## Collection jitter is used to jitter the collection by a random amount. | ||
## Each plugin will sleep for a random time within jitter before collecting. | ||
## This can be used to avoid many plugins querying things like sysfs at the | ||
## same time, which can have a measurable effect on the system. | ||
collection_jitter = "0s" | ||
|
||
## Default flushing interval for all outputs. You shouldn't set this below | ||
## interval. Maximum flush_interval will be flush_interval + flush_jitter | ||
flush_interval = "10s" | ||
## Jitter the flush interval by a random amount. This is primarily to avoid | ||
## large write spikes for users running a large number of telegraf instances. | ||
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s | ||
flush_jitter = "0s" | ||
## By default, precision will be set to the same timestamp order as the | ||
## collection interval, with the maximum being 1s. | ||
## Precision will NOT be used for service inputs, such as logparser and statsd. | ||
## Valid values are "ns", "us" (or "µs"), "ms", "s". | ||
precision = "" | ||
## Logging configuration: | ||
## Run telegraf with debug log messages. | ||
debug = false | ||
## Run telegraf in quiet mode (error log messages only). | ||
quiet = false | ||
## Specify the log file name. The empty string means to log to stderr. | ||
logfile = "" | ||
## Override default hostname, if empty use os.Hostname() | ||
hostname = "$__ARDERE_TELEGRAF_HOST__" | ||
## If set to true, do not set the "host" tag in the telegraf agent. | ||
omit_hostname = false | ||
############################################################################### | ||
# OUTPUT PLUGINS # | ||
############################################################################### | ||
# Configuration for influxdb server to send metrics to | ||
[[outputs.influxdb]] | ||
## The full HTTP or UDP endpoint URL for your InfluxDB instance. | ||
## Multiple urls can be specified as part of the same cluster, | ||
## this means that only ONE of the urls will be written to each interval. | ||
# urls = ["udp://localhost:8089"] # UDP endpoint example | ||
urls = ["http://$__ARDERE_INFLUX_ADDR__"] # required | ||
## The target database for metrics (telegraf will create it if it does not exist). | ||
database = "$__ARDERE_INFLUX_DB__" # required | ||
## Retention policy to write to. Empty string writes to the default rp. | ||
retention_policy = "" | ||
## Write consistency (clusters only), can be: "any", "one", "quorum", "all" | ||
write_consistency = "any" | ||
## Write timeout (for the InfluxDB client), formatted as a string. | ||
## If not provided, will default to 5s. 0s means no timeout (not recommended). | ||
timeout = "5s" | ||
# username = "telegraf" | ||
# password = "metricsmetricsmetricsmetrics" | ||
## Set the user agent for HTTP POSTs (can be useful for log differentiation) | ||
# user_agent = "telegraf" | ||
## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) | ||
# udp_payload = 512 | ||
## Optional SSL Config | ||
# ssl_ca = "/etc/telegraf/ca.pem" | ||
# ssl_cert = "/etc/telegraf/cert.pem" | ||
# ssl_key = "/etc/telegraf/key.pem" | ||
## Use SSL but skip chain & host verification | ||
# insecure_skip_verify = false | ||
############################################################################### | ||
# PROCESSOR PLUGINS # | ||
############################################################################### | ||
# # Print all metrics that pass through this filter. | ||
# [[processors.printer]] | ||
############################################################################### | ||
# AGGREGATOR PLUGINS # | ||
############################################################################### | ||
# # Keep the aggregate min/max of each metric passing through. | ||
# [[aggregators.minmax]] | ||
# ## General Aggregator Arguments: | ||
# ## The period on which to flush & clear the aggregator. | ||
# period = "30s" | ||
# ## If true, the original metric will be dropped by the | ||
# ## aggregator and will not get sent to the output plugins. | ||
# drop_original = false | ||
############################################################################### | ||
# INPUT PLUGINS # | ||
############################################################################### | ||
# Read metrics about cpu usage | ||
[[inputs.cpu]] | ||
## Whether to report per-cpu stats or not | ||
percpu = true | ||
## Whether to report total system cpu stats or not | ||
totalcpu = true | ||
## If true, collect raw CPU time metrics. | ||
collect_cpu_time = false | ||
# Read metrics about memory usage | ||
[[inputs.mem]] | ||
# no configuration | ||
# Read TCP metrics such as established, time wait and sockets counts. | ||
[[inputs.netstat]] | ||
# no configuration | ||
############################################################################### | ||
# SERVICE INPUT PLUGINS # | ||
############################################################################### | ||
# Statsd Server | ||
[[inputs.statsd]] | ||
## Address and port to host UDP listener on | ||
service_address = ":8125" | ||
## The following configuration options control when telegraf clears its cache | ||
## of previous values. If set to false, then telegraf will only clear its | ||
## cache when the daemon is restarted. | ||
## Reset gauges every interval (default=true) | ||
delete_gauges = true | ||
## Reset counters every interval (default=true) | ||
delete_counters = true | ||
## Reset sets every interval (default=true) | ||
delete_sets = true | ||
## Reset timings & histograms every interval (default=true) | ||
delete_timings = true | ||
## Percentiles to calculate for timing & histogram stats | ||
percentiles = [90] | ||
## separator to use between elements of a statsd metric | ||
metric_separator = "_" | ||
## Parses tags in the datadog statsd format | ||
## http://docs.datadoghq.com/guides/dogstatsd/ | ||
parse_data_dog_tags = false | ||
## Statsd data translation templates, more info can be read here: | ||
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite | ||
# templates = [ | ||
# "cpu.* measurement*" | ||
# ] | ||
## Number of UDP messages allowed to queue up, once filled, | ||
## the statsd server will start dropping packets | ||
allowed_pending_messages = 10000 | ||
## Number of timing/histogram values to track per-measurement in the | ||
## calculation of percentiles. Raising this limit increases the accuracy | ||
## of percentiles but also increases the memory usage and cpu time. | ||
#percentile_limit = 1000 | ||
percentile_limit = 10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.