Skip to content

Commit dceb970

Browse files
NathanFlurry MasterPtato
authored and committed
fix(infra): remove high cardinality prometheus metrics
1 parent 8ee4366 commit dceb970

File tree

7 files changed

+18
-19
lines changed

7 files changed

+18
-19
lines changed

infra/tf/vector/vector.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ resource "helm_release" "vector" {
2323
namespace = "vector"
2424
repository = "https://helm.vector.dev"
2525
chart = "vector"
26-
version = "0.29.0"
26+
version = "0.38.0"
2727
values = [yamlencode({
2828
role = "Aggregator"
2929
podPriorityClassName = "service-priority"

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traefik.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ pub async fn gg_static_config() -> GlobalResult<String> {
224224
port = TUNNEL_API_INTERNAL_PORT,
225225
);
226226

227+
// Metrics are disabled since they're too high cardinality for Prometheus (both the # of
228+
// entrypoints & the frequently changing routers + services)
227229
let mut config = formatdoc!(
228230
r#"
229231
[entryPoints]
@@ -239,13 +241,6 @@ pub async fn gg_static_config() -> GlobalResult<String> {
239241
[api]
240242
insecure = true
241243
242-
[metrics.prometheus]
243-
# See lib/chirp/metrics/src/buckets.rs
244-
buckets = [0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0]
245-
addEntryPointsLabels = true
246-
addRoutersLabels = true
247-
addServicesLabels = true
248-
249244
[providers]
250245
[providers.file]
251246
directory = "/etc/game_guard/dynamic"

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/vector.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ pub fn configure(config: &Config, pool_type: backend::cluster::PoolType) -> Stri
4242
type = "remap"
4343
inputs = [{sources}]
4444
source = '''
45+
# Drop go stats
46+
if starts_with!(.name, "go_") {{
47+
abort
48+
}}
49+
4550
.tags.server_id = "___SERVER_ID___"
4651
.tags.datacenter_id = "___DATACENTER_ID___"
4752
.tags.cluster_id = "___CLUSTER_ID___"
@@ -55,6 +60,11 @@ pub fn configure(config: &Config, pool_type: backend::cluster::PoolType) -> Stri
5560
address = "127.0.0.1:{TUNNEL_VECTOR_PORT}"
5661
healthcheck.enabled = false
5762
compression = true
63+
64+
# Buffer to disk for durability & reduce memory usage
65+
buffer.max_events = 500
66+
buffer.max_size = 268435488
67+
buffer.type = "disk"
5868
"#
5969
);
6070

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/files/node_exporter.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ After=network-online.target
3030
User=node_exporter
3131
Group=node_exporter
3232
Type=simple
33-
ExecStart=/usr/bin/node_exporter --collector.cgroups --collector.network_route --collector.systemd
33+
# Reduce cardinality
34+
ExecStart=/usr/bin/node_exporter --collector.disable-defaults --collector.cpu --collector.conntrack --collector.meminfo --collector.filesystem --collector.filesystem.mount-points-exclude=^/opt/nomad/
3435
Restart=always
3536
RestartSec=2
3637

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/files/nomad_configure.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,9 @@ telemetry {
306306
collection_interval = "5s"
307307
disable_hostname = true
308308
prometheus_metrics = true
309-
publish_allocation_metrics = true
310309
publish_node_metrics = true
310+
311+
# Don't publish_allocation_metrics because of high cardinality
311312
}
312313
313314
# Needed for Prometheus rate limiting

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/files/vector_install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version="0.34.1"
1+
version="0.38.0"
22

33
# Create vector user
44
if ! id -u "vector" &>/dev/null; then

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/mod.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,6 @@ pub async fn gen_initialize(pool_type: backend::cluster::PoolType) -> GlobalResu
107107
tcp_server_transports: Default::default(),
108108
},
109109
));
110-
111-
prometheus_targets.insert(
112-
GG_TRAEFIK_INSTANCE_NAME.into(),
113-
components::vector::PrometheusTarget {
114-
endpoint: "http://127.0.0.1:9980/metrics".into(),
115-
scrape_interval: 15,
116-
},
117-
);
118110
}
119111
backend::cluster::PoolType::Ats => {
120112
script.push(components::traffic_server::configure().await?);

0 commit comments

Comments
 (0)