-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvector_25000streams.yaml
58 lines (50 loc) · 2.47 KB
/
vector_25000streams.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Input side of the benchmark pipeline: replays the pre-generated dataset files.
sources:
  generated_logs:
    type: file
    # Drain older files before newer ones instead of reading them in parallel.
    oldest_first: true
    include:
      # Glob over the dataset shards (presumably gzip-compressed NDJSON, going
      # by the file extension).
      - /datasets/generated-logs-v1-*.ndjson.gz
transforms:
  # Replaces each event read from the file source with the JSON document held
  # in its `message` field, normalizes `.timestamp`, and derives a
  # low-cardinality `bucketed_hostname` field that the Loki sink uses as a label.
  remap_generated_logs:
    inputs:
      - generated_logs
    type: remap
    # Literal block scalar (`|-`): VRL expressions are separated by newlines, so
    # the line breaks must survive YAML parsing. A folded scalar (`>-`) would
    # join the statements below into a single line.
    #
    # NOTE(review): the two del() calls are superseded by the `. = ...`
    # assignment that follows, which overwrites the whole event; kept as-is.
    source: |-
      del(.file)
      del(.host)
      . = parse_json!(.message)
      .timestamp = parse_timestamp(.timestamp, "%Y-%m-%dT%H:%M:%S%.3fZ") ?? from_unix_timestamp!(1704118424)
      # First two characters of the hashed host name. See the VRL playground:
      # https://playground.vrl.dev/?state=eyJwcm9ncmFtIjoiIyBBZGQgYSB0aW1lc3RhbXBcbi50aW1lc3RhbXAgPSBub3coKVxuXG4uYnVja2V0ID0gc2xpY2UhKHNoYTIhKC5ob3N0Lm5hbWUpLCBzdGFydDowLCBlbmQ6MilcbiIsImV2ZW50Ijp7Imhvc3QiOnsibmFtZSI6InF3ZSJ9fSwiaXNfanNvbmwiOmZhbHNlLCJlcnJvciI6bnVsbH0%3D
      .bucketed_hostname = slice!(sha2!(.host.name), start:0, end:2)
sinks:
  # JSON log example:
  # {"timestamp": "2023-01-02T03:14:09.000Z",
  # "aws.cloudwatch": {"log_stream": "glendagger","ingestion_time": "2023-09-17T11:15:39.468Z"},
  # "cloud": {"region": "ap-northeast-2"},"log.file.path": "/var/log/messages/glendagger","input": {"type": "aws-cloudwatch"},"process": {"name": "sshd"},
  # "message": "2023-09-17T11:15:39.468Z Sep 17 11:15:39 ip-143-124-65-239 sshd: duck falcon death bunny hand stealer falcon graverover",
  # "event": {"id": "dourcrest","ingested": "2023-09-17T10:33:10.468944042Z","dataset": "generic"},"host": {"name": "olivevulture"},
  # "metrics": {"size": 260335, "tmin": 462262},
  # "agent": {"id": "c315dc22-3ea6-44dc-8d56-fd02f675367b","name": "olivevulture","type": "filebeat","version": "8.8.0","ephemeral_id": "c315dc22-3ea6-44dc-8d56-fd02f675367b"},
  # "tags": ["preserve_original_event"], "trace_id":"142720679204778717", "level": "DEBUG"}
  #
  # FIXME: Loki does not seem to take the `timestamp` field into account; the
  # cause is still unknown.

  # Ships the remapped events to Loki as JSON lines, labelled with a small set
  # of low-cardinality fields.
  bench_loki:
    type: loki
    inputs:
      - remap_generated_logs
    endpoint: http://loki:3100
    compression: none
    out_of_order_action: accept
    labels:
      # Adding the log stream as a label makes RAM usage explode:
      # aws_cloudwatch_log_stream: '{{ "aws.cloudwatch".log_stream }}'
      # hostname: '{{ "host".name }}'  # ~10k unique values.
      # 256 unique values: 16^2, since sha2 returns the hash in hexadecimal.
      bucketed_hostname: '{{ bucketed_hostname }}'
      cloud_region: '{{ cloud.region }}'  # ~25 values.
      input_type: '{{ input.type }}'  # Safe to keep: 1 value.
      level: '{{ level }}'  # 4 values.
      agent: benchmark  # Static label, not templated from the event.
    remove_timestamp: true
    # Fields already exposed as labels are dropped from the encoded payload.
    remove_label_fields: true
    encoding:
      codec: json