-
Notifications
You must be signed in to change notification settings - Fork 16
/
shredder.batch.config.reference.hocon
90 lines (84 loc) · 3.21 KB
/
shredder.batch.config.reference.hocon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
{
# Path to enriched archive (must be populated separately with run=YYYY-MM-DD-hh-mm-ss directories) for S3 input
"input": "s3://bucket/input/",
# Settings for the shredded archive output
"output": {
# Path to shredded output
"path": "s3://bucket/shredded/",
# Shredder output compression, GZIP or NONE
# Optional, default value GZIP
"compression": "GZIP",
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
"region": "eu-central-1"
}
# Queue used to communicate with Loader
"queue": {
# Type of the queue. It can be either sqs or sns
"type": "sqs",
# Name of the sqs queue
"queueName": "test-sqs",
# Region of the SQS queue.
# Optional if it can be resolved with AWS region provider chain.
"region": "eu-central-1"
}
# SNS example:
#"queue": {
# # Type of the queue. It can be either sqs or sns
# "type": "sns",
# # ARN of SNS topic
# "topicArn": "arn:aws:sns:eu-central-1:123456789:test-sns-topic",
# # Region of the SNS topic
# "region": "eu-central-1"
#}
# Configure the way in-batch deduplication is performed
"deduplication": {
# Synthetic deduplication reassigns new ids to events with the same id-fingerprint pair
# Different options can be tried if synthetic deduplication affects performance
"synthetic": {
# Can be NONE (disable), BROADCAST and JOIN (different low-level implementations)
"type": "BROADCAST"
# Do not deduplicate pairs whose cardinality is less than or equal to this value
"cardinality": 1
}
}
# Schema-specific format settings (recommended to leave all three groups empty and use TSV as default)
"formats": {
# Format used by default (TSV or JSON)
# Optional, default value TSV
"default": "TSV",
# Schemas to be shredded as JSONs, corresponding JSONPath files must be present. Automigrations will be disabled
# Optional, default value []
"json": [
"iglu:com.acme/json-event/jsonschema/1-0-0",
"iglu:com.acme/json-event/jsonschema/2-*-*"
],
# Schemas to be shredded as TSVs, presence of the schema on Iglu Server is necessary. Automigrations enabled
# Optional, default value []
"tsv": [ ],
# Schemas that won't be loaded
# Optional, default value []
"skip": [
"iglu:com.acme/skip-event/jsonschema/1-*-*"
]
},
# Specifies interval shredder will work on
"runInterval": {
# Optional, shredder will start to process after given timestamp
"sinceTimestamp": "2021-10-12-14-55-22",
# Optional, sinceAge is a duration that specifies the maximum age of folders that
# should get processed. If sinceAge and sinceTimestamp are both specified, then the
# latest value of the two determines the earliest folder that will be processed.
"sinceAge": "14 days",
# Optional, shredder will process until given timestamp
"until": "2021-12-10-18-34-52"
}
# Observability and reporting options
"monitoring": {
# Optional, for tracking runtime exceptions
"sentry": {
"dsn": "http://sentry.acme.com"
}
}
}