# Copyright (c) 2013-2017 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (config.hocon.sample) contains a template with
# configuration options for Stream Enrich.
enrich {
  # Sources currently supported are:
  # 'kinesis' for reading Thrift-serialized records from a Kinesis stream
  # 'kafka' for reading Thrift-serialized records from a Kafka topic
  # 'stdin' for reading Base64-encoded Thrift-serialized records from stdin
  source = kinesis
  # Sinks currently supported are:
  # 'kinesis' for writing enriched events to one Kinesis stream and invalid events to another.
  # 'kafka' for writing enriched events to one Kafka topic and invalid events to another.
  # 'stdouterr' for writing enriched events to stdout and invalid events to stderr.
  # Using "sbt assembly" and "java -jar" is recommended to disable sbt logging.
  sink = kinesis
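  # For example, to run locally without AWS, reading from stdin and writing to
  # stdout/stderr (a sketch for testing):
  # source = stdin
  # sink = stdouterr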
  # AWS credentials
  # If both are set to 'default', use the default AWS credentials provider chain.
  # If both are set to 'iam', use AWS IAM Roles to provision credentials.
  # If both are set to 'env', use the environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
  aws {
    accessKey = iam
    secretKey = iam
  }
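  # For example, to read credentials from environment variables instead (a sketch;
  # assumes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are exported):
  # aws {
  #   accessKey = env
  #   secretKey = env
  # }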
  streams {
    in {
      # Stream/topic where the raw events to be enriched are located
      raw = {{streamsInRaw}}
    }
    out {
      # Stream/topic where the events that were successfully enriched will end up
      enriched = {{outEnriched}}
      # Stream/topic where the events that failed enrichment will be stored
      bad = {{outBad}}
      # How the output stream/topic will be partitioned.
      # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid,
      # user_ipaddress, domain_sessionid, user_fingerprint.
      # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to see what the
      # possible partition keys correspond to.
      # If no partition key is set, the partition key will be a random UUID.
      partitionKey = {{partitionKeyName}}
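      # For example, to route all events with the same domain user ID to the
      # same shard (one possible choice from the list above):
      # partitionKey = domain_userid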
    }
    kinesis {
      # Region where the streams are located
      region = {{region}}
      # Maximum number of records to get from Kinesis per call to GetRecords
      maxRecords = 10000
      # LATEST: most recent data.
      # TRIM_HORIZON: oldest available data.
      # AT_TIMESTAMP: start from the record at or after the specified timestamp.
      # Note: this only affects the first run of this application on a stream.
      initialPosition = TRIM_HORIZON
      # Needs to be specified when initialPosition is AT_TIMESTAMP.
      # The timestamp format needs to be "yyyy-MM-ddTHH:mm:ssZ".
      # Ex: "2017-05-17T10:00:00Z"
      # Note: the time needs to be specified in UTC.
      initialTimestamp = "{{initialTimestamp}}"
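      # For example, to replay from a fixed point in time (a sketch using the
      # sample timestamp above):
      # initialPosition = AT_TIMESTAMP
      # initialTimestamp = "2017-05-17T10:00:00Z"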
      # Minimum and maximum backoff periods, in milliseconds
      backoffPolicy {
        minBackoff = {{enrichStreamsOutMinBackoff}}
        maxBackoff = {{enrichStreamsOutMaxBackoff}}
      }
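      # For example (illustrative values only), wait at least 3 seconds and at
      # most 10 minutes between retries:
      # backoffPolicy {
      #   minBackoff = 3000
      #   maxBackoff = 600000
      # }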
    }
    # Kafka configuration
    kafka {
      brokers = "{{kafkaBrokers}}"
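      # For example, a single local broker on Kafka's default port (a sketch):
      # brokers = "localhost:9092"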
      # Number of retries to perform before giving up on sending a record
      retries = 0
    }
    # After enrichment, events are accumulated in a buffer before being sent to Kinesis/Kafka.
    # The buffer is emptied whenever:
    # - the number of stored records reaches recordLimit, or
    # - the combined size of the stored records reaches byteLimit, or
    # - the time in milliseconds since it was last emptied exceeds timeLimit when
    #   a new event enters the buffer
    buffer {
      byteLimit = {{bufferByteThreshold}}
      recordLimit = {{bufferRecordThreshold}} # Not supported by Kafka; will be ignored
      timeLimit = {{bufferTimeThreshold}}
    }
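    # For example (illustrative values only), flush after 500 records, 4 MB of
    # data, or 5 seconds since the last flush, whichever is reached first:
    # buffer {
    #   byteLimit = 4000000
    #   recordLimit = 500
    #   timeLimit = 5000
    # }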
    # Used for the DynamoDB table that maintains stream state (Kinesis) and as
    # the Kafka consumer group ID.
    # You can set it automatically using: "SnowplowEnrich-${enrich.streams.in.raw}"
    appName = "{{appName}}"
  }
  # Optional section for tracking endpoints
  monitoring {
    snowplow {
      collectorUri = "{{collectorUri}}"
      collectorPort = 80
      appId = {{enrichAppName}}
      method = GET
    }
  }
}