Skip to content

Commit

Permalink
Use EnrichmentManager.enrichEvent to validate events (close #23)
Browse files Browse the repository at this point in the history
  • Loading branch information
benjben committed Jul 24, 2020
1 parent f313b1b commit 5c5a02a
Show file tree
Hide file tree
Showing 13 changed files with 678 additions and 234 deletions.
1 change: 0 additions & 1 deletion build.sbt
Expand Up @@ -33,7 +33,6 @@ lazy val root = project
organization,
name,
version,
"shortName" -> "snowplow-micro",
scalaVersion),
buildInfoPackage := "buildinfo"
)
Expand Down
157 changes: 100 additions & 57 deletions example/micro.conf
@@ -1,37 +1,44 @@
# Copyright (c) 2013-2019 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (application.conf.example) contains a template with
# configuration options for the Scala Stream Collector.
#
# To use, copy this to 'application.conf' and modify the configuration options.

# 'collector' contains configuration options for the main Scala collector.
collector {
# The collector runs as a web service specified on the following interface and port.
interface = "0.0.0.0"
port = "9090"

# optional SSL/TLS configuration
ssl {
enable = false
# whether to redirect HTTP to HTTPS
redirect = false
port = 9543
}

# The collector responds with a cookie to requests with a path that matches the 'vendor/version' protocol.
# The expected values are:
# - com.snowplowanalytics.snowplow/tp2 for Tracker Protocol 2
# - r/tp2 for redirects
# - com.snowplowanalytics.iglu/v1 for the Iglu Webhook
# Any path that matches the 'vendor/version' protocol will result in a cookie response, for use by custom webhooks
# downstream of the collector.
# But you can also map any valid (i.e. two-segment) path to one of the three defaults.
# Your custom path must be the key and the value must be one of the corresponding default paths. Both must be full
# valid paths starting with a leading slash.
# Pass in an empty map to avoid mapping.
paths {
# "/com.acme/track" = "/com.snowplowanalytics.snowplow/tp2"
# "/com.acme/redirect" = "/r/tp2"
# "/com.acme/iglu" = "/com.snowplowanalytics.iglu/v1"
}

# Configure the P3P policy header.
p3p {
policyRef = "/w3c/p3p.xml"
CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
p-3p {
policy-ref = "/w3c/p3p.xml"
cp = "NOI DSP COR NID PSA OUR IND COM NAV STA"
}

# Cross domain policy configuration.
# If "enabled" is set to "false", the collector will respond with a 404 to the /crossdomain.xml
# route.
crossDomain {
cross-domain {
enabled = false
# Domains that are granted access, *.acme.com will match http://acme.com and http://sub.acme.com
domains = [ "*" ]
Expand All @@ -43,60 +50,89 @@ collector {
# with the following domain and expiration.
cookie {
enabled = true
expiration = "365 days" # e.g. "365 days"
expiration = "365 days"
# Network cookie name
name = "snowplow-micro"
name = "micro"
# The domain is optional and will make the cookie accessible to other
# applications on the domain. Comment out this line to tie cookies to
# the collector's full domain
domain = ""
# applications on the domain. Comment out these lines to tie cookies to
# the collector's full domain.
# The domain is determined by matching the domains from the Origin header of the request
# to the list below. The first match is used. If no matches are found, the fallback domain will be used,
# if configured.
# If you specify a main domain, all subdomains on it will be matched.
# If you specify a subdomain, only that subdomain will be matched.
# Examples:
# domain.com will match domain.com, www.domain.com and secure.client.domain.com
# client.domain.com will match secure.client.domain.com but not domain.com or www.domain.com
domains = [
"{{cookieDomain1}}" # e.g. "domain.com" -> any origin domain ending with this will be matched and domain.com will be returned
"{{cookieDomain2}}" # e.g. "secure.anotherdomain.com" -> any origin domain ending with this will be matched and secure.anotherdomain.com will be returned
# ... more domains
]
# ... more domains
# If specified, the fallback domain will be used if none of the Origin header hosts matches the list of
# cookie domains configured above. (For example, if there is no Origin header.)
fallback-domain = "{{fallbackDomain}}"
secure = false
http-only = false
# The same-site attribute is optional. You can leave it unspecified, or you can use `Strict`,
# `Lax` or `None` to limit the contexts in which the cookie is sent.
# Strict: the cookie will only be sent along with "same-site" requests.
# Lax: the cookie will be sent with same-site requests, and with cross-site top-level navigation.
# None: the cookie will be sent with same-site and cross-site requests.
same-site = "{{cookieSameSite}}"
}

# If you have a do not track cookie in place, the Scala Stream Collector can respect it by
# completely bypassing the processing of an incoming request carrying this cookie; the collector
# will simply reply with a 200 saying "do not track".
# The cookie name and value must match the configuration below, where the names of the cookies must
# match entirely and the value could be a regular expression.
doNotTrackCookie {
do-not-track-cookie {
enabled = false
name = ""
value = ""
name = "foo"
value = "bar"
}

# When enabled and the cookie specified above is missing, performs a redirect to itself to check
# if third-party cookies are blocked using the specified name. If they are indeed blocked,
# fallbackNetworkId is used instead of generating a new random one.
cookieBounce {
cookie-bounce {
enabled = false
# The name of the request parameter which will be used on redirects checking that third-party
# cookies work.
name = "n3pc"
# Network user id to fallback to when third-party cookies are blocked.
fallbackNetworkUserId = ""
fallback-network-user-id = "00000000-0000-4000-A000-000000000000"
# Optionally, specify the name of the header containing the originating protocol for use in the
# bounce redirect location. Use this if behind a load balancer that performs SSL termination.
# The value of this header must be http or https. For example, use this when behind an AWS Classic ELB.
forwardedProtocolHeader = "X-Forwarded-Proto"
forwarded-protocol-header = "X-Forwarded-Proto"
}

# When enabled, redirect prefix `r/` will be enabled and its query parameters resolved.
# Otherwise the request prefixed with `r/` will be dropped with `404 Not Found`
# Custom redirects configured in `paths` can still be used.
enable-default-redirect = true

# When enabled, the redirect url passed via the `u` query parameter is scanned for a placeholder
# token. All instances of that token are replaced with the network ID. If the placeholder isn't
# specified, the default value is `${SP_NUID}`.
redirectMacro {
redirect-macro {
enabled = false
# Optional custom placeholder token (defaults to the literal `${SP_NUID}`)
placeholder = "[TOKEN]"
}

# Customize response handling for requests for the root path ("/").
# Useful if you need to redirect to web content or privacy policies regarding the use of this collector.
rootResponse {
root-response {
enabled = false
statusCode = 302
status-code = 302
# Optional, defaults to empty map
headers = {
Location = "",
X-Custom = ""
Location = "https://127.0.0.1/",
X-Custom = "something"
}
# Optional, defaults to empty string
body = "302, redirecting"
Expand All @@ -106,21 +142,11 @@ collector {
cors {
# The Access-Control-Max-Age response header indicates how long the results of a preflight
# request can be cached. -1 seconds disables the cache. Chromium max is 10m, Firefox is 24h.
accessControlMaxAge = 5 seconds
}

# Configuration of prometheus http metrics
prometheusMetrics {
# If metrics are enabled then all requests will be logged as prometheus metrics
# and '/metrics' endpoint will return the report about the requests
enabled = false
# Custom buckets for http_request_duration_seconds_bucket duration metric
#durationBucketsInSeconds = [0.1, 3, 10]
#durationbucketsInSeconds = ${?COLLECTOR_PROMETHEUS_METRICS_DURATION_BUCKETS_IN_SECONDS}
access-control-max-age = 5 seconds
}

# Configuration of prometheus http metrics
prometheusMetrics {
prometheus-metrics {
# If metrics are enabled then all requests will be logged as prometheus metrics
# and '/metrics' endpoint will return the report about the requests
enabled = false
Expand All @@ -137,14 +163,15 @@ collector {

# Whether to use the incoming event's ip as the partition key for the good stream/topic
# Note: Nsq does not make use of partition key.
useIpAddressAsPartitionKey = false
use-ip-address-as-partition-key = false

# Enable the chosen sink by uncommenting the appropriate configuration
sink {
# Choose between kinesis, googlepubsub, kafka, nsq, or stdout.
# To use stdout, comment or remove everything in the "collector.streams.sink" section except
# "enabled" which should be set to "stdout".
enabled = stdout
type = stdout

}

# Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
Expand All @@ -154,9 +181,9 @@ collector {
# - the combined size of the stored records reaches byte-limit or
# - the time in milliseconds since the buffer was last emptied reaches time-limit
buffer {
byteLimit = 100000
recordLimit = 40
timeLimit = 1000
byte-limit = 100000
record-limit = 40
time-limit = 1000
}
}
}
Expand All @@ -165,9 +192,7 @@ collector {
# http://doc.akka.io/docs/akka/current/scala/general/configuration.html
akka {
loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging.
loglevel = ${?AKKA_LOGLEVEL}
loggers = ["akka.event.slf4j.Slf4jLogger"]
loggers = [${?AKKA_LOGGERS}]

# akka-http is the server the Stream collector uses and has configurable options defined at
# http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html
Expand All @@ -185,4 +210,22 @@ akka {
uri-parsing-mode = relaxed
}
}

# By default setting `collector.ssl` relies on JSSE (Java Secure Socket
# Extension) to enable secure communication.
# To override the default settings set the following section as per
# https://lightbend.github.io/ssl-config/ExampleSSLConfig.html
# ssl-config {
# debug = {
# ssl = true
# }
# keyManager = {
# stores = [
# {type = "PKCS12", classpath = false, path = "/etc/ssl/mycert.p12", password = "mypassword" }
# ]
# }
# loose {
# disableHostnameVerification = false
# }
# }
}
4 changes: 2 additions & 2 deletions project/Dependencies.scala
Expand Up @@ -12,7 +12,7 @@ object Dependencies {

val resolvers = Seq(
"Snowplow Bintray" at "https://snowplow.bintray.com/snowplow-maven",
"Snowplow Maven " at "https://maven.snplow.com/releases"
"Snowplow Maven " at "http://maven.snplow.com/releases"
)

object V {
Expand All @@ -21,7 +21,7 @@ object Dependencies {
val snowplowCommonEnrich = "1.3.0"

// circe
val circe = "0.13.0"
val circe = "0.11.1"

// specs2
val specs2 = "4.9.4"
Expand Down
20 changes: 12 additions & 8 deletions project/Settings.scala
@@ -1,11 +1,15 @@
/**
* PROPRIETARY AND CONFIDENTIAL
*
* Unauthorized copying of this file via any medium is strictly prohibited.
*
* Copyright (c) 2018 Snowplow Analytics Ltd. All rights reserved.
*/

/*
* Copyright (c) 2019-2020 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the Apache License Version 2.0 is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/
import sbt._
import Keys._

Expand Down

0 comments on commit 5c5a02a

Please sign in to comment.