From 88fd26b9e5a796ece6d7eebbf305c17f1b2a3c52 Mon Sep 17 00:00:00 2001 From: Konstantinos Servis Date: Tue, 20 Mar 2018 14:43:11 +0200 Subject: [PATCH] FIXUP Scala Common Enrich: extend PII Enrichment to include identification events in EnrichedEvent (closes #3580) --- .../registry/pii/PiiPseudonymizerEnrichment.scala | 11 ++++++++++- .../common/outputs/EnrichedEvent.scala | 2 +- .../registry/pii/PiiPseudonymizerEnrichmentSpec.scala | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 53aa34ec73..746724e0c0 100644 --- a/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -23,9 +23,11 @@ import scala.collection.mutable.MutableList import org.json4s import org.json4s.{DefaultFormats, JValue} import org.json4s.JsonAST._ +import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.jackson.Serialization.write +import org.json4s.Extraction.decompose // Java import org.apache.commons.codec.digest.DigestUtils @@ -166,9 +168,16 @@ case class PiiPseudonymizerEnrichment(fieldList: List[PiiField], strategy: PiiStrategy) extends Enrichment { implicit val json4sFormats = DefaultFormats + new PiiModifiedFieldsSerializer + new PiiStrategySerializer + private val UnstructEventSchema = + SchemaKey("com.snowplowanalytics.snowplow", "unstruct_event", "jsonschema", "1-0-0").toSchemaUri def transformer(event: EnrichedEvent): Unit = { val 
modifiedFields: ModifiedFields = fieldList.flatMap(_.transform(event, strategy)) - event.pii = if (modifiedFields.nonEmpty) write(PiiModifiedFields(modifiedFields, strategy)) else null + event.pii = + if (modifiedFields.nonEmpty) + write( + ("schema" -> UnstructEventSchema) ~ ("data" -> decompose(PiiModifiedFields(modifiedFields, strategy))) + ) + else null } } diff --git a/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala b/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala index df324fda92..89a6ffd5c2 100644 --- a/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala +++ b/3-enrich/scala-common-enrich/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala @@ -245,6 +245,6 @@ class EnrichedEvent extends Serializable { // True timestamp @BeanProperty var true_tstamp: String = _ - // Fields modified form PII enrichemnt (JSON String) + // Fields modified in PII enrichment (JSON String) @BeanProperty var pii: String = _ } diff --git a/3-enrich/scala-common-enrich/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/3-enrich/scala-common-enrich/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index d5a653c732..108c63ad42 100644 --- a/3-enrich/scala-common-enrich/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/3-enrich/scala-common-enrich/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -450,7 +450,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidationMatche expected.etl_tstamp = "1970-01-18 08:40:00.000" 
expected.collector_tstamp = "2017-07-14 03:39:39.000" expected.pii = - """{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_fingerprint","originalValue":"its_you_again!","modifiedValue":"9f9fc89b7a5428f2646347974404650fc8776f791afc2200efc8a82aa754e7e6"},{"fieldName":"user_ipaddress","originalValue":"70.46.123.145","modifiedValue":"36595ea260a82b7e2d7cf44121892bf31031a9c27077d8c802454464178456c2"},{"fieldName":"user_id","originalValue":"john@acme.com","modifiedValue":"4b2d8785b49bad23638b17d8db76857a79bf79441241a78a97d88cc64bbf766e"}],"json":[{"field":"unstruct_event","originalValue":"50.56.129.169","modifiedValue":"b5814ada7bb3abb2ed7f8713433a60ed3b3780f7d98a95c936cc62abb16f316f","jsonPath":"$.ip","schema":"iglu:com.mailgun/message_clicked/jsonschema/1-0-0"},{"field":"contexts","originalValue":"bob@acme.com","modifiedValue":"405ac8384fa984f787f9486daf34d84d98f20c4d6a12e2cc4ed89be3bcb06ad6","jsonPath":"$.data.emailAddress2","schema":"iglu:com.acme/email_sent/jsonschema/1-1-0"},{"field":"contexts","originalValue":"jim@acme.com","modifiedValue":"3571b422ecb9ac85cb654b2fce521ae351d4695b0fb788aac75caf724e7881f0","jsonPath":"$.emailAddress","schema":"iglu:com.acme/email_sent/jsonschema/1-0-0"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}""" + 
"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_fingerprint","originalValue":"its_you_again!","modifiedValue":"9f9fc89b7a5428f2646347974404650fc8776f791afc2200efc8a82aa754e7e6"},{"fieldName":"user_ipaddress","originalValue":"70.46.123.145","modifiedValue":"36595ea260a82b7e2d7cf44121892bf31031a9c27077d8c802454464178456c2"},{"fieldName":"user_id","originalValue":"john@acme.com","modifiedValue":"4b2d8785b49bad23638b17d8db76857a79bf79441241a78a97d88cc64bbf766e"}],"json":[{"field":"unstruct_event","originalValue":"50.56.129.169","modifiedValue":"b5814ada7bb3abb2ed7f8713433a60ed3b3780f7d98a95c936cc62abb16f316f","jsonPath":"$.ip","schema":"iglu:com.mailgun/message_clicked/jsonschema/1-0-0"},{"field":"contexts","originalValue":"bob@acme.com","modifiedValue":"405ac8384fa984f787f9486daf34d84d98f20c4d6a12e2cc4ed89be3bcb06ad6","jsonPath":"$.data.emailAddress2","schema":"iglu:com.acme/email_sent/jsonschema/1-1-0"},{"field":"contexts","originalValue":"jim@acme.com","modifiedValue":"3571b422ecb9ac85cb654b2fce521ae351d4695b0fb788aac75caf724e7881f0","jsonPath":"$.emailAddress","schema":"iglu:com.acme/email_sent/jsonschema/1-0-0"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}}""" output.size must_== 1 val out = output.head