Skip to content

Commit

Permalink
Merge eeb9bb9 into 7670d32
Browse files Browse the repository at this point in the history
  • Loading branch information
istreeter committed Jun 14, 2021
2 parents 7670d32 + eeb9bb9 commit f077af4
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 46 deletions.
Expand Up @@ -21,8 +21,6 @@ import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.CommonProperti
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.NumberProperty.{Maximum, MultipleOf}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.StringProperty.{Format, MaxLength, MinLength}

import com.snowplowanalytics.snowplow.postgres.loader._

sealed trait Type {
def ddl: String =
this match {
Expand Down Expand Up @@ -52,61 +50,64 @@ object Type {
case object Bool extends Type
case object Jsonb extends Type

type DataTypeSuggestion = (Schema, String) => Option[Type]
type DataTypeSuggestion = Schema => Option[Type]

val DefaultVarcharSize = 4096

/** Derive a Postgres type, given JSON Schema */
def getDataType(properties: Schema, varcharSize: Int, columnName: String, suggestions: List[DataTypeSuggestion]): Type =
def getDataType(properties: Schema, suggestions: List[DataTypeSuggestion]): Type =
suggestions match {
case Nil => Type.Varchar(4096) // Generic
case Nil => Type.Varchar(DefaultVarcharSize) // Generic
case suggestion :: tail =>
suggestion(properties, columnName) match {
suggestion(properties) match {
case Some(format) => format
case None => getDataType(properties, varcharSize, columnName, tail)
case None => getDataType(properties, tail)
}
}

// For complex enums Suggest VARCHAR with length of longest element
val complexEnumSuggestion: DataTypeSuggestion = (properties, _) =>
val complexEnumSuggestion: DataTypeSuggestion = properties =>
  properties.enum
    .filter(candidate => isComplexEnum(candidate.value))
    .map { candidate =>
      // Column width is the longest compact-JSON rendering of the members kept by
      // excludeNull; 16 is the fallback when no member is left to measure.
      val widest = excludeNull(candidate.value).map(_.noSpaces.length).maximumOption.getOrElse(16)
      Type.Varchar(widest)
    }

val productSuggestion: DataTypeSuggestion = (properties, _) =>
val productSuggestion: DataTypeSuggestion = properties =>
properties.`type` match {
case Some(t: SType.Union) if t.isUnion =>
Some(Type.Jsonb)
case Some(t: SType) if t === (SType.Array: SType) =>
case Some(SType.Array) =>
Some(Type.Jsonb)
case Some(SType.Union(types)) if types.contains(SType.Array) =>
Some(Type.Jsonb)
case _ => None
}

val timestampSuggestion: DataTypeSuggestion = (properties, _) =>
val timestampSuggestion: DataTypeSuggestion = properties =>
  properties.format match {
    // Only a (possibly nullable) string carrying the date-time format maps to TIMESTAMP
    case Some(Format.DateTimeFormat) if properties.`type`.exists(_.possiblyWithNull(SType.String)) =>
      Some(Type.Timestamp)
    case _ => None
  }

val dateSuggestion: DataTypeSuggestion = (properties, _) =>
val dateSuggestion: DataTypeSuggestion = properties => {
  // Both conditions must hold: a (possibly nullable) string type and the date format
  val isStringLike = properties.`type`.exists(_.possiblyWithNull(SType.String))
  val hasDateFormat = properties.format.contains(Format.DateFormat)
  if (isStringLike && hasDateFormat) Some(Type.Date) else None
}

val arraySuggestion: DataTypeSuggestion = (properties, _) =>
/* TODO: what is this supposed to match? Every array type is already matched by productSuggestion */
val arraySuggestion: DataTypeSuggestion = properties =>
properties.`type` match {
case Some(types) if types.possiblyWithNull(SType.Array) =>
Some(Type.Varchar(4096))
Some(Type.Varchar(DefaultVarcharSize))
case _ => None
}

val numberSuggestion: DataTypeSuggestion = (properties, _) =>
val numberSuggestion: DataTypeSuggestion = properties =>
(properties.`type`, properties.multipleOf) match {
case (Some(types), Some(MultipleOf.NumberMultipleOf(m))) if types.possiblyWithNull(SType.Number) && m === BigDecimal(1, 2) =>
Some(Type.Double)
Expand All @@ -119,55 +120,65 @@ object Type {
}

// TODO: add more sizes
val integerSuggestion: DataTypeSuggestion = (properties, _) => {
(properties.`type`, properties.maximum, properties.enum, properties.multipleOf) match {
case (Some(types), Some(maximum), _, _) if types.possiblyWithNull(SType.Integer) =>
val integerSuggestion: DataTypeSuggestion = properties => {
(properties.`type`, properties.maximum, properties.multipleOf) match {
case (Some(types), Some(maximum), _) if types.possiblyWithNull(SType.Integer) =>
if (isBigInt(maximum)) Type.BigInt.some
else Type.Integer.some
case (Some(types), None, _, _) if types.possiblyWithNull(SType.Integer) =>
case (Some(types), None, _) if types.possiblyWithNull(SType.Integer) =>
Type.BigInt.some
// Contains only enum
case (types, _, Some(_), _) if types.isEmpty || types.get.possiblyWithNull(SType.Integer) =>
case (Some(types), _, _) if types.possiblyWithNull(SType.Integer) =>
Type.Integer.some
case (Some(types), _, _, _) if types.possiblyWithNull(SType.Integer) =>
Type.Integer.some
case (_, _, _, Some(MultipleOf.IntegerMultipleOf(_))) =>
case (_, _, Some(MultipleOf.IntegerMultipleOf(_))) =>
Type.Integer.some
case _ => None
}
}

val charSuggestion: DataTypeSuggestion = (properties, _) => {
val charSuggestion: DataTypeSuggestion = properties =>
  for {
    types <- properties.`type`
    MinLength(lower) <- properties.minLength
    MaxLength(upper) <- properties.maxLength
    // A fixed-width CHAR is only suggested when both bounds coincide on a string type
    if lower === upper && types.possiblyWithNull(SType.String)
  } yield Type.Char(lower.toInt)

val booleanSuggestion: DataTypeSuggestion = (properties, _) => {
val booleanSuggestion: DataTypeSuggestion = properties =>
  // A (possibly nullable) boolean type maps to BOOL; anything else yields no suggestion
  properties.`type`
    .filter(_.possiblyWithNull(SType.Boolean))
    .map(_ => Type.Bool)

val uuidSuggestion: DataTypeSuggestion = (properties, _) => {
val uuidSuggestion: DataTypeSuggestion = properties =>
  properties.format match {
    case Some(Format.UuidFormat) =>
      // The uuid format only counts when declared on a (possibly nullable) string
      properties.`type`.filter(_.possiblyWithNull(SType.String)).map(_ => Type.Uuid)
    case _ => None
  }

val varcharSuggestion: DataTypeSuggestion = (properties, _) => {
(properties.`type`, properties.maxLength, properties.enum, properties.format) match {
case (Some(types), Some(maxLength), _, _) if types.possiblyWithNull(SType.String) =>
val varcharSuggestion: DataTypeSuggestion = properties => {
(properties.`type`, properties.maxLength, properties.format) match {
case (Some(types), Some(maxLength), _) if types.possiblyWithNull(SType.String) =>
Some(Type.Varchar(maxLength.value.toInt))
case (_, _, Some(enum), _) =>
case _ => None
}
}

val enumSuggestion: DataTypeSuggestion = properties => {
(properties.`type`, properties.enum) match {
case (Some(types), Some(_)) if types.possiblyWithNull(SType.Integer) =>
Some(Type.Integer)
case (Some(types), Some(_)) if types.possiblyWithNull(SType.Boolean) =>
Some(Type.Bool)
case (_, Some(e)) if e.value.nonEmpty && e.value.forall(_.asNumber.flatMap(_.toLong).isDefined) =>
Some(Type.Integer)
case (_, Some(e)) if e.value.nonEmpty && e.value.forall(_.asNumber.flatMap(_.toBigInt).isDefined) =>
Some(Type.BigInt)
case (_, Some(enum)) =>
enum.value.map(jsonLength).maximumOption match {
case Some(maxLength) if enum.value.lengthCompare(1) === 0 =>
Some(Type.Varchar(maxLength))
case Some(maxLength) =>
Some(Type.Varchar(maxLength))
case None => None
Expand All @@ -178,6 +189,7 @@ object Type {

val dataTypeSuggestions: List[DataTypeSuggestion] = List(
complexEnumSuggestion,
enumSuggestion,
productSuggestion,
timestampSuggestion,
dateSuggestion,
Expand Down
Expand Up @@ -268,7 +268,7 @@ object transform {
removeRoots(properties).map {
case (pointer, s: Schema) =>
val columnName: String = FlatSchema.getName(pointer)
val pgType = Type.getDataType(s, 4096, columnName, Type.dataTypeSuggestions)
val pgType = Type.getDataType(s, Type.dataTypeSuggestions)
(pointer, columnName, pgType, schema.canBeNull(s))
}

Expand Down
Expand Up @@ -34,8 +34,6 @@ object sql {

private lazy val logger = Slf4jLogHandler(getLogger)

val DefaultVarcharSize = 4096

/**
* Generate the `CREATE TABLE` DDL statement
* @param schema database schema
Expand Down Expand Up @@ -83,7 +81,7 @@ object sql {
if (migration.diff.added.nonEmpty) {
val columns = migration.diff.added.map {
case (pointer, schema) =>
buildColumn(DefaultVarcharSize, (pointer, schema))
buildColumn(pointer, schema)
}

val columnFragments = columns.foldLeft(Fragment.empty) { (acc, cur) =>
Expand All @@ -103,18 +101,15 @@ object sql {
/**
* Generate single ALTER TABLE statement for some new property
*
* @param varcharSize default size for VARCHAR
* @param pair pair of property name and its Schema properties like
* length, maximum, etc
* @param pointer the property pointer
* @param properties the property's schema, like length, maximum etc
* @return DDL statement altering single column in table
*/
def buildColumn(varcharSize: Int, pair: (Pointer.SchemaPointer, Schema)): Column =
pair match {
case (pointer, properties) =>
val columnName = FlatSchema.getName(pointer)
val dataType = Type.getDataType(properties, varcharSize, columnName, Type.dataTypeSuggestions)
Column(columnName, dataType, schema.canBeNull(properties))
}
def buildColumn(pointer: Pointer.SchemaPointer, properties: Schema): Column =
  Column(
    FlatSchema.getName(pointer),
    Type.getDataType(properties, Type.dataTypeSuggestions),
    schema.canBeNull(properties)
  )

case class Column(name: String, dataType: Type, nullable: Boolean) {

Expand Down
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2021 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the Apache License Version 2.0 is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/
package com.snowplowanalytics.snowplow.postgres.shredding

import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, NumberProperty, StringProperty}

import io.circe.Json

import org.specs2.mutable.Specification

/**
 * Unit tests for `Type.getDataType`, run against the default `Type.dataTypeSuggestions`.
 *
 * Each example builds a minimal JSON Schema fragment and asserts which Postgres
 * column type is derived from it.
 */
class TypeSpec extends Specification {

  "getDataType" should {

    "return timestamp type for a datetime field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.String), format = Some(StringProperty.Format.DateTimeFormat))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Timestamp
    }

    "return date type for a date field" >> {
      // Must use the date (not date-time) format, otherwise this only repeats the timestamp example
      val properties = Schema(`type` = Some(CommonProperties.Type.String), format = Some(StringProperty.Format.DateFormat))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Date
    }

    "return jsonb type for an array field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.Array))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Jsonb
    }

    "return bigint type for integer field with no maximum" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.Integer))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.BigInt
    }

    "return integer type for integer field with small maximum" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.Integer), maximum = Some(NumberProperty.Maximum.IntegerMaximum(100)))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Integer
    }

    "return bigint type for integer field with large maximum" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.Integer), maximum = Some(NumberProperty.Maximum.IntegerMaximum(10000000000L)))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.BigInt
    }

    "return double type for a number field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.Number))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Double
    }

    "return boolean type for a boolean field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.Boolean))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Bool
    }

    "return char type for a fixed length string field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.String), minLength = Some(StringProperty.MinLength(42)), maxLength = Some(StringProperty.MaxLength(42)))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Char(42)
    }

    "return uuid type for a uuid field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.String), format = Some(StringProperty.Format.UuidFormat))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Uuid
    }

    "return varchar type with max length for a string field" >> {
      val properties = Schema(`type` = Some(CommonProperties.Type.String), maxLength = Some(StringProperty.MaxLength(42)))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Varchar(42)
    }

    "return varchar type for a string enum field" >> {
      val properties = Schema(enum = Some(CommonProperties.Enum(List(Json.fromString("xxx"), Json.fromString("yyy")))))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Varchar(3)
    }

    "return integer type for an integer enum field" >> {
      val properties = Schema(enum = Some(CommonProperties.Enum(List(Json.fromInt(1), Json.fromInt(2)))))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Integer
    }

    "return varchar type for a complex enum field" >> {
      // Mixed-type enum: the expected width (5) is the length of the longest member, "12345"
      val properties = Schema(enum = Some(CommonProperties.Enum(List(Json.fromInt(12345), Json.fromString("y")))))
      Type.getDataType(properties, Type.dataTypeSuggestions) must_== Type.Varchar(5)
    }

  }
}

0 comments on commit f077af4

Please sign in to comment.