-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
RDB Loader: add 2nd gen load manifest table (close #366)
- Loading branch information
Showing
19 changed files
with
577 additions
and
127 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/db/Control.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package com.snowplowanalytics.snowplow.rdbloader.db | ||
|
||
import cats.{Functor, Monad} | ||
import cats.implicits._ | ||
|
||
import com.snowplowanalytics.snowplow.rdbloader.dsl.JDBC | ||
import com.snowplowanalytics.snowplow.rdbloader.{LoaderAction, LoaderError} | ||
|
||
import com.snowplowanalytics.iglu.schemaddl.redshift.generators.DdlFile | ||
import com.snowplowanalytics.iglu.schemaddl.redshift.{AlterTable, RenameTo} | ||
|
||
/**
 * Shared helpers for inspecting and altering database entities.
 * Every helper sends its statement through the `JDBC` algebra available for `F`
 * and returns the outcome as a `LoaderAction`.
 */
object Control {

  /**
   * Rename a table by executing an `ALTER TABLE ... RENAME TO` DDL statement
   *
   * @param schema schema the table lives in; used to qualify `from`
   * @param from current table name, without the schema qualifier
   * @param to new table name, passed to `RenameTo` as is
   * @return an action discarding the result of the update
   */
  def renameTable[F[_]: Functor: JDBC](schema: String, from: String, to: String): LoaderAction[F, Unit] = {
    val rename = AlterTable(s"$schema.$from", RenameTo(to))
    val statement = Statement.DdlFile(DdlFile(List(rename)))
    JDBC[F].executeUpdate(statement).void
  }

  /**
   * Run the `TableExists` query for a table.
   * A `StorageTargetError` raised by the query gets prefixed with the table's
   * qualified name (see `annotateError`)
   */
  def tableExists[F[_]: Functor: JDBC](dbSchema: String, tableName: String): LoaderAction[F, Boolean] = {
    val query = Statement.TableExists(dbSchema, tableName)
    JDBC[F]
      .executeQuery[Boolean](query)
      .leftMap(error => annotateError(dbSchema, tableName)(error))
  }

  /**
   * Run the `SchemaExists` query for a schema
   *
   * @return `true` if the query produced a value, `false` if it produced none
   */
  def schemaExists[F[_]: Functor: JDBC](dbSchema: String): LoaderAction[F, Boolean] = {
    val query = Statement.SchemaExists(dbSchema)
    JDBC[F].executeQueryOption[String](query).map(found => found.nonEmpty)
  }

  /**
   * List all columns in the table.
   * The `SetSchema` statement is executed first, then `GetColumns` is queried
   * with the bare table name; a `StorageTargetError` raised by the query is
   * prefixed with the table's qualified name
   */
  def getColumns[F[_]: Monad: JDBC](dbSchema: String, tableName: String): LoaderAction[F, List[String]] =
    JDBC[F].executeUpdate(Statement.SetSchema(dbSchema)).flatMap { _ =>
      JDBC[F]
        .executeQueryList[String](Statement.GetColumns(tableName))
        .leftMap(error => annotateError(dbSchema, tableName)(error))
    }

  /**
   * Prefix the message of a `StorageTargetError` with `dbSchema.tableName. `
   *
   * @param dbSchema schema name used in the prefix
   * @param tableName table name used in the prefix
   * @param error error to annotate
   * @return annotated `StorageTargetError`; any other error is returned unchanged
   */
  def annotateError(dbSchema: String, tableName: String)(error: LoaderError): LoaderError =
    error match {
      case LoaderError.StorageTargetError(message) =>
        val prefix = s"$dbSchema.$tableName. "
        LoaderError.StorageTargetError(prefix ++ message)
      case unrelated =>
        unrelated
    }
}
119 changes: 119 additions & 0 deletions
119
modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/db/Manifest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package com.snowplowanalytics.snowplow.rdbloader.db | ||
|
||
import java.time.Instant | ||
|
||
import cats.{Functor, Monad} | ||
import cats.data.NonEmptyList | ||
import cats.implicits._ | ||
|
||
import cats.effect.{Timer, Async, Blocker, ContextShift} | ||
|
||
import doobie.implicits.javatimedrivernative._ | ||
|
||
import com.snowplowanalytics.iglu.schemaddl.redshift._ | ||
|
||
import com.snowplowanalytics.snowplow.rdbloader._ | ||
import com.snowplowanalytics.snowplow.rdbloader.LoaderAction | ||
import com.snowplowanalytics.snowplow.rdbloader.common.S3 | ||
import com.snowplowanalytics.snowplow.rdbloader.common.LoaderMessage | ||
import com.snowplowanalytics.snowplow.rdbloader.common.config.StorageTarget | ||
import com.snowplowanalytics.snowplow.rdbloader.dsl.{Logging, JDBC, AWS} | ||
|
||
/**
 * Definition of the load manifest table and the actions operating on it:
 * creating the table, migrating a legacy manifest out of the way,
 * adding entries and looking them up.
 */
object Manifest {

  /** Name of the manifest table */
  val Name = "manifest"

  /** Name a legacy manifest table is renamed to when it is migrated */
  val LegacyName = "manifest_legacy"

  /** Column definitions of the manifest table */
  private[db] val Columns = List(
    Column("base", RedshiftVarchar(512), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull), KeyConstaint(PrimaryKey))),
    Column("types", RedshiftVarchar(65535), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(Null))),
    Column("shredding_started", RedshiftTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))),
    Column("shredding_completed", RedshiftTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))),
    Column("min_collector_tstamp", RedshiftTimestamp, Set(CompressionEncoding(RawEncoding)), Set(Nullability(Null))),
    Column("max_collector_tstamp", RedshiftTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(Null))),
    Column("ingestion_tstamp", RedshiftTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))),

    Column("compression", RedshiftVarchar(16), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))),

    Column("processor_artifact", RedshiftVarchar(64), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))),
    Column("processor_version", RedshiftVarchar(32), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull)))
  )

  /** Column names identifying a legacy manifest; `setup` compares them as a set */
  private val LegacyColumns = List(
    "etl_tstamp",
    "commit_tstamp",
    "event_count",
    "shredded_cardinality"
  )

  /**
   * Build the `CREATE TABLE` definition of the manifest for a given schema.
   * Only the qualified table name depends on `schema`; columns and table
   * attributes (distribution by `base`, sort key on `ingestion_tstamp`) are static
   */
  def getManifestDef(schema: String): CreateTable = {
    val qualifiedName = s"$schema.$Name"
    val sortKey = SortKeyTable(None, NonEmptyList.one("ingestion_tstamp"))
    CreateTable(
      qualifiedName,
      Columns,
      Set.empty,
      Set(Diststyle(Key), DistKeyTable("base"), sortKey)
    )
  }

  /**
   * Acquire a JDBC interpreter for the target, run `setup` and report the outcome
   *
   * @param target storage target; its `schema` is where the manifest is set up
   * @param dryRun forwarded to `JDBC.interpreter`
   * @param blocker forwarded to `JDBC.interpreter`
   * @return unit after logging the outcome (nothing is logged when there were
   *         no changes); if `setup` ends with an error, the error is logged and
   *         an `IllegalStateException` carrying the shown error is raised in `F`
   */
  def initialize[F[_]: Async: ContextShift: Logging: Timer: AWS](target: StorageTarget, dryRun: Boolean, blocker: Blocker): F[Unit] =
    JDBC.interpreter[F](target, dryRun, blocker).use { implicit jdbc =>
      setup[F](target.schema).value.flatMap {
        case Right(status) =>
          status match {
            case InitStatus.Created =>
              Logging[F].info("The manifest table has been created")
            case InitStatus.Migrated =>
              Logging[F].info(s"The new manifest table has been created, legacy 0.1.0 manifest can be found at $LegacyName and can be deleted manually")
            case InitStatus.NoChanges =>
              Monad[F].unit
          }
        case Left(error) =>
          Logging[F].error("Fatal error has happened during manifest table initialization") *>
            Async[F].raiseError[Unit](new IllegalStateException(error.show))
      }
    }

  /**
   * Bring the manifest table of `schema` to its current shape. Steps, in order:
   *  1. fail with a `StorageTargetError` if the schema does not exist;
   *  2. if the manifest table is missing, create it (`Created`);
   *  3. if it exists and its columns are exactly the legacy ones, rename it to
   *     `LegacyName` and create a new table (`Migrated`);
   *  4. if it exists with any other set of columns, leave it alone (`NoChanges`);
   *  5. after `Created` or `Migrated`, put the "0.2.0" comment on the new table.
   */
  def setup[F[_]: Monad: JDBC](schema: String): LoaderAction[F, InitStatus] = {
    // Step 1: the schema has to be there already, it is never created here
    def requireSchema: LoaderAction[F, Unit] =
      Control.schemaExists[F](schema).flatMap { schemaFound =>
        if (schemaFound) LoaderAction.unit[F]
        else {
          val msg = s"Database schema $schema does not exist. Please, create the schema and events table"
          LoaderAction.raiseError[F, Unit](LoaderError.StorageTargetError(msg))
        }
      }

    // Steps 3-4: decide what to do with a manifest table that is already present
    def migrateIfLegacy: LoaderAction[F, InitStatus] =
      Control.getColumns[F](schema, Name).flatMap { columns =>
        val legacy = columns.toSet === LegacyColumns.toSet
        if (legacy)
          Control.renameTable[F](schema, Name, LegacyName) *>
            create[F](schema).as[InitStatus](InitStatus.Migrated)
        else
          LoaderAction.pure[F, InitStatus](InitStatus.NoChanges)
      }

    // Step 5: only a freshly created table receives the version comment
    def commentOnNewTable(status: InitStatus): LoaderAction[F, Unit] =
      status match {
        case InitStatus.Created | InitStatus.Migrated =>
          JDBC[F].executeUpdate(Statement.CommentOn(CommentOn(s"$schema.$Name", "0.2.0"))).void
        case InitStatus.NoChanges =>
          LoaderAction.unit[F]
      }

    for {
      _           <- requireSchema
      tableExists <- Control.tableExists[F](schema, Name)
      status      <- if (tableExists) migrateIfLegacy
                     else create[F](schema).as[InitStatus](InitStatus.Created)
      _           <- commentOnNewTable(status)
    } yield status
  }

  /** Execute the `ManifestAdd` statement for a shredding-complete message, discarding the update result */
  def add[F[_]: Functor: JDBC](schema: String, message: LoaderMessage.ShreddingComplete): LoaderAction[F, Unit] = {
    val insert = Statement.ManifestAdd(schema, message)
    JDBC[F].executeUpdate(insert).void
  }

  /** Query the manifest with `ManifestGet` for a base folder; the result is `None` when the query yields nothing */
  def get[F[_]: Functor: JDBC](schema: String, base: S3.Folder): LoaderAction[F, Option[Entry]] = {
    val lookup = Statement.ManifestGet(schema, base)
    JDBC[F].executeQueryOption[Entry](lookup)
  }

  /** Create manifest table from the definition returned by `getManifestDef` */
  def create[F[_]: Functor: JDBC](schema: String): LoaderAction[F, Unit] = {
    val ddl = Statement.CreateTable(getManifestDef(schema))
    JDBC[F].executeUpdate(ddl).void
  }

  /** A manifest record: an `ingestion` instant paired with the shredding-complete message in `meta` */
  case class Entry(ingestion: Instant, meta: LoaderMessage.ShreddingComplete)

  /** Outcome of `setup` */
  sealed trait InitStatus extends Product with Serializable
  object InitStatus {
    /** A non-legacy manifest table was already present and nothing was done */
    case object NoChanges extends InitStatus
    /** A legacy manifest was renamed and a new table created */
    case object Migrated extends InitStatus
    /** No manifest table existed and a new one was created */
    case object Created extends InitStatus
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.