Permalink
Browse files

Merge pull request #185 from utzwestermann/BigQuery-Support

Big query support
  • Loading branch information...
utzwestermann committed Dec 22, 2017
2 parents 43ce4b6 + 726a7b4 commit 91dc798750f782140efbc790b4530321e12e14dc
Showing with 3,720 additions and 90 deletions.
  1. +63 −0 schedoscope-conf/src/main/resources/reference.conf
  2. +46 −1 schedoscope-conf/src/main/scala/org/schedoscope/conf/BaseSettings.scala
  3. +0 −9 schedoscope-core/pom.xml
  4. +295 −54 schedoscope-core/src/main/scala/org/schedoscope/dsl/transformations/Export.scala
  5. +1 −1 schedoscope-core/src/test/resources/log4j.properties
  6. +58 −5 schedoscope-core/src/test/scala/org/schedoscope/dsl/transformations/ExportTest.scala
  7. +22 −0 schedoscope-core/src/test/scala/test/views/TestViews.scala
  8. +55 −0 schedoscope-export/README.md
  9. +17 −2 schedoscope-export/pom.xml
  10. +215 −0 schedoscope-export/src/main/java/org/schedoscope/export/bigquery/BigQueryExportJob.java
  11. +376 −0 ...xport/src/main/java/org/schedoscope/export/bigquery/outputformat/BigQueryOutputConfiguration.java
  12. +184 −0 ...scope-export/src/main/java/org/schedoscope/export/bigquery/outputformat/BigQueryOutputFormat.java
  13. +93 −0 ...e-export/src/main/java/org/schedoscope/export/bigquery/outputformat/BiqQueryHCatRecordWriter.java
  14. +190 −0 .../src/main/java/org/schedoscope/export/bigquery/outputschema/HCatRecordToBigQueryMapConvertor.java
  15. +232 −0 ...c/main/java/org/schedoscope/export/bigquery/outputschema/HCatSchemaToBigQuerySchemaConverter.java
  16. +20 −0 ...doscope-export/src/main/java/org/schedoscope/export/bigquery/outputschema/PartitioningScheme.java
  17. +1 −6 schedoscope-export/src/main/java/org/schedoscope/export/jdbc/outputschema/SchemaUtils.java
  18. +4 −4 schedoscope-export/src/main/java/org/schedoscope/export/redis/outputformat/RedisOutputFormat.java
  19. +272 −0 schedoscope-export/src/main/java/org/schedoscope/export/utils/BigQueryUtils.java
  20. +168 −0 schedoscope-export/src/main/java/org/schedoscope/export/utils/CloudStorageUtils.java
  21. +1 −2 schedoscope-export/src/main/java/org/schedoscope/export/utils/HCatRecordJsonSerializer.java
  22. +365 −0 schedoscope-export/src/main/java/org/schedoscope/export/utils/HCatSchemaToBigQueryTransformer.java
  23. +0 −1 schedoscope-export/src/main/java/org/schedoscope/export/utils/HCatUtils.java
  24. +7 −5 schedoscope-export/src/test/java/org/schedoscope/export/HiveUnitBaseTest.java
  25. +108 −0 schedoscope-export/src/test/java/org/schedoscope/export/bigquery/BigQueryBaseTest.java
  26. +95 −0 schedoscope-export/src/test/java/org/schedoscope/export/bigquery/BigQueryExportTest.java
  27. +216 −0 ...e-export/src/test/java/org/schedoscope/export/bigquery/outputschema/BigQueryOutputFormatTest.java
  28. +608 −0 ...c/test/java/org/schedoscope/export/bigquery/outputschema/HCatSchemaToBigQueryTransformerTest.java
  29. +8 −0 schedoscope-export/src/test/resources/log4j.properties
@@ -329,6 +329,69 @@
salt = "vD75MqvaasIlCf7H"
#
# BigQuery exporter settings.
#
bigQuery {
#
# GCP project ID under which the exported BigQuery dataset will be created.
# If left empty, the default project of the current user is used.
#
projectId = ""
#
# GCP key in JSON format to use for authentication.
# If left empty, the key of the current user is used.
#
gcpKey = ""
#
# Number of reducers to use for parallel writing to BigQuery.
#
numberOfReducers = 10
#
# GCP data storage location of the exported data within BigQuery.
#
dataLocation = "EU"
#
# GCP Cloud Storage bucket to use for temporary storage while exporting to BigQuery.
#
exportStorageBucket = "schedoscope_bigquery_export"
#
# Folder prefix to apply to blobs in the GCP Cloud Storage bucket while exporting to BigQuery.
#
exportStorageBucketFolderPrefix = ""
#
# GCP Cloud Storage bucket region to use for exporting to BigQuery.
#
exportStorageBucketRegion = "europe-west3"
#
# Host of the proxy to use for GCP API access. Leave empty to use no proxy.
#
proxyHost = ""
#
# Port of the proxy to use for GCP API access, given as a string.
# Leave empty to use no proxy.
#
proxyPort = ""
}
#
# JDBC exporter settings.
#
@@ -247,10 +247,55 @@ class BaseSettings(val config: Config) {
lazy val redisExportBatchSize = config.getInt("schedoscope.export.redis.insertBatchSize")
/**
* Number of reducers to use for Kafka export.
*/
lazy val kafkaExportNumReducers = config.getInt("schedoscope.export.kafka.numberOfReducers")
/**
* GCP project ID under which the exported BigQuery dataset will be created.
* Defaults to the default project of the current user.
*/
lazy val bigQueryExportProjectId = config.getString("schedoscope.export.bigQuery.projectId")
/**
* Number of reducers to use for BigQuery export.
*/
lazy val bigQueryExportNumReducers = config.getInt("schedoscope.export.bigQuery.numberOfReducers")
/**
* GCP data storage location of the exported data within BigQuery. Defaults to "EU".
*/
lazy val bigQueryExportDataLocation = config.getString("schedoscope.export.bigQuery.dataLocation")
/**
* GCP key in JSON format to use for authentication when exporting to BigQuery.
* If not set, the key of the current user is used.
*/
lazy val bigQueryExportGcpKey = config.getString("schedoscope.export.bigQuery.gcpKey")
/**
* GCP Cloud Storage bucket to use for temporary storage while exporting to BigQuery.
* Defaults to "schedoscope_bigquery_export".
*/
lazy val bigQueryExportStorageBucket = config.getString("schedoscope.export.bigQuery.exportStorageBucket")
/**
* Folder prefix to apply to blobs in the GCP Cloud Storage bucket while exporting to BigQuery.
* Defaults to the empty string.
*/
lazy val bigQueryExportStorageBucketFolderPrefix = config.getString("schedoscope.export.bigQuery.exportStorageBucketFolderPrefix")
/**
* GCP Cloud Storage bucket region to use for exporting to BigQuery. Defaults to "europe-west3".
*/
lazy val bigQueryExportStorageBucketRegion = config.getString("schedoscope.export.bigQuery.exportStorageBucketRegion")
/**
* Host of the proxy to use for GCP API access. Leave empty to use no proxy.
*/
lazy val bigQueryExportProxyHost = config.getString("schedoscope.export.bigQuery.proxyHost")
/**
* Port of the proxy to use for GCP API access, read as a string. Leave empty to use no proxy.
*/
lazy val bigQueryExportProxyPort = config.getString("schedoscope.export.bigQuery.proxyPort")
/**
* Number of reducers to use for (S)Ftp export.
*/
View
@@ -51,11 +51,6 @@
<artifactId>schedoscope-conf</artifactId>
<version>${schedoscope.version}</version>
</dependency>
<dependency>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
<version>11.0</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
@@ -232,10 +227,6 @@
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
<exclusion>
<artifactId>jsp-api</artifactId>
<groupId>javax.servlet.jsp</groupId>
Oops, something went wrong.

0 comments on commit 91dc798

Please sign in to comment.