Permalink
Browse files

Merge pull request #186 from utzwestermann/BigQuery-Support

Changed the Maven "provided"-scope dependencies to the default (compile) scope so that the export fat jar is self-contained
  • Loading branch information...
utzwestermann committed Jan 12, 2018
2 parents 64a6d8c + 6cae259 commit e130b92ce6742d96f68148e91a4c3417246e51d3
@@ -459,4 +459,4 @@
</plugin>
</plugins>
</build>
</project>
</project>
@@ -32,13 +32,13 @@
public static final String EXPORT_ANON_SALT = "export.anon.salt";
@Option(name = "-s", usage = "set to true if kerberos is enabled")
@Option(name = "-s", usage = "set if Kerberos is enabled")
protected boolean isSecured = false;
@Option(name = "-m", usage = "specify the metastore URIs", required = true)
@Option(name = "-m", usage = "specify the Metastore URI", required = true)
protected String metaStoreUris = "";
@Option(name = "-p", usage = "the kerberos principal", depends = {"-s"})
@Option(name = "-p", usage = "the Kerberos principal for accessing the metastore, e.g., hive/_HOST@DOMAIN.COM", depends = {"-s"})
protected String principal;
@Option(name = "-d", usage = "input database", required = true)
@@ -48,19 +48,18 @@
private static final Log LOG = LogFactory.getLog(BigQueryExportJob.class);
@Option(name = "-P", usage = "the GCP project ID under which to create the resulting BigQuery dataset, e.g., project 4711. If not passed, the default GCP project will be used")
@Option(name = "-P", usage = "the GCP project ID under which to create the resulting BigQuery dataset, e.g., project-4711. If not passed, the default GCP project will be used")
private String project;
@Option(name = "-D", usage = "the BigQuery table partition date into which to insert the exported data, e.g., 20171001. If not passed, it is assumed that the resulting BigQuery table is not partitioned")
private String partitionDate;
@Option(name = "-x", usage = "the postfix to append to the resulting BigQuery table name, e.g., EC0101. If not passed, no postfix will be appended")
@Option(name = "-x", usage = "the postfix to append with an underscore to the resulting BigQuery table name, e.g., mypartitionpostfix. If not passed, no postfix will be appended")
private String tablePostfix;
@Option(name = "-l", usage = "the location where to store the resulting BigQuery table, e.g., US. If not passed, EU will be used")
private String tableStorageLocation;
@Option(name = "-k", usage = "GCP key to use for authentication in JSON format. If not passed, the gcloud default user will be used")
private String gcpKey;
@@ -172,7 +171,7 @@ private Job prepareJobObject(Configuration conf) throws IOException, TException
+ inputTable);
job.setJarByClass(BigQueryExportJob.class);
job.setMapperClass(Mapper.class);
job.setMapperClass(BigQueryExportMapper.class);
job.setReducerClass(Reducer.class);
job.setMapOutputKeyClass(LongWritable.class);
@@ -0,0 +1,21 @@
package org.schedoscope.export.bigquery;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hive.hcatalog.data.HCatRecord;
import java.io.IOException;
/**
 * Mapper for the BigQuery export job that passes every incoming
 * {@link HCatRecord} through unchanged while replacing its key with a
 * {@link LongWritable}.
 *
 * <p>The emitted key is the current value of this task's
 * {@link TaskCounter#MAP_INPUT_RECORDS} counter at the time the record is
 * mapped. NOTE(review): presumably this serves to spread records across
 * reducers; confirm against the job setup.
 */
public class BigQueryExportMapper extends Mapper<WritableComparable<?>, HCatRecord, LongWritable, HCatRecord> {

    /**
     * Emits {@code value} under a key taken from the map-input-records counter.
     *
     * @param key     the incoming key; it is not used
     * @param value   the record to forward as-is
     * @param context the task context used to read the counter and write output
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(WritableComparable<?> key, HCatRecord value,
                       Context context) throws IOException, InterruptedException {
        // Snapshot of the counter for this map task at this point in time.
        final long inputRecordCount =
                context.getCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();

        context.write(new LongWritable(inputRecordCount), value);
    }
}

0 comments on commit e130b92

Please sign in to comment.