Permalink
Browse files

Export to any db changes

  • Loading branch information...
1 parent 9f75aa1 commit d07133faacd3a66c2823f1272855d75966d75eef @sonalgoyal committed Aug 3, 2011
View
6 README
@@ -139,6 +139,12 @@ For Delimited Text Files:
${HIHO_HOME}/scripts/hiho merge -newPath testData/merge/inputNew/fileInNewPath.txt -oldPath testData/merge/inputOld/fileInOldPath.txt -mergeBy value -outputPath output -inputFormat co.nubetech.hiho.dedup.DelimitedTextInputFormat -inputKeyClassName org.apache.hadoop.io.Text -inputValueClassName org.apache.hadoop.io.Text
+8. Export to DB:-
+bin/hadoop jar deploy/hiho-0.4.0.jar co.nubetech.hiho.job.ExportToDB -jdbcDriver <jdbcDriverName> -jdbcUrl <jdbcUrl> -jdbcUsername <jdbcUserName> -jdbcPassword <jdbcPassword> -delimiter <delimiter> -numberOfMappers <numberOfMappers> -tableName <tableName> -columnNames <columnNames> -inputPath <inputPath>
+or
+${HIHO_HOME}/scripts/hiho export db -jdbcDriver <jdbcDriverName> -jdbcUrl <jdbcUrl> -jdbcUsername <jdbcUserName> -jdbcPassword <jdbcPassword> -delimiter <delimiter> -numberOfMappers <numberOfMappers> -tableName <tableName> -columnNames <columnNames> -inputPath <inputPath>
+
+
New Features in this release
-incremental import and introduction of AppendFileInputFormat
-Oracle export
View
@@ -27,7 +27,7 @@
<property environment="env" />
<property name="name" value="hiho" />
<property name="Name" value="HIHO" />
- <property name="version" value="0.4.0" />
+ <property name="version" value="0.5.0" />
<property name="artifact.name" value="${name}-${version}" />
@@ -74,5 +74,7 @@
	// conf for mergeJob
	public static final String MERGE_OLD_PATH = "mapreduce.jdbc.hiho.merge.mergeOldPath";
	public static final String MERGE_NEW_PATH = "mapreduce.jdbc.hiho.merge.mergeNewPath";

	// conf for the export-to-DB job: a Jackson JSON-serialized list of
	// ColumnInfo describing the target table's columns; written by the job
	// setup and read back in GenericDBLoadDataMapper.setup()
	public static final String COLUMN_INFO = "mapreduce.jdbc.hiho.db.columnInfo";
}
@@ -0,0 +1,139 @@
+/**
+ * Copyright 2011 Nube Technologies
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package co.nubetech.hiho.job;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import co.nubetech.apache.hadoop.DBConfiguration;
+import co.nubetech.apache.hadoop.MRJobConfig;
+import co.nubetech.hiho.common.HIHOConf;
+import co.nubetech.hiho.common.HIHOException;
+import co.nubetech.hiho.mapreduce.GenericDBLoadDataMapper;
+import co.nubetech.hiho.mapreduce.lib.db.GenericDBOutputFormat;
+
+public class ExportToDB extends Configured implements Tool {
+
+ private String inputPath = null;
+ private String tableName = null;
+ private String columnNames = null;
+
+ public void populateConfiguration(String[] args, Configuration conf) {
+ for (int i = 0; i < args.length - 1; i++) {
+ if ("-jdbcDriver".equals(args[i])) {
+ conf.set(DBConfiguration.DRIVER_CLASS_PROPERTY, args[++i]);
+ } else if ("-jdbcUrl".equals(args[i])) {
+ conf.set(DBConfiguration.URL_PROPERTY, args[++i]);
+ } else if ("-jdbcUsername".equals(args[i])) {
+ conf.set(DBConfiguration.USERNAME_PROPERTY, args[++i]);
+ } else if ("-jdbcPassword".equals(args[i])) {
+ conf.set(DBConfiguration.PASSWORD_PROPERTY, args[++i]);
+ } else if ("-delimiter".equals(args[i])) {
+ conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, args[++i]);
+ } else if ("-numberOfMappers".equals(args[i])) {
+ conf.set(HIHOConf.NUMBER_MAPPERS, args[++i]);
+ } else if ("-tableName".equals(args[i])) {
+ tableName = args[++i];
+ } else if ("-columnNames".equals(args[i])) {
+ columnNames = args[++i];
+ } else if ("-inputPath".equals(args[i])) {
+ inputPath = args[++i];
+ }
+ }
+ }
+
+ public void checkMandatoryConfs(Configuration conf) throws HIHOException {
+ if (conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY) == null) {
+ throw new HIHOException(
+ "JDBC driver configuration is not specified,please specify JDBC driver class.");
+ }
+ if (conf.get(DBConfiguration.URL_PROPERTY) == null) {
+ throw new HIHOException(
+ "JDBC url path configuration is empty,please specify JDBC url path.");
+ }
+ if (!conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY).contains("hsqldb")) {
+ if (conf.get(DBConfiguration.USERNAME_PROPERTY) == null) {
+ throw new HIHOException(
+ "JDBC user name configuration is empty,please specify JDBC user name.");
+ }
+ if (conf.get(DBConfiguration.PASSWORD_PROPERTY) == null) {
+ throw new HIHOException(
+ "JDBC password configuration is empty,please specify JDBC password.");
+ }
+ }
+ if (conf.get(HIHOConf.INPUT_OUTPUT_DELIMITER) == null) {
+ throw new HIHOException(
+ "The provided delimiter is empty, please specify delimiter.");
+ }
+ if (conf.get(HIHOConf.NUMBER_MAPPERS) == null) {
+ throw new HIHOException(
+ "The provided number of mappers is empty, please specify number of mappers.");
+ }
+ if (inputPath == null) {
+ throw new HIHOException(
+ "The provided input path is empty, please specify inputPath.");
+ }
+ if (tableName == null) {
+ throw new HIHOException(
+ "The provided table name is empty, please specify tableName.");
+ }
+ if (columnNames == null) {
+ throw new HIHOException(
+ "The provided column name is empty, please specify columnName.");
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ populateConfiguration(args, conf);
+ try {
+ checkMandatoryConfs(conf);
+ } catch (HIHOException e1) {
+ e1.printStackTrace();
+ throw new Exception(e1);
+ }
+ Job job = new Job(conf);
+ job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
+ conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
+ job.setJobName("HihoDBExport");
+
+ job.setMapperClass(GenericDBLoadDataMapper.class);
+ job.setJarByClass(ExportToDB.class);
+ job.setNumReduceTasks(0);
+ job.setInputFormatClass(TextInputFormat.class);
+ TextInputFormat.addInputPath(job, new Path(inputPath));
+ GenericDBOutputFormat.setOutput(job, tableName, columnNames);
+
+ int ret = 0;
+ try {
+ ret = job.waitForCompletion(true) ? 0 : 1;
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return ret;
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ int res = ToolRunner.run(new Configuration(), new ExportToDB(), args);
+ System.exit(res);
+ }
+
+}
@@ -0,0 +1,128 @@
+/**
+ * Copyright 2011 Nube Technologies
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package co.nubetech.hiho.mapreduce;
+
+import java.io.IOException;
+import java.sql.Types;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.log4j.Logger;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.type.TypeReference;
+
+import co.nubetech.hiho.common.HIHOConf;
+import co.nubetech.hiho.mapreduce.lib.db.ColumnInfo;
+import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable;
+
+public class GenericDBLoadDataMapper<K, V> extends
+ Mapper<K, V, GenericDBWritable, NullWritable> {
+
+ final static Logger logger = Logger
+ .getLogger(co.nubetech.hiho.mapreduce.GenericDBLoadDataMapper.class);
+
+ private ArrayList values;
+ private ArrayList<ColumnInfo> tableInfo;
+ private String delimiter;
+
+ public ArrayList<ColumnInfo> getTableInfo() {
+ return tableInfo;
+ }
+
+ public String getDelimiter() {
+ return delimiter;
+ }
+
+ public void setTableInfo(ArrayList<ColumnInfo> tableInfo) {
+ this.tableInfo = tableInfo;
+ }
+
+ public void setDelimiter(String delimiter) {
+ this.delimiter = delimiter;
+ }
+
+ protected void setup(Mapper.Context context) throws IOException,
+ InterruptedException {
+ delimiter = context.getConfiguration().get(
+ HIHOConf.INPUT_OUTPUT_DELIMITER);
+ logger.debug("delimiter is: " + delimiter);
+ String columnInfoJsonString = context.getConfiguration().get(
+ HIHOConf.COLUMN_INFO);
+ logger.debug("columnInfoJsonString is: " + columnInfoJsonString);
+ ObjectMapper mapper = new ObjectMapper();
+ tableInfo = mapper.readValue(columnInfoJsonString,
+ new TypeReference<ArrayList<ColumnInfo>>() {
+ });
+ }
+
+ public void map(K key, V val, Context context) throws IOException,
+ InterruptedException {
+ values = new ArrayList();
+
+ logger.debug("Key is: " + key);
+ logger.debug("Value is: " + val);
+
+ StringTokenizer rowValue = new StringTokenizer(val.toString(), delimiter);
+ if (rowValue.countTokens() == tableInfo.size()) {
+ Iterator<ColumnInfo> iterator = tableInfo.iterator();
+ while (iterator.hasNext()) {
+ ColumnInfo columnInfo = iterator.next();
+ String columnValue = rowValue.nextToken();
+ if (columnValue == null || columnValue.trim().equals("")) {
+ values.add(null);
+ } else {
+ logger.debug("Adding value : " + columnValue);
+ int type = columnInfo.getType();
+ if (type == Types.VARCHAR) {
+ values.add(columnValue);
+ } else if (type == Types.BIGINT) {
+ values.add(Long.parseLong(columnValue));
+ } else if (type == Types.INTEGER) {
+ values.add(Integer.parseInt(columnValue));
+ } else if (type == Types.DOUBLE) {
+ values.add(Double.parseDouble(columnValue));
+ } else if (type == Types.FLOAT) {
+ values.add(Float.parseFloat(columnValue));
+ } else if (type == Types.BOOLEAN) {
+ values.add(Boolean.parseBoolean(columnValue));
+ } else if (type == Types.DATE) {
+ DateFormat df = new SimpleDateFormat();
+ try {
+ values.add(df.parse(columnValue));
+ } catch (ParseException e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+ }
+ }
+ }
+ } else {
+ throw new IOException(
+ "Number of columns specified in table is not equal to the columns contains in the file.");
+ }
+ GenericDBWritable gdw = new GenericDBWritable(tableInfo, values);
+ context.write(gdw, null);
+
+ }
+
+}
@@ -31,6 +31,10 @@ public ColumnInfo(int index, int type, String name) {
this.type = type;
this.name = name;
}

	// No-argument constructor; presumably required so Jackson can
	// instantiate ColumnInfo during JSON deserialization (see the
	// TypeReference<ArrayList<ColumnInfo>> readValue call in
	// GenericDBLoadDataMapper.setup) — TODO confirm
	public ColumnInfo(){

	}
public int getIndex() {
return index;
Oops, something went wrong.

0 comments on commit d07133f

Please sign in to comment.