Skip to content

Commit

Permalink
Export to any db changes
Browse files Browse the repository at this point in the history
  • Loading branch information
sonalgoyal committed Aug 3, 2011
1 parent 9f75aa1 commit d07133f
Show file tree
Hide file tree
Showing 10 changed files with 1,052 additions and 1 deletion.
6 changes: 6 additions & 0 deletions README
Expand Up @@ -139,6 +139,12 @@ For Delimited Text Files:
${HIHO_HOME}/scripts/hiho merge -newPath testData/merge/inputNew/fileInNewPath.txt -oldPath testData/merge/inputOld/fileInOldPath.txt -mergeBy value -outputPath output -inputFormat co.nubetech.hiho.dedup.DelimitedTextInputFormat -inputKeyClassName org.apache.hadoop.io.Text -inputValueClassName org.apache.hadoop.io.Text ${HIHO_HOME}/scripts/hiho merge -newPath testData/merge/inputNew/fileInNewPath.txt -oldPath testData/merge/inputOld/fileInOldPath.txt -mergeBy value -outputPath output -inputFormat co.nubetech.hiho.dedup.DelimitedTextInputFormat -inputKeyClassName org.apache.hadoop.io.Text -inputValueClassName org.apache.hadoop.io.Text




8. Export to DB:-
bin/hadoop jar deploy/hiho-0.4.0.jar co.nubetech.hiho.job.ExportToDB -jdbcDriver <jdbcDriverName> -jdbcUrl <jdbcUrl> -jdbcUsername <jdbcUserName> -jdbcPassword <jdbcPassword> -delimiter <delimiter> -numberOfMappers <numberOfMappers> -tableName <tableName> -columnNames <columnNames> -inputPath <inputPath>
or
${HIHO_HOME}/scripts/hiho export db -jdbcDriver <jdbcDriverName> -jdbcUrl <jdbcUrl> -jdbcUsername <jdbcUserName> -jdbcPassword <jdbcPassword> -delimiter <delimiter> -numberOfMappers <numberOfMappers> -tableName <tableName> -columnNames <columnNames> -inputPath <inputPath>


New Features in this release New Features in this release
-incremental import and introduction of AppendFileInputFormat -incremental import and introduction of AppendFileInputFormat
-Oracle export -Oracle export
Expand Down
2 changes: 1 addition & 1 deletion build.xml
Expand Up @@ -27,7 +27,7 @@
<property environment="env" /> <property environment="env" />
<property name="name" value="hiho" /> <property name="name" value="hiho" />
<property name="Name" value="HIHO" /> <property name="Name" value="HIHO" />
<property name="version" value="0.4.0" /> <property name="version" value="0.5.0" />




<property name="artifact.name" value="${name}-${version}" /> <property name="artifact.name" value="${name}-${version}" />
Expand Down
2 changes: 2 additions & 0 deletions src/co/nubetech/hiho/common/HIHOConf.java
Expand Up @@ -74,5 +74,7 @@ public interface HIHOConf {
// conf for mergeJob // conf for mergeJob
public static final String MERGE_OLD_PATH = "mapreduce.jdbc.hiho.merge.mergeOldPath"; public static final String MERGE_OLD_PATH = "mapreduce.jdbc.hiho.merge.mergeOldPath";
public static final String MERGE_NEW_PATH = "mapreduce.jdbc.hiho.merge.mergeNewPath"; public static final String MERGE_NEW_PATH = "mapreduce.jdbc.hiho.merge.mergeNewPath";

public static final String COLUMN_INFO = "mapreduce.jdbc.hiho.db.columnInfo";


} }
139 changes: 139 additions & 0 deletions src/co/nubetech/hiho/job/ExportToDB.java
@@ -0,0 +1,139 @@
/**
* Copyright 2011 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.job;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import co.nubetech.apache.hadoop.DBConfiguration;
import co.nubetech.apache.hadoop.MRJobConfig;
import co.nubetech.hiho.common.HIHOConf;
import co.nubetech.hiho.common.HIHOException;
import co.nubetech.hiho.mapreduce.GenericDBLoadDataMapper;
import co.nubetech.hiho.mapreduce.lib.db.GenericDBOutputFormat;

/**
 * Tool that exports delimited text files from HDFS into a JDBC database
 * table. Command-line flags supply the JDBC connection details
 * (driver, url, username, password), the field delimiter, the number of
 * mappers, the target table and column names, and the HDFS input path.
 * Runs a map-only job using {@link GenericDBLoadDataMapper} and
 * {@link GenericDBOutputFormat}.
 */
public class ExportToDB extends Configured implements Tool {

	// Populated from the command line in populateConfiguration();
	// validated in checkMandatoryConfs().
	private String inputPath = null;
	private String tableName = null;
	private String columnNames = null;

	/**
	 * Parses the command-line arguments and copies the recognized
	 * flag/value pairs into the supplied configuration. Table name,
	 * column names and input path are kept in fields because they are
	 * consumed by the job setup rather than the configuration.
	 *
	 * @param args command-line arguments as alternating flag/value pairs
	 * @param conf configuration to populate
	 */
	public void populateConfiguration(String[] args, Configuration conf) {
		// Loop stops at length - 1 because every recognized flag consumes
		// the following argument as its value.
		for (int i = 0; i < args.length - 1; i++) {
			if ("-jdbcDriver".equals(args[i])) {
				conf.set(DBConfiguration.DRIVER_CLASS_PROPERTY, args[++i]);
			} else if ("-jdbcUrl".equals(args[i])) {
				conf.set(DBConfiguration.URL_PROPERTY, args[++i]);
			} else if ("-jdbcUsername".equals(args[i])) {
				conf.set(DBConfiguration.USERNAME_PROPERTY, args[++i]);
			} else if ("-jdbcPassword".equals(args[i])) {
				conf.set(DBConfiguration.PASSWORD_PROPERTY, args[++i]);
			} else if ("-delimiter".equals(args[i])) {
				conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, args[++i]);
			} else if ("-numberOfMappers".equals(args[i])) {
				conf.set(HIHOConf.NUMBER_MAPPERS, args[++i]);
			} else if ("-tableName".equals(args[i])) {
				tableName = args[++i];
			} else if ("-columnNames".equals(args[i])) {
				columnNames = args[++i];
			} else if ("-inputPath".equals(args[i])) {
				inputPath = args[++i];
			}
		}
	}

	/**
	 * Verifies that every mandatory configuration value has been supplied.
	 * Username/password are only required for non-HSQLDB drivers (HSQLDB
	 * in-process databases may run without credentials).
	 *
	 * @param conf configuration populated by populateConfiguration()
	 * @throws HIHOException if any mandatory value is missing
	 */
	public void checkMandatoryConfs(Configuration conf) throws HIHOException {
		if (conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY) == null) {
			throw new HIHOException(
					"JDBC driver configuration is not specified,please specify JDBC driver class.");
		}
		if (conf.get(DBConfiguration.URL_PROPERTY) == null) {
			throw new HIHOException(
					"JDBC url path configuration is empty,please specify JDBC url path.");
		}
		if (!conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY).contains("hsqldb")) {
			if (conf.get(DBConfiguration.USERNAME_PROPERTY) == null) {
				throw new HIHOException(
						"JDBC user name configuration is empty,please specify JDBC user name.");
			}
			if (conf.get(DBConfiguration.PASSWORD_PROPERTY) == null) {
				throw new HIHOException(
						"JDBC password configuration is empty,please specify JDBC password.");
			}
		}
		if (conf.get(HIHOConf.INPUT_OUTPUT_DELIMITER) == null) {
			throw new HIHOException(
					"The provided delimiter is empty, please specify delimiter.");
		}
		if (conf.get(HIHOConf.NUMBER_MAPPERS) == null) {
			throw new HIHOException(
					"The provided number of mappers is empty, please specify number of mappers.");
		}
		if (inputPath == null) {
			throw new HIHOException(
					"The provided input path is empty, please specify inputPath.");
		}
		if (tableName == null) {
			throw new HIHOException(
					"The provided table name is empty, please specify tableName.");
		}
		if (columnNames == null) {
			throw new HIHOException(
					"The provided column name is empty, please specify columnName.");
		}
	}

	/**
	 * Configures and runs the map-only export job.
	 *
	 * @param args command-line arguments, see populateConfiguration()
	 * @return 0 on success, 1 on job failure or error
	 * @throws Exception if mandatory configuration is missing
	 */
	public int run(String[] args) throws Exception {
		Configuration conf = getConf();
		populateConfiguration(args, conf);
		try {
			checkMandatoryConfs(conf);
		} catch (HIHOException e1) {
			e1.printStackTrace();
			throw new Exception(e1);
		}
		Job job = new Job(conf);
		// Desired mapper count is advisory; defaults to 1 if unparsable.
		job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
				conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
		job.setJobName("HihoDBExport");

		job.setMapperClass(GenericDBLoadDataMapper.class);
		job.setJarByClass(ExportToDB.class);
		job.setNumReduceTasks(0);
		job.setInputFormatClass(TextInputFormat.class);
		TextInputFormat.addInputPath(job, new Path(inputPath));
		GenericDBOutputFormat.setOutput(job, tableName, columnNames);

		// BUGFIX: default to failure (1). The original initialized ret to
		// 0, so an exception thrown by waitForCompletion was swallowed by
		// the catch below and the tool reported success for a crashed job.
		int ret = 1;
		try {
			ret = job.waitForCompletion(true) ? 0 : 1;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return ret;

	}

	/**
	 * Command-line entry point; delegates to ToolRunner and exits with
	 * the job's return code.
	 */
	public static void main(String[] args) throws Exception {
		int res = ToolRunner.run(new Configuration(), new ExportToDB(), args);
		System.exit(res);
	}

}
128 changes: 128 additions & 0 deletions src/co/nubetech/hiho/mapreduce/GenericDBLoadDataMapper.java
@@ -0,0 +1,128 @@
/**
* Copyright 2011 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.mapreduce;

import java.io.IOException;
import java.sql.Types;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;

import co.nubetech.hiho.common.HIHOConf;
import co.nubetech.hiho.mapreduce.lib.db.ColumnInfo;
import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable;

/**
 * Map-only mapper that converts each delimited input line into a
 * {@link GenericDBWritable} record for export to a database table.
 * The per-column metadata (index, SQL type, name) is delivered as a JSON
 * string under {@link HIHOConf#COLUMN_INFO} and deserialized in
 * {@link #setup}; each field of a row is then parsed according to its
 * column's declared {@link java.sql.Types} value.
 */
public class GenericDBLoadDataMapper<K, V> extends
		Mapper<K, V, GenericDBWritable, NullWritable> {

	final static Logger logger = Logger
			.getLogger(co.nubetech.hiho.mapreduce.GenericDBLoadDataMapper.class);

	// Parsed values for the current record. Typed as Object because each
	// entry's runtime type (String, Long, Integer, ...) depends on the
	// column's SQL type; the original raw ArrayList produced unchecked
	// warnings for no benefit.
	private ArrayList<Object> values;
	private ArrayList<ColumnInfo> tableInfo;
	private String delimiter;

	public ArrayList<ColumnInfo> getTableInfo() {
		return tableInfo;
	}

	public String getDelimiter() {
		return delimiter;
	}

	public void setTableInfo(ArrayList<ColumnInfo> tableInfo) {
		this.tableInfo = tableInfo;
	}

	public void setDelimiter(String delimiter) {
		this.delimiter = delimiter;
	}

	/**
	 * Reads the field delimiter and the JSON-encoded column metadata from
	 * the job configuration.
	 */
	protected void setup(Mapper.Context context) throws IOException,
			InterruptedException {
		delimiter = context.getConfiguration().get(
				HIHOConf.INPUT_OUTPUT_DELIMITER);
		logger.debug("delimiter is: " + delimiter);
		String columnInfoJsonString = context.getConfiguration().get(
				HIHOConf.COLUMN_INFO);
		logger.debug("columnInfoJsonString is: " + columnInfoJsonString);
		ObjectMapper mapper = new ObjectMapper();
		tableInfo = mapper.readValue(columnInfoJsonString,
				new TypeReference<ArrayList<ColumnInfo>>() {
				});
	}

	/**
	 * Splits one input line on the configured delimiter, converts each
	 * field to the Java type matching its column's SQL type, and emits a
	 * GenericDBWritable for the row.
	 *
	 * @throws IOException if the field count does not match the column
	 *         metadata, or a DATE field cannot be parsed
	 */
	public void map(K key, V val, Context context) throws IOException,
			InterruptedException {
		values = new ArrayList<Object>();

		logger.debug("Key is: " + key);
		logger.debug("Value is: " + val);

		// NOTE(review): StringTokenizer collapses consecutive delimiters,
		// so a row containing an empty field fails the count check below
		// -- confirm whether empty columns can occur in the input.
		StringTokenizer rowValue = new StringTokenizer(val.toString(), delimiter);
		if (rowValue.countTokens() == tableInfo.size()) {
			Iterator<ColumnInfo> iterator = tableInfo.iterator();
			while (iterator.hasNext()) {
				ColumnInfo columnInfo = iterator.next();
				String columnValue = rowValue.nextToken();
				// Blank fields are stored as SQL NULL.
				if (columnValue == null || columnValue.trim().equals("")) {
					values.add(null);
				} else {
					logger.debug("Adding value : " + columnValue);
					int type = columnInfo.getType();
					if (type == Types.VARCHAR) {
						values.add(columnValue);
					} else if (type == Types.BIGINT) {
						values.add(Long.parseLong(columnValue));
					} else if (type == Types.INTEGER) {
						values.add(Integer.parseInt(columnValue));
					} else if (type == Types.DOUBLE) {
						values.add(Double.parseDouble(columnValue));
					} else if (type == Types.FLOAT) {
						values.add(Float.parseFloat(columnValue));
					} else if (type == Types.BOOLEAN) {
						values.add(Boolean.parseBoolean(columnValue));
					} else if (type == Types.DATE) {
						// NOTE(review): the no-arg SimpleDateFormat uses a
						// locale-dependent pattern -- confirm the expected
						// input date format and pin an explicit pattern.
						DateFormat df = new SimpleDateFormat();
						try {
							values.add(df.parse(columnValue));
						} catch (ParseException e) {
							e.printStackTrace();
							throw new IOException(e);
						}
					}
					// Unrecognized SQL types are silently skipped, which
					// would desynchronize values from tableInfo.
				}
			}
		} else {
			// BUGFIX: reworded previously ungrammatical error message.
			throw new IOException(
					"Number of columns specified in table is not equal to the number of columns contained in the file.");
		}
		GenericDBWritable gdw = new GenericDBWritable(tableInfo, values);
		// BUGFIX: emit the NullWritable singleton instead of a bare null;
		// passing null risks an NPE when the framework serializes the
		// output value.
		context.write(gdw, NullWritable.get());

	}

}
4 changes: 4 additions & 0 deletions src/co/nubetech/hiho/mapreduce/lib/db/ColumnInfo.java
Expand Up @@ -31,6 +31,10 @@ public ColumnInfo(int index, int type, String name) {
this.type = type; this.type = type;
this.name = name; this.name = name;
} }

public ColumnInfo(){

}


public int getIndex() { public int getIndex() {
return index; return index;
Expand Down

0 comments on commit d07133f

Please sign in to comment.