
Source Code Check-in and Patches Folder

git-svn-id: https://hadoopdb.svn.sourceforge.net/svnroot/hadoopdb/trunk@2 2ab80ee8-dcc6-4802-a23e-3bfff869ab2a
azzadev committed Jul 18, 2009
1 parent d5c7437 commit e03853eed6c651f8a7eba3184d72eabad1045dbe
Showing with 5,688 additions and 0 deletions.
  1. +67 −0 Patches/hive-sms.patch
  2. +140 −0 src/java/edu/yale/cs/hadoopdb/benchmark/AggTaskLargeDB.java
  3. +120 −0 src/java/edu/yale/cs/hadoopdb/benchmark/AggTaskLargeHDFS.java
  4. +142 −0 src/java/edu/yale/cs/hadoopdb/benchmark/AggTaskSmallDB.java
  5. +119 −0 src/java/edu/yale/cs/hadoopdb/benchmark/AggTaskSmallHDFS.java
  6. +119 −0 src/java/edu/yale/cs/hadoopdb/benchmark/GrepTaskDB.java
  7. +108 −0 src/java/edu/yale/cs/hadoopdb/benchmark/GrepTaskHDFS.java
  8. +177 −0 src/java/edu/yale/cs/hadoopdb/benchmark/JoinTaskDB.java
  9. +387 −0 src/java/edu/yale/cs/hadoopdb/benchmark/JoinTaskHDFS.java
  10. +123 −0 src/java/edu/yale/cs/hadoopdb/benchmark/SelectionTaskDB.java
  11. +106 −0 src/java/edu/yale/cs/hadoopdb/benchmark/SelectionTaskHDFS.java
  12. +139 −0 src/java/edu/yale/cs/hadoopdb/benchmark/UDFAggTaskDB.java
  13. +103 −0 src/java/edu/yale/cs/hadoopdb/benchmark/UDFAggTaskHDFS.java
  14. +59 −0 src/java/edu/yale/cs/hadoopdb/catalog/BaseDBConfiguration.java
  15. +111 −0 src/java/edu/yale/cs/hadoopdb/catalog/Catalog.java
  16. +144 −0 src/java/edu/yale/cs/hadoopdb/catalog/SimpleCatalogGenerator.java
  17. +242 −0 src/java/edu/yale/cs/hadoopdb/catalog/SimpleRandomReplicationFactorTwo.java
  18. +80 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/Configuration.java
  19. +154 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/ConfigurationMapping.java
  20. +33 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/DBConfigurationSchema.xsd
  21. +187 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/Node.java
  22. +84 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/ObjectFactory.java
  23. +92 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/Partition.java
  24. +105 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/Relation.java
  25. +9 −0 src/java/edu/yale/cs/hadoopdb/catalog/xml/package-info.java
  26. +185 −0 src/java/edu/yale/cs/hadoopdb/connector/AbstractDBRecordReader.java
  27. +101 −0 src/java/edu/yale/cs/hadoopdb/connector/DBChunk.java
  28. +66 −0 src/java/edu/yale/cs/hadoopdb/connector/DBChunkHost.java
  29. +66 −0 src/java/edu/yale/cs/hadoopdb/connector/DBConfiguration.java
  30. +24 −0 src/java/edu/yale/cs/hadoopdb/connector/DBConst.java
  31. +75 −0 src/java/edu/yale/cs/hadoopdb/connector/DBInputFormat.java
  32. +140 −0 src/java/edu/yale/cs/hadoopdb/connector/DBInputSplit.java
  33. +75 −0 src/java/edu/yale/cs/hadoopdb/connector/DBRecordReader.java
  34. +28 −0 src/java/edu/yale/cs/hadoopdb/connector/DBWritable.java
  35. +210 −0 src/java/edu/yale/cs/hadoopdb/dataloader/GlobalHasher.java
  36. +104 −0 src/java/edu/yale/cs/hadoopdb/dataloader/LocalHasher.java
  37. +144 −0 src/java/edu/yale/cs/hadoopdb/exec/DBJobBase.java
  38. +64 −0 src/java/edu/yale/cs/hadoopdb/exec/HDFSJobBase.java
  39. +148 −0 src/java/edu/yale/cs/hadoopdb/sms/SQLQuery.java
  40. +590 −0 src/java/edu/yale/cs/hadoopdb/sms/SQLQueryGenerator.java
  41. +10 −0 src/java/edu/yale/cs/hadoopdb/sms/connector/SMSConfiguration.java
  42. +103 −0 src/java/edu/yale/cs/hadoopdb/sms/connector/SMSInputFormat.java
  43. +66 −0 src/java/edu/yale/cs/hadoopdb/sms/connector/SMSInputSplit.java
  44. +87 −0 src/java/edu/yale/cs/hadoopdb/sms/connector/SMSRecordReader.java
  45. +13 −0 src/java/edu/yale/cs/hadoopdb/util/BenchmarkUtils.java
  46. +27 −0 src/java/edu/yale/cs/hadoopdb/util/HDFSUtil.java
  47. +145 −0 src/java/edu/yale/cs/hadoopdb/util/ParseSchema.java
  48. +67 −0 src/java/edu/yale/cs/hadoopdb/util/ec2/InstanceProperties.java
Patches/hive-sms.patch
@@ -0,0 +1,67 @@
+Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
+===================================================================
+--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (revision 791534)
++++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (working copy)
+@@ -703,6 +703,8 @@
+ String newFile = hiveScratchDir + File.separator + (++numEmptyPaths);
+ Path newPath = new Path(newFile);
+ LOG.info("Changed input file to " + newPath.toString());
++ //CHANGE
++ job.set(newPath.toString(), new Path(emptyFile).getName());
+
+ // toggle the work
+ LinkedHashMap<String, ArrayList<String>> pathToAliases = work.getPathToAliases();
+Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+===================================================================
+--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (revision 791534)
++++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (working copy)
+@@ -98,6 +98,9 @@
+ if (inputSplit instanceof FileSplit) {
+ return ((FileSplit)inputSplit).getPath();
+ }
++ if(inputSplit instanceof Pathable){
++ return ((Pathable)inputSplit).getPath();
++ }
+ return new Path("");
+ }
+
+Index: ql/src/java/org/apache/hadoop/hive/ql/io/Pathable.java
+===================================================================
+--- ql/src/java/org/apache/hadoop/hive/ql/io/Pathable.java (revision 0)
++++ ql/src/java/org/apache/hadoop/hive/ql/io/Pathable.java (revision 0)
+@@ -0,0 +1,6 @@
++package org.apache.hadoop.hive.ql.io;
++import org.apache.hadoop.fs.Path;
++
++public interface Pathable {
++ public Path getPath();
++}
+Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+===================================================================
+--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 791534)
++++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
+@@ -128,6 +128,8 @@
+ import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+
++import edu.yale.cs.hadoopdb.sms.SQLQueryGenerator;
++
+ /**
+ * Implementation of the semantic analyzer
+ */
+@@ -3896,11 +3898,15 @@
+ // from which we want to find the reduce operator
+ genMapRedTasks(qb);
+
++ //HadoopDB SMS
++ SQLQueryGenerator.process(conf, qb, topOps);
++
+ LOG.info("Completed plan generation");
+
+ return;
+ }
+
++
+ /**
+ * Generates and expression node descriptor for the expression passed in the arguments. This
+ * function uses the row resolver and the metadata informatinon that are passed as arguments
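The hive-sms.patch above hooks HadoopDB into three points of Hive: ExecDriver additionally stores a path-to-file-name mapping in the JobConf when it substitutes empty placeholder paths, HiveInputFormat learns to resolve a path from any InputSplit that implements the new Pathable interface, and SemanticAnalyzer invokes SQLQueryGenerator.process(conf, qb, topOps) after map-reduce task generation so the SMS layer can rewrite eligible parts of the plan into SQL pushed down to the node-local databases. As a rough sketch only (the class below is hypothetical and not part of this commit; HadoopDB's own SMSInputSplit presumably plays this role), a split that wants to be recognized by the patched HiveInputFormat.getPath() would implement Pathable alongside the old-API InputSplit contract:

    // Hypothetical illustration, not part of this commit: a non-file split
    // that still reports a path, so the patched HiveInputFormat.getPath()
    // can resolve it via the Pathable check added above.
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.Pathable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.InputSplit;

    public class ExamplePathableSplit implements InputSplit, Pathable {

        private String virtualPath = "";

        public ExamplePathableSplit() {
        }

        public ExamplePathableSplit(String virtualPath) {
            this.virtualPath = virtualPath;
        }

        // Pathable: the path need not exist in HDFS; it only has to match
        // the path Hive uses to look up alias/partition information.
        public Path getPath() {
            return new Path(virtualPath);
        }

        // InputSplit contract (old mapred API).
        public long getLength() throws IOException {
            return 0;
        }

        public String[] getLocations() throws IOException {
            return new String[0];
        }

        // Writable contract, so the split can be shipped to task trackers.
        public void readFields(DataInput in) throws IOException {
            virtualPath = Text.readString(in);
        }

        public void write(DataOutput out) throws IOException {
            Text.writeString(out, virtualPath);
        }
    }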
src/java/edu/yale/cs/hadoopdb/benchmark/AggTaskLargeDB.java
@@ -0,0 +1,140 @@
+package edu.yale.cs.hadoopdb.benchmark;
+
+
+import java.io.IOException;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
+
+import edu.yale.cs.hadoopdb.connector.DBConst;
+import edu.yale.cs.hadoopdb.connector.DBWritable;
+import edu.yale.cs.hadoopdb.exec.DBJobBase;
+import edu.yale.cs.hadoopdb.util.HDFSUtil;
+
+/**
+ * HadoopDB's implementation of Large Aggregation Task
+ * http://database.cs.brown.edu/projects/mapreduce-vs-dbms/
+ */
+public class AggTaskLargeDB extends DBJobBase {
+
+    public static void main(String[] args) throws Exception {
+        int res = ToolRunner.run(new Configuration(), new AggTaskLargeDB(),
+                args);
+        System.exit(res);
+    }
+
+    @Override
+    protected JobConf configureJob(String... args) throws Exception {
+
+        JobConf conf = new JobConf(this.getClass());
+        conf.setJobName("aggregation_db_large");
+
+        conf.setOutputKeyClass(Text.class);
+        conf.setOutputValueClass(DoubleWritable.class);
+        conf.setOutputFormat(TextOutputFormat.class);
+
+        conf.setMapperClass(Map.class);
+        conf.setReducerClass(Reduce.class);
+
+        if (args.length < 1) {
+            throw new RuntimeException("Incorrect arguments provided for "
+                    + this.getClass());
+        }
+
+        // OUTPUT properties
+        Path outputPath = new Path(args[0]);
+        HDFSUtil.deletePath(outputPath);
+        FileOutputFormat.setOutputPath(conf, outputPath);
+
+        conf.set(DBConst.DB_RELATION_ID, "UserVisits");
+        conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName());
+        conf.set(DBConst.DB_SQL_QUERY,
+                "SELECT sourceIP, SUM(adRevenue) AS sumAdRevenue "
+                        + "FROM UserVisits GROUP BY sourceIP;");
+
+        return conf;
+    }
+
+    @Override
+    protected int printUsage() {
+        System.out.println("<output_dir>");
+        return -1;
+    }
+
+    static class Map extends MapReduceBase implements
+            Mapper<LongWritable, AggUserVisitsRecord, Text, DoubleWritable> {
+
+        protected Text outputKey = new Text();
+        protected DoubleWritable outputValue = new DoubleWritable();
+
+        public void map(LongWritable key, AggUserVisitsRecord value,
+                OutputCollector<Text, DoubleWritable> output, Reporter reporter)
+                throws IOException {
+
+            outputKey.set(value.getSourceIP());
+            outputValue.set(value.getSumAdRevenue());
+            output.collect(outputKey, outputValue);
+
+        }
+    }
+
+    public static class Reduce extends MapReduceBase implements
+            Reducer<Text, DoubleWritable, Text, DoubleWritable> {
+
+        protected DoubleWritable outputValue = new DoubleWritable();
+
+        public void reduce(Text key, Iterator<DoubleWritable> values,
+                OutputCollector<Text, DoubleWritable> output, Reporter reporter)
+                throws IOException {
+
+            double sum = 0;
+            while (values.hasNext()) {
+                sum += values.next().get();
+            }
+
+            outputValue.set(sum);
+            output.collect(key, outputValue);
+        }
+    }
+
+    static class AggUserVisitsRecord implements DBWritable {
+        private String sourceIP;
+        private double sumAdRevenue;
+
+        public String getSourceIP() {
+            return sourceIP;
+        }
+
+        public double getSumAdRevenue() {
+            return sumAdRevenue;
+        }
+
+        @Override
+        public void readFields(ResultSet resultSet) throws SQLException {
+            this.sourceIP = resultSet.getString("sourceIP");
+            this.sumAdRevenue = resultSet.getDouble("sumAdRevenue");
+        }
+
+        @Override
+        public void write(PreparedStatement statement) throws SQLException {
+            throw new UnsupportedOperationException("No write() impl.");
+        }
+    }
+
+}
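In AggTaskLargeDB the aggregation itself is pushed into the node-local databases: the SQL in DBConst.DB_SQL_QUERY already groups by sourceIP and sums adRevenue, so the Map class merely forwards each pre-aggregated row and the Reduce class merges partial sums arriving from different database chunks. Per printUsage(), the job takes a single <output_dir> argument; a launch would presumably look like hadoop jar <hadoopdb-jar> edu.yale.cs.hadoopdb.benchmark.AggTaskLargeDB <output_dir>, with the JDBC connection details supplied through DBJobBase and HadoopDB's catalog (the exact invocation is an assumption, not documented in this commit).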
src/java/edu/yale/cs/hadoopdb/benchmark/AggTaskLargeHDFS.java
@@ -0,0 +1,120 @@
+package edu.yale.cs.hadoopdb.benchmark;
+
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
+
+import edu.yale.cs.hadoopdb.exec.HDFSJobBase;
+import edu.yale.cs.hadoopdb.util.BenchmarkUtils;
+import edu.yale.cs.hadoopdb.util.HDFSUtil;
+
+/**
+ * Adapted from Andy Pavlo's code
+ * http://database.cs.brown.edu/projects/mapreduce-vs-dbms/
+ */
+public class AggTaskLargeHDFS extends HDFSJobBase {
+
+    public static void main(String[] args) throws Exception {
+        int res = ToolRunner.run(new Configuration(), new AggTaskLargeHDFS(), args);
+        System.exit(res);
+    }
+
+    @Override
+    protected JobConf configureJob(String... args) throws IOException {
+
+        JobConf conf = new JobConf(getConf(), this.getClass());
+        conf.setJobName("aggregation_hdfs_large");
+
+        conf.setOutputKeyClass(Text.class);
+        conf.setOutputValueClass(DoubleWritable.class);
+
+        conf.setMapperClass(AggTaskLargeHDFS.Map.class);
+        conf.setCombinerClass(AggTaskLargeHDFS.Reduce.class);
+        conf.setReducerClass(AggTaskLargeHDFS.Reduce.class);
+
+        conf.setInputFormat(TextInputFormat.class);
+        conf.setOutputFormat(TextOutputFormat.class);
+
+        if (args.length < 2) {
+            throw new RuntimeException("Incorrect arguments provided for "
+                    + this.getClass());
+        }
+
+        FileInputFormat.setInputPaths(conf, new Path(args[0]));
+
+        // OUTPUT properties
+        Path outputPath = new Path(args[1]);
+        HDFSUtil.deletePath(outputPath);
+        FileOutputFormat.setOutputPath(conf, outputPath);
+
+        return conf;
+
+    }
+
+    static class Map extends MapReduceBase implements
+            Mapper<LongWritable, Text, Text, DoubleWritable> {
+
+        protected Text outputKey = new Text();
+        protected DoubleWritable outputValue = new DoubleWritable();
+
+        public void map(LongWritable key, Text value,
+                OutputCollector<Text, DoubleWritable> output, Reporter reporter)
+                throws IOException {
+
+            // UserVisits: sourceIP | destURL | visitDate | adRevenue |
+            // userAgent | countryCode | langCode | searchWord | duration
+            String fields[] = BenchmarkUtils.DELIMITER_PATTERN.split(value
+                    .toString());
+
+            String newKey = fields[0];
+            Double revenue = Double.parseDouble(fields[3]);
+
+            outputKey.set(newKey);
+            outputValue.set(revenue);
+            output.collect(outputKey, outputValue);
+        }
+    }
+
+    public static class Reduce extends MapReduceBase implements
+            Reducer<Text, DoubleWritable, Text, DoubleWritable> {
+
+        protected DoubleWritable outputValue = new DoubleWritable();
+
+        public void reduce(Text key, Iterator<DoubleWritable> values,
+                OutputCollector<Text, DoubleWritable> output, Reporter reporter)
+                throws IOException {
+
+            double sum = 0;
+            while (values.hasNext()) {
+                sum += values.next().get();
+            }
+
+            outputValue.set(sum);
+            output.collect(key, outputValue);
+        }
+    }
+
+    @Override
+    protected int printUsage() {
+        System.out.println("<input_dir> <output_dir>");
+        return -1;
+    }
+
+}
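AggTaskLargeHDFS computes the same result purely in MapReduce over delimited UserVisits text: the mapper splits each line with BenchmarkUtils.DELIMITER_PATTERN and emits (sourceIP, adRevenue), and the Reduce class is registered as both combiner and reducer so partial sums are folded before the shuffle. Per printUsage(), it takes <input_dir> <output_dir>; an invocation would presumably mirror the DB variant, e.g. hadoop jar <hadoopdb-jar> edu.yale.cs.hadoopdb.benchmark.AggTaskLargeHDFS <input_dir> <output_dir> (again an assumption, not spelled out in this commit).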
(Diffs for the remaining 45 files in this commit are not rendered here.)
