Permalink
Browse files

use MultiInputFormat for Lzo*PigLoaders. This

enables mixing objects stored in both Base64Line
format or in binary block format.
Extra cost is that each input file is opened twice on 
the mappers (might avoid this in future).
  • Loading branch information...
1 parent 0f76994 commit 171ef4e5a44e15341ef1e61c80e18975210701d5 Raghu Angadi committed Jan 10, 2012
@@ -12,9 +12,9 @@
import org.slf4j.LoggerFactory;
import com.google.protobuf.Message;
-import com.twitter.elephantbird.mapreduce.input.LzoProtobufB64LineInputFormat;
import com.twitter.elephantbird.mapreduce.input.LzoRecordReader;
-import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
+import com.twitter.elephantbird.mapreduce.input.MultiInputFormat;
+import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.pig.util.PigUtil;
import com.twitter.elephantbird.pig.util.ProjectedProtobufTupleFactory;
import com.twitter.elephantbird.pig.util.ProtobufToPig;
@@ -85,11 +85,11 @@ public ResourceSchema getSchema(String filename, Job job) throws IOException {
}
@Override
- public InputFormat<LongWritable, ProtobufWritable<M>> getInputFormat() throws IOException {
+ public InputFormat<LongWritable, BinaryWritable<M>> getInputFormat() throws IOException {
if (typeRef == null) {
LOG.error("Protobuf class must be specified before an InputFormat can be created. Do not use the no-argument constructor.");
throw new IllegalArgumentException("Protobuf class must be specified before an InputFormat can be created. Do not use the no-argument constructor.");
}
- return new LzoProtobufB64LineInputFormat<M>(typeRef);
+ return new MultiInputFormat<M>(typeRef);
}
}
@@ -1,13 +1,6 @@
package com.twitter.elephantbird.pig.load;
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputFormat;
-
import com.google.protobuf.Message;
-import com.twitter.elephantbird.mapreduce.input.LzoProtobufBlockInputFormat;
-import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
/**
* Loader for LZO-compressed files written using the ProtobufBlockInputFormat<br>
@@ -29,9 +22,4 @@ public LzoProtobufBlockPigLoader() {
public LzoProtobufBlockPigLoader(String protoClassName) {
super(protoClassName);
}
-
- @Override
- public InputFormat<LongWritable, ProtobufWritable<M>> getInputFormat() throws IOException {
- return new LzoProtobufBlockInputFormat<M>(typeRef);
- }
}
@@ -12,8 +12,8 @@
import org.apache.thrift.TBase;
import com.twitter.elephantbird.mapreduce.input.LzoRecordReader;
-import com.twitter.elephantbird.mapreduce.input.LzoThriftB64LineInputFormat;
-import com.twitter.elephantbird.mapreduce.io.ThriftWritable;
+import com.twitter.elephantbird.mapreduce.input.MultiInputFormat;
+import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.pig.util.PigUtil;
import com.twitter.elephantbird.pig.util.ProjectedThriftTupleFactory;
import com.twitter.elephantbird.pig.util.ThriftToPig;
@@ -59,7 +59,7 @@ public ResourceSchema getSchema(String filename, Job job) throws IOException {
}
@Override
- public InputFormat<LongWritable, ThriftWritable<M>> getInputFormat() throws IOException {
- return new LzoThriftB64LineInputFormat<M>(typeRef_);
+ public InputFormat<LongWritable, BinaryWritable<M>> getInputFormat() throws IOException {
+ return new MultiInputFormat<M>(typeRef_);
}
}
@@ -1,22 +1,10 @@
package com.twitter.elephantbird.pig.load;
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.thrift.TBase;
-import com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat;
-import com.twitter.elephantbird.mapreduce.io.ThriftWritable;
-
public class LzoThriftBlockPigLoader<M extends TBase<?, ?>> extends LzoThriftB64LinePigLoader<M> {
public LzoThriftBlockPigLoader(String thriftClassName) {
super(thriftClassName);
}
-
- @Override
- public InputFormat<LongWritable, ThriftWritable<M>> getInputFormat() throws IOException {
- return new LzoThriftBlockInputFormat<M>(typeRef_);
- }
}

0 comments on commit 171ef4e

Please sign in to comment.