Skip to content

Commit

Permalink
Avoid forcing conversion to String when writing RCBinary
Browse files Browse the repository at this point in the history
The implementation of the RCBinary serializer makes an unnecessary
call to StringObjectInspector.getPrimitiveJavaObject(), which
forces a conversion of the UTF-8 bytes to a Java String.
  • Loading branch information
martint committed Dec 29, 2015
1 parent 294b442 commit 72917de
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -272,7 +272,7 @@
<dependency>
<groupId>com.facebook.presto.hive</groupId>
<artifactId>hive-apache</artifactId>
<version>0.15</version>
<version>0.16</version>
</dependency>

<dependency>
Expand Down
Expand Up @@ -43,6 +43,7 @@
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.columnar.OptimizedLazyBinaryColumnarSerde;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.io.compress.CompressionCodec;
Expand Down Expand Up @@ -530,6 +531,9 @@ public HiveRecordWriter(

fieldCount = fileColumnNames.size();

if (serDe.equals(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe.class.getName())) {
serDe = OptimizedLazyBinaryColumnarSerde.class.getName();
}
serializer = initializeSerializer(conf, schema, serDe);
recordWriter = HiveWriteUtils.createRecordWriter(new Path(writePath, fileName), conf, schema, outputFormat);

Expand Down

0 comments on commit 72917de

Please sign in to comment.