Permalink
Browse files

Fix compatibility issue with pig9, upgrade to pig9

Pig9 changed how bags are constructed, which broke schema generation for
lists and sets whose elements are structs. I added a test for this case
and changed the schema construction so that it works with both pig8 and
pig9.

Note: the test will fail on pig8.
  • Loading branch information...
1 parent 5d2a930 commit c3fa36b5014277bb90c76f2b03499247a7b0e831 Jonathan Coveney committed Nov 17, 2011
View
BIN lib/pig-0.9.2-SNAPSHOT.jar
Binary file not shown.
View
16 src/java/com/twitter/elephantbird/pig/util/ThriftToPig.java
@@ -240,9 +240,21 @@ public static Schema toSchema(TStructDescriptor tDesc ) {
private static FieldSchema singleFieldToFieldSchema(String fieldName, Field field) throws FrontendException {
switch (field.getType()) {
case TType.LIST:
- return new FieldSchema(fieldName, singleFieldToTupleSchema(fieldName + "_tuple", field.getListElemField()), DataType.BAG);
+ Schema s1 = singleFieldToTupleSchema(fieldName + "_tuple", field.getListElemField());
+ if (PigUtil.Pig9orNewer && field.getListElemField().getType()==TType.STRUCT) {
+ //In pig9, if the field is a struct, then we need to wrap it in a Tuple
+ return new FieldSchema(fieldName, new Schema(new FieldSchema("t",s1,DataType.TUPLE)), DataType.BAG);
+ } else {
+ return new FieldSchema(fieldName, s1, DataType.BAG);
+ }
case TType.SET:
- return new FieldSchema(fieldName, singleFieldToTupleSchema(fieldName + "_tuple", field.getSetElemField()), DataType.BAG);
+ Schema s2 = singleFieldToTupleSchema(fieldName + "_tuple", field.getSetElemField());
+ if (PigUtil.Pig9orNewer && field.getSetElemField().getType()==TType.STRUCT) {
+ //In pig9, if the field is a struct, then we need to wrap it in a Tuple
+ return new FieldSchema(fieldName, new Schema(new FieldSchema("t",s2,DataType.TUPLE)), DataType.BAG);
+ } else {
+ return new FieldSchema(fieldName, s2, DataType.BAG);
+ }
case TType.MAP:
// can not specify types for maps in Pig.
if (field.getMapKeyField().getType() != TType.STRING
View
32 src/test/com/twitter/elephantbird/pig/piggybank/TestThriftToPig.java
@@ -6,13 +6,16 @@
import java.io.IOException;
import java.nio.ByteBuffer;
+import org.apache.pig.ResourceSchema;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.thrift.Fixtures;
import org.apache.thrift.TBase;
import org.apache.thrift.TException;
-import org.apache.thrift.Fixtures;
import org.junit.Test;
import thrift.test.HolyMoley;
@@ -26,8 +29,9 @@
import com.twitter.data.proto.tutorial.thrift.PhoneNumber;
import com.twitter.data.proto.tutorial.thrift.PhoneType;
import com.twitter.elephantbird.mapreduce.io.ThriftConverter;
-import com.twitter.elephantbird.pig.util.ThriftToPig;
import com.twitter.elephantbird.pig.util.PigToThrift;
+import com.twitter.elephantbird.pig.util.PigUtil;
+import com.twitter.elephantbird.pig.util.ThriftToPig;
import com.twitter.elephantbird.thrift.test.TestName;
import com.twitter.elephantbird.thrift.test.TestPerson;
import com.twitter.elephantbird.thrift.test.TestPhoneType;
@@ -145,11 +149,29 @@ private void tupleTest(TestType type) throws Exception {
TestPhoneType.HOME, "408-555-5555",
TestPhoneType.MOBILE, "650-555-5555",
TestPhoneType.WORK, "415-555-5555"));
-
String tupleString = toTuple(type, testPerson).toDelimitedString("-");
assertTrue( // the order of elements in map could vary because of HashMap
tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, WORK=415-555-5555, HOME=408-555-5555}") ||
- tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, HOME=408-555-5555, WORK=415-555-5555}")
- );
+ tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, HOME=408-555-5555, WORK=415-555-5555}"));
+ }
+
+ @Test
+ // Schema check for a Thrift struct that contains a list of structs (TestRecipe).
+ // Pig9 changed how bag schemas are built, which introduced a bug in elephantbird;
+ // this test isolates that bug.
+ public void nestedStructInListTest() throws FrontendException {
+ nestedInListTestHelper("com.twitter.elephantbird.thrift.test.TestRecipe");
+ }
+
+ @Test
+ public void nestedStructInSetTest() throws FrontendException { // set-of-structs counterpart of nestedStructInListTest
+ nestedInListTestHelper("com.twitter.elephantbird.thrift.test.TestUniqueRecipe"); // helper name says "List" but it is reused for the set case
+ }
+
+ /**
+ * Builds the Pig schema for the named Thrift class and asserts that converting it to a
+ * {@link ResourceSchema} and back yields a schema with the same string form.
+ *
+ * @param s fully qualified name of the Thrift-generated class whose schema is checked
+ * @throws FrontendException if any of the Pig schema calls fails
+ */
+ public void nestedInListTestHelper(String s) throws FrontendException {
+ TypeRef typeRef_ = PigUtil.getThriftTypeRef(s);
+ Schema schema = ThriftToPig.toSchema(typeRef_.getRawClass());
+ Schema roundTripped = Schema.getPigSchema(new ResourceSchema(schema));
+ String expected = schema.toString();
+ String actual = roundTripped.toString();
+ // Compared via toString(): this should be a direct equals, but there is a pig bug.
+ // The message makes a mismatch diagnosable from the test report alone.
+ assertTrue("Schema for " + s + " changed after ResourceSchema round trip: expected <" + expected + "> but was <" + actual + ">", expected.equals(actual));
}
}
View
14 src/thrift/test.thrift
@@ -20,3 +20,17 @@ struct TestPerson {
2: map<TestPhoneType, string> phones, // for testing enum keys in maps.
}
+struct TestIngredient { // element struct used in the list/set fields of TestRecipe and TestUniqueRecipe
+ 1: string name,
+ 2: string color,
+}
+
+struct TestRecipe { // for testing a list whose elements are structs
+ 1: string name,
+ 2: list<TestIngredient> ingredients, // exercised by TestThriftToPig.nestedStructInListTest
+}
+
+struct TestUniqueRecipe { // for testing a set whose elements are structs
+ 1: string name,
+ 2: set<TestIngredient> ingredients, // exercised by TestThriftToPig.nestedStructInSetTest
+}

0 comments on commit c3fa36b

Please sign in to comment.