Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fix compatibility issue with pig9, upgrade to pig9

Pig9 changed how bags are constructed, which caused an issue for lists
and sets that have structs as elements. I added a test for this case and
wrote the change so that it works with both pig8 and pig9.

Note: the test will fail on pig8.
  • Loading branch information...
commit c3fa36b5014277bb90c76f2b03499247a7b0e831 1 parent 5d2a930
Jonathan Coveney authored November 16, 2011
BIN  lib/pig-0.9.2-SNAPSHOT.jar
Binary file not shown
16  src/java/com/twitter/elephantbird/pig/util/ThriftToPig.java
@@ -240,9 +240,21 @@ public static Schema toSchema(TStructDescriptor tDesc ) {
240 240
   private static FieldSchema singleFieldToFieldSchema(String fieldName, Field field) throws FrontendException {
241 241
     switch (field.getType()) {
242 242
       case TType.LIST:
243  
-        return new FieldSchema(fieldName, singleFieldToTupleSchema(fieldName + "_tuple", field.getListElemField()), DataType.BAG);
  243
+        Schema s1 = singleFieldToTupleSchema(fieldName + "_tuple", field.getListElemField());
  244
+        if (PigUtil.Pig9orNewer && field.getListElemField().getType()==TType.STRUCT) {
  245
+          //In pig9, if the field is a struct, then we need to wrap it in a Tuple
  246
+          return new FieldSchema(fieldName, new Schema(new FieldSchema("t",s1,DataType.TUPLE)), DataType.BAG);
  247
+        } else {
  248
+          return new FieldSchema(fieldName, s1, DataType.BAG);
  249
+        }
244 250
       case TType.SET:
245  
-        return new FieldSchema(fieldName, singleFieldToTupleSchema(fieldName + "_tuple", field.getSetElemField()), DataType.BAG);
  251
+        Schema s2 = singleFieldToTupleSchema(fieldName + "_tuple", field.getSetElemField());
  252
+        if (PigUtil.Pig9orNewer && field.getSetElemField().getType()==TType.STRUCT) {
  253
+          //In pig9, if the field is a struct, then we need to wrap it in a Tuple
  254
+          return new FieldSchema(fieldName, new Schema(new FieldSchema("t",s2,DataType.TUPLE)), DataType.BAG);
  255
+        } else {
  256
+          return new FieldSchema(fieldName, s2, DataType.BAG);
  257
+        }
246 258
       case TType.MAP:
247 259
         // can not specify types for maps in Pig.
248 260
         if (field.getMapKeyField().getType() != TType.STRING
32  src/test/com/twitter/elephantbird/pig/piggybank/TestThriftToPig.java
@@ -6,13 +6,16 @@
6 6
 import java.io.IOException;
7 7
 import java.nio.ByteBuffer;
8 8
 
  9
+import org.apache.pig.ResourceSchema;
9 10
 import org.apache.pig.backend.executionengine.ExecException;
10 11
 import org.apache.pig.data.DataByteArray;
11 12
 import org.apache.pig.data.Tuple;
12 13
 import org.apache.pig.data.TupleFactory;
  14
+import org.apache.pig.impl.logicalLayer.FrontendException;
  15
+import org.apache.pig.impl.logicalLayer.schema.Schema;
  16
+import org.apache.thrift.Fixtures;
13 17
 import org.apache.thrift.TBase;
14 18
 import org.apache.thrift.TException;
15  
-import org.apache.thrift.Fixtures;
16 19
 import org.junit.Test;
17 20
 
18 21
 import thrift.test.HolyMoley;
@@ -26,8 +29,9 @@
26 29
 import com.twitter.data.proto.tutorial.thrift.PhoneNumber;
27 30
 import com.twitter.data.proto.tutorial.thrift.PhoneType;
28 31
 import com.twitter.elephantbird.mapreduce.io.ThriftConverter;
29  
-import com.twitter.elephantbird.pig.util.ThriftToPig;
30 32
 import com.twitter.elephantbird.pig.util.PigToThrift;
  33
+import com.twitter.elephantbird.pig.util.PigUtil;
  34
+import com.twitter.elephantbird.pig.util.ThriftToPig;
31 35
 import com.twitter.elephantbird.thrift.test.TestName;
32 36
 import com.twitter.elephantbird.thrift.test.TestPerson;
33 37
 import com.twitter.elephantbird.thrift.test.TestPhoneType;
@@ -145,11 +149,29 @@ private void tupleTest(TestType type) throws Exception {
145 149
                                       TestPhoneType.HOME,   "408-555-5555",
146 150
                                       TestPhoneType.MOBILE, "650-555-5555",
147 151
                                       TestPhoneType.WORK,   "415-555-5555"));
148  
-
149 152
     String tupleString = toTuple(type, testPerson).toDelimitedString("-");
150 153
     assertTrue( // the order of elements in map could vary because of HashMap
151 154
         tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, WORK=415-555-5555, HOME=408-555-5555}") ||
152  
-        tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, HOME=408-555-5555, WORK=415-555-5555}")
153  
-    );
  155
+        tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, HOME=408-555-5555, WORK=415-555-5555}"));
  156
+  }
  157
+
  158
+  @Test
  159
+  //test a list of structs
  160
+  //pig9 changed how building bag schemas is handled, which introduced a bug in elephantbird
  161
+  //this test isolates that bug
  162
+  public void nestedStructInListTest() throws FrontendException {
  163
+    nestedInListTestHelper("com.twitter.elephantbird.thrift.test.TestRecipe");
  164
+  }
  165
+
  166
+  @Test
  167
+  public void nestedStructInSetTest() throws FrontendException {
  168
+    nestedInListTestHelper("com.twitter.elephantbird.thrift.test.TestUniqueRecipe");
  169
+  }
  170
+
  171
+  public void nestedInListTestHelper(String s) throws FrontendException {
  172
+    TypeRef typeRef_ = PigUtil.getThriftTypeRef(s);
  173
+    Schema schema=ThriftToPig.toSchema(typeRef_.getRawClass());
  174
+    Schema oldSchema = Schema.getPigSchema(new ResourceSchema(schema));
  175
+    assertTrue(schema.toString().equals(oldSchema.toString())); //this should be a direct equals, but there is a pig bug
154 176
   }
155 177
 }
14  src/thrift/test.thrift
@@ -20,3 +20,17 @@ struct TestPerson {
20 20
   2: map<TestPhoneType, string>   phones, // for testing enum keys in maps.
21 21
 }
22 22
 
  23
+struct TestIngredient {
  24
+  1: string name,
  25
+  2: string color,
  26
+}
  27
+
  28
+struct TestRecipe {
  29
+  1: string name,
  30
+  2: list<TestIngredient> ingredients,
  31
+}
  32
+
  33
+struct TestUniqueRecipe {
  34
+  1: string name,
  35
+  2: set<TestIngredient> ingredients,
  36
+}

0 notes on commit c3fa36b

Please sign in to comment.
Something went wrong with that request. Please try again.