Skip to content

Loading…

Fixed #issue-70 related to get_json_object functionality for hive-serde #71

Open
wants to merge 3 commits into from

3 participants

@snehalNagmote

This issue is that get_json_object does not work with the Hive SerDe on a JSON string column unless the column is defined as a struct.
#70

@snehalNagmote

I fixed this by adding a String object inspector that handles JSON. I have tested it and it works. I look forward to your review and comments.

Snehal Nagmote added some commits
@ashokkumar-dhanavel

I have recently reported a similar issue with get_json_object,

#82

I then merged this pull request with the develop branch and it's working perfectly. Is this eventually going to be merged into the develop branch?

@rcongiu
Owner

This pull request is mostly formatting changes and can't be merged automatically.
When a pull request is like that, it's much harder to review. Please do keep in mind that I work on this in my spare time. Can you please submit a pull request or a diff for just the relevant change?

@snehalNagmote
@wmoss wmoss added a commit to wmoss/Hive-JSON-Serde that referenced this pull request
Snehal Nagmote Fix for #issue-70 related to get_json_object
This is a reworking of 
rcongiu#71 to update it to the
new directory structure in 1.3
ce401c3
@wmoss wmoss added a commit to wmoss/Hive-JSON-Serde that referenced this pull request
Snehal Nagmote Fix for #issue-70 related to get_json_object
This is a reworking of
rcongiu#71 to update it to the
new directory structure in 1.3
d1aa01c
@wmoss wmoss added a commit to wmoss/Hive-JSON-Serde that referenced this pull request
@wmoss wmoss Fix for #issue-70 related to get_json_object
This is a reworking of
rcongiu#71 to update it to the
new directory structure in 1.3
6770f89
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on May 27, 2014
  1. Fixed #issue-70 related to get_json_object functionality for string i…

    Snehal Nagmote committed
    …n hive serde
Commits on May 28, 2014
  1. Fixed formatting changes

    Snehal Nagmote committed
Commits on Jun 9, 2014
  1. Removed logging

    Snehal Nagmote committed
View
3 src/main/java/org/openx/data/jsonserde/objectinspector/JsonObjectInspectorFactory.java
@@ -16,6 +16,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
@@ -29,6 +30,7 @@
import org.openx.data.jsonserde.objectinspector.primitive.JavaStringDoubleObjectInspector;
import org.openx.data.jsonserde.objectinspector.primitive.JavaStringFloatObjectInspector;
import org.openx.data.jsonserde.objectinspector.primitive.JavaStringIntObjectInspector;
+import org.openx.data.jsonserde.objectinspector.primitive.JavaStringJsonObjectInspector;
import org.openx.data.jsonserde.objectinspector.primitive.JavaStringLongObjectInspector;
import org.openx.data.jsonserde.objectinspector.primitive.JavaStringShortObjectInspector;
import org.openx.data.jsonserde.objectinspector.primitive.JavaStringTimestampObjectInspector;
@@ -171,6 +173,7 @@ public static JsonMapObjectInspector getJsonMapObjectInspector(
= new EnumMap<PrimitiveCategory, AbstractPrimitiveJavaObjectInspector>(PrimitiveCategory.class);
static {
+ primitiveOICache.put(PrimitiveCategory.STRING, new JavaStringJsonObjectInspector());
primitiveOICache.put(PrimitiveCategory.BYTE, new JavaStringByteObjectInspector());
primitiveOICache.put(PrimitiveCategory.SHORT, new JavaStringShortObjectInspector());
primitiveOICache.put(PrimitiveCategory.INT, new JavaStringIntObjectInspector());
View
48 ...ava/org/openx/data/jsonserde/objectinspector/primitive/JavaStringJsonObjectInspector.java
@@ -0,0 +1,48 @@
+package org.openx.data.jsonserde.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
+
+/**
+ * Settable string object inspector that accepts any Java object as the
+ * underlying value and exposes it through {@link Object#toString()}.
+ *
+ * <p>Values are not required to be {@code String} instances: both getters call
+ * {@code toString()} on whatever object they receive. NOTE(review): the intent
+ * appears to be that a nested JSON object stored in a column declared as
+ * {@code string} is surfaced as its JSON text; that depends on the
+ * {@code toString()} of the stored object and should be confirmed.
+ *
+ * <p>The values produced by {@code create}/{@code set} are plain
+ * {@code String}s; the {@code set} methods ignore the object passed in and
+ * return the new value.
+ */
+public class JavaStringJsonObjectInspector extends AbstractPrimitiveJavaObjectInspector
+        implements SettableStringObjectInspector {
+
+    /** Creates an inspector bound to Hive's string type entry. */
+    public JavaStringJsonObjectInspector() {
+        super(PrimitiveObjectInspectorUtils.stringTypeEntry);
+    }
+
+    /**
+     * Returns the value as a Hadoop {@link Text}.
+     *
+     * @param o the stored value, possibly {@code null} or a non-String object
+     * @return a new {@code Text} holding {@code o.toString()}, or {@code null} if {@code o} is null
+     */
+    @Override
+    public Text getPrimitiveWritableObject(Object o) {
+        return o == null ? null : new Text(o.toString());
+    }
+
+    /**
+     * Returns the value as a Java {@code String}.
+     *
+     * @param o the stored value, possibly {@code null} or a non-String object
+     * @return {@code o.toString()}, or {@code null} if {@code o} is null
+     */
+    @Override
+    public String getPrimitiveJavaObject(Object o) {
+        return o == null ? null : o.toString();
+    }
+
+    /**
+     * Creates a value from a {@link Text}.
+     *
+     * @param value the text to convert, may be {@code null}
+     * @return the text's string form, or {@code null} if {@code value} is null
+     */
+    @Override
+    public Object create(Text value) {
+        return value == null ? null : value.toString();
+    }
+
+    /**
+     * Returns the replacement value for {@code o}; {@code o} itself is not modified.
+     *
+     * @param o the previous value (ignored)
+     * @param value the new text, may be {@code null}
+     * @return the text's string form, or {@code null} if {@code value} is null
+     */
+    @Override
+    public Object set(Object o, Text value) {
+        return value == null ? null : value.toString();
+    }
+
+    /**
+     * Creates a value from a {@code String}.
+     *
+     * @param value the string to store, may be {@code null}
+     * @return {@code value} unchanged
+     */
+    @Override
+    public Object create(String value) {
+        return value;
+    }
+
+    /**
+     * Returns the replacement value for {@code o}; {@code o} itself is not modified.
+     *
+     * @param o the previous value (ignored)
+     * @param value the new string, may be {@code null}
+     * @return {@code value} unchanged
+     */
+    @Override
+    public Object set(Object o, String value) {
+        return value;
+    }
+}
View
154 src/test/java/org/openx/data/jsonserde/GetJsonObjectTest.java
@@ -0,0 +1,154 @@
+package org.openx.data.jsonserde;
+
+import static org.junit.Assert.*;
+
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.udf.UDFJson;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.junit.Before;
+import org.junit.Test;
+import org.openx.data.jsonserde.json.JSONException;
+import org.openx.data.jsonserde.json.JSONObject;
+
+/**
+ * Tests that a JSON value stored in a column declared as {@code string} can be
+ * read through the column's string object inspector and passed to Hive's
+ * {@link UDFJson} (the {@code get_json_object} UDF).
+ *
+ * <p>All tests share one sample document (from the Google video API) and vary
+ * only the value of its {@code pageInfo} field.
+ *
+ * @author snagmote
+ */
+public class GetJsonObjectTest {
+
+    /** Sample document up to, and including, the {@code "pageInfo": } key. */
+    private static final String JSON_HEAD =
+            "{ \"kind\": \"youtube#videoListResponse\", \"etag\": \"\\\"79S54kzisD_9SOTfQLu_0TVQSpY/mYlS4-ghMGhc1wTFCwoQl3IYDZc\\\"\", \"pageInfo\": ";
+
+    /** Remainder of the sample document following the {@code pageInfo} value. */
+    private static final String JSON_TAIL =
+            ", \"items\": [ { \"kind\": \"youtube#video\", \"etag\": \"\\\"79S54kzisD_9SOTfQLu_0TVQSpY/A4foLs-VO317Po_ulY6b5mSimZA\\\"\", \"id\": \"wHkPb68dxEw\", \"statistics\": { \"viewCount\": \"9211\", \"likeCount\": \"79\", \"dislikeCount\": \"11\", \"favoriteCount\": \"0\", \"commentCount\": \"29\" }, \"topicDetails\": { \"topicIds\": [ \"/m/02mjmr\" ], \"relevantTopicIds\": [ \"/m/0cnfvd\", \"/m/01jdpf\" ] } } ] }";
+
+    /** {@code pageInfo} given as a flat JSON object. */
+    private static final String PAGE_INFO_OBJECT = "{ \"totalResults\": 1, \"resultsPerPage\": 1 }";
+
+    /** {@code pageInfo} given as a JSON object that itself contains an object. */
+    private static final String PAGE_INFO_NESTED =
+            "{ \"pagehit\":{ \"kind\": \"youtube#video\" } ,\"totalResults\": 1, \"resultsPerPage\": 1 }";
+
+    /** {@code pageInfo} given as a plain JSON string rather than an object. */
+    private static final String PAGE_INFO_PLAIN = "\"page\"";
+
+    static JsonSerDe instance;
+
+    @Before
+    public void setUp() throws Exception {
+        initialize();
+    }
+
+    /**
+     * Builds a fresh {@link JsonSerDe} whose {@code pageInfo} column is declared
+     * as {@code string} and stores it in {@link #instance}.
+     *
+     * @throws Exception if the SerDe cannot be initialized
+     */
+    static public void initialize() throws Exception {
+        instance = new JsonSerDe();
+        Configuration conf = null;
+        Properties tbl = new Properties();
+        // from google video API
+        tbl.setProperty(Constants.LIST_COLUMNS, "kind,etag,pageInfo,v_items");
+        tbl.setProperty(
+                Constants.LIST_COLUMN_TYPES,
+                ("string,string," + "string,"
+                        + "ARRAY<STRUCT<kind:STRING,"
+                        + "etag:STRING,"
+                        + "id:STRING,"
+                        + "v_statistics:STRUCT<viewCount:INT,likeCount:INT,dislikeCount:INT,favoriteCount:INT,commentCount:INT>,"
+                        + "topicDetails:STRUCT<topicIds:ARRAY<STRING>,relevantTopicIds:ARRAY<STRING>>"
+                        + ">>").toLowerCase());
+        tbl.setProperty("mapping.v_items", "items");
+        tbl.setProperty("mapping.v_statistics", "statistics");
+
+        // The properties are set and the SerDe initialized exactly once; the
+        // original repeated both steps with identical arguments.
+        instance.initialize(conf, tbl);
+    }
+
+    /**
+     * Deserializes the sample document with the given {@code pageInfo} value and
+     * returns the {@code pageinfo} column as seen through its string inspector.
+     *
+     * @param pageInfoJson JSON text to splice in as the value of {@code pageInfo}
+     * @return the column value as a Java string
+     * @throws SerDeException if deserialization fails
+     */
+    private static String readPageInfo(String pageInfoJson) throws SerDeException {
+        Writable row = new Text(JSON_HEAD + pageInfoJson + JSON_TAIL);
+        JSONObject parsed = (JSONObject) instance.deserialize(row);
+
+        StructObjectInspector rowInspector = (StructObjectInspector) instance.getObjectInspector();
+        Object fieldData = rowInspector.getStructFieldData(parsed, rowInspector.getStructFieldRef("pageinfo"));
+
+        // The cast doubles as a check that the column is exposed as a string.
+        StringObjectInspector stringInspector = (StringObjectInspector) rowInspector
+                .getStructFieldRef("pageinfo").getFieldObjectInspector();
+        return stringInspector.getPrimitiveJavaObject(fieldData);
+    }
+
+    @Test
+    public void testGetJsonObject() throws SerDeException, JSONException {
+        UDFJson udfJson = new UDFJson();
+        // NOTE(review): the path is lower-case although the input key is
+        // "totalResults" - presumably the SerDe lower-cases keys; confirm.
+        Text output = udfJson.evaluate(readPageInfo(PAGE_INFO_OBJECT), "$.totalresults");
+        assertEquals("1", output.toString());
+    }
+
+    @Test
+    public void testNestedGetJsonObject() throws SerDeException, JSONException {
+        UDFJson udfJson = new UDFJson();
+        Text output = udfJson.evaluate(readPageInfo(PAGE_INFO_NESTED), "$.pagehit");
+        assertEquals("{\"kind\":\"youtube#video\"}", output.toString());
+    }
+
+    @Test
+    public void testStringWhenNotJson() throws SerDeException, JSONException {
+        UDFJson udfJson = new UDFJson();
+        Text output = udfJson.evaluate(readPageInfo(PAGE_INFO_PLAIN), "$.test_field");
+        assertNull(output);
+    }
+
+    @Test
+    public void testStringWhenFieldIsNotInJson() throws SerDeException, JSONException {
+        UDFJson udfJson = new UDFJson();
+        Text output = udfJson.evaluate(readPageInfo(PAGE_INFO_OBJECT), "$.test_field");
+        assertNull(output);
+    }
+
+    @Test
+    public void testStringWhenJson() throws SerDeException, JSONException {
+        assertEquals("page", readPageInfo(PAGE_INFO_PLAIN));
+    }
+
+}
Something went wrong with that request. Please try again.