Skip to content

Commit

Permalink
Add configuration for explicit null value in the serialized output JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
Yair Levi committed Oct 6, 2017
1 parent 6d6acfd commit 9bd0cf6
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 15 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,9 @@
# Changelog for the Hive-JSON-SerDe # Changelog for the Hive-JSON-SerDe
_Generated automatically by maven_ _Generated automatically by maven_


## Next Release
* *2017-10-06 10:04:22* Add configuration for explicit null value in the serialized output JSON _(lyair1)_

## 1.3.8 ## 1.3.8
* *2017-04-09 22:50:53* updating poms for branch'release/1.3.8' with non-snapshot versions _(rcongiu)_ [91cd5dcbb17c7f4](https://github.com/rcongiu/Hive-JSON-Serde/commit/91cd5dcbb17c7f4) * *2017-04-09 22:50:53* updating poms for branch'release/1.3.8' with non-snapshot versions _(rcongiu)_ [91cd5dcbb17c7f4](https://github.com/rcongiu/Hive-JSON-Serde/commit/91cd5dcbb17c7f4)
* *2017-04-09 22:46:47* Found a better place for the automated CHANGELOG, in subproject _(rcongiu)_ [14de153f362cc34](https://github.com/rcongiu/Hive-JSON-Serde/commit/14de153f362cc34) * *2017-04-09 22:46:47* Found a better place for the automated CHANGELOG, in subproject _(rcongiu)_ [14de153f362cc34](https://github.com/rcongiu/Hive-JSON-Serde/commit/14de153f362cc34)
Expand Down
30 changes: 30 additions & 0 deletions README.md
Expand Up @@ -288,6 +288,36 @@ WITH SERDEPROPERTIES (
SELECT time1,time2 from mytable SELECT time1,time2 from mytable
``` ```


### Explicit Null Value In Serialized JSON String

To be compliant with some object-oriented systems, an explicit 'null' JSON value is required in the serialized string.
By default, Hive-JSON-Serde does not produce null values in the serialized output JSON string; it simply drops the key. If you do want explicit 'null' values in your output JSON string, use the following:

```
DROP TABLE tableWithNull;
CREATE EXTERNAL TABLE tableWithNull
(
`stringCol` STRING,
`stringNullCol` STRING,
`stringMissingCol` STRING,
`structCol` STRUCT<name : STRING>,
`structNullCol` STRUCT<name : STRING>,
`structMissingCol` STRUCT<name : STRING>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("explicit.null.value" = "true");
-- JSON string: {"stringCol":"blabla","stringNullCol":null,"structCol":{"name":"myName"},"structNullCol":{"name":null}}
LOAD DATA LOCAL INPATH 'pathToJsonFile.json' OVERWRITE INTO TABLE tableWithNull;
-- The output when ("explicit.null.value" = "true"):
-- {"stringCol":"blabla","stringNullCol":null,"stringMissingCol":null,"structCol":{"name":"myName"},"structNullCol":{"name":null},"structMissingCol":null}
-- The default output or when ("explicit.null.value" = "false"):
-- {"stringCol":"blabla","structCol":{"name":"myName"},"structNullCol":{}}
```


### User Defined Functions (UDF) ### User Defined Functions (UDF)


#### tjson #### tjson
Expand Down
34 changes: 20 additions & 14 deletions json-serde/src/main/java/org/openx/data/jsonserde/JsonSerDe.java
Expand Up @@ -80,11 +80,13 @@ public class JsonSerDe extends AbstractSerDe {
long serializedDataSize; long serializedDataSize;
// if set, will ignore malformed JSON in deserialization // if set, will ignore malformed JSON in deserialization
boolean ignoreMalformedJson = false; boolean ignoreMalformedJson = false;
boolean explicitNullValue = false;


// properties used in configuration // properties used in configuration
public static final String PROP_IGNORE_MALFORMED_JSON = "ignore.malformed.json"; public static final String PROP_IGNORE_MALFORMED_JSON = "ignore.malformed.json";
public static final String PROP_DOTS_IN_KEYS = "dots.in.keys"; public static final String PROP_DOTS_IN_KEYS = "dots.in.keys";
public static final String PROP_CASE_INSENSITIVE ="case.insensitive" ; public static final String PROP_CASE_INSENSITIVE ="case.insensitive" ;
public static final String PROP_EXPLICIT_NULL ="explicit.null.value" ;


JsonStructOIOptions options; JsonStructOIOptions options;


Expand Down Expand Up @@ -151,7 +153,9 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException
// other configuration // other configuration
ignoreMalformedJson = Boolean.parseBoolean(tbl ignoreMalformedJson = Boolean.parseBoolean(tbl
.getProperty(PROP_IGNORE_MALFORMED_JSON, "false")); .getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));


explicitNullValue = Boolean.parseBoolean(tbl
.getProperty(PROP_EXPLICIT_NULL, "false"));
} }


/** /**
Expand Down Expand Up @@ -266,20 +270,22 @@ private JSONObject serializeStruct( Object obj,
StructField sf = fields.get(i); StructField sf = fields.get(i);
Object data = soi.getStructFieldData(obj, sf); Object data = soi.getStructFieldData(obj, sf);


if (null != data) { try {
try { if (null != data) {
// we want to serialize columns with their proper HIVE name,
// not the _col2 kind of name usually generated upstream // we want to serialize columns with their proper HIVE name,
result.put( // not the _col2 kind of name usually generated upstream
getSerializedFieldName(columnNames, i, sf), result.put(
serializeField( getSerializedFieldName(columnNames, i, sf),
data, serializeField(
sf.getFieldObjectInspector())); data,

sf.getFieldObjectInspector()));
} catch (JSONException ex) { } else if(explicitNullValue) {
LOG.warn("Problem serializing", ex); result.putNull(getSerializedFieldName(columnNames, i, sf));
throw new RuntimeException(ex);
} }
} catch (JSONException ex) {
LOG.warn("Problem serializing", ex);
throw new RuntimeException(ex);
} }
} }
return result; return result;
Expand Down
Expand Up @@ -628,5 +628,86 @@ public void testNestedCaseSensitiveMapping() throws SerDeException, IOException
Object col2 = soi.getStructFieldData(res, soi.getStructFieldRef("col2")); Object col2 = soi.getStructFieldData(res, soi.getStructFieldRef("col2"));
assertTrue(soi2.getStructFieldData(col2, soi2.getStructFieldRef("time1")).equals("foryou")); assertTrue(soi2.getStructFieldData(col2, soi2.getStructFieldRef("time1")).equals("foryou"));
} }


/**
 * Checks the default behaviour, i.e. when the {@code explicit.null.value}
 * table property is not set: a column that is null in the input and a column
 * that is missing from the input are both left out of the serialized JSON.
 */
@Test
public void testExplicitNullValueDefault() throws SerDeException, IOException {
    System.out.println("testExplicitNullValueDefault");
    JsonSerDe serde = new JsonSerDe();
    Configuration conf = null;
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "stringCol,nullCol,missingCol");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string");
    serde.initialize(conf, tbl);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();

    // Load json string with one 'null' value and one 'missing' value
    Object res = serde.deserialize(new Text("{\"stringCol\":\"str\",\"nullCol\":null}"));

    // Get the serialized json string
    String jsonStr = serde.serialize(res, soi).toString();

    // The deserialized row exposes the string value and reports null for both
    // the explicit-null column and the missing column.
    assertTrue(soi.getStructFieldData(res, soi.getStructFieldRef("stringCol")).equals("str"));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("nullCol")));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("missingCol")));

    // Expected value goes first so a failure reports expected/actual correctly.
    assertEquals("{\"stringCol\":\"str\"}", jsonStr);
}

/**
 * Checks that with {@code explicit.null.value=true} both a column that is
 * null in the input and a column that is missing from the input are written
 * to the serialized JSON as explicit {@code null} values.
 */
@Test
public void testExplicitNullValue() throws SerDeException, IOException {
    System.out.println("testExplicitNullValue");
    JsonSerDe serde = new JsonSerDe();
    Configuration conf = null;
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "stringCol,nullCol,missingCol");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string");

    // Set 'explicit.null.value' to true
    tbl.setProperty(JsonSerDe.PROP_EXPLICIT_NULL, "true");

    serde.initialize(conf, tbl);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();

    // Load json string with one 'null' value and one 'missing' value
    Object res = serde.deserialize(new Text("{\"stringCol\":\"str\",\"nullCol\":null}"));

    // Get the serialized json string
    String jsonStr = serde.serialize(res, soi).toString();

    assertTrue(soi.getStructFieldData(res, soi.getStructFieldRef("stringCol")).equals("str"));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("nullCol")));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("missingCol")));

    // Expected value goes first so a failure reports expected/actual correctly.
    // NOTE(review): the expected string depends on the key order emitted by the
    // JSON object's toString() - presumably map iteration order; confirm it is stable.
    assertEquals("{\"nullCol\":null,\"stringCol\":\"str\",\"missingCol\":null}", jsonStr);
}

/**
 * Checks {@code explicit.null.value=true} with struct columns: a null field
 * inside a struct and a struct column missing from the input are both
 * written to the serialized JSON as explicit {@code null} values.
 */
@Test
public void testNestedExplicitNullValue() throws SerDeException, IOException {
    System.out.println("testNestedExplicitNullValue");
    JsonSerDe serde = new JsonSerDe();
    Configuration conf = null;
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "structCol,structNullCol,missingStructCol");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<name:string>,struct<name:string>,struct<name:string>");

    // Set 'explicit.null.value' to true
    tbl.setProperty(JsonSerDe.PROP_EXPLICIT_NULL, "true");

    serde.initialize(conf, tbl);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    Object res = serde.deserialize(new Text("{\"structCol\":{\"name\":\"myName\"},\"structNullCol\":{\"name\":null}}"));

    // Get the serialized json string
    String jsonStr = serde.serialize(res, soi).toString();

    // Struct with a populated field
    StructObjectInspector structColSoi = (StructObjectInspector) soi.getStructFieldRef("structCol").getFieldObjectInspector();
    Object structCol = soi.getStructFieldData(res, soi.getStructFieldRef("structCol"));
    assertTrue(structColSoi.getStructFieldData(structCol, structColSoi.getStructFieldRef("name")).equals("myName"));

    // Struct whose only field is an explicit null
    StructObjectInspector structNullColSoi = (StructObjectInspector) soi.getStructFieldRef("structNullCol").getFieldObjectInspector();
    Object structNullCol = soi.getStructFieldData(res, soi.getStructFieldRef("structNullCol"));
    assertNull(structNullColSoi.getStructFieldData(structNullCol, structNullColSoi.getStructFieldRef("name")));

    // Struct column absent from the input
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("missingStructCol")));

    // Expected value goes first so a failure reports expected/actual correctly.
    // NOTE(review): the expected string depends on the key order emitted by the
    // JSON object's toString() - presumably map iteration order; confirm it is stable.
    assertEquals("{\"missingStructCol\":null,\"structCol\":{\"name\":\"myName\"},\"structNullCol\":{\"name\":null}}", jsonStr);
}
} }
17 changes: 17 additions & 0 deletions json/src/main/java/org/openx/data/jsonserde/json/JSONObject.java
Expand Up @@ -1130,6 +1130,23 @@ public JSONObject put(String key, Object value) throws JSONException {
} }




/**
 * Associates {@code key} with a Java {@code null} value in this JSONObject's
 * backing map, so the key is present but carries no value.
 *
 * @param key the member name; must not be {@code null}.
 * @return this JSONObject, to allow call chaining.
 * @throws JSONException if {@code key} is {@code null}.
 */
public JSONObject putNull(String key) throws JSONException {
    if (null != key) {
        // Store the key with a null value rather than omitting the entry.
        this.map.put(key, null);
        return this;
    }
    throw new JSONException("Null key.");
}


/** /**
* Put a key/value pair in the JSONObject, but only if the key and the * Put a key/value pair in the JSONObject, but only if the key and the
* value are both non-null, and only if there is not already a member * value are both non-null, and only if there is not already a member
Expand Down

0 comments on commit 9bd0cf6

Please sign in to comment.