Permalink
Browse files

Add configuration for explicit null value in the serialized output JSON

  • Loading branch information...
Yair Levi
Yair Levi committed Oct 6, 2017
1 parent 6d6acfd commit 9bd0cf6af864e319becf10ee481815d12fc8b94a
View
@@ -1,6 +1,9 @@
# Changelog for the Hive-JSON-SerDe
_Generated automatically by maven_
## Next Release
* *2017-10-06 10:04:22* Add configuration for explicit null value in the serialized output JSON _(lyair1)_
## 1.3.8
* *2017-04-09 22:50:53* updating poms for branch'release/1.3.8' with non-snapshot versions _(rcongiu)_ [91cd5dcbb17c7f4](https://github.com/rcongiu/Hive-JSON-Serde/commit/91cd5dcbb17c7f4)
* *2017-04-09 22:46:47* Found a better place for the automated CHANGELOG, in subproject _(rcongiu)_ [14de153f362cc34](https://github.com/rcongiu/Hive-JSON-Serde/commit/14de153f362cc34)
View
@@ -288,6 +288,36 @@ WITH SERDEPROPERTIES (
SELECT time1,time2 from mytable
```
### Explicit Null Value In Serialized JSON String
In order to be compliant with some object-oriented systems, an explicit 'null' JSON value is required in the serialized string.
By default, Hive-JSON-Serde does not produce null values in the serialized output JSON string and simply drops the key. If you do want explicit 'null' values in your output JSON string, use the following:
```
DROP TABLE tableWithNull;
CREATE EXTERNAL TABLE tableWithNull
(
`stringCol` STRING,
`stringNullCol` STRING,
`stringMissingCol` STRING,
`structCol` STRUCT<name : STRING>,
`structNullCol` STRUCT<name : STRING>,
`structMissingCol` STRUCT<name : STRING>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("explicit.null.value" = "true");
-- JSON string: {"stringCol":"blabla","stringNullCol":null,"structCol":{"name":"myName"},"structNullCol":{"name":null}}
LOAD DATA LOCAL INPATH 'pathToJsonFile.json' OVERWRITE INTO TABLE tableWithNull;
-- The output when ("explicit.null.value" = "true"):
-- {"stringCol":"blabla","stringNullCol":null,"stringMissingCol":null,"structCol":{"name":"myName"},"structNullCol":{"name":null},"structMissingCol":null}
-- The default output or when ("explicit.null.value" = "false"):
-- {"stringCol":"blabla","structCol":{"name":"myName"},"structNullCol":{}}
```
### User Defined Functions (UDF)
#### tjson
@@ -80,11 +80,13 @@
long serializedDataSize;
// if set, will ignore malformed JSON in deserialization
boolean ignoreMalformedJson = false;
// if set, serialization writes struct fields whose value is null as an
// explicit JSON null instead of leaving the key out of the output
boolean explicitNullValue = false;
// properties used in configuration
public static final String PROP_IGNORE_MALFORMED_JSON = "ignore.malformed.json";
public static final String PROP_DOTS_IN_KEYS = "dots.in.keys";
public static final String PROP_CASE_INSENSITIVE ="case.insensitive" ;
// table property read in initialize() to populate explicitNullValue;
// treated as "false" when the property is absent
public static final String PROP_EXPLICIT_NULL ="explicit.null.value" ;
JsonStructOIOptions options;
@@ -151,7 +153,9 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException
// other configuration
ignoreMalformedJson = Boolean.parseBoolean(tbl
.getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));
explicitNullValue = Boolean.parseBoolean(tbl
.getProperty(PROP_EXPLICIT_NULL, "false"));
}
/**
@@ -266,20 +270,22 @@ private JSONObject serializeStruct( Object obj,
StructField sf = fields.get(i);
Object data = soi.getStructFieldData(obj, sf);
if (null != data) {
try {
// we want to serialize columns with their proper HIVE name,
// not the _col2 kind of name usually generated upstream
result.put(
getSerializedFieldName(columnNames, i, sf),
serializeField(
data,
sf.getFieldObjectInspector()));
} catch (JSONException ex) {
LOG.warn("Problem serializing", ex);
throw new RuntimeException(ex);
try {
if (null != data) {
// we want to serialize columns with their proper HIVE name,
// not the _col2 kind of name usually generated upstream
result.put(
getSerializedFieldName(columnNames, i, sf),
serializeField(
data,
sf.getFieldObjectInspector()));
} else if(explicitNullValue) {
result.putNull(getSerializedFieldName(columnNames, i, sf));
}
} catch (JSONException ex) {
LOG.warn("Problem serializing", ex);
throw new RuntimeException(ex);
}
}
return result;
@@ -628,5 +628,86 @@ public void testNestedCaseSensitiveMapping() throws SerDeException, IOException
Object col2 = soi.getStructFieldData(res, soi.getStructFieldRef("col2"));
assertTrue(soi2.getStructFieldData(col2, soi2.getStructFieldRef("time1")).equals("foryou"));
}
/**
 * Verifies the default behaviour, i.e. when the 'explicit.null.value'
 * property is not set: columns whose value is null, and columns that are
 * absent from the input JSON, are left out of the serialized output.
 */
@Test
public void testExplicitNullValueDefault() throws SerDeException, IOException {
    System.out.println("testExplicitNullValueDefault");

    JsonSerDe serde = new JsonSerDe();
    Configuration conf = null;
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "stringCol,nullCol,missingCol");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string");
    serde.initialize(conf, tbl);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();

    // Load json string with one 'null' value and one 'missing' value
    Object res = serde.deserialize(new Text("{\"stringCol\":\"str\",\"nullCol\":null}"));

    // Get the serialized json string
    String jsonStr = serde.serialize(res, soi).toString();

    // The deserialized row exposes both the null and the missing column as null
    assertTrue(soi.getStructFieldData(res, soi.getStructFieldRef("stringCol")).equals("str"));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("nullCol")));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("missingCol")));

    // Expected value goes first so a failure message reports expected/actual correctly
    assertEquals("{\"stringCol\":\"str\"}", jsonStr);
}
/**
 * Verifies that with 'explicit.null.value' set to "true" both a column
 * that is null in the input and a column that is missing from the input
 * are serialized as explicit JSON nulls.
 */
@Test
public void testExplicitNullValue() throws SerDeException, IOException {
    System.out.println("testExplicitNullValue");

    JsonSerDe serde = new JsonSerDe();
    Configuration conf = null;
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "stringCol,nullCol,missingCol");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string");
    // Set 'explicit.null.value' to true
    tbl.setProperty(JsonSerDe.PROP_EXPLICIT_NULL, "true");
    serde.initialize(conf, tbl);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();

    // Load json string with one 'null' value and one 'missing' value
    Object res = serde.deserialize(new Text("{\"stringCol\":\"str\",\"nullCol\":null}"));

    // Get the serialized json string
    String jsonStr = serde.serialize(res, soi).toString();

    assertTrue(soi.getStructFieldData(res, soi.getStructFieldRef("stringCol")).equals("str"));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("nullCol")));
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("missingCol")));

    // Expected value goes first so a failure message reports expected/actual correctly.
    // NOTE(review): this is a raw string comparison, so it relies on the key order the
    // serializer happens to emit (which is not the column order here) - confirm it is stable.
    assertEquals("{\"nullCol\":null,\"stringCol\":\"str\",\"missingCol\":null}", jsonStr);
}
/**
 * Verifies 'explicit.null.value' = "true" for struct columns: a null field
 * inside a struct and a struct column missing from the input are both
 * serialized as explicit JSON nulls.
 */
@Test
public void testNestedExplicitNullValue() throws SerDeException, IOException {
    System.out.println("testNestedExplicitNullValue");

    JsonSerDe serde = new JsonSerDe();
    Configuration conf = null;
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "structCol,structNullCol,missingStructCol");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<name:string>,struct<name:string>,struct<name:string>");
    // Set 'explicit.null.value' to true
    tbl.setProperty(JsonSerDe.PROP_EXPLICIT_NULL, "true");
    serde.initialize(conf, tbl);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();

    Object res = serde.deserialize(new Text("{\"structCol\":{\"name\":\"myName\"},\"structNullCol\":{\"name\":null}}"));

    // Get the serialized json string
    String jsonStr = serde.serialize(res, soi).toString();

    // structCol.name carries the value from the input
    StructObjectInspector structColSoi = (StructObjectInspector) soi.getStructFieldRef("structCol").getFieldObjectInspector();
    Object structCol = soi.getStructFieldData(res, soi.getStructFieldRef("structCol"));
    assertTrue(structColSoi.getStructFieldData(structCol, structColSoi.getStructFieldRef("name")).equals("myName"));

    // structNullCol exists but its name field is null
    StructObjectInspector structNullColSoi = (StructObjectInspector) soi.getStructFieldRef("structNullCol").getFieldObjectInspector();
    Object structNullCol = soi.getStructFieldData(res, soi.getStructFieldRef("structNullCol"));
    assertNull(structNullColSoi.getStructFieldData(structNullCol, structNullColSoi.getStructFieldRef("name")));

    // missingStructCol was not in the input at all
    assertNull(soi.getStructFieldData(res, soi.getStructFieldRef("missingStructCol")));

    // Expected value goes first so a failure message reports expected/actual correctly.
    // NOTE(review): this is a raw string comparison, so it relies on the key order the
    // serializer happens to emit (which is not the column order here) - confirm it is stable.
    assertEquals("{\"missingStructCol\":null,\"structCol\":{\"name\":\"myName\"},\"structNullCol\":{\"name\":null}}", jsonStr);
}
}
@@ -1130,6 +1130,23 @@ public JSONObject put(String key, Object value) throws JSONException {
}
/**
 * Store a null value under the given key, so that the key is kept in
 * this JSONObject's backing map with no value attached.
 *
 * @param key the key to associate with null; must not itself be null.
 * @return this.
 * @throws JSONException if the key is null.
 */
public JSONObject putNull(String key) throws JSONException {
    if (key != null) {
        this.map.put(key, null);
        return this;
    }
    throw new JSONException("Null key.");
}
/**
* Put a key/value pair in the JSONObject, but only if the key and the
* value are both non-null, and only if there is not already a member

0 comments on commit 9bd0cf6

Please sign in to comment.