Permalink
Browse files

Making SerDe smarter when handling numeric values.

Keep the string representation until it's actually time to parse it; this way the JSON parser does not need to know in advance whether a number should be a short, an int, or a large int.

Work towards solving #34 and #45
  • Loading branch information...
rcongiu committed Jan 21, 2014
1 parent 8aab1ef commit 3331d9f4865c2ad144bc34ab2caa3e022ac881f5
View
@@ -0,0 +1 @@
/target/
View
@@ -0,0 +1,110 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.openx.data</groupId>
  <artifactId>json-serde</artifactId>
  <version>1.1.5-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>openx-json-serde</name>
  <url>https://github.com/rcongiu/Hive-JSON-Serde</url>
  <scm>
    <!-- Source-control coordinates for this project. connection/developerConnection
         are the read and write SCM URLs; url is the browsable repository page. -->
    <connection>scm:git:git@github.com:rcongiu/Hive-JSON-Serde.git</connection>
    <developerConnection>scm:git:git@github.com:rcongiu/Hive-JSON-Serde.git</developerConnection>
    <url>https://github.com/rcongiu/Hive-JSON-Serde</url>
  </scm>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Version shared by the hive-serde and hive-exec dependencies below -->
    <cdh.version>0.8.0-cdh4a1-SNAPSHOT</cdh.version>
  </properties>
  <build>
    <!-- wagon-ssh-external extension is necessary for deploying with scpexe -->
    <extensions>
      <extension>
        <groupId>org.apache.maven.wagon</groupId>
        <artifactId>wagon-ssh-external</artifactId>
        <version>2.2</version>
      </extension>
    </extensions>
    <plugins>
      <!-- Compile sources as, and emit bytecode for, Java 1.6 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-release-plugin</artifactId>
        <version>2.4</version>
      </plugin>
      <!-- Assembly Plugin: runs the "single" goal during the package phase
           using the predefined jar-with-dependencies descriptor -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
            </manifest>
          </archive>
        </configuration>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <repositories>
    <!-- NOTE(review): presumably needed to resolve the ${cdh.version} Hive artifacts; confirm -->
    <repository>
      <id>Cloudera</id>
      <name>Cloudera Maven Repo</name>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <!-- Hive/Hadoop artifacts use the "provided" scope: they are needed to
         compile but are expected to be supplied by the runtime environment -->
    <dependency>
      <groupId>org.apache.hadoop.hive</groupId>
      <artifactId>hive-serde</artifactId>
      <version>${cdh.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>${cdh.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>0.20.2</version>
      <scope>provided</scope>
    </dependency>
  </dependencies>
</project>
View
@@ -0,0 +1,22 @@
#release configuration
#Mon Mar 25 21:14:59 PDT 2013
dependency.org.apache.hadoop.hive\:hive-exec.release=0.8.0-cdh4a1-SNAPSHOT
scm.commentPrefix=[maven-release-plugin]
dependency.org.apache.hadoop.hive\:hive-serde.release=0.8.0-cdh4a1-SNAPSHOT
project.scm.org.openx.data\:json-serde.tag=HEAD
pushChanges=true
project.scm.org.openx.data\:json-serde.connection=scm\:git\:git@github.com\:rcongiu/Hive-JSON-Serde.git
project.rel.org.openx.data\:json-serde=1.1.5
scm.tag=json-serde-1.1.5
remoteTagging=true
project.scm.org.openx.data\:json-serde.url=scm\:git\:git@github.com\:juven/git-demo.git
exec.additionalArguments=-P openx,openx
scm.url=scm\:git\:git@github.com\:rcongiu/Hive-JSON-Serde.git
scm.tagNameFormat=@{project.artifactId}-@{project.version}
dependency.org.apache.hadoop.hive\:hive-exec.development=0.8.0-cdh4a1-SNAPSHOT
preparationGoals=clean verify
exec.snapshotReleasePluginAllowed=false
dependency.org.apache.hadoop.hive\:hive-serde.development=0.8.0-cdh4a1-SNAPSHOT
project.dev.org.openx.data\:json-serde=1.1.6-SNAPSHOT
project.scm.org.openx.data\:json-serde.developerConnection=scm\:git\:git@github.com\:rcongiu/Hive-JSON-Serde.git
completedPhase=end-release
@@ -178,21 +178,23 @@ public JSONObject put(String key, Object value) throws JSONException {
rowTypeInfo.getStructFieldTypeInfo(key).getCategory().equals(PrimitiveObjectInspector.Category.PRIMITIVE) &&
((PrimitiveTypeInfo) rowTypeInfo.getStructFieldTypeInfo(key))
.getPrimitiveCategory().equals(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP) ) {
// value is always string. Let's see which kind
if(value instanceof String) {
value = Timestamp.valueOf((String)value);
} else if (value instanceof Float ) {
value = new Timestamp( (long) (((Float)value).floatValue() * 1000));
} else if ( value instanceof Integer) {
value = new Timestamp( ((Integer)value).longValue() * 1000);
} else if ( value instanceof Long) {
value = new Timestamp( ((Long)value).longValue() * 1000);
} else if ( value instanceof Double) {
value = new Timestamp( ((Double)value).longValue() * 1000);
} else {
throw new JSONException("I don't know how to conver to timestamp a field of type " + value.getClass()) ;
}
String s = (String) value;
if(s.indexOf(':') > 0) {
value = Timestamp.valueOf(s);
} else if(s.indexOf('.') >=0 ) {
// it's a float
value = new Timestamp( (long) ((double) (Double.parseDouble(s) * 1000)));
} else {
// integer
value = new Timestamp( Long.parseLong(s) * 1000);
}
}
}
} catch (IllegalArgumentException e) {
} catch (NumberFormatException e) {
throw new JSONException("Timestamp " + value + "improperly formatted.");
}
@@ -396,7 +398,7 @@ private JSONArray serializeList(Object obj, ListObjectInspector loi) {
}
/**
* Serializes a Hive map<> using a JSONObject.
* Serializes a Hive map&lt;&gt; using a JSONObject.
*
* @param obj the object to serialize
* @param moi the object's inspector
@@ -240,8 +240,7 @@ public JSONObject(JSONTokener x) throws JSONException {
* Construct a JSONObject from a Map.
*
* @param map A map object that can be used to initialize the contents of
* the JSONObject.
* @throws JSONException
* the JSONObject.
*/
public JSONObject(Map map) {
this.map = new HashMap();
@@ -505,7 +504,7 @@ public double getDouble(String key) throws JSONException {
return object instanceof Number ?
((Number)object).doubleValue() :
Double.parseDouble((String)object);
} catch (Exception e) {
} catch (NumberFormatException e) {
throw new JSONException("JSONObject[" + quote(key) +
"] is not a number.");
}
@@ -526,7 +525,7 @@ public int getInt(String key) throws JSONException {
return object instanceof Number ?
((Number)object).intValue() :
Integer.parseInt((String)object);
} catch (Exception e) {
} catch (NumberFormatException e) {
throw new JSONException("JSONObject[" + quote(key) +
"] is not an int.");
}
@@ -593,6 +592,7 @@ public long getLong(String key) throws JSONException {
/**
* Get an array of field names from a JSONObject.
*
* @param jo
* @return An array of field names, or null if there are no names.
*/
public static String[] getNames(JSONObject jo) {
@@ -614,6 +614,7 @@ public long getLong(String key) throws JSONException {
/**
* Get an array of field names from an Object.
*
* @param object
* @return An array of field names, or null if there are no names.
*/
public static String[] getNames(Object object) {
@@ -800,7 +801,7 @@ public boolean optBoolean(String key) {
public boolean optBoolean(String key, boolean defaultValue) {
try {
return getBoolean(key);
} catch (Exception e) {
} catch (JSONException e) {
return defaultValue;
}
}
@@ -833,7 +834,7 @@ public double optDouble(String key) {
public double optDouble(String key, double defaultValue) {
try {
return getDouble(key);
} catch (Exception e) {
} catch (JSONException e) {
return defaultValue;
}
}
@@ -866,7 +867,7 @@ public int optInt(String key) {
public int optInt(String key, int defaultValue) {
try {
return getInt(key);
} catch (Exception e) {
} catch (JSONException e) {
return defaultValue;
}
}
@@ -927,7 +928,7 @@ public long optLong(String key) {
public long optLong(String key, long defaultValue) {
try {
return getLong(key);
} catch (Exception e) {
} catch (JSONException e) {
return defaultValue;
}
}
@@ -1146,7 +1147,7 @@ public final JSONObject putOnce(String key, Object value) throws JSONException {
* @return this.
* @throws JSONException If the value is a non-finite number.
*/
public JSONObject putOpt(String key, Object value) throws JSONException {
public final JSONObject putOpt(String key, Object value) throws JSONException {
if (key != null && value != null) {
put(key, value);
}
@@ -1249,39 +1250,6 @@ public static Object stringToValue(String string) {
return JSONObject.NULL;
}
/*
* If it might be a number, try converting it.
* We support the non-standard 0x- convention.
* If a number cannot be produced, then the value will just
* be a string. Note that the 0x-, plus, and implied string
* conventions are non-standard. A JSON parser may accept
* non-JSON forms as long as it accepts all correct JSON forms.
*/
char b = string.charAt(0);
if ((b >= '0' && b <= '9') || b == '.' || b == '-' || b == '+') {
if (b == '0' && string.length() > 2 &&
(string.charAt(1) == 'x' || string.charAt(1) == 'X')) {
try {
return new Integer(Integer.parseInt(string.substring(2), 16));
} catch (Exception ignore) {
}
}
try {
if (string.indexOf('.') > -1 ||
string.indexOf('e') > -1 || string.indexOf('E') > -1) {
return Double.valueOf(string);
} else {
Long myLong = new Long(string);
if (myLong.longValue() == myLong.intValue()) {
return new Integer(myLong.intValue());
} else {
return myLong;
}
}
} catch (Exception ignore) {
}
}
return string;
}
@@ -1356,7 +1324,7 @@ public String toString() {
}
sb.append('}');
return sb.toString();
} catch (Exception e) {
} catch (JSONException e) {
return null;
}
}
@@ -1458,7 +1426,7 @@ String toString(int indentFactor, int indent) throws JSONException {
* @throws JSONException If the value is or contains an invalid number.
*/
public static String valueToString(Object value) throws JSONException {
if (value == null || value.equals(null)) {
if (value == null ) {
return "null";
}
if (value instanceof JSONString) {
@@ -1596,7 +1564,7 @@ public static Object wrap(Object object) {
return object.toString();
}
return new JSONObject(object);
} catch(Exception exception) {
} catch(JSONException exception) {
return null;
}
}
@@ -1608,6 +1576,7 @@ public static Object wrap(Object object) {
* <p>
* Warning: This method assumes that the data structure is acyclical.
*
* @param writer
* @return The writer.
* @throws JSONException
*/
@@ -384,8 +384,7 @@ public Object nextValue() throws JSONException {
if (string.equals("")) {
throw syntaxError("Missing value");
}
//return JSONObject.stringToValue(string);
return string; // we let the SerDe get the right type if numeric
return JSONObject.stringToValue(string);
}
@@ -439,6 +438,7 @@ public JSONException syntaxError(String message) {
*
* @return " at {index} [character {character} line {line}]"
*/
@Override
public String toString() {
return " at " + index + " [character " + this.character + " line " +
this.line + "]";
Oops, something went wrong.

0 comments on commit 3331d9f

Please sign in to comment.