@@ -170,6 +170,8 @@ protected Serializable deserializeCell(ColumnType type, int meta, int length, By return deserializeSet(length, inputStream); case GEOMETRY: return deserializeGeometry(meta, inputStream); + case JSON: + return deserializeJson(meta, inputStream); default: throw new IOException("Unsupported type " + type); } @@ -329,6 +331,21 @@ protected byte[] deserializeGeometry(int meta, ByteArrayInputStream inputStream) return inputStream.read(dataLength); } + /** + * Deserialize the {@code JSON} value on the input stream, and return MySQL's internal binary representation + * of the JSON value. See {@link com.github.shyiko.mysql.binlog.event.deserialization.json.JsonBinary} for + * a utility to parse this binary representation into something more useful, including a string representation. + * The value is read as a 4-byte length prefix followed by that many bytes of payload. + * + * @param meta the column metadata; NOTE(review): it is not consulted by this method, the length prefix is always read as a 4-byte integer - confirm this is intended + * @param inputStream the stream containing the JSON value + * @return the MySQL internal binary representation of the JSON value; may be null + * @throws IOException if there is a problem reading the input stream + */ + protected byte[] deserializeJson(int meta, ByteArrayInputStream inputStream) throws IOException { + int blobLength = inputStream.readInteger(4); + return inputStream.read(blobLength); + } + // checkstyle, please ignore ParameterNumber for the next line private static Long asUnixTime(int year, int month, int day, int hour, int minute, int second, int millis) { // https://dev.mysql.com/doc/refman/5.0/en/datetime.html @@ -376,7 +393,7 @@ private static int[] split(long value, int divider, int length) { /** * see mysql/strings/decimal.c */ - private static BigDecimal asBigDecimal(int precision, int scale, byte[] value) { + public static BigDecimal asBigDecimal(int precision, int scale, byte[] value) { boolean positive = (value[0] & 0x80) == 0x80; value[0] ^= 0x80; if (!positive) { diff --git 
a/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/ColumnType.java b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/ColumnType.java index f5f6938..cc8c941 100644 --- a/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/ColumnType.java +++ b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/ColumnType.java @@ -45,6 +45,7 @@ public enum ColumnType { TIMESTAMP_V2(17), DATETIME_V2(18), TIME_V2(19), + JSON(245), NEWDECIMAL(246), ENUM(247), SET(248), diff --git a/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonBinary.java b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonBinary.java new file mode 100644 index 0000000..d5824a3 --- /dev/null +++ b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonBinary.java @@ -0,0 +1,1006 @@ +/* + * Copyright 2016 Stanley Shyiko + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.github.shyiko.mysql.binlog.event.deserialization.json; + +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.Charset; + +import com.github.shyiko.mysql.binlog.event.deserialization.AbstractRowsEventDataDeserializer; +import com.github.shyiko.mysql.binlog.event.deserialization.ColumnType; +import com.github.shyiko.mysql.binlog.io.ByteArrayInputStream; + +/** + * Utility to parse the binary-encoded value of a MySQL {@code JSON} type, translating the encoded representation into + * method calls on a supplied {@link JsonFormatter} implementation. + * + *
+ * + *
+ * doc ::= type value + * type ::= + * 0x00 | // small JSON object + * 0x01 | // large JSON object + * 0x02 | // small JSON array + * 0x03 | // large JSON array + * 0x04 | // literal (true/false/null) + * 0x05 | // int16 + * 0x06 | // uint16 + * 0x07 | // int32 + * 0x08 | // uint32 + * 0x09 | // int64 + * 0x0a | // uint64 + * 0x0b | // double + * 0x0c | // utf8mb4 string + * 0x0f // custom data (any MySQL data type) + * value ::= + * object | + * array | + * literal | + * number | + * string | + * custom-data + * object ::= element-count size key-entry* value-entry* key* value* + * array ::= element-count size value-entry* value* + * // number of members in object or number of elements in array + * element-count ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * // number of bytes in the binary representation of the object or array + * size ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * key-entry ::= key-offset key-length + * key-offset ::= + * uint16 | // if used in small JSON object + * uint32 // if used in large JSON object + * key-length ::= uint16 // key length must be less than 64KB + * value-entry ::= type offset-or-inlined-value + * // This field holds either the offset to where the value is stored, + * // or the value itself if it is small enough to be inlined (that is, + * // if it is a JSON literal or a small enough [u]int). + * offset-or-inlined-value ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * key ::= utf8mb4-data + * literal ::= + * 0x00 | // JSON null literal + * 0x01 | // JSON true literal + * 0x02 | // JSON false literal + * number ::= .... 
// little-endian format for [u]int(16|32|64), whereas + * // double is stored in a platform-independent, eight-byte + * // format using float8store() + * string ::= data-length utf8mb4-data + * custom-data ::= custom-type data-length binary-data + * custom-type ::= uint8 // type identifier that matches the + * // internal enum_field_types enum + * data-length ::= uint8* // If the high bit of a byte is 1, the length + * // field is continued in the next byte, + * // otherwise it is the last byte of the length + * // field. So we need 1 byte to represent + * // lengths up to 127, 2 bytes to represent + * // lengths up to 16383, and so on... + *+ * + * @author Randall Hauch + */ +public class JsonBinary { + + private static final Charset UTF_8 = Charset.forName("UTF-8"); + + /** + * Parse the MySQL binary representation of a {@code JSON} value and return the JSON string representation. + *
+ * This method is equivalent to {@link #parse(byte[], JsonFormatter)} using the {@link JsonStringFormatter}. + * + * @param bytes the binary representation; may not be null + * @return the JSON string representation; never null + * @throws IOException if there is a problem reading or processing the binary representation + */ + public static String parseAsString(byte[] bytes) throws IOException { + JsonStringFormatter handler = new JsonStringFormatter(); + parse(bytes, handler); + return handler.getString(); + } + + /** + * Parse the MySQL binary representation of a {@code JSON} value and call the supplied {@link JsonFormatter} + * for the various components of the value. + * + * @param bytes the binary representation; may not be null + * @param formatter the formatter that will be called as the binary representation is parsed; may not be null + * @throws IOException if there is a problem reading or processing the binary representation + */ + public static void parse(byte[] bytes, JsonFormatter formatter) throws IOException { + new JsonBinary(bytes).parse(formatter); + } + + private final ByteArrayInputStream reader; + + public JsonBinary(byte[] bytes) { + this(new ByteArrayInputStream(bytes)); + } + + public JsonBinary(ByteArrayInputStream contents) { + this.reader = contents; + } + + @Override + public String toString() { + return getString(); + } + + public String getString() { + JsonStringFormatter handler = new JsonStringFormatter(); + try { + parse(handler); + } catch (IOException e) { + throw new RuntimeException(e); + } + return handler.getString(); + } + + public void parse(JsonFormatter formatter) throws IOException { + parse(readValueType(), formatter); + } + + protected void parse(ValueType type, JsonFormatter formatter) throws IOException { + switch (type) { + case SMALL_DOCUMENT: + parseObject(true, formatter); + break; + case LARGE_DOCUMENT: + parseObject(false, formatter); + break; + case SMALL_ARRAY: + parseArray(true, formatter); + break; + case 
LARGE_ARRAY: + parseArray(false, formatter); + break; + case LITERAL: + parseBoolean(formatter); + break; + case INT16: + parseInt16(formatter); + break; + case UINT16: + parseUInt16(formatter); + break; + case INT32: + parseInt32(formatter); + break; + case UINT32: + parseUInt32(formatter); + break; + case INT64: + parseInt64(formatter); + break; + case UINT64: + parseUInt64(formatter); + break; + case DOUBLE: + parseDouble(formatter); + break; + case STRING: + parseString(formatter); + break; + case CUSTOM: + parseOpaque(formatter); + break; + default: + throw new IOException("Unknown type value '" + asHex(type.getCode()) + + "' in first byte of a JSON value"); + } + } + + /** + * Parse a JSON object. + *
+ * The grammar of the binary representation of JSON objects are defined in the MySQL code base in the + * json_binary.h file: + *
+ * value ::= + * object | + * array | + * literal | + * number | + * string | + * custom-data + * object ::= element-count size key-entry* value-entry* key* value* + * // number of members in object or number of elements in array + * element-count ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * // number of bytes in the binary representation of the object or array + * size ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * key-entry ::= key-offset key-length + * key-offset ::= + * uint16 | // if used in small JSON object + * uint32 // if used in large JSON object + * key-length ::= uint16 // key length must be less than 64KB + * value-entry ::= type offset-or-inlined-value + * // This field holds either the offset to where the value is stored, + * // or the value itself if it is small enough to be inlined (that is, + * // if it is a JSON literal or a small enough [u]int). + * offset-or-inlined-value ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * key ::= utf8mb4-data + * literal ::= + * 0x00 | // JSON null literal + * 0x01 | // JSON true literal + * 0x02 | // JSON false literal + * number ::= .... // little-endian format for [u]int(16|32|64), whereas + * // double is stored in a platform-independent, eight-byte + * // format using float8store() + * string ::= data-length utf8mb4-data + * custom-data ::= custom-type data-length binary-data + * custom-type ::= uint8 // type identifier that matches the + * // internal enum_field_types enum + * data-length ::= uint8* // If the high bit of a byte is 1, the length + * // field is continued in the next byte, + * // otherwise it is the last byte of the length + * // field. So we need 1 byte to represent + * // lengths up to 127, 2 bytes to represent + * // lengths up to 16383, and so on... 
+ *+ * + * @param small {@code true} if the object being read is "small", or {@code false} otherwise + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseObject(boolean small, JsonFormatter formatter) + throws IOException { + // Read the header ... + int numElements = readUnsignedIndex(Integer.MAX_VALUE, small, "number of elements in"); + int numBytes = readUnsignedIndex(Integer.MAX_VALUE, small, "size of"); + + // Read each key-entry, consisting of the offset and length of each key ... + int[] keyLengths = new int[numElements]; + for (int i = 0; i != numElements; ++i) { + readUnsignedIndex(numBytes, small, "key offset in"); // unused + keyLengths[i] = readUInt16(); + } + + // Read each key value value-entry + ValueEntry[] entries = new ValueEntry[numElements]; + for (int i = 0; i != numElements; ++i) { + // Parse the value ... + ValueType type = readValueType(); + switch (type) { + case LITERAL: + entries[i] = new ValueEntry(type).setValue(readLiteral()); + break; + case INT16: + case UINT16: + // The "offset" is actually the value ... + int value = readUnsignedIndex(Integer.MAX_VALUE, small, "value offset in"); + entries[i] = new ValueEntry(type).setValue(value); + break; + case INT32: + case UINT32: + if (!small) { + // The value should be large enough to handle the actual value ... + value = readUnsignedIndex(Integer.MAX_VALUE, small, "value offset in"); + entries[i] = new ValueEntry(type).setValue(value); + } + default: + // It is an offset, not a value ... + int offset = readUnsignedIndex(Integer.MAX_VALUE, small, "value offset in"); + if (offset >= numBytes) { + throw new IOException("The offset for the value in the JSON binary document is " + + offset + + ", which is larger than the binary form of the JSON document (" + + numBytes + " bytes)"); + } + entries[i] = new ValueEntry(type, offset); + } + } + + // Read each key ... 
+ String[] keys = new String[numElements]; + for (int i = 0; i != numElements; ++i) { + keys[i] = reader.readString(keyLengths[i]); + } + + // Now parse the values ... + formatter.beginObject(numElements); + for (int i = 0; i != numElements; ++i) { + if (i != 0) { + formatter.nextEntry(); + } + formatter.name(keys[i]); + ValueEntry entry = entries[i]; + if (entry.resolved) { + Object value = entry.value; + if (value == null) { + formatter.valueNull(); + } else if (value instanceof Boolean) { + formatter.value((Boolean) value); + } else if (value instanceof Integer) { + formatter.value((Integer) value); + } + } else { + // Parse the value ... + parse(entry.type, formatter); + } + } + formatter.endObject(); + } + + /** + * Parse a JSON array. + *
+ * The grammar of the binary representation of JSON objects are defined in the MySQL code base in the + * json_binary.h file, and are: + *
+ * value ::= + * object | + * array | + * literal | + * number | + * string | + * custom-data + * array ::= element-count size value-entry* value* + * // number of members in object or number of elements in array + * element-count ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * // number of bytes in the binary representation of the object or array + * size ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * value-entry ::= type offset-or-inlined-value + * // This field holds either the offset to where the value is stored, + * // or the value itself if it is small enough to be inlined (that is, + * // if it is a JSON literal or a small enough [u]int). + * offset-or-inlined-value ::= + * uint16 | // if used in small JSON object/array + * uint32 // if used in large JSON object/array + * key ::= utf8mb4-data + * literal ::= + * 0x00 | // JSON null literal + * 0x01 | // JSON true literal + * 0x02 | // JSON false literal + * number ::= .... // little-endian format for [u]int(16|32|64), whereas + * // double is stored in a platform-independent, eight-byte + * // format using float8store() + * string ::= data-length utf8mb4-data + * custom-data ::= custom-type data-length binary-data + * custom-type ::= uint8 // type identifier that matches the + * // internal enum_field_types enum + * data-length ::= uint8* // If the high bit of a byte is 1, the length + * // field is continued in the next byte, + * // otherwise it is the last byte of the length + * // field. So we need 1 byte to represent + * // lengths up to 127, 2 bytes to represent + * // lengths up to 16383, and so on... 
+ *+ * + * @param small {@code true} if the object being read is "small", or {@code false} otherwise + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + // checkstyle, please ignore MethodLength for the next line + protected void parseArray(boolean small, JsonFormatter formatter) + throws IOException { + // Read the header ... + int numElements = readUnsignedIndex(Integer.MAX_VALUE, small, "number of elements in"); + int numBytes = readUnsignedIndex(Integer.MAX_VALUE, small, "size of"); + + // Read each key value value-entry + ValueEntry[] entries = new ValueEntry[numElements]; + for (int i = 0; i != numElements; ++i) { + // Parse the value ... + ValueType type = readValueType(); + switch (type) { + case LITERAL: + entries[i] = new ValueEntry(type).setValue(readLiteral()); + break; + case INT16: + case UINT16: + // The "offset" is actually the value ... + int value = readUnsignedIndex(Integer.MAX_VALUE, small, "value offset in"); + entries[i] = new ValueEntry(type).setValue(value); + break; + case INT32: + case UINT32: + if (!small) { + // The value should be large enough to handle the actual value ... + value = readUnsignedIndex(Integer.MAX_VALUE, small, "value offset in"); + entries[i] = new ValueEntry(type).setValue(value); + } + default: + // It is an offset, not a value ... + int offset = readUnsignedIndex(Integer.MAX_VALUE, small, "value offset in"); + if (offset >= numBytes) { + throw new IOException("The offset for the value in the JSON binary document is " + + offset + + ", which is larger than the binary form of the JSON document (" + + numBytes + " bytes)"); + } + entries[i] = new ValueEntry(type, offset); + } + } + + // Now parse the values ... 
+ formatter.beginArray(numElements); + for (int i = 0; i != numElements; ++i) { + if (i != 0) { + formatter.nextEntry(); + } + ValueEntry entry = entries[i]; + if (entry.resolved) { + Object value = entry.value; + if (value == null) { + formatter.valueNull(); + } else if (value instanceof Boolean) { + formatter.value((Boolean) value); + } else if (value instanceof Integer) { + formatter.value((Integer) value); + } + } else { + // Parse the value ... + parse(entry.type, formatter); + } + } + formatter.endArray(); + } + + /** + * Parse a literal value that is either null, {@code true}, or {@code false}. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseBoolean(JsonFormatter formatter) throws IOException { + Boolean literal = readLiteral(); + if (literal == null) { + formatter.valueNull(); + } else { + formatter.value(literal); + } + } + + /** + * Parse a 2 byte integer value. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseInt16(JsonFormatter formatter) throws IOException { + int value = readInt16(); + formatter.value(value); + } + + /** + * Parse a 2 byte unsigned integer value. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseUInt16(JsonFormatter formatter) throws IOException { + int value = readUInt16(); + formatter.value(value); + } + + /** + * Parse a 4 byte integer value. 
+ * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseInt32(JsonFormatter formatter) throws IOException { + int value = readInt32(); + formatter.value(value); + } + + /** + * Parse a 4 byte unsigned integer value. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseUInt32(JsonFormatter formatter) throws IOException { + long value = readUInt32(); + formatter.value(value); + } + + /** + * Parse a 8 byte integer value. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseInt64(JsonFormatter formatter) throws IOException { + long value = readInt64(); + formatter.value(value); + } + + /** + * Parse a 8 byte unsigned integer value. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseUInt64(JsonFormatter formatter) throws IOException { + BigInteger value = readUInt64(); + formatter.value(value); + } + + /** + * Parse a 8 byte double value. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseDouble(JsonFormatter formatter) throws IOException { + long rawValue = readInt64(); + double value = Double.longBitsToDouble(rawValue); + formatter.value(value); + } + + /** + * Parse the length and value of a string stored in MySQL's "utf8mb" character set (which equates to Java's + * UTF-8 character set. The length is a {@link #readVariableInt() variable length integer} length of the string. 
+ * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseString(JsonFormatter formatter) throws IOException { + int length = readVariableInt(); + String value = new String(reader.read(length), UTF_8); + formatter.value(value); + } + + /** + * Parse an opaque type. Specific types such as {@link #parseDate(JsonFormatter) DATE}, + * {@link #parseTime(JsonFormatter) TIME}, and {@link #parseDatetime(JsonFormatter) DATETIME} values are + * stored as opaque types, though they are to be unpacked. TIMESTAMPs are also stored as opaque types, but + * converted by MySQL to + * {@code DATETIME} prior to storage. + * Other MySQL types are stored as opaque types and passed on to the formatter as opaque values. + *
+ * See the + * MySQL source code for the logic used in this method. + *
+ *
+ * custom-data ::= custom-type data-length binary-data + * custom-type ::= uint8 // type identifier that matches the + * // internal enum_field_types enum + * data-length ::= uint8* // If the high bit of a byte is 1, the length + * // field is continued in the next byte, + * // otherwise it is the last byte of the length + * // field. So we need 1 byte to represent + * // lengths up to 127, 2 bytes to represent + * // lengths up to 16383, and so on... + *+ * + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseOpaque(JsonFormatter formatter) throws IOException { + // Read the custom type, which should be a standard ColumnType ... + int customType = reader.read(); + ColumnType type = ColumnType.byCode(customType); + if (type == null) { + throw new IOException("Unknown type '" + asHex(customType) + + "' in first byte of a JSON opaque value"); + } + // Read the data length ... 
+ int length = readVariableInt(); + + switch (type) { + case DECIMAL: + case NEWDECIMAL: + // See 'Json_decimal::convert_from_binary' + // https://github.com/mysql/mysql-server/blob/5.7/sql/json_dom.cc#L1625 + parseDecimal(length, formatter); + break; + + // All dates and times are in one of these types + // See 'Json_datetime::to_packed' for details + // https://github.com/mysql/mysql-server/blob/5.7/sql/json_dom.cc#L1681 + // which calls 'TIME_to_longlong_packed' + // https://github.com/mysql/mysql-server/blob/5.7/sql-common/my_time.c#L2005 + // + // and 'Json_datetime::from_packed' + // https://github.com/mysql/mysql-server/blob/5.7/sql/json_dom.cc#L1688 + // which calls 'TIME_from_longlong_packed' + // https://github.com/mysql/mysql-server/blob/5.7/sql/sql_time.cc#L1624 + case DATE: + parseDate(formatter); + break; + case TIME: + case TIME_V2: + parseTime(formatter); + break; + case DATETIME: + case DATETIME_V2: + case TIMESTAMP: + case TIMESTAMP_V2: + parseDatetime(formatter); + break; + default: + parseOpaqueValue(type, length, formatter); + } + } + + /** + * Parse a {@code DATE} value, which is stored using the same format as {@code DATETIME}: + * 5 bytes + fractional-seconds storage. However, the hour, minute, second, and fractional seconds are ignored. + *
+ * The non-fractional part is 40 bits: + * + *
+ * 1 bit sign (1= non-negative, 0= negative) + * 17 bits year*13+month (year 0-9999, month 0-12) + * 5 bits day (0-31) + * 5 bits hour (0-23) + * 6 bits minute (0-59) + * 6 bits second (0-59) + *+ * + * The fractional part is typically dependent upon the fsp (i.e., fractional seconds part) defined by + * a column, but in the case of JSON it is always 3 bytes. + *
+ * The format of all temporal values is outlined in the MySQL documentation, + * although since the MySQL {@code JSON} type is only available in 5.7, only version 2 of the date-time formats + * are necessary. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseDate(JsonFormatter formatter) throws IOException { + long raw = readInt64(); + long value = raw >> 24; + int yearMonth = (int) (value >> 22) % (1 << 17); // 17 bits starting at 22nd + int year = yearMonth / 13; + int month = yearMonth % 13; + int day = (int) (value >> 17) % (1 << 5); // 5 bits starting at 17th + formatter.valueDate(year, month, day); + } + + /** + * Parse a {@code TIME} value, which is stored using the same format as {@code DATETIME}: + * 5 bytes + fractional-seconds storage. However, the year, month, and day values are ignored + *
+ * The non-fractional part is 40 bits: + * + *
+ * 1 bit sign (1= non-negative, 0= negative) + * 17 bits year*13+month (year 0-9999, month 0-12) + * 5 bits day (0-31) + * 5 bits hour (0-23) + * 6 bits minute (0-59) + * 6 bits second (0-59) + *+ * + * The fractional part is typically dependent upon the fsp (i.e., fractional seconds part) defined by + * a column, but in the case of JSON it is always 3 bytes. + *
+ * The format of all temporal values is outlined in the MySQL documentation, + * although since the MySQL {@code JSON} type is only available in 5.7, only version 2 of the date-time formats + * are necessary. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseTime(JsonFormatter formatter) throws IOException { + long raw = readInt64(); + long value = raw >> 24; + boolean negative = value < 0L; + int hour = (int) (value >> 12) % (1 << 10); // 10 bits starting at 12th + int min = (int) (value >> 6) % (1 << 6); // 6 bits starting at 6th + int sec = (int) value % (1 << 6); // 6 bits starting at 0th + if (negative) { + hour *= -1; + } + int microSeconds = (int) (raw % (1 << 24)); + formatter.valueTime(hour, min, sec, microSeconds); + } + + /** + * Parse a {@code DATETIME} value, which is stored as 5 bytes + fractional-seconds storage. + *
+ * The non-fractional part is 40 bits: + * + *
+ * 1 bit sign (1= non-negative, 0= negative) + * 17 bits year*13+month (year 0-9999, month 0-12) + * 5 bits day (0-31) + * 5 bits hour (0-23) + * 6 bits minute (0-59) + * 6 bits second (0-59) + *+ * + * The sign bit is always 1. A value of 0 (negative) is reserved. The fractional part is typically dependent upon + * the fsp (i.e., fractional seconds part) defined by a column, but in the case of JSON it is always 3 bytes. + * Unlike the documentation, however, the 8 byte value is in little-endian form. + *
+ * The format of all temporal values is outlined in the MySQL documentation, + * although since the MySQL {@code JSON} type is only available in 5.7, only version 2 of the date-time formats + * are necessary. + * + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseDatetime(JsonFormatter formatter) throws IOException { + long raw = readInt64(); + long value = raw >> 24; + int yearMonth = (int) (value >> 22) % (1 << 17); // 17 bits starting at 22nd + int year = yearMonth / 13; + int month = yearMonth % 13; + int day = (int) (value >> 17) % (1 << 5); // 5 bits starting at 17th + int hour = (int) (value >> 12) % (1 << 5); // 5 bits starting at 12th + int min = (int) (value >> 6) % (1 << 6); // 6 bits starting at 6th + int sec = (int) (value % (1 << 6)); // 6 bits starting at 0th + int microSeconds = (int) (raw % (1 << 24)); + formatter.valueDatetime(year, month, day, hour, min, sec, microSeconds); + } + + /** + * Parse a {@code DECIMAL} value. The first two bytes are the precision and scale, followed by the binary + * representation of the decimal itself. + * + * @param length the length of the complete binary representation + * @param formatter the formatter to be notified of the parsed value; may not be null + * @throws IOException if there is a problem reading the JSON value + */ + protected void parseDecimal(int length, JsonFormatter formatter) throws IOException { + // First two bytes are the precision and scale ... 
+ int precision = reader.read(); + int scale = reader.read(); + + // Followed by the binary representation (see `my_decimal_get_binary_size`) + int decimalLength = length - 2; + BigDecimal dec = AbstractRowsEventDataDeserializer.asBigDecimal(precision, scale, reader.read(decimalLength)); + formatter.value(dec); + } + + protected void parseOpaqueValue(ColumnType type, int length, JsonFormatter formatter) + throws IOException { + formatter.valueOpaque(type, reader.read(length)); + } + + protected int readFractionalSecondsInMicroseconds() throws IOException { + return (int) readBigEndianLong(3); + } + + protected long readBigEndianLong(int numBytes) throws IOException { + byte[] bytes = reader.read(numBytes); + long result = 0; + for (int i = 0; i != numBytes; i++) { + int b = bytes[i] & 0xFF; + result = (result << 8) | b; + } + return result; + } + + protected int readUnsignedIndex(int maxValue, boolean isSmall, String desc) throws IOException { + long result = isSmall ? readUInt16() : readUInt32(); + if (result >= maxValue) { + throw new IOException("The " + desc + " the JSON document is " + result + + " and is too big for the binary form of the document (" + maxValue + ")"); + } + if (result > Integer.MAX_VALUE) { + throw new IOException("The " + desc + " the JSON document is " + result + " and is too big to be used"); + } + return (int) result; + } + + protected int readInt16() throws IOException { + int b1 = reader.read() & 0xFF; + int b2 = reader.read(); + return (short) (b2 << 8 | b1); + } + + protected int readUInt16() throws IOException { + int b1 = reader.read() & 0xFF; + int b2 = reader.read() & 0xFF; + return (b2 << 8 | b1) & 0xFFFF; + } + + protected int readInt24() throws IOException { + int b1 = reader.read() & 0xFF; + int b2 = reader.read() & 0xFF; + int b3 = reader.read(); + return b3 << 16 | b2 << 8 | b1; + } + + protected int readInt32() throws IOException { + int b1 = reader.read() & 0xFF; + int b2 = reader.read() & 0xFF; + int b3 = reader.read() & 
0xFF; + int b4 = reader.read(); + return b4 << 24 | b3 << 16 | b2 << 8 | b1; + } + + protected long readUInt32() throws IOException { + int b1 = reader.read() & 0xFF; + int b2 = reader.read() & 0xFF; + int b3 = reader.read() & 0xFF; + int b4 = reader.read() & 0xFF; + return (long) ((b4 << 24) | (b3 << 16) | (b2 << 8) | b1) & 0xFFFFFFFF; + } + + protected long readInt64() throws IOException { + int b1 = reader.read() & 0xFF; + int b2 = reader.read() & 0xFF; + int b3 = reader.read() & 0xFF; + long b4 = reader.read() & 0xFF; + long b5 = reader.read() & 0xFF; + long b6 = reader.read() & 0xFF; + long b7 = reader.read() & 0xFF; + long b8 = reader.read(); + return b8 << 56 | (b7 << 48) | (b6 << 40) | (b5 << 32) | + (b4 << 24) | (b3 << 16) | (b2 << 8) | b1; + } + + protected BigInteger readUInt64() throws IOException { + byte[] bigEndian = new byte[8]; + for (int i = 8; i != 0; --i) { + bigEndian[i - 1] = (byte) (reader.read() & 0xFF); + } + return new BigInteger(1, bigEndian); + } + + /** + * Read a variable-length integer value. + *
+     * If the high bit of a byte is 1, the length field is continued in the next byte, otherwise it is the last
+     * byte of the length field. So we need 1 byte to represent lengths up to 127, 2 bytes to represent lengths up
+     * to 16383, and so on...
+     * <p>
+     * The 7-bit groups are stored least-significant group first and the field is at most 5 bytes long, following
+     * the variable-length encoding used by MySQL's {@code json_binary.cc}.
+     *
+     * @return the integer value
+     * @throws IOException if the input cannot be read, the field is not terminated within 5 bytes, or the value
+     * does not fit in an {@code int}
+     */
+    protected int readVariableInt() throws IOException {
+        long length = 0;
+        for (int i = 0; i < 5; i++) {
+            byte b = (byte) reader.read();
+            // Each byte contributes its low 7 bits, positioned above the groups read so far
+            length |= (long) (b & 0x7F) << (7 * i);
+            if ((b & 0x80) == 0) {
+                if (length > Integer.MAX_VALUE) {
+                    throw new IOException("Variable-length integer " + length + " is too big to be used");
+                }
+                return (int) length;
+            }
+        }
+        throw new IOException("Variable-length integer is not terminated within 5 bytes");
+    }
+
+    /**
+     * Read a one-byte JSON literal.
+     *
+     * @return {@code null} for {@code 0x00}, {@link Boolean#TRUE} for {@code 0x01} and {@link Boolean#FALSE}
+     * for {@code 0x02}
+     * @throws IOException if the byte is any other value or the input cannot be read
+     */
+    protected Boolean readLiteral() throws IOException {
+        byte b = (byte) reader.read();
+        if (b == 0x00) {
+            return null;
+        } else if (b == 0x01) {
+            return Boolean.TRUE;
+        } else if (b == 0x02) {
+            return Boolean.FALSE;
+        }
+        throw new IOException("Unexpected value '" + asHex(b) + "' for literal");
+    }
+
+    /**
+     * Read a one-byte type code and look up the matching {@link ValueType}.
+     *
+     * @return the value type; never null
+     * @throws IOException if the code is not a known value type or the input cannot be read
+     */
+    protected ValueType readValueType() throws IOException {
+        byte b = (byte) reader.read();
+        ValueType result = ValueType.byCode(b);
+        if (result == null) {
+            // Mask to the unsigned byte value so that codes >= 0x80 are not printed as FFFFFFxx
+            throw new IOException("Unknown value type code '" + String.format("%02X", b & 0xFF) + "'");
+        }
+        return result;
+    }
+
+    /**
+     * Format a byte as two upper-case hexadecimal digits followed by a single space.
+     *
+     * @param b the byte to format
+     * @return the formatted text
+     */
+    protected static String asHex(byte b) {
+        return String.format("%02X ", b);
+    }
+
+    /**
+     * Format an integer as lower-case hexadecimal digits without padding.
+     *
+     * @param value the value to format
+     * @return the formatted text
+     */
+    protected static String asHex(int value) {
+        return Integer.toHexString(value);
+    }
+
+    /**
+     * Class used internally to hold value entry information.
+     */
+    protected static final class ValueEntry {
+
+        protected final ValueType type;
+        protected final int index;
+        protected Object value;
+        protected boolean resolved;
+
+        /**
+         * Create an unresolved entry of the given type with an index of 0.
+         *
+         * @param type the type of the value
+         */
+        public ValueEntry(ValueType type) {
+            this(type, 0);
+        }
+
+        /**
+         * Create an unresolved entry of the given type and index.
+         *
+         * @param type the type of the value
+         * @param index the index stored with the entry
+         */
+        public ValueEntry(ValueType type, int index) {
+            this.index = index;
+            this.type = type;
+        }
+
+        /**
+         * Store the value and mark this entry as resolved.
+         *
+         * @param value the value; may be null
+         * @return this entry, so that calls can be chained
+         */
+        public ValueEntry setValue(Object value) {
+            this.resolved = true;
+            this.value = value;
+            return this;
+        }
+    }
+}
diff --git a/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonFormatter.java b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonFormatter.java
new file mode 100644
index 0000000..2888182
--- /dev/null
+++ b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonFormatter.java
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2016 Stanley Shyiko
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.github.shyiko.mysql.binlog.event.deserialization.json;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import com.github.shyiko.mysql.binlog.event.deserialization.ColumnType;
+
+/**
+ * Handle the various actions involved when {@link JsonBinary#parse(byte[], JsonFormatter) parsing} a JSON
+ * binary value.
+ *
+ * @author Randall Hauch
+ */
+public interface JsonFormatter {
+
+    /**
+     * Prepare to receive the name-value pairs in a JSON object.
+     *
+     * @param numElements the number of name-value pairs (or elements)
+     */
+    void beginObject(int numElements);
+
+    /**
+     * Prepare to receive the values in a JSON array.
+     *
+     * @param numElements the number of array elements
+     */
+    void beginArray(int numElements);
+
+    /**
+     * Complete the previously-started JSON object.
+     */
+    void endObject();
+
+    /**
+     * Complete the previously-started JSON array.
+     */
+    void endArray();
+
+    /**
+     * Receive the name of an element in a JSON object.
+     *
+     * @param name the element's name; never null
+     */
+    void name(String name);
+
+    /**
+     * Receive the string value of an element in a JSON object.
+     *
+     * @param value the element's value; never null
+     */
+    void value(String value);
+
+    /**
+     * Receive the integer value of an element in a JSON object.
+     *
+     * @param value the element's value
+     */
+    void value(int value);
+
+    /**
+     * Receive the long value of an element in a JSON object.
+     *
+     * @param value the element's value
+     */
+    void value(long value);
+
+    /**
+     * Receive the double value of an element in a JSON object.
+     *
+     * @param value the element's value
+     */
+    void value(double value);
+
+    /**
+     * Receive the {@link BigInteger} value of an element in a JSON object.
+     *
+     * @param value the element's value; never null
+     */
+    void value(BigInteger value);
+
+    /**
+     * Receive the {@link BigDecimal} value of an element in a JSON object.
+     *
+     * @param value the element's value; never null
+     */
+    void value(BigDecimal value);
+
+    /**
+     * Receive the boolean value of an element in a JSON object.
+     *
+     * @param value the element's value
+     */
+    void value(boolean value);
+
+    /**
+     * Receive a null value of an element in a JSON object.
+     */
+    void valueNull();
+
+    /**
+     * Receive the year value of an element in a JSON object.
+     *
+     * @param year the year number that makes up the element's value
+     */
+    void valueYear(int year);
+
+    /**
+     * Receive the date value of an element in a JSON object.
+     *
+     * @param year the positive or negative year in the element's date value
+     * @param month the month (0-12) in the element's date value
+     * @param day the day of the month (0-31) in the element's date value
+     */
+    void valueDate(int year, int month, int day);
+
+    /**
+     * Receive the date and time value of an element in a JSON object.
+     *
+     * @param year the positive or negative year in the element's date value
+     * @param month the month (0-12) in the element's date value
+     * @param day the day of the month (0-31) in the element's date value
+     * @param hour the hour of the day (0-24) in the element's time value
+     * @param min the minutes of the hour (0-60) in the element's time value
+     * @param sec the seconds of the minute (0-60) in the element's time value
+     * @param microSeconds the number of microseconds in the element's time value
+     */
+    // checkstyle, please ignore ParameterNumber for the next line
+    void valueDatetime(int year, int month, int day, int hour, int min, int sec, int microSeconds);
+
+    /**
+     * Receive the time value of an element in a JSON object.
+     *
+     * @param hour the hour of the day (0-24) in the element's time value
+     * @param min the minutes of the hour (0-60) in the element's time value
+     * @param sec the seconds of the minute (0-60) in the element's time value
+     * @param microSeconds the number of microseconds in the element's time value
+     */
+    void valueTime(int hour, int min, int sec, int microSeconds);
+
+    /**
+     * Receive the timestamp value of an element in a JSON object.
+     *
+     * @param secondsPastEpoch the number of seconds past epoch (January 1, 1970) in the element's timestamp value
+     * @param microSeconds the number of microseconds in the element's time value
+     */
+    void valueTimestamp(long secondsPastEpoch, int microSeconds);
+
+    /**
+     * Receive an opaque value of an element in a JSON object.
+     *
+     * @param type the column type for the value; may not be null
+     * @param value the binary representation for the element's value
+     */
+    void valueOpaque(ColumnType type, byte[] value);
+
+    /**
+     * Called after an entry to signal that another entry will follow.
+     */
+    void nextEntry();
+
+}
diff --git a/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonStringFormatter.java b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonStringFormatter.java
new file mode 100644
index 0000000..e2f657c
--- /dev/null
+++ b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/JsonStringFormatter.java
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2016 Stanley Shyiko
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.github.shyiko.mysql.binlog.event.deserialization.json;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import com.github.shyiko.mysql.binlog.event.deserialization.ColumnType;
+
+/**
+ * A {@link JsonFormatter} implementation that creates a JSON string representation.
+ *
+ * @author Randall Hauch
+ */
+public class JsonStringFormatter implements JsonFormatter {
+
+    /**
+     * Value used for lookup tables to indicate that matching characters
+     * do not need to be escaped.
+     */
+    private static final int ESCAPE_NONE = 0;
+
+    /**
+     * Lookup-table marker for characters that have no short escape and must be written with the
+     * generic form, which for JSON is the "backslash - u" notation.
+     */
+    private static final int ESCAPE_GENERIC = -1;
+
+    /**
+     * Escape table indexed by the first 128 Unicode code points (single-byte UTF-8 characters). An entry
+     * of '0' means the character is written as is; a positive entry is the character that follows the
+     * backslash of a short escape; a negative entry requests generic escaping (e.g., {@code \\uXXXX}).
+     */
+    private static final int[] ESCAPES;
+
+    static {
+        int[] table = new int[128];
+        // All control characters default to the generic escape ...
+        for (int code = 31; code >= 0; --code) {
+            table[code] = ESCAPE_GENERIC;
+        }
+        // ... unless JSON defines a short backslash form for them
+        table[0x08] = 'b';
+        table[0x09] = 't';
+        table[0x0A] = 'n';
+        table[0x0C] = 'f';
+        table[0x0D] = 'r';
+        // Quote and backslash are escaped with themselves; escaping of slash is optional, so it is left out
+        table['"'] = '"';
+        table['\\'] = '\\';
+        ESCAPES = table;
+    }
+
+    private static final char[] HEX_CODES = "0123456789ABCDEF".toCharArray();
+
+    private final StringBuilder sb = new StringBuilder();
+
+    /**
+     * Return the JSON text written so far; same as {@link #getString()}.
+     */
+    @Override
+    public String toString() {
+        return getString();
+    }
+
+    /**
+     * Return the JSON text written so far.
+     *
+     * @return the contents of the internal buffer
+     */
+    public String getString() {
+        return sb.toString();
+    }
+
+    @Override
+    public void beginObject(int numElements) {
+        // The element count is not needed to produce the text
+        sb.append('{');
+    }
+
+    @Override
+    public void beginArray(int numElements) {
+        // The element count is not needed to produce the text
+        sb.append('[');
+    }
+
+    @Override
+    public void endObject() {
+        sb.append('}');
+    }
+
+    @Override
+    public void endArray() {
+        sb.append(']');
+    }
+
+    @Override
+    public void name(String name) {
+        // Quoted, escaped name followed by the name/value separator
+        sb.append('"');
+        appendString(name);
+        sb.append('"').append(':');
+    }
+
+    @Override
+    public void value(String value) {
+        sb.append('"');
+        appendString(value);
+        sb.append('"');
+    }
+
+    @Override
+    public void value(int value) {
+        sb.append(value);
+    }
+
+    @Override
+    public void value(long value) {
+        sb.append(value);
+    }
+
+    @Override
+    public void value(double value) {
+        String text = Double.toString(value);
+        if (!text.contains("E")) {
+            sb.append(text);
+            return;
+        }
+        // Double.toString switched to scientific notation, so write the plain decimal expansion instead.
+        // NOTE(review): new BigDecimal(double) expands the exact binary value, which can yield very long
+        // digit strings - confirm this is the intended output.
+        value(new BigDecimal(value));
+    }
+
+    @Override
+    public void value(BigInteger value) {
+        // Routed through the BigDecimal overload so both are written the same way
+        value(new BigDecimal(value));
+    }
+
+    @Override
+    public void value(BigDecimal value) {
+        // toPlainString() never uses scientific notation, unlike BigDecimal.toString()
+        sb.append(value.toPlainString());
+    }
+
+    @Override
+    public void value(boolean value) {
+        sb.append(value);
+    }
+
+    @Override
+    public void valueNull() {
+        sb.append("null");
+    }
+
+    @Override
+    public void valueYear(int year) {
+        sb.append(year);
+    }
+
+    @Override
+    public void valueDate(int year, int month, int day) {
+        sb.append('"');
+        appendDate(year, month, day);
+        sb.append('"');
+    }
+
+    @Override
+    // checkstyle, please ignore ParameterNumber for the next line
+    public void valueDatetime(int year, int month, int day, int hour, int min, int sec, int microSeconds) {
+        // Written as a quoted "date time" pair separated by a single space
+        sb.append('"');
+        appendDate(year, month, day);
+        sb.append(' ');
+        appendTime(hour, min, sec, microSeconds);
+        sb.append('"');
+    }
+
+    @Override
+    public void valueTime(int hour, int min, int sec, int microSeconds) {
+        sb.append('"');
+        int unsignedHour = hour;
+        if (hour < 0) {
+            // The sign is carried by the hour only; write it once and continue with the magnitude
+            sb.append('-');
+            unsignedHour = Math.abs(hour);
+        }
+        appendTime(unsignedHour, min, sec, microSeconds);
+        sb.append('"');
+    }
+
+    @Override
+    public void valueTimestamp(long secondsPastEpoch, int microSeconds) {
+        // Unquoted: the seconds immediately followed by whatever the six-digit helper writes
+        sb.append(secondsPastEpoch);
+        appendSixDigitUnsignedInt(microSeconds, false);
+    }
+
+    @Override
+    public void valueOpaque(ColumnType type, byte[] value) {
+        // The column type is not written; the bytes are emitted as a quoted Base64 string
+        sb.append('"').append(javax.xml.bind.DatatypeConverter.printBase64Binary(value)).append('"');
+    }
+
+
+    @Override
+    public void nextEntry() {
+        sb.append(',');
+    }
+
+    /**
+     * Append a string by escaping any characters that must be escaped.
+     * <p>
+     * Characters outside the range of the escape table (code points 128 and above) are appended unchanged.
+     *
+     * @param original the string to be written; may not be null
+     */
+    protected void appendString(String original) {
+        for (int i = 0, len = original.length(); i < len; ++i) {
+            char c = original.charAt(i);
+            // The table only covers the first 128 code points; anything beyond it needs no escaping
+            if (c >= ESCAPES.length || ESCAPES[c] == 0) {
+                sb.append(c);
+                continue;
+            }
+            int escape = ESCAPES[c];
+            if (escape > 0) { // 2-char escape: backslash plus the character stored in the table
+                sb.append('\\');
+                sb.append((char) escape);
+            } else {
+                unicodeEscape(c);
+            }
+        }
+    }
+
+    /**
+     * Append a generic Unicode escape (e.g., {@code \\uXXXX}) for given character.
+     *
+     * @param charToEscape the character to escape
+     */
+    private void unicodeEscape(int charToEscape) {
+        sb.append('\\');
+        sb.append('u');
+        if (charToEscape > 0xFF) {
+            int hi = (charToEscape >> 8) & 0xFF;
+            sb.append(HEX_CODES[hi >> 4]);
+            sb.append(HEX_CODES[hi & 0xF]);
+            charToEscape &= 0xFF;
+        } else {
+            sb.append('0');
+            sb.append('0');
+        }
+        // We know it's a control char, so only the last 2 chars are non-0
+        sb.append(HEX_CODES[charToEscape >> 4]);
+        sb.append(HEX_CODES[charToEscape & 0xF]);
+    }
+
+    /**
+     * Append a value between 0 and 99 as exactly two digits.
+     *
+     * @param value the value to append
+     */
+    protected void appendTwoDigitUnsignedInt(int value) {
+        assert value >= 0;
+        assert value < 100;
+        if (value < 10) {
+            sb.append("0").append(value);
+        } else {
+            sb.append(value);
+        }
+    }
+
+    /**
+     * Append a non-negative value padded with leading zeros to at least four digits.
+     *
+     * @param value the value to append
+     */
+    protected void appendFourDigitUnsignedInt(int value) {
+        if (value < 10) {
+            sb.append("000").append(value);
+        } else if (value < 100) {
+            sb.append("00").append(value);
+        } else if (value < 1000) {
+            sb.append("0").append(value);
+        } else {
+            sb.append(value);
+        }
+    }
+
+    /**
+     * Append a value between 0 and 999999 padded with leading zeros to six digits, optionally dropping
+     * trailing zeros (as is wanted for the fractional part of a second).
+     *
+     * @param value the value to append
+     * @param trimTrailingZeros {@code true} to remove trailing zeros, {@code false} to always write all six digits
+     */
+    protected void appendSixDigitUnsignedInt(int value, boolean trimTrailingZeros) {
+        assert value >= 0;
+        assert value < 1000000;
+        // Add prefixes if necessary ...
+        if (value < 10) {
+            sb.append("00000");
+        } else if (value < 100) {
+            sb.append("0000");
+        } else if (value < 1000) {
+            sb.append("000");
+        } else if (value < 10000) {
+            sb.append("00");
+        } else if (value < 100000) {
+            sb.append("0");
+        }
+        if (trimTrailingZeros) {
+            // Remove any trailing 0's ...
+            for (int i = 0; i != 6; ++i) {
+                if (value % 10 == 0) {
+                    value /= 10;
+                }
+            }
+        }
+        // The digits are written in both modes; only the trailing zeros differ
+        sb.append(value);
+    }
+
+    /**
+     * Append a date as {@code YYYY-MM-DD}, preceded by a minus sign when the year is negative.
+     *
+     * @param year the positive or negative year
+     * @param month the month
+     * @param day the day of the month
+     */
+    protected void appendDate(int year, int month, int day) {
+        if (year < 0) {
+            sb.append('-');
+            year = Math.abs(year);
+        }
+        appendFourDigitUnsignedInt(year);
+        sb.append('-');
+        appendTwoDigitUnsignedInt(month);
+        sb.append('-');
+        appendTwoDigitUnsignedInt(day);
+    }
+
+    /**
+     * Append a time as {@code HH:MM:SS}, followed by a dot and the fractional seconds when
+     * {@code microSeconds} is not zero.
+     *
+     * @param hour the hour
+     * @param min the minutes
+     * @param sec the seconds
+     * @param microSeconds the number of microseconds
+     */
+    protected void appendTime(int hour, int min, int sec, int microSeconds) {
+        appendTwoDigitUnsignedInt(hour);
+        sb.append(':');
+        appendTwoDigitUnsignedInt(min);
+        sb.append(':');
+        appendTwoDigitUnsignedInt(sec);
+        if (microSeconds != 0) {
+            sb.append('.');
+            appendSixDigitUnsignedInt(microSeconds, true);
+        }
+    }
+}
diff --git a/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/ValueType.java b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/ValueType.java
new file mode 100644
index 0000000..816bc60
--- /dev/null
+++ b/src/main/java/com/github/shyiko/mysql/binlog/event/deserialization/json/ValueType.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2016 Stanley Shyiko
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package com.github.shyiko.mysql.binlog.event.deserialization.json; + +import java.util.HashMap; +import java.util.Map; + +/** + * The set of values that can be used within a MySQL JSON value. + *
+ * These values are defined in the MySQL codebase in the + * json_binary.h file, and are: + * + *
+ * type ::= + * 0x00 | // small JSON object + * 0x01 | // large JSON object + * 0x02 | // small JSON array + * 0x03 | // large JSON array + * 0x04 | // literal (true/false/null) + * 0x05 | // int16 + * 0x06 | // uint16 + * 0x07 | // int32 + * 0x08 | // uint32 + * 0x09 | // int64 + * 0x0a | // uint64 + * 0x0b | // double + * 0x0c | // utf8mb4 string + * 0x0f // custom data (any MySQL data type) + *+ * + * @author Randall Hauch + */ +public enum ValueType { + + SMALL_DOCUMENT(0x00), + LARGE_DOCUMENT(0x01), + SMALL_ARRAY(0x02), + LARGE_ARRAY(0x03), + LITERAL(0x04), + INT16(0x05), + UINT16(0x06), + INT32(0x07), + UINT32(0x08), + INT64(0x09), + UINT64(0x0a), + DOUBLE(0x0b), + STRING(0x0c), + CUSTOM(0x0f); + + private final int code; + + ValueType(int code) { + this.code = code; + } + + public int getCode() { + return this.code; + } + + private static final Map