Skip to content

Commit

Permalink
[CONJ-589] clob length and truncate methods to behave correctly for
Browse files Browse the repository at this point in the history
characters encoded in 2 UTF-16 characters
  • Loading branch information
rusher committed Dec 21, 2018
1 parent c742e2b commit 0d2baa4
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 29 deletions.
109 changes: 80 additions & 29 deletions src/main/java/org/mariadb/jdbc/MariaDbClob.java
Expand Up @@ -238,45 +238,96 @@ public OutputStream setAsciiStream(long pos) throws SQLException {
*/
@Override
public long length() {
// NOTE(review): this span appears to interleave the removed (i / byteValue) and the added
// (pos / firstByte) bodies of the method, as a diff rendered without +/- markers — confirm
// against the real file. The comments below annotate the pos / firstByte lines only.
//The length of a character string is the number of UTF-16 units (not the number of characters)
long len = 0;
for (int i = offset; i < offset + length; ) {
int byteValue = data[i] & 0xff;
if (byteValue < 0x80) {
i += 1;
} else if (byteValue < 0xC2) {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
} else if (byteValue < 0xE0) {
i += 2;
} else if (byteValue < 0xF0) {
i += 3;
} else if (byteValue < 0xF8) {
len++;
i += 4;
// pos is the index of the next byte of data to examine
int pos = offset;

// Fast path for a leading ASCII run: a non-negative (signed) byte has its high bit clear,
// i.e. it is a one-byte sequence and counts as one UTF-16 unit.
// In this loop len == pos - offset, so "len < length" also bounds pos.
for(; len < length && data[pos] >= 0; ) {
len++;
pos++;
}

// General path: decode the remaining bytes by looking at each lead byte.
while(pos < offset + length) {
byte firstByte = data[pos++];
// negative signed byte <=> high bit set (value 0x80..0xFF)
if (firstByte < 0) {
// "firstByte >> 5 == -2" means the top three bits are 110 (0xC0..0xDF, 2-byte lead);
// "(firstByte & 30) == 0" singles out 0xC0 and 0xC1 among those.
// So this branch is taken for everything EXCEPT a 2-byte lead in 0xC2..0xDF.
if (firstByte >> 5 != -2 || (firstByte & 30) == 0) {
// top four bits 1110 (0xE0..0xEF): 3-byte sequence, one UTF-16 unit
if (firstByte >> 4 == -2) {
// pos already points after the lead byte: require the two following bytes to be
// inside the data window (only their presence is checked, not their content)
if (pos + 1 < offset + length) {
pos+=2;
len++;
} else {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
}
// anything whose top five bits are not 11110 (i.e. not 0xF0..0xF7) is rejected here
} else if (firstByte >> 3 != -2) {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
// 4-byte sequence with its three following bytes available: counted as two UTF-16 units
} else if (pos + 2 < offset + length) {
pos+=3;
len+=2;
} else {
// 4-byte lead without its three following bytes (truncated data): counted as a
// single unit; adding offset + length to pos moves it to at least the end of the
// window (for a non-negative offset), which terminates the while loop
pos += offset + length;
len+=1;
}
} else {
// 2-byte sequence (lead 0xC2..0xDF): skip the following byte, one UTF-16 unit
pos++;
len++;
}
} else {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
len++;
}
len++;
}
return len;
}

@Override
public void truncate(final long len) throws SQLException {
public void truncate(final long truncateLen) throws SQLException {

//truncate the number of UTF-16 characters
//this can result in a bad UTF-8 string if string finish with a
//character represented in 2 UTF-16
long len = 0;
int pos = offset;
for (; pos < offset + Math.min(length, len); ) {
int byteValue = data[pos] & 0xff;
if (byteValue < 0x80) {
pos += 1;
} else if (byteValue < 0xC2) {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
} else if (byteValue < 0xE0) {
pos += 2;
} else if (byteValue < 0xF0) {
pos += 3;
} else if (byteValue < 0xF8) {
pos += 4;

//set ASCII (<= 127 chars)
for (; len < length && len < truncateLen && data[pos] >= 0; ) {
len++;
pos++;
}

//multi-bytes UTF-8
while (pos < offset + length && len < truncateLen) {
byte firstByte = data[pos++];
if (firstByte < 0) {
if (firstByte >> 5 != -2 || (firstByte & 30) == 0) {
if (firstByte >> 4 == -2) {
if (pos + 1 < offset + length) {
pos += 2;
len++;
} else {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
}
} else if (firstByte >> 3 != -2) {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
} else if (pos + 2 < offset + length) {
if (len + 2 < truncateLen) {
pos += 3;
len += 2;
} else {
//truncation will result in bad UTF-8 String
pos += 1;
len = truncateLen;
}
} else {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
}
} else {
pos++;
len++;
}
} else {
throw new UncheckedIOException("invalid UTF8", new CharacterCodingException());
len++;
}
}
length = pos - offset;
Expand Down
42 changes: 42 additions & 0 deletions src/test/java/org/mariadb/jdbc/BlobTest.java
Expand Up @@ -310,6 +310,48 @@ public void testClob3() throws Exception {
assertEquals("Øhello", result);
}

/**
 * Checks that {@code Clob.length()} and {@code Clob.truncate()} count UTF-16 units.
 *
 * <p>Columns 1 to 3 hold a character encoded in 1, 2 and 3 UTF-8 bytes (one UTF-16 unit each);
 * columns 4 and 5 hold a character that needs two UTF-16 units. Truncating those last two
 * to 3 units cuts the character in half, which is why the expected substring ends with the
 * replacement character.
 *
 * @throws Exception if the query or a Clob operation fails
 */
@Test
public void clobLength() throws Exception {
  // Statement and ResultSet are both declared in the try header so that both are closed,
  // even when an assertion fails.
  try (Statement stmt = sharedConnection.createStatement();
      ResultSet rs = stmt.executeQuery(
          "SELECT 'ab$c', 'ab¢c', 'abहc', 'ab\uD801\uDC37c', 'ab𐍈c' from dual")) {
    while (rs.next()) {

      Clob clob1 = rs.getClob(1);
      Clob clob2 = rs.getClob(2);
      Clob clob3 = rs.getClob(3);
      Clob clob4 = rs.getClob(4);
      Clob clob5 = rs.getClob(5);

      // length is expressed in UTF-16 units
      assertEquals(4, clob1.length());
      assertEquals(4, clob2.length());
      assertEquals(4, clob3.length());
      assertEquals(5, clob4.length());
      assertEquals(5, clob5.length());

      clob1.truncate(3);
      clob2.truncate(3);
      clob3.truncate(3);
      clob4.truncate(3);
      clob5.truncate(3);

      // truncate is expressed in UTF-16 units as well
      assertEquals(3, clob1.length());
      assertEquals(3, clob2.length());
      assertEquals(3, clob3.length());
      assertEquals(3, clob4.length());
      assertEquals(3, clob5.length());

      assertEquals("ab$", clob1.getSubString(1, 3));
      assertEquals("ab¢", clob2.getSubString(1, 3));
      assertEquals("abह", clob3.getSubString(1, 3));
      // the two-unit character was cut in the middle
      assertEquals("ab�", clob4.getSubString(1, 3));
      assertEquals("ab�", clob5.getSubString(1, 3));
    }
  }
}



@Test
public void testBlob() throws SQLException, IOException {
PreparedStatement stmt = sharedConnection
Expand Down

0 comments on commit 0d2baa4

Please sign in to comment.