Permalink
Browse files

Support surrogate pairs for UTF-16 strings in URI encoding/decoding and releasing new version of pegasus

RB=916880
G=si-dev
R=xma
A=xma
  • Loading branch information...
Kenta Labur
Kenta Labur committed Feb 10, 2017
1 parent e72930a commit 231ab9cb030150ea3f2cd5fa9cfeaa98d42639c9
View
@@ -1,5 +1,10 @@
10.1.3
------
10.1.2
------
(RB=916880)
Support surrogate pairs for UTF-16 strings in URI encoding/decoding
10.1.1
------
View
@@ -1,4 +1,4 @@
version=10.1.1
version=10.1.2
sonatypeUsername=please_set_in_home_dir_if_uploading_to_maven_central
sonatypePassword=please_set_in_home_dir_if_uploading_to_maven_central
@@ -62,6 +62,7 @@
* Removed dependency on javax.ws.rs interfaces
* Added JavaDoc documentation to conform to Pegasus style guidelines
* Remove special-case encoding of ' ' in query params
* Updated _encode() and appendUTF8EncodedCharacter() methods to handle surrogate pairs
*/
package com.linkedin.jersey.api.uri;
@@ -296,39 +297,56 @@ public static String encodeTemplateNames(String s) {
return s;
}
private static String _encode(String s, Type t, boolean template, boolean contextualEncode) {
private static String _encode(final String s, final Type t, final boolean template, final boolean contextualEncode) {
final boolean[] table = ENCODING_TABLES[t.ordinal()];
boolean insideTemplateParam = false;
StringBuilder sb = null;
for (int i = 0; i < s.length(); i++) {
final char c = s.charAt(i);
if (c < 0x80 && table[c]) {
if (sb != null) sb.append(c);
for (int offset = 0, codePoint; offset < s.length(); offset += Character.charCount(codePoint)) {
codePoint = s.codePointAt(offset);
if (codePoint < 0x80 && table[codePoint]) {
if (sb != null) {
sb.append((char) codePoint);
}
} else {
if (template && (c == '{' || c == '}')) {
if (sb != null) sb.append(c);
continue;
} else if (contextualEncode) {
if (c == '%' && i + 2 < s.length()) {
if (isHexCharacter(s.charAt(i + 1)) &&
isHexCharacter(s.charAt(i + 2))) {
if (sb != null)
sb.append('%').append(s.charAt(i + 1)).append(s.charAt(i + 2));
i += 2;
continue;
if (template) {
boolean leavingTemplateParam = false;
if (codePoint == '{') {
insideTemplateParam = true;
} else if (codePoint == '}') {
insideTemplateParam = false;
leavingTemplateParam = true;
}
if (insideTemplateParam || leavingTemplateParam) {
if (sb != null) {
sb.append(Character.toChars(codePoint));
}
continue;
}
}
if (contextualEncode
&& codePoint == '%'
&& offset + 2 < s.length()
&& isHexCharacter(s.charAt(offset + 1))
&& isHexCharacter(s.charAt(offset + 2))) {
if (sb != null) {
sb.append('%').append(s.charAt(offset + 1)).append(s.charAt(offset + 2));
}
offset += 2;
continue;
}
if (sb == null) {
sb = new StringBuilder();
sb.append(s.substring(0, i));
sb.append(s.substring(0, offset));
}
if (c < 0x80) {
appendPercentEncodedOctet(sb, c);
if (codePoint < 0x80) {
appendPercentEncodedOctet(sb, (char) codePoint);
} else {
appendUTF8EncodedCharacter(sb, c);
appendUTF8EncodedCharacter(sb, codePoint);
}
}
}
@@ -346,13 +364,15 @@ private static void appendPercentEncodedOctet(StringBuilder sb, int b) {
sb.append(HEX_DIGITS[b & 0x0F]);
}
private static void appendUTF8EncodedCharacter(StringBuilder sb, char c) {
final ByteBuffer bb = UTF_8_CHARSET.encode("" + c);
private static void appendUTF8EncodedCharacter(final StringBuilder sb, final int codePoint) {
final CharBuffer chars = CharBuffer.wrap(Character.toChars(codePoint));
final ByteBuffer bytes = UTF_8_CHARSET.encode(chars);
while (bb.hasRemaining()) {
appendPercentEncodedOctet(sb, bb.get() & 0xFF);
while (bytes.hasRemaining()) {
appendPercentEncodedOctet(sb, bytes.get() & 0xFF);
}
}
// Character sets for individual URI grammar categories. Entries look like either a
// single literal character ("+") or an inclusive range written "lo-hi" ("0-9").
// NOTE(review): presumably these are expanded into the boolean lookup tables
// (ENCODING_TABLES) consulted by _encode(); the expansion code is not visible here - confirm.
private static final String[] SCHEME = {"0-9", "A-Z", "a-z", "+", "-", "."};
private static final String[] UNRESERVED = {"0-9", "A-Z", "a-z", "-", ".", "_", "~"};
private static final String[] SUB_DELIMS = {"!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="};
@@ -868,4 +888,4 @@ private static int decodeHex(char c) {
/**
 * Tells whether {@code c} is marked as a hexadecimal digit in {@code HEX_TABLE}.
 *
 * <p>Characters with a value of 128 or above are rejected without consulting the table.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is below 128 and its table entry is not {@code -1}
 */
private static boolean isHexCharacter(char c) {
  if (c >= 128) {
    return false;
  }
  return HEX_TABLE[c] != -1;
}
}
}
@@ -123,6 +123,34 @@ public void testEncodedDecoding(String encodedString, Object expectedObj) throws
Assert.assertEquals(actualObj, expectedObj);
}
/**
 * Supplies (encoded string, expected decoded object) pairs covering non-ASCII values:
 * multi-character Japanese text, a BMP symbol, and a supplementary character that
 * needs a UTF-16 surrogate pair.
 */
@DataProvider
private static Object[][] unicode()
{
  // Expected decoded objects, one map per case.
  DataMap japaneseMap = new DataMap();
  japaneseMap.put("konnichiwa", "こんにちは"); // Japanese

  // U+263A WHITE SMILING FACE: the value "%E2%98%BA" decodes to. Written as an escape
  // so the character cannot be silently dropped by tooling.
  DataMap emojiMap = new DataMap();
  emojiMap.put("smiley", "\u263A"); // Emoji

  DataMap surrogatePairMap = new DataMap();
  surrogatePairMap.put("stickoutTongue", "\uD83D\uDE1B"); // Emoji, but with surrogate pairs

  return new Object[][] {
    { "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)", japaneseMap },
    { "(smiley:%E2%98%BA)", emojiMap },
    { "(stickoutTongue:%F0%9F%98%9B)", surrogatePairMap }
  };
}
/**
 * Parses each percent-encoded input from the {@code unicode} data provider and checks
 * that the result equals the expected object.
 *
 * @param decodable the encoded string handed to {@code URIElementParser.parse}
 * @param expectedObj the object the parse result is compared against
 */
@Test(dataProvider = "unicode")
public void testUnicode(String decodable, Object expectedObj) throws PathSegment.PathSegmentSyntaxException
{
  final Object parsed = URIElementParser.parse(decodable);

  Assert.assertEquals(parsed, expectedObj);
}
@DataProvider
private static Object[][] undecodables()
{
@@ -338,4 +338,38 @@ public void testExtractionWithSlashes()
Assert.assertEquals(components2.length, 1);
Assert.assertEquals(components2[0], "foo");
}
/**
 * Supplies (object, expected unescaped form, expected path-segment form, expected
 * query-param form) rows covering non-ASCII values: multi-character Japanese text,
 * a BMP symbol, and a supplementary character that needs a UTF-16 surrogate pair.
 */
@DataProvider
public Object[][] unicode()
{
  // Objects to encode, one map per case.
  DataMap japaneseMap = new DataMap();
  japaneseMap.put("konnichiwa", "こんにちは"); // Japanese

  // U+263A WHITE SMILING FACE: matches the expected "(smiley:☺)" / "%E2%98%BA" below.
  // Written as an escape so the character cannot be silently dropped by tooling.
  DataMap emojiMap = new DataMap();
  emojiMap.put("smiley", "\u263A"); // Emoji

  DataMap surrogatePairMap = new DataMap();
  surrogatePairMap.put("stickoutTongue", "\uD83D\uDE1B"); // Emoji, but with surrogate pairs

  return new Object[][] {
    { japaneseMap, "(konnichiwa:こんにちは)", "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)", "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)" },
    { emojiMap, "(smiley:☺)", "(smiley:%E2%98%BA)", "(smiley:%E2%98%BA)" },
    { surrogatePairMap, "(stickoutTongue:\uD83D\uDE1B)", "(stickoutTongue:%F0%9F%98%9B)", "(stickoutTongue:%F0%9F%98%9B)" }
  };
}
/**
 * Encodes each object from the {@code unicode} data provider three ways and checks
 * every result against its expected string: without escaping, URL-escaped as a path
 * segment, and URL-escaped as a query parameter.
 *
 * @param obj the object to encode
 * @param expectedNoEsc expected output with {@code NO_ESCAPING}
 * @param expectedPathSegEsc expected output with {@code URL_ESCAPING} for a path segment
 * @param expectedQueryParamEsc expected output with {@code URL_ESCAPING} for a query parameter
 */
@Test(dataProvider = "unicode")
public void testUnicode(Object obj, String expectedNoEsc, String expectedPathSegEsc, String expectedQueryParamEsc)
{
  // Unescaped form.
  final String unescaped = URIParamUtils.encodeElement(obj, NO_ESCAPING, null);
  Assert.assertEquals(unescaped, expectedNoEsc);

  // URL-escaped as a path segment.
  final String pathSegment =
      URIParamUtils.encodeElement(obj, URL_ESCAPING, UriComponent.Type.PATH_SEGMENT);
  Assert.assertEquals(pathSegment, expectedPathSegEsc);

  // URL-escaped as a query parameter.
  final String queryParam =
      URIParamUtils.encodeElement(obj, URL_ESCAPING, UriComponent.Type.QUERY_PARAM);
  Assert.assertEquals(queryParam, expectedQueryParamEsc);
}
}

0 comments on commit 231ab9c

Please sign in to comment.