Permalink
Browse files

Support surrogate pairs for UTF-16 strings in URI encoding/decoding and releasing new version of pegasus

RB=916880
G=si-dev
R=xma
A=xma
  • Loading branch information...
1 parent e72930a commit 231ab9cb030150ea3f2cd5fa9cfeaa98d42639c9 Kenta Labur committed Feb 10, 2017
View
@@ -1,5 +1,10 @@
+10.1.3
+------
+
10.1.2
------
+(RB=916880)
+Support surrogate pairs for UTF-16 strings in URI encoding/decoding
10.1.1
------
View
@@ -1,4 +1,4 @@
-version=10.1.1
+version=10.1.2
sonatypeUsername=please_set_in_home_dir_if_uploading_to_maven_central
sonatypePassword=please_set_in_home_dir_if_uploading_to_maven_central
@@ -62,6 +62,7 @@
* Removed dependency on javax.ws.rs interfaces
* Added JavaDoc documentation to conform to Pegasus style guidelines
* Remove special-case encoding of ' ' in query params
+ * Updated _encode() and appendUTF8EncodedCharacter() methods to handle surrogate pairs
*/
package com.linkedin.jersey.api.uri;
@@ -296,39 +297,56 @@ public static String encodeTemplateNames(String s) {
return s;
}
- private static String _encode(String s, Type t, boolean template, boolean contextualEncode) {
+ private static String _encode(final String s, final Type t, final boolean template, final boolean contextualEncode) {
final boolean[] table = ENCODING_TABLES[t.ordinal()];
+ boolean insideTemplateParam = false;
StringBuilder sb = null;
- for (int i = 0; i < s.length(); i++) {
- final char c = s.charAt(i);
- if (c < 0x80 && table[c]) {
- if (sb != null) sb.append(c);
+ for (int offset = 0, codePoint; offset < s.length(); offset += Character.charCount(codePoint)) {
+ codePoint = s.codePointAt(offset);
+
+ if (codePoint < 0x80 && table[codePoint]) {
+ if (sb != null) {
+ sb.append((char) codePoint);
+ }
} else {
- if (template && (c == '{' || c == '}')) {
- if (sb != null) sb.append(c);
- continue;
- } else if (contextualEncode) {
- if (c == '%' && i + 2 < s.length()) {
- if (isHexCharacter(s.charAt(i + 1)) &&
- isHexCharacter(s.charAt(i + 2))) {
- if (sb != null)
- sb.append('%').append(s.charAt(i + 1)).append(s.charAt(i + 2));
- i += 2;
- continue;
+ if (template) {
+ boolean leavingTemplateParam = false;
+ if (codePoint == '{') {
+ insideTemplateParam = true;
+ } else if (codePoint == '}') {
+ insideTemplateParam = false;
+ leavingTemplateParam = true;
+ }
+ if (insideTemplateParam || leavingTemplateParam) {
+ if (sb != null) {
+ sb.append(Character.toChars(codePoint));
}
+ continue;
}
}
+ if (contextualEncode
+ && codePoint == '%'
+ && offset + 2 < s.length()
+ && isHexCharacter(s.charAt(offset + 1))
+ && isHexCharacter(s.charAt(offset + 2))) {
+ if (sb != null) {
+ sb.append('%').append(s.charAt(offset + 1)).append(s.charAt(offset + 2));
+ }
+ offset += 2;
+ continue;
+ }
+
if (sb == null) {
sb = new StringBuilder();
- sb.append(s.substring(0, i));
+ sb.append(s.substring(0, offset));
}
- if (c < 0x80) {
- appendPercentEncodedOctet(sb, c);
+ if (codePoint < 0x80) {
+ appendPercentEncodedOctet(sb, (char) codePoint);
} else {
- appendUTF8EncodedCharacter(sb, c);
+ appendUTF8EncodedCharacter(sb, codePoint);
}
}
}
@@ -346,13 +364,15 @@ private static void appendPercentEncodedOctet(StringBuilder sb, int b) {
sb.append(HEX_DIGITS[b & 0x0F]);
}
- private static void appendUTF8EncodedCharacter(StringBuilder sb, char c) {
- final ByteBuffer bb = UTF_8_CHARSET.encode("" + c);
+ private static void appendUTF8EncodedCharacter(final StringBuilder sb, final int codePoint) {
+ final CharBuffer chars = CharBuffer.wrap(Character.toChars(codePoint));
+ final ByteBuffer bytes = UTF_8_CHARSET.encode(chars);
- while (bb.hasRemaining()) {
- appendPercentEncodedOctet(sb, bb.get() & 0xFF);
+ while (bytes.hasRemaining()) {
+ appendPercentEncodedOctet(sb, bytes.get() & 0xFF);
}
}
+
private static final String[] SCHEME = {"0-9", "A-Z", "a-z", "+", "-", "."};
private static final String[] UNRESERVED = {"0-9", "A-Z", "a-z", "-", ".", "_", "~"};
private static final String[] SUB_DELIMS = {"!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="};
@@ -868,4 +888,4 @@ private static int decodeHex(char c) {
/**
 * Tells whether the given character is a hexadecimal digit according to HEX_TABLE.
 *
 * @param c the character to test
 * @return true when c is below 128 and its HEX_TABLE entry is not -1; false otherwise
 */
private static boolean isHexCharacter(char c) {
if (c >= 128) {
// Values of 128 and above are rejected without consulting the table.
return false;
}
final boolean hasHexValue = HEX_TABLE[c] != -1;
return hasHexValue;
}
-}
+}
@@ -124,6 +124,34 @@ public void testEncodedDecoding(String encodedString, Object expectedObj) throws
}
@DataProvider
+ private static Object[][] unicode()
+ {
+ // create objects
+ // test unicode encoding
+ DataMap japaneseMap = new DataMap();
+ japaneseMap.put("konnichiwa","こんにちは"); // Japanese
+
+ DataMap emojiMap = new DataMap();
+ emojiMap.put("smiley","☺"); // Emoji
+
+ DataMap surrogatePairMap = new DataMap();
+ surrogatePairMap.put("stickoutTongue", "\uD83D\uDE1B"); // Emoji, but with surrogate pairs
+
+ return new Object[][] {
+ {"(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)", japaneseMap },
+ { "(smiley:%E2%98%BA)", emojiMap},
+ { "(stickoutTongue:%F0%9F%98%9B)",surrogatePairMap }
+ };
+ }
+
+ @Test(dataProvider = "unicode")
+ public void testUnicode(String decodable, Object expectedObj) throws PathSegment.PathSegmentSyntaxException
+ {
+ Object actualObj = URIElementParser.parse(decodable);
+ Assert.assertEquals(actualObj, expectedObj);
+ }
+
+ @DataProvider
private static Object[][] undecodables()
{
return new Object[][] {
@@ -338,4 +338,38 @@ public void testExtractionWithSlashes()
Assert.assertEquals(components2.length, 1);
Assert.assertEquals(components2[0], "foo");
}
+
+ @DataProvider
+ public Object[][] unicode()
+ {
+ // create objects
+ // test unicode encoding
+ DataMap japaneseMap = new DataMap();
+ japaneseMap.put("konnichiwa","こんにちは"); // Japanese
+
+ DataMap emojiMap = new DataMap();
+ emojiMap.put("smiley","☺"); // Emoji
+
+ DataMap surrogatePairMap = new DataMap();
+ surrogatePairMap.put("stickoutTongue", "\uD83D\uDE1B"); // Emoji, but with surrogate pairs
+
+ return new Object[][] {
+ { japaneseMap, "(konnichiwa:こんにちは)", "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)", "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)" },
+ { emojiMap, "(smiley:☺)", "(smiley:%E2%98%BA)", "(smiley:%E2%98%BA)"},
+ { surrogatePairMap, "(stickoutTongue:\uD83D\uDE1B)", "(stickoutTongue:%F0%9F%98%9B)","(stickoutTongue:%F0%9F%98%9B)" }
+ };
+ }
+
+ @Test(dataProvider = "unicode")
+ public void testUnicode(Object obj, String expectedNoEsc, String expectedPathSegEsc, String expectedQueryParamEsc)
+ {
+ String actualNoEsc = URIParamUtils.encodeElement(obj, NO_ESCAPING, null);
+ Assert.assertEquals(actualNoEsc, expectedNoEsc);
+ String actualPathSegEsc = URIParamUtils.encodeElement(obj, URL_ESCAPING,
+ UriComponent.Type.PATH_SEGMENT);
+ Assert.assertEquals(actualPathSegEsc, expectedPathSegEsc);
+ String actualQueryParamEsc = URIParamUtils.encodeElement(obj, URL_ESCAPING,
+ UriComponent.Type.QUERY_PARAM);
+ Assert.assertEquals(actualQueryParamEsc, expectedQueryParamEsc);
+ }
}

0 comments on commit 231ab9c

Please sign in to comment.