Skip to content

Commit

Permalink
Support surrogate pairs for UTF-16 strings in URI encoding/decoding and releasing new version of pegasus
Browse files Browse the repository at this point in the history

RB=916880
G=si-dev
R=xma
A=xma
  • Loading branch information
Kenta Labur committed Feb 11, 2017
1 parent e72930a commit 231ab9c
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 26 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
10.1.3
------

10.1.2
------
(RB=916880)
Support surrogate pairs for UTF-16 strings in URI encoding/decoding

10.1.1
------
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version=10.1.1
version=10.1.2
sonatypeUsername=please_set_in_home_dir_if_uploading_to_maven_central
sonatypePassword=please_set_in_home_dir_if_uploading_to_maven_central

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
* Removed dependency on javax.ws.rs interfaces
* Added JavaDoc documentation to conform to Pegasus style guidelines
* Remove special-case encoding of ' ' in query params
* Updated _encode() and appendPercentEncodedOctet() methods to handle surrogate pairs
*/

package com.linkedin.jersey.api.uri;
Expand Down Expand Up @@ -296,39 +297,56 @@ public static String encodeTemplateNames(String s) {
return s;
}

private static String _encode(String s, Type t, boolean template, boolean contextualEncode) {
private static String _encode(final String s, final Type t, final boolean template, final boolean contextualEncode) {
final boolean[] table = ENCODING_TABLES[t.ordinal()];
boolean insideTemplateParam = false;

StringBuilder sb = null;
for (int i = 0; i < s.length(); i++) {
final char c = s.charAt(i);
if (c < 0x80 && table[c]) {
if (sb != null) sb.append(c);
for (int offset = 0, codePoint; offset < s.length(); offset += Character.charCount(codePoint)) {
codePoint = s.codePointAt(offset);

if (codePoint < 0x80 && table[codePoint]) {
if (sb != null) {
sb.append((char) codePoint);
}
} else {
if (template && (c == '{' || c == '}')) {
if (sb != null) sb.append(c);
continue;
} else if (contextualEncode) {
if (c == '%' && i + 2 < s.length()) {
if (isHexCharacter(s.charAt(i + 1)) &&
isHexCharacter(s.charAt(i + 2))) {
if (sb != null)
sb.append('%').append(s.charAt(i + 1)).append(s.charAt(i + 2));
i += 2;
continue;
if (template) {
boolean leavingTemplateParam = false;
if (codePoint == '{') {
insideTemplateParam = true;
} else if (codePoint == '}') {
insideTemplateParam = false;
leavingTemplateParam = true;
}
if (insideTemplateParam || leavingTemplateParam) {
if (sb != null) {
sb.append(Character.toChars(codePoint));
}
continue;
}
}

if (contextualEncode
&& codePoint == '%'
&& offset + 2 < s.length()
&& isHexCharacter(s.charAt(offset + 1))
&& isHexCharacter(s.charAt(offset + 2))) {
if (sb != null) {
sb.append('%').append(s.charAt(offset + 1)).append(s.charAt(offset + 2));
}
offset += 2;
continue;
}

if (sb == null) {
sb = new StringBuilder();
sb.append(s.substring(0, i));
sb.append(s.substring(0, offset));
}

if (c < 0x80) {
appendPercentEncodedOctet(sb, c);
if (codePoint < 0x80) {
appendPercentEncodedOctet(sb, (char) codePoint);
} else {
appendUTF8EncodedCharacter(sb, c);
appendUTF8EncodedCharacter(sb, codePoint);
}
}
}
Expand All @@ -346,13 +364,15 @@ private static void appendPercentEncodedOctet(StringBuilder sb, int b) {
sb.append(HEX_DIGITS[b & 0x0F]);
}

private static void appendUTF8EncodedCharacter(StringBuilder sb, char c) {
final ByteBuffer bb = UTF_8_CHARSET.encode("" + c);
private static void appendUTF8EncodedCharacter(final StringBuilder sb, final int codePoint) {
final CharBuffer chars = CharBuffer.wrap(Character.toChars(codePoint));
final ByteBuffer bytes = UTF_8_CHARSET.encode(chars);

while (bb.hasRemaining()) {
appendPercentEncodedOctet(sb, bb.get() & 0xFF);
while (bytes.hasRemaining()) {
appendPercentEncodedOctet(sb, bytes.get() & 0xFF);
}
}

private static final String[] SCHEME = {"0-9", "A-Z", "a-z", "+", "-", "."};
private static final String[] UNRESERVED = {"0-9", "A-Z", "a-z", "-", ".", "_", "~"};
private static final String[] SUB_DELIMS = {"!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="};
Expand Down Expand Up @@ -868,4 +888,4 @@ private static int decodeHex(char c) {
/**
 * Reports whether {@code c} is accepted as a hexadecimal digit.
 *
 * @param c the character to classify
 * @return {@code true} when {@code c} is below 128 and its {@code HEX_TABLE} entry is not -1
 */
private static boolean isHexCharacter(char c) {
  // Characters at or above 128 are rejected before the table is consulted.
  if (c >= 128) {
    return false;
  }
  // NOTE(review): presumably -1 is the table's marker for "not a hex digit" - confirm against HEX_TABLE.
  return HEX_TABLE[c] != -1;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,34 @@ public void testEncodedDecoding(String encodedString, Object expectedObj) throws
Assert.assertEquals(actualObj, expectedObj);
}

/**
 * Supplies percent-encoded URI elements together with the {@link DataMap} each one is
 * expected to parse into.
 *
 * @return rows of {@code {encodedElement, expectedMap}}
 */
@DataProvider
private static Object[][] unicode()
{
  // Japanese text: each character takes three UTF-8 bytes in the encoded form below.
  final DataMap greeting = new DataMap();
  greeting.put("konnichiwa","こんにちは");

  // Emoji that fits in a single UTF-16 code unit.
  final DataMap smiley = new DataMap();
  smiley.put("smiley","☺");

  // Emoji U+1F61B, which Java strings hold as a surrogate pair (two UTF-16 code units).
  final DataMap tongue = new DataMap();
  tongue.put("stickoutTongue", "\uD83D\uDE1B");

  final Object[] greetingRow = {"(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)", greeting};
  final Object[] smileyRow = {"(smiley:%E2%98%BA)", smiley};
  final Object[] tongueRow = {"(stickoutTongue:%F0%9F%98%9B)", tongue};

  return new Object[][] {greetingRow, smileyRow, tongueRow};
}

/**
 * Parses a percent-encoded URI element and checks that the result equals the expected object.
 *
 * @param decodable the encoded element handed to {@code URIElementParser.parse}
 * @param expectedObj the object the parse result is compared against
 * @throws PathSegment.PathSegmentSyntaxException declared by the parse call
 */
@Test(dataProvider = "unicode")
public void testUnicode(String decodable, Object expectedObj) throws PathSegment.PathSegmentSyntaxException
{
  final Object parsed = URIElementParser.parse(decodable);
  Assert.assertEquals(parsed, expectedObj);
}

@DataProvider
private static Object[][] undecodables()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,4 +338,38 @@ public void testExtractionWithSlashes()
Assert.assertEquals(components2.length, 1);
Assert.assertEquals(components2[0], "foo");
}

/**
 * Supplies {@link DataMap} inputs together with the strings they are expected to encode to.
 *
 * @return rows of {@code {map, expectedNoEsc, expectedPathSegEsc, expectedQueryParamEsc}}
 */
@DataProvider
public Object[][] unicode()
{
  // Japanese text: each character takes three UTF-8 bytes in the escaped forms below.
  final DataMap greeting = new DataMap();
  greeting.put("konnichiwa","こんにちは");

  // Emoji that fits in a single UTF-16 code unit.
  final DataMap smiley = new DataMap();
  smiley.put("smiley","☺");

  // Emoji U+1F61B, which Java strings hold as a surrogate pair (two UTF-16 code units).
  final DataMap tongue = new DataMap();
  tongue.put("stickoutTongue", "\uD83D\uDE1B");

  final Object[] greetingRow = {
      greeting,
      "(konnichiwa:こんにちは)",
      "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)",
      "(konnichiwa:%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF)"
  };
  final Object[] smileyRow = {
      smiley,
      "(smiley:☺)",
      "(smiley:%E2%98%BA)",
      "(smiley:%E2%98%BA)"
  };
  final Object[] tongueRow = {
      tongue,
      "(stickoutTongue:\uD83D\uDE1B)",
      "(stickoutTongue:%F0%9F%98%9B)",
      "(stickoutTongue:%F0%9F%98%9B)"
  };

  return new Object[][] {greetingRow, smileyRow, tongueRow};
}

/**
 * Encodes {@code obj} three ways via {@code URIParamUtils.encodeElement} and checks each result:
 * with {@code NO_ESCAPING}, then with {@code URL_ESCAPING} for a path segment, then with
 * {@code URL_ESCAPING} for a query parameter.
 *
 * @param obj the object to encode
 * @param expectedNoEsc expected output with {@code NO_ESCAPING}
 * @param expectedPathSegEsc expected output for {@code UriComponent.Type.PATH_SEGMENT}
 * @param expectedQueryParamEsc expected output for {@code UriComponent.Type.QUERY_PARAM}
 */
@Test(dataProvider = "unicode")
public void testUnicode(Object obj, String expectedNoEsc, String expectedPathSegEsc, String expectedQueryParamEsc)
{
  // Each encoding is asserted before the next one is computed, so the first mismatch stops the test.
  Assert.assertEquals(URIParamUtils.encodeElement(obj, NO_ESCAPING, null), expectedNoEsc);
  Assert.assertEquals(
      URIParamUtils.encodeElement(obj, URL_ESCAPING, UriComponent.Type.PATH_SEGMENT),
      expectedPathSegEsc);
  Assert.assertEquals(
      URIParamUtils.encodeElement(obj, URL_ESCAPING, UriComponent.Type.QUERY_PARAM),
      expectedQueryParamEsc);
}
}

0 comments on commit 231ab9c

Please sign in to comment.