Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve MimeText conformance #255

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -24,7 +24,7 @@

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.*;
import java.util.regex.Pattern;

public class MimeText {
@@ -33,31 +33,61 @@ public class MimeText {
private final static Pattern decodeQuotedPrintablePattern = Pattern.compile("=([0-9A-F]{2})");

public static String encode(String raw) {
var quoteMatcher = encodePattern.matcher(raw);
return quoteMatcher.replaceAll(mo -> "=?UTF-8?B?" + Base64.getEncoder().encodeToString(String.valueOf(mo.group(1)).getBytes(StandardCharsets.UTF_8)) + "?=");
var words = raw.split(" ");
var encodedWords = new ArrayList<String>();
var lastEncoded = false;
for (var word : words) {
var needsQuotePattern = encodePattern.matcher(word);
if (needsQuotePattern.find()) {
if (lastEncoded) {
// Spaces between encoded words are ignored, so add an explicit one
encodedWords.add("=?UTF-8?B?IA==?=");
}
encodedWords.add("=?UTF-8?B?" + Base64.getEncoder().encodeToString(word.getBytes(StandardCharsets.UTF_8)) + "?=");
lastEncoded = true;
} else {
encodedWords.add(word);
lastEncoded = false;
}
}
return String.join(" ", encodedWords);
}

public static String decode(String encoded) {
var decoded = new StringBuilder();
var quotedMatcher = decodePattern.matcher(encoded);
return quotedMatcher.replaceAll(mo -> {
try {
if (mo.group(2).toUpperCase().equals("B")) {
return new String(Base64.getDecoder().decode(mo.group(3)), mo.group(1));
} else {
var quotedPrintableMatcher = decodeQuotedPrintablePattern.matcher(mo.group(3));
return quotedPrintableMatcher.replaceAll(qmo -> {
var byteValue = new byte[1];
byteValue[0] = (byte)Integer.parseInt(qmo.group(1), 16);
try {
return new String(byteValue, mo.group(1));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
});
var lastMatchEnd = 0;
while (quotedMatcher.find()) {
if (quotedMatcher.start() > lastMatchEnd) {
var separator = encoded.substring(lastMatchEnd, quotedMatcher.start());
if (!separator.isBlank()) {
decoded.append(separator);
}
}
if (quotedMatcher.group(2).toUpperCase().equals("B")) {
try {
decoded.append(new String(Base64.getDecoder().decode(quotedMatcher.group(3)), quotedMatcher.group(1)));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
} else {
var quotedDecodedSpaces = quotedMatcher.group(3).replace("_", " ");
var quotedPrintableMatcher = decodeQuotedPrintablePattern.matcher(quotedDecodedSpaces);
decoded.append(quotedPrintableMatcher.replaceAll(qmo -> {
var byteValue = new byte[1];
byteValue[0] = (byte)Integer.parseInt(qmo.group(1), 16);
try {
return new String(byteValue, quotedMatcher.group(1));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}));
}
});
lastMatchEnd = quotedMatcher.end();
}
if (lastMatchEnd < encoded.length()) {
decoded.append(encoded, lastMatchEnd, encoded.length());
}
return decoded.toString();
}
}
@@ -28,17 +28,59 @@

class MimeTextTests {
@Test
void encode() {
assertEquals("=?UTF-8?B?w6XDpMO2?=", MimeText.encode("åäö"));
void simple() {
var encoded = "=?UTF-8?B?w6XDpMO2?=";
var decoded = "åäö";
assertEquals(encoded, MimeText.encode(decoded));
assertEquals(decoded, MimeText.decode(encoded));
}

@Test
void decode() {
assertEquals("åäö", MimeText.decode("=?utf-8?b?w6XDpMO2?="));
void mixed() {
var encoded = "=?UTF-8?B?VMOpc3Q=?=";
var decoded = "Tést";
assertEquals(encoded, MimeText.encode(decoded));
assertEquals(decoded, MimeText.decode(encoded));
}

@Test
void multipleWords() {
var encoded = "This is a =?UTF-8?B?dMOpc3Q=?= of =?UTF-8?B?bcO8bHRpcGxl?= words";
var decoded = "This is a tést of mültiple words";
assertEquals(encoded, MimeText.encode(decoded));
assertEquals(decoded, MimeText.decode(encoded));
}

@Test
void concatenateTokens() {
var encoded = "=?UTF-8?B?VMOpc3Q=?= =?UTF-8?B?IA==?= =?UTF-8?B?VMOpc3Q=?=";
var decoded = "Tést Tést";
assertEquals(encoded, MimeText.encode(decoded));
assertEquals(decoded, MimeText.decode(encoded));
}

@Test
void preserveSpaces() {
var encoded = "spac es";
var decoded = "spac es";
assertEquals(encoded, MimeText.encode(decoded));
assertEquals(decoded, MimeText.decode(encoded));
}

@Test
void decodeSpaces() {
var encoded = "=?UTF-8?B?VMOpc3Q=?= =?UTF-8?B?VMOpc3Q=?= and ";
var decoded = "TéstTést and ";
assertEquals(decoded, MimeText.decode(encoded));
}

@Test
void decodeIsoQ() {
assertEquals("Bä", MimeText.decode("=?iso-8859-1?Q?B=E4?="));
}

@Test
void decodeIsoQSpaces() {
assertEquals("Bä Bä Bä", MimeText.decode("=?iso-8859-1?Q?B=E4_B=E4=20B=E4?="));
}
}