Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Updated Charset.normalize (and related classes) to handle UDH presence

in the message body correctly.
  • Loading branch information...
commit d34a6377d8ea16bf44b56d4f86622af932198ffc 1 parent 69395a6
John Woolf authored
View
12 src/main/java/com/cloudhopper/commons/charset/BaseCharset.java
@@ -27,12 +27,20 @@
protected abstract byte[] encodeCharSequence(CharSequence str0);
protected abstract void decodeToBuffer(byte[] bytes, StringBuilder buffer);
+ /** {@inheritDoc} */
+ @Deprecated
@Override
public String normalize(CharSequence str0) {
- byte[] bytes = this.encode(str0);
+ return normalize(false, str0);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public String normalize(boolean udh, CharSequence str0) {
+ byte[] bytes = this.encode(udh, str0);
// normalizing a string should never result in a longer string
StringBuilder buf = new StringBuilder(str0.length());
- this.decode(bytes, buf);
+ this.decode(udh, bytes, buf);
return buf.toString();
}
View
27 src/main/java/com/cloudhopper/commons/charset/Charset.java
@@ -34,6 +34,7 @@
* @param str0 The Java string to convert into a byte array
* @return A new byte array
*/
+ @Deprecated
public byte[] encode(CharSequence str0);
/**
@@ -62,6 +63,7 @@
* @param bytes The array of bytes to decode
* @param buffer The String buffer to append chars to
*/
+ @Deprecated
public void decode(final byte[] bytes, StringBuilder buffer);
/**
@@ -79,6 +81,10 @@
public void decode(boolean udh, final byte[] bytes, StringBuilder buffer);
/**
+ * @deprecated WARNING: this method does not account for the existence of a
+ * <tt>UDH</tt> in the <code>CharSequence</code>.
+ * Use {@link #normalize(boolean, CharSequence)} instead.
+ *
* Normalize the characters of the source string to characters that can be
* represented by this charset. Any characters in the input String that
* cannot be represented by this charset are replaced with a '?' (question
@@ -95,6 +101,27 @@
* @param str0 The source string to normalize
* @return The normalized string
*/
+ @Deprecated
public String normalize(CharSequence str0);
+ /**
+ * Normalize the characters of the source string to characters that can be
+ * represented by this charset. Any characters in the input String that
+ * cannot be represented by this charset are replaced with a '?' (question
+ * mark character).<br><br>
+ * The default implementation of this method is partially inefficient by
+ * first encoding the input String to a byte array representing this charset
+ * followed by decoding the byte array back into a Java String. During this
+ * double conversion, any characters in the original Java String that don't
+ * exist in this charset are replaced with '?' (question mark characters)
+ * and then decoded back into a new Java String.<br><br>
+ * Some charsets may choose to override this default behavior to achieve a
+ * more efficient implementation.
+ *
+ * @param udh Whether or not the source string contains a <tt>UDH</tt>
+ * @param str0 The source string to normalize
+ * @return The normalized string
+ */
+ public String normalize(boolean udh, CharSequence str0);
+
}
View
32 src/main/java/com/cloudhopper/commons/charset/CharsetUtil.java
@@ -209,16 +209,44 @@ static public String decode(boolean udh, byte[] bytes, Charset charset) {
return buffer.toString();
}
+ /** @deprecated Use {@link #normalize(boolean, CharSequence, String)} instead. */
+ @Deprecated
static public String normalize(CharSequence str0, String charsetName) {
+ return normalize(false, str0, charsetName);
+ }
+
+ /**
+ * @param udh Whether the <code>CharSequence</code> contains a <tt>UDH</tt>
+ * @param str0 The user data <code>CharSequence</code>
+ * @param charsetName The character encoding name
+ *
+ * @return The normalized string
+ * @see Charset#normalize(boolean, CharSequence)
+ */
+ static public String normalize(boolean udh, CharSequence str0, String charsetName) {
Charset charset = map(charsetName);
if (charset == null) {
throw new IllegalArgumentException("Unsupported charset [" + charsetName + "]");
}
- return normalize(str0, charset);
+ return normalize(udh, str0, charset);
}
+ /** @deprecated Use {@link #normalize(boolean, CharSequence, Charset)} instead. */
+ @Deprecated
static public String normalize(CharSequence str0, Charset charset) {
- return charset.normalize(str0);
+ return normalize(false, str0, charset);
+ }
+
+ /**
+ * @param udh Whether the <code>CharSequence</code> contains a <tt>UDH</tt>
+ * @param str0 The user data <code>CharSequence</code>
+ * @param charset The character encoding
+ *
+ * @return The normalized string
+ * @see Charset#normalize(boolean, CharSequence)
+ */
+ static public String normalize(boolean udh, CharSequence str0, Charset charset) {
+ return charset.normalize(udh, str0);
}
}
Please sign in to comment.
Something went wrong with that request. Please try again.