diff --git a/AndroidManifest.xml b/AndroidManifest.xml
index 23ba014..3ee0483 100644
--- a/AndroidManifest.xml
+++ b/AndroidManifest.xml
@@ -1,6 +1,6 @@
+    package="macaca.unicode.ime" >
  * Utf7ImeService enables users to input any Unicode character by using only the
@@ -53,14 +55,14 @@ public class Utf7ImeService extends InputMethodService {
     /**
      * Expected encoding for hardware key input.
      */
-    private static final String UTF7 = "UTF-7";
+    private static final String UTF7 = "X-MODIFIED-UTF-7";
 
     private static final String ASCII = "US-ASCII";
 
     /**
      * Special character to shift to Modified BASE64 in modified UTF-7.
      */
-    private static final char UTF7_SHIFT = '+';
+    private static final char UTF7_SHIFT = '&';
 
     /**
      * Special character to shift back to US-ASCII in modified UTF-7.
@@ -89,7 +91,7 @@ public void onStartInput(EditorInfo attribute, boolean restarting) {
         if (!restarting) {
             mMetaState = 0;
             mIsShifted = false;
-            mUtf7Charset = Charset.forName(UTF7);
+            mUtf7Charset = new CharsetProvider().charsetForName(UTF7);
         }
         mComposing = null;
     }
@@ -141,9 +143,6 @@ public boolean onKeyDown(int keyCode, KeyEvent event) {
             // Shifted State
             if (c == UTF7_UNSHIFT) {
                 toUnshifted();
-            } else if (!isAlphanumeric(c)) {
-                toUnshifted();
-                commitCharacter(c);
             } else {
                 appendComposing(c);
             }
diff --git a/src/macaca/unicode/ime/charsetUtils/Base64Util.java b/src/macaca/unicode/ime/charsetUtils/Base64Util.java
new file mode 100644
index 0000000..e66e965
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/Base64Util.java
@@ -0,0 +1,71 @@
+package macaca.unicode.ime.charsetUtils;
+
+import java.util.Arrays;
+
+/**
+ * Maps between 6-bit values (sextets) and the characters of a 64-character
+ * base64 alphabet. Only 7-bit (US-ASCII) alphabet characters are accepted.
+ */
+class Base64Util {
+    private static final int ALPHABET_LENGTH = 64;
+    private final char[] alphabet;
+    private final int[] inverseAlphabet;
+
+    /**
+     * Builds the sextet-to-character and character-to-sextet tables.
+     *
+     * @param alphabet the 64 characters in sextet order
+     * @throws IllegalArgumentException if the alphabet does not have exactly 64
+     *             characters or contains a character outside the 7-bit range
+     */
+    Base64Util(final String alphabet) {
+        this.alphabet = alphabet.toCharArray();
+        if (alphabet.length() != ALPHABET_LENGTH)
+            throw new IllegalArgumentException("alphabet has incorrect length (should be 64, not "
+                    + alphabet.length() + ")");
+        inverseAlphabet = new int[128];
+        Arrays.fill(inverseAlphabet, -1);
+        for (int i = 0; i < this.alphabet.length; i++) {
+            final char ch = this.alphabet[i];
+            if (ch >= 128)
+                throw new IllegalArgumentException("invalid character in alphabet: " + ch);
+            inverseAlphabet[ch] = i;
+        }
+    }
+
+    /**
+     * Returns the sextet encoded by the given byte.
+     *
+     * @param ch the byte to look up
+     * @return the sextet (0-63), or -1 if the byte is not part of the alphabet
+     */
+    int getSextet(final byte ch) {
+        // A Java byte is signed, so 0x80..0xFF arrive as negative values; they
+        // can never be alphabet members and must not be used as an array index.
+        if (ch < 0)
+            return -1;
+        return inverseAlphabet[ch];
+    }
+
+    /**
+     * Tells whether the given character belongs to the alphabet.
+     *
+     * @param ch the character to test
+     * @return true if the character is one of the 64 alphabet characters
+     */
+    boolean contains(final char ch) {
+        if (ch >= 128)
+            return false;
+        return inverseAlphabet[ch] >= 0;
+    }
+
+    /**
+     * Returns the alphabet character for the given sextet.
+     *
+     * @param sextet index into the alphabet, expected to be in the range 0-63
+     * @return the alphabet character as a byte
+     */
+    byte getChar(final int sextet) {
+        return (byte) alphabet[sextet];
+    }
+}
diff --git a/src/macaca/unicode/ime/charsetUtils/CharsetProvider.java b/src/macaca/unicode/ime/charsetUtils/CharsetProvider.java
new file mode 100644
index 0000000..06bcca1
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/CharsetProvider.java
@@ -0,0 +1,62 @@
+package macaca.unicode.ime.charsetUtils;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Charset provider exposing the UTF-7 family implemented in this package:
+ * "UTF-7", "X-UTF-7-OPTIONAL" and "X-MODIFIED-UTF-7", plus their aliases.
+ */
+public class CharsetProvider extends java.nio.charset.spi.CharsetProvider {
+    private static final String UTF7_NAME = "UTF-7";
+    private static final String UTF7_O_NAME = "X-UTF-7-OPTIONAL";
+    private static final String UTF7_M_NAME = "X-MODIFIED-UTF-7";
+    private static final String[] UTF7_ALIASES = new String[] { "UNICODE-1-1-UTF-7",
+            "CSUNICODE11UTF7", "X-RFC2152", "X-RFC-2152" };
+    private static final String[] UTF7_O_ALIASES = new String[] { "X-RFC2152-OPTIONAL",
+            "X-RFC-2152-OPTIONAL" };
+    private static final String[] UTF7_M_ALIASES = new String[] { "X-IMAP-MODIFIED-UTF-7",
+            "X-IMAP4-MODIFIED-UTF7", "X-IMAP4-MODIFIED-UTF-7", "X-RFC3501", "X-RFC-3501" };
+    private Charset utf7charset = new UTF7Charset(UTF7_NAME, UTF7_ALIASES, false);
+    private Charset utf7oCharset = new UTF7Charset(UTF7_O_NAME, UTF7_O_ALIASES, true);
+    private Charset imap4charset = new ModifiedUTF7Charset(UTF7_M_NAME, UTF7_M_ALIASES);
+    private List charsets;
+
+    public CharsetProvider() {
+        charsets = Arrays.asList(new Object[] { utf7charset, imap4charset, utf7oCharset });
+    }
+
+    /**
+     * Looks up a charset by canonical name first, then by alias. The lookup is
+     * case-insensitive because the name is upper-cased with {@link Locale#US}.
+     *
+     * @param charsetName canonical name or alias of the wanted charset
+     * @return the matching charset, or null if this provider does not know it
+     */
+    public Charset charsetForName(String charsetName) {
+        charsetName = charsetName.toUpperCase(Locale.US);
+        for (Iterator iter = charsets.iterator(); iter.hasNext();) {
+            Charset charset = (Charset) iter.next();
+            if (charset.name().equals(charsetName))
+                return charset;
+        }
+        for (Iterator iter = charsets.iterator(); iter.hasNext();) {
+            Charset charset = (Charset) iter.next();
+            if (charset.aliases().contains(charsetName))
+                return charset;
+        }
+        return null;
+    }
+
+    /**
+     * @return an iterator over the three charsets supplied by this provider
+     */
+    public Iterator charsets() {
+        return charsets.iterator();
+    }
+
+}
diff --git a/src/macaca/unicode/ime/charsetUtils/ModifiedUTF7Charset.java b/src/macaca/unicode/ime/charsetUtils/ModifiedUTF7Charset.java
new file mode 100644
index 0000000..42d3842
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/ModifiedUTF7Charset.java
@@ -0,0 +1,32 @@
+package macaca.unicode.ime.charsetUtils;
+
+/**
+ * Strict UTF-7 style charset that shifts with '&' and uses ',' instead of '/'
+ * as the last character of its base64 alphabet.
+ */
+class ModifiedUTF7Charset extends UTF7StyleCharset {
+    private static final String MODIFIED_BASE64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+            + "abcdefghijklmnopqrstuvwxyz" + "0123456789+,";
+
+    ModifiedUTF7Charset(String name, String[] aliases) {
+        super(name, aliases, MODIFIED_BASE64_ALPHABET, true);
+    }
+
+    /**
+     * Printable US-ASCII (0x20-0x7E) is written as-is, except the shift
+     * character itself.
+     */
+    boolean canEncodeDirectly(char ch) {
+        if (ch == shift())
+            return false;
+        return ch >= 0x20 && ch <= 0x7E;
+    }
+
+    byte shift() {
+        return '&';
+    }
+
+    byte unshift() {
+        return '-';
+    }
+}
diff --git a/src/macaca/unicode/ime/charsetUtils/UTF7Charset.java b/src/macaca/unicode/ime/charsetUtils/UTF7Charset.java
new file mode 100644
index 0000000..ec912d0
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/UTF7Charset.java
@@ -0,0 +1,34 @@
+package macaca.unicode.ime.charsetUtils;
+
+/**
+ * Non-strict UTF-7 style charset that shifts with '+'. The directly encoded
+ * characters are SET_D and RULE_3, plus SET_O when includeOptional is set.
+ */
+class UTF7Charset extends UTF7StyleCharset {
+    private static final String BASE64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+            + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/";
+    private static final String SET_D = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
+    private static final String SET_O = "!\"#$%&*;<=>@[]^_`{|}";
+    private static final String RULE_3 = " \t\r\n";
+    final String directlyEncoded;
+
+    UTF7Charset(String name, String[] aliases, boolean includeOptional) {
+        super(name, aliases, BASE64_ALPHABET, false);
+        if (includeOptional)
+            this.directlyEncoded = SET_D + SET_O + RULE_3;
+        else
+            this.directlyEncoded = SET_D + RULE_3;
+    }
+
+    boolean canEncodeDirectly(char ch) {
+        return directlyEncoded.indexOf(ch) >= 0;
+    }
+
+    byte shift() {
+        return '+';
+    }
+
+    byte unshift() {
+        return '-';
+    }
+}
diff --git a/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharset.java b/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharset.java
new file mode 100644
index 0000000..f773869
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharset.java
@@ -0,0 +1,42 @@
+package macaca.unicode.ime.charsetUtils;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Common base of the UTF-7 style charsets. Subclasses supply the shift and
+ * unshift bytes and decide which characters may be written without base64.
+ */
+abstract class UTF7StyleCharset extends Charset {
+    private static final List CONTAINED = Arrays.asList(new String[] { "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE" });
+    final boolean strict;
+    Base64Util base64;
+
+    protected UTF7StyleCharset(String canonicalName, String[] aliases, String alphabet,
+            boolean strict) {
+        super(canonicalName, aliases);
+        this.base64 = new Base64Util(alphabet);
+        this.strict = strict;
+    }
+
+    public boolean contains(final Charset cs) {
+        return CONTAINED.contains(cs.name());
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new UTF7StyleCharsetDecoder(this, base64, strict);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new UTF7StyleCharsetEncoder(this, base64, strict);
+    }
+
+    abstract boolean canEncodeDirectly(char ch);
+
+    abstract byte shift();
+
+    abstract byte unshift();
+}
diff --git a/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharsetDecoder.java b/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharsetDecoder.java
new file mode 100644
index 0000000..a586745
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharsetDecoder.java
@@ -0,0 +1,134 @@
+package macaca.unicode.ime.charsetUtils;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decoder for UTF-7 style charsets. Bytes between the shift and unshift
+ * markers are read as base64 sextets and reassembled into 16-bit chars; all
+ * other bytes are passed through as single chars.
+ */
+class UTF7StyleCharsetDecoder extends CharsetDecoder {
+    private final Base64Util base64;
+    private final byte shift;
+    private final byte unshift;
+    private final boolean strict;
+    private boolean base64mode;
+    private int bitsRead;
+    private int tempChar;
+    private boolean justShifted;
+    private boolean justUnshifted;
+
+    UTF7StyleCharsetDecoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
+        super(cs, 0.6f, 1.0f);
+        this.base64 = base64;
+        this.strict = strict;
+        this.shift = cs.shift();
+        this.unshift = cs.unshift();
+    }
+
+    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+        while (in.hasRemaining()) {
+            byte b = in.get();
+            if (base64mode) {
+                if (b == unshift) {
+                    if (base64bitsWaiting())
+                        return malformed(in);
+                    if (justShifted) {
+                        if (!out.hasRemaining())
+                            return overflow(in);
+                        out.put((char) shift);
+                    } else
+                        justUnshifted = true;
+                    setUnshifted();
+                } else {
+                    if (!out.hasRemaining())
+                        return overflow(in);
+                    CoderResult result = handleBase64(in, out, b);
+                    if (result != null)
+                        return result;
+                }
+                justShifted = false;
+            } else {
+                if (b == shift) {
+                    base64mode = true;
+                    if (justUnshifted && strict)
+                        return malformed(in);
+                    justShifted = true;
+                    continue;
+                }
+                if (!out.hasRemaining())
+                    return overflow(in);
+                out.put((char) b);
+                justUnshifted = false;
+            }
+        }
+        return CoderResult.UNDERFLOW;
+    }
+
+    /** Steps back over the byte just read and reports output overflow. */
+    private CoderResult overflow(ByteBuffer in) {
+        in.position(in.position() - 1);
+        return CoderResult.OVERFLOW;
+    }
+
+    /**
+     * Consumes one byte read in base64 mode. A byte outside the alphabet is
+     * malformed input in strict mode; otherwise it is emitted as-is and ends
+     * base64 mode.
+     *
+     * @return null to keep decoding, or the result that decodeLoop must return
+     */
+    private CoderResult handleBase64(ByteBuffer in, CharBuffer out, byte lastRead) {
+        CoderResult result = null;
+        int sextet = base64.getSextet(lastRead);
+        if (sextet >= 0) {
+            bitsRead += 6;
+            if (bitsRead < 16) {
+                tempChar += sextet << (16 - bitsRead);
+            } else {
+                bitsRead -= 16;
+                tempChar += sextet >> (bitsRead);
+                out.put((char) tempChar);
+                tempChar = (sextet << (16 - bitsRead)) & 0xFFFF;
+            }
+        } else {
+            if (strict)
+                return malformed(in);
+            out.put((char) lastRead);
+            if (base64bitsWaiting())
+                result = malformed(in);
+            setUnshifted();
+        }
+        return result;
+    }
+
+    protected CoderResult implFlush(CharBuffer out) {
+        if ((base64mode && strict) || base64bitsWaiting())
+            return CoderResult.malformedForLength(1);
+        return CoderResult.UNDERFLOW;
+    }
+
+    protected void implReset() {
+        setUnshifted();
+        justUnshifted = false;
+    }
+
+    /** Steps back over the byte just read and reports it as malformed. */
+    private CoderResult malformed(ByteBuffer in) {
+        in.position(in.position() - 1);
+        return CoderResult.malformedForLength(1);
+    }
+
+    private boolean base64bitsWaiting() {
+        return tempChar != 0 || bitsRead >= 6;
+    }
+
+    private void setUnshifted() {
+        base64mode = false;
+        bitsRead = 0;
+        tempChar = 0;
+    }
+}
diff --git a/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharsetEncoder.java b/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharsetEncoder.java
new file mode 100644
index 0000000..fab48dc
--- /dev/null
+++ b/src/macaca/unicode/ime/charsetUtils/UTF7StyleCharsetEncoder.java
@@ -0,0 +1,99 @@
+package macaca.unicode.ime.charsetUtils;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Encoder for UTF-7 style charsets. Characters the charset can encode
+ * directly are written as single bytes, a shift character met outside base64
+ * mode is written as shift followed by unshift, and all other characters are
+ * written as base64 sextets after a shift byte.
+ */
+class UTF7StyleCharsetEncoder extends CharsetEncoder {
+    private static final float AVG_BYTES_PER_CHAR = 1.5f;
+    private static final float MAX_BYTES_PER_CHAR = 5.0f;
+    private final UTF7StyleCharset cs;
+    private final Base64Util base64;
+    private final byte shift;
+    private final byte unshift;
+    private final boolean strict;
+    private boolean base64mode;
+    private int bitsToOutput;
+    private int sextet;
+
+    UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
+        super(cs, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
+        this.cs = cs;
+        this.base64 = base64;
+        this.strict = strict;
+        this.shift = cs.shift();
+        this.unshift = cs.unshift();
+    }
+
+    protected void implReset() {
+        base64mode = false;
+        sextet = 0;
+        bitsToOutput = 0;
+    }
+
+    protected CoderResult implFlush(ByteBuffer out) {
+        if (base64mode) {
+            if (out.remaining() < 2)
+                return CoderResult.OVERFLOW;
+            if (bitsToOutput != 0)
+                out.put(base64.getChar(sextet));
+            out.put(unshift);
+        }
+        return CoderResult.UNDERFLOW;
+    }
+
+    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+        while (in.hasRemaining()) {
+            if (out.remaining() < 4)
+                return CoderResult.OVERFLOW;
+            char ch = in.get();
+            if (cs.canEncodeDirectly(ch)) {
+                unshift(out, ch);
+                out.put((byte) ch);
+            } else if (!base64mode && ch == shift) {
+                out.put(shift);
+                out.put(unshift);
+            } else
+                encodeBase64(ch, out);
+        }
+        return CoderResult.UNDERFLOW;
+    }
+
+    private void unshift(ByteBuffer out, char ch) {
+        if (!base64mode)
+            return;
+        if (bitsToOutput != 0)
+            out.put(base64.getChar(sextet));
+        if (base64.contains(ch) || ch == unshift || strict)
+            out.put(unshift);
+        base64mode = false;
+        sextet = 0;
+        bitsToOutput = 0;
+    }
+
+    /**
+     * Appends the 16 bits of ch to the base64 run, writing every complete
+     * sextet and keeping the leftover bits in sextet for the next call.
+     */
+    private void encodeBase64(char ch, ByteBuffer out) {
+        if (!base64mode)
+            out.put(shift);
+        base64mode = true;
+        bitsToOutput += 16;
+        while (bitsToOutput >= 6) {
+            bitsToOutput -= 6;
+            sextet += (ch >> bitsToOutput);
+            sextet &= 0x3F;
+            out.put(base64.getChar(sextet));
+            sextet = 0;
+        }
+        sextet = (ch << (6 - bitsToOutput)) & 0x3F;
+    }
+}