Skip to content
Permalink
Browse files

Faster utf7 encode (#373)

~40% faster for input with a mix of unicode and ascii chars
  • Loading branch information...
carsonip authored and mjs committed Apr 5, 2019
1 parent ec6b32c commit 46a3ee64ed8f3725582f2f7f8955c7e760c34397
Showing with 11 additions and 8 deletions.
  1. +11 −8 imapclient/imap_utf7.py
@@ -21,26 +21,29 @@ def encode(s):
if not isinstance(s, text_type):
return s

res = []
res = bytearray()

b64_buffer = []

def consume_b64_buffer(buf):
"""
Consume the buffer by encoding it into a modified base 64 representation
and surround it with shift characters & and -
"""
if b64_buffer:
res.extend([b'&', base64_utf7_encode(buf), b'-'])
if buf:
res.extend(b'&' + base64_utf7_encode(buf) + b'-')
del buf[:]

for c in s:
# printable ascii case should not be modified
if 0x20 <= ord(c) <= 0x7e:
o = ord(c)
if 0x20 <= o <= 0x7e:
consume_b64_buffer(b64_buffer)
# Special case: & is used as shift character so we need to escape it in ASCII
if c == '&':
res.append(b'&-')
if o == 0x26: # & = 0x26
res.extend(b'&-')
else:
res.append(c.encode('ascii'))
res.append(o)

# Buffer characters that will be encoded in base64 and append them to the
# result later, when an ASCII character or the end of the string is reached
@@ -50,7 +53,7 @@ def consume_b64_buffer(buf):
# Consume the remaining buffer if the string finishes with non-ASCII characters
consume_b64_buffer(b64_buffer)

return b''.join(res)
return bytes(res)


AMPERSAND_ORD = byte2int(b'&')

0 comments on commit 46a3ee6

Please sign in to comment.
You can’t perform that action at this time.