Skip to content

Commit

Permalink
Unicode fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
mrjoes committed Nov 15, 2011
1 parent c64681f commit ae3963f
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 41 deletions.
2 changes: 2 additions & 0 deletions examples/transports/index.html
Expand Up @@ -23,6 +23,8 @@
return transports.push($(this).attr('id'));
});

console.log(transports);

// Hack to work around bug 251, I really hope it is going to be fixed.
// https://github.com/LearnBoost/socket.io-client/issues/251
// Alternative way to do full reconnect is to pass 'force new connection',
Expand Down
16 changes: 8 additions & 8 deletions tests/proto_test.py
Expand Up @@ -23,27 +23,27 @@ def test_encode_frames():

def test_decode_frames():
# Single string
eq_(proto.decode_frames('abc'), ['abc'])
eq_(proto.decode_frames(u'abc'), [u'abc'])

# Multiple strings
eq_(proto.decode_frames(u'\ufffd3\ufffdabc\ufffd3\ufffddef'.encode('utf-8')),
['abc', 'def'])
eq_(proto.decode_frames(u'\ufffd3\ufffdabc\ufffd3\ufffddef'),
[u'abc', u'def'])


def test_message():
# Test string message
eq_(proto.message(None, 'abc'), '3:::abc')
eq_(proto.message(None, 'abc'), u'3:::abc')

eq_(proto.message('abc', 'def'), '3::abc:def')
eq_(proto.message('abc', 'def'), u'3::abc:def')

eq_(proto.message(None, u'\u0403\u0404\u0405'),
u'3:::\u0403\u0404\u0405'.encode('utf-8'))
u'3:::\u0403\u0404\u0405')

# TODO: Multibyte encoding fix

# TODO: Fix me
eq_(proto.message(None, dict(a=1, b=2)),
'4:::%s' % proto.json_dumps(dict(a=1, b=2)))
u'4:::%s' % proto.json_dumps(dict(a=1, b=2)))


# TODO: Cover event
# TODO: Add event unit tests
12 changes: 9 additions & 3 deletions tornadio2/polling.py
Expand Up @@ -77,10 +77,11 @@ def post(self, session_id):
if self.session.is_closed or not self.preflight():
raise HTTPError(401)

data = self.request.body
# Grab body and decode it (socket.io always sends data in utf-8)
data = self.request.body.decode('utf-8')

# IE XDomainRequest support
if data.startswith('data='):
if data.startswith(u'data='):
data = data[5:]

# Process packets one by one
Expand Down Expand Up @@ -257,14 +258,19 @@ def post(self, session_id):
if self.session.is_closed or not self.preflight():
raise HTTPError(401)

# Socket.io always sends data UTF-8 encoded.
data = self.request.body

# IE XDomainRequest support
if not data.startswith('d='):
logging.error('Malformed JSONP POST request')
raise HTTPError(403)

data = urllib.unquote(data[2:])
# Special case
data = urllib.unquote(data[2:]).decode('utf-8')

if data.startswith(u'"'):
data = data.strip(u'"')

# Process packets one by one
packets = proto.decode_frames(data)
Expand Down
59 changes: 30 additions & 29 deletions tornadio2/proto.py
Expand Up @@ -48,7 +48,7 @@ def default(self, o):
NOOP = '8'

# Frame separator (U+FFFD), kept as unicode; encoded to UTF-8 only on output
FRAME_SEPARATOR = u'\ufffd'.encode('utf-8')
FRAME_SEPARATOR = u'\ufffd'


def disconnect(endpoint=None):
Expand All @@ -57,7 +57,7 @@ def disconnect(endpoint=None):
`endpoint`
Optional endpoint name
"""
return '0::%s' % (
return u'0::%s' % (
endpoint or ''
)

Expand All @@ -68,15 +68,15 @@ def connect(endpoint=None):
`endpoint`
Optional endpoint name
"""
return '1::%s' % (
return u'1::%s' % (
endpoint or ''
)


def heartbeat():
"""Generate heartbeat message.
"""
return '2::'
return u'2::'


def message(endpoint, msg, message_id=None):
Expand All @@ -93,19 +93,19 @@ def message(endpoint, msg, message_id=None):
if (not isinstance(msg, (unicode, str)) and
isinstance(msg, (object, dict))):
if msg is not None:
return '4:%s:%s:%s' % (
return u'4:%s:%s:%s' % (
message_id or '',
endpoint or '',
json.dumps(msg, **json_decimal_args).encode('utf-8')
json.dumps(msg, **json_decimal_args)
)
else:
# TODO: Log something
return ''
# TODO: Log something?
return u''
else:
return '3:%s:%s:%s' % (
message_id or '',
endpoint or '',
msg.encode('utf-8') if isinstance(msg, unicode) else str(msg)
return u'3:%s:%s:%s' % (
message_id or u'',
endpoint or u'',
msg if isinstance(msg, unicode) else str(msg).decode('utf-8')
)


Expand Down Expand Up @@ -137,10 +137,10 @@ def event(endpoint, name, message_id, *args, **kwargs):
args=[kwargs]
)

return '5:%s:%s:%s' % (
return u'5:%s:%s:%s' % (
message_id or '',
endpoint or '',
json.dumps(evt).encode('utf-8')
json.dumps(evt)
)


Expand All @@ -152,8 +152,8 @@ def ack(endpoint, message_id):
`message_id`
Message id to acknowledge
"""
return '6::%s:%s' % (endpoint or '',
message_id)
return u'6::%s:%s' % (endpoint or '',
message_id)


def error(endpoint, reason, advice=None):
Expand All @@ -166,14 +166,14 @@ def error(endpoint, reason, advice=None):
`advice`
Error advice
"""
return '7::%s:%s+%s' % (endpoint or '',
(reason or '').encode('utf-8'),
(advice or '').encode('utf-8'))
return u'7::%s:%s+%s' % (endpoint or '',
(reason or ''),
(advice or ''))


def noop():
"""Generate noop packet."""
return '8::'
return u'8::'


def json_dumps(msg):
Expand Down Expand Up @@ -202,6 +202,8 @@ def decode_frames(data):
"""
# Single message - nothing to decode here
assert isinstance(data, unicode), 'frame is not unicode'

if not data.startswith(FRAME_SEPARATOR):
return [data]

Expand All @@ -212,13 +214,13 @@ def decode_frames(data):
frame_len = len(FRAME_SEPARATOR)

while data[idx:idx + frame_len] == FRAME_SEPARATOR:
idx += len(FRAME_SEPARATOR)
idx += 1

# Grab message length
len_start = idx
idx = data.find(FRAME_SEPARATOR, idx)
msg_len = int(data[len_start:idx])
idx += len(FRAME_SEPARATOR)
idx += 1

# Grab message
msg_data = data[idx:idx + msg_len]
Expand All @@ -229,7 +231,7 @@ def decode_frames(data):
return packets


# Encode expects packets in UTF-8 encoding
# Encode expects packets in unicode
def encode_frames(packets):
"""Encode list of packets.
Expand All @@ -242,12 +244,11 @@ def encode_frames(packets):

# Exactly one packet - don't do any frame encoding
if len(packets) == 1:
return packets[0]
return packets[0].encode('utf-8')

# Multiple packets
frames = ''

for p in packets:
frames += '%s%d%s%s' % (FRAME_SEPARATOR, len(p), FRAME_SEPARATOR, p)
frames = u''.join(u'%s%d%s%s' % (FRAME_SEPARATOR, len(p),
FRAME_SEPARATOR, p)
for p in packets)

return frames
return frames.encode('utf-8')
2 changes: 1 addition & 1 deletion tornadio2/session.py
Expand Up @@ -382,7 +382,7 @@ def raw_message(self, msg):
# and it is dict, unpack dictionary. Otherwise, pass
# in args
if len(args) == 1 and isinstance(args[0], dict):
# Fix for the http://bugs.python.org/issue4978
# Workaround for http://bugs.python.org/issue4978 on older Python versions
str_args = dict((str(x), y) for x, y in args[0].iteritems())

conn.on_event(event['name'], **str_args)
Expand Down

0 comments on commit ae3963f

Please sign in to comment.