Skip to content

Commit

Permalink
employing json encoder to handle unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
mattbierbaum committed Jun 9, 2014
1 parent c7e06ba commit e97e294
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.pyc
dist/
pyclj.egg-info/
build/
25 changes: 14 additions & 11 deletions clj.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

import os
from cStringIO import StringIO
import json

import decimal
import uuid
Expand All @@ -55,11 +56,13 @@

def number(v):
    """Convert the text of a numeric token into a Python number.

    A trailing ``M`` marks a decimal literal and yields a
    ``decimal.Decimal`` built from the text before the suffix.  Any other
    text is tried as an ``int`` first and falls back to ``float``, so
    exponent forms without a dot (e.g. ``3141e5``) are parsed as floats
    instead of failing in ``int()``.

    Raises ``ValueError`` when the text is neither a valid int nor a
    valid float.
    """
    if v.endswith('M'):
        return decimal.Decimal(v[:-1])
    try:
        return int(v)
    except ValueError:
        # Not an integer literal (fraction or exponent present); float()
        # either parses it or raises the ValueError the caller will see.
        return float(v)

_STOP_CHARS = [" ", ",", "\n", "\r", "\t"]
_COLL_OPEN_CHARS = ["#", "[", "{", "("]
Expand Down Expand Up @@ -223,16 +226,15 @@ def __read_token(self):
cp = c
c = self.__read_fd(1)
e = c
#v = u''.join(buf).decode('unicode-escape')
v = ''.join(buf).decode('string-escape')
v = unicode(''.join(buf).decode('unicode-escape'))

elif t == "datetime":
## skip "inst"
self.__read_fd(4)

## read next value as string
s = self.__read_token()
if not isinstance(s, str):
if not isinstance(s, basestring):
raise ValueError('Str expected, but got %s' % str(s))

## remove read string from the value_stack
Expand All @@ -247,7 +249,7 @@ def __read_token(self):

## read next value as string
s = self.__read_token()
if not isinstance(s, str):
if not isinstance(s, basestring):
raise ValueError('Str expected, but got %s' % str(s))

## remove read string from the value_stack
Expand Down Expand Up @@ -364,8 +366,8 @@ def __do_encode(self, d):
elif t == "decimal":
fd.write(str(d) + 'M')
elif t == "string":
s = d.encode("string-escape").replace('"', '\\"')
fd.write('"'+s+'"')
s = json.encoder.py_encode_basestring_ascii(unicode(d))
fd.write(s)
elif t == "boolean":
if d:
fd.write('true')
Expand All @@ -383,7 +385,8 @@ def __do_encode(self, d):
s = str(d)
fd.write("#uuid \"%s\"" % s)
else:
fd.write('"'+str(d)+'"')
s = json.encoder.py_encode_basestring_ascii(unicode(d))
fd.write(s)

def dump(obj, fp):
    """Encode ``obj`` with a ``CljEncoder`` bound to ``fp``.

    Builds ``CljEncoder(obj, fp)`` and returns whatever its ``encode()``
    call returns.
    """
    encoder = CljEncoder(obj, fp)
    return encoder.encode()
Expand Down
12 changes: 10 additions & 2 deletions tests/clj-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ def setUp(self):
"23": 23,
'23.45M': decimal.Decimal('23.45'),
"23.11": 23.11,
"3141e5": 3141e5,
"3.1415e7": 3.1415e7,
"true": True,
"false": False,
"nil": None,
Expand All @@ -20,7 +22,8 @@ def setUp(self):
'"string\n"': 'string\n',
'[:hello]':["hello"],
'-10.4':-10.4,
'"你"': '你',
'{"a" "\\"A substring\\""}': {"a": "\"A substring\""},
'"\\u4f60"': u'\u4f60',
"[1 2]": [1,2],
"#{true \"hello\" 12}": set([True, "hello", 12]),
"(\\a \\b \\c \\d)": ["a","b","c","d"],
Expand All @@ -33,7 +36,7 @@ def setUp(self):
"#inst \"2012-10-19T22:19:03.000-00:00\"": datetime(2012, 10, 19, 22, 19, 3, tzinfo=pytz.utc),
'#uuid "6eabd442-6958-484b-825d-aa79c0ad4967"': uuid.UUID("6eabd442-6958-484b-825d-aa79c0ad4967"),
'{:a #inst "2012-10-19T22:19:03.000-00:00"}': {"a":datetime(2012, 10, 19, 22, 19, 3, tzinfo=pytz.utc)},
'[#inst "2012-10-19T22:19:03.000-00:00"]': [datetime(2012, 10, 19, 22, 19, 3, tzinfo=pytz.utc)]
'[#inst "2012-10-19T22:19:03.000-00:00"]': [datetime(2012, 10, 19, 22, 19, 3, tzinfo=pytz.utc)],
'{:likes #{{:db/id 2} {:db/id 1}}}': {'likes': tuple([{'db/id': 2}, {'db/id': 1}])}
}

Expand All @@ -54,6 +57,7 @@ def setUp(self):
self.data = {'"helloworld"': "helloworld",
'"hello\\"world"': "hello\"world",
'12': 12,
'1000000.0': 1e6,
'23.45M': decimal.Decimal('23.45'),
'12.334': 12.334,
'true': True,
Expand All @@ -63,7 +67,11 @@ def setUp(self):
"[1 2 3 4]": (1,2,3,4),
"[]": (),
"{}": {},
'{"a" "\\"A substring \\u4f60\\""}': {"a": u"\"A substring \u4f60\""},
'"\\u4f60"': u'\u4f60',
'{"a" 1 "b" 2}':{"a":1, "b":2},
'{"\\u4f60" 1 "b" 2}':{u"\u4f60":1, "b":2},
'["\\u4f60" "a" "b"]': [u"\u4f60", u"a", u"b"],
'#{1}': set([1]),
'["h" nil [1 2 3] {"w" true}]':["h",None,[1,2,3],{"w":True}],
'#inst "2012-10-19T14:16:54Z"':datetime(2012,10,19,14,16,54,907),
Expand Down

0 comments on commit e97e294

Please sign in to comment.