Permalink
Browse files

fix encoding in ruby >= 1.9

  • Loading branch information...
kr committed Feb 21, 2012
1 parent 8363cb8 commit 9d32929c74e67a6c1858293b7a600d9bb699883c
Showing with 44 additions and 17 deletions.
  1. +1 −1 Readme
  2. +40 −15 okjson.rb
  3. +1 −0 t/encode-utf8.rb
  4. +1 −0 t/encode-utf8.rb.exp
  5. +1 −1 tested-on
View
2 Readme
@@ -19,7 +19,7 @@ This library is intended to be "vendored".
It is not a gem; instead, copy okjson.rb
into your project and "require" it directly.
This method helps you avoid an external
-dependency. It's only about 550 lines of
+dependency. It's only about 575 lines of
source; about half of that is UTF-8 coding.
If you bundle okjson with your library,
please change the module's name to something
View
@@ -1,4 +1,6 @@
-# Copyright 2011 Keith Rarick
+# encoding: UTF-8
+#
+# Copyright 2011, 2012 Keith Rarick
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -260,6 +262,12 @@ def abbrev(s)
def unquote(q)
q = q[1...-1]
a = q.dup # allocate a big enough string
+ rubydoesenc = false
+ # In ruby >= 1.9, a[w] is a codepoint, not a byte.
+ if a.class.method_defined?(:force_encoding)
+ a.force_encoding('UTF-8')
+ rubydoesenc = true
+ end
r, w = 0, 0
while r < q.length
c = q[r]
@@ -297,7 +305,12 @@ def unquote(q)
end
end
end
- w += ucharenc(a, w, uchar)
+ if rubydoesenc
+ a[w] = '' << uchar
+ w += 1
+ else
+ w += ucharenc(a, w, uchar)
+ end
else
raise Error, "invalid escape char #{q[r]} in \"#{q}\""
end
@@ -307,6 +320,8 @@ def unquote(q)
# Copy anything else byte-for-byte.
# Valid UTF-8 will remain valid UTF-8.
# Invalid UTF-8 will remain invalid UTF-8.
+ # In ruby >= 1.9, c is a codepoint, not a byte,
+ # in which case this is still what we want.
a[w] = c
r += 1
w += 1
@@ -441,6 +456,10 @@ def strenc(s)
t = StringIO.new
t.putc(?")
r = 0
+
+ # In ruby >= 1.9, s[r] is a codepoint, not a byte.
+ rubydoesenc = s.class.method_defined?(:encoding)
+
while r < s.length
case s[r]
when ?" then t.print('\\"')
@@ -455,21 +474,13 @@ def strenc(s)
case true
when Spc <= c && c <= ?~
t.putc(c)
- when true
+ when rubydoesenc
+ u = c.ord
+ surrenc(t, u)
+ else
u, size = uchardec(s, r)
r += size - 1 # we add one more at the bottom of the loop
- if u < 0x10000
- t.print('\\u')
- hexenc4(t, u)
- else
- u1, u2 = unsubst(u)
- t.print('\\u')
- hexenc4(t, u1)
- t.print('\\u')
- hexenc4(t, u2)
- end
- else
- # invalid byte; skip it
+ surrenc(t, u)
end
end
r += 1
@@ -479,6 +490,20 @@ def strenc(s)
end
+ def surrenc(t, u) # Writes the integer u to t as one or two backslash-u hex escapes; t must respond to print (strenc passes a StringIO).
+ if u < 0x10000 # values below 0x10000 are written as a single escape
+ t.print('\\u')
+ hexenc4(t, u)
+ else # larger values are split in two by unsubst (presumably a UTF-16 surrogate pair -- confirm against unsubst)
+ u1, u2 = unsubst(u)
+ t.print('\\u') # escape for the first value returned by unsubst
+ hexenc4(t, u1)
+ t.print('\\u') # escape for the second value returned by unsubst
+ hexenc4(t, u2)
+ end
+ end
+
+
def hexenc4(t, u)
t.putc(Hex[(u>>12)&0xf])
t.putc(Hex[(u>>8)&0xf])
View
@@ -0,0 +1 @@
+["á$¢€園𝄞"]
View
@@ -0,0 +1 @@
+["\u00e1$\u00a2\u20ac\u5712\ud834\udd1e"]
View
@@ -1,4 +1,4 @@
-This commit was tested Mon Feb 20 16:08:17 PST 2012
+This commit was tested Mon Feb 20 18:21:05 PST 2012
using the following ruby interpreters:
ruby 1.8.7 (2010-01-10 patchlevel 249) [universal-darwin11.0]
ruby 1.9.2p290 (2011-07-09 revision 32553) [x86_64-darwin11.2.0]

0 comments on commit 9d32929

Please sign in to comment.