updating ok_json lib which fixes OkJsonTest.test_json_encode

plentz · Feb 22, 2012 · 988c882 · 988c882
1 parent 74d6f58
commit 988c882
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 17 deletions.
diff --git a/ok_json_test.rb b/ok_json_test.rb
@@ -7,11 +7,10 @@
 require 'okjson'
 
 class OkJsonTest < MiniTest::Unit::TestCase
-  # This is a bug in OKJSON
   def test_json_encode
     data = {'message' => "á"}
     json = OkJson.encode data
-    assert_equal '{"message":"á"}', json
+    assert_equal '{"message":"\u00e1"}', json
   end
 
   def test_json_decode

diff --git a/okjson.rb b/okjson.rb
@@ -1,4 +1,6 @@
-# Copyright 2011 Keith Rarick
+# encoding: UTF-8
+#
+# Copyright 2011, 2012 Keith Rarick
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -260,6 +262,12 @@ def abbrev(s)
   def unquote(q)
     q = q[1...-1]
     a = q.dup # allocate a big enough string
+    rubydoesenc = false
+    # In ruby >= 1.9, a[w] is a character (a one-character String), not a byte.
+    if a.class.method_defined?(:force_encoding)
+      a.force_encoding('UTF-8')
+      rubydoesenc = true
+    end
     r, w = 0, 0
     while r < q.length
       c = q[r]
@@ -297,7 +305,12 @@ def unquote(q)
               end
             end
           end
-          w += ucharenc(a, w, uchar)
+          if rubydoesenc
+            a[w] = '' << uchar
+            w += 1
+          else
+            w += ucharenc(a, w, uchar)
+          end
         else
           raise Error, "invalid escape char #{q[r]} in \"#{q}\""
         end
@@ -307,6 +320,8 @@ def unquote(q)
         # Copy anything else byte-for-byte.
         # Valid UTF-8 will remain valid UTF-8.
         # Invalid UTF-8 will remain invalid UTF-8.
+        # In ruby >= 1.9, c is a character (a one-character String), not a
+        # byte, in which case this is still what we want.
         a[w] = c
         r += 1
         w += 1
@@ -441,6 +456,10 @@ def strenc(s)
     t = StringIO.new
     t.putc(?")
     r = 0
+
+    # In ruby >= 1.9, s[r] is a character (a one-character String), not a
+    # byte; its codepoint is obtained with #ord.
+    rubydoesenc = s.class.method_defined?(:encoding)
+
     while r < s.length
       case s[r]
       when ?"  then t.print('\\"')
@@ -455,21 +474,13 @@ def strenc(s)
         case true
         when Spc <= c && c <= ?~
           t.putc(c)
-        when true
+        when rubydoesenc
+          u = c.ord
+          surrenc(t, u)
+        else
           u, size = uchardec(s, r)
           r += size - 1 # we add one more at the bottom of the loop
-          if u < 0x10000
+          surrenc(t, u)
-            t.print('\\u')
-            hexenc4(t, u)
-          else
-            u1, u2 = unsubst(u)
-            t.print('\\u')
-            hexenc4(t, u1)
-            t.print('\\u')
-            hexenc4(t, u2)
-          end
-        else
-          # invalid byte; skip it
         end
       end
       r += 1
@@ -479,6 +490,20 @@ def strenc(s)
   end
 
 
+  def surrenc(t, u)
+    if u < 0x10000
+      t.print('\\u')
+      hexenc4(t, u)
+    else
+      u1, u2 = unsubst(u)
+      t.print('\\u')
+      hexenc4(t, u1)
+      t.print('\\u')
+      hexenc4(t, u2)
+    end
+  end
+
+
   def hexenc4(t, u)
     t.putc(Hex[(u>>12)&0xf])
     t.putc(Hex[(u>>8)&0xf])