Permalink
Browse files

fixed encoded bug in sax, needs more testing

  • Loading branch information...
1 parent 274b1b6 commit d3d3348f0e5d7cffcee3c5b96a940f8c61d65e99 @ohler55 committed Jul 23, 2013
Showing with 86 additions and 58 deletions.
  1. +4 −2 README.md
  2. +20 −13 ext/ox/dump.c
  3. +1 −1 ext/ox/sax.c
  4. +1 −1 lib/ox/version.rb
  5. +3 −3 test/perf_obj.rb
  6. +6 −1 test/sax/helpers.rb
  7. +40 −37 test/sax/sax_test.rb
  8. +11 −0 test/tests.rb
View
@@ -34,9 +34,11 @@ A fast XML parser and Object marshaller as a Ruby gem.
## <a name="release">Release Notes</a>
-### Release 2.0.5
+### Release 2.0.6
- - Better support for special character encoding with 1.8.7.
+ - Fixed bug in special character decoding that chopped off text.
+
+ - Limit depth on dump to 1000 to avoid core dump on circular references if the user does not specify circular.
## <a name="description">Description</a>
View
@@ -39,6 +39,7 @@
#include "ox.h"
#define USE_B64 0
+#define MAX_DEPTH 1000
typedef unsigned long ulong;
@@ -74,13 +75,13 @@ typedef struct _Out {
static void dump_obj_to_xml(VALUE obj, Options copts, Out out);
static void dump_first_obj(VALUE obj, Out out);
-static void dump_obj(ID aid, VALUE obj, unsigned int depth, Out out);
-static void dump_gen_doc(VALUE obj, unsigned int depth, Out out);
-static void dump_gen_element(VALUE obj, unsigned int depth, Out out);
-static void dump_gen_instruct(VALUE obj, unsigned int depth, Out out);
+static void dump_obj(ID aid, VALUE obj, int depth, Out out);
+static void dump_gen_doc(VALUE obj, int depth, Out out);
+static void dump_gen_element(VALUE obj, int depth, Out out);
+static void dump_gen_instruct(VALUE obj, int depth, Out out);
static int dump_gen_attr(VALUE key, VALUE value, Out out);
-static int dump_gen_nodes(VALUE obj, unsigned int depth, Out out);
-static void dump_gen_val_node(VALUE obj, unsigned int depth,
+static int dump_gen_nodes(VALUE obj, int depth, Out out);
+static void dump_gen_val_node(VALUE obj, int depth,
const char *pre, size_t plen,
const char *suf, size_t slen, Out out);
@@ -102,7 +103,7 @@ static int is_xml_friendly(const uchar *str, int len);
static const char hex_chars[17] = "0123456789abcdef";
-static char xml_friendly_chars[256] = "\
+static char xml_friendly_chars[257] = "\
88888888811881888888888888888888\
11611156111111111111111111114141\
11111111111111111111111111111111\
@@ -553,12 +554,15 @@ dump_first_obj(VALUE obj, Out out) {
}
static void
-dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
+dump_obj(ID aid, VALUE obj, int depth, Out out) {
struct _Element e;
VALUE prev_obj = out->obj;
char value_buf[64];
int cnt;
+ if (MAX_DEPTH < depth) {
+ rb_raise(rb_eSysStackError, "maximum depth exceeded");
+ }
out->obj = obj;
if (0 == aid) {
/*e.attr.str = 0; */
@@ -985,7 +989,7 @@ dump_hash(VALUE key, VALUE value, Out out) {
}
static void
-dump_gen_doc(VALUE obj, unsigned int depth, Out out) {
+dump_gen_doc(VALUE obj, int depth, Out out) {
VALUE attrs = rb_attr_get(obj, ox_attributes_id);
VALUE nodes = rb_attr_get(obj, ox_nodes_id);
@@ -1018,7 +1022,7 @@ dump_gen_doc(VALUE obj, unsigned int depth, Out out) {
}
static void
-dump_gen_element(VALUE obj, unsigned int depth, Out out) {
+dump_gen_element(VALUE obj, int depth, Out out) {
VALUE rname = rb_attr_get(obj, ox_at_value_id);
VALUE attrs = rb_attr_get(obj, ox_attributes_id);
VALUE nodes = rb_attr_get(obj, ox_nodes_id);
@@ -1066,7 +1070,7 @@ dump_gen_element(VALUE obj, unsigned int depth, Out out) {
}
static void
-dump_gen_instruct(VALUE obj, unsigned int depth, Out out) {
+dump_gen_instruct(VALUE obj, int depth, Out out) {
VALUE rname = rb_attr_get(obj, ox_at_value_id);
VALUE attrs = rb_attr_get(obj, ox_attributes_id);
VALUE rcontent = rb_attr_get(obj, ox_at_content_id);
@@ -1100,7 +1104,7 @@ dump_gen_instruct(VALUE obj, unsigned int depth, Out out) {
}
static int
-dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
+dump_gen_nodes(VALUE obj, int depth, Out out) {
long cnt = RARRAY_LEN(obj);
int indent_needed = 1;
@@ -1109,6 +1113,9 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
VALUE clas;
int d2 = depth + 1;
+ if (MAX_DEPTH < depth) {
+ rb_raise(rb_eSysStackError, "maximum depth exceeded");
+ }
for (; 0 < cnt; cnt--, np++) {
clas = rb_obj_class(*np);
if (ox_element_clas == clas) {
@@ -1159,7 +1166,7 @@ dump_gen_attr(VALUE key, VALUE value, Out out) {
}
static void
-dump_gen_val_node(VALUE obj, unsigned int depth,
+dump_gen_val_node(VALUE obj, int depth,
const char *pre, size_t plen,
const char *suf, size_t slen, Out out) {
VALUE v = rb_attr_get(obj, ox_at_value_id);
View
@@ -1179,7 +1179,6 @@ ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
if ('&' == *s) {
int c = 0;
char *end;
- //int x = 0;
s++;
if ('#' == *s) {
@@ -1236,6 +1235,7 @@ ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
continue;
}
s = end + 1;
+ continue;
} else if (0 == strncasecmp(s, "lt;", 3)) {
c = '<';
s += 3;
View
@@ -1,5 +1,5 @@
module Ox
# Current version of the module.
- VERSION = '2.0.5'
+ VERSION = '2.0.6a1'
end
View
@@ -63,8 +63,8 @@
if files.empty?
$obj = do_sample ? sample_doc(2) : files('..')
$mars = Marshal.dump($obj)
- $xml = Ox.dump($obj, :indent => $indent, circular: $circular)
- $json = Oj.dump($obj, :indent => $indent, circular: $circular)
+ $xml = Ox.dump($obj, :indent => $indent, :circular => $circular)
+ $json = Oj.dump($obj, :indent => $indent, :circular => $circular)
File.open('sample.xml', 'w') { |f| f.write($xml) }
File.open('sample.json', 'w') { |f| f.write($json) }
File.open('sample.marshal', 'w') { |f| f.write($mars) }
@@ -75,7 +75,7 @@
$xml = File.read(f)
$obj = Ox.load($xml);
$mars = Marshal.dump($obj)
- $json = Oj.dump($obj, :indent => $indent, circular: $circular)
+ $json = Oj.dump($obj, :indent => $indent, :circular => $circular)
end
end
View
@@ -22,7 +22,7 @@ def parse_compare(xml, expected, handler_class = AllSax, opts = {}, handler_attr
input = StringIO.new(xml)
options = {
:symbolize => true,
- :convert_special => false,
+ :convert_special => true,
:smart => false
}.merge(opts)
@@ -34,4 +34,9 @@ def parse_compare(xml, expected, handler_class = AllSax, opts = {}, handler_attr
assert_equal(expected, actual)
end
+ # This is needed to stop test/unit from complaining that there is no test
+ # specified.
+ def test_hack
+ assert(true)
+ end
end
View
@@ -526,56 +526,59 @@ def test_sax_mixed
end
def test_sax_encoding
- if RUBY_VERSION.start_with?('1.8')
- assert(true)
- else
- parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
+ parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
<top>ピーター</top>
},
- [[:instruct, "xml"],
- [:attr, :version, "1.0"],
- [:attr, :encoding, "UTF-8"],
- [:end_instruct, 'xml'],
- [:start_element, :top],
- [:text, 'ピーター'],
- [:end_element, :top]])
- end
+ [[:instruct, "xml"],
+ [:attr, :version, "1.0"],
+ [:attr, :encoding, "UTF-8"],
+ [:end_instruct, 'xml'],
+ [:start_element, :top],
+ [:text, 'ピーター'],
+ [:end_element, :top]])
end
def test_sax_bom
- if RUBY_VERSION.start_with?('1.8')
- assert(true)
- else
- xml = %{\xEF\xBB\xBF<?xml?>
+ xml = %{\xEF\xBB\xBF<?xml?>
<top>ピーター</top>
}
+ if !RUBY_VERSION.start_with?('1.8')
xml.force_encoding('ASCII')
- parse_compare(xml,
- [[:instruct, "xml"],
- [:end_instruct, 'xml'],
- [:start_element, :top],
- [:text, 'ピーター'],
- [:end_element, :top]])
end
+ parse_compare(xml,
+ [[:instruct, "xml"],
+ [:end_instruct, 'xml'],
+ [:start_element, :top],
+ [:text, 'ピーター'],
+ [:end_element, :top]])
end
def test_sax_full_encoding
- if RUBY_VERSION.start_with?('1.8')
- assert(true)
- else
- parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
-<いち name="ピーター" つま="まきえ">ピーター</いち>
+ parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
+<いち name="ピーター" つま="まきえ">ピーター is katakana</いち>
},
- [[:instruct, "xml"],
- [:attr, :version, "1.0"],
- [:attr, :encoding, "UTF-8"],
- [:end_instruct, 'xml'],
- [:start_element, 'いち'.to_sym],
- [:attr, :name, 'ピーター'],
- [:attr, 'つま'.to_sym, 'まきえ'],
- [:text, 'ピーター'],
- [:end_element, 'いち'.to_sym]])
- end
+ [[:instruct, "xml"],
+ [:attr, :version, "1.0"],
+ [:attr, :encoding, "UTF-8"],
+ [:end_instruct, 'xml'],
+ [:start_element, 'いち'.to_sym],
+ [:attr, :name, 'ピーター'],
+ [:attr, 'つま'.to_sym, 'まきえ'],
+ [:text, 'ピーター is katakana'],
+ [:end_element, 'いち'.to_sym]])
+ end
+
+ def test_sax_amp_hash
+ parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
+<text>&#233; is e with an accent</text>
+},
+ [[:instruct, "xml"],
+ [:attr, :version, "1.0"],
+ [:attr, :encoding, "UTF-8"],
+ [:end_instruct, 'xml'],
+ [:start_element, :text],
+ [:text, 'é is e with an accent'],
+ [:end_element, :text]])
end
def test_sax_implied_encoding
View
@@ -500,6 +500,17 @@ def test_circular
end
end
+ # Verify that an exception is raised when an object with a circular reference is dumped without the :circular option.
+ def test_circular_limit
+ h = {}
+ h[:h] = h
+ begin
+ Ox.dump(h)
+ rescue Exception
+ assert(true)
+ end
+ end
+
def test_raw
raw = Ox::Element.new('raw')
su = Ox::Element.new('sushi')

0 comments on commit d3d3348

Please sign in to comment.