Skip to content

Commit

Permalink
added skip to sax parser
Browse files Browse the repository at this point in the history
  • Loading branch information
ohler55 committed Jun 18, 2014
1 parent bb73431 commit a7ddbd5
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 8 deletions.
11 changes: 10 additions & 1 deletion ext/ox/ox.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,8 @@ load_file(int argc, VALUE *argv, VALUE self) {
* @param [Hash] options parse options
* @param [true|false] :convert_special flag indicating special characters like < are converted
* @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names
* @param [true|false] :smart flag indicating the parser use hints if available (use with html)
* @param [true|false] :smart flag indicating the parser uses hints if available (use with html)
* @param [:skip_return|:skip_white] :skip flag indicating the parser skips \r or collapses white space into a single space. Default is to skip nothing.
*/
static VALUE
sax_parse(int argc, VALUE *argv, VALUE self) {
Expand All @@ -720,6 +721,7 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
options.symbolize = 1;
options.convert_special = 0;
options.smart = 0;
options.skip = NoSkip;

if (argc < 2) {
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
Expand All @@ -737,6 +739,13 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
options.symbolize = (Qtrue == v);
}
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
if (skip_return_sym == v) {
options.skip = CrSkip;
} else if (skip_white_sym == v) {
options.skip = SpcSkip;
}
}
}
ox_sax_parse(argv[0], argv[1], &options);

Expand Down
15 changes: 11 additions & 4 deletions ext/ox/sax.c
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,7 @@ read_element_end(SaxDrive dr) {

static char
read_text(SaxDrive dr) {
VALUE args[1];
char c;
int line = dr->buf.line;
int col = dr->buf.col - 1;
Expand All @@ -923,8 +924,6 @@ read_text(SaxDrive dr) {
*(dr->buf.tail - 1) = '\0';
}
if (dr->has.value) {
VALUE args[1];

if (dr->has.line) {
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
}
Expand All @@ -934,11 +933,19 @@ read_text(SaxDrive dr) {
*args = dr->value_obj;
rb_funcall2(dr->handler, ox_value_id, 1, args);
} else if (dr->has.text) {
VALUE args[1];

if (dr->options.convert_special) {
ox_sax_collapse_special(dr, dr->buf.str, line, col);
}
switch (dr->options.skip) {
case CrSkip:
buf_collapse_return(dr->buf.str);
break;
case SpcSkip:
buf_collapse_white(dr->buf.str);
break;
default:
break;
}
args[0] = rb_str_new2(dr->buf.str);
#if HAS_ENCODING_SUPPORT
if (0 != dr->encoding) {
Expand Down
3 changes: 3 additions & 0 deletions ext/ox/sax.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@
#include "sax_has.h"
#include "sax_stack.h"
#include "sax_hint.h"
#include "ox.h"

/* Options controlling a single SAX parse. sax_parse() fills in the defaults
 * noted below and then overrides them from the options Hash passed by the
 * caller.
 */
typedef struct _SaxOptions {
int symbolize;		/* non-zero: symbolize element and attribute names (sax_parse default 1) */
int convert_special;	/* non-zero: convert special characters like < (sax_parse default 0) */
int smart;		/* non-zero: use hints if available, intended for html (sax_parse default 0) */
SkipMode skip;		/* NoSkip (sax_parse default), CrSkip to drop \r, or SpcSkip to collapse white space */
} *SaxOptions;

typedef struct _SaxDrive {
Expand All @@ -60,6 +62,7 @@ typedef struct _SaxDrive {
#endif
} *SaxDrive;

extern void ox_collapse_return(char *str);
extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options);
extern void ox_sax_drive_cleanup(SaxDrive dr);
extern void ox_sax_drive_error(SaxDrive dr, const char *msg);
Expand Down
10 changes: 10 additions & 0 deletions ext/ox/sax_as.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,16 @@ sax_value_as_s(VALUE self) {
if (dr->options.convert_special) {
ox_sax_collapse_special(dr, dr->buf.str, dr->buf.line, dr->buf.col);
}
switch (dr->options.skip) {
case CrSkip:
buf_collapse_return(dr->buf.str);
break;
case SpcSkip:
buf_collapse_white(dr->buf.str);
break;
default:
break;
}
rs = rb_str_new2(dr->buf.str);
#if HAS_ENCODING_SUPPORT
if (0 != dr->encoding) {
Expand Down
36 changes: 36 additions & 0 deletions ext/ox/sax_buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,40 @@ buf_checkback(Buf buf, CheckPt cp) {
return cp->c;
}

/* Removes every carriage return ('\r') from the NUL terminated string str.
 * The string is rewritten in place, so it can only stay the same length or
 * get shorter. All other characters keep their relative order.
 */
static inline void
buf_collapse_return(char *str) {
    const char	*src = str;
    char	*dst = str;

    while ('\0' != *src) {
	char	ch = *src++;

	if ('\r' == ch) {
	    continue; /* dropped, the write position does not advance */
	}
	*dst++ = ch;
    }
    *dst = '\0';
}

/* Collapses each run of white space in the NUL terminated string str into a
 * single space character. White space here means ' ', '\t', '\f', '\n', and
 * '\r'. The string is rewritten in place. Leading and trailing runs are kept
 * as one space each, they are not trimmed.
 */
static inline void
buf_collapse_white(char *str) {
    const char	*src;
    char	*dst = str;

    for (src = str; '\0' != *src; src++) {
	char	ch = *src;
	int	is_white = (' ' == ch || '\t' == ch || '\f' == ch || '\n' == ch || '\r' == ch);

	if (!is_white) {
	    *dst++ = ch;
	} else if (dst == str || ' ' != dst[-1]) {
	    /* Only the first white space character of a run emits a space. */
	    *dst++ = ' ';
	}
    }
    *dst = '\0';
}

#endif /* __OX_SAX_BUF_H__ */
2 changes: 1 addition & 1 deletion lib/ox/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Namespace of the Ox library.
module Ox
  # Current version of the module.
  #
  # Assigned exactly once: a second assignment in the same module body would
  # make Ruby warn about an already initialized constant.
  VERSION = '2.1.2b2'
end
1 change: 0 additions & 1 deletion test/sax/helpers.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
require 'handlers'


module SaxTestHelpers

# A helper method to initiate a sax parsing using a specified xml
Expand Down
21 changes: 21 additions & 0 deletions test/sax/sax_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,27 @@ def test_sax_value_string
assert_equal('cheese', handler.item)
end

def test_sax_skip_none
  # No :skip option is given, so the text is expected to reach the handler
  # with its carriage return and newline untouched.
  sax = TypeSax.new(:as_s)
  Ox.sax_parse(sax, StringIO.new(%{<top> Pete\r\n Ohler</top>}))
  assert_equal(%{ Pete\r\n Ohler}, sax.item)
end

def test_sax_skip_return
  # :skip => :skip_return is expected to drop the \r and keep the \n.
  sax = TypeSax.new(:as_s)
  input = StringIO.new(%{<top> Pete\r\n Ohler</top>})
  Ox.sax_parse(sax, input, :skip => :skip_return)
  assert_equal(%{ Pete\n Ohler}, sax.item)
end

def test_sax_skip_white
  # :skip => :skip_white is expected to fold each run of white space into a
  # single space.
  sax = TypeSax.new(:as_s)
  input = StringIO.new(%{<top> Pete\r\n Ohler</top>})
  Ox.sax_parse(sax, input, :skip => :skip_white)
  assert_equal(%{ Pete Ohler}, sax.item)
end

def test_sax_value_float
handler = TypeSax.new(:as_f)
Ox.sax_parse(handler, StringIO.new(%{<top>7</top>}))
Expand Down
1 change: 0 additions & 1 deletion test/tests.rb
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ def test_skip_none
xml = %{<top> Pete\r\n Ohler</top>}
doc = Ox.load(xml, :mode => :generic, :symbolize_keys => false, :skip => :skip_none)
x2 = Ox.dump(doc, :indent => 0)
puts "*** '#{x2}'"
assert_equal(%{\n<top> Pete\r\n Ohler</top>\n}, x2)
end

Expand Down

0 comments on commit a7ddbd5

Please sign in to comment.