From 3ef54d1b36bfec75f0eb6c9de8c9cb5e71d25bcd Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Mon, 27 Feb 2012 17:21:00 -0800 Subject: [PATCH] * ext/psych/parser.c: prevent a memory leak by protecting calls to handler callbacks. * test/psych/test_parser.rb: test to demonstrate leak. --- CHANGELOG.rdoc | 6 ++ ext/psych/parser.c | 143 +++++++++++++++++++++++++++++++------- test/psych/test_parser.rb | 30 ++++++++ 3 files changed, 155 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc index 50667a3a..24b0854b 100644 --- a/CHANGELOG.rdoc +++ b/CHANGELOG.rdoc @@ -1,3 +1,9 @@ +Fri Feb 24 13:54:33 2012 Aaron Patterson + + * ext/psych/parser.c: prevent a memory leak by protecting calls to + handler callbacks. + * test/psych/test_parser.rb: test to demonstrate leak. + Fri Feb 24 08:08:38 2012 Aaron Patterson * ext/psych/parser.c: set parser encoding based on the YAML input diff --git a/ext/psych/parser.c b/ext/psych/parser.c index 98206860..9808c6b6 100644 --- a/ext/psych/parser.c +++ b/ext/psych/parser.c @@ -154,6 +154,68 @@ static VALUE transcode_io(VALUE src, int * parser_encoding) #endif +static VALUE protected_start_stream(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_start_stream, 1, args[1]); +} + +static VALUE protected_start_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_document, 3, args + 1); +} + +static VALUE protected_end_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_end_document, 1, args[1]); +} + +static VALUE protected_alias(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_alias, 1, args[1]); +} + +static VALUE protected_scalar(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_scalar, 6, args + 1); +} + +static VALUE protected_start_sequence(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_sequence, 4, args + 1); +} + +static VALUE protected_end_sequence(VALUE handler) +{ + return rb_funcall(handler, id_end_sequence, 0); +} + +static VALUE protected_start_mapping(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_mapping, 4, args + 1); +} + +static VALUE protected_end_mapping(VALUE handler) +{ + return rb_funcall(handler, id_end_mapping, 0); +} + +static VALUE protected_empty(VALUE handler) +{ + return rb_funcall(handler, id_empty, 0); +} + +static VALUE protected_end_stream(VALUE handler) +{ + return rb_funcall(handler, id_end_stream, 0); +} + /* * call-seq: * parser.parse(yaml) @@ -170,6 +232,7 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) yaml_event_t event; int done = 0; int tainted = 0; + int state = 0; int parser_encoding = YAML_ANY_ENCODING; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_utf8_encindex(); @@ -223,14 +286,18 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) } switch(event.type) { - case YAML_STREAM_START_EVENT: - - rb_funcall(handler, id_start_stream, 1, - INT2NUM((long)event.data.stream_start.encoding) - ); - break; + case YAML_STREAM_START_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = INT2NUM((long)event.data.stream_start.encoding); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; case YAML_DOCUMENT_START_EVENT: { + VALUE args[4]; /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ @@ -268,19 +335,25 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } - rb_funcall(handler, id_start_document, 3, - version, tag_directives, - event.data.document_start.implicit == 1 ? Qtrue : Qfalse - ); + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); } break; case YAML_DOCUMENT_END_EVENT: - rb_funcall(handler, id_end_document, 1, - event.data.document_end.implicit == 1 ? Qtrue : Qfalse - ); + { + VALUE args[2]; + + args[0] = handler; + args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } break; case YAML_ALIAS_EVENT: { + VALUE args[2]; VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); @@ -290,11 +363,14 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) #endif } - rb_funcall(handler, id_alias, 1, alias); + args[0] = handler; + args[1] = alias; + rb_protect(protected_alias, (VALUE)args, &state); } break; case YAML_SCALAR_EVENT: { + VALUE args[7]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; @@ -332,12 +408,19 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) style = INT2NUM((long)event.data.scalar.style); - rb_funcall(handler, id_scalar, 6, - val, anchor, tag, plain_implicit, quoted_implicit, style); + args[0] = handler; + args[1] = val; + args[2] = anchor; + args[3] = tag; + args[4] = plain_implicit; + args[5] = quoted_implicit; + args[6] = style; + rb_protect(protected_scalar, (VALUE)args, &state); } break; case YAML_SEQUENCE_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -363,15 +446,21 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) style = INT2NUM((long)event.data.sequence_start.style); - rb_funcall(handler, id_start_sequence, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_sequence, (VALUE)args, &state); } break; case YAML_SEQUENCE_END_EVENT: - rb_funcall(handler, id_end_sequence, 0); + rb_protect(protected_end_sequence, handler, &state); break; case YAML_MAPPING_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -396,22 +485,28 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) style = INT2NUM((long)event.data.mapping_start.style); - rb_funcall(handler, id_start_mapping, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_mapping, (VALUE)args, &state); } break; case YAML_MAPPING_END_EVENT: - rb_funcall(handler, id_end_mapping, 0); + rb_protect(protected_end_mapping, handler, &state); break; case YAML_NO_EVENT: - rb_funcall(handler, id_empty, 0); + rb_protect(protected_empty, handler, &state); break; case YAML_STREAM_END_EVENT: - rb_funcall(handler, id_end_stream, 0); + rb_protect(protected_end_stream, handler, &state); done = 1; break; } yaml_event_delete(&event); + if (state) rb_jump_tag(state); } return self; diff --git a/test/psych/test_parser.rb b/test/psych/test_parser.rb index cfbfb616..d8c53f2d 100644 --- a/test/psych/test_parser.rb +++ b/test/psych/test_parser.rb @@ -32,6 +32,36 @@ def setup @handler.parser = @parser end + def test_exception_memory_leak + yaml = <<-eoyaml +%YAML 1.1 +%TAG ! tag:tenderlovemaking.com,2009: +--- &ponies +- first element +- *ponies +- foo: bar +... + eoyaml + + [:start_stream, :start_document, :end_document, :alias, :scalar, + :start_sequence, :end_sequence, :start_mapping, :end_mapping, + :end_stream].each do |method| + + klass = Class.new(Psych::Handler) do + define_method(method) do |*args| + raise + end + end + + parser = Psych::Parser.new klass.new + 2.times { + assert_raises(RuntimeError, method.to_s) do + parser.parse yaml + end + } + end + end + def test_multiparse 3.times do @parser.parse '--- foo'