Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Common options #3204

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 31 additions & 12 deletions ext/nokogiri/gumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,32 @@ parse_cleanup(VALUE parse_args)
return Qnil;
}

// Build the GumboOptions shared by the document parse and the fragment parse
// from the keyword-argument hash.
//
// The keywords max_attributes, max_errors, and max_tree_depth are all handed
// to rb_get_kwargs() as required (with zero optional keywords), so an error is
// raised if any of them is missing. Each extracted value is converted with
// NUM2INT. Every other field keeps its value from kGumboDefaultOptions.
static GumboOptions
common_options(VALUE kwargs) {
  // Slot indices shared by the keyword table and the extracted values, so the
  // two arrays cannot drift out of step with each other.
  enum {
    KW_MAX_ATTRIBUTES,
    KW_MAX_ERRORS,
    KW_MAX_TREE_DEPTH,
    KW_COUNT
  };

  ID names[KW_COUNT];
  names[KW_MAX_ATTRIBUTES] = rb_intern_const("max_attributes");
  names[KW_MAX_ERRORS] = rb_intern_const("max_errors");
  names[KW_MAX_TREE_DEPTH] = rb_intern_const("max_tree_depth");

  // Extract the values corresponding to the required keywords.
  VALUE extracted[KW_COUNT];
  rb_get_kwargs(kwargs, names, KW_COUNT, 0, extracted);

  // Start from the library defaults and override only the three limits.
  GumboOptions result = kGumboDefaultOptions;
  result.max_attributes = NUM2INT(extracted[KW_MAX_ATTRIBUTES]);
  result.max_errors = NUM2INT(extracted[KW_MAX_ERRORS]);
  result.max_tree_depth = NUM2INT(extracted[KW_MAX_TREE_DEPTH]);

  return result;
}

static VALUE parse_continue(VALUE parse_args);

/*
Expand All @@ -331,10 +357,7 @@ rb_gumbo_s_parse(int argc, VALUE *argv, VALUE _self)
kwargs = rb_hash_new();
}

GumboOptions options = kGumboDefaultOptions;
options.max_attributes = NUM2INT(rb_hash_aref(kwargs, ID2SYM(rb_intern_const("max_attributes"))));
options.max_errors = NUM2INT(rb_hash_aref(kwargs, ID2SYM(rb_intern_const("max_errors"))));
options.max_tree_depth = NUM2INT(rb_hash_aref(kwargs, ID2SYM(rb_intern_const("max_tree_depth"))));
GumboOptions options = common_options(kwargs);

GumboOutput *output = perform_parse(&options, input);
ParseArgs args = {
Expand Down Expand Up @@ -440,6 +463,8 @@ rb_gumbo_s_fragment(int argc, VALUE *argv, VALUE _self)
kwargs = rb_hash_new();
}

GumboOptions options = common_options(kwargs);

if (NIL_P(ctx)) {
ctx_tag = "body";
ctx_ns = GUMBO_NAMESPACE_HTML;
Expand Down Expand Up @@ -543,14 +568,8 @@ rb_gumbo_s_fragment(int argc, VALUE *argv, VALUE _self)
}

// Perform a fragment parse.
GumboOptions options = kGumboDefaultOptions;
options.max_attributes = NUM2INT(rb_hash_aref(kwargs, ID2SYM(rb_intern_const("max_attributes"))));
options.max_errors = NUM2INT(rb_hash_aref(kwargs, ID2SYM(rb_intern_const("max_errors"))));

// Add one to account for the HTML element.
int depth = NUM2INT(rb_hash_aref(kwargs, ID2SYM(rb_intern_const("max_tree_depth"))));
options.max_tree_depth = depth < 0 ? -1 : (depth + 1);

// Add one to the max tree depth to account for the HTML element.
options.max_tree_depth = options.max_tree_depth < 0 ? -1 : (options.max_tree_depth + 1);
options.fragment_context = ctx_tag;
options.fragment_namespace = ctx_ns;
options.fragment_encoding = encoding;
Expand Down
2 changes: 1 addition & 1 deletion lib/nokogiri/html5/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def do_parse(string_or_io, url, encoding, **options)
string = HTML5.read_and_encode(string_or_io, encoding)

options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
options[:max_errors] ||= options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH

doc = Nokogiri::Gumbo.parse(string, url, self, **options)
Expand Down
2 changes: 1 addition & 1 deletion lib/nokogiri/html5/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def initialize(doc, tags = nil, ctx = nil, options = {}) # rubocop:disable Lint/
tags = Nokogiri::HTML5.read_and_encode(tags, nil)

options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
options[:max_errors] ||= options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH

Nokogiri::Gumbo.fragment(self, tags, ctx, **options)
Expand Down
4 changes: 2 additions & 2 deletions test/html5/test_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def round_trip_through(str, enc)
define_method("test_parse_encoded_#{enc[0]}".to_sym) do
html = "<!DOCTYPE html><span>#{enc[1]}</span>"
encoded_html = round_trip_through(html, enc[0])
doc = Nokogiri::HTML5(encoded_html, encoding: enc[0])
doc = Nokogiri::HTML5(encoded_html, enc[0])
span = doc.at("/html/body/span")
refute_nil span
assert_equal enc[1], span.content
Expand All @@ -210,7 +210,7 @@ def round_trip_through(str, enc)
skip "https://bugs.ruby-lang.org/issues/15033" if enc[0] == "ISO-2022-JP"
round_trip_through(enc[1], enc[0])
encoded = encodings_doc.serialize(encoding: enc[0])
doc = Nokogiri::HTML5(encoded, encoding: enc[0])
doc = Nokogiri::HTML5(encoded, enc[0])
assert_equal encodings_html, doc.serialize
end
end
Expand Down
Loading