Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize ERB::Util.html_escape more than CGI.escapeHTML for template engines #27

Merged
merged 3 commits into from
Nov 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
/spec/reports/
/tmp/
Gemfile.lock
*.so
*.gem
7 changes: 5 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@ source 'https://rubygems.org'

gemspec

gem 'rake'
gem 'test-unit'
group :development do
gem 'rake'
gem 'rake-compiler'
gem 'test-unit'
end
6 changes: 6 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ Rake::TestTask.new(:test) do |t|
t.test_files = FileList['test/**/test_*.rb']
end

# The C extension is not built on JRuby; on every other engine, register the
# extension task and make the test task depend on compiling it.
unless RUBY_ENGINE == 'jruby'
  require 'rake/extensiontask'
  Rake::ExtensionTask.new('erb')
  task test: :compile
end

task :sync_tool do
require 'fileutils'
FileUtils.cp '../ruby/tool/lib/core_assertions.rb', './test/lib'
Expand Down
5 changes: 4 additions & 1 deletion erb.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ Gem::Specification.new do |spec|
spec.executables = ['erb']
spec.require_paths = ['lib']

if RUBY_ENGINE != 'jruby'
if RUBY_ENGINE == 'jruby'
spec.platform = 'java'
else
spec.required_ruby_version = '>= 2.7.0'
spec.extensions = ['ext/erb/extconf.rb']
end

spec.add_dependency 'cgi', '>= 0.3.3'
Expand Down
89 changes: 89 additions & 0 deletions ext/erb/erb.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "ruby.h"
#include "ruby/encoding.h"

static VALUE rb_cERB, rb_mEscape;

#define HTML_ESCAPE_MAX_LEN 6

/*
 * Lookup table indexed by byte value.  For each byte that has to be escaped,
 * `str` holds the replacement HTML entity and `len` its length in bytes.
 * All other entries are zero-initialized, so `len == 0` means "copy the byte
 * through unchanged".  The longest entity ("&quot;") is exactly
 * HTML_ESCAPE_MAX_LEN bytes, which is what sizes the `str` field.
 */
static const struct {
    uint8_t len;
    char str[HTML_ESCAPE_MAX_LEN+1];
} html_escape_table[UCHAR_MAX+1] = {
#define HTML_ESCAPE(c, str) [c] = {rb_strlen_lit(str), str}
    HTML_ESCAPE('\'', "&#39;"),
    HTML_ESCAPE('&',  "&amp;"),
    HTML_ESCAPE('"',  "&quot;"),
    HTML_ESCAPE('<',  "&lt;"),
    HTML_ESCAPE('>',  "&gt;"),
#undef HTML_ESCAPE
};

/* Give `dest` the same encoding as `orig`. */
static inline void
preserve_original_state(VALUE orig, VALUE dest)
{
    rb_encoding *source_enc = rb_enc_get(orig);

    rb_enc_associate(dest, source_enc);
}

/*
 * Upper bound, in bytes, for the escaped form of `str`: every input byte may
 * grow to at most HTML_ESCAPE_MAX_LEN bytes.  Reports a size overflow via
 * ruby_malloc_size_overflow() instead of letting the multiplication wrap.
 */
static inline long
escaped_length(VALUE str)
{
    const long src_len = RSTRING_LEN(str);
    const long overflow_threshold = LONG_MAX / HTML_ESCAPE_MAX_LEN;

    if (src_len >= overflow_threshold) {
        ruby_malloc_size_overflow(src_len, HTML_ESCAPE_MAX_LEN);
    }
    return src_len * HTML_ESCAPE_MAX_LEN;
}

/*
 * Escape the bytes of `str` that have an entry in html_escape_table.
 *
 * The input is expanded into a temporary buffer sized for the worst case
 * (see escaped_length()).  A new String is allocated only when the output is
 * longer than the input, i.e. when at least one byte was replaced; otherwise
 * `str` itself is returned.  A newly created String gets the encoding of
 * `str`.
 */
static VALUE
optimized_escape_html(VALUE str)
{
    VALUE vbuf;
    char *buf = ALLOCV_N(char, vbuf, escaped_length(str));
    const char *cstr = RSTRING_PTR(str);
    const char *end = cstr + RSTRING_LEN(str);

    char *dest = buf;
    while (cstr < end) {
        /* Index the table with an unsigned value so bytes >= 0x80 stay in range. */
        const unsigned char c = *cstr++;
        uint8_t len = html_escape_table[c].len;
        if (len) {
            memcpy(dest, html_escape_table[c].str, len);
            dest += len;
        }
        else {
            *dest++ = c;
        }
    }

    /* Every replacement is longer than one byte, so equal lengths mean no change. */
    VALUE escaped = str;
    if (RSTRING_LEN(str) < (dest - buf)) {
        escaped = rb_str_new(buf, dest - buf);
        preserve_original_state(str, escaped);
    }
    ALLOCV_END(vbuf);
    return escaped;
}

// ERB::Util.html_escape differs from CGI.escapeHTML in two ways:
//   * a non-String argument is first converted with #to_s
//   * when nothing needs escaping, the argument string is returned as is
//     rather than a newly allocated copy
// Strings whose encoding is not ASCII-compatible are passed on to the super
// method instead of the byte-wise fast path.
static VALUE
erb_escape_html(VALUE self, VALUE str)
{
    str = rb_convert_type(str, T_STRING, "String", "to_s");

    if (!rb_enc_str_asciicompat_p(str)) {
        return rb_call_super(1, &str);
    }
    return optimized_escape_html(str);
}

/*
 * Initialization function of the `erb` extension.  Sets up the ERB class and
 * the ERB::Escape module handles and registers ERB::Escape#html_escape
 * (one argument), implemented by erb_escape_html.
 */
void
Init_erb(void)
{
// ERB under Object, then the Escape module nested inside it.
rb_cERB = rb_define_class("ERB", rb_cObject);
rb_mEscape = rb_define_module_under(rb_cERB, "Escape");
// Defined as an instance method of the module: erb_escape_html falls back
// to rb_call_super, so another html_escape is expected behind this one.
rb_define_method(rb_mEscape, "html_escape", erb_escape_html, 1);
}
2 changes: 2 additions & 0 deletions ext/erb/extconf.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build script for the C extension: writes the Makefile for the target named
# 'erb' (the same name the library later loads as 'erb.so').
require 'mkmf'
create_makefile 'erb'
12 changes: 11 additions & 1 deletion lib/erb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,6 @@ def def_class(superklass=Object, methodname='result')
class ERB
# A utility module for conversion routines, often handy in HTML generation.
module Util
public
#
# A utility method for escaping HTML tag characters in _s_.
#
Expand All @@ -1002,6 +1001,17 @@ module Util
def html_escape(s)
CGI.escapeHTML(s.to_s)
end
end

# Load the optional C extension.  If it cannot be loaded, the LoadError is
# deliberately ignored and the CGI.escapeHTML-based html_escape defined above
# stays in effect.
begin
require 'erb.so'
rescue LoadError
else
# Extension loaded: keep the Escape constant private and put the module in
# front of Util so that its html_escape is looked up first.
private_constant :Escape
Util.prepend(Escape)
end

module Util
alias h html_escape
module_function :h
module_function :html_escape
Expand Down
13 changes: 13 additions & 0 deletions test/erb/test_erb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,24 @@ def test_html_escape
assert_equal("", ERB::Util.html_escape(""))
assert_equal("abc", ERB::Util.html_escape("abc"))
assert_equal("&lt;&lt;", ERB::Util.html_escape("<\<"))
assert_equal("&#39;&amp;&quot;&gt;&lt;", ERB::Util.html_escape("'&\"><"))

assert_equal("", ERB::Util.html_escape(nil))
assert_equal("123", ERB::Util.html_escape(123))
end

# html_escape must convert a non-String argument through its own #to_s.
def test_html_escape_to_s
  stringish = Object.new
  stringish.define_singleton_method(:to_s) { "object" }
  assert_equal("object", ERB::Util.html_escape(stringish))
end

# On CRuby, html_escape is expected to have no Ruby source location, which is
# how this test detects that the extension's method is the one in use.
if RUBY_ENGINE == 'ruby'
  def test_html_escape_extension
    assert_nil(ERB::Util.method(:html_escape).source_location)
  end
end

def test_concurrent_default_binding
# This test randomly fails with JRuby -- NameError: undefined local variable or method `template2'
pend if RUBY_ENGINE == 'jruby'
Expand Down