Skip to content

Commit

Permalink
Support Encoding::Converter newline: :lf and :lf_newline options
Browse files Browse the repository at this point in the history
Previously, newline: :lf was accepted but ignored. The place where it
should have been handled contained only commented-out code that didn't
work, yet unlike all other invalid values, newline: :lf did not raise
an error.

This adds support for newline: :lf and :lf_newline, for consistency
with newline: :cr and :cr_newline.  This is basically the same as
universal_newline, except that it only affects writing and not
reading due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK.

Add tests for the File.open :newline option while here.

Fixes [Bug #12436]
  • Loading branch information
jeremyevans committed Aug 20, 2022
1 parent 1a2f992 commit 6f3857f
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 8 deletions.
20 changes: 20 additions & 0 deletions enc/trans/newline.trans
Expand Up @@ -17,10 +17,16 @@
map_cr["0a"] = "0d"

transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")

map_normalize = {}
map_normalize["{00-ff}"] = :func_so

transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline")
%>

<%= transcode_generated_code %>

#define lf_newline universal_newline
#define STATE (sp[0])
#define NORMAL 0
#define JUST_AFTER_CR 1
Expand Down Expand Up @@ -126,10 +132,24 @@ rb_cr_newline = {
0, 0, 0, 0
};

/*
 * Transcoder descriptor registered under the decorator name "lf_newline".
 *
 * `lf_newline` is #defined to `universal_newline` earlier in this file, so the
 * third initializer below resolves to the universal_newline entry, and the
 * state/callback/finish functions are the universal_newline ones as well.
 */
static const rb_transcoder
rb_lf_newline = {
"", "lf_newline", lf_newline,
TRANSCODE_TABLE_INFO,
1, /* input_unit_length */
1, /* max_input */
2, /* max_output */
asciicompat_converter, /* asciicompat_type */
/* NOTE(review): universal_newline_init is supplied for both the state_init
 * and the state_fini slot -- confirm this is intended. */
2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
/* Only the fourth callback slot is populated; presumably this is the func_so
 * hook that map_normalize routes every byte to -- verify against the
 * rb_transcoder declaration. */
0, 0, 0, fun_so_universal_newline,
universal_newline_finish
};

void
Init_newline(void)
{
rb_register_transcoder(&rb_universal_newline);
rb_register_transcoder(&rb_crlf_newline);
rb_register_transcoder(&rb_cr_newline);
rb_register_transcoder(&rb_lf_newline);
}
18 changes: 11 additions & 7 deletions include/ruby/internal/encoding/transcode.h
Expand Up @@ -476,16 +476,16 @@ enum ruby_econv_flag_type {
RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,

/** Decorators are there. */
RUBY_ECONV_DECORATOR_MASK = 0x0000ff00,
RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,

/** Newline converters are there. */
RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00,
RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,

/** (Unclear; seems unused). */
RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,

/** Mask of the newline decorators that apply only when writing. */
RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000,
RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,

/** Universal newline mode. */
RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
Expand All @@ -496,11 +496,14 @@ enum ruby_econv_flag_type {
/** LF to CR conversion shall happen. */
RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,

/** CRLF and CR to LF conversion shall happen (when writing). */
RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,

/** Texts shall be XML-escaped. */
RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000,
RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,

/** Texts shall be AttrValue escaped */
RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000,
RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,

/** (Unclear; seems unused). */
RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
Expand Down Expand Up @@ -529,6 +532,7 @@ enum ruby_econv_flag_type {
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
Expand All @@ -543,10 +547,10 @@ enum ruby_econv_flag_type {
*/

/** Indicates the input is a part of much larger one. */
RUBY_ECONV_PARTIAL_INPUT = 0x00010000,
RUBY_ECONV_PARTIAL_INPUT = 0x00020000,

/** Instructs the converter to stop after output. */
RUBY_ECONV_AFTER_OUTPUT = 0x00020000,
RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */

Expand Down
42 changes: 42 additions & 0 deletions test/ruby/test_file.rb
Expand Up @@ -460,6 +460,48 @@ def test_long_unc
end
end

# Exercises the +newline:+ option of File.open for :lf, :universal, :crlf and
# :cr, first on text-mode writes (checking the raw bytes that land on disk)
# and then on text-mode reads of LF-, CRLF- and CR-terminated content.
def test_file_open_newline_option
  Dir.mktmpdir(__method__.to_s) do |dir|
    target = File.join(dir, "foo")

    # Writes two lines in text mode with the given option and returns the
    # file's raw bytes.
    written_bytes = lambda do |mode|
      File.open(target, "wt", newline: mode) do |io|
        io.write "a\n"
        io.puts "b"
      end
      File.binread(target)
    end

    {
      lf: "a\nb\n",
      universal: "a\nb\n",
      crlf: "a\r\nb\r\n",
      cr: "a\rb\r",
    }.each do |mode, bytes|
      assert_equal(bytes, written_bytes.call(mode))
    end

    # Reads the whole file back in text mode with the given option.
    read_text = ->(mode) { File.open(target, "rt", newline: mode, &:read) }

    # LF-only content comes back untouched whatever the option is.
    File.binwrite(target, "a\nb\n")
    %i[lf universal crlf cr].each do |mode|
      assert_equal("a\nb\n", read_text.call(mode))
    end

    File.binwrite(target, "a\r\nb\r\n")
    assert_equal("a\r\nb\r\n", read_text.call(:lf))
    assert_equal("a\nb\n", read_text.call(:universal))
    # Work on both Windows and non-Windows
    assert_include(["a\r\nb\r\n", "a\nb\n"], read_text.call(:crlf))
    assert_equal("a\r\nb\r\n", read_text.call(:cr))

    File.binwrite(target, "a\rb\r")
    {
      lf: "a\rb\r",
      universal: "a\nb\n",
      crlf: "a\rb\r",
      cr: "a\rb\r",
    }.each do |mode, text|
      assert_equal(text, read_text.call(mode))
    end
  end
end

def test_open_nul
Dir.mktmpdir(__method__.to_s) do |tmpdir|
path = File.join(tmpdir, "foo")
Expand Down
2 changes: 2 additions & 0 deletions test/ruby/test_transcode.rb
Expand Up @@ -2305,5 +2305,7 @@ def test_newline_options
assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr))
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true))
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf))
assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true))
assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
end
end
30 changes: 29 additions & 1 deletion transcode.c
Expand Up @@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr;
static VALUE sym_universal_newline;
static VALUE sym_crlf_newline;
static VALUE sym_cr_newline;
static VALUE sym_lf_newline;
#ifdef ENABLE_ECONV_NEWLINE_OPTION
static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
#endif
Expand Down Expand Up @@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret)
case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
case ECONV_CRLF_NEWLINE_DECORATOR:
case ECONV_CR_NEWLINE_DECORATOR:
case ECONV_LF_NEWLINE_DECORATOR:
case 0:
break;
default:
Expand All @@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret)
decorators_ret[num_decorators++] = "crlf_newline";
if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "cr_newline";
if (ecflags & ECONV_LF_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "lf_newline";
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "universal_newline";

Expand Down Expand Up @@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec)
case ECONV_CR_NEWLINE_DECORATOR:
dname = "cr_newline";
break;
case ECONV_LF_NEWLINE_DECORATOR:
dname = "lf_newline";
break;
}

if (dname) {
Expand Down Expand Up @@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "cr_newline");
}
if (ecflags & ECONV_LF_NEWLINE_DECORATOR) {
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "lf_newline");
}
if (ecflags & ECONV_XML_TEXT_DECORATOR) {
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "xml_text");
Expand Down Expand Up @@ -2515,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags)
ecflags |= ECONV_CR_NEWLINE_DECORATOR;
}
else if (v == sym_lf) {
/* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */
ecflags |= ECONV_LF_NEWLINE_DECORATOR;
}
else if (SYMBOL_P(v)) {
rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
Expand Down Expand Up @@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags)
setflags |= ECONV_CR_NEWLINE_DECORATOR;
newlineflag |= !NIL_P(v);

v = rb_hash_aref(opt, sym_lf_newline);
if (RTEST(v))
setflags |= ECONV_LF_NEWLINE_DECORATOR;
newlineflag |= !NIL_P(v);

switch (newlineflag) {
case 1:
ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
Expand Down Expand Up @@ -3281,18 +3297,21 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* :undef => :replace # replace undefined conversion
* :replace => string # replacement string ("?" or "\uFFFD" if not specified)
* :newline => :universal # decorator for converting CRLF and CR to LF
* :newline => :lf # decorator for converting CRLF and CR to LF when writing
* :newline => :crlf # decorator for converting LF to CRLF
* :newline => :cr # decorator for converting LF to CR
* :universal_newline => true # decorator for converting CRLF and CR to LF
* :crlf_newline => true # decorator for converting LF to CRLF
* :cr_newline => true # decorator for converting LF to CR
* :lf_newline => true # decorator for converting CRLF and CR to LF when writing
* :xml => :text # escape as XML CharData.
* :xml => :attr # escape as XML AttValue
* integer form:
* Encoding::Converter::INVALID_REPLACE
* Encoding::Converter::UNDEF_REPLACE
* Encoding::Converter::UNDEF_HEX_CHARREF
* Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
* Encoding::Converter::LF_NEWLINE_DECORATOR
* Encoding::Converter::CRLF_NEWLINE_DECORATOR
* Encoding::Converter::CR_NEWLINE_DECORATOR
* Encoding::Converter::XML_TEXT_DECORATOR
Expand Down Expand Up @@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* Convert LF to CRLF.
* [:cr_newline => true]
* Convert LF to CR.
* [:lf_newline => true]
* Convert CRLF and CR to LF (when writing).
* [:xml => :text]
* Escape as XML CharData.
* This form can be used as an HTML 4.0 #PCDATA.
Expand Down Expand Up @@ -4437,6 +4458,7 @@ Init_transcode(void)
sym_universal_newline = ID2SYM(rb_intern_const("universal_newline"));
sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline"));
sym_cr_newline = ID2SYM(rb_intern_const("cr_newline"));
sym_lf_newline = ID2SYM(rb_intern("lf_newline"));
sym_partial_input = ID2SYM(rb_intern_const("partial_input"));

#ifdef ENABLE_ECONV_NEWLINE_OPTION
Expand Down Expand Up @@ -4533,6 +4555,12 @@ InitVM_transcode(void)
*/
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));

/* Document-const: LF_NEWLINE_DECORATOR
*
* Decorator for converting CRLF and CR to LF when writing
*/
rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR));

/* Document-const: CRLF_NEWLINE_DECORATOR
*
* Decorator for converting LF to CRLF
Expand Down

0 comments on commit 6f3857f

Please sign in to comment.