Skip to content

Commit 355f451

Browse files
Earlopain authored and kddnewton committed
Respect encoding option in Prism.lex and friends
UTF-8 is the default encoding for source files, but it can be overridden via options.
1 parent 62511d5 commit 355f451

2 files changed

Lines changed: 19 additions & 1 deletion

File tree

ext/prism/extension.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o
793793
parse_lex_data_t parse_lex_data = {
794794
.source = source,
795795
.tokens = rb_ary_new(),
796-
.encoding = rb_utf8_encoding(),
796+
.encoding = rb_enc_find(pm_parser_encoding_name(parser)),
797797
.freeze = pm_options_freeze(options),
798798
};
799799

test/prism/lex_test.rb

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,24 @@ def test_parse_lex_file
4747
end
4848
end
4949

50+
def test_lex_encoding
51+
tokens = Prism.lex('"わたし"', encoding: Encoding::Windows_31J).value
52+
tokens.each do |t|
53+
assert_equal(Encoding::Windows_31J, t[0].value.encoding)
54+
end
55+
56+
# Shebangs must appear on the first line. For these cases, the encoding
57+
# comment may appear second, but it should still change encoding.
58+
tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value
59+
#! /usr/bin/env ruby
60+
# encoding: utf-8
61+
"わたし"
62+
RUBY
63+
tokens.each do |t|
64+
assert_equal(Encoding::UTF_8, t[0].value.encoding)
65+
end
66+
end
67+
5068
if RUBY_VERSION >= "3.3"
5169
def test_lex_compat
5270
source = "foo bar"

0 commit comments

Comments
 (0)