Skip to content

Commit

Permalink
[ruby/prism] added CP950 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Dhaval authored and kddnewton committed Nov 29, 2023
1 parent 57cb47b commit 9fada99
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/prism/prism.gemspec
Expand Up @@ -88,6 +88,7 @@ Gem::Specification.new do |spec|
"src/enc/pm_big5.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_cp949.c",
"src/enc/pm_cp950.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",
Expand Down
57 changes: 57 additions & 0 deletions prism/enc/pm_cp950.c
@@ -0,0 +1,57 @@
#include "prism/enc/pm_encoding.h"

/**
 * Report how many bytes the character starting at b occupies in CP950.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return 1 for a single-byte character (first byte below 0x80), 2 for a
 *     double-byte character (lead byte 0x81-0xFE followed by a trail byte in
 *     0x40-0x7E or 0xA1-0xFE), or 0 when neither form matches.
 */
static size_t
pm_encoding_cp950_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Anything below 0x80 is a complete single-byte character.
    if (lead < 0x80) {
        return 1;
    }

    // A double-byte character needs a second byte to be available and a lead
    // byte inside 0x81-0xFE; the trail byte is only read once both hold.
    if (n <= 1 || lead < 0x81 || lead > 0xFE) {
        return 0;
    }

    const uint8_t trail = b[1];

    // The trail byte must fall in one of the two permitted ranges.
    if (trail >= 0x40 && trail <= 0x7E) {
        return 2;
    }
    if (trail >= 0xA1 && trail <= 0xFE) {
        return 2;
    }

    return 0;
}

/**
 * Alphabetic check for the CP950 character starting at b.
 *
 * Only characters that pm_encoding_cp950_char_width reports as one byte wide
 * are handed to pm_encoding_ascii_alpha_char, whose result is returned
 * unchanged; every other input yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 */
static size_t
pm_encoding_cp950_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and unrecognized sequences are never reported as alphabetic.
    if (pm_encoding_cp950_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}

/**
 * Alphanumeric check for the CP950 character starting at b.
 *
 * Only characters that pm_encoding_cp950_char_width reports as one byte wide
 * are handed to pm_encoding_ascii_alnum_char, whose result is returned
 * unchanged; every other input yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 */
static size_t
pm_encoding_cp950_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and unrecognized sequences are never reported as alphanumeric.
    if (pm_encoding_cp950_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}

/**
 * Uppercase check for the CP950 character starting at b.
 *
 * Only characters that pm_encoding_cp950_char_width reports as one byte wide
 * are handed to pm_encoding_ascii_isupper_char, whose result is returned
 * unchanged; every other input yields false.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 */
static bool
pm_encoding_cp950_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and unrecognized sequences are never reported as uppercase.
    if (pm_encoding_cp950_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}

/**
 * Encoding descriptor for cp950: binds the name "cp950" to the CP950
 * character callbacks defined in this file and flags the encoding as
 * multibyte.
 */
pm_encoding_t pm_encoding_cp950 = {
    .name = "cp950",
    .multibyte = true,
    .char_width = pm_encoding_cp950_char_width,
    .alpha_char = pm_encoding_cp950_alpha_char,
    .alnum_char = pm_encoding_cp950_alnum_char,
    .isupper_char = pm_encoding_cp950_isupper_char
};
1 change: 1 addition & 0 deletions prism/enc/pm_encoding.h
Expand Up @@ -165,6 +165,7 @@ extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;
extern pm_encoding_t pm_encoding_cp855;
extern pm_encoding_t pm_encoding_cp949;
extern pm_encoding_t pm_encoding_cp950;
extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gb1988;
extern pm_encoding_t pm_encoding_gbk;
Expand Down
1 change: 1 addition & 0 deletions prism/prism.c
Expand Up @@ -6233,6 +6233,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
ENCODING1("CP936", pm_encoding_gbk);
ENCODING1("CP949", pm_encoding_cp949);
ENCODING1("CP950", pm_encoding_cp950);
ENCODING1("CP1250", pm_encoding_windows_1250);
ENCODING1("CP1251", pm_encoding_windows_1251);
ENCODING1("CP1252", pm_encoding_windows_1252);
Expand Down
1 change: 1 addition & 0 deletions test/prism/encoding_test.rb
Expand Up @@ -69,6 +69,7 @@ class EncodingTest < TestCase
Encoding::Big5_HKSCS => 0x00...0x10000,
Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP949 => 0x00...0x10000,
Encoding::CP950 => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,
Expand Down

0 comments on commit 9fada99

Please sign in to comment.