Skip to content

Commit ebe8349

Browse files
committed
Add cp852 encoding
1 parent e77b549 commit ebe8349

File tree

5 files changed

+124
-46
lines changed

5 files changed

+124
-46
lines changed

bin/encodings

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ case ARGV[0].downcase
4949
when "ascii" then table(Encoding::ASCII_8BIT)
5050
when "ascii-8bit" then table(Encoding::ASCII_8BIT)
5151
when "big5" then lists("big5", 0...0x10000, Encoding::Big5)
52+
when "cp850" then table(Encoding::CP850)
53+
when "cp852" then table(Encoding::CP852)
5254
when "euc-jp" then lists("euc-jp", 0...0x10000, Encoding::EUC_JP)
5355
when "gbk" then lists("gbk", 0...0x10000, Encoding::GBK)
5456
when "iso-8859-1" then table(Encoding::ISO8859_1)

include/prism/enc/pm_encoding.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ extern const uint8_t pm_encoding_unicode_table[256];
158158
extern pm_encoding_t pm_encoding_ascii;
159159
extern pm_encoding_t pm_encoding_ascii_8bit;
160160
extern pm_encoding_t pm_encoding_big5;
161+
extern pm_encoding_t pm_encoding_cp850;
162+
extern pm_encoding_t pm_encoding_cp852;
161163
extern pm_encoding_t pm_encoding_cp51932;
162164
extern pm_encoding_t pm_encoding_euc_jp;
163165
extern pm_encoding_t pm_encoding_gbk;

src/enc/pm_tables.c

Lines changed: 98 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,54 @@ static uint8_t pm_encoding_ascii_table[256] = {
2424
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
2525
};
2626

27+
/**
28+
* Each element of the following table is a bitfield describing the CP850 byte
29+
* whose value equals the element's index (256 entries, one per byte value).
*
* As written here only three index ranges are nonzero: 0x30-0x39 hold 2,
* 0x41-0x5A hold 7 and 0x61-0x7A hold 3. Every other entry is 0.
*
* The bits are presumably the flags tested by the helpers that
* PRISM_ENCODING_TABLE(cp850) generates (e.g. PRISM_ENCODING_UPPERCASE_BIT);
* the flag values themselves are not visible here - TODO confirm.
*
* NOTE(review): every entry for bytes 0x80-0xFF is 0, so no high byte is
* classified at all. Confirm against the CP850 code page that this is the
* intended classification and not a placeholder.
30+
*/
31+
static uint8_t pm_encoding_cp850_table[256] = {
32+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
33+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
34+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
35+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
36+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
37+
0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
38+
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
39+
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
40+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
41+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
42+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
43+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
44+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
45+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
46+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
47+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
48+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
49+
};
50+
51+
/**
52+
* Each element of the following table is a bitfield describing the CP852 byte
53+
* whose value equals the element's index (256 entries, one per byte value).
*
* As written here only three index ranges are nonzero: 0x30-0x39 hold 2,
* 0x41-0x5A hold 7 and 0x61-0x7A hold 3. Every other entry is 0.
*
* The bits are presumably the flags tested by the helpers that
* PRISM_ENCODING_TABLE(cp852) generates (e.g. PRISM_ENCODING_UPPERCASE_BIT);
* the flag values themselves are not visible here - TODO confirm.
*
* NOTE(review): every entry for bytes 0x80-0xFF is 0, so no high byte is
* classified at all. Confirm against the CP852 code page that this is the
* intended classification and not a placeholder.
54+
*/
55+
static uint8_t pm_encoding_cp852_table[256] = {
56+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
57+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
58+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
59+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
60+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
61+
0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
62+
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
63+
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
64+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
65+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
66+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
67+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
68+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
69+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
70+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
71+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
72+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
73+
};
74+
2775
/**
2876
* Each element of the following table contains a bitfield that indicates a
2977
* piece of information about the corresponding ISO-8859-1 character.
@@ -689,6 +737,8 @@ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
689737
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
690738
}
691739

740+
PRISM_ENCODING_TABLE(cp850)
741+
PRISM_ENCODING_TABLE(cp852)
692742
PRISM_ENCODING_TABLE(iso_8859_1)
693743
PRISM_ENCODING_TABLE(iso_8859_2)
694744
PRISM_ENCODING_TABLE(iso_8859_3)
@@ -717,9 +767,9 @@ PRISM_ENCODING_TABLE(windows_1258)
717767

718768
#undef PRISM_ENCODING_TABLE
719769

720-
/** ASCII encoding */
770+
/** US-ASCII encoding */
721771
pm_encoding_t pm_encoding_ascii = {
722-
.name = "ascii",
772+
.name = "US-ASCII",
723773
.char_width = pm_encoding_ascii_char_width,
724774
.alnum_char = pm_encoding_ascii_alnum_char,
725775
.alpha_char = pm_encoding_ascii_alpha_char,
@@ -729,17 +779,37 @@ pm_encoding_t pm_encoding_ascii = {
729779

730780
/** ASCII-8BIT encoding */
731781
pm_encoding_t pm_encoding_ascii_8bit = {
732-
.name = "ascii-8bit",
782+
.name = "ASCII-8BIT",
733783
.char_width = pm_encoding_single_char_width,
734784
.alnum_char = pm_encoding_ascii_alnum_char,
735785
.alpha_char = pm_encoding_ascii_alpha_char,
736786
.isupper_char = pm_encoding_ascii_isupper_char,
737787
.multibyte = false
738788
};
739789

790+
/**
* CP850 encoding descriptor, named "CP850".
*
* Character width comes from the shared pm_encoding_single_char_width
* callback and multibyte is false. The alnum/alpha/isupper callbacks are the
* cp850-specific ones, presumably generated by PRISM_ENCODING_TABLE(cp850)
* from pm_encoding_cp850_table - TODO confirm.
*/
791+
pm_encoding_t pm_encoding_cp850 = {
792+
.name = "CP850",
793+
.char_width = pm_encoding_single_char_width,
794+
.alnum_char = pm_encoding_cp850_alnum_char,
795+
.alpha_char = pm_encoding_cp850_alpha_char,
796+
.isupper_char = pm_encoding_cp850_isupper_char,
797+
.multibyte = false
798+
};
799+
800+
/**
* CP852 encoding descriptor, named "CP852".
*
* Character width comes from the shared pm_encoding_single_char_width
* callback and multibyte is false. The alnum/alpha/isupper callbacks are the
* cp852-specific ones, presumably generated by PRISM_ENCODING_TABLE(cp852)
* from pm_encoding_cp852_table - TODO confirm.
*/
801+
pm_encoding_t pm_encoding_cp852 = {
802+
.name = "CP852",
803+
.char_width = pm_encoding_single_char_width,
804+
.alnum_char = pm_encoding_cp852_alnum_char,
805+
.alpha_char = pm_encoding_cp852_alpha_char,
806+
.isupper_char = pm_encoding_cp852_isupper_char,
807+
.multibyte = false
808+
};
809+
740810
/** ISO-8859-1 */
741811
pm_encoding_t pm_encoding_iso_8859_1 = {
742-
.name = "iso-8859-1",
812+
.name = "ISO-8859-1",
743813
.char_width = pm_encoding_single_char_width,
744814
.alnum_char = pm_encoding_iso_8859_1_alnum_char,
745815
.alpha_char = pm_encoding_iso_8859_1_alpha_char,
@@ -749,7 +819,7 @@ pm_encoding_t pm_encoding_iso_8859_1 = {
749819

750820
/** ISO-8859-2 */
751821
pm_encoding_t pm_encoding_iso_8859_2 = {
752-
.name = "iso-8859-2",
822+
.name = "ISO-8859-2",
753823
.char_width = pm_encoding_single_char_width,
754824
.alnum_char = pm_encoding_iso_8859_2_alnum_char,
755825
.alpha_char = pm_encoding_iso_8859_2_alpha_char,
@@ -759,7 +829,7 @@ pm_encoding_t pm_encoding_iso_8859_2 = {
759829

760830
/** ISO-8859-3 */
761831
pm_encoding_t pm_encoding_iso_8859_3 = {
762-
.name = "iso-8859-3",
832+
.name = "ISO-8859-3",
763833
.char_width = pm_encoding_single_char_width,
764834
.alnum_char = pm_encoding_iso_8859_3_alnum_char,
765835
.alpha_char = pm_encoding_iso_8859_3_alpha_char,
@@ -769,7 +839,7 @@ pm_encoding_t pm_encoding_iso_8859_3 = {
769839

770840
/** ISO-8859-4 */
771841
pm_encoding_t pm_encoding_iso_8859_4 = {
772-
.name = "iso-8859-4",
842+
.name = "ISO-8859-4",
773843
.char_width = pm_encoding_single_char_width,
774844
.alnum_char = pm_encoding_iso_8859_4_alnum_char,
775845
.alpha_char = pm_encoding_iso_8859_4_alpha_char,
@@ -779,7 +849,7 @@ pm_encoding_t pm_encoding_iso_8859_4 = {
779849

780850
/** ISO-8859-5 */
781851
pm_encoding_t pm_encoding_iso_8859_5 = {
782-
.name = "iso-8859-5",
852+
.name = "ISO-8859-5",
783853
.char_width = pm_encoding_single_char_width,
784854
.alnum_char = pm_encoding_iso_8859_5_alnum_char,
785855
.alpha_char = pm_encoding_iso_8859_5_alpha_char,
@@ -789,7 +859,7 @@ pm_encoding_t pm_encoding_iso_8859_5 = {
789859

790860
/** ISO-8859-6 */
791861
pm_encoding_t pm_encoding_iso_8859_6 = {
792-
.name = "iso-8859-6",
862+
.name = "ISO-8859-6",
793863
.char_width = pm_encoding_single_char_width,
794864
.alnum_char = pm_encoding_iso_8859_6_alnum_char,
795865
.alpha_char = pm_encoding_iso_8859_6_alpha_char,
@@ -799,7 +869,7 @@ pm_encoding_t pm_encoding_iso_8859_6 = {
799869

800870
/** ISO-8859-7 */
801871
pm_encoding_t pm_encoding_iso_8859_7 = {
802-
.name = "iso-8859-7",
872+
.name = "ISO-8859-7",
803873
.char_width = pm_encoding_single_char_width,
804874
.alnum_char = pm_encoding_iso_8859_7_alnum_char,
805875
.alpha_char = pm_encoding_iso_8859_7_alpha_char,
@@ -809,7 +879,7 @@ pm_encoding_t pm_encoding_iso_8859_7 = {
809879

810880
/** ISO-8859-8 */
811881
pm_encoding_t pm_encoding_iso_8859_8 = {
812-
.name = "iso-8859-8",
882+
.name = "ISO-8859-8",
813883
.char_width = pm_encoding_single_char_width,
814884
.alnum_char = pm_encoding_iso_8859_8_alnum_char,
815885
.alpha_char = pm_encoding_iso_8859_8_alpha_char,
@@ -819,7 +889,7 @@ pm_encoding_t pm_encoding_iso_8859_8 = {
819889

820890
/** ISO-8859-9 */
821891
pm_encoding_t pm_encoding_iso_8859_9 = {
822-
.name = "iso-8859-9",
892+
.name = "ISO-8859-9",
823893
.char_width = pm_encoding_single_char_width,
824894
.alnum_char = pm_encoding_iso_8859_9_alnum_char,
825895
.alpha_char = pm_encoding_iso_8859_9_alpha_char,
@@ -829,7 +899,7 @@ pm_encoding_t pm_encoding_iso_8859_9 = {
829899

830900
/** ISO-8859-10 */
831901
pm_encoding_t pm_encoding_iso_8859_10 = {
832-
.name = "iso-8859-10",
902+
.name = "ISO-8859-10",
833903
.char_width = pm_encoding_single_char_width,
834904
.alnum_char = pm_encoding_iso_8859_10_alnum_char,
835905
.alpha_char = pm_encoding_iso_8859_10_alpha_char,
@@ -839,7 +909,7 @@ pm_encoding_t pm_encoding_iso_8859_10 = {
839909

840910
/** ISO-8859-11 */
841911
pm_encoding_t pm_encoding_iso_8859_11 = {
842-
.name = "iso-8859-11",
912+
.name = "ISO-8859-11",
843913
.char_width = pm_encoding_single_char_width,
844914
.alnum_char = pm_encoding_iso_8859_11_alnum_char,
845915
.alpha_char = pm_encoding_iso_8859_11_alpha_char,
@@ -849,7 +919,7 @@ pm_encoding_t pm_encoding_iso_8859_11 = {
849919

850920
/** ISO-8859-13 */
851921
pm_encoding_t pm_encoding_iso_8859_13 = {
852-
.name = "iso-8859-13",
922+
.name = "ISO-8859-13",
853923
.char_width = pm_encoding_single_char_width,
854924
.alnum_char = pm_encoding_iso_8859_13_alnum_char,
855925
.alpha_char = pm_encoding_iso_8859_13_alpha_char,
@@ -859,7 +929,7 @@ pm_encoding_t pm_encoding_iso_8859_13 = {
859929

860930
/** ISO-8859-14 */
861931
pm_encoding_t pm_encoding_iso_8859_14 = {
862-
.name = "iso-8859-14",
932+
.name = "ISO-8859-14",
863933
.char_width = pm_encoding_single_char_width,
864934
.alnum_char = pm_encoding_iso_8859_14_alnum_char,
865935
.alpha_char = pm_encoding_iso_8859_14_alpha_char,
@@ -869,7 +939,7 @@ pm_encoding_t pm_encoding_iso_8859_14 = {
869939

870940
/** ISO-8859-15 */
871941
pm_encoding_t pm_encoding_iso_8859_15 = {
872-
.name = "iso-8859-15",
942+
.name = "ISO-8859-15",
873943
.char_width = pm_encoding_single_char_width,
874944
.alnum_char = pm_encoding_iso_8859_15_alnum_char,
875945
.alpha_char = pm_encoding_iso_8859_15_alpha_char,
@@ -879,7 +949,7 @@ pm_encoding_t pm_encoding_iso_8859_15 = {
879949

880950
/** ISO-8859-16 */
881951
pm_encoding_t pm_encoding_iso_8859_16 = {
882-
.name = "iso-8859-16",
952+
.name = "ISO-8859-16",
883953
.char_width = pm_encoding_single_char_width,
884954
.alnum_char = pm_encoding_iso_8859_16_alnum_char,
885955
.alpha_char = pm_encoding_iso_8859_16_alpha_char,
@@ -889,7 +959,7 @@ pm_encoding_t pm_encoding_iso_8859_16 = {
889959

890960
/** KOI8-R */
891961
pm_encoding_t pm_encoding_koi8_r = {
892-
.name = "koi8-r",
962+
.name = "KOI8-R",
893963
.char_width = pm_encoding_koi8_r_char_width,
894964
.alnum_char = pm_encoding_koi8_r_alnum_char,
895965
.alpha_char = pm_encoding_koi8_r_alpha_char,
@@ -899,7 +969,7 @@ pm_encoding_t pm_encoding_koi8_r = {
899969

900970
/** Windows-1250 */
901971
pm_encoding_t pm_encoding_windows_1250 = {
902-
.name = "windows-1250",
972+
.name = "Windows-1250",
903973
.char_width = pm_encoding_single_char_width,
904974
.alnum_char = pm_encoding_windows_1250_alnum_char,
905975
.alpha_char = pm_encoding_windows_1250_alpha_char,
@@ -909,7 +979,7 @@ pm_encoding_t pm_encoding_windows_1250 = {
909979

910980
/** Windows-1251 */
911981
pm_encoding_t pm_encoding_windows_1251 = {
912-
.name = "windows-1251",
982+
.name = "Windows-1251",
913983
.char_width = pm_encoding_single_char_width,
914984
.alnum_char = pm_encoding_windows_1251_alnum_char,
915985
.alpha_char = pm_encoding_windows_1251_alpha_char,
@@ -919,7 +989,7 @@ pm_encoding_t pm_encoding_windows_1251 = {
919989

920990
/** Windows-1252 */
921991
pm_encoding_t pm_encoding_windows_1252 = {
922-
.name = "windows-1252",
992+
.name = "Windows-1252",
923993
.char_width = pm_encoding_single_char_width,
924994
.alnum_char = pm_encoding_windows_1252_alnum_char,
925995
.alpha_char = pm_encoding_windows_1252_alpha_char,
@@ -929,7 +999,7 @@ pm_encoding_t pm_encoding_windows_1252 = {
929999

9301000
/** Windows-1253 */
9311001
pm_encoding_t pm_encoding_windows_1253 = {
932-
.name = "windows-1253",
1002+
.name = "Windows-1253",
9331003
.char_width = pm_encoding_single_char_width,
9341004
.alnum_char = pm_encoding_windows_1253_alnum_char,
9351005
.alpha_char = pm_encoding_windows_1253_alpha_char,
@@ -939,7 +1009,7 @@ pm_encoding_t pm_encoding_windows_1253 = {
9391009

9401010
/** Windows-1254 */
9411011
pm_encoding_t pm_encoding_windows_1254 = {
942-
.name = "windows-1254",
1012+
.name = "Windows-1254",
9431013
.char_width = pm_encoding_single_char_width,
9441014
.alnum_char = pm_encoding_windows_1254_alnum_char,
9451015
.alpha_char = pm_encoding_windows_1254_alpha_char,
@@ -949,7 +1019,7 @@ pm_encoding_t pm_encoding_windows_1254 = {
9491019

9501020
/** Windows-1255 */
9511021
pm_encoding_t pm_encoding_windows_1255 = {
952-
.name = "windows-1255",
1022+
.name = "Windows-1255",
9531023
.char_width = pm_encoding_single_char_width,
9541024
.alnum_char = pm_encoding_windows_1255_alnum_char,
9551025
.alpha_char = pm_encoding_windows_1255_alpha_char,
@@ -959,7 +1029,7 @@ pm_encoding_t pm_encoding_windows_1255 = {
9591029

9601030
/** Windows-1256 */
9611031
pm_encoding_t pm_encoding_windows_1256 = {
962-
.name = "windows-1256",
1032+
.name = "Windows-1256",
9631033
.char_width = pm_encoding_single_char_width,
9641034
.alnum_char = pm_encoding_windows_1256_alnum_char,
9651035
.alpha_char = pm_encoding_windows_1256_alpha_char,
@@ -969,7 +1039,7 @@ pm_encoding_t pm_encoding_windows_1256 = {
9691039

9701040
/** Windows-1257 */
9711041
pm_encoding_t pm_encoding_windows_1257 = {
972-
.name = "windows-1257",
1042+
.name = "Windows-1257",
9731043
.char_width = pm_encoding_single_char_width,
9741044
.alnum_char = pm_encoding_windows_1257_alnum_char,
9751045
.alpha_char = pm_encoding_windows_1257_alpha_char,
@@ -979,7 +1049,7 @@ pm_encoding_t pm_encoding_windows_1257 = {
9791049

9801050
/** Windows-1258 */
9811051
pm_encoding_t pm_encoding_windows_1258 = {
982-
.name = "windows-1258",
1052+
.name = "Windows-1258",
9831053
.char_width = pm_encoding_single_char_width,
9841054
.alnum_char = pm_encoding_windows_1258_alnum_char,
9851055
.alpha_char = pm_encoding_windows_1258_alpha_char,

0 commit comments

Comments
 (0)