|
7 | 7 | #define PRISM_ENCODING_H
|
8 | 8 |
|
9 | 9 | #include "prism/defines.h"
|
| 10 | +#include "prism/util/pm_strncasecmp.h" |
10 | 11 |
|
11 | 12 | #include <assert.h>
|
12 | 13 | #include <stdbool.h>
|
@@ -119,98 +120,21 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
|
119 | 120 | */
|
120 | 121 | extern const uint8_t pm_encoding_unicode_table[256];
|
121 | 122 |
|
122 |
| -// Below are the encodings that are supported by the parser. They are defined in |
123 |
| -// their own files in the src/enc directory. |
124 |
| - |
125 |
| -extern pm_encoding_t pm_encoding_ascii; |
126 |
| -extern pm_encoding_t pm_encoding_ascii_8bit; |
127 |
| -extern pm_encoding_t pm_encoding_big5; |
128 |
| -extern pm_encoding_t pm_encoding_big5_hkscs; |
129 |
| -extern pm_encoding_t pm_encoding_big5_uao; |
130 |
| -extern pm_encoding_t pm_encoding_cesu_8; |
131 |
| -extern pm_encoding_t pm_encoding_cp51932; |
132 |
| -extern pm_encoding_t pm_encoding_cp850; |
133 |
| -extern pm_encoding_t pm_encoding_cp852; |
134 |
| -extern pm_encoding_t pm_encoding_cp855; |
135 |
| -extern pm_encoding_t pm_encoding_cp949; |
136 |
| -extern pm_encoding_t pm_encoding_cp950; |
137 |
| -extern pm_encoding_t pm_encoding_cp951; |
138 |
| -extern pm_encoding_t pm_encoding_emacs_mule; |
139 |
| -extern pm_encoding_t pm_encoding_euc_jp; |
140 |
| -extern pm_encoding_t pm_encoding_euc_jp_ms; |
141 |
| -extern pm_encoding_t pm_encoding_euc_jis_2004; |
142 |
| -extern pm_encoding_t pm_encoding_euc_kr; |
143 |
| -extern pm_encoding_t pm_encoding_euc_tw; |
144 |
| -extern pm_encoding_t pm_encoding_gb12345; |
145 |
| -extern pm_encoding_t pm_encoding_gb18030; |
146 |
| -extern pm_encoding_t pm_encoding_gb1988; |
147 |
| -extern pm_encoding_t pm_encoding_gb2312; |
148 |
| -extern pm_encoding_t pm_encoding_gbk; |
149 |
| -extern pm_encoding_t pm_encoding_ibm437; |
150 |
| -extern pm_encoding_t pm_encoding_ibm720; |
151 |
| -extern pm_encoding_t pm_encoding_ibm737; |
152 |
| -extern pm_encoding_t pm_encoding_ibm775; |
153 |
| -extern pm_encoding_t pm_encoding_ibm852; |
154 |
| -extern pm_encoding_t pm_encoding_ibm855; |
155 |
| -extern pm_encoding_t pm_encoding_ibm857; |
156 |
| -extern pm_encoding_t pm_encoding_ibm860; |
157 |
| -extern pm_encoding_t pm_encoding_ibm861; |
158 |
| -extern pm_encoding_t pm_encoding_ibm862; |
159 |
| -extern pm_encoding_t pm_encoding_ibm863; |
160 |
| -extern pm_encoding_t pm_encoding_ibm864; |
161 |
| -extern pm_encoding_t pm_encoding_ibm865; |
162 |
| -extern pm_encoding_t pm_encoding_ibm866; |
163 |
| -extern pm_encoding_t pm_encoding_ibm869; |
164 |
| -extern pm_encoding_t pm_encoding_iso_8859_1; |
165 |
| -extern pm_encoding_t pm_encoding_iso_8859_2; |
166 |
| -extern pm_encoding_t pm_encoding_iso_8859_3; |
167 |
| -extern pm_encoding_t pm_encoding_iso_8859_4; |
168 |
| -extern pm_encoding_t pm_encoding_iso_8859_5; |
169 |
| -extern pm_encoding_t pm_encoding_iso_8859_6; |
170 |
| -extern pm_encoding_t pm_encoding_iso_8859_7; |
171 |
| -extern pm_encoding_t pm_encoding_iso_8859_8; |
172 |
| -extern pm_encoding_t pm_encoding_iso_8859_9; |
173 |
| -extern pm_encoding_t pm_encoding_iso_8859_10; |
174 |
| -extern pm_encoding_t pm_encoding_iso_8859_11; |
175 |
| -extern pm_encoding_t pm_encoding_iso_8859_13; |
176 |
| -extern pm_encoding_t pm_encoding_iso_8859_14; |
177 |
| -extern pm_encoding_t pm_encoding_iso_8859_15; |
178 |
| -extern pm_encoding_t pm_encoding_iso_8859_16; |
179 |
| -extern pm_encoding_t pm_encoding_koi8_r; |
180 |
| -extern pm_encoding_t pm_encoding_koi8_u; |
181 |
| -extern pm_encoding_t pm_encoding_mac_cent_euro; |
182 |
| -extern pm_encoding_t pm_encoding_mac_croatian; |
183 |
| -extern pm_encoding_t pm_encoding_mac_cyrillic; |
184 |
| -extern pm_encoding_t pm_encoding_mac_greek; |
185 |
| -extern pm_encoding_t pm_encoding_mac_iceland; |
186 |
| -extern pm_encoding_t pm_encoding_mac_japanese; |
187 |
| -extern pm_encoding_t pm_encoding_mac_roman; |
188 |
| -extern pm_encoding_t pm_encoding_mac_romania; |
189 |
| -extern pm_encoding_t pm_encoding_mac_thai; |
190 |
| -extern pm_encoding_t pm_encoding_mac_turkish; |
191 |
| -extern pm_encoding_t pm_encoding_mac_ukraine; |
192 |
| -extern pm_encoding_t pm_encoding_shift_jis; |
193 |
| -extern pm_encoding_t pm_encoding_sjis_docomo; |
194 |
| -extern pm_encoding_t pm_encoding_sjis_kddi; |
195 |
| -extern pm_encoding_t pm_encoding_sjis_softbank; |
196 |
| -extern pm_encoding_t pm_encoding_stateless_iso_2022_jp; |
197 |
| -extern pm_encoding_t pm_encoding_stateless_iso_2022_jp_kddi; |
198 |
| -extern pm_encoding_t pm_encoding_tis_620; |
| 123 | +/** |
| 124 | + * This is the default encoding for Ruby source files. It is declared here, |
| 125 | + * visible to other files, so that prism.c can check whether a given |
| 126 | + * encoding is this default. |
| 127 | + */ |
199 | 128 | extern pm_encoding_t pm_encoding_utf_8;
|
200 |
| -extern pm_encoding_t pm_encoding_utf8_mac; |
201 |
| -extern pm_encoding_t pm_encoding_utf8_docomo; |
202 |
| -extern pm_encoding_t pm_encoding_utf8_kddi; |
203 |
| -extern pm_encoding_t pm_encoding_utf8_softbank; |
204 |
| -extern pm_encoding_t pm_encoding_windows_1250; |
205 |
| -extern pm_encoding_t pm_encoding_windows_1251; |
206 |
| -extern pm_encoding_t pm_encoding_windows_1252; |
207 |
| -extern pm_encoding_t pm_encoding_windows_1253; |
208 |
| -extern pm_encoding_t pm_encoding_windows_1254; |
209 |
| -extern pm_encoding_t pm_encoding_windows_1255; |
210 |
| -extern pm_encoding_t pm_encoding_windows_1256; |
211 |
| -extern pm_encoding_t pm_encoding_windows_1257; |
212 |
| -extern pm_encoding_t pm_encoding_windows_1258; |
213 |
| -extern pm_encoding_t pm_encoding_windows_31j; |
214 |
| -extern pm_encoding_t pm_encoding_windows_874; |
| 129 | + |
| 130 | +/** |
| 131 | + * Parse the given name of an encoding and return a pointer to the corresponding |
| 132 | + * encoding struct if one can be found, otherwise return NULL. |
| 133 | + * |
| 134 | + * @param start A pointer to the first byte of the name. |
| 135 | + * @param end A pointer to the end of the name. NOTE(review): confirm whether this is the last byte or one past it. |
| 136 | + * @returns A pointer to the encoding it finds, otherwise NULL. |
| 137 | + */ |
| 138 | +pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end); |
215 | 139 |
|
216 | 140 | #endif
|
0 commit comments