Skip to content
Newer
Older
100644 902 lines (797 sloc) 28.4 KB
e0d6430 add mruby sources
mimaki authored
1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "mruby.h"
31 #ifdef INCLUDE_ENCODING
32 #include <string.h>
33 #include "regint.h"
34
/* Default character encoding for the library; returned by
 * onigenc_get_default_encoding() and overwritten by
 * onigenc_set_default_encoding(). */
35 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
36
/* Encoding-layer initialisation hook. There is nothing to set up, so it
 * always reports success (0). */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
42
43 extern OnigEncoding
44 onigenc_get_default_encoding(void)
45 {
46 return OnigEncDefaultCharEncoding;
47 }
48
49 extern int
50 onigenc_set_default_encoding(OnigEncoding enc)
51 {
52 OnigEncDefaultCharEncoding = enc;
53 return 0;
54 }
55
56 extern int
57 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
58 {
59 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
60 if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
61 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
62 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
63 return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
64 return 1;
65 }
66
67 extern UChar*
68 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
69 {
70 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
71 if (p < s) {
72 p += enclen(enc, p, end);
73 }
74 return p;
75 }
76
77 extern UChar*
78 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
4ec6d41 rm whitespace
roco authored
79 const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
e0d6430 add mruby sources
mimaki authored
80 {
81 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
82
83 if (p < s) {
84 if (prev) *prev = (const UChar* )p;
85 p += enclen(enc, p, end);
86 }
87 else {
88 if (prev) *prev = (const UChar* )NULL; /* Sorry */
89 }
90 return p;
91 }
92
93 extern UChar*
94 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
95 {
96 if (s <= start)
97 return (UChar* )NULL;
98
99 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
100 }
101
102 extern UChar*
103 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
104 {
105 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
106 if (s <= start)
107 return (UChar* )NULL;
108
109 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
110 }
111 return (UChar* )s;
112 }
113
114 extern UChar*
115 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
116 {
117 UChar* q = (UChar* )p;
118 while (n-- > 0) {
119 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
120 }
121 return (q <= end ? q : NULL);
122 }
123
124 extern int
125 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
126 {
127 int n = 0;
128 UChar* q = (UChar* )p;
129
130 while (q < end) {
131 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
132 n++;
133 }
134 return n;
135 }
136
137 extern int
138 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
139 {
140 int n = 0;
141 UChar* p = (UChar* )s;
142 UChar* e;
143
144 while (1) {
145 if (*p == '\0') {
146 UChar* q;
147 int len = ONIGENC_MBC_MINLEN(enc);
148
149 if (len == 1) return n;
150 q = p + 1;
151 while (len > 1) {
152 if (*q != '\0') break;
153 q++;
154 len--;
155 }
156 if (len == 1) return n;
157 }
158 e = p + ONIGENC_MBC_MAXLEN(enc);
159 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
160 n++;
161 }
162 }
163
164 extern int
165 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
166 {
167 UChar* start = (UChar* )s;
168 UChar* p = (UChar* )s;
169 UChar* e;
170
171 while (1) {
172 if (*p == '\0') {
173 UChar* q;
174 int len = ONIGENC_MBC_MINLEN(enc);
175
176 if (len == 1) return (int )(p - start);
177 q = p + 1;
178 while (len > 1) {
179 if (*q != '\0') break;
180 q++;
181 len--;
182 }
183 if (len == 1) return (int )(p - start);
184 }
185 e = p + ONIGENC_MBC_MAXLEN(enc);
186 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
187 }
188 }
189
/* Byte-indexed ASCII lower-casing table (256 entries, written in octal).
 * Entries 0101-0132 ('A'-'Z') hold 0141-0172 ('a'-'z'); every other entry
 * holds its own index, so bytes >= 0200 are left unchanged. */
190 const UChar OnigEncAsciiToLowerCaseTable[] = {
191 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
192 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
193 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
194 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
195 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
196 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
197 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
198 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
199 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
200 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
201 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
202 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
203 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
204 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
205 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
206 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
207 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
208 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
209 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
210 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
211 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
212 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
213 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
214 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
215 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
216 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
217 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
218 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
219 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
220 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
221 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
222 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
223 };
224
/* Byte-indexed ASCII upper-casing table, compiled only when
 * USE_UPPER_CASE_TABLE is defined. Entries 0141-0172 ('a'-'z') hold
 * 0101-0132 ('A'-'Z'); every other entry holds its own index. */
225 #ifdef USE_UPPER_CASE_TABLE
226 const UChar OnigEncAsciiToUpperCaseTable[256] = {
227 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
228 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
229 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
230 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
231 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
232 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
233 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
234 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
235 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
236 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
237 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
238 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
239 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
240 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
241 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
242 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
243 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
244 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
245 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
246 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
247 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
248 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
249 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
250 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
251 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
252 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
253 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
254 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
255 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
256 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
257 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
258 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
259 };
260 #endif
261
/* Byte-indexed table of 16-bit character-class flag words. Entries for
 * 0x00-0x7f carry flags; entries for 0x80-0xff are all zero.
 * NOTE(review): the meaning of each bit is presumably given by the
 * ONIGENC_CTYPE_* constants and read through ONIGENC_IS_ASCII_CODE_CTYPE;
 * neither definition is visible here - confirm in the encoding header. */
262 const unsigned short OnigEncAsciiCtypeTable[256] = {
263 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
264 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
265 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
266 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
267 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
268 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
269 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
270 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
271 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
272 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
273 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
274 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
275 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
276 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
277 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
278 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
279 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
280 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
285 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
286 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
287 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
288 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
289 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
290 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
291 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
292 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
293 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
294 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
295 };
296
/* Byte-indexed lower-casing table for ISO-8859-1 (256 entries, octal).
 * In addition to 'A'-'Z' -> 'a'-'z', entries 0300-0336 hold 0340-0376,
 * with two exceptions that hold their own index: 0327 and 0337.
 * Every remaining entry holds its own index. */
297 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
298 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
299 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
300 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
301 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
302 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
303 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
304 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
305 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
306 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
307 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
308 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
309 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
310 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
311 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
312 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
313 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
314 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
315 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
316 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
317 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
318 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
319 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
320 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
321 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
322 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
323 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
324 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
325 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
326 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
327 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
328 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
329 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
330 };
331
/* Byte-indexed upper-casing table for ISO-8859-1, compiled only when
 * USE_UPPER_CASE_TABLE is defined. In addition to 'a'-'z' -> 'A'-'Z',
 * entries 0340-0376 hold 0300-0336, with two exceptions that hold their
 * own index: 0367 and 0377. Every remaining entry holds its own index. */
332 #ifdef USE_UPPER_CASE_TABLE
333 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
334 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
335 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
336 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
337 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
338 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
339 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
340 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
341 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
342 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
343 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
344 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
345 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
346 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
347 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
348 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
349 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
350 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
351 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
352 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
353 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
354 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
355 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
356 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
357 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
358 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
359 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
360 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
361 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
362 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
363 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
364 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
365 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
366 };
367 #endif
368
369 extern void
370 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
371 {
372 /* nothing */
373 /* obsoleted. */
374 }
375
376 extern UChar*
377 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
378 {
379 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
380 }
381
/* Case-fold pairs for the 26 ASCII letters: the first value of each pair
 * is the upper-case code (0x41-0x5a), the second the matching lower-case
 * code (0x61-0x7a). onigenc_ascii_apply_all_case_fold() walks this table
 * through the `from` / `to` members.
 * NOTE(review): presumably `from` is the first member and `to` the
 * second - confirm against the OnigPairCaseFoldCodes declaration. */
382 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
383 { 0x41, 0x61 },
384 { 0x42, 0x62 },
385 { 0x43, 0x63 },
386 { 0x44, 0x64 },
387 { 0x45, 0x65 },
388 { 0x46, 0x66 },
389 { 0x47, 0x67 },
390 { 0x48, 0x68 },
391 { 0x49, 0x69 },
392 { 0x4a, 0x6a },
393 { 0x4b, 0x6b },
394 { 0x4c, 0x6c },
395 { 0x4d, 0x6d },
396 { 0x4e, 0x6e },
397 { 0x4f, 0x6f },
398 { 0x50, 0x70 },
399 { 0x51, 0x71 },
400 { 0x52, 0x72 },
401 { 0x53, 0x73 },
402 { 0x54, 0x74 },
403 { 0x55, 0x75 },
404 { 0x56, 0x76 },
405 { 0x57, 0x77 },
406 { 0x58, 0x78 },
407 { 0x59, 0x79 },
408 { 0x5a, 0x7a }
409 };
410
411 extern int
412 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
413 OnigApplyAllCaseFoldFunc f, void* arg,
414 OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
415 {
416 OnigCodePoint code;
417 int i, r;
418
419 for (i = 0;
420 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
421 i++) {
422 code = OnigAsciiLowerMap[i].to;
423 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
424 if (r != 0) return r;
425
426 code = OnigAsciiLowerMap[i].from;
427 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
428 if (r != 0) return r;
429 }
430
431 return 0;
432 }
433
434 extern int
435 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
436 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
437 OnigEncoding enc ARG_UNUSED)
438 {
439 if (0x41 <= *p && *p <= 0x5a) {
440 items[0].byte_len = 1;
441 items[0].code_len = 1;
442 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
443 return 1;
444 }
445 else if (0x61 <= *p && *p <= 0x7a) {
446 items[0].byte_len = 1;
447 items[0].code_len = 1;
448 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
449 return 1;
450 }
451 else
452 return 0;
453 }
454
455 static int
456 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
457 OnigApplyAllCaseFoldFunc f, void* arg)
e0d6430 add mruby sources
mimaki authored
458 {
459 OnigCodePoint ss[] = { 0x73, 0x73 };
460
461 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
462 }
463
464 extern int
465 onigenc_apply_all_case_fold_with_map(int map_size,
466 const OnigPairCaseFoldCodes map[],
467 int ess_tsett_flag, OnigCaseFoldType flag,
468 OnigApplyAllCaseFoldFunc f, void* arg)
469 {
470 OnigCodePoint code;
471 int i, r;
472
473 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
474 if (r != 0) return r;
475
476 for (i = 0; i < map_size; i++) {
477 code = map[i].to;
478 r = (*f)(map[i].from, &code, 1, arg);
479 if (r != 0) return r;
480
481 code = map[i].from;
482 r = (*f)(map[i].to, &code, 1, arg);
483 if (r != 0) return r;
484 }
485
486 if (ess_tsett_flag != 0)
487 return ss_apply_all_case_fold(flag, f, arg);
488
489 return 0;
490 }
491
492 extern int
493 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
494 const OnigPairCaseFoldCodes map[],
495 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
496 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
497 {
498 if (0x41 <= *p && *p <= 0x5a) {
499 items[0].byte_len = 1;
500 items[0].code_len = 1;
501 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
502 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
4ec6d41 rm whitespace
roco authored
503 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
e0d6430 add mruby sources
mimaki authored
504 /* SS */
505 items[1].byte_len = 2;
506 items[1].code_len = 1;
507 items[1].code[0] = (OnigCodePoint )0xdf;
508 return 2;
509 }
510 else
511 return 1;
512 }
513 else if (0x61 <= *p && *p <= 0x7a) {
514 items[0].byte_len = 1;
515 items[0].code_len = 1;
516 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
517 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
4ec6d41 rm whitespace
roco authored
518 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
e0d6430 add mruby sources
mimaki authored
519 /* ss */
520 items[1].byte_len = 2;
521 items[1].code_len = 1;
522 items[1].code[0] = (OnigCodePoint )0xdf;
523 return 2;
524 }
525 else
526 return 1;
527 }
528 else if (*p == 0xdf && ess_tsett_flag != 0) {
529 items[0].byte_len = 1;
530 items[0].code_len = 2;
531 items[0].code[0] = (OnigCodePoint )'s';
532 items[0].code[1] = (OnigCodePoint )'s';
533
534 items[1].byte_len = 1;
535 items[1].code_len = 2;
536 items[1].code[0] = (OnigCodePoint )'S';
537 items[1].code[1] = (OnigCodePoint )'S';
538
539 items[2].byte_len = 1;
540 items[2].code_len = 2;
541 items[2].code[0] = (OnigCodePoint )'s';
542 items[2].code[1] = (OnigCodePoint )'S';
543
544 items[3].byte_len = 1;
545 items[3].code_len = 2;
546 items[3].code[0] = (OnigCodePoint )'S';
547 items[3].code[1] = (OnigCodePoint )'s';
548
549 return 4;
550 }
551 else {
552 int i;
553
554 for (i = 0; i < map_size; i++) {
555 if (*p == map[i].from) {
4ec6d41 rm whitespace
roco authored
556 items[0].byte_len = 1;
557 items[0].code_len = 1;
558 items[0].code[0] = map[i].to;
559 return 1;
e0d6430 add mruby sources
mimaki authored
560 }
561 else if (*p == map[i].to) {
4ec6d41 rm whitespace
roco authored
562 items[0].byte_len = 1;
563 items[0].code_len = 1;
564 items[0].code[0] = map[i].from;
565 return 1;
e0d6430 add mruby sources
mimaki authored
566 }
567 }
568 }
569
570 return 0;
571 }
572
573
574 extern int
575 onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
576 OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
4ec6d41 rm whitespace
roco authored
577 OnigEncoding enc)
e0d6430 add mruby sources
mimaki authored
578 {
579 return ONIG_NO_SUPPORT_CONFIG;
580 }
581
582 extern int
583 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
584 {
585 if (p < end) {
586 if (*p == 0x0a) return 1;
587 }
588 return 0;
589 }
590
591 /* for single byte encodings */
592 extern int
593 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
4ec6d41 rm whitespace
roco authored
594 const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
595 {
596 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
597
598 (*p)++;
599 return 1; /* return byte length of converted char to lower */
600 }
601
602 extern int
603 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
604 OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
605 {
606 return 1;
607 }
608
609 extern OnigCodePoint
610 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
611 OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
612 {
613 return (OnigCodePoint )(*p);
614 }
615
616 extern int
617 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
618 {
619 return 1;
620 }
621
622 extern int
623 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
624 {
625 *buf = (UChar )(code & 0xff);
626 return 1;
627 }
628
629 extern UChar*
630 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
631 const UChar* end,
4ec6d41 rm whitespace
roco authored
632 OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
633 {
634 return (UChar* )s;
635 }
636
637 extern int
638 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
639 OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
640 {
641 return TRUE;
642 }
643
644 extern int
645 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
646 OnigEncoding enc ARG_UNUSED)
e0d6430 add mruby sources
mimaki authored
647 {
648 return FALSE;
649 }
650
651 extern int
652 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
653 OnigEncoding enc ARG_UNUSED)
654 {
655 if (code < 128)
656 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
657 else
658 return FALSE;
659 }
660
661 extern OnigCodePoint
662 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
663 {
664 int c, i, len;
665 OnigCodePoint n;
666
667 len = enclen(enc, p, end);
668 n = (OnigCodePoint )(*p++);
669 if (len == 1) return n;
670
671 for (i = 1; i < len; i++) {
672 if (p >= end) break;
673 c = *p++;
674 n <<= 8; n += c;
675 }
676 return n;
677 }
678
679 extern int
680 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
681 const UChar** pp, const UChar* end ARG_UNUSED,
4ec6d41 rm whitespace
roco authored
682 UChar* lower)
e0d6430 add mruby sources
mimaki authored
683 {
684 int len;
685 const UChar *p = *pp;
686
687 if (ONIGENC_IS_MBC_ASCII(p)) {
688 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
689 (*pp)++;
690 return 1;
691 }
692 else {
693 int i;
694
695 len = enclen(enc, p, end);
696 for (i = 0; i < len; i++) {
697 *lower++ = *p++;
698 }
699 (*pp) += len;
700 return len; /* return byte length of converted to lower char */
701 }
702 }
703
704 extern int
705 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
706 {
707 if ((code & 0xff00) != 0) return 2;
708 else return 1;
709 }
710
711 extern int
712 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
713 {
714 if ((code & 0xff000000) != 0) return 4;
715 else if ((code & 0xff0000) != 0) return 3;
716 else if ((code & 0xff00) != 0) return 2;
717 else return 1;
718 }
719
720 extern int
721 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
722 {
723 UChar *p = buf;
724
725 if ((code & 0xff00) != 0) {
726 *p++ = (UChar )((code >> 8) & 0xff);
727 }
728 *p++ = (UChar )(code & 0xff);
729
730 if (enclen(enc, buf, p) != (p - buf))
731 return ONIGERR_INVALID_CODE_POINT_VALUE;
732 return (int)(p - buf);
733 }
734
735 extern int
736 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
737 {
738 UChar *p = buf;
739
740 if ((code & 0xff000000) != 0) {
741 *p++ = (UChar )((code >> 24) & 0xff);
742 }
743 if ((code & 0xff0000) != 0 || p != buf) {
744 *p++ = (UChar )((code >> 16) & 0xff);
745 }
746 if ((code & 0xff00) != 0 || p != buf) {
747 *p++ = (UChar )((code >> 8) & 0xff);
748 }
749 *p++ = (UChar )(code & 0xff);
750
751 if (enclen(enc, buf, p) != (p - buf))
752 return ONIGERR_INVALID_CODE_POINT_VALUE;
753 return (int)(p - buf);
754 }
755
756 extern int
757 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
758 {
759 static const PosixBracketEntryType PBS[] = {
760 PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
761 PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
762 PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
763 PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
764 PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
765 PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
766 PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
767 PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
768 PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
769 PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
770 PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
771 PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
772 PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
773 PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
774 };
775
776 const PosixBracketEntryType *pb, *pbe;
777 int len;
778
779 len = onigenc_strlen(enc, p, end);
780 for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
781 if (len == pb->len &&
782 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
783 return pb->ctype;
784 }
785
786 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
787 }
788
789 extern int
790 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
4ec6d41 rm whitespace
roco authored
791 unsigned int ctype)
e0d6430 add mruby sources
mimaki authored
792 {
793 if (code < 128)
794 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
795 else {
796 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
797 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
798 }
799 }
800
801 return FALSE;
802 }
803
804 extern int
805 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
4ec6d41 rm whitespace
roco authored
806 unsigned int ctype)
e0d6430 add mruby sources
mimaki authored
807 {
808 if (code < 128)
809 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
810 else {
811 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
812 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
813 }
814 }
815
816 return FALSE;
817 }
818
819 extern int
820 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
821 const UChar* sascii /* ascii */, int n)
822 {
823 int x, c;
824
825 while (n-- > 0) {
826 if (p >= end) return (int )(*sascii);
827
828 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
829 x = *sascii - c;
830 if (x) return x;
831
832 sascii++;
833 p += enclen(enc, p, end);
834 }
835 return 0;
836 }
837
838 /* Property management */
839 static int
840 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
841 {
842 size_t size;
843 const OnigCodePoint **list = *plist;
844
845 size = sizeof(OnigCodePoint*) * new_size;
846 if (IS_NULL(list)) {
847 list = (const OnigCodePoint** )xmalloc(size);
848 }
849 else {
850 list = (const OnigCodePoint** )xrealloc((void* )list, size);
851 }
852
853 if (IS_NULL(list)) return ONIGERR_MEMORY;
854
855 *plist = list;
856 *psize = new_size;
857
858 return 0;
859 }
860
861 extern int
862 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
863 hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
864 int *psize)
865 {
866 #define PROP_INIT_SIZE 16
867
868 int r;
869
870 if (*psize <= *pnum) {
871 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
872 r = resize_property_list(new_size, plist, psize);
873 if (r != 0) return r;
874 }
875
876 (*plist)[*pnum] = prop;
877
878 if (ONIG_IS_NULL(*table)) {
879 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
880 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
881 }
882
883 *pnum = *pnum + 1;
884 onig_st_insert_strend(*table, name, name + strlen((char* )name),
4ec6d41 rm whitespace
roco authored
885 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
e0d6430 add mruby sources
mimaki authored
886 return 0;
887 }
888
889 extern int
890 onigenc_property_list_init(int (*f)(void))
891 {
892 int r;
893
894 THREAD_ATOMIC_START;
895
896 r = f();
897
898 THREAD_ATOMIC_END;
899 return r;
900 }
901 #endif //INCLUDE_ENCODING
Something went wrong with that request. Please try again.