Permalink
Browse files

break out of the search loop early when we find the right utf8 code length
  • Loading branch information...
1 parent 377f56e commit 855fe4d7ba9725180ffb4a1d8dd760fc2b2770f3 Tony Cook committed Oct 25, 2006
Showing with 6 additions and 1 deletion.
  1. +6 −1 io.c
View
@@ -351,10 +351,14 @@ Modifies *p and *len to indicate the consumed characters.
This doesn't support the extended UTF8 encoding used by later versions
of Perl.
+This doesn't check that the UTF8 character is using the shortest
+possible representation.
+
=cut
*/
-unsigned long i_utf8_advance(char const **p, int *len) {
+unsigned long
+i_utf8_advance(char const **p, int *len) {
unsigned char c;
int i, ci, clen = 0;
unsigned char codes[3];
@@ -365,6 +369,7 @@ unsigned long i_utf8_advance(char const **p, int *len) {
for (i = 0; i < sizeof(utf8_sizes)/sizeof(*utf8_sizes); ++i) {
if ((c & utf8_sizes[i].mask) == utf8_sizes[i].expect) {
clen = utf8_sizes[i].size;
+ break;
}
}
if (clen == 0 || *len < clen-1) {

0 comments on commit 855fe4d

Please sign in to comment.