@@ -272,6 +272,14 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str)
272272# define ALIGNED_WORD_ACCESS 1
273273#endif
274274
/*
 * Word type and byte-replication constant for code that processes memory
 * one machine word at a time.
 *
 *   bitint - unsigned integer type used as the processing word
 *            (64 bits when MRB_64BIT is defined, 32 bits otherwise).
 *   MASK1  - a word with 0x01 in every byte; multiplying it by an 8-bit
 *            value replicates that value into every byte of the word.
 */
#if defined(MRB_64BIT)
# define bitint uint64_t
# define MASK1  0x0101010101010101ull
#else
# define bitint uint32_t
# define MASK1  0x01010101ul
#endif
282+
275283#ifdef MRB_UTF8_STRING
276284
277285#define NOASCII (c ) ((c) & 0x80)
@@ -421,15 +429,46 @@ mrb_utf8len(const char* p, const char* e)
421429 return len ;
422430}
423431
/*
 * popcount(x): number of set bits in a bitint-sized word.
 *
 * Uses the compiler builtin when one is available, otherwise a portable
 * bit-parallel fallback.  bitint and MASK1 are defined earlier in this file.
 *
 * The __has_builtin probe is nested under defined(__has_builtin) because a
 * preprocessor that lacks __has_builtin cannot even parse
 * `__has_builtin(...)` inside an #if expression, even when another operand
 * of `||` is already true.
 */
#if defined(__GNUC__)
# define MRB_BUILTIN_POPCOUNT 1
#elif defined(__has_builtin)
# if __has_builtin(__builtin_popcount)
#  define MRB_BUILTIN_POPCOUNT 1
# endif
#endif

#ifdef MRB_BUILTIN_POPCOUNT
# ifdef MRB_64BIT
#  define popcount(x) __builtin_popcountll(x)
# else
#  define popcount(x) __builtin_popcountl(x)
# endif
# undef MRB_BUILTIN_POPCOUNT
#else
static inline uint32_t popcount(bitint x)
{
  /* Sum adjacent bit groups: 1-bit pairs, then 2-bit pairs, then nibbles,
     leaving a per-byte bit count in every byte of x. */
  x = (x & (MASK1 * 0x55)) + ((x >> 1) & (MASK1 * 0x55));
  x = (x & (MASK1 * 0x33)) + ((x >> 2) & (MASK1 * 0x33));
  x = (x & (MASK1 * 0x0F)) + ((x >> 4) & (MASK1 * 0x0F));
  /* Multiplying by MASK1 accumulates all byte counts into the top byte.
     Truncate to the word width first, then shift that byte down.  The shift
     amount is derived from the word size: a fixed 56 would be undefined
     behaviour when bitint is only 32 bits wide. */
  x = (bitint)(x * MASK1);
  return (uint32_t)(x >> (8 * (sizeof(bitint) - 1)));
}
#endif
447+
424448mrb_int
425449mrb_utf8_strlen (const char * str , mrb_int byte_len )
426450{
427451 mrb_int len = 0 ;
452+
428453 const char * p = str ;
429- const char * e = p + byte_len ;
430- while (p < e ) {
431- if (utf8_islead (* p )) len ++ ;
432- p ++ ;
454+ const char * be = p + sizeof (bitint ) * (byte_len / sizeof (bitint ));
455+ for (; p < be ; p += sizeof (bitint )) {
456+ bitint t0 ;
457+
458+ memcpy (& t0 , p , sizeof (bitint ));
459+ const bitint t1 = t0 & (MASK1 * 0xc0 );
460+ const bitint t2 = t1 + (MASK1 * 0x40 );
461+ const bitint t3 = t1 & t2 ;
462+ len += popcount (t3 );
463+ }
464+ len = sizeof (bitint ) * (byte_len / sizeof (bitint )) - len ;
465+
466+ if (byte_len % sizeof (bitint )) {
467+ const char * e = str + byte_len ;
468+ while (p < e ) {
469+ if (utf8_islead (* p )) len ++ ;
470+ p ++ ;
471+ }
433472 }
434473 return len ;
435474}
0 commit comments