Permalink
Browse files

Replace u_word_breaks with u_words

  • Loading branch information...
1 parent 7d4ca20 · commit 1e69d046aadd955a84f178c552e02d02100b181e · Nikolai Weibull committed Apr 7, 2013
@@ -1,24 +1,9 @@
#include "rb_includes.h"
-struct closure {
- VALUE self;
- const char *previous;
-};
-
-static inline void
-yield(const char *p, struct closure *closure)
-{
- rb_yield(rb_u_string_new_c(closure->self,
- closure->previous,
- p - closure->previous));
-}
-
static void
-each(const char *p, struct closure *closure)
+each(const char *p, size_t n, VALUE *self)
{
- if (p != closure->previous)
- yield(p, closure);
- closure->previous = p;
+ rb_yield(rb_u_string_new_c(*self, p, n));
}
/* @overload each_grapheme_cluster{ |cluster| … }
@@ -46,9 +31,6 @@ rb_u_string_each_grapheme_cluster(VALUE self)
const char *end = USTRING_END(string);
size_t length = end - p;
rb_u_validate(p, length);
- struct closure closure = { self, p };
- u_grapheme_breaks(p, length, (u_break_fn)each, &closure);
- if (closure.previous != end)
- yield(end, &closure);
+ u_grapheme_clusters(p, length, (u_substring_fn)each, &self);
return self;
}
@@ -1,24 +1,9 @@
#include "rb_includes.h"
-struct closure {
- VALUE self;
- const char *previous;
-};
-
-static inline void
-yield(const char *p, struct closure *closure)
-{
- rb_yield(rb_u_string_new_c(closure->self,
- closure->previous,
- p - closure->previous));
-}
-
static void
-each(const char *p, struct closure *closure)
+each(const char *p, size_t n, VALUE *self)
{
- if (p != closure->previous)
- yield(p, closure);
- closure->previous = p;
+ rb_yield(rb_u_string_new_c(*self, p, n));
}
/* @overload each_word{ |word| … }
@@ -42,12 +27,8 @@ rb_u_string_each_word(VALUE self)
const struct rb_u_string *string = RVAL2USTRING(self);
const char *p = USTRING_STR(string);
- const char *end = USTRING_END(string);
- size_t length = end - p;
+ size_t length = USTRING_LENGTH(string);
rb_u_validate(p, length);
- struct closure closure = { self, p };
- u_word_breaks(p, length, (u_break_fn)each, &closure);
- if (closure.previous != end)
- yield(end, &closure);
+ u_words(p, length, (u_substring_fn)each, &self);
return self;
}
View
@@ -404,9 +404,10 @@ char *u_reverse_n(const char *str, size_t n);
bool u_isvalid(const char *str);
bool u_isvalid_n(const char *str, size_t max, const char **end);
-typedef void (*u_break_fn)(const char *, void *);
-void u_word_breaks(const char *string, size_t n, u_break_fn fn, void *closure);
-void u_grapheme_breaks(const char *string, size_t n, u_break_fn fn, void *closure);
+typedef void (*u_substring_fn)(const char *, size_t, void *);
+void u_words(const char *string, size_t n, u_substring_fn fn, void *closure);
+void u_grapheme_clusters(const char *string, size_t n, u_substring_fn fn,
+ void *closure);
int u_char_to_u(uint32_t c, char *result);
char *u_ucs4_to_u(uint32_t *str, size_t *items_read, size_t *items_written);
@@ -38,15 +38,21 @@ static const uint8_t gb_dfa[][U_GRAPHEME_BREAK_V + 1] = {
#undef K
void
-u_grapheme_breaks(const char *string, size_t n, u_break_fn fn, void *closure)
+u_grapheme_clusters(const char *string, size_t n, u_substring_fn fn, void *closure)
{
const char *p = string;
+ const char *q = p;
const char *end = p + n;
uint8_t state = 2;
- while (p < end) {
- state = gb_dfa[state & 0xf][s_grapheme_break(u_dref(p))];
- if (state >> 4 != 1)
- fn(p, closure);
- p = u_next(p);
+ while (q < end) {
+ state = gb_dfa[state & 0xf][s_grapheme_break(u_dref(q))];
+ if (state >> 4 != 1) {
+ if (p < q)
+ fn(p, q - p, closure);
+ p = q;
+ }
+ q = u_next(q);
}
+ if (p < q)
+ fn(p, q - p, closure);
}
View
@@ -21,50 +21,44 @@
struct titlecase_closure {
const char *string;
- const char *previous;
enum locale locale;
struct output *output;
};
static void
-titlecase_step(const char *p, struct titlecase_closure *closure)
+titlecase_step(const char *p, const char *q, struct titlecase_closure *closure)
{
- const char *t = closure->previous;
- while (t < p && !u_char_iscased(u_dref(t)))
+ const char *t = p;
+ while (t < q && !u_char_iscased(u_dref(t)))
t = u_next(t);
- output_string(closure->output, closure->previous, t - closure->previous);
- if (t == p)
+ output_string(closure->output, p, t - p);
+ if (t == q)
return;
- _u_upcase_step(closure->string, &t, p, true, closure->locale, true,
+ _u_upcase_step(closure->string, &t, q, true, closure->locale, true,
closure->output);
- if (t + 1 < p && closure->locale == LOCALE_DUTCH &&
+ if (t + 1 < q && closure->locale == LOCALE_DUTCH &&
(*t == LATIN_CAPITAL_LETTER_I || *t == LATIN_SMALL_LETTER_I) &&
(*(t + 1) == LATIN_CAPITAL_LETTER_J || *(t + 1) == LATIN_SMALL_LETTER_J)) {
output_char(closure->output, LATIN_CAPITAL_LETTER_J);
t++;
}
- for (t = u_next(t); t < p; t = u_next(t))
- _u_downcase_step(closure->string, t, p, true, closure->locale,
+ for (t = u_next(t); t < q; t = u_next(t))
+ _u_downcase_step(closure->string, t, q, true, closure->locale,
closure->output);
}
static void
-titlecase_word_break(const char *p, struct titlecase_closure *closure)
+titlecase_words(const char *p, size_t n, struct titlecase_closure *closure)
{
- if (closure->previous < p)
- titlecase_step(p, closure);
- closure->previous = p;
+ titlecase_step(p, p + n, closure);
}
static void
titlecase_loop(const char *string, size_t n, enum locale locale,
struct output *output)
{
- const char *end = string + n;
- struct titlecase_closure closure = { string, string, locale, output };
- u_word_breaks(string, n, (u_break_fn)titlecase_word_break, &closure);
- if (closure.previous != end)
- titlecase_step(end, &closure);
+ struct titlecase_closure closure = { string, locale, output };
+ u_words(string, n, (u_substring_fn)titlecase_words, &closure);
}
char *
@@ -45,32 +45,40 @@ static const uint8_t wb_dfa[][U_WORD_BREAK_REGIONAL_INDICATOR + 1] = {
#undef K
void
-u_word_breaks(const char *string, size_t n, u_break_fn fn, void *closure)
+u_words(const char *string, size_t n, u_substring_fn fn, void *closure)
{
const char *p = string;
+ const char *q = p;
const char *end = p + n;
const char *s = NULL;
uint8_t state = 2;
- while (p < end) {
- state = wb_dfa[state & 0xf][s_word_break(u_dref(p))];
+ while (q < end) {
+ state = wb_dfa[state & 0xf][s_word_break(u_dref(q))];
switch (state >> 4) {
case 1:
break;
case 2:
- s = p;
+ s = q;
break;
case 3:
s = NULL;
break;
default:
if (s != NULL) {
- fn(s, closure);
+ fn(p, s - p, closure);
+ p = s;
s = NULL;
}
- fn(p, closure);
+ if (p < q)
+ fn(p, q - p, closure);
+ p = q;
}
- p = u_next(p);
+ q = u_next(q);
}
- if (s != NULL)
- fn(s, closure);
+ if (s != NULL) {
+ fn(p, s - p, closure);
+ p = s;
+ }
+ if (p < q)
+ fn(p, q - p, closure);
}
View
@@ -258,7 +258,7 @@ def sources
u_downcase.c
u_dref.c
u_foldcase.c
- u_grapheme_breaks.c
+ u_grapheme_clusters.c
u_has_prefix.c
u_index.c
u_is_ascii_only.c
@@ -278,7 +278,7 @@ def sources
u_ucs4_to_u.c
u_upcase.c
u_width.c
- u_word_breaks.c
+ u_words.c
utf8.h
word-break.c
word-break.h'

0 comments on commit 1e69d04

Please sign in to comment.