From 2defab0636aff5f37a63bcf1a9697ef70bcd53bb Mon Sep 17 00:00:00 2001
From: Navaneeth K N
Date: Mon, 9 Jun 2014 01:26:14 +0530
Subject: [PATCH] Added strbuf_chars() to strbuf which returns each character in string

This function is unicode aware and works well for UTF-8 encoded texts.
---
 CMakeLists.txt     |  1 +
 strbuf.c           | 33 +++++++++++++++++++++++++++++++++
 tests/strbuftest.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 util.h             |  1 +
 4 files changed, 77 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c07d2d..4a4c921 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,6 +110,7 @@ SET_TARGET_PROPERTIES(${VARNAM_LIBRARY_NAME} PROPERTIES
 
 # a custom target to compile scheme files
 add_custom_target (vst command ./varnamc --compile schemes/ml && ./varnamc --compile schemes/hi)
+add_custom_target (test command cd tests && ./runtests)
 
 # Makes the distro
 add_custom_target (distro COMMAND ./makedistro.sh "${VARNAM_VERSION_MAJOR}.${VARNAM_VERSION_MINOR}.${VARNAM_VERSION_PATCH}")
diff --git a/strbuf.c b/strbuf.c
index fd5c396..15c43be 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -177,6 +177,39 @@ int strbuf_addvf(struct strbuf *string, const char *format, va_list args)
 }
 
 
+/*
+ * Splits the string held in b into its individual UTF-8 characters.
+ *
+ * Returns a new varray holding one entry per character, in input order.
+ * Each entry is the detached buffer of its own strbuf and contains only
+ * the bytes of that character. An empty string gives an empty array.
+ *
+ * The returned result should be destroyed by the caller.
+ * */
+varray*
+strbuf_chars(strbuf *b)
+{
+    const unsigned char *ustring;
+    const char *inputcopy;
+    int bytes_read = 0;
+    varray *chars;
+    strbuf *tmp;
+
+    inputcopy = b->buffer;
+    chars = varray_init();
+    while (*inputcopy != '\0') {
+        /* The macro is expected to move inputcopy past one character and
+         * count its bytes in bytes_read, which therefore has to start at
+         * zero on every pass, the first one included. */
+        READ_A_UTF8_CHAR (ustring, inputcopy, bytes_read);
+        tmp = strbuf_init(8);
+        strbuf_add_bytes (tmp, inputcopy - bytes_read, bytes_read);
+        varray_push (chars, strbuf_detach(tmp));
+        bytes_read = 0;
+    }
+    return chars;
+}
+
 void strbuf_destroy(void *s)
 {
     strbuf *string;
diff --git a/tests/strbuftest.c b/tests/strbuftest.c
index 643ec98..933943f 100644
--- a/tests/strbuftest.c
+++ b/tests/strbuftest.c
@@ -112,6 +112,46 @@ START_TEST (addfln_should_add_newline)
 }
 END_TEST
 
+START_TEST (get_each_character)
+{
+    varray *chars;
+    strbuf *str = strbuf_init (20);
+    strbuf_add(str, "test");
+    chars = strbuf_chars(str);
+    ck_assert_int_eq (4, varray_length(chars));
+    ck_assert_str_eq ("t", (const char*) varray_get(chars, 0));
+    ck_assert_str_eq ("e", (const char*) varray_get(chars, 1));
+    ck_assert_str_eq ("s", (const char*) varray_get(chars, 2));
+    ck_assert_str_eq ("t", (const char*) varray_get(chars, 3));
+
+    strbuf_clear (str);
+    strbuf_add(str, "t");
+    chars = strbuf_chars(str);
+    ck_assert_int_eq (1, varray_length(chars));
+    ck_assert_str_eq ("t", (const char*) varray_get(chars, 0));
+
+    strbuf_clear (str);
+    chars = strbuf_chars(str);
+    ck_assert_int_eq (0, varray_length(chars));
+}
+END_TEST
+
+START_TEST (get_each_character_should_be_unicode_aware)
+{
+    varray *chars;
+    strbuf *str = strbuf_init (20);
+    strbuf_add(str, "മലയാളം");
+    chars = strbuf_chars(str);
+    ck_assert_int_eq (6, varray_length(chars));
+    ck_assert_str_eq ("മ", (const char*) varray_get(chars, 0));
+    ck_assert_str_eq ("ല", (const char*) varray_get(chars, 1));
+    ck_assert_str_eq ("യ", (const char*) varray_get(chars, 2));
+    ck_assert_str_eq ("ാ", (const char*) varray_get(chars, 3));
+    ck_assert_str_eq ("ള", (const char*) varray_get(chars, 4));
+    ck_assert_str_eq ("ം", (const char*) varray_get(chars, 5));
+}
+END_TEST
+
 TCase* get_strbuf_tests()
 {
     TCase* tcase = tcase_create("strbuf");
@@ -123,5 +163,7 @@ TCase* get_strbuf_tests()
     tcase_add_test (tcase, split_string);
     tcase_add_test (tcase, addf_should_not_add_newline);
     tcase_add_test (tcase, addfln_should_add_newline);
+    tcase_add_test (tcase, get_each_character);
+    tcase_add_test (tcase, get_each_character_should_be_unicode_aware);
     return tcase;
 }
diff --git a/util.h b/util.h
index dc76aab..23beafa 100644
--- a/util.h
+++ b/util.h
@@ -161,6 +161,7 @@ VARNAM_EXPORT int strbuf_addvf(struct strbuf *string, const char *format, va_lis
 VARNAM_EXPORT void strbuf_destroy(void *s);
 VARNAM_EXPORT char* strbuf_detach(struct strbuf *string);
 VARNAM_EXPORT const char* strbuf_to_s(struct strbuf *string);
+VARNAM_EXPORT struct varray_t* strbuf_chars(strbuf *b);
 VARNAM_EXPORT void strbuf_clear(struct strbuf *string);
 VARNAM_EXPORT int strbuf_is_blank(struct strbuf *string);
 VARNAM_EXPORT int strbuf_endswith(struct strbuf *string, const char *str);