Skip to content

Commit

Permalink
Added strbuf_chars() to strbuf, which returns each character in the string
Browse files Browse the repository at this point in the history
This function is Unicode-aware and works well for UTF-8 encoded text.
  • Loading branch information
navaneeth committed Jun 8, 2014
1 parent bd7102f commit 2defab0
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Expand Up @@ -110,6 +110,7 @@ SET_TARGET_PROPERTIES(${VARNAM_LIBRARY_NAME} PROPERTIES

# Custom targets: `vst` compiles the ml and hi scheme files with varnamc,
# `test` runs ./runtests from inside the tests directory.
# COMMAND must be upper-case to be recognised as the keyword; in lower case
# CMake treats the word `command` as the program to execute.
add_custom_target (vst COMMAND ./varnamc --compile schemes/ml && ./varnamc --compile schemes/hi)
add_custom_target (test COMMAND cd tests && ./runtests)

# Makes the distro
add_custom_target (distro COMMAND ./makedistro.sh "${VARNAM_VERSION_MAJOR}.${VARNAM_VERSION_MINOR}.${VARNAM_VERSION_PATCH}")
Expand Down
24 changes: 24 additions & 0 deletions strbuf.c
Expand Up @@ -177,6 +177,30 @@ int strbuf_addvf(struct strbuf *string, const char *format, va_list args)

}

/*
 * Gets each unicode character in this string.
 *
 * b - the buffer to split; b->buffer is scanned up to its terminating '\0'.
 *
 * Returns a new varray holding, in order, one detached string per character
 * read by READ_A_UTF8_CHAR. An empty buffer produces an empty array.
 * The returned result should be destroyed by the caller.
 * */
varray*
strbuf_chars(strbuf *b)
{
    const unsigned char *ustring; /* only used as READ_A_UTF8_CHAR's output argument */
    const char *inputcopy;
    int bytes_read;
    varray *chars;
    strbuf *tmp;

    inputcopy = b->buffer;
    chars = varray_init();
    while (*inputcopy != '\0') {
        /* Reset before every read, including the very first one, so the
         * byte count never starts from an indeterminate value. */
        bytes_read = 0;
        READ_A_UTF8_CHAR (ustring, inputcopy, bytes_read);

        /* inputcopy is expected to point just past the character that was
         * read, so the character's bytes start bytes_read positions back. */
        tmp = strbuf_init(8);
        strbuf_add_bytes (tmp, inputcopy - bytes_read, bytes_read);
        varray_push (chars, strbuf_detach(tmp));
    }
    return chars;
}

void strbuf_destroy(void *s)
{
strbuf *string;
Expand Down
42 changes: 42 additions & 0 deletions tests/strbuftest.c
Expand Up @@ -112,6 +112,46 @@ START_TEST (addfln_should_add_newline)
}
END_TEST

/*
 * strbuf_chars() on ASCII input: a four letter word yields four
 * single-letter strings in order, a one letter string yields one element,
 * and a cleared buffer yields an empty array.
 *
 * NOTE(review): the arrays returned by strbuf_chars() are not released here;
 * the varray release function is not visible from this test - confirm and
 * free them as well.
 */
START_TEST (get_each_character)
{
    varray *chars;
    strbuf *str = strbuf_init (20);

    /* Multi-character input */
    strbuf_add(str, "test");
    chars = strbuf_chars(str);
    ck_assert_int_eq (4, varray_length(chars));
    ck_assert_str_eq ("t", (const char*) varray_get(chars, 0));
    ck_assert_str_eq ("e", (const char*) varray_get(chars, 1));
    ck_assert_str_eq ("s", (const char*) varray_get(chars, 2));
    ck_assert_str_eq ("t", (const char*) varray_get(chars, 3));

    /* Single-character input */
    strbuf_clear (str);
    strbuf_add(str, "t");
    chars = strbuf_chars(str);
    ck_assert_int_eq (1, varray_length(chars));
    ck_assert_str_eq ("t", (const char*) varray_get(chars, 0));

    /* Empty input */
    strbuf_clear (str);
    chars = strbuf_chars(str);
    ck_assert_int_eq (0, varray_length(chars));

    /* Release the input buffer created by strbuf_init() above. */
    strbuf_destroy (str);
}
END_TEST

/*
 * strbuf_chars() on multi-byte UTF-8 input: the Malayalam word is expected
 * to split into six elements, one per character. The signs at index 3 and 5
 * are expected as elements of their own, not merged with the preceding
 * letter.
 *
 * NOTE(review): the array returned by strbuf_chars() is not released here;
 * the varray release function is not visible from this test - confirm and
 * free it as well.
 */
START_TEST (get_each_character_should_be_unicode_aware)
{
    varray *chars;
    strbuf *str = strbuf_init (20);

    strbuf_add(str, "മലയാളം");
    chars = strbuf_chars(str);
    ck_assert_int_eq (6, varray_length(chars));
    ck_assert_str_eq ("മ", (const char*) varray_get(chars, 0));
    ck_assert_str_eq ("ല", (const char*) varray_get(chars, 1));
    ck_assert_str_eq ("യ", (const char*) varray_get(chars, 2));
    ck_assert_str_eq ("ാ", (const char*) varray_get(chars, 3));
    ck_assert_str_eq ("ള", (const char*) varray_get(chars, 4));
    ck_assert_str_eq ("ം", (const char*) varray_get(chars, 5));

    /* Release the input buffer created by strbuf_init() above. */
    strbuf_destroy (str);
}
END_TEST

TCase* get_strbuf_tests()
{
TCase* tcase = tcase_create("strbuf");
Expand All @@ -123,5 +163,7 @@ TCase* get_strbuf_tests()
tcase_add_test (tcase, split_string);
tcase_add_test (tcase, addf_should_not_add_newline);
tcase_add_test (tcase, addfln_should_add_newline);
tcase_add_test (tcase, get_each_character);
tcase_add_test (tcase, get_each_character_should_be_unicode_aware);
return tcase;
}
1 change: 1 addition & 0 deletions util.h
Expand Up @@ -161,6 +161,7 @@ VARNAM_EXPORT int strbuf_addvf(struct strbuf *string, const char *format, va_lis
VARNAM_EXPORT void strbuf_destroy(void *s);
VARNAM_EXPORT char* strbuf_detach(struct strbuf *string);
VARNAM_EXPORT const char* strbuf_to_s(struct strbuf *string);
/* Gets each unicode character in the string as an array of strings.
 * The returned result should be destroyed by the caller. */
VARNAM_EXPORT struct varray_t* strbuf_chars(strbuf *b);
VARNAM_EXPORT void strbuf_clear(struct strbuf *string);
VARNAM_EXPORT int strbuf_is_blank(struct strbuf *string);
VARNAM_EXPORT int strbuf_endswith(struct strbuf *string, const char *str);
Expand Down

0 comments on commit 2defab0

Please sign in to comment.