diff --git a/.gitignore b/.gitignore index 8e5d128..6ca282f 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,7 @@ dox/BUILD.md wilted/ mulle-template-composer-* mulle-markdown-preview-*.html +coverage*.html +*.gcda +*.gcno +cola/wilted/ diff --git a/.mulle/etc/env/environment-user-nat.sh b/.mulle/etc/env/environment-user-nat.sh index 977474b..77164fc 100644 --- a/.mulle/etc/env/environment-user-nat.sh +++ b/.mulle/etc/env/environment-user-nat.sh @@ -1,6 +1,6 @@ # # # -export MULLE_CRAFT_POST_PROJECT="/home/mulle-kybernetik-tv/bin/reamalgamate-mulle-core" +export MULLE_CRAFT_POST_PROJECT="/home/nat/.mulle/bin/reamalgamate-mulle-core" diff --git a/.mulle/etc/match/match.d/86-header--project-only-headers b/.mulle/etc/match/match.d/86-header--project-only-headers new file mode 120000 index 0000000..d43227b --- /dev/null +++ b/.mulle/etc/match/match.d/86-header--project-only-headers @@ -0,0 +1 @@ +../../../share/match/match.d/86-header--project-only-headers \ No newline at end of file diff --git a/.mulle/share/env/environment-extension.sh b/.mulle/share/env/environment-extension.sh index 026d960..d19587c 100644 --- a/.mulle/share/env/environment-extension.sh +++ b/.mulle/share/env/environment-extension.sh @@ -22,6 +22,12 @@ export MULLE_MATCH_PATH=".mulle/etc/sourcetree:${PROJECT_SOURCE_DIR}:CMakeLists. 
export MULLE_MATCH_IGNORE_PATH="" +# +# mulle-c and mulle-objc projects have an actual latest tag, so don't resolve +# +export MULLE_SOURCETREE_RESOLVE_TAG="NO" + + # # tell mulle-sde to keep files protected from read/write changes # diff --git a/.mulle/share/env/motd b/.mulle/share/env/motd new file mode 100644 index 0000000..4e223cb --- /dev/null +++ b/.mulle/share/env/motd @@ -0,0 +1,2 @@ +Run external commands with mudo +Project is ready to craft diff --git a/.mulle/share/match/match.d/86-header--project-only-headers b/.mulle/share/match/match.d/86-header--project-only-headers new file mode 100644 index 0000000..dcd99de --- /dev/null +++ b/.mulle/share/match/match.d/86-header--project-only-headers @@ -0,0 +1,6 @@ +# This is the fallback if patterns file with smaller numbers fail. +# These are project headers, that are not included anywhere so far +# and not even exported as private! + +*.inc + diff --git a/.mulle/share/sde/version/mulle-c/c b/.mulle/share/sde/version/mulle-c/c index 2a0970c..c5523bd 100644 --- a/.mulle/share/sde/version/mulle-c/c +++ b/.mulle/share/sde/version/mulle-c/c @@ -1 +1 @@ -0.16.1 +0.17.0 diff --git a/.mulle/share/sde/version/mulle-c/c-demo b/.mulle/share/sde/version/mulle-c/c-demo index a803cc2..c5523bd 100644 --- a/.mulle/share/sde/version/mulle-c/c-demo +++ b/.mulle/share/sde/version/mulle-c/c-demo @@ -1 +1 @@ -0.14.0 +0.17.0 diff --git a/.mulle/share/sde/version/mulle-sde/c-cmake b/.mulle/share/sde/version/mulle-sde/c-cmake index 4e8f395..1b58cc1 100644 --- a/.mulle/share/sde/version/mulle-sde/c-cmake +++ b/.mulle/share/sde/version/mulle-sde/c-cmake @@ -1 +1 @@ -0.26.0 +0.27.0 diff --git a/.mulle/share/sde/version/mulle-sde/c-demo b/.mulle/share/sde/version/mulle-sde/c-demo index 2157409..1b58cc1 100644 --- a/.mulle/share/sde/version/mulle-sde/c-demo +++ b/.mulle/share/sde/version/mulle-sde/c-demo @@ -1 +1 @@ -0.22.0 +0.27.0 diff --git a/README.md b/README.md index 86011b9..c839287 100644 --- a/README.md +++ b/README.md @@ -7,9 
+7,10 @@ contains some limited `<string.h>` like functionality for UTF16 and UTF32. It is the backbone of **NSString**. + | Release Version | Release Notes |-------------------------------------------------------|-------------- -| ![Mulle kybernetiK tag](https://img.shields.io/github/tag/mulle-c/mulle-utf.svg?branch=release) [![Build Status](https://github.com/mulle-c/mulle-utf/workflows/CI/badge.svg?branch=release)](//github.com/mulle-c/mulle-utf/actions)| [RELEASENOTES](RELEASENOTES.md) | +| ![Mulle kybernetiK tag](https://img.shields.io/github/tag/mulle-c/mulle-utf.svg?branch=release) [![Build Status](https://github.com/mulle-c/mulle-utf/workflows/CI/badge.svg?branch=release)](//github.com/mulle-c/mulle-utf/actions) ![Coverage](https://img.shields.io/badge/coverage-75%25%C2%A0-yellow) | [RELEASENOTES](RELEASENOTES.md) | ## API @@ -23,6 +24,8 @@ It is the backbone of **NSString**. | [information](dox/API_INFORMATION.md) | Analyze and classify string encodings | | [string](dox/API_STRING.md) | Primitive UTF16 and UTF32 string handling | +`size_t` is used for byte-sized (really `char`-sized) lengths. UTF16 and +UTF32 use `unsigned int`. 
@@ -96,7 +99,7 @@ cmake --install build --config Release ## Author -[Nat!](https://mulle-kybernetik.com/weblog) for Mulle kybernetiK +[Nat!](https://mulle-kybernetik.com/weblog) for Mulle kybernetiK diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 03ef28d..9cc7b3a 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -1,3 +1,15 @@ +# 4.0.0 + +* upped the test coverage significantly +* **BREAKING CHANGE** the `mulle_utf_add_bytes_function_t` typedef is no longer a function pointer, to keep it more in line with other library callback typedefs +* **BREAKING CHANGE** changed the parameter order of `_strncpy` functions, because progress demands it +* **BREAKING CHANGE** renamed `mulle_unicode_is_privatecharacterplane` to `mulle_utf_is_privatecharacterplane` (but MulleObjCUnicodeFoundation resupplies it as a static inline function) +* **BREAKING CHANGE** now uses ``size_t`` for char (byte) lengths and unsigned int for anything else +* `mulle_utf8_t` is no more, it is now just char +* improve `mulle_utf32_as_utf8` conversion, add some `mulle_utf8data` introspection +* improve buffer conversion by not calling addbytes for each character + + ### 3.1.3 * remove package.json as it conflicts with clib.json diff --git a/clib.json b/clib.json index 8034578..ef774a2 100644 --- a/clib.json +++ b/clib.json @@ -35,6 +35,8 @@ "src/mulle-utf32-string.h", "src/mulle-utf32.c", "src/mulle-utf32.h", + "src/mulle-utf8-string.c", + "src/mulle-utf8-string.h", "src/mulle-utf8.c", "src/mulle-utf8.h", "src/reflect/_mulle-utf-include-private.h", diff --git a/cmake/reflect/_Dependencies.cmake b/cmake/reflect/_Dependencies.cmake index 4667643..c1a6074 100644 --- a/cmake/reflect/_Dependencies.cmake +++ b/cmake/reflect/_Dependencies.cmake @@ -17,35 +17,35 @@ endif() # Disable for this platform: `mulle-sourcetree mark mulle-allocator no-cmake-platform-${MULLE_UNAME}` # Disable for a sdk: `mulle-sourcetree mark mulle-allocator no-cmake-sdk-` # -if( NOT MULLE_ALLOCATOR_LIBRARY) - find_library( 
MULLE_ALLOCATOR_LIBRARY NAMES +if( NOT MULLE__ALLOCATOR_LIBRARY) + find_library( MULLE__ALLOCATOR_LIBRARY NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-allocator${CMAKE_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-allocator${CMAKE_STATIC_LIBRARY_SUFFIX} mulle-allocator NO_CMAKE_SYSTEM_PATH NO_SYSTEM_ENVIRONMENT_PATH ) - if( NOT MULLE_ALLOCATOR_LIBRARY AND NOT DEPENDENCY_IGNORE_SYSTEM_LIBARIES) - find_library( MULLE_ALLOCATOR_LIBRARY NAMES + if( NOT MULLE__ALLOCATOR_LIBRARY AND NOT DEPENDENCY_IGNORE_SYSTEM_LIBARIES) + find_library( MULLE__ALLOCATOR_LIBRARY NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-allocator${CMAKE_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-allocator${CMAKE_STATIC_LIBRARY_SUFFIX} mulle-allocator ) endif() - message( STATUS "MULLE_ALLOCATOR_LIBRARY is ${MULLE_ALLOCATOR_LIBRARY}") + message( STATUS "MULLE__ALLOCATOR_LIBRARY is ${MULLE__ALLOCATOR_LIBRARY}") # # The order looks ascending, but due to the way this file is read # it ends up being descending, which is what we need. # - if( MULLE_ALLOCATOR_LIBRARY) + if( MULLE__ALLOCATOR_LIBRARY) # - # Add MULLE_ALLOCATOR_LIBRARY to DEPENDENCY_LIBRARIES list. + # Add MULLE__ALLOCATOR_LIBRARY to DEPENDENCY_LIBRARIES list. 
# Disable with: `mulle-sourcetree mark mulle-allocator no-cmake-add` # - list( APPEND DEPENDENCY_LIBRARIES ${MULLE_ALLOCATOR_LIBRARY}) + list( APPEND DEPENDENCY_LIBRARIES ${MULLE__ALLOCATOR_LIBRARY}) # intentionally left blank else() # Disable with: `mulle-sourcetree mark mulle-allocator no-require-link` - message( FATAL_ERROR "MULLE_ALLOCATOR_LIBRARY was not found") + message( FATAL_ERROR "MULLE__ALLOCATOR_LIBRARY was not found") endif() endif() @@ -56,34 +56,34 @@ endif() # Disable for this platform: `mulle-sourcetree mark mulle-data no-cmake-platform-${MULLE_UNAME}` # Disable for a sdk: `mulle-sourcetree mark mulle-data no-cmake-sdk-` # -if( NOT MULLE_DATA_LIBRARY) - find_library( MULLE_DATA_LIBRARY NAMES +if( NOT MULLE__DATA_LIBRARY) + find_library( MULLE__DATA_LIBRARY NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-data${CMAKE_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-data${CMAKE_STATIC_LIBRARY_SUFFIX} mulle-data NO_CMAKE_SYSTEM_PATH NO_SYSTEM_ENVIRONMENT_PATH ) - if( NOT MULLE_DATA_LIBRARY AND NOT DEPENDENCY_IGNORE_SYSTEM_LIBARIES) - find_library( MULLE_DATA_LIBRARY NAMES + if( NOT MULLE__DATA_LIBRARY AND NOT DEPENDENCY_IGNORE_SYSTEM_LIBARIES) + find_library( MULLE__DATA_LIBRARY NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-data${CMAKE_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_STATIC_LIBRARY_PREFIX}mulle-data${CMAKE_STATIC_LIBRARY_SUFFIX} mulle-data ) endif() - message( STATUS "MULLE_DATA_LIBRARY is ${MULLE_DATA_LIBRARY}") + message( STATUS "MULLE__DATA_LIBRARY is ${MULLE__DATA_LIBRARY}") # # The order looks ascending, but due to the way this file is read # it ends up being descending, which is what we need. # - if( MULLE_DATA_LIBRARY) + if( MULLE__DATA_LIBRARY) # - # Add MULLE_DATA_LIBRARY to DEPENDENCY_LIBRARIES list. + # Add MULLE__DATA_LIBRARY to DEPENDENCY_LIBRARIES list. 
# Disable with: `mulle-sourcetree mark mulle-data no-cmake-add` # - list( APPEND DEPENDENCY_LIBRARIES ${MULLE_DATA_LIBRARY}) + list( APPEND DEPENDENCY_LIBRARIES ${MULLE__DATA_LIBRARY}) # intentionally left blank else() # Disable with: `mulle-sourcetree mark mulle-data no-require-link` - message( FATAL_ERROR "MULLE_DATA_LIBRARY was not found") + message( FATAL_ERROR "MULLE__DATA_LIBRARY was not found") endif() endif() diff --git a/cmake/reflect/_Headers.cmake b/cmake/reflect/_Headers.cmake index a88d8cd..ac8c580 100644 --- a/cmake/reflect/_Headers.cmake +++ b/cmake/reflect/_Headers.cmake @@ -101,6 +101,7 @@ src/mulle-utf16-string.h src/mulle-utf32.h src/mulle-utf32-string.h src/mulle-utf8.h +src/mulle-utf8-string.h src/mulle-utf-convenience.h src/mulle-utf.h src/mulle-utf-noncharacter.h diff --git a/cmake/reflect/_Sources.cmake b/cmake/reflect/_Sources.cmake index a818162..8075acd 100644 --- a/cmake/reflect/_Sources.cmake +++ b/cmake/reflect/_Sources.cmake @@ -19,6 +19,7 @@ src/mulle-utf16-string.c src/mulle-utf32.c src/mulle-utf32-string.c src/mulle-utf8.c +src/mulle-utf8-string.c src/mulle-utf-convenience.c src/mulle-utf-noncharacter.c src/mulle-utf-privatecharacter.c diff --git a/cmake/share/Environment.cmake b/cmake/share/Environment.cmake index 6f5131c..c3eaa37 100644 --- a/cmake/share/Environment.cmake +++ b/cmake/share/Environment.cmake @@ -40,7 +40,7 @@ if( NOT __ENVIRONMENT__CMAKE__) # distinctions, the paths will be different # # Slight change. MULLE_SDK_PATH contains now addiction_dir - # and depedency_dir, but notthing else. The configuration/sdk/platform + # and depedency_dir, but nothing else. 
The configuration/sdk/platform # is passed in MULLE_SDK_SUBDIR # if( NOT MULLE_SDK_PATH) @@ -55,10 +55,16 @@ if( NOT __ENVIRONMENT__CMAKE__) list( GET MULLE_SDK_PATH 1 ADDICTION_DIR) option( DEPENDENCY_IGNORE_SYSTEM_LIBARIES "Ignore system library paths in search for dependencies" ON) + if( NOT DEPENDENCY_IGNORE_SYSTEM_LIBARIES) + message( WARNING "Will also search system paths for dependencies, as MULLE_SDK_PATH is not set") + endif() else() if( NOT MULLE_SDK_SUBDIR) set( MULLE_SDK_SUBDIR "${CMAKE_BUILD_TYPE}") endif() + if( NOT MULLE_SDK_SUBDIR) + set( MULLE_SDK_SUBDIR "Debug") + endif() option( DEPENDENCY_IGNORE_SYSTEM_LIBARIES "Ignore system library paths in search for dependencies" OFF) endif() @@ -79,9 +85,6 @@ if( NOT __ENVIRONMENT__CMAKE__) endif() endif() - if( NOT DEPENDENCY_IGNORE_SYSTEM_LIBARIES) - message( WARNING "Will also search system paths for dependencies") - endif() message( STATUS "DEPENDENCY_DIR=\"${DEPENDENCY_DIR}\"") list( APPEND ADDITIONAL_BIN_PATH "${DEPENDENCY_DIR}/bin") diff --git a/cmake/share/Headers.cmake b/cmake/share/Headers.cmake index 24671a2..f393b23 100644 --- a/cmake/share/Headers.cmake +++ b/cmake/share/Headers.cmake @@ -57,6 +57,10 @@ set( TMP_HEADERS ${PUBLIC_HEADERS} ) ResolveFileSymlinksIfNeeded( TMP_HEADERS INSTALL_PUBLIC_HEADERS) +# +# Do not install generated private headers and include-private.h +# which aren't valid outside of the project scope. +# set( TMP_HEADERS ${PRIVATE_HEADERS}) if( TMP_HEADERS) list( REMOVE_ITEM TMP_HEADERS "include-private.h") diff --git a/cola/api.md.bud b/cola/api.md.bud index dcc85ce..31c6956 100644 --- a/cola/api.md.bud +++ b/cola/api.md.bud @@ -8,3 +8,6 @@ | [conversion](dox/API_CONVERSION.md) | Character and string conversions | | [information](dox/API_INFORMATION.md) | Analyze and classify string encodings | | [string](dox/API_STRING.md) | Primitive UTF16 and UTF32 string handling | + +`size_t` is used for byte-sized (really `char`-sized) lengths. UTF16 and +UTF32 use `unsigned int`. 
\ No newline at end of file diff --git a/cola/properties.plist b/cola/properties.plist index 6cd3c2c..7ac950e 100644 --- a/cola/properties.plist +++ b/cola/properties.plist @@ -1,4 +1,9 @@ { + config = + { + showButtons = YES; + showCoverage = YES; + }; project = { description="🔤 UTF8-16-32 analysis and manipulation library"; diff --git a/src/mulle-ascii.c b/src/mulle-ascii.c index 1513cc4..6499961 100644 --- a/src/mulle-ascii.c +++ b/src/mulle-ascii.c @@ -90,7 +90,7 @@ mulle_utf32_t *_mulle_ascii_convert_to_utf32( char *src, void mulle_ascii_bufferconvert_to_utf16( char *src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { char *sentinel; char _c; @@ -118,7 +118,7 @@ void mulle_ascii_bufferconvert_to_utf16( char *src, void mulle_ascii_bufferconvert_to_utf32( char *src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { char *sentinel; char _c; diff --git a/src/mulle-ascii.h b/src/mulle-ascii.h index 29f0d62..5b47b15 100644 --- a/src/mulle-ascii.h +++ b/src/mulle-ascii.h @@ -78,12 +78,12 @@ MULLE__UTF_GLOBAL void mulle_ascii_bufferconvert_to_utf16( char *src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); MULLE__UTF_GLOBAL void mulle_ascii_bufferconvert_to_utf32( char *src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); #endif diff --git a/src/mulle-char5.c b/src/mulle-char5.c index a43c20b..6d52b62 100644 --- a/src/mulle-char5.c +++ b/src/mulle-char5.c @@ -11,6 +11,10 @@ #include "mulle-char5.h" +#include "mulle-utf8.h" +#include "mulle-utf16.h" +#include "mulle-utf32.h" + char mulle_char5_lookup_table[ 128] = { @@ -89,6 +93,12 @@ int mulle_char5_is_char5string32( char *src, size_t len) { char *sentinel; + if( ! 
src) + return( 0); + + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + if( len > mulle_char5_maxlength32) return( 0); @@ -108,6 +118,12 @@ int mulle_char5_is_char5string64( char *src, size_t len) { char *sentinel; + if( ! src) + return( 0); + + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + if( len > mulle_char5_maxlength64) return( 0); @@ -131,14 +147,18 @@ uint32_t mulle_char5_encode32( char *src, size_t len) int char5; uint32_t value; + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + + assert( len <= mulle_char5_maxlength32); + value = 0; sentinel = src; s = &src[ len]; while( s > sentinel) { c = *--s; - if( ! c) - continue; + assert( c); char5 = mulle_char5_lookup_character( c); assert( char5 > 0 && char5 < 0x20); @@ -158,14 +178,18 @@ uint64_t mulle_char5_encode64( char *src, size_t len) int char5; uint64_t value; + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + + assert( len <= mulle_char5_maxlength64); + value = 0; sentinel = src; s = &src[ len]; while( s > sentinel) { c = *--s; - if( ! c) - continue; + assert( c); char5 = mulle_char5_lookup_character( c); assert( char5 > 0 && char5 < 0x20); @@ -185,14 +209,18 @@ uint32_t mulle_char5_encode32_utf16( mulle_utf16_t *src, size_t len) int char5; uint32_t value; + if( len == (size_t ) -1) + len = mulle_utf16_strlen( src); + + assert( len <= mulle_char5_maxlength32); + value = 0; sentinel = src; s = &src[ len]; while( s > sentinel) { c = *--s; - if( ! c) - continue; + assert( c); char5 = mulle_char5_lookup_character( c); assert( char5 > 0 && char5 < 0x20); @@ -212,14 +240,18 @@ uint64_t mulle_char5_encode64_utf16( mulle_utf16_t *src, size_t len) int char5; uint64_t value; + if( len == (size_t ) -1) + len = mulle_utf16_strlen( src); + + assert( len <= mulle_char5_maxlength64); + value = 0; sentinel = src; s = &src[ len]; while( s > sentinel) { c = *--s; - if( ! 
c) - continue; + assert( c); char5 = mulle_char5_lookup_character( c); assert( char5 > 0 && char5 < 0x20); @@ -231,8 +263,6 @@ uint64_t mulle_char5_encode64_utf16( mulle_utf16_t *src, size_t len) } - - uint32_t mulle_char5_encode32_utf32( mulle_utf32_t *src, size_t len) { mulle_utf32_t *s; @@ -241,14 +271,18 @@ uint32_t mulle_char5_encode32_utf32( mulle_utf32_t *src, size_t len) int char5; uint32_t value; + if( len == (size_t ) -1) + len = mulle_utf32_strlen( src); + + assert( len <= mulle_char5_maxlength32); + value = 0; sentinel = src; s = &src[ len]; while( s > sentinel) { c = *--s; - if( ! c) - continue; + assert( c); char5 = mulle_char5_lookup_character( c); assert( char5 > 0 && char5 < 0x20); @@ -268,14 +302,18 @@ uint64_t mulle_char5_encode64_utf32( mulle_utf32_t *src, size_t len) int char5; uint64_t value; + if( len == (size_t ) -1) + len = mulle_utf32_strlen( src); + + assert( len <= mulle_char5_maxlength64); + value = 0; sentinel = src; s = &src[ len]; while( s > sentinel) { c = *--s; - if( ! c) - continue; + assert( c); char5 = mulle_char5_lookup_character( c); assert( char5 > 0 && char5 < 0x20); @@ -300,9 +338,8 @@ size_t mulle_char5_decode32( uint32_t value, char *dst, size_t len) if( ! value) break; - char5 = value & 0x1F; - *s++ = (char) mulle_char5_decode_character( char5); - + char5 = value & 0x1F; + *s++ = (char) mulle_char5_decode_character( char5); value >>= 5; } return( s - dst); @@ -322,9 +359,8 @@ size_t mulle_char5_decode64( uint64_t value, char *dst, size_t len) if( ! value) break; - char5 = value & 0x1F; - *s++ = (char) mulle_char5_decode_character( char5); - + char5 = value & 0x1F; + *s++ = (char) mulle_char5_decode_character( char5); value >>= 5; } return( s - dst); @@ -335,12 +371,12 @@ int mulle_char5_get64( uint64_t value, unsigned int index) { int char5; + + assert( index < 12); + do { - char5 = value & 0x1F; - if( ! 
value) - break; - + char5 = value & 0x1F; value >>= 5; } while( index--); @@ -353,12 +389,11 @@ int mulle_char5_get32( uint32_t value, unsigned int index) { int char5; + assert( index < 6); + do { char5 = value & 0x1F; - if( ! value) - break; - value >>= 5; } while( index--); diff --git a/src/mulle-char5.h b/src/mulle-char5.h index 6a52ba5..5b0e735 100644 --- a/src/mulle-char5.h +++ b/src/mulle-char5.h @@ -160,8 +160,8 @@ static inline size_t mulle_char5_strlen32( uint32_t value) // static inline size_t mulle_char5_fstrlen64( uint64_t value) { - int64_t mask; - size_t len; + uint64_t mask; + size_t len; // if any of m.l.j.i.h.g is set, we know f.e.d.c.b.a exist, so len 6 + strlen( m.l.j.i.h.g) mask = ~0x3FFFFFFFLL; // ~ffffff.eeeee.ddddd.ccccc.bbbbb.aaaaa @@ -195,8 +195,8 @@ static inline size_t mulle_char5_fstrlen64( uint64_t value) // static inline size_t mulle_char5_fstrlen32( uint32_t value) { - int32_t mask; - size_t len; + uint32_t mask; + size_t len; // if any of f.e.d is set, we know c.b.a exist, so len 3 + strlen( ffffff.eeeee.ddddd) mask = ~0x7FFF; // ~ccccc.bbbbb.aaaaa diff --git a/src/mulle-char7.c b/src/mulle-char7.c index e9dfd60..6e69262 100644 --- a/src/mulle-char7.c +++ b/src/mulle-char7.c @@ -10,11 +10,21 @@ #include "mulle-char7.h" +#include "mulle-utf8.h" +#include "mulle-utf16.h" +#include "mulle-utf32.h" + int mulle_char7_is_char7string32( char *src, size_t len) { char *sentinel; + if( ! src) + return( 0); + + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + if( len > mulle_char7_maxlength32) return( 0); @@ -36,6 +46,12 @@ int mulle_char7_is_char7string64( char *src, size_t len) { char *sentinel; + if( ! 
src) + return( 0); + + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + if( len > mulle_char7_maxlength64) return( 0); @@ -60,6 +76,9 @@ uint32_t mulle_char7_encode32( char *src, size_t len) int char7; uint32_t value; + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + assert( len <= mulle_char7_maxlength32); value = 0; @@ -68,8 +87,7 @@ uint32_t mulle_char7_encode32( char *src, size_t len) while( s > sentinel) { char7 = *--s; - if( ! char7) - continue; + assert( char7); assert( ! (char7 & 0x80)); value <<= 7; @@ -86,6 +104,9 @@ uint64_t mulle_char7_encode64( char *src, size_t len) int char7; uint64_t value; + if( len == (size_t ) -1) + len = mulle_utf8_strlen( src); + assert( len <= mulle_char7_maxlength64); value = 0; @@ -94,8 +115,7 @@ uint64_t mulle_char7_encode64( char *src, size_t len) while( s > sentinel) { char7 = *--s; - if( ! char7) - continue; + assert( char7); assert( ! (char7 & 0x80)); value <<= 7; @@ -112,6 +132,9 @@ uint32_t mulle_char7_encode32_utf16( mulle_utf16_t *src, size_t len) int char7; uint32_t value; + if( len == (size_t ) -1) + len = mulle_utf16_strlen( src); + assert( len <= mulle_char7_maxlength32); value = 0; @@ -120,8 +143,7 @@ uint32_t mulle_char7_encode32_utf16( mulle_utf16_t *src, size_t len) while( s > sentinel) { char7 = *--s; - if( ! char7) - continue; + assert( char7); assert( ! (char7 & 0x80)); value <<= 7; @@ -138,6 +160,9 @@ uint64_t mulle_char7_encode64_utf16( mulle_utf16_t *src, size_t len) int char7; uint64_t value; + if( len == (size_t ) -1) + len = mulle_utf16_strlen( src); + assert( len <= mulle_char7_maxlength64); value = 0; @@ -146,8 +171,7 @@ uint64_t mulle_char7_encode64_utf16( mulle_utf16_t *src, size_t len) while( s > sentinel) { char7 = *--s; - if( ! char7) - continue; + assert( char7); assert( ! 
(char7 & 0x80)); value <<= 7; @@ -164,6 +188,9 @@ uint32_t mulle_char7_encode32_utf32( mulle_utf32_t *src, size_t len) int char7; uint32_t value; + if( len == (size_t ) -1) + len = mulle_utf32_strlen( src); + assert( len <= mulle_char7_maxlength32); value = 0; @@ -172,8 +199,7 @@ uint32_t mulle_char7_encode32_utf32( mulle_utf32_t *src, size_t len) while( s > sentinel) { char7 = *--s; - if( ! char7) - continue; + assert( char7); assert( ! (char7 & 0x80)); value <<= 7; @@ -190,6 +216,9 @@ uint64_t mulle_char7_encode64_utf32( mulle_utf32_t *src, size_t len) int char7; uint64_t value; + if( len == (size_t ) -1) + len = mulle_utf32_strlen( src); + assert( len <= mulle_char7_maxlength64); value = 0; @@ -198,8 +227,7 @@ uint64_t mulle_char7_encode64_utf32( mulle_utf32_t *src, size_t len) while( s > sentinel) { char7 = *--s; - if( ! char7) - continue; + assert( char7); assert( ! (char7 & 0x80)); value <<= 7; @@ -251,12 +279,10 @@ int mulle_char7_get64( uint64_t value, unsigned int index) { int char7; + assert( index < 8); do { char7 = value & 0x7F; - if( ! value) - break; - value >>= 7; } while( index--); @@ -269,12 +295,10 @@ int mulle_char7_get32( uint32_t value, unsigned int index) { int char7; + assert( index < 4); do { - char7 = value & 0x7F; - if( ! 
value) - break; - + char7 = value & 0x7F; value >>= 7; } while( index--); diff --git a/src/mulle-char7.h b/src/mulle-char7.h index 959fc4e..43edce5 100644 --- a/src/mulle-char7.h +++ b/src/mulle-char7.h @@ -122,8 +122,8 @@ static inline size_t mulle_char7_strlen32( uint32_t value) // static inline size_t mulle_char7_fstrlen64( uint64_t value) { - int64_t mask; - size_t len; + uint64_t mask; + size_t len; mask = ~0xFFFFFFFLL; len = 0; @@ -151,8 +151,8 @@ static inline size_t mulle_char7_fstrlen64( uint64_t value) // static inline size_t mulle_char7_fstrlen32( uint32_t value) { - int32_t mask; - size_t len; + uint32_t mask; + size_t len; mask = ~0x3FFF; len = 0; diff --git a/src/mulle-utf-convenience.c b/src/mulle-utf-convenience.c index 45b09f4..27ee227 100644 --- a/src/mulle-utf-convenience.c +++ b/src/mulle-utf-convenience.c @@ -199,7 +199,7 @@ mulle_utf32_t *mulle_utf8_convert_to_utf32_string( char *src, # pragma mark - utf16 char *mulle_utf16_convert_to_utf8_string( mulle_utf16_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator) { struct mulle_utf_information info; @@ -229,7 +229,7 @@ char *mulle_utf16_convert_to_utf8_string( mulle_utf16_t *src, mulle_utf32_t *mulle_utf16_convert_to_utf32_string( mulle_utf16_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator) { struct mulle_utf_information info; @@ -263,7 +263,7 @@ mulle_utf32_t *mulle_utf16_convert_to_utf32_string( mulle_utf16_t *src, # pragma mark utf32 char *mulle_utf32_convert_to_utf8_string( mulle_utf32_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator) { struct mulle_utf_information info; @@ -293,7 +293,7 @@ char *mulle_utf32_convert_to_utf8_string( mulle_utf32_t *src, mulle_utf16_t *mulle_utf32_convert_to_utf16_string( mulle_utf32_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator) { struct mulle_utf_information info; @@ -336,7 +336,7 @@ int _mulle_utf8_character_mogrify( struct mulle_utf8data *dst, char 
*q_sentinel; mulle_utf32_t c; mulle_utf32_t d; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); @@ -381,7 +381,7 @@ int _mulle_utf16_character_mogrify_unsafe( struct mulle_utf16data *dst, mulle_utf16_t *q; mulle_utf32_t c; mulle_utf32_t d; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); @@ -423,7 +423,7 @@ int _mulle_utf16_character_mogrify( struct mulle_utf32data *dst, mulle_utf32_t *q; mulle_utf32_t c; mulle_utf32_t d; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); @@ -467,7 +467,7 @@ int _mulle_utf32_character_mogrify( struct mulle_utf32data *dst, mulle_utf32_t *p_sentinel; mulle_utf32_t c; mulle_utf32_t d; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); @@ -510,7 +510,7 @@ int _mulle_utf8_word_mogrify( struct mulle_utf8data *dst, char *q; char *p_sentinel; char *q_sentinel; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); @@ -567,7 +567,7 @@ int _mulle_utf16_word_mogrify( struct mulle_utf32data *dst, mulle_utf16_t *p; mulle_utf16_t *p_sentinel; mulle_utf32_t *q; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); @@ -631,7 +631,7 @@ int _mulle_utf32_word_mogrify( struct mulle_utf32data *dst, mulle_utf32_t *p; mulle_utf32_t *q; mulle_utf32_t *p_sentinel; - size_t conversions; + unsigned int conversions; assert( info); assert( dst); diff --git a/src/mulle-utf-convenience.h b/src/mulle-utf-convenience.h index b0628f3..5ebae22 100644 --- a/src/mulle-utf-convenience.h +++ b/src/mulle-utf-convenience.h @@ -68,20 +68,20 @@ mulle_utf32_t *mulle_utf8_convert_to_utf32_string( char *src, MULLE__UTF_GLOBAL char *mulle_utf16_convert_to_utf8_string( mulle_utf16_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator); MULLE__UTF_GLOBAL mulle_utf32_t *mulle_utf16_convert_to_utf32_string( mulle_utf16_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator); 
MULLE__UTF_GLOBAL char *mulle_utf32_convert_to_utf8_string( mulle_utf32_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator); MULLE__UTF_GLOBAL mulle_utf16_t *mulle_utf32_convert_to_utf16_string( mulle_utf32_t *src, - size_t len, + unsigned int len, struct mulle_allocator *allocator); @@ -93,6 +93,7 @@ struct mulle_utf_mogrification_info int (*is_white)( mulle_utf32_t); }; + typedef int mulle_utf8_mogrifier_function_t( struct mulle_utf8data *dst, struct mulle_utf8data *src, struct mulle_utf_mogrification_info *info); @@ -149,9 +150,4 @@ int _mulle_utf32_word_mogrify( struct mulle_utf32data *dst, struct mulle_utf32data *src, struct mulle_utf_mogrification_info *info); - -#if MULLE__ALLOCATOR_VERSION < ((1 << 20) | (5 << 8) | 0) -# error "mulle-allocator is too old" -#endif - #endif /* mulle_utf_convenience_h */ diff --git a/src/mulle-utf-noncharacter.c b/src/mulle-utf-noncharacter.c index 594c4b3..0d415d5 100644 --- a/src/mulle-utf-noncharacter.c +++ b/src/mulle-utf-noncharacter.c @@ -43,7 +43,7 @@ int mulle_utf32_is_noncharacter( mulle_utf32_t c) if( c < 0x10000) { if( c < 0x0FFF) - return( 0); + return( 0); if( mulle_utf16_is_noncharacter( (mulle_utf16_t) c)) return( 1); } diff --git a/src/mulle-utf-privatecharacter.c b/src/mulle-utf-privatecharacter.c index add26d7..5494719 100644 --- a/src/mulle-utf-privatecharacter.c +++ b/src/mulle-utf-privatecharacter.c @@ -37,7 +37,7 @@ int mulle_utf32_is_privatecharacter( int32_t c) } -int mulle_unicode_is_privatecharacterplane( unsigned int plane) +int mulle_utf_is_privatecharacterplane( unsigned int plane) { switch( plane) { diff --git a/src/mulle-utf-rover.c b/src/mulle-utf-rover.c index 8816afd..9c43d37 100644 --- a/src/mulle-utf-rover.c +++ b/src/mulle-utf-rover.c @@ -112,7 +112,7 @@ static void mulle_utf32_rover_dial_back( struct mulle_utf_rover *rover) void _mulle_utf32_rover_init( struct mulle_utf_rover *rover, mulle_utf32_t *s, - size_t len) + unsigned int len) { rover->s = s; 
rover->sentinel = &s[ len]; @@ -123,7 +123,7 @@ void _mulle_utf32_rover_init( struct mulle_utf_rover *rover, void _mulle_utf16_rover_init( struct mulle_utf_rover *rover, mulle_utf16_t *s, - size_t len) + unsigned int len) { rover->s = s; rover->sentinel = &s[ len]; diff --git a/src/mulle-utf-rover.h b/src/mulle-utf-rover.h index c0b6253..4b2ba41 100644 --- a/src/mulle-utf-rover.h +++ b/src/mulle-utf-rover.h @@ -34,8 +34,8 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // -#ifndef mulle_utf32_rover_h__ -#define mulle_utf32_rover_h__ +#ifndef mulle_utf_rover_h__ +#define mulle_utf_rover_h__ #include "mulle-utf-type.h" @@ -76,11 +76,11 @@ static inline void _mulle_utf_rover_dial_back( struct mulle_utf_rover *rover) MULLE__UTF_GLOBAL void _mulle_utf32_rover_init( struct mulle_utf_rover *rover, mulle_utf32_t *s, - size_t len); + unsigned int len); MULLE__UTF_GLOBAL void _mulle_utf16_rover_init( struct mulle_utf_rover *rover, mulle_utf16_t *s, - size_t len); + unsigned int len); MULLE__UTF_GLOBAL void _mulle_utf8_rover_init( struct mulle_utf_rover *rover, char *s, diff --git a/src/mulle-utf-scan.c b/src/mulle-utf-scan.c index c038775..eee64cd 100644 --- a/src/mulle-utf-scan.c +++ b/src/mulle-utf-scan.c @@ -142,7 +142,7 @@ int _mulle_utf8_scan_longlong_decimal( char **p_s, int _mulle_utf16_scan_longlong_decimal( mulle_utf16_t **p_s, - size_t len, + unsigned int len, long long *p_value) { struct mulle_utf_rover rover; @@ -157,7 +157,7 @@ int _mulle_utf16_scan_longlong_decimal( mulle_utf16_t **p_s, int _mulle_utf32_scan_longlong_decimal( mulle_utf32_t **p_s, - size_t len, + unsigned int len, long long *p_value) { struct mulle_utf_rover rover; @@ -184,7 +184,7 @@ int _mulle_utf32_scan_longlong_decimal( mulle_utf32_t **p_s, // returns -3 : contains trailing garbage // int _mulle_utf32_scan_longlong_decimal( mulle_utf32_t **p_s, - size_t len, + unsigned int len, long long *p_value) { enum scan_state state; diff 
--git a/src/mulle-utf-scan.h b/src/mulle-utf-scan.h index 32d2825..0c5dffa 100644 --- a/src/mulle-utf-scan.h +++ b/src/mulle-utf-scan.h @@ -63,12 +63,12 @@ int _mulle_utf8_scan_longlong_decimal( char **p_s, MULLE__UTF_GLOBAL int _mulle_utf16_scan_longlong_decimal( mulle_utf16_t **p_s, - size_t len, + unsigned int len, long long *p_value); MULLE__UTF_GLOBAL int _mulle_utf32_scan_longlong_decimal( mulle_utf32_t **p_s, - size_t len, + unsigned int len, long long *p_value); #endif diff --git a/src/mulle-utf-type.h b/src/mulle-utf-type.h index 83384ec..9a45c3d 100644 --- a/src/mulle-utf-type.h +++ b/src/mulle-utf-type.h @@ -92,7 +92,7 @@ enum mulle_utf_scan_return }; // void * is a user pointer, supplied else -typedef void (*mulle_utf_add_bytes_function_t)( void *userinfo, void *bytes, size_t length); +typedef void mulle_utf_add_bytes_function_t( void *userinfo, void *bytes, size_t length); enum mulle_utf_charinfo diff --git a/src/mulle-utf.h b/src/mulle-utf.h index 9dbfccf..0ce1936 100644 --- a/src/mulle-utf.h +++ b/src/mulle-utf.h @@ -37,7 +37,7 @@ #ifndef mulle_utf_h__ #define mulle_utf_h__ -#define MULLE__UTF_VERSION ((4 << 20) | (0 << 8) | 0) +#define MULLE__UTF_VERSION ((4UL << 20) | (0 << 8) | 0) #include "include.h" @@ -49,6 +49,7 @@ #include "mulle-utf8.h" #include "mulle-utf16.h" #include "mulle-utf32.h" +#include "mulle-utf8-string.h" #include "mulle-utf16-string.h" #include "mulle-utf32-string.h" diff --git a/src/mulle-utf16-string.c b/src/mulle-utf16-string.c index b491046..8a9c209 100644 --- a/src/mulle-utf16-string.c +++ b/src/mulle-utf16-string.c @@ -18,16 +18,19 @@ #include #include #include -#if _WIN32 -#include // for alloca -#endif -size_t mulle_utf16_strnlen( mulle_utf16_t *src, size_t len) + +unsigned int mulle_utf16_strnlen( mulle_utf16_t *src, unsigned int len) { mulle_utf16_t *sentinel; mulle_utf16_t *p; + if( ! 
src) + return( 0); + + assert( len != (unsigned int) -1); + p = src; sentinel = &p[ len]; @@ -42,6 +45,20 @@ size_t mulle_utf16_strnlen( mulle_utf16_t *src, size_t len) +mulle_utf16_t *mulle_utf16_strdup( mulle_utf16_t *s) +{ + size_t length; + mulle_utf16_t *dst; + + length = (size_t) (mulle_utf16_strlen( s) + 1) * sizeof( mulle_utf16_t); + dst = mulle_allocator_malloc( NULL, length); + memcpy( dst, s, length); + return( dst); +} + + + + /* memo: "string" oriented routines view the utf16 string as an array of unsigned shorts, where the only special character is the 0, which is the terminator. "string" oriented routines are all mulle_utf16_ @@ -61,15 +78,17 @@ size_t mulle_utf16_strnlen( mulle_utf16_t *src, size_t len) /* * only terminates, does not fill with zero */ -mulle_utf16_t *mulle_utf16_strncpy( mulle_utf16_t *dst, mulle_utf16_t *src, size_t len) +mulle_utf16_t *mulle_utf16_strncpy( mulle_utf16_t *dst, unsigned int len, mulle_utf16_t *src) { mulle_utf16_t *memo; mulle_utf16_t *sentinel; mulle_utf16_t c; - assert( dst); - assert( src); - assert( src >= &dst[len] || &src[len] <= dst); + if( ! dst || ! src) + return( dst); + + assert( len != (unsigned int) -1); + assert( src >= &dst[ len] || src <= dst); // len for dst is known, but can't be inferred for src memo = dst; sentinel = &dst[ len]; @@ -94,6 +113,9 @@ mulle_utf16_t *mulle_utf16_strchr( mulle_utf16_t *s, mulle_utf32_t c) mulle_utf16_t e; mulle_utf32_t f; + if( ! s) + return( NULL); + --s; for( ;;) { @@ -140,6 +162,9 @@ mulle_utf16_t *mulle_utf16_strstr( mulle_utf16_t *s, mulle_utf16_t *pattern) unsigned int i; unsigned int n; + if( ! s || ! 
pattern) + return( NULL); + if( pattern[ 0] == 0) return( s); @@ -170,15 +195,18 @@ mulle_utf16_t *mulle_utf16_strstr( mulle_utf16_t *s, mulle_utf16_t *pattern) } -int mulle_utf16_strncmp( mulle_utf16_t *s1, mulle_utf16_t *s2, size_t len) +int mulle_utf16_strncmp( mulle_utf16_t *s1, mulle_utf16_t *s2, unsigned int len) { mulle_utf16_t *sentinel; mulle_utf32_t c; mulle_utf32_t d; - sentinel = &s1[ len]; + if( len == (unsigned int) -1) + len = mulle_utf16_strlen( s2); - while( s1 < sentinel) + sentinel = &s2[ len]; + + while( s2 < sentinel) { c = *s1++; d = *s2++; @@ -194,13 +222,14 @@ int mulle_utf16_strncmp( mulle_utf16_t *s1, mulle_utf16_t *s2, size_t len) } - -static int _compare_mulle_utf32_t( mulle_utf32_t *a, mulle_utf32_t *b) +static int compare_mulle_utf32( const void *p_a, const void *p_b) { - return( *a - *b); + mulle_utf32_t a = *(mulle_utf32_t *) p_a; + mulle_utf32_t b = *(mulle_utf32_t *) p_b; + + return( a - b); } -#define compare_mulle_utf32_t ((int (*)( const void *, const void *)) _compare_mulle_utf32_t) static inline mulle_utf32_t mulle_utf16_pull_surrogatepair( mulle_utf16_t c, mulle_utf16_t **s) @@ -226,68 +255,68 @@ static inline mulle_utf32_t mulle_utf16_pull_surrogatepair( mulle_utf16_t c, m order. It will not match 0xDCxx, 0xD8xx. It can match a single surrogate though if needed. */ -static size_t _mulle_utf16_strspn( mulle_utf16_t *s1, mulle_utf16_t *s2, int flag) +static unsigned int _mulle_utf16_strxspn( mulle_utf16_t *s1, mulle_utf16_t *s2, int flag) { mulle_utf16_t *start; mulle_utf16_t *tmp; mulle_utf32_t c; mulle_utf32_t d; - size_t s2_len; + mulle_utf32_t *buf; + unsigned int s2_len; unsigned int i; + int found; + mulle_utf32_t space[ 32]; + + if( ! s1) + return( 0); assert( flag == 0 || flag == 1); - start = s1; s2_len = mulle_utf16_strlen( s2); if( ! 
s2_len) return( 0); - if( s2_len == 1) - { - d = *s2; + i = 0; + start = s1; + tmp = s1; - --s1; - while( (c = *++s1)) - if( c != d) - break; - return( s1 - start); - } + // we don't have a flexbuffer here... + if( s2_len < 32) + buf = space; + else + buf = mulle_malloc( 32 * sizeof( mulle_utf32_t)); + + --s2; + while( (d = mulle_utf16_pull_surrogatepair( *++s2, &s2))) + buf[ i++] = d; + assert( i <= s2_len); + + qsort( buf, i, sizeof( mulle_utf32_t), compare_mulle_utf32); - i = 0; + --s1; + while( tmp = s1, c = mulle_utf16_pull_surrogatepair( *++s1, &s1)) { -#if _WIN32 - mulle_utf32_t *buf = alloca( sizeof( mulle_utf32_t) * s2_len); -#else - mulle_utf32_t buf[ sizeof( mulle_utf32_t) * s2_len]; -#endif - - --s2; - while( (d = mulle_utf16_pull_surrogatepair( *++s2, &s2))) - buf[ i++] = d; - assert( i <= s2_len); - - qsort( buf, i, sizeof( mulle_utf32_t), compare_mulle_utf32_t); - - --s1; - while( tmp = s1, c = mulle_utf16_pull_surrogatepair( *++s1, &s1)) - { - if( (! bsearch( &c, buf, i, sizeof( mulle_utf32_t), compare_mulle_utf32_t)) == flag) - break; - } - return( tmp + 1 - start); + found = ! 
bsearch( &c, buf, i, sizeof( mulle_utf32_t), compare_mulle_utf32); + if( found == flag) + break; } + + if( buf != space) + mulle_free( buf); + + return( tmp + 1 - start); } -size_t mulle_utf16_strspn( mulle_utf16_t *s1, mulle_utf16_t *s2) +unsigned int mulle_utf16_strspn( mulle_utf16_t *s1, mulle_utf16_t *s2) { - return( _mulle_utf16_strspn( s1, s2, 1)); + return( _mulle_utf16_strxspn( s1, s2, 1)); } -size_t mulle_utf16_strcspn( mulle_utf16_t *s1, mulle_utf16_t *s2) +unsigned int mulle_utf16_strcspn( mulle_utf16_t *s1, mulle_utf16_t *s2) { - return( _mulle_utf16_strspn( s1, s2, 0)); + return( _mulle_utf16_strxspn( s1, s2, 0)); } diff --git a/src/mulle-utf16-string.h b/src/mulle-utf16-string.h index 0af102e..bc02d5b 100644 --- a/src/mulle-utf16-string.h +++ b/src/mulle-utf16-string.h @@ -13,26 +13,24 @@ #include "mulle-utf-type.h" -#include #include +#include +#include +#include "mulle-utf16.h" -static inline size_t mulle_utf16_strlen( mulle_utf16_t *s) -{ - mulle_utf16_t *p; - assert( s); - /* this produced the nicest looking i386 code :P */ - for( p = s - 1; *++p;); +MULLE__UTF_GLOBAL +unsigned int mulle_utf16_strnlen( mulle_utf16_t *src, unsigned int len); - return( p - s); -} +MULLE__UTF_GLOBAL +mulle_utf16_t *mulle_utf16_strdup( mulle_utf16_t *s); MULLE__UTF_GLOBAL -size_t mulle_utf16_strnlen( mulle_utf16_t *src, size_t len); +mulle_utf16_t *mulle_utf16_strncpy( mulle_utf16_t *dst, unsigned int len, mulle_utf16_t *src); MULLE__UTF_GLOBAL -mulle_utf16_t *mulle_utf16_strncpy( mulle_utf16_t *dst, mulle_utf16_t *src, size_t len); +int mulle_utf16_strncmp( mulle_utf16_t *s1, mulle_utf16_t *s2, unsigned int len); MULLE__UTF_GLOBAL mulle_utf16_t *mulle_utf16_strchr( mulle_utf16_t *s, mulle_utf32_t c); // sic @@ -44,19 +42,16 @@ MULLE__UTF_GLOBAL mulle_utf16_t *mulle_utf16_strstr( mulle_utf16_t *s1, mulle_utf16_t *s2); MULLE__UTF_GLOBAL -int mulle_utf16_strncmp( mulle_utf16_t *s1, mulle_utf16_t *s2, size_t len); - -MULLE__UTF_GLOBAL -size_t mulle_utf16_strspn( 
mulle_utf16_t *s1, mulle_utf16_t *s2); +unsigned int mulle_utf16_strspn( mulle_utf16_t *s1, mulle_utf16_t *s2); MULLE__UTF_GLOBAL -size_t mulle_utf16_strcspn( mulle_utf16_t *s1, mulle_utf16_t *s2); +unsigned int mulle_utf16_strcspn( mulle_utf16_t *s1, mulle_utf16_t *s2); static inline int mulle_utf16_strcmp( mulle_utf16_t *s1, mulle_utf16_t *s2) { - return( mulle_utf16_strncmp( s1, s2, -1)); + return( mulle_utf16_strncmp( s1, s2, mulle_utf16_strlen( s2))); } @@ -65,4 +60,17 @@ static inline int mulle_utf16_atoi( mulle_utf16_t *s) return( _mulle_utf16_atoi( &s)); } + + +static inline void mulle_utf16_memcpy( mulle_utf16_t *dst, mulle_utf16_t *src, unsigned int len) +{ + memcpy( dst, src, sizeof( mulle_utf16_t) * len); +} + + +static inline void mulle_utf16_memmove( mulle_utf16_t *dst, mulle_utf16_t *src, unsigned int len) +{ + memmove( dst, src, sizeof( mulle_utf16_t) * len); +} + #endif diff --git a/src/mulle-utf16.c b/src/mulle-utf16.c index e967bcc..7c4c01a 100644 --- a/src/mulle-utf16.c +++ b/src/mulle-utf16.c @@ -82,14 +82,14 @@ int mulle_utf16_is_valid_surrogatepair( mulle_utf16_t hi, mulle_utf16_t lo) * copy converters */ mulle_utf32_t *_mulle_utf16_convert_to_utf32( mulle_utf16_t *src, - size_t len, + unsigned int len, mulle_utf32_t *dst) { mulle_utf16_t *sentinel; mulle_utf32_t x; // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) - assert( len != (size_t) -1); + assert( len != (unsigned int) -1); sentinel = &src[ len]; @@ -112,14 +112,14 @@ mulle_utf32_t *_mulle_utf16_convert_to_utf32( mulle_utf16_t *src, // // this also does not do any error checking, // must be proper UTF16 code! -char *_mulle_utf16_convert_to_utf8( mulle_utf16_t *src, size_t len, char *_dst) +char *_mulle_utf16_convert_to_utf8( mulle_utf16_t *src, unsigned int len, char *_dst) { unsigned char *dst = (unsigned char *) _dst; mulle_utf16_t *sentinel; mulle_utf32_t x; // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) 
- assert( len != (size_t) -1); + assert( len != (unsigned int) -1); sentinel = &src[ len]; @@ -181,9 +181,9 @@ char *_mulle_utf16_convert_to_utf8( mulle_utf16_t *src, size_t len, char *_dst) // this also does not do any error checking, // must be proper UTF16 code! void mulle_utf16_bufferconvert_to_utf8( mulle_utf16_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { mulle_utf16_t *sentinel; mulle_utf32_t x; @@ -191,7 +191,7 @@ void mulle_utf16_bufferconvert_to_utf8( mulle_utf16_t *src, unsigned char *s_flush; unsigned char tmp[ 128]; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) @@ -209,9 +209,9 @@ void mulle_utf16_bufferconvert_to_utf8( mulle_utf16_t *src, } x = *src++; - assert( x >= 0 && x <= mulle_utf32_max); recheck: + assert( x >= 0 && x <= mulle_utf32_max); if( x < 0x80) { *s++ = (unsigned char) x; @@ -261,9 +261,9 @@ void mulle_utf16_bufferconvert_to_utf8( mulle_utf16_t *src, void mulle_utf16_bufferconvert_to_utf32( mulle_utf16_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { mulle_utf16_t *sentinel; mulle_utf32_t x; @@ -271,7 +271,7 @@ void mulle_utf16_bufferconvert_to_utf32( mulle_utf16_t *src, mulle_utf32_t *s_flush; mulle_utf32_t tmp[ 32]; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) 
@@ -313,7 +313,7 @@ static inline int mulle_utf16_is_invalid_char( mulle_utf16_t c) // // just checks that the surrogate pairs are ok // -mulle_utf16_t *mulle_utf16_validate( mulle_utf16_t *src, size_t len) +mulle_utf16_t *mulle_utf16_validate( mulle_utf16_t *src, unsigned int len) { mulle_utf16_t c; mulle_utf16_t d; @@ -322,7 +322,7 @@ mulle_utf16_t *mulle_utf16_validate( mulle_utf16_t *src, size_t len) if( ! src) return( NULL); - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); sentinel = &src[ len]; @@ -362,17 +362,19 @@ mulle_utf16_t *mulle_utf16_validate( mulle_utf16_t *src, size_t len) // // this routine does not validate... // -size_t mulle_utf16_utf8length( mulle_utf16_t *src, size_t len) +size_t mulle_utf16_utf8length( mulle_utf16_t *src, unsigned int len) { mulle_utf16_t c; mulle_utf16_t *sentinel; + size_t size; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); if( ! len) return( 0); sentinel = &src[ len]; + size = (size_t) len; for( ; src < sentinel;) { @@ -383,32 +385,32 @@ size_t mulle_utf16_utf8length( mulle_utf16_t *src, size_t len) if( c < 0x0800) { - len++; + size++; continue; } // not a surrogate pair ? if( ! mulle_utf32_is_surrogatecharacter( c)) { - len += 2; + size += 2; continue; } if( ++src > sentinel) return( -1); - len += -1 + 3; + size += -1 + 3; } - return( len); + return( size); } -size_t mulle_utf16_utf32length( mulle_utf16_t *src, size_t len) +unsigned int mulle_utf16_utf32length( mulle_utf16_t *src, unsigned int len) { mulle_utf16_t c; mulle_utf16_t *sentinel; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); if( ! 
len) return( 0); @@ -432,13 +434,13 @@ size_t mulle_utf16_utf32length( mulle_utf16_t *src, size_t len) -size_t mulle_utf16_length( mulle_utf16_t *src, size_t len) +unsigned int mulle_utf16_length( mulle_utf16_t *src, unsigned int len) { mulle_utf16_t c; mulle_utf16_t *sentinel; - size_t dst_len; + unsigned int dst_len; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); sentinel = &src[ len]; @@ -511,7 +513,7 @@ mulle_utf32_t _mulle_utf16_previous_utf32character( mulle_utf16_t **s_p) // a long or long long // (b) masking value with 0x80808080 to figure out if all are "ASCII" // -int mulle_utf16_information( mulle_utf16_t *src, size_t len, struct mulle_utf_information *info) +int mulle_utf16_information( mulle_utf16_t *src, unsigned int len, struct mulle_utf_information *info) { mulle_utf16_t _c; mulle_utf16_t *start; @@ -537,7 +539,7 @@ int mulle_utf16_information( mulle_utf16_t *src, size_t len, struct mulle_utf_i if( ! src) goto fail; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( src); // @@ -615,14 +617,14 @@ int mulle_utf16_information( mulle_utf16_t *src, size_t len, struct mulle_utf_i int mulle_utf16_contains_character_larger_or_equal( mulle_utf16_t *s, - size_t len, + unsigned int len, mulle_utf16_t d) { mulle_utf16_t _c; mulle_utf16_t *sentinel; mulle_utf16_t *p; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf16_strlen( s); p = s; @@ -641,7 +643,7 @@ int mulle_utf16_contains_character_larger_or_equal( mulle_utf16_t *s, // // src must be known to be UTF15, and contain no zeroes // -enum mulle_utf_charinfo _mulle_utf16_charinfo( mulle_utf16_t *src, size_t len) +enum mulle_utf_charinfo _mulle_utf16_charinfo( mulle_utf16_t *src, unsigned int len) { mulle_utf16_t _c; mulle_utf16_t *start; diff --git a/src/mulle-utf16.h b/src/mulle-utf16.h index 5cfb2c4..99945d9 100644 --- a/src/mulle-utf16.h +++ b/src/mulle-utf16.h @@ -49,16 +49,31 @@ struct mulle_utf16data { 
mulle_utf16_t *characters; - size_t length; + unsigned int length; }; -static inline struct mulle_utf16data mulle_utf16data_make( mulle_utf16_t *s, size_t length) +static inline unsigned int mulle_utf16_strlen( mulle_utf16_t *s) { - struct mulle_utf16data data; + mulle_utf16_t *p; - data.characters = s; - data.length = length; + if( ! s) + return( 0); + + for( p = s - 1; *++p;); + + return( p - s); +} + + +static inline struct mulle_utf16data mulle_utf16data_make( mulle_utf16_t *s, + unsigned int length) +{ + struct mulle_utf16data data; + const static mulle_utf16_t zero = { 0 }; + + data.length = (length == (unsigned int) -1) ? mulle_utf16_strlen( s) : length; + data.characters = data.length ? s : (mulle_utf16_t *) &zero; return( data); } @@ -76,41 +91,41 @@ static inline int mulle_utf16_is_char5character( mulle_utf16_t c) MULLE__UTF_GLOBAL -int mulle_utf16_information( mulle_utf16_t *src, size_t len, struct mulle_utf_information *info); +int mulle_utf16_information( mulle_utf16_t *src, unsigned int len, struct mulle_utf_information *info); MULLE__UTF_GLOBAL -size_t mulle_utf16_utf8length( mulle_utf16_t *src, size_t len); +size_t mulle_utf16_utf8length( mulle_utf16_t *src, unsigned int len); MULLE__UTF_GLOBAL -size_t mulle_utf16_utf32length( mulle_utf16_t *src, size_t len); +unsigned int mulle_utf16_utf32length( mulle_utf16_t *src, unsigned int len); MULLE__UTF_GLOBAL -size_t mulle_utf16_length( mulle_utf16_t *src, size_t len); +unsigned int mulle_utf16_length( mulle_utf16_t *src, unsigned int len); MULLE__UTF_GLOBAL int mulle_utf16_contains_character_larger_or_equal( mulle_utf16_t *src, - size_t len, + unsigned int len, mulle_utf16_t d); -static inline int mulle_utf16_is_ascii( mulle_utf16_t *src, size_t len) +static inline int mulle_utf16_is_ascii( mulle_utf16_t *src, unsigned int len) { return( ! 
mulle_utf16_contains_character_larger_or_equal( src, len, 0x80)); } -static inline int mulle_utf16_is_utf15( mulle_utf16_t *src, size_t len) +static inline int mulle_utf16_is_utf15( mulle_utf16_t *src, unsigned int len) { return( ! mulle_utf16_contains_character_larger_or_equal( src, len, 0x8000)); } -static inline size_t mulle_utf16_utf8maxlength( size_t len) +static inline size_t mulle_utf16_utf8maxlength( unsigned int len) { return( len * 4); } MULLE__UTF_GLOBAL -mulle_utf16_t *mulle_utf16_validate( mulle_utf16_t *src, size_t len); +mulle_utf16_t *mulle_utf16_validate( mulle_utf16_t *src, unsigned int len); // hi and lo MUST be surrogates MULLE__UTF_GLOBAL @@ -128,12 +143,12 @@ mulle_utf32_t _mulle_utf16_previous_utf32character( mulle_utf16_t **s_p); // returns end of dst MULLE__UTF_GLOBAL mulle_utf32_t *_mulle_utf16_convert_to_utf32( mulle_utf16_t *src, - size_t len, + unsigned int len, mulle_utf32_t *dst); MULLE__UTF_GLOBAL char *_mulle_utf16_convert_to_utf8( mulle_utf16_t *src, - size_t len, - char *dst); + unsigned int len, + char *dst); // // This will not stop on a zero. It will not by itself append a zero. // return value = 0 means OK! 
@@ -141,17 +156,23 @@ char *_mulle_utf16_convert_to_utf8( mulle_utf16_t *src, // MULLE__UTF_GLOBAL void mulle_utf16_bufferconvert_to_utf8( mulle_utf16_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); MULLE__UTF_GLOBAL void mulle_utf16_bufferconvert_to_utf32( mulle_utf16_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); MULLE__UTF_GLOBAL -enum mulle_utf_charinfo _mulle_utf16_charinfo( mulle_utf16_t *src, size_t len); +enum mulle_utf_charinfo _mulle_utf16_charinfo( mulle_utf16_t *src, unsigned int len); + + +// would not work, because we need two mulle_utf16_taa +// MULLE__UTF_GLOBAL +// char *_mulle_utf16_as_utf8_not_ascii( mulle_utf16_t x, char *dst); + #endif diff --git a/src/mulle-utf32-string.c b/src/mulle-utf32-string.c index 9d9f859..3838f6f 100644 --- a/src/mulle-utf32-string.c +++ b/src/mulle-utf32-string.c @@ -21,11 +21,16 @@ #endif -size_t mulle_utf32_strnlen( mulle_utf32_t *src, size_t len) +unsigned int mulle_utf32_strnlen( mulle_utf32_t *src, unsigned int len) { mulle_utf32_t *sentinel; mulle_utf32_t *p; + if( ! src) + return( 0); + + assert( len != (unsigned int) -1); + p = src; sentinel = &p[ len]; @@ -48,15 +53,19 @@ size_t mulle_utf32_strnlen( mulle_utf32_t *src, size_t len) */ -mulle_utf32_t *mulle_utf32_strncpy( mulle_utf32_t *dst, mulle_utf32_t *src, size_t len) +mulle_utf32_t *mulle_utf32_strncpy( mulle_utf32_t *dst, unsigned int len, mulle_utf32_t *src) { mulle_utf32_t *memo; mulle_utf32_t *sentinel; mulle_utf32_t c; - assert( dst); + if( ! dst || ! 
src) + return( dst); + assert( src); - assert( src >= &dst[len] || &src[len] <= dst); + assert( len != (unsigned int) -1); + + assert( src >= &dst[ len] || src <= dst); // len for dst is known, but can't be inferred for src memo = dst; sentinel = &dst[ len]; @@ -76,6 +85,9 @@ mulle_utf32_t *mulle_utf32_strchr( mulle_utf32_t *s, mulle_utf32_t c) { mulle_utf32_t d; + if( ! s) + return( NULL); + --s; for( ;;) { @@ -90,6 +102,18 @@ mulle_utf32_t *mulle_utf32_strchr( mulle_utf32_t *s, mulle_utf32_t c) } +mulle_utf32_t *mulle_utf32_strdup( mulle_utf32_t *s) +{ + size_t length; + mulle_utf32_t *dst; + + length = (size_t) (mulle_utf32_strlen( s) + 1) * sizeof( mulle_utf32_t); + dst = mulle_allocator_malloc( NULL, length); + memcpy( dst, s, length); + return( dst); +} + + /* * use Rabin–Karp for expected "usual" strings to match * this could "partial" match a surrogate, but is this @@ -135,12 +159,15 @@ mulle_utf32_t *mulle_utf32_strstr( mulle_utf32_t *s, mulle_utf32_t *pattern) } -int mulle_utf32_strncmp( mulle_utf32_t *s1, mulle_utf32_t *s2, size_t len) +int mulle_utf32_strncmp( mulle_utf32_t *s1, mulle_utf32_t *s2, unsigned int len) { mulle_utf32_t *sentinel; mulle_utf32_t c; mulle_utf32_t d; + if( len == (unsigned int) -1) + len = mulle_utf32_strlen( s2); + sentinel = &s1[ len]; while( s1 < sentinel) @@ -167,13 +194,13 @@ static int _compare_mulle_utf32_t( mulle_utf32_t *a, mulle_utf32_t *b) #define compare_mulle_utf32_t ((int (*)( const void *, const void *)) _compare_mulle_utf32_t) -static size_t _mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2, int flag) +static unsigned int _mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2, int flag) { mulle_utf32_t *start; mulle_utf32_t *tmp; mulle_utf32_t c; mulle_utf32_t d; - size_t s2_len; + unsigned int s2_len; unsigned int i; assert( flag == 0 || flag == 1); @@ -195,12 +222,10 @@ static size_t _mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2, int f } i = 0; + tmp = s1; + + mulle_alloca_do( buf, 
mulle_utf32_t, s2_len) { -#if _WIN32 - mulle_utf32_t *buf = alloca( sizeof( mulle_utf32_t) * s2_len); -#else - mulle_utf32_t buf[ sizeof( mulle_utf32_t) * s2_len]; -#endif --s2; while( d = *++s2) buf[ i++] = d; @@ -213,18 +238,18 @@ static size_t _mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2, int f if( (! bsearch( &c, buf, i, sizeof( mulle_utf32_t), compare_mulle_utf32_t)) == flag) break; } - return( tmp + 1 - start); } + return( tmp + 1 - start); } -size_t mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2) +unsigned int mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2) { return( _mulle_utf32_strspn( s1, s2, 1)); } -size_t mulle_utf32_strcspn( mulle_utf32_t *s1, mulle_utf32_t *s2) +unsigned int mulle_utf32_strcspn( mulle_utf32_t *s1, mulle_utf32_t *s2) { return( _mulle_utf32_strspn( s1, s2, 0)); } diff --git a/src/mulle-utf32-string.h b/src/mulle-utf32-string.h index 516ce0f..9f9b585 100644 --- a/src/mulle-utf32-string.h +++ b/src/mulle-utf32-string.h @@ -13,26 +13,21 @@ #include "mulle-utf-type.h" +#include "mulle-utf32.h" + #include #include +#include -static inline size_t mulle_utf32_strlen( mulle_utf32_t *s) -{ - mulle_utf32_t *p; - - /* this produced the nicest looking i386 code :P */ - for( p = s - 1; *++p;); - - return( p - s); -} - MULLE__UTF_GLOBAL -size_t mulle_utf32_strnlen( mulle_utf32_t *src, size_t len); +unsigned int mulle_utf32_strnlen( mulle_utf32_t *src, unsigned int len); +MULLE__UTF_GLOBAL +mulle_utf32_t *mulle_utf32_strdup( mulle_utf32_t *s); MULLE__UTF_GLOBAL -mulle_utf32_t *mulle_utf32_strncpy( mulle_utf32_t *dst, mulle_utf32_t *src, size_t len); +mulle_utf32_t *mulle_utf32_strncpy( mulle_utf32_t *dst, unsigned int len, mulle_utf32_t *src); MULLE__UTF_GLOBAL mulle_utf32_t *mulle_utf32_strchr( mulle_utf32_t *s, mulle_utf32_t c); @@ -44,13 +39,13 @@ MULLE__UTF_GLOBAL mulle_utf32_t *mulle_utf32_strstr( mulle_utf32_t *s1, mulle_utf32_t *s2); MULLE__UTF_GLOBAL -int mulle_utf32_strncmp( mulle_utf32_t *s1, mulle_utf32_t 
*s2, size_t len); +int mulle_utf32_strncmp( mulle_utf32_t *s1, mulle_utf32_t *s2, unsigned int len); MULLE__UTF_GLOBAL -size_t mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2); +unsigned int mulle_utf32_strspn( mulle_utf32_t *s1, mulle_utf32_t *s2); MULLE__UTF_GLOBAL -size_t mulle_utf32_strcspn( mulle_utf32_t *s1, mulle_utf32_t *s2); +unsigned int mulle_utf32_strcspn( mulle_utf32_t *s1, mulle_utf32_t *s2); static inline int mulle_utf32_strcmp( mulle_utf32_t *s1, mulle_utf32_t *s2) @@ -63,4 +58,17 @@ static inline int mulle_utf32_atoi( mulle_utf32_t *s) return( _mulle_utf32_atoi( &s)); } + +static inline void mulle_utf32_memcpy( mulle_utf32_t *dst, mulle_utf32_t *src, unsigned int len) +{ + memcpy( dst, src, sizeof( mulle_utf32_t) * len); +} + + +static inline void mulle_utf32_memmove( mulle_utf32_t *dst, mulle_utf32_t *src, unsigned int len) +{ + memmove( dst, src, sizeof( mulle_utf32_t) * len); +} + + #endif diff --git a/src/mulle-utf32.c b/src/mulle-utf32.c index bb61ede..5c24268 100644 --- a/src/mulle-utf32.c +++ b/src/mulle-utf32.c @@ -54,14 +54,14 @@ mulle_utf16_t *_mulle_utf32_convert_to_utf16_as_surrogatepair( mulle_utf32_t x, // must be proper UTF32 code! -char *_mulle_utf32_convert_to_utf8( mulle_utf32_t *src, size_t len, char *_dst) +char *_mulle_utf32_convert_to_utf8( mulle_utf32_t *src, unsigned int len, char *_dst) { unsigned char *dst = (unsigned char *) _dst; mulle_utf32_t *sentinel; mulle_utf32_t x; // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) 
- assert( len != (size_t) -1); + assert( len != (unsigned int) -1); sentinel = &src[ len]; @@ -74,6 +74,7 @@ char *_mulle_utf32_convert_to_utf8( mulle_utf32_t *src, size_t len, char *_dst) if( x < 0x80) { *dst++ = (unsigned char) x; + continue; } *dst++ = 0xC0 | (unsigned char) (x >> 6); @@ -105,14 +106,14 @@ char *_mulle_utf32_convert_to_utf8( mulle_utf32_t *src, size_t len, char *_dst) mulle_utf16_t *_mulle_utf32_convert_to_utf16( mulle_utf32_t *src, - size_t len, + unsigned int len, mulle_utf16_t *dst) { mulle_utf32_t *sentinel; mulle_utf32_t x; // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) - assert( len != (size_t) -1); + assert( len != (unsigned int) -1); sentinel = &src[ len]; @@ -135,9 +136,9 @@ mulle_utf16_t *_mulle_utf32_convert_to_utf16( mulle_utf32_t *src, // must be proper UTF32 code! void mulle_utf32_bufferconvert_to_utf8( mulle_utf32_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { mulle_utf32_t *sentinel; mulle_utf32_t x; @@ -145,7 +146,7 @@ void mulle_utf32_bufferconvert_to_utf8( mulle_utf32_t *src, unsigned char *s_flush; unsigned char tmp[ 128]; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf32_strlen( src); // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) @@ -203,9 +204,9 @@ void mulle_utf32_bufferconvert_to_utf8( mulle_utf32_t *src, void mulle_utf32_bufferconvert_to_utf16( mulle_utf32_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { mulle_utf32_t *sentinel; mulle_utf32_t x; @@ -213,7 +214,7 @@ void mulle_utf32_bufferconvert_to_utf16( mulle_utf32_t *src, mulle_utf16_t *s_flush; mulle_utf16_t tmp[ 64]; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf32_strlen( src); // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) 
@@ -251,22 +252,23 @@ void mulle_utf32_bufferconvert_to_utf16( mulle_utf32_t *src, // must be proper UTF32 code! size_t mulle_utf32_utf8length( mulle_utf32_t *src, - size_t len) + unsigned int len) { mulle_utf32_t *sentinel; uint32_t x; + size_t size; if( ! src) return( 0); - if( len == (size_t) -1) - len = mulle_utf32_strlen( src); + len = (len == (unsigned int) -1) ? mulle_utf32_strlen( src) : len; if( ! len) return( 0); // if dst_len == -1, then sentinel - 1 = dst_sentinel (OK!) sentinel = &src[ len]; + size = len; while( src < sentinel) { @@ -279,27 +281,27 @@ size_t mulle_utf32_utf8length( mulle_utf32_t *src, if( x < 0x80) continue; - ++len; + ++size; continue; } if( x < 0x10000) { assert( ! mulle_utf32_is_surrogatecharacter( x)); - len += 2; + size += 2; continue; } assert( x <= 0x10FFFF); - len += 3; + size += 3; } - return( len); + return( size); } // must be proper UTF32 code! -size_t mulle_utf32_utf16length( mulle_utf32_t *src, - size_t len) +unsigned int mulle_utf32_utf16length( mulle_utf32_t *src, + unsigned int len) { mulle_utf32_t *sentinel; uint32_t x; @@ -307,7 +309,7 @@ size_t mulle_utf32_utf16length( mulle_utf32_t *src, if( ! src) return( 0); - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf32_strlen( src); if( ! len) return( 0); @@ -331,7 +333,7 @@ size_t mulle_utf32_utf16length( mulle_utf32_t *src, int mulle_utf32_information( mulle_utf32_t *src, - size_t len, + unsigned int len, struct mulle_utf_information *info) { mulle_utf32_t _c; @@ -358,7 +360,7 @@ int mulle_utf32_information( mulle_utf32_t *src, if( ! 
src) goto fail; - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf32_strlen( src); // @@ -471,7 +473,7 @@ static inline int mulle_utf32_is_invalid_char( mulle_utf32_t c) } -mulle_utf32_t *mulle_utf32_validate( mulle_utf32_t *src, size_t len) +mulle_utf32_t *mulle_utf32_validate( mulle_utf32_t *src, unsigned int len) { mulle_utf32_t c; mulle_utf32_t *sentinel; @@ -479,7 +481,7 @@ mulle_utf32_t *mulle_utf32_validate( mulle_utf32_t *src, size_t len) if( ! src) return( NULL); - if( len == (size_t) -1) + if( len == (unsigned int) -1) len = mulle_utf32_strlen( src); sentinel = &src[ len]; @@ -501,14 +503,14 @@ mulle_utf32_t *mulle_utf32_validate( mulle_utf32_t *src, size_t len) } -enum mulle_utf_charinfo _mulle_utf32_charinfo( mulle_utf32_t *src, size_t len) +enum mulle_utf_charinfo _mulle_utf32_charinfo( mulle_utf32_t *src, unsigned int len) { mulle_utf32_t _c; mulle_utf32_t *start; mulle_utf32_t *sentinel; assert( len); - assert( len != (size_t) -1); + assert( len != (unsigned int) -1); if( len > mulle_char5_get_maxlength()) return( mulle_utf_is_not_char5_or_char7); diff --git a/src/mulle-utf32.h b/src/mulle-utf32.h index cb848cd..02f088f 100644 --- a/src/mulle-utf32.h +++ b/src/mulle-utf32.h @@ -47,20 +47,43 @@ struct mulle_utf32data { mulle_utf32_t *characters; - size_t length; + unsigned int length; }; -static inline struct mulle_utf32data mulle_utf32data_make( mulle_utf32_t *s, size_t length) +static inline unsigned int mulle_utf32_strlen( mulle_utf32_t *s) { - struct mulle_utf32data data; + mulle_utf32_t *p; - data.characters = s; - data.length = length; + if( ! s) + return( 0); + + /* this produced the nicest looking i386 code :P */ + for( p = s - 1; *++p;); + + return( p - s); +} + + +static inline struct mulle_utf32data mulle_utf32data_make( mulle_utf32_t *s, + unsigned int length) +{ + struct mulle_utf32data data; + const static mulle_utf32_t zero = { 0 }; + + data.length = (length == (unsigned int) -1) ? 
mulle_utf32_strlen( s) : length; + data.characters = data.length ? s : (mulle_utf32_t *) &zero; return( data); } +static inline struct mulle_utf32data mulle_utf32data_make_null( void) +{ + return( (struct mulle_utf32data ) { NULL, 0 }); +} + + + static inline int mulle_utf32_is_asciicharacter( mulle_utf32_t c) { return( (uint32_t) c < 0x80); @@ -73,21 +96,31 @@ static inline int mulle_utf32_is_char5character( mulle_utf32_t c) } +static inline int mulle_utf32_get_unicodeplane( mulle_utf32_t c) +{ + int plane; + + plane = (uint32_t) c >> 16; + return( plane); +} + + + MULLE__UTF_GLOBAL size_t mulle_utf32_utf8length( mulle_utf32_t *src, - size_t len); + unsigned int len); MULLE__UTF_GLOBAL -size_t mulle_utf32_utf16length( mulle_utf32_t *src, - size_t len); +unsigned int mulle_utf32_utf16length( mulle_utf32_t *src, + unsigned int len); MULLE__UTF_GLOBAL int mulle_utf32_information( mulle_utf32_t *src, - size_t len, + unsigned int len, struct mulle_utf_information *info); MULLE__UTF_GLOBAL -mulle_utf32_t *mulle_utf32_validate( mulle_utf32_t *src, size_t len); +mulle_utf32_t *mulle_utf32_validate( mulle_utf32_t *src, unsigned int len); // // these two are just here for completeness @@ -112,30 +145,30 @@ mulle_utf16_t *_mulle_utf32_convert_to_utf16_as_surrogatepair( mulle_utf32_t x, MULLE__UTF_GLOBAL mulle_utf16_t *_mulle_utf32_convert_to_utf16( mulle_utf32_t *src, - size_t len, + unsigned int len, mulle_utf16_t *dst); MULLE__UTF_GLOBAL -char *_mulle_utf32_convert_to_utf8( mulle_utf32_t *src, size_t len, char *dst); +char *_mulle_utf32_convert_to_utf8( mulle_utf32_t *src, unsigned int len, char *dst); // these routines do not skip BOM characters MULLE__UTF_GLOBAL void mulle_utf32_bufferconvert_to_utf8( mulle_utf32_t *src, - size_t len, + unsigned int len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); MULLE__UTF_GLOBAL void mulle_utf32_bufferconvert_to_utf16( mulle_utf32_t *src, - size_t len, + unsigned int len, void 
*buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); // unused nowadays static inline void mulle_utf32_bufferconvert_to_utf16_as_surrogatepair( mulle_utf32_t x, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { mulle_utf16_t hilo[ 2]; @@ -166,7 +199,7 @@ static inline char *mulle_utf32_as_utf8( mulle_utf32_t x, char *dst) MULLE__UTF_GLOBAL -enum mulle_utf_charinfo _mulle_utf32_charinfo( mulle_utf32_t *src, size_t len); +enum mulle_utf_charinfo _mulle_utf32_charinfo( mulle_utf32_t *src, unsigned int len); #endif diff --git a/src/mulle-utf8-string.c b/src/mulle-utf8-string.c new file mode 100644 index 0000000..16736b2 --- /dev/null +++ b/src/mulle-utf8-string.c @@ -0,0 +1,222 @@ +// +// mulle-utf8-string.c +// mulle-utf +// +// Copyright (c) 2023 Nat! - Mulle kybernetiK. +// All rights reserved. +// +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// Neither the name of Mulle kybernetiK nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +#include "mulle-utf8-string.h" + +#include "include-private.h" + +#include "mulle-utf32.h" + + + +/* + * copies at most len bytes and stops after the 0 terminator (no zero fill); dst stays unterminated if src has no 0 within its first len bytes + */ +char *mulle_utf8_strncpy( char *dst, size_t len, char *src) +{ + char *memo; + char *sentinel; + char c; + + if( ! dst || ! src) + return( dst); + + assert( len != (size_t) -1); + assert( src >= &dst[ len] || src <= dst); // len for dst is known, but can't be inferred for src + + memo = dst; + sentinel = &dst[ len]; + + while( dst < sentinel) + { + c = *src++; + *dst++ = c; + if( ! c) + break; + } + return( memo); +} + +// +// different API to strnstr, and on linux strnstr is only available +// with BSD... +// You can't search for '\0' with this function. +// +char *mulle_utf8_strnstr( char *s, size_t len, char *search) +{ + char *p; + char *sentinel; + size_t offset; + + if( ! s || ! search) + return( NULL); + + if( len == (size_t) -1) + len = mulle_utf8_strlen( s); + + offset = mulle_utf8_strlen( search); + if( ! 
offset) + return( s); + + sentinel = &s[ len]; + p = search; + + // naive scan: when a partial match breaks, s is rewound to one past the + // start of that partial match, so overlapping candidates are not skipped + + for(;;) + { + if( s >= sentinel) + return( NULL); + + if( *s++ != *p) + { + s -= p - search; + p = search; + continue; + } + + if( *++p) + continue; + + return( (char *) &s[ -(long) offset]); + } +} + + +char *mulle_utf8_strnchr( char *s, size_t len, mulle_utf32_t c) +{ + char buf[ 16]; + char *end; + char n; + + if( ! s) + return( NULL); + + if( len == (size_t) -1) + len = mulle_utf8_strlen( s) + 1; + + end = mulle_utf32_as_utf8( c, buf); + n = end - buf; + if( ! n) + return( NULL); + + // special code for 'c' == 0 and simplifies other ascii searches + if( n == 1) + return( memchr( s, c, len)); + + assert( n < sizeof( buf)); + *end = 0; + + return( mulle_utf8_strnstr( s, len, buf)); +} + + +static size_t _mulle_utf8_strxspn( char *string, char *search, int flag) +{ + char *s; + char c; + int found; + + if( ! string) + return( 0); + + for( s = string; (c = *s); s++) + { + found = mulle_utf8_strchr( search, c) != NULL; + if( found != flag) + break; + } + return( s - string); +} + + +size_t mulle_utf8_strspn( char *s, char *search) +{ + return( _mulle_utf8_strxspn( s, search, 1)); +} + + +size_t mulle_utf8_strcspn( char *s, char *search) +{ + return( _mulle_utf8_strxspn( s, search, 0)); +} + + + +size_t _mulle_utf8_strnxspn( char *string, size_t length, char *search, int flag) +{ + char *s; + char *sentinel; + char c; + int found; + + assert( ! 
length || string); + + for( s = string, sentinel = &s[ length]; s < sentinel; s++) + { + c = *s; + assert( c); + + found = mulle_utf8_strchr( search, c) != NULL; + if( found != flag) + break; + } + return( s - string); +} + + + + +char *mulle_utf8_skiputf32( char *s, unsigned int *p_n) +{ + unsigned int n; + unsigned int i; + mulle_utf32_t c; + + i = 0; + if( s) + { + n = *p_n; + while( i < n) + { + c = _mulle_utf8_next_utf32character( &s); + if( ! c) + break; + ++i; + } + } + *p_n = i; + return( s); +} diff --git a/src/mulle-utf8-string.h b/src/mulle-utf8-string.h new file mode 100644 index 0000000..d749bde --- /dev/null +++ b/src/mulle-utf8-string.h @@ -0,0 +1,172 @@ +// +// mulle-utf8-string.h +// mulle-utf +// +// Copyright (c) 2023 Nat! - Mulle kybernetiK. +// All rights reserved. +// +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// Neither the name of Mulle kybernetiK nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +#ifndef mulle_utf8_string_h__ +#define mulle_utf8_string_h__ + +#include "include.h" + +#include "mulle-utf-type.h" + +#include "mulle-utf8.h" + +#include +#include + + +static inline int mulle_utf8_strcmp( char *s, char *other) +{ + return( strcmp( s, other)); +} + + +static inline int mulle_utf8_strncmp( char *s, char *other, int len) +{ + return( strncmp( s, other, len)); +} + + +static inline char *mulle_utf8_strdup( char *s) +{ + return( mulle_allocator_strdup( NULL, s)); +} + + +// +// hand coded because linux doesn't have it by default, and I want to get rid +// of the warning without having to define __USE_XOPEN2K8 +// +static inline size_t mulle_utf8_strnlen( char *s, size_t len) +{ + char *start; + char *sentinel; + + if( ! s) + return( 0); + + assert( len != (size_t) -1); + + start = s; + sentinel = &s[ len]; + + while( s < sentinel) + { + if( ! 
*s) + break; + ++s; + } + return( (size_t) (s - start)); +} + + +/* + * only terminates, does not fill with zero + */ +MULLE__UTF_GLOBAL +char *mulle_utf8_strncpy( char *dst, size_t len, char *src); + + +// strstr +MULLE__UTF_GLOBAL +char *mulle_utf8_strnstr( char *s, size_t len, char *search); + + +static inline char *mulle_utf8_strstr( char *s, char *search) +{ + return( mulle_utf8_strnstr( s, (size_t) -1, search)); +} + + +// strchr +MULLE__UTF_GLOBAL +char *mulle_utf8_strnchr( char *s, size_t len, mulle_utf32_t c); + +static inline char *mulle_utf8_strchr( char *s, mulle_utf32_t c) +{ + return( mulle_utf8_strnchr( s, (size_t) -1, c)); +} + + +// strspn +MULLE__UTF_GLOBAL +size_t mulle_utf8_strspn( char *s, char *search); + + +// strcspn +MULLE__UTF_GLOBAL +size_t mulle_utf8_strcspn( char *s, char *search); + + +// p_n: contains utf32 chars to skip, returns actually skipped +// +MULLE__UTF_GLOBAL +char *mulle_utf8_skiputf32( char *s, unsigned int *p_n); + + +// MEMO: need these variety for ObjC where there might not be a terminating 0 +static inline size_t mulle_utf8_strnspn( char *s, size_t length, char *search) +{ + MULLE__UTF_GLOBAL + size_t _mulle_utf8_strnxspn( char *string, size_t length, char *search, int flag); + + if( ! s) + return( 0); + return( _mulle_utf8_strnxspn( s, length, search, 1)); +} + + +static inline size_t mulle_utf8_strncspn( char *s, size_t length, char *search) +{ + MULLE__UTF_GLOBAL + size_t _mulle_utf8_strnxspn( char *string, size_t length, char *search, int flag); + + if( ! 
s) + return( 0); + return( _mulle_utf8_strnxspn( s, length, search, 0)); +} + + +static inline void mulle_utf8_memcpy( char *dst, char *src, size_t len) +{ + memcpy( dst, src, len); +} + + +static inline void mulle_utf8_memmove( char *dst, char *src, size_t len) +{ + memmove( dst, src, len); +} + + +#endif diff --git a/src/mulle-utf8.c b/src/mulle-utf8.c index 72e40ff..ccb2297 100644 --- a/src/mulle-utf8.c +++ b/src/mulle-utf8.c @@ -36,6 +36,7 @@ // #include "mulle-utf8.h" +#include "mulle-utf8-string.h" #include "mulle-char5.h" #include "mulle-utf16.h" #include "mulle-utf32.h" @@ -71,49 +72,49 @@ static mulle_utf32_t mulle_utf8_extracharactersvalue( char *_src, switch( extra_len) { case 1 : // 11 bits - x = (_c & 0x1F) << 6; + x = (mulle_utf32_t) (_c & 0x1F) << 6; _c = *src; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( -2); - x |= (_c & 0x3F); + x |= (mulle_utf32_t) (_c & 0x3F); assert( x >= 128); break; case 2 : // 16 bits - x = (_c & 0x0F) << 12; + x = (mulle_utf32_t) (_c & 0x0F) << 12; _c = *src++; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( -2); - x |= (_c & 0x3F) << 6; + x |= (mulle_utf32_t) (_c & 0x3F) << 6; _c = *src; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( -2); - x |= (_c & 0x3F); + x |= (mulle_utf32_t) (_c & 0x3F); assert( x >= 0x800 && (x < 0xD800 || x >= 0xE000)); break; case 3 : // 21 bits -> UTF32 - x = (_c & 0x7) << 18; + x = (mulle_utf32_t) (_c & 0x7) << 18; _c = *src++; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( -2); - x |= (_c & 0x3F) << 12; + x |= (mulle_utf32_t) (_c & 0x3F) << 12; _c = *src++; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( -2); - x |= (_c & 0x3F) << 6; + x |= (mulle_utf32_t) (_c & 0x3F) << 6; _c = *src; if( ! 
mulle_utf8_is_validcontinuationcharacter( _c)) return( -2); - x |= (_c & 0x3F); + x |= (mulle_utf32_t) (_c & 0x3F); assert( x >= 0x10000 && x <= 0x10FFFF); } @@ -186,18 +187,11 @@ mulle_utf32_t _mulle_utf8data_next_utf32character( struct mulle_utf8data *rove struct mulle_utf8data mulle_utf8data_copy( struct mulle_utf8data data, struct mulle_allocator *allocator) { - char *p; - - assert( ! data.length || data.characters); - if( ! data.length) - return( data); - - p = mulle_allocator_malloc( allocator, data.length + 1); - memcpy( p, data.characters, data.length); - p[ data.length] = 0; - return( mulle_utf8data_make( p, data.length)); -} + struct mulle_utf8data copied; + mulle_utf8data_init( &copied, data.characters, data.length, allocator); + return( copied); +} // sorta undoes _mulle_utf8_next_utf32_value @@ -227,11 +221,12 @@ mulle_utf32_t _mulle_utf8_previous_utf32character( char **s_p) } - // // the slower non-crashing code ... // -int mulle_utf8_are_valid_extracharacters( char *src, unsigned int len, mulle_utf32_t *p_x) +int mulle_utf8_are_valid_extracharacters( char *src, + size_t len, + mulle_utf32_t *p_x) { unsigned char _c; mulle_utf32_t x; @@ -244,51 +239,52 @@ int mulle_utf8_are_valid_extracharacters( char *src, unsigned int len, mulle_u switch( len) { case 1 : // 11 bits - x = (_c & 0x1F) << 6; + x = (mulle_utf32_t) (_c & 0x1F) << 6; _c = (unsigned char) *src; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( 0); - x |= (_c & 0x3F); + x |= (mulle_utf32_t) (_c & 0x3F); if( x < 0x80) // can't be ASCII return( 0); break; case 2 : // 16 bits - x = (_c & 0x0F) << 12; + x = (mulle_utf32_t) (_c & 0x0F) << 12; _c = (unsigned char) *src++; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( 0); - x |= (_c & 0x3F) << 6; + x |= (mulle_utf32_t) (_c & 0x3F) << 6; _c = (unsigned char) *src; if( ! 
mulle_utf8_is_validcontinuationcharacter( _c)) return( 0); - x |= (_c & 0x3F); + x |= (mulle_utf32_t) (_c & 0x3F); if( x < 0x800) // can't be in case 1 range return( 0); break; - case 3 : // 21 bits -> UTF32 - x = (_c & 0x7) << 18; + default : // 21 bits -> UTF32 + assert( len == 3); + x = (mulle_utf32_t) (_c & 0x7) << 18; _c = (unsigned char) *src++; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( 0); - x |= (_c & 0x3F) << 12; + x |= (mulle_utf32_t) (_c & 0x3F) << 12; _c = (unsigned char) *src++; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( 0); - x |= (_c & 0x3F) << 6; + x |= (mulle_utf32_t) (_c & 0x3F) << 6; _c = (unsigned char) *src; if( ! mulle_utf8_is_validcontinuationcharacter( _c)) return( 0); - x |= (_c & 0x3F); + x |= (mulle_utf32_t) (_c & 0x3F); if( x < 0x10000 || x > 0x0010FFFF) // // can't be in case 2 range or totally too large return( 0); @@ -469,7 +465,7 @@ mulle_utf32_t *_mulle_utf8_convert_to_utf32( char *_src, void mulle_utf8_bufferconvert_to_utf16( char *_src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { unsigned char *src = (unsigned char *) _src; unsigned char *next; @@ -533,7 +529,7 @@ void mulle_utf8_bufferconvert_to_utf16( char *_src, void mulle_utf8_bufferconvert_to_utf32( char *_src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes) + mulle_utf_add_bytes_function_t *addbytes) { unsigned char *src = (unsigned char *) _src; unsigned char *next; @@ -837,13 +833,13 @@ int mulle_utf8_is_ascii( char *src, size_t len) // // this routine does not validate... // -size_t mulle_utf8_utf16length( char *src, size_t len) +unsigned int mulle_utf8_utf16length( char *src, size_t len) { - char *end; - char *sentinel; - char _c; - size_t extra_len; - size_t dst_len; + char *end; + char *sentinel; + char _c; + unsigned int extra_len; + unsigned int dst_len; if( ! 
src) return( 0); @@ -866,7 +862,7 @@ size_t mulle_utf8_utf16length( char *src, size_t len) dst_len -= extra_len == 3 ? 2 : extra_len; // ==3 : 32 bit end = &src[ extra_len]; if( end >= sentinel) - return( -1); + return( 0); #ifndef NDEBUG do { @@ -885,13 +881,13 @@ size_t mulle_utf8_utf16length( char *src, size_t len) // // this routine does not validate... // -size_t mulle_utf8_utf32length( char *src, size_t len) +unsigned int mulle_utf8_utf32length( char *src, size_t len) { - char *end; - char *sentinel; - char _c; - size_t extra_len; - size_t dst_len; + char *end; + char *sentinel; + char _c; + unsigned int extra_len; + unsigned int dst_len; if( ! src) return( 0); @@ -900,7 +896,7 @@ size_t mulle_utf8_utf32length( char *src, size_t len) len = mulle_utf8_strlen( src); sentinel = &src[ len]; - dst_len = len; + dst_len = (unsigned int) len; for( ; src < sentinel; src++) { @@ -932,132 +928,6 @@ size_t mulle_utf8_utf32length( char *src, size_t len) -// -// different API to strnstr, and on linux strnstr is only available -// with BSD... -// You can't search for '\0' with this function. -// -char *mulle_utf8_strnstr( char *s, size_t len, char *search) -{ - char *p; - char *sentinel; - size_t offset; - - if( ! s || ! search) - return( NULL); - - if( len == (size_t) -1) - len = mulle_utf8_strlen( s); - - offset = mulle_utf8_strlen( search); - if( ! offset) - return( NULL); - - sentinel = &s[ len]; - p = search; - - // fprintf( stderr, "# s=\"%s\" search=\"%s\" len=%ld, offset=%ld, sentinel=\"%s\"\n", - // s, search, (long) len, (long) offset, sentinel); - - for(;;) - { - if( s >= sentinel) - return( NULL); - - if( *s++ != *p) - { - p = search; - continue; - } - - if( *++p) - continue; - - return( (char *) &s[ -(long) offset]); - } -} - - -// 0 is no terminator in this case -char *mulle_utf8_strnchr( char *s, size_t len, char c) -{ - char *sentinel; - - if( ! 
s) - return( NULL); - - if( len == (size_t) -1) - len = mulle_utf8_strlen( s); - - sentinel = &s[ len]; - - while( s < sentinel) - { - if( *s == (unsigned char) c) - return( s); - ++s; - } - return( NULL); -} - - -size_t mulle_utf8_strnspn( char *s, size_t len, char *search) -{ - char *start; - char *sentinel; - size_t search_len; - - if( ! s) - return( 0); - - if( ! search) - return( 0); - search_len = mulle_utf8_strlen( search); - - if( len == (size_t) -1) - len = mulle_utf8_strlen( s); - - start = s; - sentinel = &s[ len]; - - while( s < sentinel) - { - if( ! mulle_utf8_strnchr( search, search_len, *s)) - break; - ++s; - } - return( s - start); -} - - -size_t mulle_utf8_strncspn( char *s, size_t len, char *search) -{ - char *sentinel; - char *start; - size_t search_len; - - if( ! s) - return( 0); - - if( len == (size_t) -1) - len = mulle_utf8_strlen( s); - - if( ! search) - return( len); - - search_len = mulle_utf8_strlen( search); - - start = s; - sentinel = &s[ len]; - - while( s < sentinel) - { - if( mulle_utf8_strnchr( search, search_len,*s)) - return( s - start); - ++s; - } - return( len); -} struct mulle_utf8data mulle_utf8data_range_of_utf32_range( struct mulle_utf8data data, @@ -1121,7 +991,7 @@ static char *_mulle_table_convert_to_utf8( char *macroman, unsigned char *src; unsigned char *sentinel; unsigned char _c; - mulle_utf32_t utf32; + mulle_utf16_t utf16; assert( len != (size_t) -1); @@ -1137,16 +1007,15 @@ static char *_mulle_table_convert_to_utf8( char *macroman, continue; } - utf32 = table[ _c - 0x80]; - dst = _mulle_utf32_as_utf8_not_ascii( utf32, dst); + utf16 = table[ _c - 0x80]; + assert( utf16 >= 0x80); + dst = _mulle_utf32_as_utf8_not_ascii( (mulle_utf32_t) utf16, dst); } return( dst); } -char *_mulle_macroman_convert_to_utf8( char *macroman, - size_t len, - char *dst) +char *_mulle_macroman_convert_to_utf8( char *macroman, size_t len, char *dst) { static uint16_t macroman_0x80_0xFF[] = { @@ -1173,37 +1042,29 @@ char 
*_mulle_macroman_convert_to_utf8( char *macroman, } -char *_mulle_nextstep_convert_to_utf8( char *nextstep, - size_t len, - char *dst) +char *_mulle_nextstep_convert_to_utf8( char *nextstep, size_t len, char *dst) { - // http://www.kostis.net/charsets/nextstep.htm + // http://www.kostis.net/charsets/nextstep.htm (misses one character!) // https://en.wikipedia.org/wiki/NeXT_character_set - static uint16_t nextstep_0x80_0xFF[] = + static uint16_t nextstep_0x80_0xFD[] = { - 160, 192, 193, 194, 195, 196, 197, 199, - 200, 201, 202, 203, 204, 205, 206, 207, - - 208, 209, 210, 211, 212, 213, 214, 217, - 218, 219, 220, 221, 222, 181, 215, 247, - - 169, 161, 162, 163, 8260, 165, 402, 167, - 164, 0x27, 8220, 171, 8249, 8250, 64257, 64258, - - 174, 8211, 8224, 8225, 183, 166, 182, 8729, - 8218, 8222, 8221, 187, 8943, 8240, 172, 191, - - 185, 768, 180, 770, 771, 175, 728, 729, - 168, 178, 176, 184, 179, 733, 731, 711, - - 821, 177, 188, 189, 190, 224, 225, 226, - 227, 228, 229, 231, 232, 233, 234, 235, - - 236, 198, 237, 170, 238, 239, 240, 241, - 321, 216, 338, 186, 242, 243, 244, 245, - - 246, 230, 249, 250, 251, 305, 252, 253, - 322, 248, 339, 223, 254, 255, 0, 0 + 160, 192, 193, 194, 195, 196, 197, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 217, + 218, 219, 220, 221, 222, 181, 215, 247, + 169, 161, 162, 163, 8260, 165, 402, 167, + 164, 8217, 8220, 171, 8249, 8250, 64257, 64258, + 174, 8211, 8224, 8225, 183, 166, 182, 8729, + 8218, 8222, 8221, 187, 8943, 8240, 172, 191, + + 185, 768, 180, 770, 771, 175, 728, 729, + 168, 178, 176, 184, 179, 733, 731, 711, + 821, 177, 188, 189, 190, 224, 225, 226, + 227, 228, 229, 231, 232, 233, 234, 235, + 236, 198, 237, 170, 238, 239, 240, 241, + 321, 216, 338, 186, 242, 243, 244, 245, + 246, 230, 249, 250, 251, 305, 252, 253, + 322, 248, 339, 223, 254, 255, 0xFFFD, 0xFFFD // two extra for filler }; - return( _mulle_table_convert_to_utf8( nextstep, len, nextstep_0x80_0xFF, dst)); + return( 
_mulle_table_convert_to_utf8( nextstep, len, nextstep_0x80_0xFD, dst)); } diff --git a/src/mulle-utf8.h b/src/mulle-utf8.h index b926c3a..6e0c564 100644 --- a/src/mulle-utf8.h +++ b/src/mulle-utf8.h @@ -120,10 +120,10 @@ static inline unsigned int mulle_utf8_get_extracharacterslength( char c) // returned length does not include BOM // MULLE__UTF_GLOBAL -size_t mulle_utf8_utf16length( char *src, size_t len); +unsigned int mulle_utf8_utf16length( char *src, size_t len); MULLE__UTF_GLOBAL -size_t mulle_utf8_utf32length( char *src, size_t len); +unsigned int mulle_utf8_utf32length( char *src, size_t len); static inline size_t mulle_utf8_utf16maxlength( size_t len) @@ -146,7 +146,7 @@ static inline int mulle_utf8_has_leading_bomcharacter( char *src, size_t len) // if yes, p_c will contain char value // MULLE__UTF_GLOBAL -int mulle_utf8_are_valid_extracharacters( char *s, unsigned int len, mulle_utf32_t *p_c); +int mulle_utf8_are_valid_extracharacters( char *s, size_t len, mulle_utf32_t *p_c); // // if len is -1, assume that *s is '\0' terminated @@ -166,52 +166,6 @@ MULLE__UTF_GLOBAL char *mulle_utf8_validate( char *src, size_t len); -static inline size_t mulle_utf8_strlen( char *s) -{ - return( strlen( s)); -} - - -// -// hand coded because linux doesn't have it by default, and I want to get rid -// of the warning without having to define __USE_XOPEN2K8 -// -static inline size_t mulle_utf8_strnlen( char *s, size_t len) -{ - char *start; - char *sentinel; - - start = s; - sentinel = &s[ len]; - - while( s < sentinel) - { - if( ! 
*s) - break; - ++s; - } - return( (size_t) (s - start)); -} - - -// use the more canonical *, size_t oder -static inline int mulle_utf8_strncmp( char *s, size_t len, char *other) -{ - return( strncmp( s, other, len)); -} - - -MULLE__UTF_GLOBAL -char *mulle_utf8_strnstr( char *s, size_t len, char *search); - -MULLE__UTF_GLOBAL -char *mulle_utf8_strnchr( char *s, size_t len, char c); - -MULLE__UTF_GLOBAL -size_t mulle_utf8_strnspn( char *s, size_t len, char *search); - -MULLE__UTF_GLOBAL -size_t mulle_utf8_strncspn( char *s, size_t len, char *search); // extremely primitive! (Not as primitive anymore...) @@ -244,12 +198,26 @@ struct mulle_utf8data }; +static inline size_t mulle_utf8_strlen( char *s) +{ + char *p; + + if( ! s) + return( 0); + + /* this produced the nicest looking i386 code :P */ + for( p = s - 1; *++p;); + + return( p - s); +} + + static inline struct mulle_utf8data mulle_utf8data_make( char *s, size_t length) { struct mulle_utf8data data; - data.length = (length == (size_t) -1) ? mulle_utf8_strlen( s) : length; + data.length = (length == (size_t) -1) ? strlen( s) : length; data.characters = data.length ? s : ""; return( data); } @@ -276,6 +244,48 @@ static inline struct mulle_utf8data mulle_utf8data_make_invalid( void) } +// +// use these two functions to allocate and free mulle_utf8data +// They know that if data.length == 0, that data.characters is a "" +// static string +// +static inline void mulle_utf8data_init( struct mulle_utf8data *data, + char *characters, + size_t length, + struct mulle_allocator *allocator) +{ + + if( ! data) + return; + + if( ! 
length) + { + *data = mulle_utf8data_make( NULL, 0); + return; + } + + data->characters = mulle_allocator_malloc( allocator, length + 1); + if( characters) + memcpy( data->characters, characters, length); + data->length = length; + data->characters[ data->length] = 0; +} + + +static inline void + mulle_utf8data_done( struct mulle_utf8data *data, struct mulle_allocator *allocator) +{ + if( data && data->length) + { + mulle_allocator_free( allocator, data->characters); +#ifdef DEBUG + data->characters = ""; + data->length = UINTPTR_MAX; +#endif + } +} + + static inline int mulle_utf8data_is_empty( struct mulle_utf8data data) { @@ -377,14 +387,14 @@ MULLE__UTF_GLOBAL void mulle_utf8_bufferconvert_to_utf16( char *src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); // as above, but for utf32 MULLE__UTF_GLOBAL void mulle_utf8_bufferconvert_to_utf32( char *src, size_t len, void *buffer, - mulle_utf_add_bytes_function_t addbytes); + mulle_utf_add_bytes_function_t *addbytes); // diff --git a/src/reflect/_mulle-utf-include.h b/src/reflect/_mulle-utf-include.h index 75cf56b..07fdd01 100644 --- a/src/reflect/_mulle-utf-include.h +++ b/src/reflect/_mulle-utf-include.h @@ -14,27 +14,33 @@ #ifndef _mulle_utf_include_h__ #define _mulle_utf_include_h__ -// You can tweak the following #include with these commands. 
-// (Use 4CDE68EB-07DD-45DE-8DBB-2FD112F42C18 instead of mulle-allocator if there are duplicate entries) -// remove #include:: `mulle-sde dependency mark mulle-allocator no-header` -// rename: `mulle-sde dependency|library set mulle-allocator include whatever.h` -// reorder: `mulle-sde dependency move mulle-allocator ` -// toggle #include: `mulle-sde dependency mark mulle-allocator [no-]import` -// toggle public: `mulle-sde dependency mark mulle-allocator [no-]public` -// toggle optional: `mulle-sde dependency mark mulle-allocator [no-]require` -// remove for platform: `mulle-sde dependency mark mulle-allocator no-platform-` +// To remove the following dependency (headers and library) completely: +// `mulle-sde dependency remove mulle-allocator` +// (Use 4CDE68EB-07DD-45DE-8DBB-2FD112F42C18 instead of mulle-allocator, if there are duplicate entries) +// +// You can tweak the following #include with these commands: +// remove #include: `mulle-sde dependency mark mulle-allocator no-header` +// rename : `mulle-sde dependency|library set mulle-allocator include whatever.h` +// reorder : `mulle-sde dependency move mulle-allocator ` +// toggle #include: `mulle-sde dependency mark mulle-allocator [no-]import` +// toggle public : `mulle-sde dependency mark mulle-allocator [no-]public` +// toggle optional : `mulle-sde dependency mark mulle-allocator [no-]require` +// remove for platform : `mulle-sde dependency mark mulle-allocator no-platform-` // (use `mulle-sourcetree-to-c --unames` to list known values) #include // mulle-allocator -// You can tweak the following #include with these commands. 
-// (Use 5a4aa8fb-0f1b-43a0-b81c-d5218a642b58 instead of mulle-data if there are duplicate entries) -// remove #include:: `mulle-sde dependency mark mulle-data no-header` -// rename: `mulle-sde dependency|library set mulle-data include whatever.h` -// reorder: `mulle-sde dependency move mulle-data ` -// toggle #include: `mulle-sde dependency mark mulle-data [no-]import` -// toggle public: `mulle-sde dependency mark mulle-data [no-]public` -// toggle optional: `mulle-sde dependency mark mulle-data [no-]require` -// remove for platform: `mulle-sde dependency mark mulle-data no-platform-` +// To remove the following dependency (headers and library) completely: +// `mulle-sde dependency remove mulle-data` +// (Use 5a4aa8fb-0f1b-43a0-b81c-d5218a642b58 instead of mulle-data, if there are duplicate entries) +// +// You can tweak the following #include with these commands: +// remove #include: `mulle-sde dependency mark mulle-data no-header` +// rename : `mulle-sde dependency|library set mulle-data include whatever.h` +// reorder : `mulle-sde dependency move mulle-data ` +// toggle #include: `mulle-sde dependency mark mulle-data [no-]import` +// toggle public : `mulle-sde dependency mark mulle-data [no-]public` +// toggle optional : `mulle-sde dependency mark mulle-data [no-]require` +// remove for platform : `mulle-sde dependency mark mulle-data no-platform-` // (use `mulle-sourcetree-to-c --unames` to list known values) #include // mulle-data diff --git a/src/reflect/_mulle-utf-provide.h b/src/reflect/_mulle-utf-provide.h index edec341..2a6cf70 100644 --- a/src/reflect/_mulle-utf-provide.h +++ b/src/reflect/_mulle-utf-provide.h @@ -9,8 +9,8 @@ * * mulle-sde environment set MULLE_MATCH_TO_C_RUN DISABLE */ -#ifndef mulle_utf_provide_h__ -#define mulle_utf_provide_h__ +#ifndef _mulle__utf__provide_h__ +#define _mulle__utf__provide_h__ #include "mulle-ascii.h" @@ -21,6 +21,7 @@ #include "mulle-utf32.h" #include "mulle-utf32-string.h" #include "mulle-utf8.h" +#include 
"mulle-utf8-string.h" #include "mulle-utf-convenience.h" #include "mulle-utf-noncharacter.h" #include "mulle-utf-privatecharacter.h" diff --git a/src/reflect/_mulle-utf-versioncheck.h b/src/reflect/_mulle-utf-versioncheck.h index 6e6da18..499c7d4 100644 --- a/src/reflect/_mulle-utf-versioncheck.h +++ b/src/reflect/_mulle-utf-versioncheck.h @@ -7,10 +7,10 @@ #if defined( MULLE__ALLOCATOR_VERSION) # ifndef MULLE__ALLOCATOR_VERSION_MIN -# define MULLE__ALLOCATOR_VERSION_MIN ((5 << 20) | (0 << 8) | 2) +# define MULLE__ALLOCATOR_VERSION_MIN ((6 << 20) | (0 << 8) | 0) # endif # ifndef MULLE__ALLOCATOR_VERSION_MAX -# define MULLE__ALLOCATOR_VERSION_MAX ((6 << 20) | (0 << 8) | 0) +# define MULLE__ALLOCATOR_VERSION_MAX ((7 << 20) | (0 << 8) | 0) # endif # if MULLE__ALLOCATOR_VERSION < MULLE__ALLOCATOR_VERSION_MIN # error "mulle-allocator is too old" diff --git a/test/char5/encode.c b/test/char5/encode.c index c88d565..7fc6237 100644 --- a/test/char5/encode.c +++ b/test/char5/encode.c @@ -12,6 +12,7 @@ int main() int i; int d; uint32_t code; + uint64_t code64; char buf[ 7]; for( i = 0; i < 255; i++) @@ -45,13 +46,49 @@ int main() code = mulle_char5_encode32( ".u", 2); mulle_char5_decode32( code, buf, 2); - printf( "%llx\n", (long long) code); for( i = 0; i < 2; i++) putchar( mulle_char5_get32( code, i)); printf( "\n"); + code64 = mulle_char5_encode64( ".u", 2); + mulle_char5_decode64( code64, buf, 2); + printf( "%llx\n", (long long) code64); + + for( i = 0; i < 2; i++) + putchar( mulle_char5_get64( code64, i)); + printf( "\n"); + + /**/ + /**/ + /**/ + mulle_utf16_t text16[ 2] = { '.', 'u' }; + + code = mulle_char5_encode32_utf16( text16, 2); + mulle_char5_decode32( code, buf, 2); + printf( "%llx\n", (long long) code); + + code64 = mulle_char5_encode64_utf16( text16, 2); + mulle_char5_decode64( code64, buf, 2); + printf( "%llx\n", (long long) code64); + + + + /**/ + /**/ + /**/ + mulle_utf32_t text32[ 2] = { '.', 'u' }; + + code = mulle_char5_encode32_utf32( text32, 2); + 
mulle_char5_decode32( code, buf, 2); + printf( "%llx\n", (long long) code); + + code64 = mulle_char5_encode64_utf32( text32, 2); + mulle_char5_decode64( code64, buf, 2); + printf( "%llx\n", (long long) code64); + + /**/ /**/ /**/ diff --git a/test/char5/encode.stdout b/test/char5/encode.stdout index d8f4bb0..ca7f96c 100644 --- a/test/char5/encode.stdout +++ b/test/char5/encode.stdout @@ -1,5 +1,11 @@ .ACDEINOPST_abcdefghilmnoprstuy 3c1 .u +3c1 +.u +3c1 +3c1 +3c1 +3c1 128398a4 DEINOP diff --git a/test/char7/encode.c b/test/char7/encode.c index 5299e40..14d4fa9 100644 --- a/test/char7/encode.c +++ b/test/char7/encode.c @@ -12,6 +12,7 @@ int main() int i; int d; uint32_t code; + uint64_t code64; char buf[ 7]; size_t size; @@ -29,6 +30,35 @@ int main() putchar( mulle_char7_get( code, i)); printf( "\n"); + /**/ + /**/ + /**/ + mulle_utf16_t text16[ 2] = { '.', 'u' }; + + code = mulle_char7_encode32_utf16( text16, 2); + mulle_char7_decode32( code, buf, 2); + printf( "%llx\n", (long long) code); + + code64 = mulle_char7_encode64_utf16( text16, 2); + mulle_char7_decode64( code64, buf, 2); + printf( "%llx\n", (long long) code64); + + + + /**/ + /**/ + /**/ + mulle_utf32_t text32[ 2] = { '.', 'u' }; + + code = mulle_char7_encode32_utf32( text32, 2); + mulle_char7_decode32( code, buf, 2); + printf( "%llx\n", (long long) code); + + code64 = mulle_char7_encode64_utf32( text32, 2); + mulle_char7_decode64( code64, buf, 2); + printf( "%llx\n", (long long) code64); + + /**/ /**/ /**/ @@ -54,8 +84,35 @@ int main() printf( "%llx\n", (long long) code); for( i = 0; i < 4; i++) - putchar( mulle_char7_get( code, i)); + putchar( mulle_char7_get32( code, i)); + printf( "\n"); + /**/ + /**/ + /**/ + + memset( buf, 'X', sizeof( buf)); + + code64 = mulle_char7_encode64( "LMPR", 4); + size = mulle_char7_decode64( code64, buf, 4); + + if( buf[ 5] != 'X') + { + printf( "encode decode overflow\n"); + abort(); + } + buf[ size] = 0; + + if( strncmp( buf, "LMPR", 4)) + { + printf( "encode decode 
failed\n"); + abort(); + } + + printf( "%llx\n", (long long) code64); + + for( i = 0; i < 4; i++) + putchar( mulle_char7_get64( code64, i)); printf( "\n"); return( 0); diff --git a/test/char7/encode.stdout b/test/char7/encode.stdout index 4e29eea..56031a1 100644 --- a/test/char7/encode.stdout +++ b/test/char7/encode.stdout @@ -1,4 +1,10 @@ 3aae .u +3aae +3aae +3aae +3aae +a5426cc +LMPR a5426cc LMPR diff --git a/test/char7/various.c b/test/char7/various.c new file mode 100644 index 0000000..4bdc00a --- /dev/null +++ b/test/char7/various.c @@ -0,0 +1,62 @@ +#include + +#include +#include + + + +static void test_is_char7string( char *s) +{ + int result; + int result2; + + if( mulle_char7_get_maxlength() == mulle_char7_maxlength32) + result = mulle_char7_is_char7string32( s, -1); + else + result = mulle_char7_is_char7string64( s, -1); + + result2 = mulle_char7_is_char7string( s, -1); + + printf( "mulle_char7_is_char7string( %s) : %s\n", + s ? s : "NULL", + (result == result2) ? "OK" : "FAIL"); +} + + +static void test_is_char7string32( char *s) +{ + printf( "mulle_char7_is_char7string32( %s)=%s\n", + s ? s : "NULL", + mulle_char7_is_char7string32( s, -1) ? "YES" : "NO"); +} + + +static void test_is_char7string64( char *s) +{ + printf( "mulle_char7_is_char7string64( %s)=%s\n", + s ? s : "NULL", + mulle_char7_is_char7string64( s, -1) ? 
"YES" : "NO"); +} + + + +static void test( char *s) +{ + test_is_char7string( s); + test_is_char7string32( s); + test_is_char7string64( s); +} + + +int main() +{ + char *s1 = "VfL Bochum 1848"; + char *s2 = "VfL"; + + test( NULL); + test( s1); + test( s2); + + return( 0); +} + diff --git a/test/char7/various.stdout b/test/char7/various.stdout new file mode 100644 index 0000000..5e665b0 --- /dev/null +++ b/test/char7/various.stdout @@ -0,0 +1,9 @@ +mulle_char7_is_char7string( NULL) : OK +mulle_char7_is_char7string32( NULL)=NO +mulle_char7_is_char7string64( NULL)=NO +mulle_char7_is_char7string( VfL Bochum 1848) : OK +mulle_char7_is_char7string32( VfL Bochum 1848)=NO +mulle_char7_is_char7string64( VfL Bochum 1848)=NO +mulle_char7_is_char7string( VfL) : OK +mulle_char7_is_char7string32( VfL)=YES +mulle_char7_is_char7string64( VfL)=YES diff --git a/test/conversion/backandforth-allocator.c b/test/conversion/backandforth-allocator.c new file mode 100644 index 0000000..195ed98 --- /dev/null +++ b/test/conversion/backandforth-allocator.c @@ -0,0 +1,52 @@ +#include + +#include +#include + + +static void test( mulle_utf32_t original[ 4]) +{ + char *text8; + mulle_utf16_t *text16; + mulle_utf32_t *text32; + + text8 = mulle_utf32_convert_to_utf8_string( original, 4, NULL); + text32 = mulle_utf8_convert_to_utf32_string( text8, -1, NULL); + if( mulle_utf32_strncmp( text32, original, 4)) + { + printf( "FAIL1\n"); + } + mulle_free( text8); + mulle_free( text32); + + text16 = mulle_utf32_convert_to_utf16_string( original, 4, NULL); + text32 = mulle_utf16_convert_to_utf32_string( text16, -1, NULL); + if( mulle_utf32_strncmp( text32, original, 4)) + { + printf( "FAIL2\n"); + } + mulle_free( text16); + mulle_free( text32); +} + + + +// 3emos: UTF8 f0 9f 8c 91 f0 9f 9a 9a f0 9f 91 a0 (unbommed) +// 3emos: UTF16 ff fe 3c d8 11 df 3d d8 9a de 3d d8 60 dc (bommed) +// 3emos: UTF32 ff fe 00 00 11 f3 01 00 9a f6 01 00 60 f4 01 00 + +int main() +{ + mulle_utf32_t text_3emos[ 4] = { 0x01f311, 
0x01f69a, 0x1f460 }; + mulle_utf32_t text_utf16[ 4] = { 47177, 29938, 18497 }; + mulle_utf32_t text_utf15[ 4] = { 32313, 29938, 18497 }; + mulle_utf32_t ascii_trailing_zero[ 4] = { 'V', 'f', 'L', 0 }; + + test( text_3emos); + test( text_utf15); + test( text_utf16); + test( ascii_trailing_zero); + + return( 0); +} + diff --git a/test/conversion/backandforth-allocator.stdout b/test/conversion/backandforth-allocator.stdout new file mode 100644 index 0000000..e69de29 diff --git a/test/conversion/backandforth-ascii.c b/test/conversion/backandforth-ascii.c new file mode 100644 index 0000000..5450807 --- /dev/null +++ b/test/conversion/backandforth-ascii.c @@ -0,0 +1,100 @@ +#include + +#include +#include + + + +struct bytes_buffer +{ + union + { + mulle_utf32_t _32[ 32]; + mulle_utf16_t _16[ 64]; + char _8[ 128]; + } text; + size_t n; // n bytes +}; + + +static void buffer_add( struct bytes_buffer *p, void *bytes, size_t len) +{ + memmove( &p->text._8[ p->n], bytes, len); + p->n += len; +} + + +static void test_buffer_conversion( char *text, int n) +{ + struct bytes_buffer buffer32 = { 0 }; + struct bytes_buffer buffer16 = { 0 }; + struct bytes_buffer buffer8 = { 0 }; + + assert( n < 32); + + printf( "ASCII: %.*s\n", (int) n, text); + + mulle_ascii_bufferconvert_to_utf16( text, n, &buffer16, (void *) buffer_add); + mulle_utf16_bufferconvert_to_utf8( buffer16.text._16, buffer16.n / sizeof( mulle_utf16_t), &buffer8, (void *) buffer_add); + printf( "UTF16: %.*s\n", (int) buffer8.n, buffer8.text._8); + buffer8.n = 0; /* buffer_add appends: drop the UTF16 round-trip result so the UTF32 leg is what gets printed */ + mulle_ascii_bufferconvert_to_utf32( text, n, &buffer32, (void *) buffer_add); + mulle_utf32_bufferconvert_to_utf8( buffer32.text._32, buffer32.n / sizeof( mulle_utf32_t), &buffer8, (void *) buffer_add); + printf( "UTF32: %.*s\n", (int) buffer8.n, buffer8.text._8); +} + + + +struct unit_buffer +{ + union + { + mulle_utf32_t _32[ 32]; + mulle_utf16_t _16[ 64]; + char _8[ 128]; + } text; + size_t n; // n units +}; + + +static void test_direct_conversion( char *text, int n) +{
+ struct unit_buffer buffer32 = { 0 }; + struct unit_buffer buffer16 = { 0 }; + struct unit_buffer buffer8 = { 0 }; + + assert( n < 32); + + if( ! mulle_utf8_is_ascii( text, n)) + printf( "Fail ASCII\n"); + + printf( "ASCII: %.*s\n", (int) n, text); + + buffer16.n = _mulle_ascii_convert_to_utf16( text, n, buffer16.text._16) - buffer16.text._16; + buffer8.n = _mulle_utf16_convert_to_utf8( buffer16.text._16, buffer16.n, buffer8.text._8) - buffer8.text._8; + printf( "UTF16: %.*s\n", (int) buffer8.n, buffer8.text._8); + + buffer32.n = _mulle_ascii_convert_to_utf32( text, n, buffer32.text._32) - buffer32.text._32; + buffer8.n = _mulle_utf32_convert_to_utf8( buffer32.text._32, buffer32.n, buffer8.text._8) - buffer8.text._8; + printf( "UTF32: %.*s\n", (int) buffer8.n, buffer8.text._8); +} + + + +int main() +{ + char *ascii = "VfL Bochum 1848\n"; + + // for coverage + assert( ! mulle_utf8_is_ascii( NULL, 0)); + assert( mulle_utf8_is_ascii( "", -1)); + assert( ! mulle_utf8_is_ascii( "\x90", 1)); + + test_direct_conversion( ascii, strlen( ascii)); + + test_buffer_conversion( ascii, strlen( ascii)); + test_buffer_conversion( ascii, -1); + + return( 0); +} + diff --git a/test/conversion/backandforth-ascii.stdout b/test/conversion/backandforth-ascii.stdout new file mode 100644 index 0000000..dc4567c --- /dev/null +++ b/test/conversion/backandforth-ascii.stdout @@ -0,0 +1,18 @@ +ASCII: VfL Bochum 1848 + +UTF16: VfL Bochum 1848 + +UTF32: VfL Bochum 1848 + +ASCII: VfL Bochum 1848 + +UTF16: VfL Bochum 1848 + +UTF32: VfL Bochum 1848 + +ASCII: VfL Bochum 1848 + +UTF16: VfL Bochum 1848 + +UTF32: VfL Bochum 1848 + diff --git a/test/conversion/backandforth.c b/test/conversion/backandforth.c index 8ef569a..f2b6d6b 100644 --- a/test/conversion/backandforth.c +++ b/test/conversion/backandforth.c @@ -15,7 +15,10 @@ static mulle_utf32_t random_char( mulle_utf32_t mask) if( ! 
c) continue; } - while( mulle_utf32_is_bomcharacter( c) || mulle_utf32_is_noncharacter( c) || mulle_utf32_is_privatecharacter( c)); + while( mulle_utf32_is_bomcharacter( c) || \ + mulle_utf32_is_noncharacter( c) || \ + (mulle_utf_is_privatecharacterplane( mulle_utf32_get_unicodeplane( c)) && \ + mulle_utf32_is_privatecharacter( c))); return( c); } diff --git a/test/conversion/buffer-convenience.c b/test/conversion/buffer-convenience.c index 5258d41..c1c8aa4 100644 --- a/test/conversion/buffer-convenience.c +++ b/test/conversion/buffer-convenience.c @@ -11,7 +11,7 @@ int main( void) "L\xc3\xb6\xc3\xb6\xc3\xb6\xc3\xb6orem ipsum dolor sit amet, consectetur " "adipisici elit, sed eiusmod " }; - char buf[ 4]; + char buf[ 5]; mulle_utf32_t *s; ctxt.buf = buf; @@ -22,7 +22,9 @@ int main( void) 4, &ctxt, mulle_utf8_conversion_context_add_bytes); - printf( "%.*s\n", 4, buf); + *ctxt.buf = 0; + + printf( "%s\n", buf); mulle_free( s); return( 0); diff --git a/test/conversion/macroman.c b/test/conversion/macroman.c new file mode 100644 index 0000000..74a183e --- /dev/null +++ b/test/conversion/macroman.c @@ -0,0 +1,46 @@ +#include + +#include +#include + + + +int main() +{ + char macos[ 256]; + int n; + unsigned int m; + char buf[ 256 * 4] = { 0 }; + char *end; + char *s; + char *t; + int i; + unsigned int off; + + for( n = 0, i = 32; i < 256; i++, n++) + macos[ n] = i; + + end = _mulle_macroman_convert_to_utf8( macos, n, buf); + + // MEMO: the test output is wrong but I don't know why! 
+ off = 32; + s = buf; + for(;;) + { + m = 16; + t = mulle_utf8_skiputf32( s, &m); + if( m != 16) + break; + + printf( "%02x: %.*s\n", off, (int) (t - s), s); + s = t; + off += 16; + } + + if( end != s) + { + printf( "%02x: %.*s\n", off, (int) (end - s), s); + } + return( 0); +} + diff --git a/test/conversion/macroman.stdout b/test/conversion/macroman.stdout new file mode 100644 index 0000000..2966f2c --- /dev/null +++ b/test/conversion/macroman.stdout @@ -0,0 +1,14 @@ +20: !"#$%&'()*+,-./ +30: 0123456789:;<=>? +40: @ABCDEFGHIJKLMNO +50: PQRSTUVWXYZ[\]^_ +60: `abcdefghijklmno +70: pqrstuvwxyz{|}~ +80: ÄÅÇÉÑÖÜáàâäãåçéè +90: êëíìîïñóòôöõúùûü +a0: †°¢£§•¶ß®©™´¨≠ÆØ +b0: ∞±≤≥¥µ∂∑∏π∫ªºΩæø +c0: ¿¡¬√ƒ≈∆«»… ÀÃÕŒœ +d0: –—“”‘’÷◊ÿŸ⁄€‹›fifl +e0: ‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ +f0: ÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ diff --git a/test/conversion/nextstep.c b/test/conversion/nextstep.c new file mode 100644 index 0000000..2b94448 --- /dev/null +++ b/test/conversion/nextstep.c @@ -0,0 +1,44 @@ +#include + +#include +#include + + + +int main() +{ + char nextstep[ 256]; + int n; + unsigned int m; + char buf[ 256 * 4] = { 0 }; + char *end; + char *s; + char *t; + int i; + unsigned int off; + + for( n = 0, i = 32; i < 254; i++, n++) + nextstep[ n] = i; + + end = _mulle_nextstep_convert_to_utf8( nextstep, n, buf); + + off = 32; + s = buf; + for(;;) + { + m = 16; + t = mulle_utf8_skiputf32( s, &m); + if( m != 16) + break; + + printf( "%02x: %.*s\n", off, (int) (t - s), s); + s = t; + off += 16; + } + + if( end != s) + printf( "%02x: %.*s\n", off, (int) (end - s), s); + + return( 0); +} + diff --git a/test/conversion/nextstep.stdout b/test/conversion/nextstep.stdout new file mode 100644 index 0000000..bf7cf11 --- /dev/null +++ b/test/conversion/nextstep.stdout @@ -0,0 +1,14 @@ +20: !"#$%&'()*+,-./ +30: 0123456789:;<=>? 
+40: @ABCDEFGHIJKLMNO +50: PQRSTUVWXYZ[\]^_ +60: `abcdefghijklmno +70: pqrstuvwxyz{|}~ +80:  ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏ +90: ÐÑÒÓÔÕÖÙÚÛÜÝÞµ×÷ +a0: ©¡¢£⁄¥ƒ§¤’“«‹›fifl +b0: ®–†‡·¦¶∙‚„”»⋯‰¬¿ +c0: ¹̀´̂̃¯˘˙¨²°¸³˝˛ˇ +d0: ̵±¼½¾àáâãäåçèéêë +e0: ìÆíªîïðñŁØŒºòóôõ +f0: öæùúûıüýłøœßþÿ diff --git a/test/coverage.json b/test/coverage.json new file mode 100644 index 0000000..fbcbb11 --- /dev/null +++ b/test/coverage.json @@ -0,0 +1,210 @@ +{ + "branch_covered": 675, + "branch_percent": 65.5, + "branch_total": 1030, + "files": [ + { + "branch_covered": 12, + "branch_percent": 1.0, + "branch_total": 12, + "filename": "mulle-ascii.c", + "line_covered": 30, + "line_percent": 1.0, + "line_total": 30 + }, + { + "branch_covered": 77, + "branch_percent": 0.846, + "branch_total": 91, + "filename": "mulle-char5.c", + "line_covered": 149, + "line_percent": 0.914, + "line_total": 163 + }, + { + "branch_covered": 1, + "branch_percent": 0.5, + "branch_total": 2, + "filename": "mulle-char5.h", + "line_covered": 8, + "line_percent": 0.8, + "line_total": 10 + }, + { + "branch_covered": 50, + "branch_percent": 0.833, + "branch_total": 60, + "filename": "mulle-char7.c", + "line_covered": 114, + "line_percent": 0.919, + "line_total": 124 + }, + { + "branch_covered": 0, + "branch_percent": null, + "branch_total": 0, + "filename": "mulle-char7.h", + "line_covered": 0, + "line_percent": 0.0, + "line_total": 2 + }, + { + "branch_covered": 50, + "branch_percent": 0.6579999999999999, + "branch_total": 76, + "filename": "mulle-utf-convenience.c", + "line_covered": 194, + "line_percent": 0.833, + "line_total": 233 + }, + { + "branch_covered": 19, + "branch_percent": 0.95, + "branch_total": 20, + "filename": "mulle-utf-noncharacter.c", + "line_covered": 19, + "line_percent": 0.8640000000000001, + "line_total": 22 + }, + { + "branch_covered": 10, + "branch_percent": 0.833, + "branch_total": 12, + "filename": "mulle-utf-noncharacter.h", + "line_covered": 16, + "line_percent": 1.0, + "line_total": 16 + }, + { + 
"branch_covered": 15, + "branch_percent": 0.938, + "branch_total": 16, + "filename": "mulle-utf-privatecharacter.c", + "line_covered": 19, + "line_percent": 1.0, + "line_total": 19 + }, + { + "branch_covered": 0, + "branch_percent": null, + "branch_total": 0, + "filename": "mulle-utf-rover.c", + "line_covered": 45, + "line_percent": 1.0, + "line_total": 45 + }, + { + "branch_covered": 0, + "branch_percent": null, + "branch_total": 0, + "filename": "mulle-utf-rover.h", + "line_covered": 9, + "line_percent": 1.0, + "line_total": 9 + }, + { + "branch_covered": 27, + "branch_percent": 0.9309999999999999, + "branch_total": 29, + "filename": "mulle-utf-scan.c", + "line_covered": 63, + "line_percent": 0.9690000000000001, + "line_total": 65 + }, + { + "branch_covered": 58, + "branch_percent": 0.725, + "branch_total": 80, + "filename": "mulle-utf16-string.c", + "line_covered": 96, + "line_percent": 0.787, + "line_total": 122 + }, + { + "branch_covered": 76, + "branch_percent": 0.45799999999999996, + "branch_total": 166, + "filename": "mulle-utf16.c", + "line_covered": 151, + "line_percent": 0.557, + "line_total": 271 + }, + { + "branch_covered": 4, + "branch_percent": 1.0, + "branch_total": 4, + "filename": "mulle-utf16.h", + "line_covered": 9, + "line_percent": 1.0, + "line_total": 9 + }, + { + "branch_covered": 34, + "branch_percent": 0.607, + "branch_total": 56, + "filename": "mulle-utf32-string.c", + "line_covered": 68, + "line_percent": 0.687, + "line_total": 99 + }, + { + "branch_covered": 76, + "branch_percent": 0.585, + "branch_total": 130, + "filename": "mulle-utf32.c", + "line_covered": 163, + "line_percent": 0.718, + "line_total": 227 + }, + { + "branch_covered": 6, + "branch_percent": 1.0, + "branch_total": 6, + "filename": "mulle-utf32.h", + "line_covered": 14, + "line_percent": 1.0, + "line_total": 14 + }, + { + "branch_covered": 35, + "branch_percent": 0.9209999999999999, + "branch_total": 38, + "filename": "mulle-utf8-string.c", + "line_covered": 62, + 
"line_percent": 0.9840000000000001, + "line_total": 63 + }, + { + "branch_covered": 0, + "branch_percent": null, + "branch_total": 0, + "filename": "mulle-utf8-string.h", + "line_covered": 2, + "line_percent": 1.0, + "line_total": 2 + }, + { + "branch_covered": 101, + "branch_percent": 0.515, + "branch_total": 196, + "filename": "mulle-utf8.c", + "line_covered": 243, + "line_percent": 0.606, + "line_total": 401 + }, + { + "branch_covered": 24, + "branch_percent": 0.667, + "branch_total": 36, + "filename": "mulle-utf8.h", + "line_covered": 31, + "line_percent": 0.838, + "line_total": 37 + } + ], + "gcovr/summary_format_version": "0.2", + "line_covered": 1505, + "line_percent": 75.9, + "line_total": 1983, + "root": "../../src" +} \ No newline at end of file diff --git a/test/mogrify/mogrify_character.c b/test/mogrify/mogrify_character.c new file mode 100644 index 0000000..ebd4550 --- /dev/null +++ b/test/mogrify/mogrify_character.c @@ -0,0 +1,138 @@ +#include + +#include +#include +#include + +static mulle_utf32_t xtoupper( mulle_utf32_t c) +{ + return( (unsigned int) c < 0x80 ? (mulle_utf32_t) toupper( (int) c) : c); /* toupper() is undefined outside the unsigned char range; non-ASCII code points pass through unchanged */ +} + +static struct mulle_utf_mogrification_info mogrify = +{ + xtoupper +}; + + + +static void test_8( mulle_utf16_t original[ 4]) +{ + char *text; + char buf[ 32] = { 0 }; // important + struct mulle_utf8data src; + struct mulle_utf8data dst; + + text = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + src = mulle_utf8data_make( text, (size_t) -1); + dst = mulle_utf8data_make( buf, sizeof( buf)); + + _mulle_utf8_character_mogrify( &dst, &src, &mogrify); + + printf( "%s -> %s\n", text, dst.characters); + mulle_free( text); +} + + +static void test_16( mulle_utf16_t original[ 4]) +{ + mulle_utf16_t *text; + char *s; + char *t; + mulle_utf32_t buf[ 32] = { 0 }; // important + struct mulle_utf16data src; + struct mulle_utf32data dst; + + src = mulle_utf16data_make( original, (size_t) -1); + dst = mulle_utf32data_make( buf, sizeof( buf)); + + _mulle_utf16_character_mogrify( &dst, &src, &mogrify); +
+ s = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + t = mulle_utf32_convert_to_utf8_string( dst.characters, dst.length, NULL); + printf( "%s -> %s\n", s, t); + mulle_free( t); + mulle_free( s); +} + +static void test_16_unsafe( mulle_utf16_t original[ 4]) +{ + mulle_utf16_t *text; + char *s; + char *t; + mulle_utf16_t buf[ 32] = { 0 }; // important + struct mulle_utf16data src; + struct mulle_utf16data dst; + + src = mulle_utf16data_make( original, (size_t) -1); + dst = mulle_utf16data_make( buf, sizeof( buf)); + + _mulle_utf16_character_mogrify_unsafe( &dst, &src, &mogrify); + + s = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + t = mulle_utf16_convert_to_utf8_string( dst.characters, dst.length, NULL); + printf( "%s -> %s\n", s, t); + mulle_free( t); + mulle_free( s); +} + +static void test_32( mulle_utf16_t original[ 4]) +{ + mulle_utf32_t *text; + char *s; + char *t; + mulle_utf32_t buf[ 32] = { 0 }; // important + struct mulle_utf32data src; + struct mulle_utf32data dst; + + text = mulle_utf16_convert_to_utf32_string( original, 4, NULL); + + src = mulle_utf32data_make( text, (size_t) -1); + dst = mulle_utf32data_make( buf, sizeof( buf)); + + _mulle_utf32_character_mogrify( &dst, &src, &mogrify); + + s = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + t = mulle_utf32_convert_to_utf8_string( dst.characters, dst.length, NULL); + printf( "%s -> %s\n", s, t); + mulle_free( t); + mulle_free( s); + + mulle_free( text); +} + +int main() +{ + mulle_utf16_t text_utf16[ 4] = { 47177, 29938, 18497 }; + mulle_utf16_t text_utf15[ 4] = { 32313, 29938, 18497 }; + mulle_utf16_t ascii_capitalized[ 4] = { 'V', 'f', 'L', 0 }; + mulle_utf16_t ascii_lowercase[ 4] = { 'v', 'f', 'l', 0 }; + mulle_utf16_t ascii_uppercase[ 4] = { 'V', 'F', 'L', 0 }; + + test_8( text_utf15); + test_8( text_utf16); + test_8( ascii_capitalized); + test_8( ascii_lowercase); + test_8( ascii_uppercase); + + test_16( text_utf15); + test_16( text_utf16); + test_16( 
ascii_capitalized); + test_16( ascii_lowercase); + test_16( ascii_uppercase); + + test_16_unsafe( text_utf15); + test_16_unsafe( text_utf16); + test_16_unsafe( ascii_capitalized); + test_16_unsafe( ascii_lowercase); + test_16_unsafe( ascii_uppercase); + + test_32( text_utf15); + test_32( text_utf16); + test_32( ascii_capitalized); + test_32( ascii_lowercase); + test_32( ascii_uppercase); + + return( 0); +} + diff --git a/test/mogrify/mogrify_character.stdout b/test/mogrify/mogrify_character.stdout new file mode 100644 index 0000000..03dd3d7 --- /dev/null +++ b/test/mogrify/mogrify_character.stdout @@ -0,0 +1,20 @@ +縹瓲䡁 -> 縹瓲䡁 +롉瓲䡁 -> 롉瓲䡁 +VfL -> VFL +vfl -> VFL +VFL -> VFL +縹瓲䡁 -> 縹瓲䡁 +롉瓲䡁 -> 롉瓲䡁 +VfL -> VFL +vfl -> VFL +VFL -> VFL +縹瓲䡁 -> 縹瓲䡁 +롉瓲䡁 -> 롉瓲䡁 +VfL -> VFL +vfl -> VFL +VFL -> VFL +縹瓲䡁 -> 縹瓲䡁 +롉瓲䡁 -> 롉瓲䡁 +VfL -> VFL +vfl -> VFL +VFL -> VFL diff --git a/test/mogrify/mogrify_word.c b/test/mogrify/mogrify_word.c new file mode 100644 index 0000000..45ce12e --- /dev/null +++ b/test/mogrify/mogrify_word.c @@ -0,0 +1,123 @@ +#include + +#include +#include +#include + +static mulle_utf32_t xtoupper( mulle_utf32_t c) +{ + return( (unsigned int) c < 0x80 ? (mulle_utf32_t) toupper( (int) c) : c); /* toupper() is undefined outside the unsigned char range; non-ASCII code points pass through unchanged */ +} + +static mulle_utf32_t xtolower( mulle_utf32_t c) +{ + return( (unsigned int) c < 0x80 ? (mulle_utf32_t) tolower( (int) c) : c); /* same domain restriction as toupper() */ +} + +static int xiswhite( mulle_utf32_t c) +{ + return( c == ' ' || c == '\t'); +} + + +static struct mulle_utf_mogrification_info mogrify = +{ + xtoupper, + xtolower, + xiswhite +}; + + +static void test_8( mulle_utf16_t original[ 4]) +{ + char *text; + char buf[ 32] = { 0 }; // important + struct mulle_utf8data src; + struct mulle_utf8data dst; + + text = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + src = mulle_utf8data_make( text, (size_t) -1); + dst = mulle_utf8data_make( buf, sizeof( buf)); + + _mulle_utf8_word_mogrify( &dst, &src, &mogrify); + + printf( "%s -> %s\n", text, dst.characters); + mulle_free( text); +} + + +static void test_16( mulle_utf16_t original[ 4]) +{ + mulle_utf16_t *text; + char *s; + char *t; +
mulle_utf32_t buf[ 32] = { 0 }; // important + struct mulle_utf16data src; + struct mulle_utf32data dst; + + src = mulle_utf16data_make( original, (size_t) -1); + dst = mulle_utf32data_make( buf, sizeof( buf)); + + _mulle_utf16_word_mogrify( &dst, &src, &mogrify); + + s = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + t = mulle_utf32_convert_to_utf8_string( dst.characters, dst.length, NULL); + printf( "%s -> %s\n", s, t); + mulle_free( t); + mulle_free( s); +} + +static void test_32( mulle_utf16_t original[ 4]) +{ + mulle_utf32_t *text; + char *s; + char *t; + mulle_utf32_t buf[ 32] = { 0 }; // important + struct mulle_utf32data src; + struct mulle_utf32data dst; + + text = mulle_utf16_convert_to_utf32_string( original, 4, NULL); + + src = mulle_utf32data_make( text, (size_t) -1); + dst = mulle_utf32data_make( buf, sizeof( buf)); + + _mulle_utf32_word_mogrify( &dst, &src, &mogrify); + + s = mulle_utf16_convert_to_utf8_string( original, 4, NULL); + t = mulle_utf32_convert_to_utf8_string( dst.characters, dst.length, NULL); + printf( "%s -> %s\n", s, t); + mulle_free( t); + mulle_free( s); + + mulle_free( text); +} + +int main() +{ + mulle_utf16_t text_utf16[ 5] = { 47177, 29938, ' ', 18497 }; + mulle_utf16_t text_utf15[ 5] = { 32313, 29938, '\t', 18497 }; + mulle_utf16_t ascii_capitalized[ 5] = { 'V', 'f', ' ', 'L', 0 }; + mulle_utf16_t ascii_lowercase[ 5] = { 'v', 'f', '\t', 'l', 0 }; + mulle_utf16_t ascii_uppercase[ 5] = { 'V', 'F', ' ', 'L', 0 }; + + test_8( text_utf15); + test_8( text_utf16); + test_8( ascii_capitalized); + test_8( ascii_lowercase); + test_8( ascii_uppercase); + + test_16( text_utf15); + test_16( text_utf16); + test_16( ascii_capitalized); + test_16( ascii_lowercase); + test_16( ascii_uppercase); + + test_32( text_utf15); + test_32( text_utf16); + test_32( ascii_capitalized); + test_32( ascii_lowercase); + test_32( ascii_uppercase); + + return( 0); +} + diff --git a/test/mogrify/mogrify_word.stdout b/test/mogrify/mogrify_word.stdout 
new file mode 100644 index 0000000..039d96f --- /dev/null +++ b/test/mogrify/mogrify_word.stdout @@ -0,0 +1,15 @@ +縹瓲 䡁 -> 縹瓲 䡁 +롉瓲 䡁 -> 롉瓲 䡁 +Vf L -> Vf L +vf l -> Vf L +VF L -> Vf L +縹瓲 䡁 -> 縹瓲 䡁 +롉瓲 䡁 -> 롉瓲 䡁 +Vf L -> Vf L +vf l -> Vf L +VF L -> Vf L +縹瓲 䡁 -> 縹瓲 䡁 +롉瓲 䡁 -> 롉瓲 䡁 +Vf L -> Vf L +vf l -> Vf L +VF L -> Vf L diff --git a/test/scan/longlong-utf16.c b/test/scan/longlong-utf16.c new file mode 100644 index 0000000..32806ba --- /dev/null +++ b/test/scan/longlong-utf16.c @@ -0,0 +1,136 @@ +#include + +#include +#include +#include + + +static void print_longlong( long long value, int rval) +{ + if( value == LLONG_MIN) + printf( "LLONG_MIN"); + else + if( value == LLONG_MAX) + printf( "LLONG_MAX"); + else + if( rval == mulle_utf_is_too_large_for_signed) + { + if( (unsigned long long) value == ULLONG_MAX) + printf( "ULLONG_MAX"); + else + printf( "%llu", value); + } + else + printf( "%lld", value); +} + + +static void test( char *text) +{ + int rval; + long long value; + mulle_utf16_t *s; + mulle_utf16_t buf[ 256]; + size_t i; + size_t len; + + printf( "\"%s\"", text); + + len = strlen( text); + for( i = 0; i < len; i++) + buf[ i] = text[ i]; + + s = buf; + value = 0x18481848; + rval = _mulle_utf16_scan_longlong_decimal( &s, len, &value); + if( rval >= 0) + { + printf( " -> "); + print_longlong( value, rval); + } + + if( s != &buf[ len] && rval != mulle_utf_has_overflown) + printf( " garbage: '%c'", (int) *s); + + if( rval != 0) + printf( " (%d)", rval); + + printf( "\n"); +} + + +int main() +{ + char buf[ 256]; + + // happy path tests + test( "1848"); + test( "+1848"); + test( "-1848"); + + // garbage at end + test( "-1848a"); + + test( "+"); + test( "-"); + test( "a"); + test( "."); + test( ","); + test( "0"); + test( "1"); + test( "9"); + + test( "++"); + test( "+-"); + test( "+a"); + test( "+."); + test( "+,"); + test( "+0"); + test( "+1"); + test( "+9"); + + test( "-+"); + test( "--"); + test( "-a"); + test( "-."); + test( "-,"); + test( "-0"); + test( 
"-1"); + test( "-9"); + + test( "0+"); + test( "0-"); + test( "0a"); + test( "0."); + test( "0,"); + test( "00"); + test( "01"); + test( "09"); + + test( "1+"); + test( "1-"); + test( "1a"); + test( "1."); + test( "1,"); + test( "10"); + test( "11"); + test( "19"); + + test( ""); + + sprintf( buf, "%lld", LLONG_MIN); + test( buf); + + sprintf( buf, "%lld", LLONG_MAX); + test( buf); + + sprintf( buf, "%llu", ULLONG_MAX); + test( buf); + + test( "-123456789012345678901234567890"); + test( "123456789012345678901234567890"); + test( "+123456789012345678901234567890"); + + return( 0); +} + diff --git a/test/scan/longlong-utf16.stdout b/test/scan/longlong-utf16.stdout new file mode 100644 index 0000000..7f67383 --- /dev/null +++ b/test/scan/longlong-utf16.stdout @@ -0,0 +1,51 @@ +"1848" -> 1848 +"+1848" -> 1848 +"-1848" -> -1848 +"-1848a" -> -1848 garbage: 'a' (2) +"+" garbage: '+' (-1) +"-" garbage: '-' (-1) +"a" garbage: 'a' (-1) +"." garbage: '.' (-1) +"," garbage: ',' (-1) +"0" -> 0 +"1" -> 1 +"9" -> 9 +"++" garbage: '+' (-1) +"+-" garbage: '+' (-1) +"+a" garbage: '+' (-1) +"+." garbage: '+' (-1) +"+," garbage: '+' (-1) +"+0" -> 0 +"+1" -> 1 +"+9" -> 9 +"-+" garbage: '-' (-1) +"--" garbage: '-' (-1) +"-a" garbage: '-' (-1) +"-." garbage: '-' (-1) +"-," garbage: '-' (-1) +"-0" -> 0 +"-1" -> -1 +"-9" -> -9 +"0+" -> 0 garbage: '+' (2) +"0-" -> 0 garbage: '-' (2) +"0a" -> 0 garbage: 'a' (2) +"0." -> 0 garbage: '.' (2) +"0," -> 0 garbage: ',' (2) +"00" -> 0 +"01" -> 1 +"09" -> 9 +"1+" -> 1 garbage: '+' (2) +"1-" -> 1 garbage: '-' (2) +"1a" -> 1 garbage: 'a' (2) +"1." -> 1 garbage: '.' 
(2) +"1," -> 1 garbage: ',' (2) +"10" -> 10 +"11" -> 11 +"19" -> 19 +"" (-1) +"-9223372036854775808" -> LLONG_MIN +"9223372036854775807" -> LLONG_MAX +"18446744073709551615" -> ULLONG_MAX (1) +"-123456789012345678901234567890" (-2) +"123456789012345678901234567890" (-2) +"+123456789012345678901234567890" (-2) diff --git a/test/scan/longlong-utf8.c b/test/scan/longlong-utf8.c new file mode 100644 index 0000000..c89a46c --- /dev/null +++ b/test/scan/longlong-utf8.c @@ -0,0 +1,136 @@ +#include + +#include +#include +#include + + +static void print_longlong( long long value, int rval) +{ + if( value == LLONG_MIN) + printf( "LLONG_MIN"); + else + if( value == LLONG_MAX) + printf( "LLONG_MAX"); + else + if( rval == mulle_utf_is_too_large_for_signed) + { + if( (unsigned long long) value == ULLONG_MAX) + printf( "ULLONG_MAX"); + else + printf( "%llu", value); + } + else + printf( "%lld", value); +} + + +static void test( char *text) +{ + int rval; + long long value; + size_t len; + char *s; + char buf[ 256]; + int i; + + printf( "\"%s\"", text); + + len = strlen( text); + for( i = 0; i < len; i++) + buf[ i] = text[ i]; + + s = buf; + value = 0x18481848; + rval = _mulle_utf8_scan_longlong_decimal( &s, len, &value); + if( rval >= 0) + { + printf( " -> "); + print_longlong( value, rval); + } + + if( s != &buf[ len] && rval != mulle_utf_has_overflown) + printf( " garbage: '%c'", (int) *s); + + if( rval != 0) + printf( " (%d)", rval); + + printf( "\n"); +} + + +int main() +{ + char buf[ 256]; + + // happy path tests + test( "1848"); + test( "+1848"); + test( "-1848"); + + // garbage at end + test( "-1848a"); + + test( "+"); + test( "-"); + test( "a"); + test( "."); + test( ","); + test( "0"); + test( "1"); + test( "9"); + + test( "++"); + test( "+-"); + test( "+a"); + test( "+."); + test( "+,"); + test( "+0"); + test( "+1"); + test( "+9"); + + test( "-+"); + test( "--"); + test( "-a"); + test( "-."); + test( "-,"); + test( "-0"); + test( "-1"); + test( "-9"); + + test( "0+"); + 
test( "0-"); + test( "0a"); + test( "0."); + test( "0,"); + test( "00"); + test( "01"); + test( "09"); + + test( "1+"); + test( "1-"); + test( "1a"); + test( "1."); + test( "1,"); + test( "10"); + test( "11"); + test( "19"); + + test( ""); + + sprintf( buf, "%lld", LLONG_MIN); + test( buf); + + sprintf( buf, "%lld", LLONG_MAX); + test( buf); + + sprintf( buf, "%llu", ULLONG_MAX); + test( buf); + + test( "-123456789012345678901234567890"); + test( "123456789012345678901234567890"); + test( "+123456789012345678901234567890"); + + return( 0); +} + diff --git a/test/scan/longlong-utf8.stdout b/test/scan/longlong-utf8.stdout new file mode 100644 index 0000000..7f67383 --- /dev/null +++ b/test/scan/longlong-utf8.stdout @@ -0,0 +1,51 @@ +"1848" -> 1848 +"+1848" -> 1848 +"-1848" -> -1848 +"-1848a" -> -1848 garbage: 'a' (2) +"+" garbage: '+' (-1) +"-" garbage: '-' (-1) +"a" garbage: 'a' (-1) +"." garbage: '.' (-1) +"," garbage: ',' (-1) +"0" -> 0 +"1" -> 1 +"9" -> 9 +"++" garbage: '+' (-1) +"+-" garbage: '+' (-1) +"+a" garbage: '+' (-1) +"+." garbage: '+' (-1) +"+," garbage: '+' (-1) +"+0" -> 0 +"+1" -> 1 +"+9" -> 9 +"-+" garbage: '-' (-1) +"--" garbage: '-' (-1) +"-a" garbage: '-' (-1) +"-." garbage: '-' (-1) +"-," garbage: '-' (-1) +"-0" -> 0 +"-1" -> -1 +"-9" -> -9 +"0+" -> 0 garbage: '+' (2) +"0-" -> 0 garbage: '-' (2) +"0a" -> 0 garbage: 'a' (2) +"0." -> 0 garbage: '.' (2) +"0," -> 0 garbage: ',' (2) +"00" -> 0 +"01" -> 1 +"09" -> 9 +"1+" -> 1 garbage: '+' (2) +"1-" -> 1 garbage: '-' (2) +"1a" -> 1 garbage: 'a' (2) +"1." -> 1 garbage: '.' 
(2) +"1," -> 1 garbage: ',' (2) +"10" -> 10 +"11" -> 11 +"19" -> 19 +"" (-1) +"-9223372036854775808" -> LLONG_MIN +"9223372036854775807" -> LLONG_MAX +"18446744073709551615" -> ULLONG_MAX (1) +"-123456789012345678901234567890" (-2) +"123456789012345678901234567890" (-2) +"+123456789012345678901234567890" (-2) diff --git a/test/strchr/mulle_utf16_strchr.c b/test/strchr/mulle_utf16_strchr.c new file mode 100644 index 0000000..8683bb7 --- /dev/null +++ b/test/strchr/mulle_utf16_strchr.c @@ -0,0 +1,47 @@ +#include + +#include + + +int main() +{ + mulle_utf16_t s[ 4] = { 'A', 'B', 'C' , 0 }; + mulle_utf16_t s2[ 7] = { 'A', 'B', 'A', 'C', 'A', 'B', 0 }; + mulle_utf32_t emos3_32[ 3] = { 0x01f311, 0x01f69a, 0x1f460 }; + mulle_utf16_t emos3[ 256]; + mulle_utf16_t *end; + + end = _mulle_utf32_convert_to_utf16( emos3_32, 3, emos3); + assert( end < &emos3[ sizeof( emos3) / sizeof( emos3[ 0])]); /* bound in elements, not bytes: emos3 holds 2-byte units */ + *end = 0; + + if( mulle_utf16_strchr( NULL, 'a') != NULL) + printf( "FAIL 1\n"); + + if( mulle_utf16_strchr( s, 'A') != s) + printf( "FAIL 2\n"); + + if( mulle_utf16_strchr( s, 'B') != &s[ 1]) + printf( "FAIL 3\n"); + + if( mulle_utf16_strchr( s, 'C') != &s[ 2]) + printf( "FAIL 4\n"); + + if( mulle_utf16_strchr( s, 'D') != NULL) + printf( "FAIL 5\n"); + + if( mulle_utf16_strchr( s, 0) != &s[ 3]) + printf( "FAIL 6\n"); + + if( mulle_utf16_strchr( s2, 'B') != &s2[ 1]) + printf( "FAIL 7\n"); + + if( mulle_utf16_strchr( emos3, 'B') != NULL) + printf( "FAIL 8\n"); + + if( mulle_utf16_strchr( emos3, 0x01f69a) == NULL) + printf( "FAIL 9\n"); + + return( 0); +} + diff --git a/test/strchr/mulle_utf32_strchr.c b/test/strchr/mulle_utf32_strchr.c new file mode 100644 index 0000000..f6873d2 --- /dev/null +++ b/test/strchr/mulle_utf32_strchr.c @@ -0,0 +1,41 @@ +#include + +#include + + +int main() +{ + mulle_utf32_t s[ 4] = { 'A', 'B', 'C' , 0 }; + mulle_utf32_t s2[ 7] = { 'A', 'B', 'A', 'C', 'A', 'B', 0 }; + mulle_utf32_t emos3[ 4] = { 0x01f311, 0x01f69a, 0x1f460, 0 }; + + if( mulle_utf32_strchr( NULL, 'a') != NULL) +
printf( "FAIL 1\n"); + + if( mulle_utf32_strchr( s, 'A') != s) + printf( "FAIL 2\n"); + + if( mulle_utf32_strchr( s, 'B') != &s[ 1]) + printf( "FAIL 3\n"); + + if( mulle_utf32_strchr( s, 'C') != &s[ 2]) + printf( "FAIL 4\n"); + + if( mulle_utf32_strchr( s, 'D') != NULL) + printf( "FAIL 5\n"); + + if( mulle_utf32_strchr( s, 0) != &s[ 3]) + printf( "FAIL 6\n"); + + if( mulle_utf32_strchr( s2, 'B') != &s2[ 1]) + printf( "FAIL 7\n"); + + if( mulle_utf32_strchr( emos3, 'B') != NULL) + printf( "FAIL 8\n"); + + if( mulle_utf32_strchr( emos3, 0x01f69a) == NULL) + printf( "FAIL 9\n"); + + return( 0); +} + diff --git a/test/strchr/mulle_utf32_strchr.stdout b/test/strchr/mulle_utf32_strchr.stdout new file mode 100644 index 0000000..e69de29 diff --git a/test/strchr/mulle_utf8_strchr.c b/test/strchr/mulle_utf8_strchr.c new file mode 100644 index 0000000..50a183e --- /dev/null +++ b/test/strchr/mulle_utf8_strchr.c @@ -0,0 +1,47 @@ +#include + +#include + + +int main() +{ + char *s = "ABC"; + char *s2 = "ABACAB"; + mulle_utf32_t emos3_32[ 3] = { 0x01f311, 0x01f69a, 0x1f460 }; + char emos3[ 256]; + char *end; + + end = _mulle_utf32_convert_to_utf8( emos3_32, 3, emos3); + assert( end < &emos3[ sizeof( emos3)]); + *end = 0; + + if( mulle_utf8_strchr( NULL, 'a') != NULL) + printf( "FAIL 1\n"); + + if( mulle_utf8_strchr( s, 'A') != s) + printf( "FAIL 2\n"); + + if( mulle_utf8_strchr( s, 'B') != &s[ 1]) + printf( "FAIL 3\n"); + + if( mulle_utf8_strchr( s, 'C') != &s[ 2]) + printf( "FAIL 4\n"); + + if( mulle_utf8_strchr( s, 0) != &s[ 3]) // sic! 
+ printf( "FAIL 6\n"); + + if( mulle_utf8_strchr( s, 'D') != NULL) + printf( "FAIL 5\n"); + + if( mulle_utf8_strchr( s2, 'B') != &s2[ 1]) + printf( "FAIL 7\n"); + + if( mulle_utf8_strchr( emos3, 'B') != NULL) + printf( "FAIL 8\n"); + + if( mulle_utf8_strchr( emos3, 0x01f69a) == NULL) + printf( "FAIL 9\n"); + + return( 0); +} + diff --git a/test/strchr/mulle_utf8_strchr.stdout b/test/strchr/mulle_utf8_strchr.stdout new file mode 100644 index 0000000..e69de29 diff --git a/test/strcspn/strcspn_utf16.c b/test/strcspn/strcspn_utf16.c new file mode 100644 index 0000000..a305d71 --- /dev/null +++ b/test/strcspn/strcspn_utf16.c @@ -0,0 +1,76 @@ +#include + +#include +#include + + +static void test( mulle_utf16_t *s, mulle_utf16_t *p) +{ + size_t result; + char dst[ 128]; + char *end; + + printf( "mulle_utf16_strcspn( "); + if( ! s) + printf( "NULL"); + else + { + end = _mulle_utf16_convert_to_utf8( s, mulle_utf16_strlen( s), dst); + + printf( "\"%.*s\"", (int) (end - dst), dst); + } + + if( ! p) + printf( ", NULL"); + else + { + end = _mulle_utf16_convert_to_utf8( p, mulle_utf16_strlen( p), dst); + printf( ", \"%.*s\"", (int) (end - dst), dst); + } + + printf( ") = "); + + result = mulle_utf16_strcspn( s, p); + printf( "%ld\n", (long) result); +} + + + +int main( int argc, char **argv) +{ + mulle_utf16_t abcd[ 5] = { 'a', 'b', 'c', 'd', 0 }; + mulle_utf16_t abc[ 4] = { 'a', 'b', 'c', 0 }; + mulle_utf16_t xyz[ 4] = { 'x', 'y', 'z', 0 }; + mulle_utf16_t xcz[ 4] = { 'x', 'c', 'z', 0 }; + mulle_utf16_t ab[ 4] = { 'a', 'b', 0 }; + mulle_utf16_t bc[ 4] = { 'b', 'c', 0 }; + mulle_utf16_t a[ 2] = { 'a', 0 }; + mulle_utf16_t b[ 2] = { 'b', 0 }; + mulle_utf16_t d[ 2] = { 'd', 0 }; + mulle_utf16_t empty[ 1] = { 0 }; + + test( NULL, NULL); + test( empty, NULL); + test( NULL, empty); + test( empty, empty); + test( empty, empty); + + test( NULL, empty); + + test( abcd, a); + test( abcd, b); + test( abcd, d); + + test( abcd, xyz); + test( abcd, xcz); + + test( abcd, ab); + test( abcd, 
bc); + + test( abcd, abc); + + test( abc, b); + test( abc, abc); + + return( 0); +} \ No newline at end of file diff --git a/test/strcspn/strcspn_utf16.stdout b/test/strcspn/strcspn_utf16.stdout new file mode 100644 index 0000000..dcea5f2 --- /dev/null +++ b/test/strcspn/strcspn_utf16.stdout @@ -0,0 +1,16 @@ +mulle_utf16_strcspn( NULL, NULL) = 0 +mulle_utf16_strcspn( "", NULL) = 0 +mulle_utf16_strcspn( NULL, "") = 0 +mulle_utf16_strcspn( "", "") = 0 +mulle_utf16_strcspn( "", "") = 0 +mulle_utf16_strcspn( NULL, "") = 0 +mulle_utf16_strcspn( "abcd", "a") = 0 +mulle_utf16_strcspn( "abcd", "b") = 1 +mulle_utf16_strcspn( "abcd", "d") = 3 +mulle_utf16_strcspn( "abcd", "xyz") = 4 +mulle_utf16_strcspn( "abcd", "xcz") = 2 +mulle_utf16_strcspn( "abcd", "ab") = 0 +mulle_utf16_strcspn( "abcd", "bc") = 1 +mulle_utf16_strcspn( "abcd", "abc") = 0 +mulle_utf16_strcspn( "abc", "b") = 1 +mulle_utf16_strcspn( "abc", "abc") = 0 diff --git a/test/strcspn/strcspn_utf32.c b/test/strcspn/strcspn_utf32.c new file mode 100644 index 0000000..553593a --- /dev/null +++ b/test/strcspn/strcspn_utf32.c @@ -0,0 +1,76 @@ +#include + +#include +#include + + +static void test( mulle_utf32_t *s, mulle_utf32_t *p) +{ + size_t result; + char dst[ 128]; + char *end; + + printf( "mulle_utf32_strcspn( "); + if( ! s) + printf( "NULL"); + else + { + end = _mulle_utf32_convert_to_utf8( s, mulle_utf32_strlen( s), dst); + + printf( "\"%.*s\"", (int) (end - dst), dst); + } + + if( ! 
p) + printf( ", NULL"); + else + { + end = _mulle_utf32_convert_to_utf8( p, mulle_utf32_strlen( p), dst); + printf( ", \"%.*s\"", (int) (end - dst), dst); + } + + printf( ") = "); + + result = mulle_utf32_strcspn( s, p); + printf( "%ld\n", (long) result); +} + + + +int main( int argc, char **argv) +{ + mulle_utf32_t abcd[ 5] = { 'a', 'b', 'c', 'd', 0 }; + mulle_utf32_t abc[ 4] = { 'a', 'b', 'c', 0 }; + mulle_utf32_t xyz[ 4] = { 'x', 'y', 'z', 0 }; + mulle_utf32_t xcz[ 4] = { 'x', 'c', 'z', 0 }; + mulle_utf32_t ab[ 4] = { 'a', 'b', 0 }; + mulle_utf32_t bc[ 4] = { 'b', 'c', 0 }; + mulle_utf32_t a[ 2] = { 'a', 0 }; + mulle_utf32_t b[ 2] = { 'b', 0 }; + mulle_utf32_t d[ 2] = { 'b', 0 }; + mulle_utf32_t empty[ 1] = { 0 }; + + test( NULL, NULL); + test( empty, NULL); + test( NULL, empty); + test( empty, empty); + test( empty, empty); + + test( NULL, empty); + + test( abcd, a); + test( abcd, b); + test( abcd, d); + + test( abcd, xyz); + test( abcd, xcz); + + test( abcd, ab); + test( abcd, bc); + + test( abcd, abc); + + test( abc, b); + test( abc, abc); + + return( 0); +} \ No newline at end of file diff --git a/test/strcspn/strcspn_utf32.stdout b/test/strcspn/strcspn_utf32.stdout new file mode 100644 index 0000000..080e359 --- /dev/null +++ b/test/strcspn/strcspn_utf32.stdout @@ -0,0 +1,16 @@ +mulle_utf32_strcspn( NULL, NULL) = 0 +mulle_utf32_strcspn( "", NULL) = 0 +mulle_utf32_strcspn( NULL, "") = 0 +mulle_utf32_strcspn( "", "") = 0 +mulle_utf32_strcspn( "", "") = 0 +mulle_utf32_strcspn( NULL, "") = 0 +mulle_utf32_strcspn( "abcd", "a") = 1 +mulle_utf32_strcspn( "abcd", "b") = 0 +mulle_utf32_strcspn( "abcd", "b") = 0 +mulle_utf32_strcspn( "abcd", "xyz") = 4 +mulle_utf32_strcspn( "abcd", "xcz") = 2 +mulle_utf32_strcspn( "abcd", "ab") = 0 +mulle_utf32_strcspn( "abcd", "bc") = 1 +mulle_utf32_strcspn( "abcd", "abc") = 0 +mulle_utf32_strcspn( "abc", "b") = 0 +mulle_utf32_strcspn( "abc", "abc") = 0 diff --git a/test/strcspn/strcspn_utf8.c b/test/strcspn/strcspn_utf8.c new file 
mode 100644 index 0000000..a027e97 --- /dev/null +++ b/test/strcspn/strcspn_utf8.c @@ -0,0 +1,56 @@ +#include + +#include +#include + + +static void test( char *s, char *p) +{ + size_t result; + + printf( "mulle_utf8_strcspn( "); + if( ! s) + printf( "NULL"); + else + printf( "\"%s\"", s); + + if( ! p) + printf( ", NULL"); + else + printf( ", \"%s\"", p); + + printf( ") = "); + + result = mulle_utf8_strcspn( s, p); + printf( "%ld (%ld)\n", (long) result, (long) ((s && p) ? strcspn( s, p) : 0)); +} + + + +int main( int argc, char **argv) +{ + test( NULL, NULL); + test( "", NULL); + test( NULL, ""); + test( "", ""); + test( "", ""); + + test( NULL, ""); + + test( "abcd", "a"); + test( "abcd", "b"); + test( "abcd", "d"); + + test( "abcd", "xyz"); + test( "abcd", "xcz"); + + test( "abcd", "ab"); + test( "abcd", "bc"); + + test( "abcd", "abc"); + + test( "abc", "b"); + test( "abc", "abc"); + + return( 0); +} \ No newline at end of file diff --git a/test/strcspn/strcspn_utf8.stdout b/test/strcspn/strcspn_utf8.stdout new file mode 100644 index 0000000..a117e37 --- /dev/null +++ b/test/strcspn/strcspn_utf8.stdout @@ -0,0 +1,16 @@ +mulle_utf8_strcspn( NULL, NULL) = 0 (0) +mulle_utf8_strcspn( "", NULL) = 0 (0) +mulle_utf8_strcspn( NULL, "") = 0 (0) +mulle_utf8_strcspn( "", "") = 0 (0) +mulle_utf8_strcspn( "", "") = 0 (0) +mulle_utf8_strcspn( NULL, "") = 0 (0) +mulle_utf8_strcspn( "abcd", "a") = 0 (0) +mulle_utf8_strcspn( "abcd", "b") = 1 (1) +mulle_utf8_strcspn( "abcd", "d") = 3 (3) +mulle_utf8_strcspn( "abcd", "xyz") = 4 (4) +mulle_utf8_strcspn( "abcd", "xcz") = 2 (2) +mulle_utf8_strcspn( "abcd", "ab") = 0 (0) +mulle_utf8_strcspn( "abcd", "bc") = 1 (1) +mulle_utf8_strcspn( "abcd", "abc") = 0 (0) +mulle_utf8_strcspn( "abc", "b") = 1 (1) +mulle_utf8_strcspn( "abc", "abc") = 0 (0) diff --git a/test/strncpy/mulle_utf16_strncpy.c b/test/strncpy/mulle_utf16_strncpy.c new file mode 100644 index 0000000..e0a7ab5 --- /dev/null +++ b/test/strncpy/mulle_utf16_strncpy.c @@ -0,0 +1,25 
@@ +#include + +#include + + +int main() +{ + mulle_utf16_t text[] = { 'A', 'B', 'C', 0 }; + mulle_utf16_t dst[16]; + mulle_utf16_t *s; + + if( mulle_utf16_strncpy( NULL, 0, NULL) != NULL) + printf( "FAIL"); + + s = mulle_utf16_strncpy( dst, 16, text); + if( s != dst) + printf( "FAIL"); + if( mulle_utf16_strlen( s) != 3) + printf( "FAIL"); + if( mulle_utf16_strcmp( s, text)) + printf( "FAIL"); + + return( 0); +} + diff --git a/test/strncpy/mulle_utf32_strncpy.c b/test/strncpy/mulle_utf32_strncpy.c new file mode 100644 index 0000000..0811890 --- /dev/null +++ b/test/strncpy/mulle_utf32_strncpy.c @@ -0,0 +1,25 @@ +#include + +#include + + +int main() +{ + mulle_utf32_t text[] = { 'A', 'B', 'C', 0 }; + mulle_utf32_t dst[32]; + mulle_utf32_t *s; + + if( mulle_utf32_strncpy( NULL, 0, NULL) != NULL) + printf( "FAIL"); + + s = mulle_utf32_strncpy( dst, 32, text); + if( s != dst) + printf( "FAIL"); + if( mulle_utf32_strlen( s) != 3) + printf( "FAIL"); + if( mulle_utf32_strcmp( s, text)) + printf( "FAIL"); + + return( 0); +} + diff --git a/test/strncpy/mulle_utf8_strncpy.c b/test/strncpy/mulle_utf8_strncpy.c new file mode 100644 index 0000000..7e3ff73 --- /dev/null +++ b/test/strncpy/mulle_utf8_strncpy.c @@ -0,0 +1,25 @@ +#include + +#include + + +int main() +{ + char text[] = { 'A', 'B', 'C', 0 }; + char dst[ 8]; + char *s; + + if( mulle_utf8_strncpy( NULL, 0, NULL) != NULL) + printf( "FAIL"); + + s = mulle_utf8_strncpy( dst, 8, text); + if( s != dst) + printf( "FAIL"); + if( mulle_utf8_strlen( s) != 3) + printf( "FAIL"); + if( mulle_utf8_strcmp( s, text)) + printf( "FAIL"); + + return( 0); +} + diff --git a/test/strncspn/tests.c b/test/strncspn/tests.c deleted file mode 100644 index 2bcc768..0000000 --- a/test/strncspn/tests.c +++ /dev/null @@ -1,58 +0,0 @@ -#include - -#include - - -static void test( char *s, size_t len, char *p) -{ - size_t result; - - printf( "mulle_utf8_strncspn( "); - if( ! 
s) - printf( "NULL"); - else - printf( "\"%.*s\"", (int) len, s); - - printf( ", %d", (int) len); - - if( ! p) - printf( ", NULL"); - else - printf( ", \"%s\"", p); - - printf( ") = "); - - result = mulle_utf8_strncspn( (char *) s, len, (char *) p); - printf( "%ld (%ld)\n", (long) result, (long) ((s && p) ? strcspn( s, p) : 0)); -} - - - -int main( int argc, char **argv) -{ - test( NULL, 0, NULL); - test( "", 0, NULL); - test( NULL, 0, ""); - test( "", 0, ""); - test( "", 1, ""); - - test( NULL, 2, ""); - - test( "abcd", -1, "a"); - test( "abcd", 4, "b"); - test( "abcd", 3, "d"); - test( "abcd", 4, "d"); - test( "abcd", 4, "xyz"); - - test( "abcd", -1, "xdy"); - - test( "abcd", 4, "ab"); - test( "abcd", 4, "bc"); - - test( "abcd", 4, "abc"); - - test( "abc", 3, "b"); - test( "abc", 3, "abc"); - - return( 0); -} \ No newline at end of file diff --git a/test/strncspn/tests.stdout b/test/strncspn/tests.stdout deleted file mode 100644 index 548dee2..0000000 --- a/test/strncspn/tests.stdout +++ /dev/null @@ -1,17 +0,0 @@ -mulle_utf8_strncspn( NULL, 0, NULL) = 0 (0) -mulle_utf8_strncspn( "", 0, NULL) = 0 (0) -mulle_utf8_strncspn( NULL, 0, "") = 0 (0) -mulle_utf8_strncspn( "", 0, "") = 0 (0) -mulle_utf8_strncspn( "", 1, "") = 1 (0) -mulle_utf8_strncspn( NULL, 2, "") = 0 (0) -mulle_utf8_strncspn( "abcd", -1, "a") = 0 (0) -mulle_utf8_strncspn( "abcd", 4, "b") = 1 (1) -mulle_utf8_strncspn( "abc", 3, "d") = 3 (3) -mulle_utf8_strncspn( "abcd", 4, "d") = 3 (3) -mulle_utf8_strncspn( "abcd", 4, "xyz") = 4 (4) -mulle_utf8_strncspn( "abcd", -1, "xdy") = 3 (3) -mulle_utf8_strncspn( "abcd", 4, "ab") = 0 (0) -mulle_utf8_strncspn( "abcd", 4, "bc") = 1 (1) -mulle_utf8_strncspn( "abcd", 4, "abc") = 0 (0) -mulle_utf8_strncspn( "abc", 3, "b") = 1 (1) -mulle_utf8_strncspn( "abc", 3, "abc") = 0 (0) diff --git a/test/strnlen/mulle_utf16_strnlen.c b/test/strnlen/mulle_utf16_strnlen.c new file mode 100644 index 0000000..9f153b7 --- /dev/null +++ b/test/strnlen/mulle_utf16_strnlen.c @@ -0,0 
+1,21 @@ +#include + +#include + + +int main() +{ + mulle_utf16_t text[] = { 'A', 'B', 'C', 0 }; + + if( mulle_utf16_strnlen( NULL, 0) != 0) + printf( "FAIL"); + + if( mulle_utf16_strnlen( text, 3) != 3) + printf( "FAIL"); + + if( mulle_utf16_strnlen( text, 4) != 3) + printf( "FAIL"); + + return( 0); +} + diff --git a/test/strnlen/mulle_utf32_strnlen.c b/test/strnlen/mulle_utf32_strnlen.c new file mode 100644 index 0000000..20d09a5 --- /dev/null +++ b/test/strnlen/mulle_utf32_strnlen.c @@ -0,0 +1,21 @@ +#include + +#include + + +int main() +{ + mulle_utf32_t text[] = { 'A', 'B', 'C', 0 }; + + if( mulle_utf32_strnlen( NULL, 0) != 0) + printf( "FAIL"); + + if( mulle_utf32_strnlen( text, 3) != 3) + printf( "FAIL"); + + if( mulle_utf32_strnlen( text, 4) != 3) + printf( "FAIL"); + + return( 0); +} + diff --git a/test/strnspn/tests.c b/test/strnspn/tests.c deleted file mode 100644 index 444c63a..0000000 --- a/test/strnspn/tests.c +++ /dev/null @@ -1,59 +0,0 @@ -#include - -#include -#include - - -static void test( char *s, size_t len, char *p) -{ - size_t result; - - printf( "mulle_utf8_strnspn( "); - if( ! s) - printf( "NULL"); - else - printf( "\"%.*s\"", (int) len, s); - - printf( ", %d", (int) len); - - if( ! p) - printf( ", NULL"); - else - printf( ", \"%s\"", p); - - printf( ") = "); - - result = mulle_utf8_strnspn( s, len, p); - printf( "%ld (%ld)\n", (long) result, (long) ((s && p) ? 
strspn( s, p) : 0)); -} - - - -int main( int argc, char **argv) -{ - test( NULL, 0, NULL); - test( "", 0, NULL); - test( NULL, 0, ""); - test( "", 0, ""); - test( "", 1, ""); - - test( NULL, 2, ""); - - test( "abcd", -1, "a"); - test( "abcd", 4, "b"); - test( "abcd", 3, "d"); - test( "abcd", 4, "d"); - - test( "abcd", -1, "xyz"); - test( "abcd", -1, "xcz"); - - test( "abcd", 4, "ab"); - test( "abcd", 4, "bc"); - - test( "abcd", 4, "abc"); - - test( "abc", 3, "b"); - test( "abc", 3, "abc"); - - return( 0); -} \ No newline at end of file diff --git a/test/strnspn/tests.stdout b/test/strnspn/tests.stdout deleted file mode 100644 index 1901fdd..0000000 --- a/test/strnspn/tests.stdout +++ /dev/null @@ -1,17 +0,0 @@ -mulle_utf8_strnspn( NULL, 0, NULL) = 0 (0) -mulle_utf8_strnspn( "", 0, NULL) = 0 (0) -mulle_utf8_strnspn( NULL, 0, "") = 0 (0) -mulle_utf8_strnspn( "", 0, "") = 0 (0) -mulle_utf8_strnspn( "", 1, "") = 0 (0) -mulle_utf8_strnspn( NULL, 2, "") = 0 (0) -mulle_utf8_strnspn( "abcd", -1, "a") = 1 (1) -mulle_utf8_strnspn( "abcd", 4, "b") = 0 (0) -mulle_utf8_strnspn( "abc", 3, "d") = 0 (0) -mulle_utf8_strnspn( "abcd", 4, "d") = 0 (0) -mulle_utf8_strnspn( "abcd", -1, "xyz") = 0 (0) -mulle_utf8_strnspn( "abcd", -1, "xcz") = 0 (0) -mulle_utf8_strnspn( "abcd", 4, "ab") = 2 (2) -mulle_utf8_strnspn( "abcd", 4, "bc") = 0 (0) -mulle_utf8_strnspn( "abcd", 4, "abc") = 3 (3) -mulle_utf8_strnspn( "abc", 3, "b") = 0 (0) -mulle_utf8_strnspn( "abc", 3, "abc") = 3 (3) diff --git a/test/strnstr/tests.c b/test/strnstr/strnstr-tests.c similarity index 100% rename from test/strnstr/tests.c rename to test/strnstr/strnstr-tests.c diff --git a/test/strnstr/strnstr-tests.stdout b/test/strnstr/strnstr-tests.stdout new file mode 100644 index 0000000..fe54950 --- /dev/null +++ b/test/strnstr/strnstr-tests.stdout @@ -0,0 +1,16 @@ +mulle_utf8_strnstr( NULL, 0, NULL) = NULL +mulle_utf8_strnstr( "", 0, NULL) = NULL +mulle_utf8_strnstr( NULL, 0, "") = NULL +mulle_utf8_strnstr( "", 0, "") = "" (0) 
+mulle_utf8_strnstr( "", 1, "") = "" (0) +mulle_utf8_strnstr( NULL, 2, "") = NULL +mulle_utf8_strnstr( "abcd", -1, "a") = "a" (0) +mulle_utf8_strnstr( "abcd", 4, "b") = "b" (1) +mulle_utf8_strnstr( "abcd", 4, "c") = "c" (2) +mulle_utf8_strnstr( "abcd", 4, "d") = "d" (3) +mulle_utf8_strnstr( "abcd", 4, "ab") = "ab" (0) +mulle_utf8_strnstr( "abcd", 4, "bc") = "bc" (1) +mulle_utf8_strnstr( "abcd", 4, "cd") = "cd" (2) +mulle_utf8_strnstr( "abcd", 4, "abc") = "abc" (0) +mulle_utf8_strnstr( "abc", 3, "b") = "b" (1) +mulle_utf8_strnstr( "abc", 3, "abc") = "abc" (0) diff --git a/test/strspn/strspn_utf16.c b/test/strspn/strspn_utf16.c new file mode 100644 index 0000000..27a21c6 --- /dev/null +++ b/test/strspn/strspn_utf16.c @@ -0,0 +1,77 @@ +#include + +#include +#include + + +static void test( mulle_utf16_t *s, mulle_utf16_t *p) +{ + size_t result; + char dst[ 128]; + char *end; + + printf( "mulle_utf16_strspn( "); + if( ! s) + printf( "NULL"); + else + { + end = _mulle_utf16_convert_to_utf8( s, mulle_utf16_strlen( s), dst); + + printf( "\"%.*s\"", (int) (end - dst), dst); + } + + if( ! 
p) + printf( ", NULL"); + else + { + end = _mulle_utf16_convert_to_utf8( p, mulle_utf16_strlen( p), dst); + printf( ", \"%.*s\"", (int) (end - dst), dst); + } + + printf( ") = "); + + result = mulle_utf16_strspn( s, p); + printf( "%ld\n", (long) result); +} + + + +int main( int argc, char **argv) +{ + mulle_utf16_t abcd[ 5] = { 'a', 'b', 'c', 'd', 0 }; + mulle_utf16_t abc[ 4] = { 'a', 'b', 'c', 0 }; + mulle_utf16_t xyz[ 4] = { 'x', 'y', 'z', 0 }; + mulle_utf16_t xcz[ 4] = { 'x', 'c', 'z', 0 }; + mulle_utf16_t ab[ 4] = { 'a', 'b', 0 }; + mulle_utf16_t bc[ 4] = { 'b', 'c', 0 }; + mulle_utf16_t a[ 2] = { 'a', 0 }; + mulle_utf16_t b[ 2] = { 'b', 0 }; + mulle_utf16_t d[ 2] = { 'b', 0 }; + mulle_utf16_t empty[ 1] = { 0 }; + + test( NULL, NULL); + test( empty, NULL); + test( NULL, empty); + test( empty, empty); + test( empty, empty); + + test( NULL, empty); + + test( abcd, a); + test( abcd, b); + test( abcd, d); + test( abcd, d); + + test( abcd, xyz); + test( abcd, xcz); + + test( abcd, ab); + test( abcd, bc); + + test( abcd, abc); + + test( abc, b); + test( abc, abc); + + return( 0); +} \ No newline at end of file diff --git a/test/strspn/strspn_utf16.stdout b/test/strspn/strspn_utf16.stdout new file mode 100644 index 0000000..b253e94 --- /dev/null +++ b/test/strspn/strspn_utf16.stdout @@ -0,0 +1,17 @@ +mulle_utf16_strspn( NULL, NULL) = 0 +mulle_utf16_strspn( "", NULL) = 0 +mulle_utf16_strspn( NULL, "") = 0 +mulle_utf16_strspn( "", "") = 0 +mulle_utf16_strspn( "", "") = 0 +mulle_utf16_strspn( NULL, "") = 0 +mulle_utf16_strspn( "abcd", "a") = 1 +mulle_utf16_strspn( "abcd", "b") = 0 +mulle_utf16_strspn( "abcd", "b") = 0 +mulle_utf16_strspn( "abcd", "b") = 0 +mulle_utf16_strspn( "abcd", "xyz") = 0 +mulle_utf16_strspn( "abcd", "xcz") = 0 +mulle_utf16_strspn( "abcd", "ab") = 2 +mulle_utf16_strspn( "abcd", "bc") = 0 +mulle_utf16_strspn( "abcd", "abc") = 3 +mulle_utf16_strspn( "abc", "b") = 0 +mulle_utf16_strspn( "abc", "abc") = 3 diff --git a/test/strspn/strspn_utf32.c 
b/test/strspn/strspn_utf32.c new file mode 100644 index 0000000..2c1535a --- /dev/null +++ b/test/strspn/strspn_utf32.c @@ -0,0 +1,77 @@ +#include + +#include +#include + + +static void test( mulle_utf32_t *s, mulle_utf32_t *p) +{ + size_t result; + char dst[ 128]; + char *end; + + printf( "mulle_utf32_strspn( "); + if( ! s) + printf( "NULL"); + else + { + end = _mulle_utf32_convert_to_utf8( s, mulle_utf32_strlen( s), dst); + + printf( "\"%.*s\"", (int) (end - dst), dst); + } + + if( ! p) + printf( ", NULL"); + else + { + end = _mulle_utf32_convert_to_utf8( p, mulle_utf32_strlen( p), dst); + printf( ", \"%.*s\"", (int) (end - dst), dst); + } + + printf( ") = "); + + result = mulle_utf32_strspn( s, p); + printf( "%ld\n", (long) result); +} + + + +int main( int argc, char **argv) +{ + mulle_utf32_t abcd[ 5] = { 'a', 'b', 'c', 'd', 0 }; + mulle_utf32_t abc[ 4] = { 'a', 'b', 'c', 0 }; + mulle_utf32_t xyz[ 4] = { 'x', 'y', 'z', 0 }; + mulle_utf32_t xcz[ 4] = { 'x', 'c', 'z', 0 }; + mulle_utf32_t ab[ 4] = { 'a', 'b', 0 }; + mulle_utf32_t bc[ 4] = { 'b', 'c', 0 }; + mulle_utf32_t a[ 2] = { 'a', 0 }; + mulle_utf32_t b[ 2] = { 'b', 0 }; + mulle_utf32_t d[ 2] = { 'b', 0 }; + mulle_utf32_t empty[ 1] = { 0 }; + + test( NULL, NULL); + test( empty, NULL); + test( NULL, empty); + test( empty, empty); + test( empty, empty); + + test( NULL, empty); + + test( abcd, a); + test( abcd, b); + test( abcd, d); + test( abcd, d); + + test( abcd, xyz); + test( abcd, xcz); + + test( abcd, ab); + test( abcd, bc); + + test( abcd, abc); + + test( abc, b); + test( abc, abc); + + return( 0); +} \ No newline at end of file diff --git a/test/strspn/strspn_utf32.stdout b/test/strspn/strspn_utf32.stdout new file mode 100644 index 0000000..d5e755c --- /dev/null +++ b/test/strspn/strspn_utf32.stdout @@ -0,0 +1,17 @@ +mulle_utf32_strspn( NULL, NULL) = 0 +mulle_utf32_strspn( "", NULL) = 0 +mulle_utf32_strspn( NULL, "") = 0 +mulle_utf32_strspn( "", "") = 0 +mulle_utf32_strspn( "", "") = 0 
+mulle_utf32_strspn( NULL, "") = 0 +mulle_utf32_strspn( "abcd", "a") = 1 +mulle_utf32_strspn( "abcd", "b") = 0 +mulle_utf32_strspn( "abcd", "b") = 0 +mulle_utf32_strspn( "abcd", "b") = 0 +mulle_utf32_strspn( "abcd", "xyz") = 0 +mulle_utf32_strspn( "abcd", "xcz") = 0 +mulle_utf32_strspn( "abcd", "ab") = 2 +mulle_utf32_strspn( "abcd", "bc") = 0 +mulle_utf32_strspn( "abcd", "abc") = 3 +mulle_utf32_strspn( "abc", "b") = 0 +mulle_utf32_strspn( "abc", "abc") = 3 diff --git a/test/strspn/strspn_utf8.c b/test/strspn/strspn_utf8.c new file mode 100644 index 0000000..c56f27e --- /dev/null +++ b/test/strspn/strspn_utf8.c @@ -0,0 +1,57 @@ +#include + +#include +#include + + +static void test( char *s, char *p) +{ + size_t result; + + printf( "mulle_utf8_strspn( "); + if( ! s) + printf( "NULL"); + else + printf( "\"%s\"", s); + + if( ! p) + printf( ", NULL"); + else + printf( ", \"%s\"", p); + + printf( ") = "); + + result = mulle_utf8_strspn( s, p); + printf( "%ld (%ld)\n", (long) result, (long) ((s && p) ? 
strspn( s, p) : 0)); +} + + + +int main( int argc, char **argv) +{ + test( NULL, NULL); + test( "", NULL); + test( NULL, ""); + test( "", ""); + test( "", ""); + + test( NULL, ""); + + test( "abcd", "a"); + test( "abcd", "b"); + test( "abcd", "d"); + test( "abcd", "d"); + + test( "abcd", "xyz"); + test( "abcd", "xcz"); + + test( "abcd", "ab"); + test( "abcd", "bc"); + + test( "abcd", "abc"); + + test( "abc", "b"); + test( "abc", "abc"); + + return( 0); +} \ No newline at end of file diff --git a/test/strspn/strspn_utf8.stdout b/test/strspn/strspn_utf8.stdout new file mode 100644 index 0000000..dba048d --- /dev/null +++ b/test/strspn/strspn_utf8.stdout @@ -0,0 +1,17 @@ +mulle_utf8_strspn( NULL, NULL) = 0 (0) +mulle_utf8_strspn( "", NULL) = 0 (0) +mulle_utf8_strspn( NULL, "") = 0 (0) +mulle_utf8_strspn( "", "") = 0 (0) +mulle_utf8_strspn( "", "") = 0 (0) +mulle_utf8_strspn( NULL, "") = 0 (0) +mulle_utf8_strspn( "abcd", "a") = 1 (1) +mulle_utf8_strspn( "abcd", "b") = 0 (0) +mulle_utf8_strspn( "abcd", "d") = 0 (0) +mulle_utf8_strspn( "abcd", "d") = 0 (0) +mulle_utf8_strspn( "abcd", "xyz") = 0 (0) +mulle_utf8_strspn( "abcd", "xcz") = 0 (0) +mulle_utf8_strspn( "abcd", "ab") = 2 (2) +mulle_utf8_strspn( "abcd", "bc") = 0 (0) +mulle_utf8_strspn( "abcd", "abc") = 3 (3) +mulle_utf8_strspn( "abc", "b") = 0 (0) +mulle_utf8_strspn( "abc", "abc") = 3 (3) diff --git a/test/strstr/mulle_utf16_strstr.c b/test/strstr/mulle_utf16_strstr.c new file mode 100644 index 0000000..b9a2bd8 --- /dev/null +++ b/test/strstr/mulle_utf16_strstr.c @@ -0,0 +1,61 @@ +#include + +#include + +int main() +{ + mulle_utf16_t s[ 4] ={ 'A', 'B', 'C', 0 }; + mulle_utf16_t s2[ 7] = { 'A', 'B', 'A', 'C', 'A','B', 0 }; + mulle_utf16_t AB[ 4] = { 'A', 'B', 0 }; + mulle_utf16_t BC[ 4] = { 'B', 'C', 0 }; + mulle_utf16_t A[ 2] = { 'A', 0 }; + mulle_utf16_t B[ 2] = { 'B', 0 }; + mulle_utf16_t C[ 2] = { 'C', 0 }; + mulle_utf16_t D[ 2] = { 'D', 0 }; + mulle_utf16_t empty[ 1] = { 0 }; + mulle_utf32_t emos3_32[ 3] = { 
0x01f311, 0x01f69a, 0x1f460 }; + mulle_utf16_t emos3[ 256]; + mulle_utf16_t *end; + + end = _mulle_utf32_convert_to_utf16( emos3_32, 3, emos3); + *end = 0; + + if( mulle_utf16_strstr( NULL, A) != NULL) + printf( "FAIL 1\n"); + + if( mulle_utf16_strstr( s, A) != s) + printf( "FAIL 2\n"); + + if( mulle_utf16_strstr( s, B) != &s[ 1]) + printf( "FAIL 3\n"); + + if( mulle_utf16_strstr( s, C) != &s[ 2]) + printf( "FAIL 4\n"); + + if( mulle_utf16_strstr( s, AB) != &s[ 0]) + printf( "FAIL 5\n"); + + if( mulle_utf16_strstr( s, BC) != &s[ 1]) + printf( "FAIL 6\n"); + + if( mulle_utf16_strstr( s, NULL) != NULL) // sic! + printf( "FAIL 7\n"); + + if( mulle_utf16_strstr( s, empty) != s) // sic! + printf( "FAIL 8\n"); + + if( mulle_utf16_strstr( s, D) != NULL) + printf( "FAIL 9\n"); + + if( mulle_utf16_strstr( s2, AB) != &s2[ 0]) + printf( "FAIL 10\n"); + + if( mulle_utf16_strstr( emos3, B) != NULL) + printf( "FAIL 11\n"); + + if( mulle_utf16_strstr( emos3, emos3) != emos3) + printf( "FAIL 12\n"); + + return( 0); +} + diff --git a/test/strstr/mulle_utf32_strstr.c b/test/strstr/mulle_utf32_strstr.c new file mode 100644 index 0000000..f6873d2 --- /dev/null +++ b/test/strstr/mulle_utf32_strstr.c @@ -0,0 +1,41 @@ +#include + +#include + + +int main() +{ + mulle_utf32_t s[ 4] = { 'A', 'B', 'C' , 0 }; + mulle_utf32_t s2[ 7] = { 'A', 'B', 'A', 'C', 'A', 'B', 0 }; + mulle_utf32_t emos3[ 4] = { 0x01f311, 0x01f69a, 0x1f460, 0 }; + + if( mulle_utf32_strchr( NULL, 'a') != NULL) + printf( "FAIL 1\n"); + + if( mulle_utf32_strchr( s, 'A') != s) + printf( "FAIL 2\n"); + + if( mulle_utf32_strchr( s, 'B') != &s[ 1]) + printf( "FAIL 3\n"); + + if( mulle_utf32_strchr( s, 'C') != &s[ 2]) + printf( "FAIL 4\n"); + + if( mulle_utf32_strchr( s, 'D') != NULL) + printf( "FAIL 5\n"); + + if( mulle_utf32_strchr( s, 0) != &s[ 3]) + printf( "FAIL 6\n"); + + if( mulle_utf32_strchr( s2, 'B') != &s2[ 1]) + printf( "FAIL 7\n"); + + if( mulle_utf32_strchr( emos3, 'B') != NULL) + printf( "FAIL 8\n"); + + if( 
mulle_utf32_strchr( emos3, 0x01f69a) == NULL) + printf( "FAIL 9\n"); + + return( 0); +} + diff --git a/test/strstr/mulle_utf8_strstr.c b/test/strstr/mulle_utf8_strstr.c new file mode 100644 index 0000000..3e7d3c6 --- /dev/null +++ b/test/strstr/mulle_utf8_strstr.c @@ -0,0 +1,56 @@ +#include + +#include + + +int main() +{ + char *s = "ABC"; + char *s2 = "ABACAB"; + mulle_utf32_t emos3_32[ 3] = { 0x01f311, 0x01f69a, 0x1f460 }; + char emos3[ 256]; + char *end; + + end = _mulle_utf32_convert_to_utf8( emos3_32, 3, emos3); + assert( end < &emos3[ sizeof( emos3)]); + *end = 0; + + if( mulle_utf8_strstr( NULL, "a") != NULL) + printf( "FAIL 1\n"); + + if( mulle_utf8_strstr( s, "A") != s) + printf( "FAIL 2\n"); + + if( mulle_utf8_strstr( s, "B") != &s[ 1]) + printf( "FAIL 3\n"); + + if( mulle_utf8_strstr( s, "C") != &s[ 2]) + printf( "FAIL 4\n"); + + if( mulle_utf8_strstr( s, "AB") != &s[ 0]) + printf( "FAIL 5\n"); + + if( mulle_utf8_strstr( s, "BC") != &s[ 1]) + printf( "FAIL 6\n"); + + if( mulle_utf8_strstr( s, NULL) != NULL) // sic! + printf( "FAIL 7\n"); + + if( mulle_utf8_strstr( s, "") != s) // sic! + printf( "FAIL 8\n"); + + if( mulle_utf8_strstr( s, "D") != NULL) + printf( "FAIL 9\n"); + + if( mulle_utf8_strstr( s2, "AB") != &s2[ 0]) + printf( "FAIL 10\n"); + + if( mulle_utf8_strstr( emos3, "B") != NULL) + printf( "FAIL 11\n"); + + if( mulle_utf8_strstr( emos3, emos3) != emos3) + printf( "FAIL 12\n"); + + return( 0); +} +