Skip to content

Commit

Permalink
Fix for issue #92: Clang 10 Compiler warnings
Browse files Browse the repository at this point in the history
Fix warnings when -Wconversion or -Wsign-conversion are enabled.
  • Loading branch information
nemtrif committed Dec 26, 2022
1 parent d736c29 commit e3e57c8
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 37 deletions.
20 changes: 2 additions & 18 deletions source/utf8/checked.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace utf8
uint8_t u8;
public:
// Construct from the offending octet, stored as-is in u8.
invalid_utf8 (uint8_t u) : u8(u) {}
// Construct from a plain char; the value is explicitly cast to uint8_t here
// (presumably so call sites holding a char need no cast of their own and
// trigger no implicit sign conversion -- confirm against callers).
invalid_utf8 (char c) : u8(static_cast<uint8_t>(c)) {}
// Returns a fixed, static description string; it does not include the octet value.
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; }
// Accessor for the octet this exception was constructed with.
uint8_t utf8_octet() const {return u8;}
};
Expand All @@ -75,24 +76,7 @@ namespace utf8
if (!utf8::internal::is_code_point_valid(cp))
throw invalid_code_point(cp);

if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
return result;
return internal::append(cp, result);
}

template <typename octet_iterator, typename output_iterator>
Expand Down
49 changes: 49 additions & 0 deletions source/utf8/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,55 @@ namespace internal
return utf8::internal::validate_next(it, end, ignored);
}

// Shared encoder behind both the checked and the unchecked append().
// Writes the UTF-8 octets for cp through result and returns the advanced
// iterator. No validation of cp is done here.
//
// octet_type cannot be deduced from the arguments; the overloads that call
// this function supply it explicitly so that each octet is cast to the
// element type that is actually being written.
template <typename octet_iterator, typename octet_type>
octet_iterator append(uint32_t cp, octet_iterator result) {
    // Single octet: the code point is its own encoding.
    if (cp < 0x80) {
        *(result++) = static_cast<octet_type>(cp);
        return result;
    }

    // Multi-octet sequence: choose the lead-octet marker and the shift that
    // brings the lead octet's payload bits down to the low end.
    uint32_t lead_marker;
    unsigned int shift;
    if (cp < 0x800) {            // two octets
        lead_marker = 0xc0;
        shift = 6;
    }
    else if (cp < 0x10000) {     // three octets
        lead_marker = 0xe0;
        shift = 12;
    }
    else {                       // four octets
        lead_marker = 0xf0;
        shift = 18;
    }

    *(result++) = static_cast<octet_type>((cp >> shift) | lead_marker);

    // Every continuation octet carries the next six payload bits under 0x80.
    while (shift != 0) {
        shift -= 6;
        *(result++) = static_cast<octet_type>(((cp >> shift) & 0x3f) | 0x80);
    }
    return result;
}

// One of the following overloads will be invoked from the API calls

// A simple (but dangerous) case: the caller appends byte(s) to a char array.
// Only a pointer is received, so nothing here can check the capacity of the
// destination; the encoder writes between one and four octets.
// The octet type is pinned to char so the casts match the array element type.
inline char* append(uint32_t cp, char* result) {
return append<char*, char>(cp, result);
}

// Hopefully, most common case: the caller uses back_inserter
// i.e. append(cp, std::back_inserter(str));
// The octet type is taken from the container's value_type, so each octet is
// cast to the element type the container actually stores.
template<typename container_type>
std::back_insert_iterator<container_type> append
(uint32_t cp, std::back_insert_iterator<container_type> result) {
return append<std::back_insert_iterator<container_type>,
typename container_type::value_type>(cp, result);
}

// Fallback: the caller uses some other kind of output iterator that is not
// covered by the overloads above.
// In this case the octet type cannot be determined from the iterator, so
// uint8_t is assumed; that can cause a conversion warning in the caller's
// build if the iterator's real element type is something else.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result) {
return append<octet_iterator, uint8_t>(cp, result);
}

} // namespace internal

/// The library API - functions intended to be called by the users
Expand Down
19 changes: 1 addition & 18 deletions source/utf8/unchecked.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,7 @@ namespace utf8
// Unchecked append: encodes the code point cp as UTF-8 and writes the octets
// through result, returning the advanced iterator.
// cp is not validated here. The encoding itself is delegated to
// internal::append so it is not duplicated in this function and the octet
// type is chosen by the matching internal overload.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    return internal::append(cp, result);
}

template <typename octet_iterator, typename output_iterator>
Expand Down
2 changes: 1 addition & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ target_link_libraries(noexceptionstests PRIVATE utf8::cpp)

# Warning flags propagated to everything that links against ${PROJECT_NAME}.
# The non-MSVC flag set is listed once. The generator expression is quoted and
# its flags are ';'-separated so the expression cannot be split on whitespace
# during command-argument parsing.
target_compile_options(${PROJECT_NAME} INTERFACE
    "$<$<CXX_COMPILER_ID:MSVC>:/W4>"
    "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Wpedantic;-Wconversion;-Wsign-conversion>")

target_compile_options(noexceptionstests PUBLIC -fno-exceptions)

Expand Down
6 changes: 6 additions & 0 deletions tests/test_checked_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ TEST(CheckedAPITests, test_append)
EXPECT_EQ (u[2], 0x8d);
EXPECT_EQ (u[3], 0x86);
EXPECT_EQ (u[4], 0);

// Ensure no warnings with plain char.
// The assertions inspect c, the buffer written by this append() call.
char c[2] = {0,0};
append('a', c);
EXPECT_EQ (c[0], 'a');
EXPECT_EQ (c[1], 0);
}

TEST(CheckedAPITests, test_next)
Expand Down

0 comments on commit e3e57c8

Please sign in to comment.