Skip to content

Commit

Permalink
✨ Unicode constants moved from external libraries to here!
Browse files Browse the repository at this point in the history
  • Loading branch information
ThePhD committed Nov 18, 2021
1 parent 8f5549b commit b09deb1
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 4 deletions.
141 changes: 138 additions & 3 deletions include/ztd/idk/detail/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@

#include <ztd/idk/charN_t.h>

#include <ztd/prologue.hpp>
// Replacement code point U+FFFD (the Unicode REPLACEMENT CHARACTER).
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_replacement = 0xFFFD;
// Replacement value 0x3F ('?' in ASCII); presumably the fallback for encodings that cannot represent U+FFFD.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_ascii_replacement = 0x003F;

// Largest value in the Unicode code space (U+10FFFF).
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_unicode_code_point = 0x10FFFF;
// First value of the lead (high) surrogate range (U+D800).
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_first_lead_surrogate = 0xD800;
Expand All @@ -49,6 +50,40 @@ ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_first_surroga
// Upper bound of the whole surrogate range; it is an alias for the last trail surrogate.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_surrogate
= __ztd_idk_detail_last_trail_surrogate;

// Largest value representable by a UTF-8 style sequence of N bytes
// (7, 11, 16, 21, 26 and 31 payload bits respectively).
// The 5- and 6-byte limits exceed U+10FFFF; in the visible part of this header they are
// only consulted by the "extended" / "overlong" helpers.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_1byte_value = 0x7F;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_2byte_value = 0x7FF;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_3byte_value = 0xFFFF;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_4byte_value = 0x1FFFFF;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_5byte_value = 0x3FFFFFF;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_6byte_value = 0x7FFFFFFF;
// Lead-byte classification constants.
//
// For N == 1 the mask selects the top bit and the expected masked result is 0x00.
// For N >= 2 the "mask" is a byte whose top N bits are set and the matching "continuation"
// constant is that same value; the sequence-length helpers in this header compare
// (byte & mask) against it to test whether a byte begins with at least N one-bits.
//
// NOTE(review): the "shift" constants are not used in the visible code. Their values match the
// number of payload bits carried by the first byte of an N-byte sequence — confirm intent.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_1byte_mask = 0x80u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_1byte_continuation = 0x00u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_1byte_shift = 7u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_2byte_mask = 0xC0u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_2byte_continuation
= __ztd_idk_detail_start_2byte_mask;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_2byte_shift = 5u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_3byte_mask = 0xE0u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_3byte_continuation
= __ztd_idk_detail_start_3byte_mask;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_3byte_shift = 4u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_4byte_mask = 0xF0u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_4byte_continuation
= __ztd_idk_detail_start_4byte_mask;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_4byte_shift = 3u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_5byte_mask = 0xF8u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_5byte_continuation
= __ztd_idk_detail_start_5byte_mask;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_5byte_shift = 2u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_6byte_mask = 0xFCu;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_6byte_continuation
= __ztd_idk_detail_start_6byte_mask;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_start_6byte_shift = 1u;
// Continuation-byte constants: (byte & 0xC0) == 0x80 matches the bit pattern 10xxxxxx,
// and 0x3F keeps the low 6 bits of such a byte.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_continuation_mask = 0xC0u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_continuation_signature = 0x80u;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_continuation_mask_value = 0x3Fu;
// Keeps the low 7 bits of a byte (0x7F).
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char8_t __ztd_idk_detail_single_mask_value = 0x7Fu;

// Returns true when `__value` lies inside the closed lead-surrogate range
// [__ztd_idk_detail_first_lead_surrogate, __ztd_idk_detail_last_lead_surrogate].
inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_is_lead_surrogate(ztd_char32_t __value) ZTD_NOEXCEPT_IF_CXX_I_ {
    // Expressed as "not outside the range" rather than "inside the range".
    return !(__value < __ztd_idk_detail_first_lead_surrogate || __value > __ztd_idk_detail_last_lead_surrogate);
}
Expand All @@ -59,13 +94,113 @@ inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_is_surrogate(ztd_char32_t _
return __value >= __ztd_idk_detail_first_surrogate && __value <= __ztd_idk_detail_last_surrogate;
}
// Reports whether `__value` is the lead (first) code unit of a multi-byte UTF-8 sequence.
//
// Lead bytes have both of their top two bits set (11xxxxxx). The previous body compared the masked
// value against the continuation signature (10xxxxxx), which accepted trailing bytes and rejected
// every genuine lead byte; it also carried a second, unreachable copy of the same return statement.
//
// This is a purely structural check: bytes that can never occur in well-formed UTF-8
// (0xC0, 0xC1, 0xF5..0xFF) still match here and have to be rejected separately
// (see __ztd_idk_detail_utf8_is_invalid).
inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_is_lead_utf8(ztd_char8_t __value) ZTD_NOEXCEPT_IF_CXX_I_ {
    // 0xC0 is both the mask and the expected result: the top two bits must both be 1.
    return (__value & __ztd_idk_detail_continuation_mask) == __ztd_idk_detail_continuation_mask;
}
// Returns true when `__value` is at most 0x7F, or when __ztd_idk_detail_is_lead_utf8 accepts it.
inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_is_single_or_lead_utf8(
    ztd_char8_t __value) ZTD_NOEXCEPT_IF_CXX_I_ {
    if (__ztd_idk_detail_is_lead_utf8(__value)) {
        return true;
    }
    // Not accepted as a lead byte: the result depends solely on the 0x7F upper bound.
    return !(__value > static_cast<ztd_char8_t>(0x7F));
}
// Returns true for the byte values this header treats as never valid in UTF-8:
// 0xC0, 0xC1, and everything greater than 0xF4.
inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_utf8_is_invalid(ztd_char8_t __value) noexcept {
    // 0xC0 and 0xC1 are adjacent, so they are tested as a two-element range.
    return __value > 0xF4 || (__value >= 0xC0 && __value <= 0xC1);
}

// Reports whether a value decoded from `__bytes` UTF-8 code units used more code units than
// necessary (an "overlong" encoding), considering sequences of up to 4 code units.
//
// __value: the decoded value.
// __bytes: the number of code units the value was decoded from.
//
// A value is overlong when it would have fit in a shorter sequence:
//   - it fits in 1 byte (<= 0x7F) but was read from more than 1,
//   - it fits in 2 bytes (<= 0x7FF) but was read from more than 2,
//   - it fits in 3 bytes (<= 0xFFFF) but was read from more than 3.
//
// The 1-byte clause previously ignored `__bytes`, so a correctly encoded single-byte value was
// reported as overlong. Results for `__bytes >= 2` are unchanged.
inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_utf8_is_overlong(
    ztd_char32_t __value, ::std::size_t __bytes) noexcept {
    return (__value <= __ztd_idk_detail_last_1byte_value && __bytes > 1)
        || (__value <= __ztd_idk_detail_last_2byte_value && __bytes > 2)
        || (__value <= __ztd_idk_detail_last_3byte_value && __bytes > 3);
}

// Same check as __ztd_idk_detail_utf8_is_overlong, extended to sequences of up to 6 code units.
//
// __value: the decoded value.
// __bytes: the number of code units the value was decoded from.
//
// Returns true when `__value` would have fit in fewer than `__bytes` code units, using the
// 1- through 5-byte upper limits (0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF).
//
// The 1-byte clause previously ignored `__bytes`, so a correctly encoded single-byte value was
// reported as overlong. Results for `__bytes >= 2` are unchanged.
inline ZTD_CONSTEXPR_IF_CXX_I_ bool __ztd_idk_detail_utf8_is_overlong_extended(
    ztd_char32_t __value, ::std::size_t __bytes) noexcept {
    return (__value <= __ztd_idk_detail_last_1byte_value && __bytes > 1)
        || (__value <= __ztd_idk_detail_last_2byte_value && __bytes > 2)
        || (__value <= __ztd_idk_detail_last_3byte_value && __bytes > 3)
        || (__value <= __ztd_idk_detail_last_4byte_value && __bytes > 4)
        || (__value <= __ztd_idk_detail_last_5byte_value && __bytes > 5);
}

// Maps `__value` to a sequence length of 1 to 4 by comparing it against the 1- through 4-byte
// upper limits (0x7F, 0x7FF, 0xFFFF, 0x1FFFFF) in ascending order.
// Returns 0 when `__value` is larger than the 4-byte limit.
inline ZTD_CONSTEXPR_IF_CXX_I_ int __ztd_idk_detail_utf8_decode_length(ztd_char32_t __value) noexcept {
    return __value <= __ztd_idk_detail_last_1byte_value ? 1
         : __value <= __ztd_idk_detail_last_2byte_value ? 2
         : __value <= __ztd_idk_detail_last_3byte_value ? 3
         : __value <= __ztd_idk_detail_last_4byte_value ? 4
                                                        : 0;
}

// Maps `__value` to a sequence length of 1 to 6 by comparing it against the 1- through 6-byte
// upper limits (0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF) in ascending order.
// Returns 0 when `__value` is larger than the 6-byte limit.
inline ZTD_CONSTEXPR_IF_CXX_I_ int __ztd_idk_detail_utf8_decode_length_overlong(ztd_char32_t __value) noexcept {
    return __value <= __ztd_idk_detail_last_1byte_value ? 1
         : __value <= __ztd_idk_detail_last_2byte_value ? 2
         : __value <= __ztd_idk_detail_last_3byte_value ? 3
         : __value <= __ztd_idk_detail_last_4byte_value ? 4
         : __value <= __ztd_idk_detail_last_5byte_value ? 5
         : __value <= __ztd_idk_detail_last_6byte_value ? 6
                                                        : 0;
}

// Derives a sequence length (1 to 4) from the leading bits of the code unit `__value`:
//   top bit clear                    -> 1
//   top three bits not all set       -> 2
//   top four bits not all set        -> 3
//   otherwise                        -> 4
// No validation is performed; a byte of the form 10xxxxxx also yields 2.
inline ZTD_CONSTEXPR_IF_CXX_I_ int __ztd_idk_detail_utf8_sequence_length(ztd_char8_t __value) noexcept {
    if ((__value & __ztd_idk_detail_start_1byte_mask) == __ztd_idk_detail_start_1byte_continuation) {
        return 1;
    }
    if ((__value & __ztd_idk_detail_start_3byte_mask) != __ztd_idk_detail_start_3byte_continuation) {
        return 2;
    }
    if ((__value & __ztd_idk_detail_start_4byte_mask) != __ztd_idk_detail_start_4byte_continuation) {
        return 3;
    }
    return 4;
}

// Derives a sequence length (1 to 6) from the leading bits of the code unit `__value`:
//   top bit clear                    -> 1
//   top three bits not all set       -> 2
//   top four bits not all set        -> 3
//   top five bits not all set        -> 4
//   top six bits not all set         -> 5
//   otherwise                        -> 6
// No validation is performed; a byte of the form 10xxxxxx also yields 2.
inline ZTD_CONSTEXPR_IF_CXX_I_ int __utf8_sequence_length_extended(ztd_char8_t __value) noexcept {
    if ((__value & __ztd_idk_detail_start_1byte_mask) == __ztd_idk_detail_start_1byte_continuation) {
        return 1;
    }
    if ((__value & __ztd_idk_detail_start_3byte_mask) != __ztd_idk_detail_start_3byte_continuation) {
        return 2;
    }
    if ((__value & __ztd_idk_detail_start_4byte_mask) != __ztd_idk_detail_start_4byte_continuation) {
        return 3;
    }
    if ((__value & __ztd_idk_detail_start_5byte_mask) != __ztd_idk_detail_start_5byte_continuation) {
        return 4;
    }
    if ((__value & __ztd_idk_detail_start_6byte_mask) != __ztd_idk_detail_start_6byte_continuation) {
        return 5;
    }
    return 6;
}

#include <ztd/epilogue.hpp>
// Combines a 2-code-unit sequence into a single value: the low 5 bits of `__value0`
// form the upper part and the low 6 bits of `__value1` form the lower part.
// No validation of either code unit is performed.
inline ZTD_CONSTEXPR_IF_CXX_I_ ztd_char32_t __ztd_idk_detail_utf8_decode(
    ztd_char8_t __value0, ztd_char8_t __value1) noexcept {
    const int __upper = (__value0 & 0x1F) << 6;
    const int __lower = __value1 & 0x3F;
    return __upper | __lower;
}

// Combines a 3-code-unit sequence into a single value: the low 4 bits of `__value0`
// followed by the low 6 bits of `__value1` and of `__value2`, most significant first.
// No validation of any code unit is performed.
inline ZTD_CONSTEXPR_IF_CXX_I_ ztd_char32_t __ztd_idk_detail_utf8_decode(
    ztd_char8_t __value0, ztd_char8_t __value1, ztd_char8_t __value2) noexcept {
    const int __upper = (__value0 & 0x0F) << 12;
    const int __middle = (__value1 & 0x3F) << 6;
    const int __lower = __value2 & 0x3F;
    return __upper | __middle | __lower;
}

// Combines a 4-code-unit sequence into a single value: the low 3 bits of `__value0`
// followed by the low 6 bits of `__value1`, `__value2` and `__value3`, most significant first.
// No validation of any code unit is performed.
inline ZTD_CONSTEXPR_IF_CXX_I_ ztd_char32_t __ztd_idk_detail_utf8_decode(
    ztd_char8_t __value0, ztd_char8_t __value1, ztd_char8_t __value2, ztd_char8_t __value3) noexcept {
    const int __bits_18_20 = (__value0 & 0x07) << 18;
    const int __bits_12_17 = (__value1 & 0x3F) << 12;
    const int __bits_6_11 = (__value2 & 0x3F) << 6;
    const int __bits_0_5 = __value3 & 0x3F;
    return __bits_18_20 | __bits_12_17 | __bits_6_11 | __bits_0_5;
}

// Upper bounds of the ASCII range (0x7F) and of the 16-bit range (0xFFFF).
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_ascii_value = 0x7F;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_last_bmp_value = 0xFFFF;
// Offset (0x10000) added back when a surrogate pair is combined into one value.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const ztd_char32_t __ztd_idk_detail_normalizing_value = 0x10000;
// Bit layout of a 20-bit value split into two 10-bit halves: 0xFFC00 selects bits 10..19,
// 0x3FF selects bits 0..9, and 10 is the shift between the halves.
// NOTE(review): these three are declared as `int`, unlike the ztd_char32_t constants above — confirm intended.
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const int __ztd_idk_detail_lead_surrogate_bitmask = 0xFFC00;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const int __ztd_idk_detail_trail_surrogate_bitmask = 0x3FF;
ZTD_INLINE_CONSTEXPR_IF_CXX_I_ const int __ztd_idk_detail_lead_shifted_bits = 10;

// Combines a UTF-16 surrogate pair into a single value.
//
// The offset of `__lead` from the first lead surrogate supplies the upper bits (shifted left by
// __ztd_idk_detail_lead_shifted_bits), the offset of `__trail` from the first trail surrogate
// supplies the lower bits, and __ztd_idk_detail_normalizing_value is added to the result.
// Neither argument is checked for actually being a surrogate.
inline ZTD_CONSTEXPR_IF_CXX_I_ ztd_char32_t __ztd_idk_detail_utf16_combine_surrogates(
    ztd_char16_t __lead, ztd_char16_t __trail) noexcept {
    return __ztd_idk_detail_normalizing_value
        + (((__lead - __ztd_idk_detail_first_lead_surrogate) << __ztd_idk_detail_lead_shifted_bits)
            | (__trail - __ztd_idk_detail_first_trail_surrogate));
}

#endif // ZTD_IDK_DETAIL_UNICODE_H
61 changes: 61 additions & 0 deletions include/ztd/idk/detail/unicode.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// =============================================================================
//
// ztd.idk
// Copyright © 2021 JeanHeyd "ThePhD" Meneide and Shepherd's Oasis, LLC
// Contact: opensource@soasis.org
//
// Commercial License Usage
// Licensees holding valid commercial ztd.idk licenses may use this file in
// accordance with the commercial license agreement provided with the
// Software or, alternatively, in accordance with the terms contained in
// a written agreement between you and Shepherd's Oasis, LLC.
// For licensing terms and conditions see your agreement. For
// further information contact opensource@soasis.org.
//
// Apache License Version 2 Usage
// Alternatively, this file may be used under the terms of Apache License
// Version 2.0 (the "License") for non-commercial use; you may not use this
// file except in compliance with the License. You may obtain a copy of the
// License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// ============================================================================>

#pragma once

#ifndef ZTD_IDK_DETAIL_UNICODE_HPP
#define ZTD_IDK_DETAIL_UNICODE_HPP

#include <ztd/idk/version.hpp>

#include <ztd/idk/detail/unicode.h>
#include <ztd/idk/charN_t.h>

#include <ztd/prologue.hpp>

namespace ztd {
ZTD_IDK_INLINE_ABI_NAMESPACE_OPEN_I_
namespace __idk_detail {
// Compile-time dispatcher over the two C-level length helpers.
//
// _IsOverlong == false: forwards to __ztd_idk_detail_utf8_decode_length.
// _IsOverlong == true:  forwards to __ztd_idk_detail_utf8_decode_length_overlong.
template <bool _IsOverlong>
inline constexpr int __utf8_decode_length(ztd_char32_t __value) noexcept {
    if constexpr (!_IsOverlong) {
        return __ztd_idk_detail_utf8_decode_length(__value);
    }
    else {
        return __ztd_idk_detail_utf8_decode_length_overlong(__value);
    }
}
} // namespace __idk_detail
ZTD_IDK_INLINE_ABI_NAMESPACE_CLOSE_I_
} // namespace ztd

#include <ztd/epilogue.hpp>

#endif // ZTD_IDK_DETAIL_UNICODE_HPP
2 changes: 1 addition & 1 deletion tests/idk/inclusion/source/ztd/idk/detail/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@
//
// ============================================================================>

#include <ztd/idk/detail/unicode.h>
#include <ztd/idk/detail/unicode.hpp>

0 comments on commit b09deb1

Please sign in to comment.