Skip to content

Commit

Permalink
✨ A decent iconv encoding
Browse files Browse the repository at this point in the history
- 💬 "glibc/iconv can rot...."
  • Loading branch information
ThePhD committed Aug 14, 2021
1 parent 125311d commit 911a31d
Show file tree
Hide file tree
Showing 25 changed files with 1,364 additions and 98 deletions.
82 changes: 73 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ option(ZTD_TEXT_DOCUMENTATION_NO_SPHINX "Turn off Sphinx usage (useful for ReadT
# Optional build products: all of these are opt-in.
option(ZTD_TEXT_EXAMPLES "Enable build of examples" OFF)
option(ZTD_TEXT_BENCHMARKS "Enable build of benchmarks" OFF)
option(ZTD_TEXT_GENERATE_SINGLE "Enable generation of a single header and its target" OFF)
# Optional third-party backends: none of these are turned on by default.
# Each option is declared exactly once so that its help string describes
# what the option actually does.
option(ZTD_TEXT_USE_CUNEICODE "Use ztd.cuneicode" OFF)
option(ZTD_TEXT_USE_LIBICONV "Use libiconv" OFF)
option(ZTD_TEXT_USE_STATIC_LIBICONV "Use libiconv and link against it even if it's a static library" OFF)

# # Dependencies
# ztd.idk
Expand All @@ -133,11 +135,66 @@ if (ZTD_TEXT_USE_CUNEICODE)
GIT_REPOSITORY https://github.com/soasis/cuneicode.git
GIT_TAG main)
FetchContent_MakeAvailable(ztd.cuneicode)
set(ztd_text_has_cuneicode ON)
else()
set(ztd_text_has_cuneicode OFF)
set(ztd-text-cuneicode ztd::cuneicode)
endif()

# iconv - static and dynamic
#
# Because of iconv's license, using it as a static dependency is tricky:
# therefore, it is only searched for when folks explicitly asked for it
# through ZTD_TEXT_USE_LIBICONV, regardless of whether it is available or not.
# The search is skipped when an Iconv::Iconv target already exists.
if (NOT TARGET Iconv::Iconv AND ZTD_TEXT_USE_LIBICONV)
	find_package(Iconv)
endif()

# Define generator expressions for each compile definition / link item.
#
# The option and find-result variables are expanded with ${...} here, at
# configure time. $<BOOL:...> evaluates the *string* it is handed, so passing
# a bare variable name (e.g. $<BOOL:Iconv_FOUND>) is always 1; the value has
# to be substituted in for the test to mean anything. An unset variable
# expands to nothing and $<BOOL:> evaluates to 0.

# ZTD_LIBICONV: 1 when libiconv support was requested, 0 otherwise.
string(CONCAT ztd-text-libiconv-define
	"$<IF:$<BOOL:${ZTD_TEXT_USE_LIBICONV}>,"
		"ZTD_LIBICONV=1,"
		"ZTD_LIBICONV=0"
	">"
)
# ZTD_LIBICONV_LOAD: 0 when iconv was found and requested, 1 otherwise.
string(CONCAT ztd-text-libiconv-load-define
	"$<IF:$<AND:$<BOOL:${Iconv_FOUND}>,$<BOOL:${ZTD_TEXT_USE_LIBICONV}>>,"
		"ZTD_LIBICONV_LOAD=0,"
		"ZTD_LIBICONV_LOAD=1"
	">"
)
# ZTD_ICONV_H: 1 when iconv was found and requested, 0 otherwise.
string(CONCAT ztd-text-libiconv-header-define
	"$<IF:$<AND:$<BOOL:${Iconv_FOUND}>,$<BOOL:${ZTD_TEXT_USE_LIBICONV}>>,"
		"ZTD_ICONV_H=1,"
		"ZTD_ICONV_H=0"
	">"
)
# Link item: the Iconv::Iconv target if it exists at generate time, else nothing.
string(CONCAT ztd-text-libiconv
	"$<TARGET_NAME_IF_EXISTS:Iconv::Iconv>"
)
# Link item: the platform's dynamic-loading library, added whenever libiconv
# support is requested.
# NOTE(review): the previous condition tested a literal name and was therefore
# always true; gating on the option assumes the dlopen/dlsym path is only
# compiled when ZTD_LIBICONV is on - confirm against the headers.
string(CONCAT ztd-text-libiconv-dl
	"$<$<BOOL:${ZTD_TEXT_USE_LIBICONV}>:${CMAKE_DL_LIBS}>"
)
# ZTD_LIBICONV_STATIC: 1 when iconv was found, static linking was explicitly
# allowed, and the Iconv::Iconv target reports itself as a STATIC_LIBRARY.
# The TARGET_PROPERTY lookup is guarded so it is only evaluated when the
# target exists.
# NOTE(review): if Iconv::Iconv is an INTERFACE IMPORTED target (as CMake's own
# FindIconv module appears to create), TYPE will never be STATIC_LIBRARY or
# SHARED_LIBRARY and both of the following definitions stay 0 - confirm.
string(CONCAT ztd-text-static-libiconv-define
	"$<IF:"
		"$<AND:$<BOOL:${Iconv_FOUND}>,$<BOOL:${ZTD_TEXT_USE_STATIC_LIBICONV}>,"
			"$<STREQUAL:"
				"$<$<BOOL:$<TARGET_NAME_IF_EXISTS:Iconv::Iconv>>:$<TARGET_PROPERTY:Iconv::Iconv,TYPE>>,"
				"STATIC_LIBRARY"
			">"
		">,"
		"ZTD_LIBICONV_STATIC=1,"
		"ZTD_LIBICONV_STATIC=0"
	">"
)
# ZTD_LIBICONV_DYNAMIC: 1 when iconv was found, requested, and the
# Iconv::Iconv target reports itself as a SHARED_LIBRARY.
string(CONCAT ztd-text-dynamic-libiconv-define
	"$<IF:"
		"$<AND:$<BOOL:${Iconv_FOUND}>,$<BOOL:${ZTD_TEXT_USE_LIBICONV}>,"
			"$<STREQUAL:"
				"$<$<BOOL:$<TARGET_NAME_IF_EXISTS:Iconv::Iconv>>:$<TARGET_PROPERTY:Iconv::Iconv,TYPE>>,"
				"SHARED_LIBRARY"
			">"
		">,"
		"ZTD_LIBICONV_DYNAMIC=1,"
		"ZTD_LIBICONV_DYNAMIC=0"
	">"
)

# Main library declarations
# Gather every .hpp header beneath include/ into ztd.text.includes;
# CONFIGURE_DEPENDS asks the build system to re-check the glob on each build.
file(GLOB_RECURSE ztd.text.includes
	CONFIGURE_DEPENDS
	include/*.hpp
)

Expand All @@ -148,14 +205,21 @@ target_include_directories(ztd.text
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
# Attach the globbed headers to the interface target.
target_sources(ztd.text INTERFACE ${ztd.text.includes})
# Propagate the libiconv configuration macros to every consumer; each entry
# is a generator expression that resolves to a NAME=0 or NAME=1 definition.
target_compile_definitions(ztd.text
	INTERFACE
	${ztd-text-libiconv-define}
	${ztd-text-libiconv-load-define}
	${ztd-text-libiconv-header-define}
	${ztd-text-static-libiconv-define}
	${ztd-text-dynamic-libiconv-define})
# Link requirements for consumers. The libiconv and cuneicode entries may
# expand to nothing, in which case only ztd::idk is linked.
target_link_libraries(ztd.text
	INTERFACE
	${ztd-text-libiconv-dl}
	ztd::idk
	${ztd-text-libiconv}
	${ztd-text-cuneicode})
# Install the public headers.
install(DIRECTORY include/
	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# # Config / Version packaging
# Version configurations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@
..
.. =============================================================================>
🔨 iconv_encoding (In Progress)
===============================
basic_iconv
===========

.. warning::
This encoding is only available if the :ref:`configuration macro/build option for ZTD_LIBICONV <config-ZTD_LIBICONV>` is turned on.

|unfinished_warning|
This encoding is tied to the `iconv library <https://www.gnu.org/software/libiconv/>`_. It will attempt to use the header and the functions directly; if that is not possible, it will bootstrap iconv on first use of the encoding through ``GetProcAddress``/``dlsym`` and related functions. If it cannot find iconv, it will either assert, abort, or loudly annoy the user in some way. The code is retrieved dynamically where possible unless the user explicitly defines the build option ``ZTD_TEXT_USE_STATIC_LIBICONV`` (for CMake), as iconv is licensed under the LGPL/GPL and cannot be traditionally built / statically linked with application code (though in the future we may provide a way for software to do that if the software being made with this library is also GPL-compatible software).

This encoding is only available if the :ref:`configuration macro for ZTD_TEXT_USE_ICONV <config-ZTD_TEXT_USE_ICONV>` is turned on.
iconv has a fixed set of encodings it can be compiled to support. States are pre-constructed in the encoding itself and copied as necessary when ``encode_state`` or ``decode_state``\ s are being created to call the iconv functions. The user can inspect the output error parameter from the ``basic_iconv`` constructor to know of failure, or not pass in the output error parameter and instead take one of an assert, a thrown exception, or ``abort`` (preferred invocation in that order).

This encoding is tied to the `iconv library <https://www.gnu.org/software/libiconv/>`_. It will attempt to bootstrap iconv on first use of the encoding through use of ``GetProcAddress``/``dlsym`` and friends. If it cannot find it will either assert, abort, or loudly annoy the user in some way. The code is retrieved dynamically, as iconv is under a LGPL/GPL licensed and cannot be traditionally built / statically linked with application code (though in the future we may provide a way for software to do that if the software being made with this library is also GPL-compatible software).

iconv has a fixed set of encodings it can be compiled with to support. States are pre-constructed in the encoding itself and copied as necessary when ``encode_state`` or ``decode_state``\ s are being created to call the iconv functions. The user can inspect the output error parameter from the ``iconv_encoding`` constructor to know of failure, or not pass in the output error parameter and instead take one of a assert, thrown exception, or ``abort`` (preferred invocation in that order).
.. doxygentypedef:: ztd::text::basic_iconv
2 changes: 1 addition & 1 deletion documentation/source/api/encodings/cuneicode_encoding.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@

This encoding is only available if the :ref:`configuration macro for ZTD_TEXT_USE_CUNEICODE <config-ZTD_TEXT_USE_CUNEICODE>` is turned on.

This encoding is tied to the `cuneicode library <https://ztdcuneicode.rtfd.io>`_. The cuneicode library is a C library for validation, counting, and transcoding between a fixed set of encodings, with an additional plug for arbitrary encodings that can be added at run-time. This is in opposition to :doc:`iconv </api/encodings/iconv_encoding>`, where additional encodings can only be added by-hand through recompiling the code or hooking specific system configuration points.
This encoding is tied to the `cuneicode library <https://ztdcuneicode.rtfd.io>`_. The cuneicode library is a C library for validation, counting, and transcoding between a fixed set of encodings, with an additional plug for arbitrary encodings that can be added at run-time. This is in opposition to :doc:`iconv </api/encodings/basic_iconv>`, where additional encodings can only be added by-hand through recompiling the code or hooking specific system configuration points.

cuneicode has a variable number of encodings it can be compiled to support. States are pre-constructed in the encoding itself and copied as necessary when ``encode_state`` or ``decode_state``\ s are being created to call the desired conversion functions. The user can inspect the output error parameter from the ``cuneicode_encoding`` constructor to know of failure, or not pass in the output error parameter and instead take one of an assert, a thrown exception, or ``abort`` (preferred invocation in that order).
2 changes: 1 addition & 1 deletion documentation/source/api/encodings/execution.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ This is the locale-based, runtime encoding. It uses a number of compile-time and
Currently, the hierarchy of behaviors is like so:

- If the platform is MacOS, then it assumes this is :doc:`UTF-8 </api/encodings/utf8>`;
- Otherwise, if libiconv is available, then it attempts to use :doc:`iconv </api/encodings/iconv_encoding>` configured to the ``"char"``-identified encoding;
- Otherwise, if libiconv is available, then it attempts to use :doc:`iconv </api/encodings/basic_iconv>` configured to the ``"char"``-identified encoding;
- Otherwise, if the headers ``<cuchar>`` or ``<uchar.h>`` are available, then it attempts to use a gnarly, lossy, and dangerous encoding that potentially traffics through the C Standard Library and Locale APIs;
- Otherwise, it produces a compile-time error.

Expand Down
2 changes: 1 addition & 1 deletion documentation/source/api/encodings/wide_execution.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ This is the locale-based, wide runtime encoding. It uses a number of compile-tim
Currently, the hierarchy of behaviors is like so:

- If the platform is Windows, then it assumes this is :doc:`UTF-16 </api/encodings/utf16>`;
- Otherwise, if libiconv is available, then it attempts to use :doc:`iconv </api/encodings/iconv_encoding>` configured to the ``"wchar_t"``-identified encoding;
- Otherwise, if libiconv is available, then it attempts to use :doc:`iconv </api/encodings/basic_iconv>` configured to the ``"wchar_t"``-identified encoding;
- Otherwise, if the platform is MacOS and ``WCHAR_MAX`` is greater than the maximum of an unsigned 21-bit number, or ``__STDC_ISO_10646__`` is defined, then it attempts to use :doc:`UTF-32 </api/encodings/utf32>`;
- Otherwise, if the headers ``<cwchar>`` or ``<wchar.h>`` are available, then it attempts to use a gnarly, lossy, and dangerous encoding that potentially traffics through the C Standard Library and Locale APIs in conjunction with a roundtrip through the :doc:`ztd::text::execution </api/encodings/execution>` encoding;
- Otherwise, it produces a compile-time error.
Expand Down
9 changes: 5 additions & 4 deletions documentation/source/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@ There are various configuration macros and CMake/build-time switches that will c
- Default: off.
- Turned on if the special ``__has_include`` directive is present with the compiler and ``__has_include(<ztd/cuneicode/cuneicode.h>)`` works.

.. _config-ZTD_TEXT_USE_ICONV:
.. _config-ZTD_LIBICONV:

- ``ZTD_TEXT_USE_ICONV`` (CMake: ``ZTD_TEXT_USE_ICONV``)
- ``ZTD_LIBICONV`` (CMake: ``ZTD_TEXT_USE_LIBICONV``)
- Enables use of the `iconv project <https://www.gnu.org/software/libiconv/>`_.
- Attempts to load it from the system at runtime using ``GetProcAddress``, ``dlopen``/``dlsym``/``dlclose``.
- Makes the ``ztd::text::iconv_encoding`` available (accessible directly VIA ``#include <ztd/text/iconv_encoding.hpp>``).
- Attempts to use the headers directly if possible.
- Otherwise, attempts to load it from the system at runtime using ``GetProcAddress``, or ``dlopen``/``dlsym``/``dlclose``.
- Makes the ``ztd::text::basic_iconv`` available (accessible directly VIA ``#include <ztd/text/basic_iconv.hpp>``).
- Default: off.
- Not turned on by-default under any conditions.

Expand Down
2 changes: 1 addition & 1 deletion documentation/source/encodings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ As a general point, we hope to support almost all of the encodings here in one f
* - ``iconv`` Encoding
- Yes
- Yes
- :doc:`WIP 🛠️ </api/encodings/iconv_encoding>`
- :doc:`Yes ✅ </api/encodings/basic_iconv>`
* - ``cuneicode`` Encoding
- Yes
- Yes
Expand Down
4 changes: 4 additions & 0 deletions include/ztd/text/basic_c_string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ namespace ztd { namespace text {
constexpr basic_c_string_view& operator=(basic_c_string_view&&) = default;
constexpr basic_c_string_view& operator=(const basic_c_string_view&) = default;

//////
/// @brief Returns a new __base_t object constructed from this view's current data() and size().
///
/// @remarks The returned object is produced by value; this view itself is not modified.
constexpr __base_t base() const noexcept {
return __base_t(this->data(), this->size());
}

//////
/// @brief Returns the size of this view, as reported by the __base_t implementation of size().
constexpr size_type size() const noexcept {
// Explicitly qualified so the call goes to __base_t::size() rather than back to this overload.
return this->__base_t::size();
}
Expand Down

0 comments on commit 911a31d

Please sign in to comment.