Skip to content

Commit

Permalink
✍ Write up the Encoding-dependent state docs
Browse files Browse the repository at this point in the history
- Dependent States allow users to have type-erased encodings at very little cost
- Add the necessary API docs into the Sphinx text
- Fix <span> shenanigans
-  Make sure we can actually compile the validate tests in the first place...
  • Loading branch information
ThePhD committed Feb 22, 2021
1 parent 72498be commit 72772ce
Show file tree
Hide file tree
Showing 24 changed files with 621 additions and 336 deletions.
54 changes: 27 additions & 27 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ set(ZTD_TEXT_PROJECT_NAME ztd.text)
set(ZTD_TEXT_PROJECT_VERSION 0.0.0)
set(ZTD_TEXT_PROJECT_DESCRIPTION "A spicy text library.")
project(${ZTD_TEXT_PROJECT_NAME}
VERSION ${ZTD_TEXT_PROJECT_VERSION}
DESCRIPTION ${ZTD_TEXT_PROJECT_DESCRIPTION}
LANGUAGES C CXX)
VERSION ${ZTD_TEXT_PROJECT_VERSION}
DESCRIPTION ${ZTD_TEXT_PROJECT_DESCRIPTION}
LANGUAGES C CXX)

# # Modules
# Include modules useful to the project, whether locally made in our own cmake DIRECTORY
Expand All @@ -50,9 +50,9 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
# # # Top-Level Directories
# Check if this is the top-level project or not
if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
set(ZTD_TEXT_IS_TOP_LEVEL_PROJECT YES)
set(ZTD_TEXT_IS_TOP_LEVEL_PROJECT YES)
else()
set(ZTD_TEXT_IS_TOP_LEVEL_PROJECT NO)
set(ZTD_TEXT_IS_TOP_LEVEL_PROJECT NO)
endif()

# Modify bad flags / change defaults if we are the top level
Expand Down Expand Up @@ -86,7 +86,7 @@ option(ZTD_TEXT_GENERATE_SINGLE "Enable generation of a single header and its ta
# Opt-in switch for cuneicode; it is combined with ztd_text_has_cuneicode below to select the ztd::cuneicode target.
option(ZTD_TEXT_USE_CUNEICODE "Enable use of the ztd::cuneicode target when it is available" OFF)

if (NOT CMAKE_CXX_STANDARD GREATER_EQUAL 20)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 20)
endif()

# # Dependencies
Expand All @@ -109,16 +109,16 @@ check_cxx_compiler_flag(-Wall ZTD_TEXT_DIAGNOSTIC_DEFAULTS)

string(CONCAT ztd-text-is-top-level $<STREQUAL:${PROJECT_NAME},${CMAKE_PROJECT_NAME}>)
string(CONCAT --disable-permissive $<
$<AND:
$<BOOL:${ZTD_TEXT_MSVC_DISABLE_PERMISSIVE}>,
${ztd-text-is-top-level}
>:/permissive-
$<AND:
$<BOOL:${ZTD_TEXT_MSVC_DISABLE_PERMISSIVE}>,
${ztd-text-is-top-level}
>:/permissive-
>)
string(CONCAT ztd-use-cuneicode $<
$<AND:
$<BOOL:${ZTD_TEXT_USE_CUNEICODE}>,
$<BOOL:${ztd_text_has_cuneicode}>
>:$<TARGET_NAME_IF_EXISTS:ztd::cuneicode>
$<AND:
$<BOOL:${ZTD_TEXT_USE_CUNEICODE}>,
$<BOOL:${ztd_text_has_cuneicode}>
>:$<TARGET_NAME_IF_EXISTS:ztd::cuneicode>
>)

string(CONCAT --warn-pedantic $<$<BOOL:${ZTD_TEXT_DIAGNOSTIC_PEDANTIC}>:-Wpedantic>)
Expand All @@ -128,9 +128,9 @@ string(CONCAT --deny-errors $<$<BOOL:${ZTD_TEXT_DIAGNOSTIC_ERRORS}>:-Werror>)
add_library(ztd.text INTERFACE)
add_library(ztd::text ALIAS ztd.text)
target_include_directories(ztd.text
INTERFACE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
INTERFACE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
target_compile_features(ztd.text INTERFACE $<${ztd-text-is-top-level}:cxx_std_20>)
target_compile_options(ztd.text INTERFACE ${--disable-permissive})
target_sources(ztd.text INTERFACE ${ztd.text.includes})
Expand Down Expand Up @@ -164,8 +164,8 @@ endif()

# # Benchmarks, Tests, Examples
if (ZTD_TEXT_TESTS)
include(CTest)
add_subdirectory(tests)
include(CTest)
add_subdirectory(tests)
endif()

if (ZTD_TEXT_DOCUMENTATION)
Expand All @@ -179,12 +179,12 @@ if (ZTD_TEXT_EXAMPLES)
endif()

if (ZTD_TEXT_SCRATCH)
add_executable(scratch main.cpp)
target_link_libraries(scratch PRIVATE ztd::text)
target_compile_options(scratch
PRIVATE
${--warn-pedantic}
${--warn-default}
${--deny-errors})
target_compile_features(scratch PRIVATE cxx_std_20)
add_executable(scratch main.cpp)
target_link_libraries(scratch PRIVATE ztd::text)
target_compile_options(scratch
PRIVATE
${--warn-pedantic}
${--warn-default}
${--deny-errors})
target_compile_features(scratch PRIVATE cxx_std_20)
endif()
2 changes: 2 additions & 0 deletions documentation/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ Result Types, Status Codes and Quality Aides
api/char8_t
api/endian
api/encoding_error
api/make_decode_state
api/make_encode_state
api/unicode_code_point
api/unicode_scalar_value
api/stateless_decode_result
Expand Down
38 changes: 38 additions & 0 deletions documentation/source/api/make_decode_state.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
.. =============================================================================
..
.. ztd.text
.. Copyright © 2021 JeanHeyd "ThePhD" Meneide and Shepherd's Oasis, LLC
.. Contact: opensource@soasis.org
..
.. Commercial License Usage
.. Licensees holding valid commercial ztd.text licenses may use this file in
.. accordance with the commercial license agreement provided with the
.. Software or, alternatively, in accordance with the terms contained in
.. a written agreement between you and Shepherd's Oasis, LLC.
.. For licensing terms and conditions see your agreement. For
.. further information contact opensource@soasis.org.
..
.. Apache License Version 2 Usage
.. Alternatively, this file may be used under the terms of Apache License
.. Version 2.0 (the "License") for non-commercial use; you may not use this
.. file except in compliance with the License. You may obtain a copy of the
.. License at
..
.. http://www.apache.org/licenses/LICENSE-2.0
..
.. Unless required by applicable law or agreed to in writing, software
.. distributed under the License is distributed on an "AS IS" BASIS,
.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
.. See the License for the specific language governing permissions and
.. limitations under the License.
..
.. =============================================================================>
make_decode_state
=================

This detects when the ``decode_state`` of a given ``encoding`` requires the ``encoding`` itself to make said state. If so, it will call the ``decode_state``\ 's constructor with the encoding passed in. Otherwise, it simply default-constructs a state. In either case, the constructed value is returned to the user.

The classification for this is done by :doc:`ztd::text::is_decode_state_independent </api/is_decode_state_independent>`.

.. doxygenfunction:: ztd::text::make_decode_state
38 changes: 38 additions & 0 deletions documentation/source/api/make_encode_state.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
.. =============================================================================
..
.. ztd.text
.. Copyright © 2021 JeanHeyd "ThePhD" Meneide and Shepherd's Oasis, LLC
.. Contact: opensource@soasis.org
..
.. Commercial License Usage
.. Licensees holding valid commercial ztd.text licenses may use this file in
.. accordance with the commercial license agreement provided with the
.. Software or, alternatively, in accordance with the terms contained in
.. a written agreement between you and Shepherd's Oasis, LLC.
.. For licensing terms and conditions see your agreement. For
.. further information contact opensource@soasis.org.
..
.. Apache License Version 2 Usage
.. Alternatively, this file may be used under the terms of Apache License
.. Version 2.0 (the "License") for non-commercial use; you may not use this
.. file except in compliance with the License. You may obtain a copy of the
.. License at
..
.. http://www.apache.org/licenses/LICENSE-2.0
..
.. Unless required by applicable law or agreed to in writing, software
.. distributed under the License is distributed on an "AS IS" BASIS,
.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
.. See the License for the specific language governing permissions and
.. limitations under the License.
..
.. =============================================================================>
make_encode_state
=================

This detects when the ``encode_state`` of a given ``encoding`` requires the ``encoding`` itself to make said state. If so, it will call the ``encode_state``\ 's constructor with the encoding passed in. Otherwise, it simply default-constructs a state. In either case, the constructed value is returned to the user.

The classification for this is done by :doc:`ztd::text::is_encode_state_independent </api/is_encode_state_independent>`.

.. doxygenfunction:: ztd::text::make_encode_state
9 changes: 5 additions & 4 deletions documentation/source/design/beyond lucky 7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ While the given :doc:`Lucky 7 </design/lucky 7>` represents the simplest possibl
.. toctree::
:maxdepth: 1

/design/lucky 7 extensions/seperate states
/design/lucky 7 extensions/injective
/design/lucky 7 extensions/replacement characters
/design/lucky 7 extensions/unicode encoding
lucky 7 extensions/seperate states
lucky 7 extensions/injective
lucky 7 extensions/replacement characters
lucky 7 extensions/unicode encoding
lucky 7 extensions/dependent states
151 changes: 151 additions & 0 deletions documentation/source/design/lucky 7 extensions/dependent states.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
.. =============================================================================
..
.. ztd.text
.. Copyright © 2021 JeanHeyd "ThePhD" Meneide and Shepherd's Oasis, LLC
.. Contact: opensource@soasis.org
..
.. Commercial License Usage
.. Licensees holding valid commercial ztd.text licenses may use this file in
.. accordance with the commercial license agreement provided with the
.. Software or, alternatively, in accordance with the terms contained in
.. a written agreement between you and Shepherd's Oasis, LLC.
.. For licensing terms and conditions see your agreement. For
.. further information contact opensource@soasis.org.
..
.. Apache License Version 2 Usage
.. Alternatively, this file may be used under the terms of Apache License
.. Version 2.0 (the "License") for non-commercial use; you may not use this
.. file except in compliance with the License. You may obtain a copy of the
.. License at
..
.. http://www.apache.org/licenses/LICENSE-2.0
..
.. Unless required by applicable law or agreed to in writing, software
.. distributed under the License is distributed on an "AS IS" BASIS,
.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
.. See the License for the specific language governing permissions and
.. limitations under the License.
..
.. =============================================================================>
Encoding-Dependent States
=========================

Some states need additional information in order to be constructed and used properly. This can be the case when the encoding has stored some type-erased information, as :doc:`ztd::text::any_encoding </api/encodings/any_encoding>` does, or if you wrote a ``variant_encoding<utf8le, utf16be, ...>``. For example, given a ``type_erased_encoding`` like so:

.. code-block:: cpp
:linenos:
class type_erased_encoding {
private:
struct erased_state {
virtual ~erased_state () {}
};
struct erased_encoding {
virtual std::unique_ptr<erased_state> create_decode_state() = 0;
virtual std::unique_ptr<erased_state> create_encode_state() = 0;
virtual ~erased_encoding () {}
};
template <typename Encoding>
struct typed_encoding : erased_encoding {
Encoding encoding;
struct decode_state : erased_state {
using state_type = ztd::text::decode_state_t<Encoding>;
state_type state;
decode_state(const Encoding& some_encoding)
: state(ztd::text::make_decode_state(some_encoding)) {
// get a decode state from the given encoding
}
};
struct encode_state : erased_state {
using state_type = ztd::text::encode_state_t<Encoding>;
state_type state;
decode_state(const Encoding& some_encoding)
: state(ztd::text::make_encode_state(some_encoding)) {
// get a decode state from the given encoding
}
};
typed_encoding(Encoding&& some_encoding)
: encoding(std::move(some_encoding)) {
// move encoding in
}
typed_encoding(const Encoding& some_encoding)
: encoding(some_encoding) {
// copy encoding in
}
virtual std::unique_ptr<erased_state> create_decode_state() override {
return std::make_unique<decode_state>(encoding);
}
virtual std::unique_ptr<erased_state> create_encode_state() override {
return std::make_unique<encode_state>(encoding);
}
};
std::unique_ptr<erased_encoding> stored;
public:
template <typename AnyEncoding>
type_erased(AnyEncoding&& some_encoding)
: stored_ptr(std::make_unique<typed_encoding<std::remove_cvref_t<AnyEncoding>>>(
std::forward<AnyEncoding>(some_encoding))
) {
// store any encoding in the member unique pointer
}
// ... rest of the implementation
};
We can see that creating a state with a default constructor no longer works, because the state itself requires more information than can be known by just the constructor itself. It needs access to the wrapped encoding. The solution to this problem is to opt in when creating your state types, by giving your ``state`` type a constructor that takes the encoding type:

.. code-block:: cpp
:linenos:
:emphasize-lines: 36-39,46-49
class type_erased_encoding {
// from above, etc. …
public:
// public-facing wrappers
struct type_erased_decode_state {
public:
// special constructor!!
type_erased_state (const type_erased_encoding& encoding)
: stored(encoding.stored->create_decode_state()) {
}
private:
std::unique_ptr<erased_state> stored;
};
struct type_erased_encode_state {
public:
// special constructor!!
type_erased_state (const type_erased_encoding& encoding)
: stored(encoding.stored->create_encode_state()) {
// hold onto type-erased state
}
private:
std::unique_ptr<erased_state> stored;
};
using decode_state = type_erased_state;
using encode_state = type_erased_state;
// ... rest of the Lucky 7 members
};
These special constructors will create the necessary state using information from the ``type_erased_encoding`` to do it properly. This will allow us to have states that properly reflect what was erased when we perform a given higher-level :doc:`conversion operation or algorithm </design/converting>`.

This encoding-aware state-construction behavior is detected by the :doc:`ztd::text::is_state_independent </api/is_state_independent>`, :doc:`ztd::text::is_decode_state_independent </api/is_decode_state_independent>`, and :doc:`ztd::text::is_encode_state_independent </api/is_encode_state_independent>` classifications.

These classifications are used in the :doc:`ztd::text::make_decode_state </api/make_decode_state>` and :doc:`ztd::text::make_encode_state </api/make_encode_state>` function calls to correctly create a new state type, which is what the API uses to make states for its higher-level function calls. If you are working in a generic context, you should use these functions too when working at this level of detail. However, if you're not working with templates, consider simply using the already-provided :doc:`ztd::text::any_encoding </api/encodings/any_encoding>` to do exactly what this example shows, with some extra attention to detail and internal optimizations done on your behalf.
12 changes: 6 additions & 6 deletions documentation/source/design/lucky 7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,16 @@ Lucky 7
// (6)
ue_encode_result encode_one(
ztd::text::span<char32_t> input,
ztd::text::span<char> output,
ztd::text::span<const code_point> input,
ztd::text::span<code_unit> output,
state& current,
ue_encode_error_handler error_handler
);
// (7)
ue_decode_result decode_one(
ztd::text::span<char> input,
ztd::text::span<char32_t> output,
ztd::text::span<const code_unit> input,
ztd::text::span<code_point> output,
state& current,
ue_decode_error_handler error_handler
);
Expand Down Expand Up @@ -135,13 +135,13 @@ Result types are specific structs in the library that mark encode and decode ope
#include <ztd/text/decode_result.hpp>
using ue_decode_result = ztd::text::decode_result<
ztd::text::span<char>,
ztd::text::span<const char>,
ztd::text::span<char32_t>,
empty_struct
>;
using ue_encode_result = ztd::text::encode_result<
ztd::text::span<char32_t>,
ztd::text::span<const char32_t>,
ztd::text::span<char>,
empty_struct
>;
Expand Down

0 comments on commit 72772ce

Please sign in to comment.