Skip to content

Commit

Permalink
✨ skip_input_error implementation
Browse files Browse the repository at this point in the history
— ✨ constexpr Shift-JIS implementation
— ✨ New skip_handler with grafting into UTF-8 and UTF-16 (and all derivatives) to work with it
— 🛠 Improvements on all replacement handlers
— 🛠 Separation of many error handlers from the core file
— 📝✨ Sincere improvements to getting started and documentation (almost done!)
— 📝 Benchmarks placed properly and live.
  • Loading branch information
ThePhD committed Dec 10, 2022
1 parent 4269245 commit e96e1e0
Show file tree
Hide file tree
Showing 343 changed files with 7,036 additions and 3,669 deletions.
7 changes: 6 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -266,5 +266,10 @@ if(ZTD_TEXT_SCRATCH)
${--warn-pedantic}
${--warn-all}
${--warn-extra}
${--warn-errors})
${--warn-errors}
)
target_compile_definitions(scratch
PRIVATE
ZTD_ASSERT_CHECKS=1
)
endif()
4 changes: 2 additions & 2 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ add_subdirectory(barrier)
add_subdirectory(function_form)
add_subdirectory(conversion_speed)

add_custom_target(ztd.text.benchmarks.all_graphs
add_custom_target(ztd.text.benchmarks.graphs.all
COMMENT "[ztd.text] graphing all benchmarks...")
add_dependencies(ztd.text.benchmarks.all_graphs
add_dependencies(ztd.text.benchmarks.graphs.all
ztd.tools.benchmark_grapher.function_form.large
ztd.tools.benchmark_grapher.function_form.small
ztd.tools.benchmark_grapher.conversion_speed.small
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/barrier/include/barrier/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,4 @@

// clang-format on

#endif // ZTD_TEXT_BENCHMARKS_BARRIER_API_H
#endif
2 changes: 1 addition & 1 deletion benchmarks/barrier/include/barrier/barrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@
#include <barrier/convert.h>
#include <barrier/data.h>

#endif // ZTD_TEXT_BENCHMARKS_BARRIER_BARRIER_H
#endif
2 changes: 1 addition & 1 deletion benchmarks/barrier/include/barrier/convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ ZTD_C_LANGUAGE_LINKAGE_I_
ZTD_TEXT_BENCHMARKS_BARRIER_API_LINKAGE_I_ error_ptr_ptr ptr_struct_ptr_ptr(ztd_char8_t* output_ptr,
ztd_char8_t* output_ptr_last, const ztd_char32_t* input_ptr, const ztd_char32_t* input_ptr_last);

#endif // ZTD_TEXT_BENCHMARKS_BARRIER_CONVERT_H
#endif
2 changes: 1 addition & 1 deletion benchmarks/barrier/include/barrier/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,4 @@ ZTD_TEXT_BENCHMARKS_BARRIER_API_LINKAGE_I_ c_span_char16_t u16_basic_source_data
ZTD_C_LANGUAGE_LINKAGE_I_
ZTD_TEXT_BENCHMARKS_BARRIER_API_LINKAGE_I_ c_span_char32_t u32_basic_source_data;

#endif // ZTD_TEXT_BENCHMARKS_BARRIER_DATA_H
#endif
2 changes: 1 addition & 1 deletion benchmarks/barrier/include/barrier/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@

#include <barrier/api.h>

#endif // ZTD_TEXT_BENCHAMRKS_BARRIER_VERSION_H
#endif
4 changes: 2 additions & 2 deletions benchmarks/conversion_speed/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,5 @@ function (generate_converion_speed_benchmark_targets name data_name title)
)
endfunction()

generate_converion_speed_benchmark_targets(large unicode "All Unicode Scalar Values (~4.4 MB)")
generate_converion_speed_benchmark_targets(small basic_source "C Character Set (97 Bytes)")
generate_converion_speed_benchmark_targets(large unicode "All Unicode Scalar Values")
generate_converion_speed_benchmark_targets(small basic_source "C Character Set")
133 changes: 77 additions & 56 deletions benchmarks/conversion_speed/graph_config.in.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,109 +9,85 @@
"name": "UTF-16 to UTF-32 (Well-Formed)",
"pattern": "utf16_to_utf32_well_formed_",
"ascending": false,
"description": "A conversion from UTF-16 data to UTF-32 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-16 data to UTF-32 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf16_to_utf32_well_formed"
},
{
"name": "UTF-16 to UTF-8 (Well-Formed)",
"pattern": "utf16_to_utf8_well_formed_",
"ascending": false,
"description": "A conversion from UTF-16 data to UTF-8 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-16 data to UTF-8 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf16_to_utf8_well_formed"
},
{
"name": "UTF-8 to UTF-16 (Well-Formed)",
"pattern": "utf8_to_utf16_well_formed_",
"ascending": false,
"description": "A conversion from UTF-8 data to UTF-16 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-8 data to UTF-16 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf8_to_utf16_well_formed"
},
{
"name": "UTF-8 to UTF-32 (Well-Formed)",
"pattern": "utf8_to_utf32_well_formed_",
"ascending": false,
"description": "A conversion from UTF-8 data to UTF-32 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-8 data to UTF-32 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf8_to_utf32_well_formed"
},
{
"name": "UTF-32 to UTF-8 (Well-Formed)",
"pattern": "utf32_to_utf8_well_formed_",
"ascending": false,
"description": "A conversion from UTF-32 data to UTF-8 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-32 data to UTF-8 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf32_to_utf8_well_formed"
},
{
"name": "UTF-32 to UTF-16 (Well-Formed)",
"pattern": "utf32_to_utf16_well_formed_",
"ascending": false,
"description": "A conversion from UTF-32 data to UTF-16 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-32 data to UTF-16 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf32_to_utf16_well_formed"
},
{
"name": "UTF-16 to UTF-32 (Well-Formed, with Init)",
"pattern": "utf16_to_utf32_init_well_formed_",
"ascending": false,
"description": "A conversion from UTF-16 data to UTF-32 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-16 data to UTF-32 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf16_to_utf32_well_formed_init"
},
{
"name": "UTF-16 to UTF-8 (Well-Formed, with Init)",
"pattern": "utf16_to_utf8_init_well_formed_",
"ascending": false,
"description": "A conversion from UTF-16 data to UTF-8 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-16 data to UTF-8 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf16_to_utf8_well_formed_init"
},
{
"name": "UTF-8 to UTF-16 (Well-Formed, with Init)",
"pattern": "utf8_to_utf16_init_well_formed_",
"ascending": false,
"description": "A conversion from UTF-8 data to UTF-16 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-8 data to UTF-16 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf8_to_utf16_well_formed_init"
},
{
"name": "UTF-8 to UTF-32 (Well-Formed, with Init)",
"pattern": "utf8_to_utf32_init_well_formed_",
"ascending": false,
"description": "A conversion from UTF-8 data to UTF-32 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-8 data to UTF-32 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf8_to_utf32_well_formed_init"
},
{
"name": "UTF-32 to UTF-8 (Well-Formed, with Init)",
"pattern": "utf32_to_utf8_init_well_formed_",
"ascending": false,
"description": "A conversion from UTF-32 data to UTF-8 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-32 data to UTF-8 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf32_to_utf8_well_formed_init"
},
{
"name": "UTF-32 to UTF-16 (Well-Formed, with Init)",
"pattern": "utf32_to_utf16_init_well_formed_",
"ascending": false,
"description": "A conversion from UTF-32 data to UTF-16 data that contains no ill-formed or incomplete sequences."
},
{
"name": "UTF-16 to UTF-32 (Well-Formed, Assumed Valid)",
"pattern": "utf16_to_utf32_unchecked_well_formed_",
"ascending": false,
"description": "A conversion from UTF-16 data to UTF-32 data that contains no ill-formed or incomplete sequences."
},
{
"name": "UTF-16 to UTF-8 (Well-Formed, Assumed Valid)",
"pattern": "utf16_to_utf8_unchecked_well_formed_",
"ascending": false,
"description": "A conversion from UTF-16 data to UTF-8 data that contains no ill-formed or incomplete sequences."
},
{
"name": "UTF-8 to UTF-16 (Well-Formed, Assumed Valid)",
"pattern": "utf8_to_utf16_unchecked_well_formed_",
"ascending": false,
"description": "A conversion from UTF-8 data to UTF-16 data that contains no ill-formed or incomplete sequences."
},
{
"name": "UTF-8 to UTF-32 (Well-Formed, Assumed Valid)",
"pattern": "utf8_to_utf32_unchecked_well_formed_",
"ascending": false,
"description": "A conversion from UTF-8 data to UTF-32 data that contains no ill-formed or incomplete sequences."
},
{
"name": "UTF-32 to UTF-8 (Well-Formed, Assumed Valid)",
"pattern": "utf32_to_utf8_unchecked_well_formed_",
"ascending": false,
"description": "A conversion from UTF-32 data to UTF-8 data that contains no ill-formed or incomplete sequences."
},
{
"name": "UTF-32 to UTF-16 (Well-Formed, Assumed Valid)",
"pattern": "utf32_to_utf16_unchecked_well_formed_",
"ascending": false,
"description": "A conversion from UTF-32 data to UTF-16 data that contains no ill-formed or incomplete sequences."
"description": "A conversion from UTF-32 data to UTF-16 data that contains no ill-formed or incomplete sequences.",
"file_name": "utf32_to_utf16_well_formed_init"
}
],
"data_groups": [
Expand All @@ -136,12 +112,12 @@
"description": "Measures the ztd.cuneicode library from Shepherd's Oasis and its encoding routines, particularly the non-typed conversion routines from its Conversion Registry abstraction with all defaults left alone."
},
{
"name": "cuneicode registry (unchecked & unbounded)",
"pattern": "cuneicode_registry_unbounded_unchecked$",
"name": "cuneicode registry (unbounded, assume valid)",
"pattern": "cuneicode_registry_unchecked_unbounded$",
"description": "Measures the ztd.cuneicode library from Shepherd's Oasis and its encoding routines, particularly the non-typed conversion routines from its Conversion Registry abstraction with specific conversion pathways overridden (without providing an output size, and without checking input validity)."
},
{
"name": "cuneicode registry (unchecked)",
"name": "cuneicode registry (assume valid)",
"pattern": "cuneicode_registry_unchecked$",
"description": "Measures the ztd.cuneicode library from Shepherd's Oasis and its encoding routines, particularly the non-typed conversion routines from its Conversion Registry abstraction with all defaults left alone (without checking for the validity of the input)."
},
Expand Down Expand Up @@ -175,6 +151,11 @@
"pattern": "simdutf$",
"description": "Measures the performance of Daniel Lemire's simdutf library, which is meant to be highly optimized and performant under all unicode workloads."
},
{
"name": "simdutf (unbounded, assume valid)",
"pattern": "simdutf_unchecked$",
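"description": "Measures the performance of Daniel Lemire's simdutf library, which is meant to be highly optimized and performant under all Unicode workloads, using its conversion routines that assume the input is valid and do not check for available output space."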
},
{
"name": "Rust's encoding_c",
"pattern": "encoding_c$",
Expand All @@ -183,17 +164,22 @@
{
"name": "Rust's encoding_c (manual)",
"pattern": "encoding_c_direct$",
"description": "Measures the performance of the Rust encoding_rs library, called through its C bindings encoding_c directly (typically statically linked in to achieve the same optimization potential). Used in the Gecko web engine."
"description": "Measures the performance of the Rust encoding_rs library, called through its C bindings encoding_c and not using the general-purpose conversion routes (typically statically linked in to achieve the same optimization potential). Used in the Gecko web engine."
},
{
"name": "ctre",
"pattern": "ctre$",
"description": "Measures the performance of the Compile-Time Regular Expression (CTRE) library's internal encoding conversion routines."
"name": "ctre (assume valid)",
"pattern": "ctre_unchecked$",
"description": "Measures the performance of the Compile-Time Regular Expression (CTRE) library's internal encoding conversion routines, which assume the input is valid (or produce invalid code points when it is not)."
},
{
"name": "utf8cpp (unbounded)",
"pattern": "utf8cpp$",
"description": "Measures the utf8cpp library and its conversion routines."
"description": "Measures the utf8cpp library and its conversion routines, which do not check for available output space."
},
{
"name": "utf8cpp (unbounded, assume valid)",
"pattern": "utf8cpp_unchecked$",
"description": "Measures the utf8cpp library and its conversion routines, which do not check for available output space and also assume the input is valid."
},
{
"name": "ICU (2 UConverter convertEx)",
Expand Down Expand Up @@ -255,11 +241,46 @@
"pattern": "ztd_text_view$",
"description": "Measures the ztd.text library conversion routine using the ztd::text::transcode_view range abstraction."
},
{
"name": "ztd.text (assume valid)",
"pattern": "ztd_text_unchecked$",
"description": "Measures the ztd.text library conversion routine using the ztd::text::transcode function, with the input assumed to be valid."
},
{
"name": "ztd.text (unbounded, assume valid)",
"pattern": "ztd_text_unbounded_unchecked$",
"description": "Measures the ztd.text library conversion routine using the ztd::text::transcode function, with output bounds checking turned off and the input assumed to be valid."
},
{
"name": "ztd.text (single loop, assume valid)",
"pattern": "ztd_text_single_unchecked$",
"description": "Measures the ztd.text library conversion routine using the ztd::text::transcode_one function in a loop, with the input assumed to be valid."
},
{
"name": "ztd.text (single loop, unbounded, assume valid)",
"pattern": "ztd_text_single_unbounded_unchecked$",
"description": "Measures the ztd.text library conversion routine using the ztd::text::transcode_one function in a loop, with output bounds checking turned off and the input assumed to be valid."
},
{
"name": "ztd.text view (assume valid)",
"pattern": "ztd_text_view_unchecked$",
"description": "Measures the ztd.text library conversion routine using the ztd::text::transcode_view range abstraction, with the input assumed to be valid."
},
{
"name": "Win32",
"pattern": "windows_api$",
"description": "Measures the Win32 API conversion routine using the MultiByteToWideChar function or the WideCharToMultiByte function."
},
{
"name": "Standard C",
"pattern": "standard_c$",
"description": "Measures the <uchar.h>, <wchar.h>, and similar functionality present in the C standard library."
},
{
"name": "Standard C++",
"pattern": "standard_cpp$",
"description": "Measures the deprecated std::codecvt_* functionality present in the C++ standard library."
},
{
"name": "noop",
"pattern": "noop$",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,45 +39,49 @@
#include <vector>
#include <memory_resource>

struct conversion_deleter {
void operator()(cnc_conversion* conv) const noexcept {
cnc_conv_delete(conv);
}
};
inline namespace ztd_text_benchmarks_conversion_speed_cuneicode_help {

struct registry_deleter {
void operator()(cnc_conversion_registry* registry) const noexcept {
cnc_registry_delete(registry);
}
};
struct conversion_deleter {
void operator()(cnc_conversion* conv) const noexcept {
cnc_conv_delete(conv);
}
};

struct conversion_closer {
void operator()(cnc_conversion* conv) const noexcept {
cnc_conv_close(conv);
}
};
struct registry_deleter {
void operator()(cnc_conversion_registry* registry) const noexcept {
cnc_registry_delete(registry);
}
};

struct conversion_closer {
void operator()(cnc_conversion* conv) const noexcept {
cnc_conv_close(conv);
}
};

struct registry_closer {
void operator()(cnc_conversion_registry* registry) const noexcept {
cnc_close_registry(registry);
struct registry_closer {
void operator()(cnc_conversion_registry* registry) const noexcept {
cnc_registry_close(registry);
}
};

inline void* mbr_allocate(size_t requested_size, size_t alignment, size_t* p_actual_size, void* user_data) {
std::pmr::monotonic_buffer_resource& mbr = *static_cast<std::pmr::monotonic_buffer_resource*>(user_data);
void* ptr = mbr.allocate(requested_size, alignment);
*p_actual_size = requested_size;
return ptr;
}
};

inline void* mbr_allocate(size_t requested_size, size_t alignment, size_t* p_actual_size, void* user_data) {
std::pmr::monotonic_buffer_resource& mbr = *static_cast<std::pmr::monotonic_buffer_resource*>(user_data);
void* ptr = mbr.allocate(requested_size, alignment);
*p_actual_size = requested_size;
return ptr;
}
inline void mbr_deallocate(void* ptr, size_t ptr_size, size_t alignment, void* user_data) {
std::pmr::monotonic_buffer_resource& mbr = *static_cast<std::pmr::monotonic_buffer_resource*>(user_data);
mbr.deallocate(ptr, ptr_size, alignment);
}

inline void mbr_deallocate(void* ptr, size_t ptr_size, size_t alignment, void* user_data) {
std::pmr::monotonic_buffer_resource& mbr = *static_cast<std::pmr::monotonic_buffer_resource*>(user_data);
mbr.deallocate(ptr, ptr_size, alignment);
}
inline cnc_conversion_heap create_monotonic_buffer_heap(std::pmr::monotonic_buffer_resource& resource) {
cnc_conversion_heap mbr_heap = { &resource, mbr_allocate, nullptr, nullptr, nullptr, mbr_deallocate };
return mbr_heap;
}

inline cnc_conversion_heap create_monotonic_buffer_heap(std::pmr::monotonic_buffer_resource& resource) {
cnc_conversion_heap mbr_heap = { &resource, mbr_allocate, nullptr, nullptr, nullptr, mbr_deallocate };
return mbr_heap;
}
} // namespace ztd_text_benchmarks_conversion_speed_cuneicode_help

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ TEXT_TRANSCODE_EXTENSION_POINTS(16, 32, be, le, , );
TEXT_TRANSCODE_EXTENSION_POINTS(32, 8, , , , );
/// This one is expanded below, as an example, with commentary!
// TEXT_TRANSCODE_EXTENSION_POINTS(8, 32, , , , );
// Macro hygiene!
#undef TEXT_TRANSCODE_EXTENSION_POINTS

template <typename FromErrorHandler, typename ToErrorHandler, typename FromState, typename ToState, typename PivotRange>
auto text_transcode(::ztd::tag<ztd::text::utf8_t, ztd::text::utf32_t>, ztd::span<const ztd_char8_t> input,
Expand Down Expand Up @@ -214,7 +216,4 @@ auto text_transcode(::ztd::tag<ztd::text::utf8_t, ztd::text::utf32_t>, ztd::span
from_state, to_state, __pivot);
}

// Macro hygiene!
#undef TEXT_TRANSCODE_EXTENSION_POINTS

#endif

0 comments on commit e96e1e0

Please sign in to comment.