From e001783e71d69c888e888c068f439d8abcba9979 Mon Sep 17 00:00:00 2001 From: Joseph Wright Date: Thu, 4 Aug 2022 20:34:50 +0100 Subject: [PATCH] Add \text_map_... functions Currently the regional indicator rule is not implemented: I am not clear on the exact requirement. --- l3kernel/CHANGELOG.md | 2 + l3kernel/doc/source3body.tex | 1 + l3kernel/l3.ins | 1 + l3kernel/l3text-map.dtx | 443 ++++++++++++++++++++++++ l3kernel/l3text.dtx | 49 +++ l3kernel/l3unicode.dtx | 48 +++ l3kernel/testfiles/m3text006.luatex.tlg | 9 + l3kernel/testfiles/m3text006.lvt | 28 ++ l3kernel/testfiles/m3text006.tlg | 9 + l3kernel/testfiles/m3text006.xetex.tlg | 9 + 10 files changed, 599 insertions(+) create mode 100644 l3kernel/l3text-map.dtx create mode 100644 l3kernel/testfiles/m3text006.luatex.tlg create mode 100644 l3kernel/testfiles/m3text006.lvt create mode 100644 l3kernel/testfiles/m3text006.tlg create mode 100644 l3kernel/testfiles/m3text006.xetex.tlg diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md index 3a4e44ad34..9733c15d1e 100644 --- a/l3kernel/CHANGELOG.md +++ b/l3kernel/CHANGELOG.md @@ -11,6 +11,8 @@ this project uses date-based 'snapshot' version identifiers. 
- Support for case changing Croatian diagraph with 8-bit engines - Support accent removal when uppercasing Greek with 8-bit engines - Function `\sys_ensure_backend:` +- `\text_map_function:nN` and `\text_map_inline:nn` for mapping to + graphemes in textual input ### Fixed - Behavior of `\color_math:nn` in alignments diff --git a/l3kernel/doc/source3body.tex b/l3kernel/doc/source3body.tex index 20e1875361..c9b0f3448d 100644 --- a/l3kernel/doc/source3body.tex +++ b/l3kernel/doc/source3body.tex @@ -589,6 +589,7 @@ \part{Text manipulation} \clist_gput_right:Nn \g_docinput_clist { l3text-case.dtx , + l3text-map.dtx , l3text-purify.dtx } \ExplSyntaxOff diff --git a/l3kernel/l3.ins b/l3kernel/l3.ins index fbcc815d9f..08949c097e 100644 --- a/l3kernel/l3.ins +++ b/l3kernel/l3.ins @@ -106,6 +106,7 @@ and all files in that bundle must be distributed together. \from{l3unicode.dtx} {package} \from{l3text.dtx} {package} \from{l3text-case.dtx} {package} + \from{l3text-map.dtx} {package} \from{l3text-purify.dtx}{package} \from{l3candidates.dtx} {package} \from{l3legacy.dtx} {package} diff --git a/l3kernel/l3text-map.dtx b/l3kernel/l3text-map.dtx new file mode 100644 index 0000000000..a99b8deb72 --- /dev/null +++ b/l3kernel/l3text-map.dtx @@ -0,0 +1,443 @@ +% \iffalse meta-comment +% +%% File: l3text-map.dtx +% +% Copyright (C) 2022 The LaTeX Project +% +% It may be distributed and/or modified under the conditions of the +% LaTeX Project Public License (LPPL), either version 1.3c of this +% license or (at your option) any later version. The latest version +% of this license is in the file +% +% https://www.latex-project.org/lppl.txt +% +% This file is part of the "l3kernel bundle" (The Work in LPPL) +% and all files in that bundle must be distributed together. +% +% ----------------------------------------------------------------------- +% +% The development version of the bundle can be found at +% +% https://github.com/latex3/latex3 +% +% for those people who are interested. 
+% +%<*driver> +\documentclass[full,kernel]{l3doc} +\begin{document} + \DocInput{\jobname.dtx} +\end{document} +%</driver> +% \fi +% +% \title{^^A +% The \textsf{l3text-map} package: text processing (mapping)^^A +% } +% +% \author{^^A +% The \LaTeX{} Project\thanks +% {^^A +% E-mail: +% \href{mailto:latex-team@latex-project.org} +% {latex-team@latex-project.org}^^A +% }^^A +% } +% +% \date{Released 2022-07-15} +% +% \maketitle +% +% \begin{documentation} +% +% \end{documentation} +% +% \begin{implementation} +% +% \section{\pkg{l3text-map} implementation} +% +% \begin{macrocode} +%<*package> +% \end{macrocode} +% +% \begin{macrocode} +%<@@=text> +% \end{macrocode} +% +% \subsection{Mapping to text} +% +% \begin{macro}[EXP]{\text_map_function:nN} +% \begin{macro}[EXP]{\@@_map_function:nN} +% \begin{macro}[EXP]{\@@_map_loop:Nnw} +% \begin{macro}[EXP]{\@@_map_group:Nnn} +% \begin{macro}[EXP]{\@@_map_space:Nnw} +% \begin{macro}[EXP]{\@@_map_N_type:NnN} +% \begin{macro}[EXP]{\@@_map_char:NnN} +% \begin{macro}[EXP]{\@@_map_char:NnNN} +% \begin{macro}[EXP]{\@@_map_char:NnNNN} +% \begin{macro}[EXP]{\@@_map_char:NnNNNN} +% \begin{macro}[EXP]{\@@_map_codepoint:Nnn} +% \begin{macro}[EXP]{\@@_map_CR:Nnw} +% \begin{macro}[EXP]{\@@_map_CR:NnN} +% \begin{macro}[EXP]{\@@_map_class:Nnnn} +% \begin{macro}[EXP]{\@@_map_class:nNnnn} +% \begin{macro}[EXP]{\@@_map_class_loop:Nnnnw} +% \begin{macro}[EXP]{\@@_map_class_end:nw} +% \begin{macro}[EXP] +% { +% \@@_map_Control:Nnn , +% \@@_map_Extend:Nnn , +% \@@_map_Prepend:Nnn , +% \@@_map_SpacingMark:Nnn +% } +% \begin{macro}[EXP] +% { +% \@@_map_not_Control:Nnn , +% \@@_map_not_Extend:Nnn , +% \@@_map_not_Prepend:Nnn , +% \@@_map_not_SpacingMark:Nnn +% } +% \begin{macro}[EXP]{\@@_map_output:Nn} +% \begin{macro}[EXP]{\text_map_break:} +% \begin{macro}[EXP]{\text_map_break:n} +% The standard lead-off for an action loop. 
+%    \begin{macrocode}
+\cs_new:Npn \text_map_function:nN #1#2
+  { \exp_args:Ne \@@_map_function:nN { \text_expand:n {#1} } #2 }
+\cs_new:Npn \@@_map_function:nN #1#2
+  {
+    \@@_map_loop:Nnw #2 { } #1
+      \q_@@_recursion_tail \q_@@_recursion_stop
+    \prg_break_point:Nn \text_map_break: { }
+  }
+%    \end{macrocode}
+%   The standard set up for an \enquote{action} loop. Groups are handled by
+%   recursion, spaces are treated similarly: both count as grapheme boundaries.
+%   For \texttt{N}-type tokens, we filter out control sequences (again
+%   a boundary), then move on to further analysis. Throughout, |#1| is the mapping function and |#2| the material collected so far for the current grapheme (empty at a boundary). For a group the braced content arrives as |#3|: the nested loop has to run over that content, not over |#2|, which has already been passed to the mapping function by the time the nested loop starts.
+%    \begin{macrocode}
+\cs_new:Npn \@@_map_loop:Nnw #1#2#3 \q_@@_recursion_stop
+  {
+    \tl_if_head_is_N_type:nTF {#3}
+      { \@@_map_N_type:NnN }
+      {
+        \tl_if_head_is_group:nTF {#3}
+          { \@@_map_group:Nnn }
+          { \@@_map_space:Nnw }
+      }
+        #1 {#2} #3 \q_@@_recursion_stop
+  }
+\cs_new:Npn \@@_map_group:Nnn #1#2#3
+  {
+    \@@_map_output:Nn #1 {#2}
+    {
+      \@@_map_loop:Nnw #1 { } #3
+        \q_@@_recursion_tail \q_@@_recursion_stop
+      \prg_break_point:Nn \text_map_break: { }
+    }
+    \@@_map_loop:Nnw #1 { }
+  }
+\use:x
+  { \cs_new:Npn \exp_not:N \@@_map_space:Nnw ##1##2 \c_space_tl }
+  {
+    \@@_map_output:Nn #1 {#2}
+    #1 { ~ }
+    \@@_map_loop:Nnw #1 { }
+  }
+\cs_new:Npn \@@_map_N_type:NnN #1#2#3
+  {
+    \@@_if_recursion_tail_stop_do:Nn #3
+      {
+        \@@_map_output:Nn #1 {#2}
+        \text_map_break:
+      }
+    \token_if_cs:NTF #3
+      {
+        \@@_map_output:Nn #1 {#2}
+        #1 {#3}
+        \@@_map_loop:Nnw #1 { }
+      }
+      { \@@_map_char:NnN #1 {#2} #3 }
+  }
+%    \end{macrocode}
+%   We want to keep common code paths, so collect up one Unicode codepoint
+%   as a single argument in an engine-independent way.
+%    \begin{macrocode}
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \cs_new:Npn \@@_map_char:NnN #1#2#3 % LuaTeX/XeTeX: the token #3 is passed on alone
+      { \@@_map_codepoint:Nnn #1 {#2} #3 }
+  }
+  {
+    \cs_new:Npn \@@_map_char:NnN #1#2#3 % other engines: the value of #3 selects how many tokens to grab
+      {
+        \int_compare:nNnTF { `#3 } > { "80 }
+          {
+            \int_compare:nNnTF { `#3 } < { "E0 }
+              { \@@_map_char:NnNN } % below "E0: #3 plus one more token
+              {
+                \int_compare:nNnTF { `#3 } < { "F0 }
+                  { \@@_map_char:NnNNN } % below "F0: #3 plus two more tokens
+                  { \@@_map_char:NnNNNN } % otherwise: #3 plus three more tokens
+              }
+          }
+          { \@@_map_codepoint:Nnn } % up to "80: #3 on its own
+            #1 {#2} #3
+      }
+    \cs_new:Npn \@@_map_char:NnNN #1#2#3#4
+      { \@@_map_codepoint:Nnn #1 {#2} {#3#4} }
+    \cs_new:Npn \@@_map_char:NnNNN #1#2#3#4#5
+      { \@@_map_codepoint:Nnn #1 {#2} {#3#4#5} }
+    \cs_new:Npn \@@_map_char:NnNNNN #1#2#3#4#5#6
+      { \@@_map_codepoint:Nnn #1 {#2} {#3#4#5#6} }
+  }
+%    \end{macrocode}
+%   At this stage one Unicode codepoint has been collected as a single
+%   argument (|#3|), whichever engine is in use. We can then pull out
+%   the special cases: hard-coded for speed so not actually using the
+%   grapheme data. The carriage return case needs a bit of context handling
+%   so has an auxiliary. Codepoint U+200D is the zero-width joiner, which has
+%   no context to concern us: just don't break. Anything else starts the class look-up with the \texttt{Control} class.
+%    \begin{macrocode}
+\cs_new:Npn \@@_map_codepoint:Nnn #1#2#3
+  {
+    \@@_map_codepoint_compare:nNnTF {#3} = { "0D }
+      {
+        \@@_map_output:Nn #1 {#2} % material before the CR is complete
+        \@@_map_CR:Nnw #1 {#3}
+      }
+      {
+        \@@_map_codepoint_compare:nNnTF {#3} = { "200D }
+          { \@@_map_loop:Nnw #1 {#2#3} } % append to the current collection and carry on
+          { \@@_map_class:Nnnn #1 {#2} {#3} { Control } }
+      }
+  }
+%    \end{macrocode}
+%   A carriage return is a boundary unless it is immediately followed by
+%   a line feed, in which case that pair is a boundary.
+%    \begin{macrocode}
+\cs_new:Npn \@@_map_CR:Nnw #1#2#3 \q_@@_recursion_stop % #1 = function, #2 = the CR, #3 = remaining input
+  {
+    \tl_if_head_is_N_type:nTF {#3}
+      { \@@_map_CR:NnN #1 {#2} }
+      {
+        #1 {#2}
+        \@@_map_loop:Nnw #1 { }
+      }
+        #3 \q_@@_recursion_stop
+  }
+\cs_new:Npn \@@_map_CR:NnN #1#2#3 % #3 = the N-type token following the CR
+  {
+    \@@_if_recursion_tail_stop_do:Nn #3
+      {
+        #1 {#2}
+        \text_map_break:
+      }
+    \bool_lazy_and:nnTF
+      { ! \token_if_cs_p:N #3 }
+      { \int_compare_p:nNn { `#3 } = { "0A } }
+      {
+        \@@_map_output:Nn #1 {#2#3}
+        \@@_map_loop:Nnw #1 { }
+      }
+      { #1 {#2} \@@_map_loop:Nnw #1 { } #3 } % no LF: the CR must still be passed on, as in the other branches, before #3 is re-examined
+  }
+%    \end{macrocode}
+%   There are various classes of character, and we deal with them all in
+%   the same general way. We need to examine the relevant list of codepoints:
+%   if we get a hit, then we do whatever the relevant action is. Otherwise
+%   we loop, but only if the current codepoint could still match: the
+%   loop stops early otherwise and we move forward. The list for class |#4| is the comma list \texttt{c_@@_grapheme_\meta{class}_clist}, whose entries have the form \meta{first}\texttt{..}\meta{last} and are read as hexadecimal; the test against \meta{first} is what allows the early exit.
+%    \begin{macrocode}
+\cs_new:Npn \@@_map_class:Nnnn #1#2#3#4
+  {
+    \exp_args:Nv \@@_map_class:nNnnn { c_@@_grapheme_ #4 _clist }
+      #1 {#2} {#3} {#4}
+  }
+\cs_new:Npn \@@_map_class:nNnnn #1#2#3#4#5
+  {
+    \@@_map_class_loop:Nnnnw #2 {#3} {#4} {#5}
+      #1 , \q_@@_recursion_tail .. , \q_@@_recursion_stop
+  }
+\cs_new:Npn \@@_map_class_loop:Nnnnw #1#2#3#4 #5 .. #6 ,
+  {
+    \@@_if_recursion_tail_stop_do:nn {#5}
+      { \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
+    \@@_map_codepoint_compare:nNnTF {#3} < { "#5 }
+      {
+        \@@_map_class_end:nw
+          { \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
+      }
+      {
+        \@@_map_codepoint_compare:nNnTF {#3} > { "#6 }
+          { \@@_map_class_loop:Nnnnw #1 {#2} {#3} {#4} }
+          {
+            \@@_map_class_end:nw
+              { \use:c { @@_map_ #4 :Nnn } #1 {#2} {#3} }
+          }
+      }
+  }
+\cs_new:Npn \@@_map_class_end:nw #1#2 \q_@@_recursion_stop {#1}
+%    \end{macrocode}
+%   Break before \emph{and} after.
+%    \begin{macrocode}
+\cs_new:Npn \@@_map_Control:Nnn #1#2#3
+  {
+    \@@_map_output:Nn #1 {#2}
+    \@@_map_output:Nn #1 {#3}
+    \@@_map_loop:Nnw #1 { }
+  }
+%    \end{macrocode}
+%   Keep collecting.
+% \begin{macrocode} +\cs_new:Npn \@@_map_Extend:Nnn #1#2#3 + { \@@_map_loop:Nnw #1 {#2#3} } +\cs_new_eq:NN \@@_map_SpacingMark:Nnn \@@_map_Extend:Nnn +% \end{macrocode} +% Retain and loop, outputting anything earlier. +% \begin{macrocode} +\cs_new:Npn \@@_map_Prepend:Nnn #1#2#3 + { + \@@_map_output:Nn #1 {#2} + \@@_map_loop:Nnw #1 {#3} + } +% \end{macrocode} +% Dealing with end-of-class is done such that we can be flexible. +% \begin{macrocode} +\cs_new:Npn \@@_map_not_Control:Nnn #1#2#3 + { \@@_map_class:Nnnn #1 {#2} {#3} { Extend } } +\cs_new:Npn \@@_map_not_Extend:Nnn #1#2#3 + { \@@_map_class:Nnnn #1 {#2} {#3} { SpacingMark } } +\cs_new:Npn \@@_map_not_SpacingMark:Nnn #1#2#3 + { \@@_map_class:Nnnn #1 {#2} {#3} { Prepend } } +\cs_new:Npn \@@_map_not_Prepend:Nnn #1#2#3 + { + \@@_map_output:Nn #1 {#2} + \@@_map_loop:Nnw #1 {#3} + } +% \end{macrocode} +% For the end of the process. +% \begin{macrocode} +\cs_new:Npn \@@_map_output:Nn #1#2 + { \tl_if_blank:nF {#2} { #1 {#2} } } +\cs_new:Npn \text_map_break: + { \prg_map_break:Nn \text_map_break: { } } +\cs_new:Npn \text_map_break:n + { \prg_map_break:Nn \text_map_break: } +% \end{macrocode} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% \end{macro} +% +% \begin{macro}[EXP, TF]{\@@_map_codepoint_compare:nNn} +% \begin{macro}[EXP]{\@@_map_codepoint_compare:N, \@@_map_codepoint_compare_aux:N} +% \begin{macro}[EXP]{\@@_map_codepoint_compare:NN} +% \begin{macro}[EXP]{\@@_map_codepoint_compare:NNN} +% \begin{macro}[EXP]{\@@_map_codepoint_compare:NNNN} +% Allows comparison for all engines using a first \enquote{character} followed +% by a codepoint. 
+%    \begin{macrocode}
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \prg_new_conditional:Npnn
+      \@@_map_codepoint_compare:nNn #1#2#3 { TF }
+      {
+        \int_compare:nNnTF { `#1 } #2 {#3} % LuaTeX/XeTeX: the character code of #1 is used directly
+          \prg_return_true: \prg_return_false:
+      }
+  }
+  {
+    \prg_new_conditional:Npnn
+      \@@_map_codepoint_compare:nNn #1#2#3 { TF }
+      {
+        \int_compare:nNnTF { \@@_map_codepoint_compare:N #1 } % other engines: first turn the tokens of #1 into one integer expression
+          #2 {#3}
+          \prg_return_true: \prg_return_false:
+      }
+    \cs_new:Npn \@@_map_codepoint_compare:N #1 % #1 = first token; same thresholds as used when the tokens were collected
+      {
+        \if_int_compare:w `#1 > "80 \exp_stop_f:
+          \if_int_compare:w `#1 < "E0 \exp_stop_f:
+            \exp_after:wN \exp_after:wN \exp_after:wN % the \exp_after:wN chains finish the open conditionals before the auxiliary takes its arguments
+              \@@_map_codepoint_compare:NN
+          \else:
+            \if_int_compare:w `#1 < "F0 \exp_stop_f:
+              \exp_after:wN \exp_after:wN \exp_after:wN
+                \exp_after:wN \exp_after:wN \exp_after:wN
+                \exp_after:wN \@@_map_codepoint_compare:NNN
+            \else:
+              \exp_after:wN \exp_after:wN \exp_after:wN
+                \exp_after:wN \exp_after:wN \exp_after:wN
+                \exp_after:wN \@@_map_codepoint_compare:NNNN
+            \fi:
+          \fi:
+        \else:
+          \exp_after:wN \@@_map_codepoint_compare_aux:N
+        \fi:
+          #1 % put the first token back in front of the remaining ones for the chosen auxiliary
+      }
+    \cs_new:Npn \@@_map_codepoint_compare_aux:N #1 { `#1 } % single token: its character code
+    \cs_new:Npn \@@_map_codepoint_compare:NN #1#2 % two tokens: offset from "C0 scaled by "40, plus offset from "80
+      { (`#1 - "C0) * "40 + `#2 - "80 }
+    \cs_new:Npn \@@_map_codepoint_compare:NNN #1#2#3 % three tokens: lead offset from "E0, scale factors "1000 and "40
+      { (`#1 - "E0) * "1000 + (`#2 - "80) * "40 + `#3 - "80 }
+    \cs_new:Npn \@@_map_codepoint_compare:NNNN #1#2#3#4 % four tokens: lead offset from "F0, scale factors "40000, "1000 and "40
+      {
+        (`#1 - "F0) * "40000
+          + (`#2 - "80) * "1000
+          + (`#3 - "80) * "40
+          + `#4 - "80
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}{\text_map_inline:nn}
+%   The standard non-expandable inline version.
+%    \begin{macrocode}
+\cs_new_protected:Npn \text_map_inline:nn #1#2 % #1 = text, #2 = inline code using ##1 for each grapheme
+  {
+    \int_gincr:N \g__kernel_prg_map_int % the counter value names the auxiliary for this mapping
+    \cs_gset_protected:cpn
+      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
+    \exp_args:Nnc \text_map_function:nN {#1} % the inline code is applied via the function version
+      { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
+    \prg_break_point:Nn \text_map_break:
+      { \int_gdecr:N \g__kernel_prg_map_int } % counter restored once the mapping is over
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+%    \begin{macrocode}
+%</package>
+%    \end{macrocode}
+%
+% \end{implementation}
+%
+% \PrintIndex
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index a98eb3cae3..f1110bdf27 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -299,6 +299,52 @@
 %   \texttt{true}.
 % \end{variable}
 %
+% \section{Mapping to graphemes}
+%
+% \begin{function}[rEXP, added = 2022-08-04]{\text_map_function:nN}
+%   \begin{syntax}
+%     \cs{text_map_function:nN} \Arg{text} \meta{function}
+%   \end{syntax}
+%   Takes user input \meta{text} and expands as described for
+%   \cs{text_expand:n}, then maps over the \emph{graphemes} within the
+%   result, passing each grapheme to the \meta{function}.
+%   Broadly a grapheme is a \enquote{user perceived character}:
+%   the Unicode Consortium describe the decomposition of input to
+%   graphemes in depth, and the approach used here implements that
+%   algorithm. The \meta{function} should accept one argument as \meta{balanced
+%   text}: this may be a single codepoint, multiple codepoints (or with an
+%   $8$-bit engine bytes) or may be a control sequence.
+%   See also \cs{text_map_inline:nn}.
+% \end{function}
+%
+% \begin{function}[added = 2022-08-04]{\text_map_inline:nn}
+%   \begin{syntax}
+%     \cs{text_map_inline:nn} \Arg{text} \Arg{inline function}
+%   \end{syntax}
+%   Takes user input \meta{text} and expands as described for
+%   \cs{text_expand:n}, then maps over the \emph{graphemes} within the
+%   result, passing each grapheme to the \meta{inline function}.
+% Broadly a grapheme is a \enquote{user perceived character}: +% the Unicode Consortium describe the decomposition of input to +% graphemes in depth, and the approach used here implements that +% algorithm. The \meta{inline function} should consist of code which +% receives the grapheme as \meta{balanced +% text}: this may be a single codepoint, multiple codepoints (or with an +% 8-bit engine bytes) or may be a control sequence. +% See also \cs{text_map_function:nN}. +% \end{function} +% +% \begin{function}[rEXP, added = 2022-08-04] +% {\text_map_break:, \text_map_break:n} +% \begin{syntax} +% \cs{text_map_break:} +% \cs{text_map_break:n} \Arg{code} +% \end{syntax} +% Used to terminate a \cs[no-index]{text_map_\ldots} function before all +% entries in the \meta{text} have been processed. This +% normally takes place within a conditional statement. +% \end{function} +% % \end{documentation} % % \begin{implementation} @@ -357,11 +403,14 @@ % \end{macro} % % \begin{macro}[EXP]{\@@_if_recursion_tail_stop_do:Nn} +% \begin{macro}[EXP]{\@@_if_recursion_tail_stop_do:nn} % Functions to query recursion quarks. % \begin{macrocode} \__kernel_quark_new_test:N \@@_if_recursion_tail_stop_do:Nn +\__kernel_quark_new_test:N \@@_if_recursion_tail_stop_do:nn % \end{macrocode} % \end{macro} +% \end{macro} % % \subsection{Utilities} % diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx index fbf1d0bd6c..86bd398179 100644 --- a/l3kernel/l3unicode.dtx +++ b/l3kernel/l3unicode.dtx @@ -252,6 +252,54 @@ % \end{macrocode} % % \begin{macrocode} +%<@@=text> +% \end{macrocode} +% +% Read the Unicode grapheme data. This is quite easy to handle and we only need +% codepoints, not characters, so there is no need to worry about the engine in use. +% As reading as a string is most convenient, we have to do some work to remove +% spaces: the hardest part of the entire process! 
+%    \begin{macrocode}
+\ior_new:N \g_@@_data_ior
+\group_begin:
+  \ior_open:Nn \g_@@_data_ior { GraphemeBreakProperty.txt }
+  \cs_set_nopar:Npn \l_@@_tmpa_str { } % ranges collected for the class being read
+  \cs_set_nopar:Npn \l_@@_tmpb_str { } % name of the class being read
+  \cs_set_protected:Npn \@@_data_auxi:w #1 ;~ #2 ~ #3 \q_stop % #1 = codepoint or range, #2 = class name, #3 = rest of line
+    {
+      \str_if_eq:VnF \l_@@_tmpb_str {#2}
+        {
+          \str_if_empty:NF \l_@@_tmpb_str
+            {
+              \clist_const:cx { c_@@_grapheme_ \l_@@_tmpb_str _clist } % class name changed: store the finished list
+                { \exp_after:wN \use_none:n \l_@@_tmpa_str } % drop the leading comma
+              \cs_set_nopar:Npn \l_@@_tmpa_str { }
+            }
+          \cs_set_nopar:Npn \l_@@_tmpb_str {#2}
+        }
+      \@@_data_auxii:w #1 .. #1 .. #1 \q_stop % repeating #1 means a single codepoint gives the same value for both ends
+    }
+  \cs_set_protected:Npn \@@_data_auxii:w #1 .. #2 .. #3 \q_stop % #1 = first, #2 = last, #3 discarded
+    {
+      \cs_set_nopar:Npx \l_@@_tmpa_str
+        {
+          \l_@@_tmpa_str ,
+          \tl_trim_spaces:n {#1} .. \tl_trim_spaces:n {#2}
+        }
+    }
+  \ior_str_map_inline:Nn \g_@@_data_ior
+    {
+      \str_if_eq:eeF { \tl_head:w #1 \c_hash_str \q_stop } { \c_hash_str } % skip lines starting with a hash
+        {
+          \tl_if_blank:nF {#1} % and blank lines
+            { \@@_data_auxi:w #1 \q_stop }
+        }
+    }
+  \ior_close:N \g_@@_data_ior % NOTE(review): a list is stored only when the class name changes, so the last class in the file never gets a clist
+\group_end:
+%    \end{macrocode}
+%
+%    \begin{macrocode}
 %</package>
 %    \end{macrocode}
 %
diff --git a/l3kernel/testfiles/m3text006.luatex.tlg b/l3kernel/testfiles/m3text006.luatex.tlg
new file mode 100644
index 0000000000..a7c25ab1ae
--- /dev/null
+++ b/l3kernel/testfiles/m3text006.luatex.tlg
@@ -0,0 +1,9 @@
+This is a generated file for the LaTeX (2e + expl3) validation system.
+Don't change this file in any respect.
+Author: Joseph Wright +============================================================ +TEST 1: Grapheme mapping +============================================================ +(H)(e)(l)(l)(o) +(S)(p)(ı)(n̈)(a)(l)( )(T)(a)(p) +============================================================ diff --git a/l3kernel/testfiles/m3text006.lvt b/l3kernel/testfiles/m3text006.lvt new file mode 100644 index 0000000000..e18e34cdb5 --- /dev/null +++ b/l3kernel/testfiles/m3text006.lvt @@ -0,0 +1,28 @@ +% +% Copyright (C) 2022 The LaTeX Project +% +\input{regression-test} + +\RequirePackage[enable-debug]{expl3} +\ExplSyntaxOn +\debug_on:n { check-declarations , deprecation , log-functions } +\ExplSyntaxOff + +\documentclass{minimal} + +\START +\AUTHOR{Joseph Wright} + +\ExplSyntaxOn + +\OMIT + \cs_set:Npn \test:n #1 { (#1) } +\TIMO + +\TESTEXP{Grapheme~mapping} + {% + \text_map_function:nN { Hello } \test:n \NEWLINE + \text_map_function:nN { Spın̈al~Tap } \test:n + } + +\END \ No newline at end of file diff --git a/l3kernel/testfiles/m3text006.tlg b/l3kernel/testfiles/m3text006.tlg new file mode 100644 index 0000000000..d2e8e1d97b --- /dev/null +++ b/l3kernel/testfiles/m3text006.tlg @@ -0,0 +1,9 @@ +This is a generated file for the LaTeX (2e + expl3) validation system. +Don't change this file in any respect. +Author: Joseph Wright +============================================================ +TEST 1: Grapheme mapping +============================================================ +(H)(e)(l)(l)(o) +(S)(p)(^^c4^^b1)(n^^cc^^88)(a)(l)( )(T)(a)(p) +============================================================ diff --git a/l3kernel/testfiles/m3text006.xetex.tlg b/l3kernel/testfiles/m3text006.xetex.tlg new file mode 100644 index 0000000000..a7c25ab1ae --- /dev/null +++ b/l3kernel/testfiles/m3text006.xetex.tlg @@ -0,0 +1,9 @@ +This is a generated file for the LaTeX (2e + expl3) validation system. +Don't change this file in any respect. 
+Author: Joseph Wright +============================================================ +TEST 1: Grapheme mapping +============================================================ +(H)(e)(l)(l)(o) +(S)(p)(ı)(n̈)(a)(l)( )(T)(a)(p) +============================================================