Rename \char_to_nfd:n to \codepoint_to_nfd:n

Also deprecate entirely the :N version.
latex3 · Oct 9, 2022 · 9e19f85 · 9e19f85
1 parent 38a1367
commit 9e19f85
Show file tree

Hide file tree

Showing 20 changed files with 93 additions and 197 deletions.
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
@@ -9,6 +9,7 @@ this project uses date-based 'snapshot' version identifiers.
 
 ### Added
 - `\codepoint_to_bytes:n`
+- `\codepoint_to_nfd:n`
 - `\codepoint_str_generate:n`
 
 ### Changed
@@ -21,6 +22,7 @@ this project uses date-based 'snapshot' version identifiers.
   esoteric case (issue [\#1113](https://github.com/latex3/latex3/issues/1113))
 
 ### Deprecated
+- `\char_to_nfd:N`, `\char_to_nfd:n`
 - `\char_to_utfviii_bytes:n`
 
 ## [2022-09-28]

diff --git a/l3kernel/doc/l3obsolete.txt b/l3kernel/doc/l3obsolete.txt
@@ -23,6 +23,7 @@ Function                            Date deprecated
 \char_str_mixed_case:N                   2020-01-03
 \char_str_upper_case:N                   2020-01-03
 \char_to_utfviii_bytes:n                 2022-10-09
+\char_to_nfd:N                           2022-10-09
 \cs_argument_spec:N                      2022-06-24
 \l_keys_key_tl                           2020-02-08
 \l_keys_path_tl                          2020-02-08

diff --git a/l3kernel/l3candidates.dtx b/l3kernel/l3candidates.dtx
@@ -626,28 +626,6 @@
 %   (\enquote{active}), and character code $32$ (space).
 % \end{variable}
 %
-% \begin{function}[added = 2020-01-02, rEXP]{\char_to_nfd:N}
-%   \begin{syntax}
-%     \cs{char_to_nfd:N} \meta{char}
-%   \end{syntax}
-%   Converts the \meta{char} to the Unicode Normalization Form Canonical
-%   Decomposition. The category code of the \emph{first} generated character is
-%   the same as the \meta{char}; second and subsequent chars will have the
-%   current category code, as they would if typed in directly. For $8$-bit
-%   engines, no change will take place.
-% \end{function}
-%
-% \begin{function}[added = 2022-08-29, rEXP]{\char_to_nfd:n}
-%   \begin{syntax}
-%     \cs{char_to_nfd:n} \Arg{codepoint}
-%   \end{syntax}
-%   Converts the (Unicode) \meta{codepoint} to the Unicode Normalization
-%   Form Canonical Decomposition. The generated character(s) will have
-%   the current category code as they would if typed in directly. In contrast
-%   to \cs{char_to_nfd:N}, this function \emph{does} decompose codepoints
-%   with $8$-bit engines.
-% \end{function}
-%
 % \begin{function}[added = 2018-09-23]
 %   {
 %     \peek_catcode_collect_inline:Nn,

diff --git a/l3kernel/l3deprecation.dtx b/l3kernel/l3deprecation.dtx
@@ -556,6 +556,15 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}[EXP]{\char_to_nfd:N, \char_to_nfd:n}
+%   Both deprecated names expand to \cs{codepoint_to_nfd:n}: the
+%   \texttt{N}-type form passes on the character code of its argument,
+%   whereas the \texttt{n}-type form takes no argument itself and leaves
+%   the following braced codepoint to be read by the new function.
+%    \begin{macrocode}
+\__kernel_patch_deprecation:nnNNpn { 2022-10-09 } { \codepoint_to_nfd:n }
+\cs_gset:Npn \char_to_nfd:N #1 { \codepoint_to_nfd:n {`#1} }
+\__kernel_patch_deprecation:nnNNpn { 2022-10-09 } { \codepoint_to_nfd:n }
+\cs_gset:Npn \char_to_nfd:n { \codepoint_to_nfd:n }
+%    \end{macrocode}
+% \end{macro}
+%
+%
 % \begin{macro}[EXP]
 %   {
 %     \char_lower_case:N, \char_upper_case:N,

diff --git a/l3kernel/l3kernel-functions.dtx b/l3kernel/l3kernel-functions.dtx
@@ -512,15 +512,6 @@
 %   \end{itemize}
 % \end{function}
 %
-% \begin{function}[EXP]{\__kernel_codepoint_nfd:n}
-%   \begin{syntax}
-%     \cs{__kernel_unicode_nfd:nn} \Arg{mapping}
-%   \end{syntax}
-%   Expands to a list of two balanced text, of which at least the first
-%   will contain a codepoint. This list of one or two codepoints specifies
-%   the normal form decomposition of the input \meta{codepoint}.
-% \end{function}
-%
 % \subsection{Kernel backend functions}
 %
 % These functions are required to pass information to the backend. The nature

diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
@@ -945,7 +945,7 @@
         \@@_change_case_if_greek:nTF { `#4 }
           {
             \exp_args:Ne \@@_change_case_upper_el:nnn
-              { \char_to_nfd:N #4 } {#2} {#3}
+              { \codepoint_to_nfd:n { `#4 } } {#2} {#3}
           }
           {
             \int_compare:nNnTF { `#4 } = { "0345 }

diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
@@ -1690,56 +1690,6 @@
 % \end{macro}
 % \end{macro}
 %
-% \begin{macro}[EXP]{\char_to_nfd:N}
-% \begin{macro}[EXP]{\char_to_nfd:n}
-% \begin{macro}[EXP]{\@@_to_nfd:nn}
-% \begin{macro}[EXP]{\@@_to_nfd:nnn}
-% \begin{macro}[EXP]{\@@_to_nfd:nnnn}
-% \begin{macro}[EXP]{\@@_to_nfd_generate:nn}
-% \begin{macro}[EXP]{\@@_to_nfd_generate:n}
-% \begin{macro}[EXP]{\@@_to_nfd_generate:nnnn}
-%   Converted to NFD is a potentially-recursive process: the key is to
-%   check if we get the input codepoint back again. As far as possible,
-%   we use the same path for all engines.
-%    \begin{macrocode}
-\bool_lazy_or:nnTF
-  { \sys_if_engine_luatex_p: }
-  { \sys_if_engine_xetex_p: }
-  {
-    \cs_new:Npn \char_to_nfd:N #1
-      { \@@_to_nfd:nn {`#1} { \@@_change_case_catcode:N #1 } }
-  }
-  {
-    \cs_new:Npn \char_to_nfd:N #1 { \exp_not:n {#1} }
-  }
-\cs_new:Npn \char_to_nfd:n #1
-  { \@@_to_nfd:nn {#1} { \char_value_catcode:n {#1} } }
-\cs_new:Npn \@@_to_nfd:nn #1#2
-  {
-    \exp_args:Ne \@@_to_nfd:nnn
-      { \__kernel_codepoint_nfd:n {#1} } {#1} {#2}
-  }
-\cs_new:Npn \@@_to_nfd:nnn #1#2#3 { \@@_to_nfd:nnnn #1 {#2} {#3} }
-\cs_new:Npn \@@_to_nfd:nnnn #1#2#3#4
-  {
-    \int_compare:nNnTF {#1} = {#3}
-      { \codepoint_generate:nn {#1} {#4} }
-      {
-        \@@_to_nfd:nn {#1} {#4}
-        \tl_if_blank:nF {#2}
-          { \@@_to_nfd:nn {#2} {#4} }
-      }
-  }
-%    \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
 % \begin{macro}[EXP]
 %   {
 %     \char_lowercase:N, \char_uppercase:N,

diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
@@ -111,6 +111,16 @@
 %   and |#3| and |#4| empty.
 % \end{function}
 %
+% \begin{function}[added = 2022-10-09, EXP]{\codepoint_to_nfd:n}
+%   \begin{syntax}
+%     \cs{codepoint_to_nfd:n} \Arg{codepoint}
+%   \end{syntax}
+%   Converts the \meta{codepoint} to the Unicode Normalization
+%   Form Canonical Decomposition. The generated character(s) will have
+%   the current category code as they would if typed in directly for Unicode
+%   engines; for $8$-bit engines, active characters are used for all codepoints
+%   outside of the ASCII range.
+% \end{function}
+%
 % \end{documentation}
 %
 % \begin{implementation}
@@ -340,6 +350,46 @@
 % \end{macro}
 % \end{macro}
 % \end{macro}
+%
+% \begin{macro}[EXP]{\codepoint_to_nfd:n}
+% \begin{macro}[EXP]{\@@_to_nfd:nn}
+% \begin{macro}[EXP]{\@@_to_nfd:nnn}
+% \begin{macro}[EXP]{\@@_to_nfd:nnnn}
+% \begin{macro}[EXP]{\@@_to_nfd_generate:nn}
+% \begin{macro}[EXP]{\@@_to_nfd_generate:n}
+% \begin{macro}[EXP]{\@@_to_nfd_generate:nnnn}
+%   Conversion to NFD is a potentially-recursive process: the key is to
+%   check if we get the input codepoint back again. As far as possible,
+%   we use the same path for all engines.
+%
+%   The public function hands the codepoint to \cs{@@_to_nfd:nn} along
+%   with \cs{char_value_catcode:n} of that codepoint, which is carried
+%   through unchanged as the final argument. \cs{@@_to_nfd:nn}
+%   \texttt{e}-expands \cs{@@_nfd:n}, and \cs{@@_to_nfd:nnn} unbraces the
+%   result so that the items it contains are read as |#1| and |#2| of
+%   \cs{@@_to_nfd:nnnn}. If |#1| is equal to the input codepoint (|#3|),
+%   the character is produced by \cs{codepoint_generate:nn}; otherwise
+%   |#1| and, when it is not blank, |#2| are each decomposed in turn.
+%    \begin{macrocode}
+\cs_new:Npn \codepoint_to_nfd:n #1
+  { \@@_to_nfd:nn {#1} { \char_value_catcode:n {#1} } }
+\cs_new:Npn \@@_to_nfd:nn #1#2
+  {
+    \exp_args:Ne \@@_to_nfd:nnn
+      { \@@_nfd:n {#1} } {#1} {#2}
+  }
+\cs_new:Npn \@@_to_nfd:nnn #1#2#3 { \@@_to_nfd:nnnn #1 {#2} {#3} }
+\cs_new:Npn \@@_to_nfd:nnnn #1#2#3#4
+  {
+    \int_compare:nNnTF {#1} = {#3}
+      { \codepoint_generate:nn {#1} {#4} }
+      {
+        \@@_to_nfd:nn {#1} {#4}
+        \tl_if_blank:nF {#2}
+          { \@@_to_nfd:nn {#2} {#4} }
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
 %
 % \subsection{Data loader}
 %
@@ -842,11 +892,11 @@
 % \end{macro}
 % \end{macro}
 %
-% \begin{macro}[EXP]{\__kernel_codepoint_nfd:n}
+% \begin{macro}[EXP]{\@@_nfd:n}
 % \begin{macro}[EXP]{\@@_nfd:nn}
 %   A simple interface.
 %    \begin{macrocode}
-\cs_new:Npn \__kernel_codepoint_nfd:n #1
+\cs_new:Npn \@@_nfd:n #1
   { \exp_args:Ne \@@_nfd:nn { \codepoint_str_generate:n {#1} } {#1} }
 \cs_new:Npn \@@_nfd:nn #1#2
   {

diff --git a/l3kernel/testfiles/m3token001.luatex.tlg b/l3kernel/testfiles/m3token001.luatex.tlg
diff --git a/l3kernel/testfiles/m3token001.lvt b/l3kernel/testfiles/m3token001.lvt
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3token001.tlg
diff --git a/l3kernel/testfiles/m3token001.xetex.tlg b/l3kernel/testfiles/m3token001.xetex.tlg
diff --git a/l3kernel/testfiles/m3token006.luatex.tlg b/l3kernel/testfiles/m3token006.luatex.tlg
diff --git a/l3kernel/testfiles/m3token006.lvt b/l3kernel/testfiles/m3token006.lvt
diff --git a/l3kernel/testfiles/m3token006.tlg b/l3kernel/testfiles/m3token006.tlg
diff --git a/l3kernel/testfiles/m3token006.xetex.tlg b/l3kernel/testfiles/m3token006.xetex.tlg
diff --git a/l3kernel/testfiles/m3unicode001.luatex.tlg b/l3kernel/testfiles/m3unicode001.luatex.tlg
@@ -31,3 +31,10 @@ TEST 3: Byte decomposition
 {239}{191}{189}{}
 {240}{144}{128}{128}
 ============================================================
+============================================================
+TEST 4: Character decomposition
+============================================================
+A
+Î
+Ή
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.lvt b/l3kernel/testfiles/m3unicode001.lvt
@@ -52,4 +52,11 @@
     \codepoint_to_bytes:n { "10000 }
   }
 
+\TESTEXP { Character~decomposition }
+  {
+    \codepoint_to_nfd:n { `A } \NEWLINE
+    \codepoint_to_nfd:n { "00CE } \NEWLINE
+    \codepoint_to_nfd:n { "0389 }
+  }
+
 \END
diff --git a/l3kernel/testfiles/m3unicode001.tlg b/l3kernel/testfiles/m3unicode001.tlg
@@ -31,3 +31,10 @@ TEST 3: Byte decomposition
 {239}{191}{189}{}
 {240}{144}{128}{128}
 ============================================================
+============================================================
+TEST 4: Character decomposition
+============================================================
+A
+I^^cc^^82
+^^ce^^97^^cc^^81
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.xetex.tlg b/l3kernel/testfiles/m3unicode001.xetex.tlg
@@ -31,3 +31,10 @@ TEST 3: Byte decomposition
 {239}{191}{189}{}
 {240}{144}{128}{128}
 ============================================================
+============================================================
+TEST 4: Character decomposition
+============================================================
+A
+Î
+Ή
+============================================================