Skip to content

Commit

Permalink
Rename \char_to_utfviii_bytes:n to \codepoint_to_bytes:n
Browse files Browse the repository at this point in the history
This fits a more general pattern of moving functions:
several commits will address this.

There is still the question of byte order here:
do we want to change and 'fill from the bottom'?
  • Loading branch information
josephwright committed Oct 9, 2022
1 parent d4f3d9b commit 31a1124
Show file tree
Hide file tree
Showing 25 changed files with 244 additions and 277 deletions.
4 changes: 4 additions & 0 deletions l3kernel/CHANGELOG.md
Expand Up @@ -8,6 +8,7 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]

### Added
- `\codepoint_to_bytes:n`
- `\codepoint_str_generate:n`

### Changed
Expand All @@ -19,6 +20,9 @@ this project uses date-based 'snapshot' version identifiers.
tokens (issue [\#1110](https://github.com/latex3/latex3/issues/1110)), and an
esoteric case (issue [\#1113](https://github.com/latex3/latex3/issues/1113))

### Deprecated
- `\char_to_utfviii_bytes:n`

## [2022-09-28]

### Added
Expand Down
1 change: 1 addition & 0 deletions l3kernel/doc/l3obsolete.txt
Expand Up @@ -22,6 +22,7 @@ Function Date deprecated
\char_str_lower_case:N 2020-01-03
\char_str_mixed_case:N 2020-01-03
\char_str_upper_case:N 2020-01-03
\char_to_utfviii_bytes:n 2022-10-09
\cs_argument_spec:N 2022-06-24
\l_keys_key_tl 2020-02-08
\l_keys_path_tl 2020-02-08
Expand Down
12 changes: 0 additions & 12 deletions l3kernel/l3candidates.dtx
Expand Up @@ -626,18 +626,6 @@
% (\enquote{active}), and character code $32$ (space).
% \end{variable}
%
% \begin{function}[added = 2020-01-09, EXP]{\char_to_utfviii_bytes:n}
% \begin{syntax}
% \cs{char_to_utfviii_bytes:n} \Arg{codepoint}
% \end{syntax}
% Converts the (Unicode) \meta{codepoint} to UTF-8 bytes. The expansion
% of this function comprises four brace groups, each of which will contain
% a decimal integer: the value of the appropriate byte. As UTF-8 is a
% variable-length encoding, one or more of the groups may be empty: the
% bytes are given in the logical order, such that a two-byte codepoint
% will have groups |#1| and |#2| filled and |#3| and |#4| empty.
% \end{function}
%
% \begin{function}[added = 2020-01-02, rEXP]{\char_to_nfd:N}
% \begin{syntax}
% \cs{char_to_nfd:N} \meta{char}
Expand Down
7 changes: 7 additions & 0 deletions l3kernel/l3deprecation.dtx
Expand Up @@ -549,6 +549,13 @@
%
% \subsection{Deprecated \pkg{l3token} functions}
%
% \begin{macro}[EXP]{\char_to_utfviii_bytes:n}
% Deprecated on 2022-10-09: the old name is kept as a wrapper which
% simply expands to \cs{codepoint_to_bytes:n}, the argument being picked
% up by the latter. The deprecation patch has to come immediately before
% the definition it applies to.
% \begin{macrocode}
\__kernel_patch_deprecation:nnNNpn { 2022-10-09 } { \codepoint_to_bytes:n }
\cs_gset:Npn \char_to_utfviii_bytes:n { \codepoint_to_bytes:n }
% \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]
% {
% \char_lower_case:N, \char_upper_case:N,
Expand Down
2 changes: 1 addition & 1 deletion l3kernel/l3str-convert.dtx
Expand Up @@ -2635,7 +2635,7 @@
\cs_new:Npn \@@_convert_pdfname_bytes:n #1
{
\exp_args:Ne \@@_convert_pdfname_bytes_aux:n
{ \char_to_utfviii_bytes:n {`#1} }
{ \codepoint_to_bytes:n {`#1} }
}
\cs_new:Npn \@@_convert_pdfname_bytes_aux:n #1
{ \@@_convert_pdfname_bytes_aux:nnnn #1 }
Expand Down
2 changes: 1 addition & 1 deletion l3kernel/l3str.dtx
Expand Up @@ -2056,7 +2056,7 @@
\use:e
{
\exp_not:N \@@_change_case_generate:nnnn
\char_to_utfviii_bytes:n {#1}
\codepoint_to_bytes:n {#1}
}
}
\cs_new:Npn \@@_change_case_generate:nnnn #1#2#3#4
Expand Down
16 changes: 8 additions & 8 deletions l3kernel/l3text-case.dtx
Expand Up @@ -1868,7 +1868,7 @@
}
}
\use:x
{ \@@_tmp:w \char_to_utfviii_bytes:n { "#2 } }
{ \@@_tmp:w \codepoint_to_bytes:n { "#2 } }
\group_end:
}
\@@_tmp:w \c_@@_dotless_i_tl { 0131 }
Expand Down Expand Up @@ -1902,8 +1902,8 @@
\use:x
{
\@@_tmp:w
\char_to_utfviii_bytes:n { "#1 }
\char_to_utfviii_bytes:n { "#2 }
\codepoint_to_bytes:n { "#1 }
\codepoint_to_bytes:n { "#2 }
}
\@@_loop:nn
}
Expand Down Expand Up @@ -2183,8 +2183,8 @@
\use:x
{
\@@_tmp:w
\char_to_utfviii_bytes:n { "#1 }
\char_to_utfviii_bytes:n { "#2 }
\codepoint_to_bytes:n { "#1 }
\codepoint_to_bytes:n { "#2 }
}
\group_end:
}
Expand Down Expand Up @@ -2238,7 +2238,7 @@
{#2}
}
\use:x
{ \@@_tmp:w \char_to_utfviii_bytes:n { "#1 } }
{ \@@_tmp:w \codepoint_to_bytes:n { "#1 } }
\group_end:
}
\@@_tmp:w { 00DF } { SS } { upper }
Expand Down Expand Up @@ -2463,8 +2463,8 @@
\use:x
{
\@@_tmp:w
\char_to_utfviii_bytes:n { "#1 }
\char_to_utfviii_bytes:n { "#2 }
\codepoint_to_bytes:n { "#1 }
\codepoint_to_bytes:n { "#2 }
}
\group_end:
}
Expand Down
4 changes: 2 additions & 2 deletions l3kernel/l3text-purify.dtx
Expand Up @@ -486,7 +486,7 @@
\text_declare_purify_equivalent:Nx #1
{
\exp_args:Ne \@@_tmp:n
{ \char_to_utfviii_bytes:n { "#2 } }
{ \codepoint_to_bytes:n { "#2 } }
}
\@@_loop:Nn
}
Expand Down Expand Up @@ -574,7 +574,7 @@
\cs_set:Npn \@@_tmp:n #1
{
\exp_args:Ne \@@_tmp_aux:n
{ \char_to_utfviii_bytes:n { "#1 } }
{ \codepoint_to_bytes:n { "#1 } }
}
\cs_set:Npn \@@_tmp_aux:n #1 { \@@_tmp:nnnn #1 }
\cs_set:Npn \@@_tmp:nnnn #1#2#3#4
Expand Down
105 changes: 0 additions & 105 deletions l3kernel/l3token.dtx
Expand Up @@ -1690,111 +1690,6 @@
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\char_to_utfviii_bytes:n}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_auxi:n}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_auxii:Nnn}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_auxiii:n}
% \begin{macro}[EXP]
% {
% \@@_to_utfviii_bytes_outputi:nw ,
% \@@_to_utfviii_bytes_outputii:nw ,
% \@@_to_utfviii_bytes_outputiii:nw ,
% \@@_to_utfviii_bytes_outputiv:nw
% }
% \begin{macro}[EXP]
% {\@@_to_utfviii_bytes_output:nnn, \@@_to_utfviii_bytes_output:fnn}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_end:}
% This code converts a codepoint into the correct UTF-8 representation.
% In terms of the algorithm itself, see
% \url{https://en.wikipedia.org/wiki/UTF-8} for the octet pattern.
% \begin{macrocode}
% Public entry point. The argument is treated as an integer expression:
% it is evaluated and f-expanded to a plain number before being passed
% on, so that the auxiliary can use it directly in integer comparisons.
\cs_new:Npn \char_to_utfviii_bytes:n #1
{
\exp_args:Nf \@@_to_utfviii_bytes_auxi:n
{ \int_eval:n {#1} }
}
% Choose the number of UTF-8 bytes from the size of the codepoint #1
% (already a plain number at this stage) and queue one output step per
% byte. Each output step stores its byte in the next of the four brace
% groups which follow the end marker; groups that are not used stay empty.
% Codepoints up to "7F are a single byte. The codepoint "80 itself already
% needs the two-byte form (bytes "C2 "80), hence the test against "7F.
\cs_new:Npn \@@_to_utfviii_bytes_auxi:n #1
  {
    \if_int_compare:w #1 > "7F \exp_stop_f:
      \if_int_compare:w #1 < "800 \exp_stop_f:
        % Two bytes: lead byte "C0 + top bits, then one continuation byte.
        \@@_to_utfviii_bytes_outputi:nw
          { \@@_to_utfviii_bytes_auxii:Nnn C {#1} { 64 } }
        \@@_to_utfviii_bytes_outputii:nw
          { \@@_to_utfviii_bytes_auxiii:n {#1} }
      \else:
        \if_int_compare:w #1 < "10000 \exp_stop_f:
          % Three bytes: lead byte "E0 + top bits, two continuation bytes.
          \@@_to_utfviii_bytes_outputi:nw
            { \@@_to_utfviii_bytes_auxii:Nnn E {#1} { 64 * 64 } }
          \@@_to_utfviii_bytes_outputii:nw
            {
              \@@_to_utfviii_bytes_auxiii:n
                { \int_div_truncate:nn {#1} { 64 } }
            }
          \@@_to_utfviii_bytes_outputiii:nw
            { \@@_to_utfviii_bytes_auxiii:n {#1} }
        \else:
          % Four bytes: lead byte "F0 + top bits, three continuation bytes.
          \@@_to_utfviii_bytes_outputi:nw
            {
              \@@_to_utfviii_bytes_auxii:Nnn F
                {#1} { 64 * 64 * 64 }
            }
          \@@_to_utfviii_bytes_outputii:nw
            {
              \@@_to_utfviii_bytes_auxiii:n
                { \int_div_truncate:nn {#1} { 64 * 64 } }
            }
          \@@_to_utfviii_bytes_outputiii:nw
            {
              \@@_to_utfviii_bytes_auxiii:n
                { \int_div_truncate:nn {#1} { 64 } }
            }
          \@@_to_utfviii_bytes_outputiv:nw
            { \@@_to_utfviii_bytes_auxiii:n {#1} }
        \fi:
      \fi:
    \else:
      % Single byte: the codepoint is the byte.
      \@@_to_utfviii_bytes_outputi:nw {#1}
    \fi:
    % The end marker delimits the pending material for the output steps;
    % the four empty groups are the slots that receive the bytes.
    \@@_to_utfviii_bytes_end: { } { } { } { }
  }
% Lead byte expression: #1 is the hexadecimal digit (C, E or F in the
% callers) which forms the marker, so that "#10 reads as "C0, "E0 or "F0;
% to this is added the codepoint #2 divided (truncating) by #3. This is
% left as an integer expression, evaluated when the byte is output.
\cs_new:Npn \@@_to_utfviii_bytes_auxii:Nnn #1#2#3
{ "#10 + \int_div_truncate:nn {#2} {#3} }
% Continuation byte expression: the remainder of #1 modulo 64, plus 128.
\cs_new:Npn \@@_to_utfviii_bytes_auxiii:n #1
{ \int_mod:nn {#1} { 64 } + 128 }
% The four output functions share one pattern. Each takes the byte
% expression as #1, then everything up to the end marker as #2 (further
% output steps and the closing conditional tokens), then brace groups
% from after the end marker. The groups already filled by earlier steps
% are kept and one more (empty) group is absorbed, to be replaced by the
% new byte; the byte expression is evaluated by \int_eval:n on the way.
%
% First byte: nothing is filled yet, so only the empty group #3 is taken.
\cs_new:Npn \@@_to_utfviii_bytes_outputi:nw
#1 #2 \@@_to_utfviii_bytes_end: #3
{ \@@_to_utfviii_bytes_output:fnn { \int_eval:n {#1} } { } {#2} }
% Second byte: keep the first byte #3, absorb the empty group #4.
\cs_new:Npn \@@_to_utfviii_bytes_outputii:nw
#1 #2 \@@_to_utfviii_bytes_end: #3#4
{ \@@_to_utfviii_bytes_output:fnn { \int_eval:n {#1} } { {#3} } {#2} }
% Third byte: keep #3 and #4, absorb the empty group #5.
\cs_new:Npn \@@_to_utfviii_bytes_outputiii:nw
#1 #2 \@@_to_utfviii_bytes_end: #3#4#5
{
\@@_to_utfviii_bytes_output:fnn
{ \int_eval:n {#1} } { {#3} {#4} } {#2}
}
% Fourth byte: keep #3, #4 and #5, absorb the empty group #6.
\cs_new:Npn \@@_to_utfviii_bytes_outputiv:nw
#1 #2 \@@_to_utfviii_bytes_end: #3#4#5#6
{
\@@_to_utfviii_bytes_output:fnn
{ \int_eval:n {#1} } { {#3} {#4} {#5} } {#2}
}
% Put everything back in order: first the pending material #3, then the
% end marker, then the groups already filled (#2) followed by the new
% byte #1 in a brace group of its own.
\cs_new:Npn \@@_to_utfviii_bytes_output:nnn #1#2#3
{
#3
\@@_to_utfviii_bytes_end: #2 {#1}
}
% The f-type variant is the one used by the output functions, so that the
% byte expression is turned into a number before it is stored.
\cs_generate_variant:Nn \@@_to_utfviii_bytes_output:nnn { f }
% The end marker expands to nothing, which leaves just the brace groups
% once all of the output steps have run.
\cs_new:Npn \@@_to_utfviii_bytes_end: { }
% \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\char_to_nfd:N}
% \begin{macro}[EXP]{\char_to_nfd:n}
% \begin{macro}[EXP]{\@@_to_nfd:nn}
Expand Down

0 comments on commit 31a1124

Please sign in to comment.