Skip to content

Commit

Permalink
Rename \char_codepoint_to_bytes:n to \char_to_utfviii_bytes:n
Browse files Browse the repository at this point in the history
Still experimental as the output format still needs to
be agreed, and name may still need some work.
  • Loading branch information
josephwright committed Jan 9, 2020
1 parent 97874b3 commit 12e57ee
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 58 deletions.
6 changes: 3 additions & 3 deletions l3kernel/l3candidates.dtx
Expand Up @@ -695,14 +695,14 @@
% (\enquote{active}), and character code $32$ (space).
% \end{variable}
%
% \begin{function}[added = 2018-06-01, EXP]{\char_codepoint_to_bytes:n}
% \begin{function}[added = 2020-01-09, EXP]{\char_to_utfviii_bytes:n}
% \begin{syntax}
% \cs{char_codepoint_to_bytes:n} \Arg{codepoint}
% \cs{char_to_utfviii_bytes:n} \Arg{codepoint}
% \end{syntax}
% Converts the (Unicode) \meta{codepoint} to UTF-8 bytes. The expansion
% of this function comprises four brace groups, each of which will contain
% a hexadecimal value: the appropriate byte. As UTF-8 is variable-length,
% one or more of the grouos may be empty: the bytes read in the logical order,
% one or more of the groups may be empty: the bytes appear in logical order,
% such that a two-byte codepoint will have groups |#1| and |#2| filled
% and |#3| and |#4| empty.
% \end{function}
Expand Down
8 changes: 4 additions & 4 deletions l3kernel/l3text-case.dtx
Expand Up @@ -1185,7 +1185,7 @@
}
}
\use:x
{ \@@_tmp:w \char_codepoint_to_bytes:n { "#2 } }
{ \@@_tmp:w \char_to_utfviii_bytes:n { "#2 } }
\group_end:
}
\@@_tmp:w \c_@@_dotless_i_tl { 0131 }
Expand Down Expand Up @@ -1214,8 +1214,8 @@
\use:x
{
\@@_tmp:w
\char_codepoint_to_bytes:n { "#1 }
\char_codepoint_to_bytes:n { "#2 }
\char_to_utfviii_bytes:n { "#1 }
\char_to_utfviii_bytes:n { "#2 }
}
\@@_loop:nn
}
Expand Down Expand Up @@ -1371,7 +1371,7 @@
{#2}
}
\use:x
{ \@@_tmp:w \char_codepoint_to_bytes:n { "#1 } }
{ \@@_tmp:w \char_to_utfviii_bytes:n { "#1 } }
\group_end:
}
\@@_tmp:w { 00DF } { SS } { upper }
Expand Down
102 changes: 51 additions & 51 deletions l3kernel/l3token.dtx
Expand Up @@ -1550,102 +1550,102 @@
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\char_codepoint_to_bytes:n}
% \begin{macro}[EXP]{\@@_codepoint_to_bytes_auxi:n}
% \begin{macro}[EXP]{\@@_codepoint_to_bytes_auxii:Nnn}
% \begin{macro}[EXP]{\@@_codepoint_to_bytes_auxiii:n}
% \begin{macro}[EXP]{\char_to_utfviii_bytes:n}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_auxi:n}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_auxii:Nnn}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_auxiii:n}
% \begin{macro}[EXP]
% {
% \@@_codepoint_to_bytes_outputi:nw ,
% \@@_codepoint_to_bytes_outputii:nw ,
% \@@_codepoint_to_bytes_outputiii:nw ,
% \@@_codepoint_to_bytes_outputiv:nw
% \@@_to_utfviii_bytes_outputi:nw ,
% \@@_to_utfviii_bytes_outputii:nw ,
% \@@_to_utfviii_bytes_outputiii:nw ,
% \@@_to_utfviii_bytes_outputiv:nw
% }
% \begin{macro}[EXP]
% {\@@_codepoint_to_bytes_output:nnn, \@@_codepoint_to_bytes_output:fnn}
% \begin{macro}[EXP]{\@@_codepoint_to_bytes_end:}
% {\@@_to_utfviii_bytes_output:nnn, \@@_to_utfviii_bytes_output:fnn}
% \begin{macro}[EXP]{\@@_to_utfviii_bytes_end:}
% This code converts a codepoint into the correct UTF-8 representation.
% In terms of the algorithm itself, see
% \url{https://en.wikipedia.org/wiki/UTF-8} for the octet pattern.
% \begin{macrocode}
\cs_new:Npn \char_codepoint_to_bytes:n #1
\cs_new:Npn \char_to_utfviii_bytes:n #1
{
\exp_args:Nf \@@_codepoint_to_bytes_auxi:n
\exp_args:Nf \@@_to_utfviii_bytes_auxi:n
{ \int_eval:n {#1} }
}
\cs_new:Npn \@@_codepoint_to_bytes_auxi:n #1
\cs_new:Npn \@@_to_utfviii_bytes_auxi:n #1
{
\if_int_compare:w #1 > "80 \exp_stop_f:
\if_int_compare:w #1 < "800 \exp_stop_f:
\@@_codepoint_to_bytes_outputi:nw
{ \@@_codepoint_to_bytes_auxii:Nnn C {#1} { 64 } }
\@@_codepoint_to_bytes_outputii:nw
{ \@@_codepoint_to_bytes_auxiii:n {#1} }
\@@_to_utfviii_bytes_outputi:nw
{ \@@_to_utfviii_bytes_auxii:Nnn C {#1} { 64 } }
\@@_to_utfviii_bytes_outputii:nw
{ \@@_to_utfviii_bytes_auxiii:n {#1} }
\else:
\if_int_compare:w #1 < "10000 \exp_stop_f:
\@@_codepoint_to_bytes_outputi:nw
{ \@@_codepoint_to_bytes_auxii:Nnn E {#1} { 64 * 64 } }
\@@_codepoint_to_bytes_outputii:nw
\@@_to_utfviii_bytes_outputi:nw
{ \@@_to_utfviii_bytes_auxii:Nnn E {#1} { 64 * 64 } }
\@@_to_utfviii_bytes_outputii:nw
{
\@@_codepoint_to_bytes_auxiii:n
\@@_to_utfviii_bytes_auxiii:n
{ \int_div_truncate:nn {#1} { 64 } }
}
\@@_codepoint_to_bytes_outputiii:nw
{ \@@_codepoint_to_bytes_auxiii:n {#1} }
\@@_to_utfviii_bytes_outputiii:nw
{ \@@_to_utfviii_bytes_auxiii:n {#1} }
\else:
\@@_codepoint_to_bytes_outputi:nw
\@@_to_utfviii_bytes_outputi:nw
{
\@@_codepoint_to_bytes_auxii:Nnn F
\@@_to_utfviii_bytes_auxii:Nnn F
{#1} { 64 * 64 * 64 }
}
\@@_codepoint_to_bytes_outputii:nw
\@@_to_utfviii_bytes_outputii:nw
{
\@@_codepoint_to_bytes_auxiii:n
\@@_to_utfviii_bytes_auxiii:n
{ \int_div_truncate:nn {#1} { 64 * 64 } }
}
\@@_codepoint_to_bytes_outputiii:nw
\@@_to_utfviii_bytes_outputiii:nw
{
\@@_codepoint_to_bytes_auxiii:n
\@@_to_utfviii_bytes_auxiii:n
{ \int_div_truncate:nn {#1} { 64 } }
}
\@@_codepoint_to_bytes_outputiv:nw
{ \@@_codepoint_to_bytes_auxiii:n {#1} }
\@@_to_utfviii_bytes_outputiv:nw
{ \@@_to_utfviii_bytes_auxiii:n {#1} }
\fi:
\fi:
\else:
\@@_codepoint_to_bytes_outputi:nw {#1}
\@@_to_utfviii_bytes_outputi:nw {#1}
\fi:
\@@_codepoint_to_bytes_end: { } { } { } { }
\@@_to_utfviii_bytes_end: { } { } { } { }
}
\cs_new:Npn \@@_codepoint_to_bytes_auxii:Nnn #1#2#3
\cs_new:Npn \@@_to_utfviii_bytes_auxii:Nnn #1#2#3
{ "#10 + \int_div_truncate:nn {#2} {#3} }
\cs_new:Npn \@@_codepoint_to_bytes_auxiii:n #1
\cs_new:Npn \@@_to_utfviii_bytes_auxiii:n #1
{ \int_mod:nn {#1} { 64 } + 128 }
\cs_new:Npn \@@_codepoint_to_bytes_outputi:nw
#1 #2 \@@_codepoint_to_bytes_end: #3
{ \@@_codepoint_to_bytes_output:fnn { \int_eval:n {#1} } { } {#2} }
\cs_new:Npn \@@_codepoint_to_bytes_outputii:nw
#1 #2 \@@_codepoint_to_bytes_end: #3#4
{ \@@_codepoint_to_bytes_output:fnn { \int_eval:n {#1} } { {#3} } {#2} }
\cs_new:Npn \@@_codepoint_to_bytes_outputiii:nw
#1 #2 \@@_codepoint_to_bytes_end: #3#4#5
\cs_new:Npn \@@_to_utfviii_bytes_outputi:nw
#1 #2 \@@_to_utfviii_bytes_end: #3
{ \@@_to_utfviii_bytes_output:fnn { \int_eval:n {#1} } { } {#2} }
\cs_new:Npn \@@_to_utfviii_bytes_outputii:nw
#1 #2 \@@_to_utfviii_bytes_end: #3#4
{ \@@_to_utfviii_bytes_output:fnn { \int_eval:n {#1} } { {#3} } {#2} }
\cs_new:Npn \@@_to_utfviii_bytes_outputiii:nw
#1 #2 \@@_to_utfviii_bytes_end: #3#4#5
{
\@@_codepoint_to_bytes_output:fnn
\@@_to_utfviii_bytes_output:fnn
{ \int_eval:n {#1} } { {#3} {#4} } {#2}
}
\cs_new:Npn \@@_codepoint_to_bytes_outputiv:nw
#1 #2 \@@_codepoint_to_bytes_end: #3#4#5#6
\cs_new:Npn \@@_to_utfviii_bytes_outputiv:nw
#1 #2 \@@_to_utfviii_bytes_end: #3#4#5#6
{
\@@_codepoint_to_bytes_output:fnn
\@@_to_utfviii_bytes_output:fnn
{ \int_eval:n {#1} } { {#3} {#4} {#5} } {#2}
}
\cs_new:Npn \@@_codepoint_to_bytes_output:nnn #1#2#3
\cs_new:Npn \@@_to_utfviii_bytes_output:nnn #1#2#3
{
#3
\@@_codepoint_to_bytes_end: #2 {#1}
\@@_to_utfviii_bytes_end: #2 {#1}
}
\cs_generate_variant:Nn \@@_codepoint_to_bytes_output:nnn { f }
\cs_new:Npn \@@_codepoint_to_bytes_end: { }
\cs_generate_variant:Nn \@@_to_utfviii_bytes_output:nnn { f }
\cs_new:Npn \@@_to_utfviii_bytes_end: { }
% \end{macrocode}
% \end{macro}
% \end{macro}
Expand Down

0 comments on commit 12e57ee

Please sign in to comment.