l3kernel/l3str.dtx

% \iffalse meta-comment
%
%% File: l3str.dtx
%
% Copyright (C) 2011-2022 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "l3kernel bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/latex3
%
% for those people who are interested.
%
%<*driver>
\documentclass[full,kernel]{l3doc}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
% \title{^^A
%   The \pkg{l3str} package: Strings^^A
% }
%
% \author{^^A
%  The \LaTeX{} Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released 2022-09-28}
%
% \maketitle
%
% \begin{documentation}
%
% \TeX{} associates each character with a category code: as such, there is no
% concept of a \enquote{string} as commonly understood in many other
% programming languages. However, there are places where we wish to manipulate
% token lists while in some sense \enquote{ignoring} category codes: this is
% done by treating token lists as strings in a \TeX{} sense.
%
% A \TeX{} string (and thus an \pkg{expl3} string) is a series of characters
% which have category code $12$ (\enquote{other}) with the exception of
% space characters which have category code $10$ (\enquote{space}). Thus
% at a technical level, a \TeX{} string is a token list with the appropriate
% category codes. In this documentation, these are simply referred to as
% strings.
%
% String variables are simply specialised token lists, but by convention
% should be named with the suffix \texttt{\ldots{}str}.  Such variables
% should contain characters with category code $12$ (other), except
% spaces, which have category code $10$ (blank space).  All the
% functions in this module which accept a token list argument first
% convert it to a string using \cs{tl_to_str:n} for internal processing,
% and do not treat a token list or the corresponding string
% representation differently.
%
% As a string is a subset of the more general token list, it is sometimes unclear
% when one should be used over the other.
% Use a string variable for data that isn't primarily intended for typesetting
% and for which a level of protection from unwanted expansion is suitable.
% This data type simplifies comparison of variables since there are no concerns
% about expansion of their contents.
%
% The functions \cs{cs_to_str:N}, \cs{tl_to_str:n}, \cs{tl_to_str:N} and
% \cs{token_to_str:N} (and variants) generate strings from the appropriate
% input: these are documented in \pkg{l3basics}, \pkg{l3tl} and \pkg{l3token},
% respectively.
%
% Most expandable functions in this module come in three flavours:
% \begin{itemize}
%   \item \cs[no-index]{str_\ldots{}:N}, which expect a token list or string
%     variable as their argument;
%   \item \cs[no-index]{str_\ldots{}:n}, taking any token list (or string) as an
%     argument;
%   \item \cs[no-index]{str_\ldots{}_ignore_spaces:n}, which ignores any space
%     encountered during the operation: these functions are typically
%     faster than those which take care of escaping spaces
%     appropriately.
% \end{itemize}
%
% \section{Creating and initialising string variables}
%
% \begin{function}[added = 2015-09-18]{\str_new:N, \str_new:c}
%   \begin{syntax}
%     \cs{str_new:N} \meta{str~var}
%   \end{syntax}
%   Creates a new \meta{str~var} or raises an error if the name is
%   already taken.  The declaration is global.  The \meta{str~var} is
%   initially empty.
% \end{function}
%
% \begin{function}[added = 2015-09-18, updated = 2018-07-28]
%   {
%     \str_const:Nn, \str_const:NV, \str_const:Nx,
%     \str_const:cn, \str_const:cV, \str_const:cx
%   }
%   \begin{syntax}
%     \cs{str_const:Nn} \meta{str~var} \Arg{token list}
%   \end{syntax}
%   Creates a new constant \meta{str~var} or raises an error if the name
%   is already taken.  The value of the \meta{str~var} is set
%   globally to the \meta{token list}, converted to a string.
% \end{function}
%
% \begin{function}[added = 2015-09-18]
%   {\str_clear:N, \str_clear:c, \str_gclear:N, \str_gclear:c}
%   \begin{syntax}
%     \cs{str_clear:N} \meta{str~var}
%   \end{syntax}
%   Clears the content of the \meta{str~var}.
% \end{function}
%
% \begin{function}[added = 2015-09-18]{\str_clear_new:N, \str_clear_new:c}
%   \begin{syntax}
%     \cs{str_clear_new:N} \meta{str~var}
%   \end{syntax}
%   Ensures that the \meta{str~var} exists globally by applying
%   \cs{str_new:N} if necessary, then applies
%   \cs[index=str_clear:N]{str_(g)clear:N} to leave
%   the \meta{str~var} empty.
% \end{function}
%
% \begin{function}[added = 2015-09-18]
%   {
%     \str_set_eq:NN,  \str_set_eq:cN,  \str_set_eq:Nc,  \str_set_eq:cc,
%     \str_gset_eq:NN, \str_gset_eq:cN, \str_gset_eq:Nc, \str_gset_eq:cc
%   }
%   \begin{syntax}
%     \cs{str_set_eq:NN} \meta{str~var_1} \meta{str~var_2}
%   \end{syntax}
%   Sets the content of \meta{str~var_1} equal to that of
%   \meta{str~var_2}.
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_concat:NNN,  \str_concat:ccc,
%     \str_gconcat:NNN, \str_gconcat:ccc
%   }
%   \begin{syntax}
%     \cs{str_concat:NNN} \meta{str~var_1} \meta{str~var_2} \meta{str~var_3}
%   \end{syntax}
%   Concatenates the content of \meta{str~var_2} and \meta{str~var_3}
%   together and saves the result in \meta{str~var_1}. The \meta{str~var_2}
%   is placed at the left side of the new string variable.
%   The \meta{str~var_2} and \meta{str~var_3} must indeed be strings, as
%   this function does not convert their contents to a string.
% \end{function}
%
% \begin{function}[EXP, pTF, added = 2015-09-18]
%   {\str_if_exist:N, \str_if_exist:c}
%   \begin{syntax}
%     \cs{str_if_exist_p:N} \meta{str~var}
%     \cs{str_if_exist:NTF} \meta{str~var} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Tests whether the \meta{str~var} is currently defined.  This does not
%   check that the \meta{str~var} really is a string.
% \end{function}
%
% \section{Adding data to string variables}
%
% \begin{function}[added = 2015-09-18, updated = 2018-07-28]
%   {
%     \str_set:Nn,  \str_set:NV, \str_set:Nx,
%     \str_set:cn,  \str_set:cV, \str_set:cx,
%     \str_gset:Nn, \str_gset:NV, \str_gset:Nx,
%     \str_gset:cn, \str_gset:cV, \str_gset:cx
%   }
%   \begin{syntax}
%     \cs{str_set:Nn} \meta{str var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and stores the
%   result in \meta{str var}.
% \end{function}
%
% \begin{function}[added = 2015-09-18, updated = 2018-07-28]
%   {
%     \str_put_left:Nn, \str_put_left:NV, \str_put_left:Nx,
%     \str_put_left:cn, \str_put_left:cV, \str_put_left:cx,
%     \str_gput_left:Nn, \str_gput_left:NV, \str_gput_left:Nx,
%     \str_gput_left:cn, \str_gput_left:cV, \str_gput_left:cx
%   }
%   \begin{syntax}
%     \cs{str_put_left:Nn} \meta{str var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and prepends the
%   result to \meta{str var}.  The current contents of the \meta{str
%     var} are not automatically converted to a string.
% \end{function}
%
% \begin{function}[added = 2015-09-18, updated = 2018-07-28]
%   {
%     \str_put_right:Nn, \str_put_right:NV, \str_put_right:Nx,
%     \str_put_right:cn, \str_put_right:cV, \str_put_right:cx,
%     \str_gput_right:Nn, \str_gput_right:NV, \str_gput_right:Nx,
%     \str_gput_right:cn, \str_gput_right:cV, \str_gput_right:cx
%   }
%   \begin{syntax}
%     \cs{str_put_right:Nn} \meta{str var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and appends the
%   result to \meta{str var}.  The current contents of the \meta{str
%     var} are not automatically converted to a string.
% \end{function}
%
% \section{String conditionals}
%
% \begin{function}[EXP,pTF, added = 2015-09-18, updated = 2022-03-21]
%   {\str_if_empty:N, \str_if_empty:c, \str_if_empty:n}
%   \begin{syntax}
%     \cs{str_if_empty_p:N} \meta{str~var}
%     \cs{str_if_empty:NTF} \meta{str~var} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Tests if the \meta{str~var} is entirely empty
%   (\emph{i.e.}~contains no characters at all).
% \end{function}
%
% \begin{function}[EXP,pTF, added = 2015-09-18]
%   {\str_if_eq:NN, \str_if_eq:Nc, \str_if_eq:cN, \str_if_eq:cc}
%   \begin{syntax}
%     \cs{str_if_eq_p:NN} \meta{str~var_1} \meta{str~var_2}
%     \cs{str_if_eq:NNTF} \meta{str~var_1} \meta{str~var_2} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Compares the content of two \meta{str variables} and
%   is logically \texttt{true} if the two contain the same characters
%   in the same order.  See \cs{tl_if_eq:NNTF} to compare tokens
%   (including their category codes) rather than characters.
% \end{function}
%
% \begin{function}[EXP,pTF, updated  = 2018-06-18]
%   {
%     \str_if_eq:nn, \str_if_eq:Vn, \str_if_eq:on, \str_if_eq:no,
%     \str_if_eq:nV, \str_if_eq:VV, \str_if_eq:vn, \str_if_eq:nv,
%     \str_if_eq:ee
%   }
%   \begin{syntax}
%     \cs{str_if_eq_p:nn} \Arg{tl_1} \Arg{tl_2}
%     \cs{str_if_eq:nnTF} \Arg{tl_1} \Arg{tl_2} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Compares the two \meta{token lists} on a character by character
%   basis (namely after converting them to strings),
%   and is \texttt{true} if the two \meta{strings} contain the same
%   characters in the same order. Thus for example
%   \begin{verbatim}
%     \str_if_eq_p:no { abc } { \tl_to_str:n { abc } }
%   \end{verbatim}
%   is logically \texttt{true}.  See \cs{tl_if_eq:nnTF} to compare
%   tokens (including their category codes) rather than characters.
% \end{function}
%
% \begin{function}[TF, added = 2017-10-08]{\str_if_in:Nn, \str_if_in:cn}
%   \begin{syntax}
%     \cs{str_if_in:NnTF} \meta{str~var} \Arg{token list} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} and
%   tests if that \meta{string} is found in the content of the
%   \meta{str~var}.
% \end{function}
%
% \begin{function}[TF, added = 2017-10-08]{\str_if_in:nn}
%   \begin{syntax}
%     \cs{str_if_in:nnTF} \Arg{tl_1} \Arg{tl_2} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Converts both \meta{token lists} to \meta{strings} and
%   tests whether \meta{string_2} is found inside \meta{string_1}.
% \end{function}
%
% \begin{function}[added = 2013-07-24, updated = 2022-03-21, EXP, noTF]
%   {\str_case:nn, \str_case:Vn, \str_case:Nn, \str_case:on, \str_case:nV, \str_case:nv}
%   \begin{syntax}
%     \cs{str_case:nnTF} \Arg{test string} \\
%     ~~|{| \\
%     ~~~~\Arg{string case_1} \Arg{code case_1} \\
%     ~~~~\Arg{string case_2} \Arg{code case_2} \\
%     ~~~~\ldots \\
%     ~~~~\Arg{string case_n} \Arg{code case_n} \\
%     ~~|}| \\
%     ~~\Arg{true code}
%     ~~\Arg{false code}
%   \end{syntax}
%   Compares the \meta{test string} in turn with each
%   of the \meta{string cases} (all token lists are converted to strings).
%   If the two are equal (as described for
%   \cs{str_if_eq:nnTF}) then the associated \meta{code} is left in the
%   input stream and other cases are discarded. If any of the
%   cases are matched, the \meta{true code} is also inserted into the
%   input stream (after the code for the appropriate case), while if none
%   match then the \meta{false code} is inserted. The function
%   \cs{str_case:nn}, which does nothing if there is no match, is also
%   available.
%
%   This set of functions performs no expansion on each
%   \meta{string~case} argument, so any variable in there will be
%   compared as a string.  If expansion is needed in the
%   \meta{string~cases}, then \cs[no-index]{str_case_e:nn(TF)} should
%   be used instead.
% \end{function}
%
% \begin{function}[added = 2018-06-19, EXP, noTF]{\str_case_e:nn}
%   \begin{syntax}
%     \cs{str_case_e:nnTF} \Arg{test string} \\
%     ~~|{| \\
%     ~~~~\Arg{string case_1} \Arg{code case_1} \\
%     ~~~~\Arg{string case_2} \Arg{code case_2} \\
%     ~~~~\ldots \\
%     ~~~~\Arg{string case_n} \Arg{code case_n} \\
%     ~~|}| \\
%     ~~\Arg{true code}
%     ~~\Arg{false code}
%   \end{syntax}
%   Compares the full expansion of the \meta{test string}
%   in turn with the full expansion of the \meta{string cases}
%   (all token lists are converted to strings).  If the two
%   full expansions are equal (as described for \cs{str_if_eq:nnTF}) then the
%   associated \meta{code} is left in the input stream
%   and other cases are discarded.  If any of the
%   cases are matched, the \meta{true code} is also inserted into the
%   input stream (after the code for the appropriate case), while if none
%   match then the \meta{false code} is inserted. The function
%   \cs{str_case_e:nn}, which does nothing if there is no match, is also
%   available.
%   The \meta{test string} is expanded in each comparison, and must
%   always yield the same result: for example, random numbers must
%   not be used within this string.
% \end{function}
%
% \begin{function}[EXP, pTF, added = 2021-05-17]{\str_compare:nNn, \str_compare:eNe}
%   \begin{syntax}
%     \cs{str_compare_p:nNn} \Arg{tl_1} \meta{relation} \Arg{tl_2}
%     \cs{str_compare:nNnTF} \Arg{tl_1} \meta{relation} \Arg{tl_2} \Arg{true code} \Arg{false code}
%   \end{syntax}
%   Compares the two \meta{token lists} on a character by character
%   basis (namely after converting them to strings) in a lexicographic
%   order according to the character codes of the characters.  The
%   \meta{relation} can be |<|, |=|, or~|>| and the test is
%   \texttt{true} under the following conditions:
%   \begin{itemize}
%     \item for |<|, if the first string is earlier than the second in lexicographic order;
%     \item for |=|, if the two strings have exactly the same characters;
%     \item for |>|, if the first string is later than the second in lexicographic order.
%   \end{itemize}
%   Thus for example the following is logically \texttt{true}:
%   \begin{verbatim}
%     \str_compare_p:nNn { ab } < { abc }
%   \end{verbatim}
%   \begin{texnote}
%     This is a wrapper around the \TeX{} primitive
%     \cs[index=pdfstrcmp]{(pdf)strcmp}.  It is meant for programming
%     and not for sorting textual contents, as it simply considers
%     character codes and not more elaborate considerations of grapheme
%     clusters, locale, etc.
%   \end{texnote}
% \end{function}
%
% \section{Mapping over strings}
%
% All mappings are done at the current group level, \emph{i.e.}~any
% local assignments made by the \meta{function} or \meta{code} discussed
% below remain in effect after the loop.
%
% \begin{function}[added = 2017-11-14, rEXP]
%   {\str_map_function:nN, \str_map_function:NN, \str_map_function:cN}
%   \begin{syntax}
%     \cs{str_map_function:nN} \Arg{token list} \meta{function}
%     \cs{str_map_function:NN} \meta{str~var} \meta{function}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then
%   applies \meta{function} to every \meta{character} in the
%   \meta{string} including spaces.
% \end{function}
%
% \begin{function}[added = 2017-11-14]
%   {\str_map_inline:nn, \str_map_inline:Nn, \str_map_inline:cn}
%   \begin{syntax}
%     \cs{str_map_inline:nn} \Arg{token list} \Arg{inline function}
%     \cs{str_map_inline:Nn} \meta{str~var} \Arg{inline function}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then
%   applies the \meta{inline function} to every \meta{character} in the
%   \meta{string} including spaces.
%   The \meta{inline function} should consist of code which
%   receives the \meta{character} as |#1|.
% \end{function}
%
% \begin{function}[rEXP, added = 2021-05-05]
%   {\str_map_tokens:nn, \str_map_tokens:Nn, \str_map_tokens:cn}
%   \begin{syntax}
%     \cs{str_map_tokens:nn} \Arg{token list} \Arg{code}
%     \cs{str_map_tokens:Nn} \meta{str~var} \Arg{code}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then applies
%   \meta{code} to every \meta{character} in the \meta{string} including
%   spaces.  The \meta{code} receives each character as a trailing brace
%   group.  This is equivalent to \cs{str_map_function:nN} if the
%   \meta{code} consists of a single function.
% \end{function}
%
% \begin{function}[added = 2017-11-14]
%   {\str_map_variable:nNn, \str_map_variable:NNn, \str_map_variable:cNn}
%   \begin{syntax}
%     \cs{str_map_variable:nNn} \Arg{token list} \meta{variable} \Arg{code}
%     \cs{str_map_variable:NNn} \meta{str~var} \meta{variable} \Arg{code}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then stores each
%   \meta{character} in the \meta{string} (including spaces) in turn in
%   the (string or token list) \meta{variable} and applies the
%   \meta{code}.  The \meta{code} will usually make use of the
%   \meta{variable}, but this is not enforced.  The assignments to the
%   \meta{variable} are local.  Its value after the loop is the last
%   \meta{character} in the \meta{string}, or its original value if the
%   \meta{string} is empty.  See also \cs{str_map_inline:Nn}.
% \end{function}
%
% \begin{function}[added = 2017-10-08, rEXP]{\str_map_break:}
%   \begin{syntax}
%     \cs{str_map_break:}
%   \end{syntax}
%   Used to terminate a \cs[no-index]{str_map_\ldots} function before all
%   characters in the \meta{string} have been processed. This
%   normally takes place within a conditional statement, for example
%   \begin{verbatim}
%     \str_map_inline:Nn \l_my_str
%       {
%         \str_if_eq:nnT { #1 } { bingo } { \str_map_break: }
%         % Do something useful
%       }
%   \end{verbatim}
%   See also \cs{str_map_break:n}.
%   Use outside of a \cs[no-index]{str_map_\ldots} scenario leads to low
%   level \TeX{} errors.
%   \begin{texnote}
%     When the mapping is broken, additional tokens may be inserted
%     before continuing with the
%     code that follows the loop.
%     This depends on the design of the mapping function.
%   \end{texnote}
% \end{function}
%
% \begin{function}[added = 2017-10-08, rEXP]{\str_map_break:n}
%   \begin{syntax}
%     \cs{str_map_break:n} \Arg{code}
%   \end{syntax}
%   Used to terminate a \cs[no-index]{str_map_\ldots} function before all
%   characters in the \meta{string} have been processed, inserting
%   the \meta{code} after the mapping has ended. This
%   normally takes place within a conditional statement, for example
%   \begin{verbatim}
%     \str_map_inline:Nn \l_my_str
%       {
%         \str_if_eq:nnT { #1 } { bingo }
%           { \str_map_break:n { <code> } }
%         % Do something useful
%       }
%   \end{verbatim}
%   Use outside of a \cs[no-index]{str_map_\ldots} scenario leads to low
%   level \TeX{} errors.
%   \begin{texnote}
%     When the mapping is broken, additional tokens may be inserted
%     before the \meta{code} is
%     inserted into the input stream.
%     This depends on the design of the mapping function.
%   \end{texnote}
% \end{function}
%
% \section{Working with the content of strings}
%
% \begin{function}[EXP, added = 2015-09-18]{\str_use:N, \str_use:c}
%   \begin{syntax}
%     \cs{str_use:N} \meta{str~var}
%   \end{syntax}
%   Recovers the content of a \meta{str~var} and places it
%   directly in the input stream. An error is raised if the variable
%   does not exist or if it is invalid. Note that it is possible to use
%   a \meta{str} directly without an accessor function.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_count:N, \str_count:c, \str_count:n, \str_count_ignore_spaces:n}
%   \begin{syntax}
%     \cs{str_count:n} \Arg{token list}
%   \end{syntax}
%   Leaves in the input stream the number of characters in the string
%   representation of \meta{token list}, as an integer denotation.  The
%   functions differ in their treatment of spaces.  In the case of
%   \cs{str_count:N} and \cs{str_count:n}, all characters including
%   spaces are counted.  The \cs{str_count_ignore_spaces:n} function
%   leaves the number of non-space characters in the input stream.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_count_spaces:N, \str_count_spaces:c, \str_count_spaces:n}
%   \begin{syntax}
%     \cs{str_count_spaces:n} \Arg{token list}
%   \end{syntax}
%   Leaves in the input stream the number of space characters in the
%   string representation of \meta{token list}, as an integer
%   denotation. Of course, this function has no \texttt{_ignore_spaces}
%   variant.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_head:N, \str_head:c, \str_head:n, \str_head_ignore_spaces:n}
%   \begin{syntax}
%     \cs{str_head:n} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} into a \meta{string}.  The first
%   character in the \meta{string} is then left in the input stream,
%   with category code \enquote{other}.  The functions differ if the
%   first character is a space: \cs{str_head:N} and \cs{str_head:n}
%   return a space token with category code~$10$ (blank space), while
%   the \cs{str_head_ignore_spaces:n} function ignores this space
%   character and leaves the first non-space character in the input
%   stream.  If the \meta{string} is empty (or only contains spaces in
%   the case of the \texttt{_ignore_spaces} function), then nothing is
%   left on the input stream.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_tail:N, \str_tail:c, \str_tail:n, \str_tail_ignore_spaces:n}
%   \begin{syntax}
%     \cs{str_tail:n} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, removes the first
%   character, and leaves the remaining characters (if any) in the input
%   stream, with category codes $12$ and $10$ (for spaces).  The
%   functions differ in the case where the first character is a space:
%   \cs{str_tail:N} and \cs{str_tail:n} only trim that space, while
%   \cs{str_tail_ignore_spaces:n} removes the first non-space character
%   and any space before it.  If the \meta{token list} is empty (or
%   blank in the case of the \texttt{_ignore_spaces} variant), then
%   nothing is left on the input stream.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_item:Nn, \str_item:nn, \str_item_ignore_spaces:nn}
%   \begin{syntax}
%     \cs{str_item:nn} \Arg{token list} \Arg{integer expression}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and leaves in the
%   input stream the character in position \meta{integer expression} of
%   the \meta{string}, starting at $1$ for the first (left-most)
%   character.  In the case of \cs{str_item:Nn} and \cs{str_item:nn},
%   all characters including spaces are taken into account.  The
%   \cs{str_item_ignore_spaces:nn} function skips spaces when counting
%   characters.  If the \meta{integer expression} is negative,
%   characters are counted from the end of the \meta{string}. Hence,
%   $-1$ is the right-most character, \emph{etc.}
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {
%     \str_range:Nnn, \str_range:cnn, \str_range:nnn,
%     \str_range_ignore_spaces:nnn
%   }
%   \begin{syntax}
%     \cs{str_range:nnn} \Arg{token list} \Arg{start index} \Arg{end index}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and leaves in the
%   input stream the characters from the \meta{start index} to the
%   \meta{end index} inclusive.  Spaces are preserved and counted as items
%   (contrast this with \cs{tl_range:nnn} where spaces are not counted as
%   items and are possibly discarded from the output).
%
%   Here \meta{start index} and \meta{end index} should be integer denotations.
%   For describing in detail the functions' behavior, let $m$ and $n$ be the start
%   and end index respectively. If either is $0$, the result is empty. A positive
%   index means `start counting from the left end', a negative index means
%   `start counting from the right end'. Let $l$ be the number of characters
%   in the \meta{string}.
%
%   The \emph{actual start point} is determined as $M=m$ if~$m>0$ and as $M=l+m+1$
%   if~$m<0$. Similarly the \emph{actual end point} is $N=n$ if~$n>0$ and $N=l+n+1$
%   if~$n<0$. If $M>N$, the result is empty. Otherwise it consists of all items from
%   position $M$ to position $N$ inclusive; for the purpose of this rule, we can
%   imagine that the \meta{string} extends to infinity on either side, with void items
%   at positions $s$ for $s\le0$ or $s>l$.
%   For instance,
%   \begin{verbatim}
%     \iow_term:x { \str_range:nnn { abcdef } { 2 } { 5 } }
%     \iow_term:x { \str_range:nnn { abcdef } { -4 } { -1 } }
%     \iow_term:x { \str_range:nnn { abcdef } { -2 } { -1 } }
%     \iow_term:x { \str_range:nnn { abcdef } { 0 } { -1 } }
%   \end{verbatim}
%   prints \texttt{bcde}, \texttt{cdef}, \texttt{ef}, and an empty
%   line to the terminal. The actual start point must always be smaller than
%   or equal to the actual end point: if this is not the case then no output
%   is generated. Thus
%   \begin{verbatim}
%     \iow_term:x { \str_range:nnn { abcdef } { 5 } { 2 } }
%     \iow_term:x { \str_range:nnn { abcdef } { -1 } { -4 } }
%   \end{verbatim}
%   both yield empty strings.
% \end{function}
%
% ^^A If this stays in the same {function} environment, we get a really
% ^^A awful page break. Perhaps we should add a way to allow a page break
% ^^A in a function environment...
%   The behavior of \cs{str_range_ignore_spaces:nnn} is similar, but spaces
%   are removed before starting the job. The input
%   \begin{verbatim}
%     \iow_term:x { \str_range:nnn { abcdefg } { 2 } { 5 } }
%     \iow_term:x { \str_range:nnn { abcdefg } { 2 } { -3 } }
%     \iow_term:x { \str_range:nnn { abcdefg } { -6 } { 5 } }
%     \iow_term:x { \str_range:nnn { abcdefg } { -6 } { -3 } }
%
%     \iow_term:x { \str_range:nnn { abc~efg } { 2 } { 5 } }
%     \iow_term:x { \str_range:nnn { abc~efg } { 2 } { -3 } }
%     \iow_term:x { \str_range:nnn { abc~efg } { -6 } { 5 } }
%     \iow_term:x { \str_range:nnn { abc~efg } { -6 } { -3 } }
%
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcdefg } { 2 } { 5 } }
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcdefg } { 2 } { -3 } }
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcdefg } { -6 } { 5 } }
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcdefg } { -6 } { -3 } }
%
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcd~efg } { 2 } { 5 } }
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcd~efg } { 2 } { -3 } }
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcd~efg } { -6 } { 5 } }
%     \iow_term:x { \str_range_ignore_spaces:nnn { abcd~efg } { -6 } { -3 } }
%   \end{verbatim}
%   will print four instances of |bcde|, four instances of |bc e| and eight
%   instances of |bcde|.
% ^^A\end{function}
%
% \section{Modifying string variables}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_replace_once:Nnn,  \str_replace_once:cnn,
%     \str_greplace_once:Nnn, \str_greplace_once:cnn
%   }
%   \begin{syntax}
%     \cs{str_replace_once:Nnn} \meta{str~var} \Arg{old} \Arg{new}
%   \end{syntax}
%   Converts the \meta{old} and \meta{new} token lists to strings, then
%   replaces the first (leftmost) occurrence of \meta{old string} in the
%   \meta{str~var} with \meta{new string}.
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_replace_all:Nnn, \str_replace_all:cnn,
%     \str_greplace_all:Nnn, \str_greplace_all:cnn
%   }
%   \begin{syntax}
%     \cs{str_replace_all:Nnn} \meta{str~var} \Arg{old} \Arg{new}
%   \end{syntax}
%   Converts the \meta{old} and \meta{new} token lists to strings, then
%   replaces all occurrences of \meta{old string} in the
%   \meta{str~var} with \meta{new string}.
%   As this function
%   operates from left to right, the pattern \meta{old string}
%   may remain after the replacement (see \cs{str_remove_all:Nn}
%   for an example).
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_remove_once:Nn,  \str_remove_once:cn,
%     \str_gremove_once:Nn, \str_gremove_once:cn
%   }
%   \begin{syntax}
%     \cs{str_remove_once:Nn} \meta{str~var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then
%   removes the first (leftmost) occurrence of \meta{string} from the
%   \meta{str~var}.
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_remove_all:Nn,  \str_remove_all:cn,
%     \str_gremove_all:Nn, \str_gremove_all:cn
%   }
%   \begin{syntax}
%     \cs{str_remove_all:Nn} \meta{str~var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then
%   removes all occurrences of \meta{string} from the
%   \meta{str~var}.
%   As this function
%   operates from left to right, the pattern \meta{string}
%   may remain after the removal, for instance,
%   \begin{quote}
%     \cs{str_set:Nn} \cs{l_tmpa_str} |{abbccd}|
%     \cs{str_remove_all:Nn} \cs{l_tmpa_str} |{bc}|
%   \end{quote}
%   results in \cs{l_tmpa_str} containing \texttt{abcd}.
% \end{function}
%
% \section{String manipulation}
%
% \begin{function}[EXP, added = 2019-11-26]
%    {
%      \str_lowercase:n, \str_lowercase:f,
%      \str_uppercase:n, \str_uppercase:f
%   }
%   \begin{syntax}
%     \cs{str_lowercase:n} \Arg{tokens}
%     \cs{str_uppercase:n} \Arg{tokens}
%   \end{syntax}
%   Converts the input \meta{tokens} to their string representation, as
%   described for \cs{tl_to_str:n}, and then to the lower or upper
%   case representation using a one-to-one mapping as described by the
%   Unicode Consortium file |UnicodeData.txt|.
%
%   These functions are intended for case changing programmatic data in
%   places where upper/lower case distinctions are meaningful. One example
%   would be automatically generating a function name from user input where
%   some case changing is needed. In this situation the input is programmatic,
%   not textual, case does have meaning and a language-independent one-to-one
%   mapping is appropriate. For example
%   \begin{verbatim}
%     \cs_new_protected:Npn \myfunc:nn #1#2
%       {
%         \cs_set_protected:cpn
%           {
%             user
%             \str_uppercase:f { \tl_head:n {#1} }
%             \str_lowercase:f { \tl_tail:n {#1} }
%           }
%           { #2 }
%       }
%   \end{verbatim}
%   would be used to generate a function with an auto-generated name consisting
%   of \texttt{user}, the upper case equivalent of the first item of the
%   supplied name, and the lower case equivalent of the rest of the input.
%
%   These functions should \emph{not} be used for
%   \begin{itemize}
%     \item Caseless comparisons: use \cs{str_foldcase:n} for this
%       situation (case folding is distinct from lower casing).
%     \item Case changing text for typesetting: see the
%       \cs[index=text_lowercase:n]{text_lowercase:n(n)},
%       \cs[index=text_uppercase:n]{text_uppercase:n(n)} and
%       \cs[index=text_titlecase:n]{text_titlecase:n(n)} functions which
%       correctly deal with context-dependence and other factors appropriate
%       to text case changing.
%   \end{itemize}
% \end{function}
%
% \begin{function}[EXP, added = 2019-11-26]
%   {\str_foldcase:n, \str_foldcase:V}
%   \begin{syntax}
%     \cs{str_foldcase:n} \Arg{tokens}
%   \end{syntax}
%   Converts the input \meta{tokens} to their string representation, as
%   described for \cs{tl_to_str:n}, and then folds the case of the resulting
%   \meta{string} to remove case information. The result of this process is
%   left in the input stream.
%
%   String folding is a process used for material such as identifiers rather
%   than for \enquote{text}. The folding provided by \cs{str_foldcase:n}
%   follows the mappings provided by the \href{http://www.unicode.org}^^A
%   {Unicode Consortium}, who
%   \href{http://www.unicode.org/faq/casemap_charprop.html#2}{state}:
%   \begin{quote}
%     Case folding is primarily used for caseless comparison of text, such
%     as identifiers in a computer program, rather than actual text
%     transformation. Case folding in Unicode is based on the lowercase
%     mapping, but includes additional changes to the source text to help make
%     it language-insensitive and consistent. As a result, case-folded text
%     should be used solely for internal processing and generally should not be
%     stored or displayed to the end user.
%   \end{quote}
%   The folding approach implemented by \cs{str_foldcase:n} follows the
%   \enquote{full} scheme defined by the Unicode Consortium
%   (\emph{e.g.}~\SS folds to \texttt{SS}). As case-folding is
%   a language-insensitive process, there is no special treatment of
%   Turkic input (\emph{i.e.}~\texttt{I} always folds to \texttt{i} and
%   not to \texttt{\i}).
% \end{function}
%
% \section{Viewing strings}
%
% \begin{function}[added = 2015-09-18, updated = 2021-04-29]
%   {\str_show:N, \str_show:c, \str_show:n}
%   \begin{syntax}
%     \cs{str_show:N} \meta{str~var}
%   \end{syntax}
%   Displays the content of the \meta{str~var} on the terminal.
% \end{function}
%
% \begin{function}[added = 2019-02-15, updated = 2021-04-29]
%   {\str_log:N, \str_log:c, \str_log:n}
%   \begin{syntax}
%     \cs{str_log:N} \meta{str~var}
%   \end{syntax}
%   Writes the content of the \meta{str~var} in the log file.
% \end{function}
%
% \section{Constant strings}
%
% \begin{variable}[added = 2015-09-19, updated = 2020-12-22, module = str]
%   {
%     \c_ampersand_str,
%     \c_atsign_str,
%     \c_backslash_str,
%     \c_left_brace_str,
%     \c_right_brace_str,
%     \c_circumflex_str,
%     \c_colon_str,
%     \c_dollar_str,
%     \c_hash_str,
%     \c_percent_str,
%     \c_tilde_str,
%     \c_underscore_str,
%     \c_zero_str
%   }
%   Constant strings, containing a single character token, with category
%   code $12$.
% \end{variable}
%
% \section{Scratch strings}
%
% \begin{variable}{\l_tmpa_str, \l_tmpb_str}
%   Scratch strings for local assignment. These are never used by
%   the kernel code, and so are safe for use with any \LaTeX3-defined
%   function. However, they may be overwritten by other non-kernel
%   code and so should only be used for short-term storage.
% \end{variable}
%
% \begin{variable}{\g_tmpa_str, \g_tmpb_str}
%   Scratch strings for global assignment. These are never used by
%   the kernel code, and so are safe for use with any \LaTeX3-defined
%   function. However, they may be overwritten by other non-kernel
%   code and so should only be used for short-term storage.
% \end{variable}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3str} implementation}
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=str>
%    \end{macrocode}
%
% \subsection{Internal auxiliaries}
%
% \begin{variable}{\s_@@_mark,\s_@@_stop}
%   Scan marks private to this module; they serve as argument delimiters
%   for the auxiliaries defined below.
%    \begin{macrocode}
\scan_new:N \s_@@_stop
\scan_new:N \s_@@_mark
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{
%     \@@_use_none_delimit_by_s_stop:w,
%     \@@_use_i_delimit_by_s_stop:nw
%   }
%   Both functions absorb everything up to and including the next
%   \cs{s_@@_stop}.  The |nw| version leaves its first (undelimited)
%   argument in the input stream, the |w| version leaves nothing.
%    \begin{macrocode}
\cs_new:Npn \@@_use_i_delimit_by_s_stop:nw #1#2 \s_@@_stop
  {#1}
\cs_new:Npn \@@_use_none_delimit_by_s_stop:w #1 \s_@@_stop
  { }
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\q_@@_recursion_tail,\q_@@_recursion_stop}
%   Quarks private to this module, used to mark the end of the material
%   processed by the recursive mapping code.
%    \begin{macrocode}
\quark_new:N \q_@@_recursion_stop
\quark_new:N \q_@@_recursion_tail
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{
%     \@@_if_recursion_tail_break:NN,
%     \@@_if_recursion_tail_stop_do:Nn
%   }
%   Tests acting on the module's recursion quarks, both created through
%   \cs{__kernel_quark_new_test:N}.
%    \begin{macrocode}
\__kernel_quark_new_test:N \@@_if_recursion_tail_stop_do:Nn
\__kernel_quark_new_test:N \@@_if_recursion_tail_break:NN
%    \end{macrocode}
% \end{macro}
%
% \subsection{Creating and setting string variables}
%
% \begin{macro}
%   {
%     \str_new:N, \str_new:c,
%     \str_use:N, \str_use:c,
%     \str_clear:N, \str_clear:c,
%     \str_gclear:N,\str_gclear:c,
%     \str_clear_new:N, \str_clear_new:c,
%     \str_gclear_new:N, \str_gclear_new:c
%   }
% \begin{macro}
%   {
%     \str_set_eq:NN,  \str_set_eq:cN,  \str_set_eq:Nc,  \str_set_eq:cc,
%     \str_gset_eq:NN, \str_gset_eq:cN, \str_gset_eq:Nc, \str_gset_eq:cc
%   }
% \begin{macro}
%   {\str_concat:NNN, \str_concat:ccc, \str_gconcat:NNN, \str_gconcat:ccc}
%   A string is simply a token list, so every function here is a copy of
%   the token list function of the same name.  The |N|-type functions
%   are copied one by one and the |c|-type variants are generated from
%   each copy.
%    \begin{macrocode}
\cs_new_eq:NN \str_new:N \tl_new:N
\cs_generate_variant:Nn \str_new:N { c }
\cs_new_eq:NN \str_use:N \tl_use:N
\cs_generate_variant:Nn \str_use:N { c }
\cs_new_eq:NN \str_clear:N \tl_clear:N
\cs_generate_variant:Nn \str_clear:N { c }
\cs_new_eq:NN \str_gclear:N \tl_gclear:N
\cs_generate_variant:Nn \str_gclear:N { c }
\cs_new_eq:NN \str_clear_new:N \tl_clear_new:N
\cs_generate_variant:Nn \str_clear_new:N { c }
\cs_new_eq:NN \str_gclear_new:N \tl_gclear_new:N
\cs_generate_variant:Nn \str_gclear_new:N { c }
\cs_new_eq:NN \str_set_eq:NN \tl_set_eq:NN
\cs_generate_variant:Nn \str_set_eq:NN { c , Nc , cc }
\cs_new_eq:NN \str_gset_eq:NN \tl_gset_eq:NN
\cs_generate_variant:Nn \str_gset_eq:NN { c , Nc , cc }
\cs_new_eq:NN \str_concat:NNN \tl_concat:NNN
\cs_generate_variant:Nn \str_concat:NNN { ccc }
\cs_new_eq:NN \str_gconcat:NNN \tl_gconcat:NNN
\cs_generate_variant:Nn \str_gconcat:NNN { ccc }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}
%   {
%     \str_set:Nn, \str_set:NV, \str_set:Nx,
%     \str_set:cn, \str_set:cV, \str_set:cx,
%     \str_gset:Nn, \str_gset:NV, \str_gset:Nx,
%     \str_gset:cn, \str_gset:cV, \str_gset:cx,
%     \str_const:Nn, \str_const:NV, \str_const:Nx,
%     \str_const:cn, \str_const:cV, \str_const:cx,
%     \str_put_left:Nn, \str_put_left:NV, \str_put_left:Nx,
%     \str_put_left:cn, \str_put_left:cV, \str_put_left:cx,
%     \str_gput_left:Nn, \str_gput_left:NV, \str_gput_left:Nx,
%     \str_gput_left:cn, \str_gput_left:cV, \str_gput_left:cx,
%     \str_put_right:Nn, \str_put_right:NV, \str_put_right:Nx,
%     \str_put_right:cn, \str_put_right:cV, \str_put_right:cx,
%     \str_gput_right:Nn, \str_gput_right:NV, \str_gput_right:Nx,
%     \str_gput_right:cn, \str_gput_right:cV, \str_gput_right:cx
%   }
%   Each function hands its second argument, wrapped in
%   \cs{tl_to_str:n}, to the |x|-type token list function of the same
%   name, so that what is stored is always a \meta{string}.  The
%   remaining variants are generated from each |Nn| function.
%    \begin{macrocode}
\cs_new_protected:Npn \str_set:Nn #1#2
  { \tl_set:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_set:Nn { NV , Nx , cn , cV , cx }
\cs_new_protected:Npn \str_gset:Nn #1#2
  { \tl_gset:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_gset:Nn { NV , Nx , cn , cV , cx }
\cs_new_protected:Npn \str_const:Nn #1#2
  { \tl_const:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_const:Nn { NV , Nx , cn , cV , cx }
\cs_new_protected:Npn \str_put_left:Nn #1#2
  { \tl_put_left:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_put_left:Nn { NV , Nx , cn , cV , cx }
\cs_new_protected:Npn \str_gput_left:Nn #1#2
  { \tl_gput_left:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_gput_left:Nn { NV , Nx , cn , cV , cx }
\cs_new_protected:Npn \str_put_right:Nn #1#2
  { \tl_put_right:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_put_right:Nn { NV , Nx , cn , cV , cx }
\cs_new_protected:Npn \str_gput_right:Nn #1#2
  { \tl_gput_right:Nx #1 { \tl_to_str:n {#2} } }
\cs_generate_variant:Nn \str_gput_right:Nn { NV , Nx , cn , cV , cx }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Modifying string variables}
%
% \begin{macro}
%   {
%     \str_replace_all:Nnn,   \str_replace_all:cnn,
%     \str_greplace_all:Nnn,  \str_greplace_all:cnn,
%     \str_replace_once:Nnn,  \str_replace_once:cnn,
%     \str_greplace_once:Nnn, \str_greplace_once:cnn
%   }
% \begin{macro}{\@@_replace:NNNnn}
% \begin{macro}{\@@_replace_aux:NNNnnn}
% \begin{macro}{\@@_replace_next:w}
%   Start by applying \cs{tl_to_str:n} to convert the old and new token
%   lists to strings, and also apply \cs{tl_to_str:N} to avoid any
%   issues if we are fed a token list variable.  Then the code is a much
%   simplified version of the token list code because neither the
%   delimiter nor the replacement can contain macro parameters or
%   braces.  The end marker \cs{s_@@_stop} cannot appear in the string to
%   edit so it is used in all cases.  Some |x|-expansion is unnecessary.
%   There is no need to avoid losing braces nor to protect against
%   expansion.  The ending code is much simplified and does not need to
%   hide in braces.
%
%   The user-level functions only choose two tokens passed to
%   \cs{@@_replace:NNNnn}: what follows each replacement
%   (\cs{prg_do_nothing:} to stop after the first occurrence,
%   \cs{@@_replace_next:w} to carry on), and the local or global
%   |x|-type setter.  An empty search pattern raises the kernel error
%   |empty-search-pattern| and leaves the variable untouched.
%
%   In \cs{@@_replace_aux:NNNnnn}, |#4| is the string to edit, |#5| the
%   string to search for and |#6| its replacement.  The function
%   \cs{@@_replace_next:w} is redefined at each call to be delimited by
%   |#5|; it leaves what precedes the match, then |#6|, then |#1|.  The
%   extra copy of |#5| placed after
%   \cs{@@_use_none_delimit_by_s_stop:w} guarantees that a match is
%   always found; when that last copy is the one matched, the gobbling
%   function is left in front of the spurious replacement and removes
%   everything up to \cs{s_@@_stop}.  The final \cs{cs_new_eq:NN} line
%   only declares the name \cs{@@_replace_next:w}.
%    \begin{macrocode}
\cs_new_protected:Npn \str_replace_once:Nnn
  { \@@_replace:NNNnn \prg_do_nothing: \__kernel_tl_set:Nx  }
\cs_new_protected:Npn \str_greplace_once:Nnn
  { \@@_replace:NNNnn \prg_do_nothing: \__kernel_tl_gset:Nx }
\cs_new_protected:Npn \str_replace_all:Nnn
  { \@@_replace:NNNnn \@@_replace_next:w \__kernel_tl_set:Nx  }
\cs_new_protected:Npn \str_greplace_all:Nnn
  { \@@_replace:NNNnn \@@_replace_next:w \__kernel_tl_gset:Nx }
\cs_generate_variant:Nn \str_replace_once:Nnn  { c }
\cs_generate_variant:Nn \str_greplace_once:Nnn { c }
\cs_generate_variant:Nn \str_replace_all:Nnn   { c }
\cs_generate_variant:Nn \str_greplace_all:Nnn  { c }
\cs_new_protected:Npn \@@_replace:NNNnn #1#2#3#4#5
  {
    \tl_if_empty:nTF {#4}
      {
        \msg_error:nnx { kernel } { empty-search-pattern } {#5}
      }
      {
        \use:x
          {
            \exp_not:n { \@@_replace_aux:NNNnnn #1 #2 #3 }
              { \tl_to_str:N #3 }
              { \tl_to_str:n {#4} } { \tl_to_str:n {#5} }
          }
      }
  }
\cs_new_protected:Npn \@@_replace_aux:NNNnnn #1#2#3#4#5#6
  {
    \cs_set:Npn \@@_replace_next:w ##1 #5 { ##1 #6 #1 }
    #2 #3
      {
        \@@_replace_next:w
        #4
        \@@_use_none_delimit_by_s_stop:w
        #5
        \s_@@_stop
      }
  }
\cs_new_eq:NN \@@_replace_next:w ?
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\str_remove_once:Nn, \str_remove_once:cn}
% \begin{macro}{\str_gremove_once:Nn, \str_gremove_once:cn}
%   Removing the first occurrence of a string amounts to replacing it
%   once by nothing.
%    \begin{macrocode}
\cs_new_protected:Npn \str_gremove_once:Nn #1#2
  { \str_greplace_once:Nnn #1 {#2} { } }
\cs_generate_variant:Nn \str_gremove_once:Nn { c }
\cs_new_protected:Npn \str_remove_once:Nn #1#2
  { \str_replace_once:Nnn #1 {#2} { } }
\cs_generate_variant:Nn \str_remove_once:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\str_remove_all:Nn, \str_remove_all:cn}
% \begin{macro}{\str_gremove_all:Nn, \str_gremove_all:cn}
%   Removing every occurrence of a string amounts to replacing all of
%   them by nothing.
%    \begin{macrocode}
\cs_new_protected:Npn \str_gremove_all:Nn #1#2
  { \str_greplace_all:Nnn #1 {#2} { } }
\cs_generate_variant:Nn \str_gremove_all:Nn { c }
\cs_new_protected:Npn \str_remove_all:Nn #1#2
  { \str_replace_all:Nnn #1 {#2} { } }
\cs_generate_variant:Nn \str_remove_all:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{String comparisons}
%
% \begin{macro}[pTF, EXP]
%   {
%     \str_if_empty:N, \str_if_empty:c, \str_if_empty:n,
%     \str_if_exist:N, \str_if_exist:c
%   }
%   These conditionals are plain copies of the corresponding token list
%   conditionals, in all four forms.
%    \begin{macrocode}
\prg_new_eq_conditional:NNn \str_if_empty:N \tl_if_empty:N { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:c \tl_if_empty:c { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:n \tl_if_empty:n { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_exist:N \tl_if_exist:N { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_exist:c \tl_if_exist:c { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_if_eq:nn}
%   String comparisons rely on the primitive \cs[index=pdfstrcmp]{(pdf)strcmp},
%   so we define a new name for it.  Being a plain copy of
%   \cs{tex_strcmp:D}, it takes two braced arguments and, as used by the
%   conditionals below, expands to |-1|, |0| or~|1|.
%    \begin{macrocode}
\cs_new_eq:NN \@@_if_eq:nn \tex_strcmp:D
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF, EXP]{\str_compare:nNn, \str_compare:eNe}
%   Both conditionals compare the result of \cs{@@_if_eq:nn}, which
%   expands to |-1|, |0| or~|1|, with zero, using the relation given as
%   the middle argument.  The |nNn| version wraps both strings in
%   \cs{exp_not:n}; the |eNe| version passes them as they are and is
%   created directly because it is more efficient.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_compare:eNe #1#2#3 { p , T , F , TF }
  {
    \if_int_compare:w
      \@@_if_eq:nn {#1} {#3}
      #2 \c_zero_int
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_new_conditional:Npnn \str_compare:nNn #1#2#3 { p , T , F , TF }
  {
    \if_int_compare:w
      \@@_if_eq:nn { \exp_not:n {#1} } { \exp_not:n {#3} }
      #2 \c_zero_int
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF, EXP]
%   {
%     \str_if_eq:nn, \str_if_eq:Vn, \str_if_eq:on, \str_if_eq:nV,
%     \str_if_eq:no, \str_if_eq:VV,
%     \str_if_eq:ee
%   }
%   The comparison primitive returns a number rather than acting as a
%   conditional, so these functions wrap it to make life a bit clearer.
%   The \texttt{nn} and \texttt{ee} versions are created directly as
%   this is most efficient.  When the two lists match,
%   \cs{@@_if_eq:nn} expands to |0| as an explicit character with
%   category 12 (and to |-1| or |1| otherwise), so a character test with
%   \cs{if:w} is enough, and is faster than \cs{if_int_compare:w}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_if_eq:ee #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn {#1} {#2}
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_new_conditional:Npnn \str_if_eq:nn #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn { \exp_not:n {#1} } { \exp_not:n {#2} }
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_generate_conditional_variant:Nnn \str_if_eq:nn
  { V , v , o , nV , no , VV , nv }
  { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP, pTF]
%   {\str_if_eq:NN, \str_if_eq:Nc, \str_if_eq:cN, \str_if_eq:cc}
%   Unlike \cs{tl_if_eq:NNTF}, \cs{str_if_eq:NNTF} has to ignore
%   category codes: both variables go through \cs{tl_to_str:N} before
%   being compared.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_if_eq:NN #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn { \tl_to_str:N #1 } { \tl_to_str:N #2 }
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_generate_conditional_variant:Nnn \str_if_eq:NN
  { c , Nc , cc }
  { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[TF]{\str_if_in:Nn, \str_if_in:cn, \str_if_in:nn}
%   Once both arguments are detokenized this is an ordinary token list
%   test: the \cs{use:x} step performs the conversion to strings before
%   \cs{tl_if_in:nnTF} runs.  Calling the matching |T|, |F| or |TF|
%   variant of \cs{tl_if_in:nnTF} directly would be faster, but takes
%   more code.
%    \begin{macrocode}
\prg_new_protected_conditional:Npnn \str_if_in:nn #1#2 { T , F , TF }
  {
    \use:x
      { \tl_if_in:nnTF { \tl_to_str:n {#1} } { \tl_to_str:n {#2} } }
      { \prg_return_true: }
      { \prg_return_false: }
  }
\prg_new_protected_conditional:Npnn \str_if_in:Nn #1#2 { T , F , TF }
  {
    \use:x
      { \tl_if_in:nnTF { \tl_to_str:N #1 } { \tl_to_str:n {#2} } }
      { \prg_return_true: }
      { \prg_return_false: }
  }
\prg_generate_conditional_variant:Nnn \str_if_in:Nn { c } { T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP, noTF]
%   {\str_case:nn, \str_case:Vn, \str_case:Nn, \str_case:on, \str_case:nV, \str_case:nv, \str_case_e:nn}
% \begin{macro}[EXP]{\@@_case:nnTF, \@@_case_e:nnTF}
% \begin{macro}[EXP]
%   {\@@_case:nw, \@@_case_e:nw, \@@_case_end:nw}
%   Much the same as \cs{tl_case:nnTF} here:
%   just a change in the internal comparison.
%
%   Each user function starts an \cs{exp:w} expansion, which is ended by
%   the \cs{exp_end:} left by \cs{@@_case_end:nw}.  The auxiliary
%   \cs{@@_case:nnTF} (or its |e|-type counterpart) lays out the test
%   string, the list of cases, then the test string itself with empty
%   code as a final case which always matches, and finally the
%   \meta{true code} and \meta{false code}, each preceded by
%   \cs{s_@@_mark}.  The loop \cs{@@_case:nw} compares the test string
%   with one case at a time using \cs{str_if_eq:nnTF}
%   (\cs{str_if_eq:eeTF} for \cs{@@_case_e:nw}).  On a match,
%   \cs{@@_case_end:nw} receives the code of that case as~|#1| and
%   drops everything up to the first \cs{s_@@_mark} that follows a
%   further item; |#4|~is then the \meta{true code} for a real match,
%   but the \meta{false code} when the match was the final case added
%   by the auxiliary, since in that situation the first \cs{s_@@_mark}
%   is grabbed as the undelimited argument~|#2|.
%    \begin{macrocode}
\cs_new:Npn \str_case:nn #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} { } { }
  }
\cs_new:Npn \str_case:nnT #1#2#3
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} {#3} { }
  }
\cs_new:Npn \str_case:nnF #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} { }
  }
\cs_new:Npn \str_case:nnTF #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2}
  }
\cs_new:Npn \@@_case:nnTF #1#2#3#4
  { \@@_case:nw {#1} #2 {#1} { } \s_@@_mark {#3} \s_@@_mark {#4} \s_@@_stop }
\cs_generate_variant:Nn \str_case:nn   { V , o , nV , nv }
\prg_generate_conditional_variant:Nnn \str_case:nn
  { V , o , nV , nv } { T , F , TF }
\cs_new_eq:NN \str_case:Nn   \str_case:Vn
\cs_new_eq:NN \str_case:NnT  \str_case:VnT
\cs_new_eq:NN \str_case:NnF  \str_case:VnF
\cs_new_eq:NN \str_case:NnTF \str_case:VnTF
\cs_new:Npn \@@_case:nw #1#2#3
  {
    \str_if_eq:nnTF {#1} {#2}
      { \@@_case_end:nw {#3} }
      { \@@_case:nw {#1} }
  }
\cs_new:Npn \str_case_e:nn #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} { } { }
  }
\cs_new:Npn \str_case_e:nnT #1#2#3
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} {#3} { }
  }
\cs_new:Npn \str_case_e:nnF #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} { }
  }
\cs_new:Npn \str_case_e:nnTF #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2}
  }
\cs_new:Npn \@@_case_e:nnTF #1#2#3#4
  { \@@_case_e:nw {#1} #2 {#1} { } \s_@@_mark {#3} \s_@@_mark {#4} \s_@@_stop }
\cs_new:Npn \@@_case_e:nw #1#2#3
  {
    \str_if_eq:eeTF {#1} {#2}
      { \@@_case_end:nw {#3} }
      { \@@_case_e:nw {#1} }
  }
\cs_new:Npn \@@_case_end:nw #1#2#3 \s_@@_mark #4#5 \s_@@_stop
  { \exp_end: #1 #4 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Mapping over strings}
%
% \begin{macro}[rEXP]{\str_map_function:NN, \str_map_function:cN}
% \begin{macro}[rEXP]{\str_map_function:nN}
% \begin{macro}{\str_map_inline:Nn, \str_map_inline:cn}
% \begin{macro}{\str_map_inline:nn}
% \begin{macro}{\str_map_variable:NNn, \str_map_variable:cNn}
% \begin{macro}{\str_map_variable:nNn}
% \begin{macro}{\str_map_break:}
% \begin{macro}{\str_map_break:n}
% \begin{macro}[rEXP]{\@@_map_function:w, \@@_map_function:nn}
% \begin{macro}{\@@_map_inline:NN, \@@_map_variable:NnN}
%   The inline and variable mappings are similar to the usual token list
%   mappings but start out by turning the argument to an ``other
%   string''.  Doing the same for the expandable function mapping would
%   require \cs{__kernel_str_to_other:n}, quadratic in the string length.  To deal
%   with spaces in that case, \cs{@@_map_function:w} replaces the
%   following space by a braced space and a further call to itself.
%   These are received by \cs{@@_map_function:nn}, which passes
%   the space to |#1| and calls \cs{@@_map_function:w} to deal with the
%   next space.  The space before the braced space makes it possible to
%   optimize the \cs{q_@@_recursion_tail} test.  Of course we need to include a
%   trailing space (the question mark is needed to avoid losing the
%   space when \TeX{} tokenizes the line).
%   At the cost of about three more auxiliaries this code could get a $9$
%   times speed up by testing only every $9$-th character for whether it
%   is \cs{q_@@_recursion_tail} (also by converting $9$ spaces at a time in
%   the \cs{str_map_function:nN} case).
%
%   For the \texttt{map_variable} functions we use a string assignment
%   to store each character because spaces are made catcode~$12$ before
%   the loop.
%
%   The inline mapping stores its code in a function whose name is
%   built from \cs{g__kernel_prg_map_int}; this integer is incremented
%   before the loop and decremented at the break point.  In
%   \cs{@@_map_inline:NN} each character goes through
%   \cs{token_to_str:N} before being passed to that function.
%   Both \cs{str_map_break:} and \cs{str_map_break:n} call
%   \cs{prg_map_break:Nn} with \cs{str_map_break:} as the marker that
%   matches the \cs{prg_break_point:Nn} ending every mapping above.
%    \begin{macrocode}
\cs_new:Npn \str_map_function:nN #1#2
  {
    \exp_after:wN \@@_map_function:w
    \exp_after:wN \@@_map_function:nn \exp_after:wN #2
      \__kernel_tl_to_str:w {#1}
      \q_@@_recursion_tail ? ~
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new:Npn \str_map_function:NN
  { \exp_args:No \str_map_function:nN }
\cs_new:Npn \@@_map_function:w #1 ~
  { #1 { ~ { ~ } \@@_map_function:w } }
\cs_new:Npn \@@_map_function:nn #1#2
  {
    \if_meaning:w \q_@@_recursion_tail #2
      \exp_after:wN \str_map_break:
    \fi:
    #1 #2 \@@_map_function:nn {#1}
  }
\cs_generate_variant:Nn \str_map_function:NN { c }
\cs_new_protected:Npn \str_map_inline:nn #1#2
  {
    \int_gincr:N \g__kernel_prg_map_int
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    \use:x
      {
        \exp_not:N \@@_map_inline:NN
        \exp_not:c { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
        \__kernel_str_to_other_fast:n {#1}
      }
      \q_@@_recursion_tail
    \prg_break_point:Nn \str_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
\cs_new_protected:Npn \str_map_inline:Nn
  { \exp_args:No \str_map_inline:nn }
\cs_generate_variant:Nn \str_map_inline:Nn { c }
\cs_new:Npn \@@_map_inline:NN #1#2
  {
    \@@_if_recursion_tail_break:NN #2 \str_map_break:
    \exp_args:No #1 { \token_to_str:N #2 }
    \@@_map_inline:NN #1
  }
\cs_new_protected:Npn \str_map_variable:nNn #1#2#3
  {
    \use:x
      {
        \exp_not:n { \@@_map_variable:NnN #2 {#3} }
        \__kernel_str_to_other_fast:n {#1}
      }
      \q_@@_recursion_tail
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new_protected:Npn \str_map_variable:NNn
  { \exp_args:No \str_map_variable:nNn }
\cs_new_protected:Npn \@@_map_variable:NnN #1#2#3
  {
    \@@_if_recursion_tail_break:NN #3 \str_map_break:
    \str_set:Nn #1 {#3}
    \use:n {#2}
    \@@_map_variable:NnN #1 {#2}
  }
\cs_generate_variant:Nn \str_map_variable:NNn { c }
\cs_new:Npn \str_map_break:
  { \prg_map_break:Nn \str_map_break: { } }
\cs_new:Npn \str_map_break:n
  { \prg_map_break:Nn \str_map_break: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[rEXP]{\str_map_tokens:Nn, \str_map_tokens:cn}
% \begin{macro}[rEXP]{\str_map_tokens:nn}
%   Uses an auxiliary of \cs{str_map_function:NN}.  The only difference
%   is that the code~|#2| to apply is passed as a braced argument to
%   \cs{@@_map_function:nn} rather than as a single token.  The
%   \cs{exp_args:Nno} step converts~|#1| to a string before
%   \cs{use:nn} places it after that code, and the trailing
%   \cs{q_@@_recursion_tail}, question mark and space play the same
%   role as in \cs{str_map_function:nN}.
%    \begin{macrocode}
\cs_new:Npn \str_map_tokens:nn #1#2
  {
    \exp_args:Nno \use:nn
      { \@@_map_function:w \@@_map_function:nn {#2} }
      { \__kernel_tl_to_str:w {#1} }
      \q_@@_recursion_tail ? ~
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new:Npn \str_map_tokens:Nn { \exp_args:No \str_map_tokens:nn }
\cs_generate_variant:Nn \str_map_tokens:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Accessing specific characters in a string}
%
% \begin{macro}[EXP]{\__kernel_str_to_other:n}
% \begin{macro}[EXP]{\@@_to_other_loop:w, \@@_to_other_end:w}
%   First apply \cs{tl_to_str:n}, then replace all spaces by
%   \enquote{other} spaces, $8$ at a time, storing the converted part of
%   the string between the \cs{s_@@_mark} and \cs{s_@@_stop} markers.  The end
%   is detected when \cs{@@_to_other_loop:w} finds one of the trailing
%   |A|, distinguished from any contents of the initial token list by
%   their category.  Then \cs{@@_to_other_end:w} is called, and finds
%   the result between \cs{s_@@_mark} and the first |A| (well, there is
%   also the need to remove a space).
%
%   The \enquote{other} spaces are written as |*| in the definitions:
%   the lowercase code of |*| is set to that of a space, so that
%   \cs{tex_lowercase:D} turns each |*| into a space character while
%   keeping the category code of the |*|.  The lowercase code of |A| is
%   set to |A| itself so that the marker is unchanged.  These settings
%   are made in a group, closed by the \cs{group_end:} which starts the
%   lowercased material.
%    \begin{macrocode}
\cs_new:Npn \__kernel_str_to_other:n #1
  {
    \exp_after:wN \@@_to_other_loop:w
      \tl_to_str:n {#1} ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ \s_@@_mark \s_@@_stop
  }
\group_begin:
\tex_lccode:D `\* = `\  %
\tex_lccode:D `\A = `\A %
\tex_lowercase:D
  {
    \group_end:
    \cs_new:Npn \@@_to_other_loop:w
      #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 \s_@@_stop
      {
        \if_meaning:w A #8
          \@@_to_other_end:w
        \fi:
        \@@_to_other_loop:w
        #9 #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * \s_@@_stop
      }
    \cs_new:Npn \@@_to_other_end:w \fi: #1 \s_@@_mark #2 * A #3 \s_@@_stop
      { \fi: #2 }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[rEXP]{\__kernel_str_to_other_fast:n}
% \begin{macro}[rEXP]{\@@_to_other_fast_loop:w, \@@_to_other_fast_end:w}
%   The difference with \cs{__kernel_str_to_other:n} is that the converted part is
%   left in the input stream, making these commands only
%   restricted-expandable.  Each step of \cs{@@_to_other_fast_loop:w}
%   grabs $9$ space-delimited pieces, leaves them separated by
%   \enquote{other} spaces (written |*| and produced by the same
%   \cs{tex_lowercase:D} technique as above) and calls itself again.
%   The loop ends when the ninth piece starts with one of the trailing
%   |A|: \cs{@@_to_other_fast_end:w} then keeps what precedes the first
%   |*|~|A| pair and discards the rest up to \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new:Npn \__kernel_str_to_other_fast:n #1
  {
    \exp_after:wN \@@_to_other_fast_loop:w \tl_to_str:n {#1} ~
      A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ \s_@@_stop
  }
\group_begin:
\tex_lccode:D `\* = `\  %
\tex_lccode:D `\A = `\A %
\tex_lowercase:D
  {
    \group_end:
    \cs_new:Npn \@@_to_other_fast_loop:w
      #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 ~
      {
        \if_meaning:w A #9
          \@@_to_other_fast_end:w
        \fi:
        #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * #9
        \@@_to_other_fast_loop:w *
      }
    \cs_new:Npn \@@_to_other_fast_end:w #1 * A #2 \s_@@_stop {#1}
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_item:Nn, \str_item:cn, \str_item:nn, \str_item_ignore_spaces:nn}
% \begin{macro}[EXP]{\@@_item:nn, \@@_item:w}
%   The \cs{str_item:nn} hands its argument with spaces escaped to
%   \cs{@@_item:nn}, and makes sure to turn the result back into
%   a proper string (with category code~$10$ spaces) eventually.  The
%   \cs{str_item_ignore_spaces:nn} function does not escape spaces,
%   which are thus ignored by \cs{@@_item:nn} since
%   everything else is done with undelimited arguments.
%   Evaluate the \meta{index} argument~|#2| and count characters in
%   the string, passing those two numbers to \cs{@@_item:w} for
%   further analysis.  If the \meta{index} is negative, shift it by
%   the \meta{count} to know how many characters to discard, and if
%   that is still negative give an empty result.  If the \meta{index}
%   is larger than the \meta{count}, give an empty result, and
%   otherwise discard $\meta{index}-1$ characters before returning the
%   following one.  The shift by $-1$ is obtained by inserting an empty
%   brace group before the string in that case: that brace group also
%   covers the case where the \meta{index} is zero.
%
%   In \cs{@@_item:w}, |#1|~is the evaluated \meta{index} and |#2|~the
%   \meta{count}; the string itself follows, ended by \cs{s_@@_stop}.
%   An empty result is obtained with
%   \cs{@@_use_none_delimit_by_s_stop:w}, while
%   \cs{@@_use_i_delimit_by_s_stop:nw} keeps the first item left once
%   \cs{@@_skip_exp_end:w} has removed the unwanted characters.
%    \begin{macrocode}
\cs_new:Npn \str_item:Nn { \exp_args:No \str_item:nn }
\cs_generate_variant:Nn \str_item:Nn { c }
\cs_new:Npn \str_item:nn #1#2
  {
    \exp_args:Nf \tl_to_str:n
      {
        \exp_args:Nf \@@_item:nn
          { \__kernel_str_to_other:n {#1} } {#2}
      }
  }
\cs_new:Npn \str_item_ignore_spaces:nn #1
  { \exp_args:No \@@_item:nn { \tl_to_str:n {#1} } }
\cs_new:Npn \@@_item:nn #1#2
  {
    \exp_after:wN \@@_item:w
    \int_value:w \int_eval:n {#2} \exp_after:wN ;
    \int_value:w \@@_count:n {#1} ;
    #1 \s_@@_stop
  }
\cs_new:Npn \@@_item:w #1; #2;
  {
    \int_compare:nNnTF {#1} < 0
      {
        \int_compare:nNnTF {#1} < {-#2}
          { \@@_use_none_delimit_by_s_stop:w }
          {
            \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
            \exp:w \exp_after:wN \@@_skip_exp_end:w
              \int_value:w \int_eval:n { #1 + #2 } ;
          }
      }
      {
        \int_compare:nNnTF {#1} > {#2}
          { \@@_use_none_delimit_by_s_stop:w }
          {
            \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
            \exp:w \@@_skip_exp_end:w #1 ; { }
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_skip_exp_end:w}
% \begin{macro}[EXP]
%   {\@@_skip_loop:wNNNNNNNN, \@@_skip_end:w, \@@_skip_end:NNNNNNNN}
%   Removes |max(#1,0)| characters from the input stream, and then
%   leaves \cs{exp_end:}.  This should be expanded using
%   \cs{exp:w}.  We remove characters $8$ at a time until
%   there are at most $8$ to remove.  Then we do a dirty trick: the
%   \cs{if_case:w} construction leaves between $0$ and $8$ times the
%   \cs{or:} control sequence, and those \cs{or:} become arguments of
%   \cs{@@_skip_end:NNNNNNNN}.  If the number of characters to remove
%   is $6$, say, then there are two \cs{or:} left, and the $8$ arguments
%   of \cs{@@_skip_end:NNNNNNNN} are the two \cs{or:}, and $6$
%   characters from the input stream, exactly what we wanted to
%   remove. Then close the \cs{if_case:w} conditional with \cs{fi:}, and
%   stop the initial expansion with \cs{exp_end:} (see places where
%   \cs{@@_skip_exp_end:w} is called).
%
%   In the \cs{else:} branch of \cs{@@_skip_exp_end:w}, the integer
%   evaluation placed before \cs{fi:} is what closes the conditional
%   before \cs{@@_skip_end:w} grabs the number.  In
%   \cs{@@_skip_end:w} a count which is not positive is replaced
%   by~$0$, as is done in \cs{@@_collect_end:wn}: a negative value
%   given directly to \cs{if_case:w} would match no case and make \TeX{}
%   skip tokens of the string while looking for the closing \cs{fi:}.
%    \begin{macrocode}
\cs_new:Npn \@@_skip_exp_end:w #1;
  {
    \if_int_compare:w #1 > 8 \exp_stop_f:
      \exp_after:wN \@@_skip_loop:wNNNNNNNN
    \else:
      \exp_after:wN \@@_skip_end:w
      \int_value:w \int_eval:w
    \fi:
    #1 ;
  }
\cs_new:Npn \@@_skip_loop:wNNNNNNNN #1; #2#3#4#5#6#7#8#9
  {
    \exp_after:wN \@@_skip_exp_end:w
      \int_value:w \int_eval:n { #1 - 8 } ;
  }
\cs_new:Npn \@@_skip_end:w #1 ;
  {
    \exp_after:wN \@@_skip_end:NNNNNNNN
    \if_case:w \if_int_compare:w #1 > \c_zero_int
      #1 \else: 0 \fi: \exp_stop_f:
      \or: \or: \or: \or: \or: \or: \or: \or:
  }
\cs_new:Npn \@@_skip_end:NNNNNNNN #1#2#3#4#5#6#7#8 { \fi: \exp_end: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {
%     \str_range:Nnn, \str_range:cnn, \str_range:nnn,
%     \str_range_ignore_spaces:nnn
%   }
% \begin{macro}[EXP]{\@@_range:nnn}
% \begin{macro}[EXP]{\@@_range:w, \@@_range:nnw}
%   Sanitize the string.  Then evaluate the arguments.  At this stage we
%   also decrement the \meta{start index}, since our goal is to know how
%   many characters should be removed.  Then limit the range to be
%   non-negative and at most the length of the string (this avoids
%   needing to check for the end of the string when grabbing
%   characters), shifting negative numbers by the appropriate amount.
%   Afterwards, skip characters, then keep some more, and finally drop
%   the end of the string.
%
%   The auxiliary \cs{@@_range:w} receives the character count, the
%   decremented \meta{start index} and the \meta{end index}, in that
%   order, and normalizes the last two against the count.  In
%   \cs{@@_range:nnw}, |#1|~is then the number of characters to skip
%   and |#2|~the position of the last character to keep, so that
%   $|#2| - |#1|$ characters are collected after the skipping step.
%    \begin{macrocode}
\cs_new:Npn \str_range:Nnn { \exp_args:No \str_range:nnn }
\cs_generate_variant:Nn \str_range:Nnn { c }
\cs_new:Npn \str_range:nnn #1#2#3
  {
    \exp_args:Nf \tl_to_str:n
      {
        \exp_args:Nf \@@_range:nnn
          { \__kernel_str_to_other:n {#1} } {#2} {#3}
      }
  }
\cs_new:Npn \str_range_ignore_spaces:nnn #1
  { \exp_args:No \@@_range:nnn { \tl_to_str:n {#1} } }
\cs_new:Npn \@@_range:nnn #1#2#3
  {
    \exp_after:wN \@@_range:w
    \int_value:w \@@_count:n {#1} \exp_after:wN ;
    \int_value:w \int_eval:n { (#2) - 1 } \exp_after:wN ;
    \int_value:w \int_eval:n {#3} ;
    #1 \s_@@_stop
  }
\cs_new:Npn \@@_range:w #1; #2; #3;
  {
    \exp_args:Nf \@@_range:nnw
      { \@@_range_normalize:nn {#2} {#1} }
      { \@@_range_normalize:nn {#3} {#1} }
  }
\cs_new:Npn \@@_range:nnw #1#2
  {
    \exp_after:wN \@@_collect_delimit_by_q_stop:w
    \int_value:w \int_eval:n { #2 - #1 } \exp_after:wN ;
    \exp:w \@@_skip_exp_end:w #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \begin{macro}[EXP]{\@@_range_normalize:nn}
%   This function converts an \meta{index} argument into an explicit
%   position in the string (a result of $0$ denoting \enquote{out of
%     bounds}).  Expects two explicit integer arguments: the
%   \meta{index} |#1| and the string count~|#2|.  If |#1| is negative,
%   replace it by $|#1| + |#2| + 1$, then limit to the range $[0,
%   |#2|]$.  In detail, a negative |#1| below $-|#2|$ gives~$0$, and a
%   non-negative |#1| gives the smaller of |#1| and~|#2|.
%    \begin{macrocode}
\cs_new:Npn \@@_range_normalize:nn #1#2
  {
    \int_eval:n
      {
        \if_int_compare:w #1 < \c_zero_int
          \if_int_compare:w #1 < -#2 \exp_stop_f:
            0
          \else:
            #1 + #2 + 1
          \fi:
        \else:
          \if_int_compare:w #1 < #2 \exp_stop_f:
            #1
          \else:
            #2
          \fi:
        \fi:
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[EXP]{\@@_collect_delimit_by_q_stop:w}
% \begin{macro}[EXP]
%   {
%     \@@_collect_loop:wn, \@@_collect_loop:wnNNNNNNN,
%     \@@_collect_end:wn, \@@_collect_end:nnnnnnnnw
%   }
%   Collects |max(#1,0)| characters, and removes everything else until
%   \cs{s_@@_stop}. This is somewhat similar to \cs{@@_skip_exp_end:w}, but
%   accepts integer expression arguments.  This time we can only grab
%   $7$ characters at a time, because one argument is needed for the
%   braced group holding the characters collected so far.  At the end,
%   we use an \cs{if_case:w}
%   trick again, so that the $8$ first arguments of
%   \cs{@@_collect_end:nnnnnnnnw} are some \cs{or:}, followed by an
%   \cs{fi:}, followed by the group of characters already collected and
%   by |#1| characters from the input stream.  (When |#1| is~$7$ the
%   \cs{if_case:w} has already skipped all six \cs{or:} and the
%   \cs{fi:}, and the $8$ arguments are the group and $7$ characters.)
%   Simply
%   leaving this in the input stream closes the conditional properly
%   and the \cs{or:} disappear.
%    \begin{macrocode}
\cs_new:Npn \@@_collect_delimit_by_q_stop:w #1;
  { \@@_collect_loop:wn #1 ; { } }
\cs_new:Npn \@@_collect_loop:wn #1 ;
  {
    \if_int_compare:w #1 > 7 \exp_stop_f:
      \exp_after:wN \@@_collect_loop:wnNNNNNNN
    \else:
      \exp_after:wN \@@_collect_end:wn
    \fi:
    #1 ;
  }
\cs_new:Npn \@@_collect_loop:wnNNNNNNN #1; #2 #3#4#5#6#7#8#9
  {
    \exp_after:wN \@@_collect_loop:wn
    \int_value:w \int_eval:n { #1 - 7 } ;
    { #2 #3#4#5#6#7#8#9 }
  }
\cs_new:Npn \@@_collect_end:wn #1 ;
  {
    \exp_after:wN \@@_collect_end:nnnnnnnnw
    \if_case:w \if_int_compare:w #1 > \c_zero_int
      #1 \else: 0 \fi: \exp_stop_f:
      \or: \or: \or: \or: \or: \or: \fi:
  }
\cs_new:Npn \@@_collect_end:nnnnnnnnw #1#2#3#4#5#6#7#8 #9 \s_@@_stop
  { #1#2#3#4#5#6#7#8 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Counting characters}
%
% \begin{macro}[EXP]
%   {\str_count_spaces:N, \str_count_spaces:c, \str_count_spaces:n}
% \begin{macro}[EXP]{\@@_count_spaces_loop:w}
%   To speed up this function, we grab and discard $9$ space-delimited
%   arguments in each iteration of the loop.  The loop stops when the
%   last argument is one of the trailing |X|\meta{number}, and that
%   \meta{number} is added to the sum of $9$ that precedes, to adjust
%   the result.
%    \begin{macrocode}
\cs_new:Npn \str_count_spaces:N
  { \exp_args:No \str_count_spaces:n } % expand the variable once, then count
\cs_generate_variant:Nn \str_count_spaces:N { c }
\cs_new:Npn \str_count_spaces:n #1
  {
    % The string is followed by a space and 9 space-terminated markers
    % X<number>.  The loop below leaves 9 + ... + 9 + <number> inside
    % \int_eval:n, where <number> comes from the marker that ended up as
    % the ninth argument of the last iteration.
    \int_eval:n
      {
        \exp_after:wN \@@_count_spaces_loop:w
        \tl_to_str:n {#1} ~
        X 7 ~ X 6 ~ X 5 ~ X 4 ~ X 3 ~ X 2 ~ X 1 ~ X 0 ~ X -1 ~
        \s_@@_stop
      }
  }
\cs_new:Npn \@@_count_spaces_loop:w #1~#2~#3~#4~#5~#6~#7~#8~#9~
  {
    % If #9 starts with the marker X, the test consumes that X and
    % leaves the number after it in the sum; the auxiliary then
    % presumably keeps the next item (the \fi:) and removes everything
    % up to \s_@@_stop.  Otherwise 9 spaces were passed: add 9 and loop.
    \if_meaning:w X #9
      \@@_use_i_delimit_by_s_stop:nw
    \fi:
    9 + \@@_count_spaces_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_count:N, \str_count:c, \str_count:n, \str_count_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_count:n}
% \begin{macro}[EXP]{\@@_count_aux:n, \@@_count_loop:NNNNNNNNN}
%   To count characters in a string we could first escape all spaces
%   using \cs{__kernel_str_to_other:n}, then pass the result to \cs{tl_count:n}.
%   However, the escaping step would be quadratic in the number of
%   characters in the string, and we can do better.  Namely, sum the
%   number of spaces (\cs{str_count_spaces:n}) and the result of
%   \cs{tl_count:n}, which ignores spaces.  Since strings tend to be
%   longer than token lists, we use specialized functions to count
%   characters ignoring spaces.  Namely, loop, grabbing $9$ non-space
%   characters at each step, and end as soon as we reach one of the $9$
%   trailing items.  The internal function \cs{@@_count:n}, used in
%   \cs{str_item:nn} and \cs{str_range:nnn}, is similar to
%   \cs{str_count_ignore_spaces:n} but expects its argument to already
%   be a string or a string with spaces escaped.
%    \begin{macrocode}
\cs_new:Npn \str_count:N { \exp_args:No \str_count:n } % expand the variable once
\cs_generate_variant:Nn \str_count:N { c }
\cs_new:Npn \str_count:n #1
  {
    % Number of spaces plus number of non-space characters, summed in
    % the \int_eval:n provided by \@@_count_aux:n.
    \@@_count_aux:n
      {
        \str_count_spaces:n {#1}
        + \exp_after:wN \@@_count_loop:NNNNNNNNN \tl_to_str:n {#1}
      }
  }
\cs_new:Npn \@@_count:n #1
  {
    % No \tl_to_str:n is applied here: #1 is handed to the loop as is.
    \@@_count_aux:n
      { \@@_count_loop:NNNNNNNNN #1 }
  }
\cs_new:Npn \str_count_ignore_spaces:n #1
  {
    % The loop grabs undelimited arguments, which skip spaces, so only
    % the non-space characters of the string are counted.
    \@@_count_aux:n
      { \exp_after:wN \@@_count_loop:NNNNNNNNN \tl_to_str:n {#1} }
  }
\cs_new:Npn \@@_count_aux:n #1
  {
    % Put the 9 braced end markers X<number> and \s_@@_stop after #1
    % and evaluate the sum that the loop leaves behind.
    \int_eval:n
      {
        #1
        { X 8 } { X 7 } { X 6 }
        { X 5 } { X 4 } { X 3 }
        { X 2 } { X 1 } { X 0 }
        \s_@@_stop
      }
  }
\cs_new:Npn \@@_count_loop:NNNNNNNNN #1#2#3#4#5#6#7#8#9
  {
    % If #9 is a marker X<number>, the test consumes X and leaves
    % <number> in the sum; the auxiliary then presumably discards the
    % rest of the input up to \s_@@_stop.  Otherwise 9 characters were
    % read: add 9 and loop.
    \if_meaning:w X #9
      \exp_after:wN \@@_use_none_delimit_by_s_stop:w
    \fi:
    9 + \@@_count_loop:NNNNNNNNN
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{The first character in a string}
%
% \begin{macro}[EXP]
%   {\str_head:N, \str_head:c, \str_head:n, \str_head_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_head:w}
%   The \texttt{_ignore_spaces} variant applies \cs{tl_to_str:n} then
%   grabs the first item, thus skipping spaces.
%   As usual, \cs{str_head:N} expands its argument and
%   hands it to \cs{str_head:n}.  To circumvent the fact that \TeX{}
%   skips spaces when grabbing undelimited macro parameters,
%   \cs{@@_head:w} takes an argument delimited by a space. If |#1|
%   starts with a non-space character, \cs{@@_use_i_delimit_by_s_stop:nw}
%   leaves that in the input stream. On the other hand, if |#1| starts
%   with a space, then \cs{@@_head:w} takes an empty argument, and the
%   single (initially braced) space in the definition of \cs{@@_head:w}
%   makes its way to the output. Finally, for an empty argument, the
%   (braced) empty brace group in the definition of \cs{str_head:n}
%   gives an empty result after passing through
%   \cs{@@_use_i_delimit_by_s_stop:nw}.
%    \begin{macrocode}
\cs_new:Npn \str_head:N { \exp_args:No \str_head:n } % expand the variable once
\cs_generate_variant:Nn \str_head:N { c }
\cs_new:Npn \str_head:n #1
  {
    % The string is followed by a doubly braced empty group, a space
    % (so that the space-delimited argument of \@@_head:w always finds
    % its delimiter) and the \s_@@_stop end marker.
    \exp_after:wN \@@_head:w
    \tl_to_str:n {#1}
    { { } } ~ \s_@@_stop
  }
\cs_new:Npn \@@_head:w #1 ~ %
  % #1 is everything before the first space.  It is put back followed
  % by a braced space, and the first item of that material is kept.
  { \@@_use_i_delimit_by_s_stop:nw #1 { ~ } }
\cs_new:Npn \str_head_ignore_spaces:n #1
  {
    % Take the first item of the string directly; the trailing empty
    % group is what gets taken when the string has no item at all.
    \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
    \tl_to_str:n {#1} { } \s_@@_stop
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_tail:N, \str_tail:c, \str_tail:n, \str_tail_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_tail_auxi:w, \@@_tail_auxii:w}
%   Getting the tail is a little bit more convoluted than the head of a
%   string.  We hit the front of the string with \cs{reverse_if:N}
%   \cs{if_charcode:w} \cs{scan_stop:}.  This removes the first
%   character, and necessarily makes the test true, since the character
%   cannot match \cs{scan_stop:}. The auxiliary function then inserts
%   the required \cs{fi:} to close the conditional, and leaves the tail
%   of the string in the input stream.  The details are such that an
%   empty string has an empty tail (this requires in particular that the
%   end-marker |X| be unexpandable and not a control sequence).  The
%   \texttt{_ignore_spaces} variant is rather simpler: after converting the
%   input to a string, \cs{@@_tail_auxii:w} removes one undelimited
%   argument and leaves everything else until an end-marker \cs{s_@@_mark}.
%   One can check that an empty (or blank) string yields an empty
%   tail.
%    \begin{macrocode}
\cs_new:Npn \str_tail:N { \exp_args:No \str_tail:n } % expand the variable once
\cs_generate_variant:Nn \str_tail:N { c }
\cs_new:Npn \str_tail:n #1
  {
    % The conditional is expanded before \@@_tail_auxi:w runs.  It
    % compares \scan_stop: with the first character of the string (or
    % with the first marker X when the string is empty) and thereby
    % removes that token.  The conditional is left open here and is
    % closed by the \fi: inserted by \@@_tail_auxi:w.
    \exp_after:wN \@@_tail_auxi:w
    \reverse_if:N \if_charcode:w
        \scan_stop: \tl_to_str:n {#1} X X \s_@@_stop
  }
% #1 is what precedes the next marker X; the rest up to \s_@@_stop is dropped.
\cs_new:Npn \@@_tail_auxi:w #1 X #2 \s_@@_stop { \fi: #1 }
\cs_new:Npn \str_tail_ignore_spaces:n #1
  {
    % Two \s_@@_mark follow the string: one delimits the tail, the
    % other is available as first item when the string has none.
    \exp_after:wN \@@_tail_auxii:w
    \tl_to_str:n {#1} \s_@@_mark \s_@@_mark \s_@@_stop
  }
% Drop the first item #1, keep #2 (up to the next \s_@@_mark), drop the rest.
\cs_new:Npn \@@_tail_auxii:w #1 #2 \s_@@_mark #3 \s_@@_stop { #2 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{String manipulation}
%
% \begin{macro}[EXP]
%   {
%     \str_foldcase:n, \str_foldcase:V,
%     \str_lowercase:n, \str_lowercase:f,
%     \str_uppercase:n, \str_uppercase:f
%   }
% \begin{macro}[EXP]{\@@_change_case:nn}
% \begin{macro}[EXP]{\@@_change_case_aux:nn}
% \begin{macro}[EXP]{\@@_change_case_result:n}
% \begin{macro}[EXP]{\@@_change_case_output:nw, \@@_change_case_output:fw}
% \begin{macro}[EXP]{\@@_change_case_end:wn}
% \begin{macro}[EXP]{\@@_change_case_loop:nw}
% \begin{macro}[EXP]{\@@_change_case_space:n}
% \begin{macro}[EXP]{\@@_change_case_char:nN, \@@_change_case_char_aux:nN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNNN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNNNN}
% \begin{macro}[EXP]{\@@_change_case_char:nnn, \@@_change_case_char_aux:nnn}
% \begin{macro}[EXP]{\@@_change_case_char:nnnnn}
% \begin{macro}[EXP]{\@@_change_case_generate:n}
% \begin{macro}[EXP]{\@@_change_case_generate:nnnn}
%   Case changing for programmatic reasons is done by first detokenizing
%   input then doing a simple loop that only has to worry about spaces
%   and everything else. The output is detokenized to allow data sharing
%   with text-based case changing. Similarly, for $8$-bit engines the
%   multi-byte information is shared.
%    \begin{macrocode}
% Each public function passes its argument and the name of the case
% mapping to \@@_change_case:nn; the variant is declared right after
% its base function.
\cs_new:Npn \str_foldcase:n #1
  { \@@_change_case:nn {#1} { casefold } }
\cs_generate_variant:Nn \str_foldcase:n { V }
\cs_new:Npn \str_lowercase:n #1
  { \@@_change_case:nn {#1} { lowercase } }
\cs_generate_variant:Nn \str_lowercase:n { f }
\cs_new:Npn \str_uppercase:n #1
  { \@@_change_case:nn {#1} { uppercase } }
\cs_generate_variant:Nn \str_uppercase:n { f }
\cs_new:Npn \@@_change_case:nn #1
  {
    % Only the first argument is grabbed here.  It is converted to a
    % string before \@@_change_case_aux:nn runs; the case type is still
    % in the input stream and becomes the second argument of that
    % function.
    \exp_after:wN \@@_change_case_aux:nn \exp_after:wN
      { \tl_to_str:n {#1} }
  }
\cs_new:Npn \@@_change_case_aux:nn #1#2
  {
    % #1 is the string, #2 the case type.  The string is followed by
    % the recursion markers, then by \@@_change_case_result:n and an
    % empty group in which the output is accumulated.
    \@@_change_case_loop:nw {#2} #1 \q_@@_recursion_tail \q_@@_recursion_stop
      \@@_change_case_result:n { }
  }
% Append #1 to the accumulated result #3, putting back unchanged
% everything (#2) found before \@@_change_case_result:n.
\cs_new:Npn \@@_change_case_output:nw #1#2 \@@_change_case_result:n #3
  { #2 \@@_change_case_result:n { #3 #1 } }
\cs_generate_variant:Nn  \@@_change_case_output:nw { f }
% Discard what is left before \@@_change_case_result:n and deliver the
% accumulated result #2 as a string.
\cs_new:Npn \@@_change_case_end:wn #1 \@@_change_case_result:n #2
  { \tl_to_str:n {#2} }
\cs_new:Npn \@@_change_case_loop:nw #1#2 \q_@@_recursion_stop
  {
    % #1 is the case type, #2 the rest of the string (including the
    % recursion tail).  Select the handler according to whether #2
    % starts with a space, then put the same material back for it.
    \tl_if_head_is_space:nTF {#2}
      { \@@_change_case_space:n }
      { \@@_change_case_char:nN }
    {#1} #2 \q_@@_recursion_stop
  }
% \c_space_tl is expanded once before \cs_new:Npn acts, so that the
% parameter text of the function ends with an explicit space token.
\exp_last_unbraced:NNNNo
  \cs_new:Npn \@@_change_case_space:n #1 \c_space_tl
  {
    % The leading space was removed as delimiter: output a space and
    % carry on with the same case type.
    \@@_change_case_output:nw { ~ }
    \@@_change_case_loop:nw {#1}
  }
\cs_new:Npn \@@_change_case_char:nN #1#2
  {
    % #2 is the next token of the string.  Presumably, when it is the
    % recursion tail, the test ends the loop and runs
    % \@@_change_case_end:wn; otherwise #2 is converted to a codepoint.
    \@@_if_recursion_tail_stop_do:Nn #2
      { \@@_change_case_end:wn }
    \@@_change_case_codepoint:nN {#1} #2
  }
% Turn the next character of the string into a codepoint and pass it to
% \@@_change_case_char:fnn together with the case type (#1) and the
% original token(s).  The definitions depend on the engine.
\if_int_compare:w 0
  \cs_if_exist:NT \tex_XeTeXversion:D { 1 }
  \cs_if_exist:NT \tex_luatexversion:D { 1 }
  > 0 \exp_stop_f:
  % XeTeX or LuaTeX: the character code of the single token #2 is used
  % directly as the codepoint.
  \cs_new:Npn \@@_change_case_codepoint:nN #1#2
    { \@@_change_case_char:fnn { \int_eval:n {`#2} } {#1} {#2} }
\else:
    % Other engines: the value of the leading byte #2 selects how many
    % tokens are combined into one codepoint (UTF-8 decoding): one
    % token up to "80, two below "E0, three below "F0, four otherwise.
    \cs_new:Npn \@@_change_case_codepoint:nN #1#2
      {
        \int_compare:nNnTF {`#2} > { "80 }
          {
            \int_compare:nNnTF {`#2} < { "E0 }
              { \@@_change_case_codepoint:nNN }
              {
                 \int_compare:nNnTF {`#2} < { "F0 }
                   { \@@_change_case_codepoint:nNNN }
                   { \@@_change_case_codepoint:nNNNN }
              }
          }
          { \@@_change_case_char_aux:nN }
            {#1} #2
      }
    % Single byte: its character code is the codepoint.
    \cs_new:Npn \@@_change_case_char_aux:nN #1#2
      { \@@_change_case_char:fnn { \int_eval:n {`#2} } {#1} {#2} }
    % Two bytes: strip the "C0 and "80 prefixes and combine the payloads.
    \cs_new:Npn \@@_change_case_codepoint:nNN #1#2#3
      {
        \@@_change_case_char:fnn
          { \int_eval:n { (`#2 - "C0) * "40 + `#3 - "80 } }
          {#1} {#2#3}
      }
    % Three bytes: leading byte prefix "E0, two continuation bytes.
    \cs_new:Npn \@@_change_case_codepoint:nNNN #1#2#3#4
      {
        \@@_change_case_char:fnn
          {
            \int_eval:n
              { (`#2 - "E0) * "1000 + (`#3 - "80) * "40 + `#4 - "80 }
          }
          {#1} {#2#3#4}
      }
    % Four bytes: leading byte prefix "F0, three continuation bytes.
    \cs_new:Npn \@@_change_case_codepoint:nNNNN #1#2#3#4#5
      {
        \@@_change_case_char:fnn
          {
            \int_eval:n
              {
                  (`#2 - "F0) * "40000
                + (`#3 - "80) * "1000
                + (`#4 - "80) * "40
                + `#5 - "80
              }
          }
          {#1} {#2#3#4#5}
      }
\fi:
\cs_new:Npn \@@_change_case_char:nnn #1#2#3
  {
    % #1 is the codepoint, #2 the case type, #3 the original token(s).
    % The mapped character(s) are computed by f-expansion and appended
    % to the result, then the loop resumes with the same case type.
    \@@_change_case_output:fw
      {
        \exp_args:Ne \@@_change_case_char_aux:nnn
          { \__kernel_codepoint_case:nn {#2} {#1} } {#1} {#3}
      }
    \@@_change_case_loop:nw {#2}
  }
\cs_generate_variant:Nn \@@_change_case_char:nnn { f }
\cs_new:Npn \@@_change_case_char_aux:nnn #1#2#3
  {
    % #1 is inserted without braces so that its content provides the
    % leading arguments of the next function (presumably three braced
    % codepoints produced by \__kernel_codepoint_case:nn -- confirm).
    \use:e { \@@_change_case_char:nnnnn #1 {#2} {#3} }
  }
\cs_new:Npn \@@_change_case_char:nnnnn #1#2#3#4#5
  {
    % #1 to #3 are the mapped codepoints, #4 the original codepoint and
    % #5 the original token(s).  When the first mapped codepoint equals
    % the original one the input is returned as a string; otherwise #1
    % is generated, followed by #2 and #3 when they are not blank.
    \int_compare:nNnTF {#1} = {#4}
      { \tl_to_str:n {#5} }
      {
        \@@_change_case_generate:n {#1}
        \tl_if_blank:nF {#2}
          {
            \@@_change_case_generate:n {#2}
            \tl_if_blank:nF {#3}
             { \@@_change_case_generate:n {#3} }
          }
      }
  }
\if_int_compare:w 0
  \cs_if_exist:NT \tex_XeTeXversion:D { 1 }
  \cs_if_exist:NT \tex_luatexversion:D { 1 }
  > 0 \exp_stop_f:
  % XeTeX or LuaTeX: one character with category code 12 per codepoint.
  \cs_new:Npn \@@_change_case_generate:n #1
    { \char_generate:nn {#1} { 12 } }
\else:
  % Other engines: the codepoint is first converted to bytes, which are
  % expanded before \@@_change_case_generate:nnnn grabs them
  % (presumably four brace groups, empty for unused bytes -- confirm).
  \cs_new:Npn \@@_change_case_generate:n #1
    {
      \use:e
        {
          \exp_not:N \@@_change_case_generate:nnnn
            \codepoint_to_bytes:n {#1}
        }
    }
  % Generate one category code 12 character per byte, stopping at the
  % first blank argument.
  \cs_new:Npn \@@_change_case_generate:nnnn #1#2#3#4
    {
      \char_generate:nn {#1} { 12 }
      \tl_if_blank:nF {#2}
        {
          \char_generate:nn {#2} { 12 }
          \tl_if_blank:nF {#3}
            {
              \char_generate:nn {#3} { 12 }
              \tl_if_blank:nF {#4}
                { \char_generate:nn {#4} { 12 } }
            }
        }
    }
\fi:
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{variable}
%   {
%     \c_ampersand_str,
%     \c_atsign_str,
%     \c_backslash_str,
%     \c_left_brace_str,
%     \c_right_brace_str,
%     \c_circumflex_str,
%     \c_colon_str,
%     \c_dollar_str,
%     \c_hash_str,
%     \c_percent_str,
%     \c_tilde_str,
%     \c_underscore_str,
%     \c_zero_str
%   }
%   For all of those strings, use \cs{cs_to_str:N} to get characters with
%   the correct category code without worries.
%    \begin{macrocode}
% One-character string constants, in alphabetical order of their names.
% Each one is the name of a control symbol with the escape character
% removed by \cs_to_str:N.
\str_const:Nx \c_ampersand_str { \cs_to_str:N \& }
\str_const:Nx \c_atsign_str { \cs_to_str:N \@ }
\str_const:Nx \c_backslash_str { \cs_to_str:N \\ }
\str_const:Nx \c_circumflex_str { \cs_to_str:N \^ }
\str_const:Nx \c_colon_str { \cs_to_str:N \: }
\str_const:Nx \c_dollar_str { \cs_to_str:N \$ }
\str_const:Nx \c_hash_str { \cs_to_str:N \# }
\str_const:Nx \c_left_brace_str { \cs_to_str:N \{ }
\str_const:Nx \c_percent_str { \cs_to_str:N \% }
\str_const:Nx \c_right_brace_str { \cs_to_str:N \} }
\str_const:Nx \c_tilde_str { \cs_to_str:N \~ }
\str_const:Nx \c_underscore_str { \cs_to_str:N \_ }
% The digit zero is given literally.
\str_const:Nx \c_zero_str { 0 }
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_tmpa_str, \l_tmpb_str, \g_tmpa_str, \g_tmpb_str}
%   Scratch strings.
%    \begin{macrocode}
% Global scratch strings.
\str_new:N \g_tmpa_str
\str_new:N \g_tmpb_str
% Local scratch strings.
\str_new:N \l_tmpa_str
\str_new:N \l_tmpb_str
%    \end{macrocode}
% \end{variable}
%
% \subsection{Viewing strings}
%
% \begin{macro}{\str_show:n, \str_show:N, \str_show:c}
% \begin{macro}{\str_log:n, \str_log:N, \str_log:c}
%   Displays a string on the terminal.
%    \begin{macrocode}
% The n-type functions are copies of their token list counterparts.
\cs_new_eq:NN \str_log:n  \tl_log:n
\cs_new_eq:NN \str_show:n \tl_show:n
% The N-type functions pass the variable, the type name "str" and the
% string form of its content to \__kernel_chk_tl_type:NnnT (presumably
% a validity check -- confirm) and display the variable in the T branch.
\cs_new_protected:Npn \str_log:N #1
  {
    \__kernel_chk_tl_type:NnnT #1
      { str }
      { \tl_to_str:N #1 }
      { \tl_log:N #1 }
  }
\cs_new_protected:Npn \str_show:N #1
  {
    \__kernel_chk_tl_type:NnnT #1
      { str }
      { \tl_to_str:N #1 }
      { \tl_show:N #1 }
  }
\cs_generate_variant:Nn \str_log:N  { c }
\cs_generate_variant:Nn \str_show:N { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex