diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md index d291d6650a..8b4473e03b 100644 --- a/l3kernel/CHANGELOG.md +++ b/l3kernel/CHANGELOG.md @@ -20,6 +20,7 @@ this project uses date-based 'snapshot' version identifiers. - `\pdf_version_gset:n` for `dvips`. - Improve handling of `\exp_not:n` in `\text_expand:n` (issue #875) - `\file_full_name:n` now avoids calling `\pdffilesize` multiple times on the same file. +- Show printable characters explicitly in `\regex_show:n` ### Fixed - Evalutate integer constants only once (issue#861) diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx index 4f151d591a..0975173a5d 100644 --- a/l3kernel/l3regex.dtx +++ b/l3kernel/l3regex.dtx @@ -509,7 +509,7 @@ % which never change. % \end{function} % -% \begin{function}[added = 2017-05-26]{\regex_show:n, \regex_show:N} +% \begin{function}[added = 2017-05-26, updated = 2021-04-15]{\regex_show:n, \regex_show:N} % \begin{syntax} % \cs{regex_show:n} \Arg{regex} % \end{syntax} @@ -3554,18 +3554,18 @@ \cs_set:Npn \@@_A_test: { anchor~at~start~(\iow_char:N\\A) } \cs_set:Npn \@@_G_test: { anchor~at~start~of~match~(\iow_char:N\\G) } \cs_set_protected:Npn \@@_item_caseful_equal:n ##1 - { \@@_show_one:n { char~code~\int_eval:n{##1} } } + { \@@_show_one:n { char~code~\@@_show_char:n{##1} } } \cs_set_protected:Npn \@@_item_caseful_range:nn ##1##2 { \@@_show_one:n - { range~[\int_eval:n{##1}, \int_eval:n{##2}] } + { range~[\@@_show_char:n{##1}, \@@_show_char:n{##2}] } } \cs_set_protected:Npn \@@_item_caseless_equal:n ##1 - { \@@_show_one:n { char~code~\int_eval:n{##1}~(caseless) } } + { \@@_show_one:n { char~code~\@@_show_char:n{##1}~(caseless) } } \cs_set_protected:Npn \@@_item_caseless_range:nn ##1##2 { \@@_show_one:n - { Range~[\int_eval:n{##1}, \int_eval:n{##2}]~(caseless) } + { Range~[\@@_show_char:n{##1}, \@@_show_char:n{##2}]~(caseless) } } \cs_set_protected:Npn \@@_item_catcode:nT { \@@_show_item_catcode:NnT \c_true_bool } @@ -3574,7 +3574,7 @@ \cs_set_protected:Npn 
\@@_item_reverse:n { \@@_show_scope:nn { Reversed~match } } \cs_set_protected:Npn \@@_item_exact:nn ##1##2 - { \@@_show_one:n { char~##2,~catcode~##1 } } + { \@@_show_one:n { char~\@@_show_char:n{##2},~catcode~##1 } } \cs_set_eq:NN \@@_item_exact_cs:n \@@_show_item_exact_cs:n \cs_set_protected:Npn \@@_item_cs:n { \@@_show_scope:nn { control~sequence } } @@ -3590,6 +3590,19 @@ % \end{macrocode} % \end{macro} % +% \begin{macro}[EXP]{\@@_show_char:n} +% Show a single character code, followed by the character itself in parentheses when the code is in the printable ASCII range $[32,126]$. +% This could be extended beyond ASCII. It is not ideal for parentheses themselves. +% \begin{macrocode} +\cs_new:Npn \@@_show_char:n #1 + { + \int_eval:n {#1} + \int_compare:nT { 32 <= #1 <= 126 } + { ~ ( \char_generate:nn {#1} {12} ) } + } +% \end{macrocode} +% \end{macro} +% % \begin{macro}{\@@_show_one:n} % Every part of the final message go through this function, which adds % one line to the output, with the appropriate prefix. diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg index 09b55e7d9e..0fd7781fe7 100644 --- a/l3kernel/testfiles/m3regex007.tlg +++ b/l3kernel/testfiles/m3regex007.tlg @@ -7,68 +7,68 @@ TEST 1: regex_show LaTeX3 Warning: Extra right parenthesis ignored in regular expression. 
> Compiled regex {a*|b??|(c{0}d{2,}e|[^fg\c [^BE][^\d ]\w ]){2,4}?)}: +-branch - char code 97, repeated 0 or more times, greedy + char code 97 (a), repeated 0 or more times, greedy +-branch - char code 98, repeated between 0 and 1 times, lazy + char code 98 (b), repeated between 0 and 1 times, lazy +-branch ,-group begin - | char code 99, repeated 0 times - | char code 100, repeated 2 or more times, greedy - | char code 101 + | char code 99 (c), repeated 0 times + | char code 100 (d), repeated 2 or more times, greedy + | char code 101 (e) +-branch | Don't match - | char code 102 - | char code 103 + | char code 102 (f) + | char code 103 (g) | categories CMTPUDSLOA, negative class - | range [48,57] - | range [97,122] - | range [65,90] - | range [48,57] - | char code 95 + | range [48 (0),57 (9)] + | range [97 (a),122 (z)] + | range [65 (A),90 (Z)] + | range [48 (0),57 (9)] + | char code 95 (_) `-group end, repeated between 2 and 4 times, lazy - char code 41. + char code 41 ()). } l. ... } Defining \l_foo_regex on line ... > Compiled regex variable \l_foo_regex: +-branch assertion: anchor at start (\A) - char code 97 + char code 97 (a) +-branch - char code 98 + char code 98 (b) +-branch. } l. ... } > Compiled regex {a\ur {l_foo_regex}b\c {\ur {l_foo_regex}|D}}: +-branch - char code 97 + char code 97 (a) ,-group begin (no capture) | assertion: anchor at start (\A) - | char code 97 + | char code 97 (a) +-branch - | char code 98 + | char code 98 (b) +-branch `-group end - char code 98 + char code 98 (b) Match control sequence +-branch ,-group begin (no capture) | assertion: anchor at start (\A) - | char code 97 + | char code 97 (a) +-branch - | char code 98 + | char code 98 (b) +-branch `-group end +-branch - char code 68. + char code 68 (D). } l. ... } > Compiled regex {a\c {bc}\u {c_space_tl}\c {\u {c_space_tl}|}}: +-branch - char code 97 + char code 97 (a) control sequence \bc - char 32, catcode 10 + char 32 ( ), catcode 10 control sequence \ or \. } l. ... 
} @@ -76,39 +76,39 @@ l. ... } {l_tmpa_int})?}{3}|y*}: +-branch control sequence \abc - char code 42 + char code 42 (*) Match, repeated 3 times control sequence +-branch - char code 92 - char code 97 - char code 98 - char code 99 - char code 32 + char code 92 (\) + char code 97 (a) + char code 98 (b) + char code 99 (c) + char code 32 ( ) +-branch ,-group begin (no capture) - | char code 55 + | char code 55 (7) `-group end, repeated between 0 and 1 times, greedy +-branch - char code 121, repeated 0 or more times, greedy. + char code 121 (y), repeated 0 or more times, greedy. } l. ... } > Compiled regex {a(?:bc(?|de|f){2}g|hi){3,4}?}: +-branch - char code 97 + char code 97 (a) ,-group begin (no capture) - | char code 98 - | char code 99 + | char code 98 (b) + | char code 99 (c) | ,-group begin (resetting) - | | char code 100 - | | char code 101 + | | char code 100 (d) + | | char code 101 (e) | +-branch - | | char code 102 + | | char code 102 (f) | `-group end, repeated 2 times - | char code 103 + | char code 103 (g) +-branch - | char code 104 - | char code 105 + | char code 104 (h) + | char code 105 (i) `-group end, repeated between 3 and 4 times, lazy. } l. ... } @@ -132,23 +132,23 @@ TEST 2: regex_show again > Compiled regex {[^\d \W \s ]*?[^a-z[:^cntrl:]A-Z]??}: +-branch Don't match, repeated 0 or more times, lazy - range [48,57] + range [48 (0),57 (9)] Reversed match - range [97,122] - range [65,90] - range [48,57] - char code 95 - char code 32 + range [97 (a),122 (z)] + range [65 (A),90 (Z)] + range [48 (0),57 (9)] + char code 95 (_) + char code 32 ( ) char code 9 char code 10 char code 12 char code 13 Don't match, repeated between 0 and 1 times, lazy - range [97,122] + range [97 (a),122 (z)] Reversed match range [0,31] char code 127 - range [65,90]. + range [65 (A),90 (Z)]. } l. ... } > Compiled regex {[^\c [^L][^[:^alpha:]]]{2,}?}: @@ -156,8 +156,8 @@ l. ... 
} Don't match, repeated 2 or more times, lazy categories CBEMTPUDSOA, negative class Reversed match - range [97,122] - range [65,90]. + range [97 (a),122 (z)] + range [65 (A),90 (Z)]. } l. ... } ! LaTeX3 Error: Missing right bracket inserted in regular expression. @@ -175,9 +175,9 @@ parentheses. > Compiled regex {(a(b|[^}: +-branch ,-group begin - | char code 97 + | char code 97 (a) | ,-group begin - | | char code 98 + | | char code 98 (b) | +-branch | | Pass | `-group end @@ -232,8 +232,8 @@ l. ... } control sequence +-branch Match - char code 125 - char code 123. + char code 125 (}) + char code 123 ({). } l. ... } ============================================================ @@ -245,49 +245,49 @@ TEST 3: all escaped sequences +-branch assertion: anchor at start (\A) negative assertion: word boundary - char code 67 + char code 67 (C) Match Reversed match - range [48,57] - char code 69 - char code 70 + range [48 (0),57 (9)] + char code 69 (E) + char code 70 (F) assertion: anchor at start of match (\G) Match Reversed match - char code 32 + char code 32 ( ) char code 9 - char code 73 - char code 74 + char code 73 (I) + char code 74 (J) reset match start (\K) - char code 76 - char code 77 + char code 76 (L) + char code 77 (M) Match Reversed match char code 10 - char code 79 - char code 80 - char code 81 - char code 82 + char code 79 (O) + char code 80 (P) + char code 81 (Q) + char code 82 (R) Match Reversed match - char code 32 + char code 32 ( ) char code 9 char code 10 char code 12 char code 13 - char code 84 - char code 85 + char code 84 (T) + char code 85 (U) Match Reversed match range [10,13] Match Reversed match - range [97,122] - range [65,90] - range [48,57] - char code 95 - char code 88 - char code 89 + range [97 (a),122 (z)] + range [65 (A),90 (Z)] + range [48 (0),57 (9)] + char code 95 (_) + char code 88 (X) + char code 89 (Y) assertion: anchor at end (\Z). } l. ... } @@ -309,54 +309,54 @@ the variable to use. 
+-branch char code 7 assertion: word boundary - range [48,57] + range [48 (0),57 (9)] char code 27 char code 12 - char code 103 + char code 103 (g) Match - char code 32 + char code 32 ( ) char code 9 - char code 105 - char code 106 - char code 107 - char code 108 - char code 109 + char code 105 (i) + char code 106 (j) + char code 107 (k) + char code 108 (l) + char code 109 (m) char code 10 - char code 111 - char code 112 - char code 113 + char code 111 (o) + char code 112 (p) + char code 113 (q) char code 13 Match - char code 32 + char code 32 ( ) char code 9 char code 10 char code 12 char code 13 char code 9 - char code 117 + char code 117 (u) range [10,13] Match - range [97,122] - range [65,90] - range [48,57] - char code 95 + range [97 (a),122 (z)] + range [65 (A),90 (Z)] + range [48 (0),57 (9)] + char code 95 (_) char code 0 - char code 121 + char code 121 (y) assertion: anchor at end (\Z). } l. ... } > Compiled regex {\0\1\2\3\4\5\6\7\8\9}: +-branch - char code 48 - char code 49 - char code 50 - char code 51 - char code 52 - char code 53 - char code 54 - char code 55 - char code 56 - char code 57. + char code 48 (0) + char code 49 (1) + char code 50 (2) + char code 51 (3) + char code 52 (4) + char code 53 (5) + char code 54 (6) + char code 55 (7) + char code 56 (8) + char code 57 (9). } l. ... } ============================================================ diff --git a/l3kernel/testfiles/m3regex010.luatex.tlg b/l3kernel/testfiles/m3regex010.luatex.tlg index c0ad4d0c9e..86fc5ca945 100644 --- a/l3kernel/testfiles/m3regex010.luatex.tlg +++ b/l3kernel/testfiles/m3regex010.luatex.tlg @@ -6,7 +6,7 @@ TEST 1: LuaTeX bug which leads to an l3regex bug ============================================================ > Compiled regex {\\^^A}: +-branch - char code 92 + char code 92 (\) char code 1. } l. ... }