Browse files

22934, modified, see 22937: add HIST_SUBST_PATTERN option

make ${.../#%...} anchor at both ends
  • Loading branch information...
1 parent e8d1ef3 commit 9471bbcfa06a87dcb6803a36d1208c214fa56003 Peter Stephenson committed Nov 1, 2006
Showing with 222 additions and 61 deletions.
  1. +8 −0 ChangeLog
  2. +9 −8 Completion/compinit
  3. +44 −7 Doc/Zsh/expn.yo
  4. +9 −0 Doc/Zsh/options.yo
  5. +1 −1 INSTALL
  6. +7 −0 README
  7. +25 −2 Src/glob.c
  8. +81 −28 Src/hist.c
  9. +1 −0 Src/options.c
  10. +19 −15 Src/subst.c
  11. +4 −0 Src/zsh.h
  12. +14 −0 Test/E01options.ztst
View
8 ChangeLog
@@ -1,3 +1,11 @@
+2006-11-01 Peter Stephenson <pws@csr.com>
+
+ * 22934, with modifications (c.f. 22937): INSTALL, README,
+ Completion/compinit, Doc/Zsh/expn.yo, Doc/Zsh/options.yo,
+ Src/glob.c, Src/hist.c, Src/options.c, Src/subst.c, Src/zsh.h,
+ Test/E01options.ztst: add HIST_SUBST_PATTERN option, make
+ ${.../#%...} anchor at both ends.
+
2006-11-01 Clint Adams <clint@zsh.org>
* 22940: R. Ramkumar: Completion/Unix/Command/_mkdir:
View
17 Completion/compinit
@@ -128,25 +128,26 @@ fi
# The standard options set in completion functions.
_comp_options=(
- extendedglob
bareglobqual
+ extendedglob
glob
multibyte
nullglob
rcexpandparam
unset
- NO_markdirs
+ NO_allexport
+ NO_aliases
+ NO_cshnullglob
+ NO_errexit
NO_globsubst
- NO_shwordsplit
- NO_shglob
+ NO_histsubstpattern
NO_kshglob
NO_ksharrays
NO_kshtypeset
- NO_cshnullglob
- NO_allexport
- NO_aliases
- NO_errexit
+ NO_markdirs
NO_octalzeroes
+ NO_shwordsplit
+ NO_shglob
NO_warncreateglobal
)
View
51 Doc/Zsh/expn.yo
@@ -258,7 +258,8 @@ item(tt(s/)var(l)tt(/)var(r)[tt(/)])(
Substitute var(r) for var(l) as described below.
The substitution is done only for the
first string that matches var(l). For arrays and for filename
-generation, this applies to each word of the expanded text.
+generation, this applies to each word of the expanded text. See
+below for further notes on substitutions.
The forms `tt(gs/)var(l)tt(/)var(r)' and `tt(s/)var(l)tt(/)var(r)tt(/:G)'
perform global substitution, i.e. substitute every occurrence of var(r)
@@ -273,8 +274,8 @@ backslash.
)
enditem()
-The tt(s/l/r/) substitution works as follows. The left-hand side of
-substitutions are not regular expressions, but character strings. Any
+The tt(s/l/r/) substitution works as follows. By default the left-hand
+side of substitutions are not patterns, but character strings. Any
character can be used as the delimiter in place of `tt(/)'. A
backslash quotes the delimiter character. The character `tt(&)', in
the right-hand-side var(r), is replaced by the text from the
@@ -286,6 +287,41 @@ the rightmost `tt(?)' in a context scan can similarly be omitted.
Note the same record of the last var(l) and var(r) is maintained
across all forms of expansion.
+If the option tt(HIST_SUBST_PATTERN) is set, var(l) is treated as
+a pattern of the usual form described in
+ifzman(the section FILENAME GENERATION below)\
+ifnzman(noderef(Filename Generation)). This can be used in
+all the places where modifiers are available; note, however, that
+in globbing qualifiers parameter substitution has already taken place,
+so parameters in the replacement string should be quoted to ensure
+they are replaced at the correct time.
+Note also that complicated patterns used in globbing qualifiers may
+need the extended glob qualifier notation
+tt(LPAR()#q:s/)var(...)tt(/)var(...)tt(/RPAR()) in order for the
+shell to recognize the expression as a glob qualifier. Further,
+note that bad patterns in the substitution are not subject to
+the tt(NO_BAD_PATTERN) option so will cause an error.
+
+When tt(HIST_SUBST_PATTERN) is set, var(l) may start with a tt(#)
+to indicate that the pattern must match at the start of the string
+to be substituted, and a tt(%) may appear at the start or after a tt(#)
+to indicate that the pattern must match at the end of the string
+to be substituted. The tt(%) or tt(#) may be quoted with two
+backslashes.
+
+For example, the following piece of filename generation code
+with the tt(EXTENDED_GLOB) option:
+
+example(print *.c+LPAR()#q:s/#%+LPAR()#b+RPAR()s+LPAR()*+RPAR().c/'S${match[1]}.C'/+RPAR())
+
+takes the expansion of tt(*.c) and applies the glob qualifiers in the
+tt(LPAR()#q)var(...)tt(RPAR()) expression, which consists of a substitution
+modifier anchored to the start and end of each word (tt(#%)). This
+turns on backreferences (tt(LPAR()#b+RPAR())), so that the parenthesised
+subexpression is available in the replacement string as tt(${match[1]}).
+The replacement string is quoted so that the parameter is not substituted
+before the start of filename generation.
+
The following tt(f), tt(F), tt(w) and tt(W) modifiers work only with
parameter expansion and filename generation. They are listed here to
provide a single point of reference for all modifiers.
@@ -530,13 +566,14 @@ substituted as tt(${~opat}).
The var(pattern) may begin with a `tt(#)', in which case the
var(pattern) must match at the start of the string, or `tt(%)', in
-which case it must match at the end of the string. The var(repl) may
+which case it must match at the end of the string, or `tt(#%)' in which
+case the var(pattern) must match the entire string. The var(repl) may
be an empty string, in which case the final `tt(/)' may also be omitted.
To quote the final `tt(/)' in other cases it should be preceded by a
single backslash; this is not necessary if the
-`tt(/)' occurs inside a substituted parameter. Note also that the `tt(#)'
-and `tt(%)' are not active if they occur inside a substituted parameter,
-even at the start.
+`tt(/)' occurs inside a substituted parameter. Note also that the `tt(#)',
+`tt(%)' and `tt(#%)' are not active if they occur inside a substituted
+parameter, even at the start.
The first `tt(/)' may be preceded by a `tt(:)', in which case the match
will only succeed if it matches the entire word. Note also the
View
9 Doc/Zsh/options.yo
@@ -376,6 +376,15 @@ characters resulting from command substitution as being eligible for
filename generation. Braces (and commas in between) do not become eligible
for expansion.
)
+pindex(HIST_SUBST_PATTERN)
+item(tt(HIST_SUBST_PATTERN))(
+Substitutions using the tt(:s) and tt(:&) history modifiers are performed
+with pattern matching instead of string matching. This occurs wherever
+history modifiers are valid, including glob qualifiers and parameters.
+See
+ifzman(the section Modifiers in zmanref(zshexp))\
+ifnzman(noderef(Modifiers)).
+)
pindex(IGNORE_BRACES)
cindex(disabling brace expansion)
cindex(brace expansion, disabling)
View
2 INSTALL
@@ -270,7 +270,7 @@ handled properly (some assistance with this problem would be appreciated).
The configuration script should turn on multibyte support on all systems
where it can be compiled successfully.
-The support can be explicitly enabled or disable with --enable-multibyte or
+The support can be explicitly enabled or disabled with --enable-multibyte or
--disable-multibyte. The developers are not aware of any need to use
--disable-multibyte and this should be reported as a bug. Currently
multibyte mode is believed to work on at least the following:
View
7 README
@@ -49,6 +49,13 @@ The variable HOME is no longer set by the shell if zsh is emulating any
other shell at startup; it must be present in the environment or set
subsequently by the user. It is valid for the variable to be unset.
+Parameter substitutions in the form ${param//#%search/replace} match
+against "search" anchored at both ends of the parameter value. Previously
+this syntax would have matched against "%search", anchored only at the head
+of the value. The form ${param//#$search/replace} where the value
+$search starts with "%" considers the "%" to be part of the search
+string as before.
+
The MULTIBYTE option is on by default where it is available; this
causes many operations to recognise characters as in the current locale.
Older versions of the shell always assumed a character was one byte.
View
27 Src/glob.c
@@ -2294,6 +2294,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
return 1;
}
if (matched) {
+ /*
+ * The default behaviour is to match at the start; this
+ * is modified by SUB_END and SUB_SUBSTR. SUB_END matches
+ * at the end of the string instead of the start. SUB_SUBSTR
+ * without SUB_END matches substrings searching from the start;
+ * with SUB_END it matches substrings searching from the end.
+ *
+ * The possibilities are further modified by whether we want the
+ * longest (SUB_LONG) or shortest possible match.
+ *
+ * SUB_START is only used in the case where we are also
+ * forcing a match at the end (SUB_END with no SUB_SUBSTR,
+ * with or without SUB_LONG), to indicate we should match
+ * the entire string.
+ */
switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
case 0:
case SUB_LONG:
@@ -2341,13 +2356,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
set_pat_start(p, t-s);
if (pattrylen(p, t, s + l - t, umlen, ioff))
tmatch = t;
+ if (fl & SUB_START)
+ break;
umlen -= iincchar(&t);
}
if (tmatch) {
*sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, repllist);
return 1;
}
- if (pattrylen(p, s + l, 0, 0, ioff)) {
+ if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
*sp = get_match_ret(*sp, l, l, fl, replstr, repllist);
return 1;
}
@@ -2364,8 +2381,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
*sp = get_match_ret(*sp, t-s, l, fl, replstr, repllist);
return 1;
}
+ if (fl & SUB_START)
+ break;
umlen -= iincchar(&t);
}
+ if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
+ *sp = get_match_ret(*sp, l, l, fl, replstr, repllist);
+ return 1;
+ }
break;
case SUB_SUBSTR:
@@ -2566,7 +2589,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
/* munge the whole string: no match, so no replstr */
*sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
- return 1;
+ return (fl & SUB_RETFAIL) ? 0 : 1;
}
/**/
View
109 Src/hist.c
@@ -323,7 +323,8 @@ getsubsargs(char *subline, int *gbalp, int *cflagp)
if (strlen(ptr1)) {
zsfree(hsubl);
hsubl = ptr1;
- }
+ } else if (!hsubl) /* fail silently on this */
+ return 0;
zsfree(hsubr);
hsubr = ptr2;
follow = ingetc();
@@ -337,11 +338,6 @@ getsubsargs(char *subline, int *gbalp, int *cflagp)
}
} else
inungetc(follow);
- if (hsubl && !strstr(subline, hsubl)) {
- herrflush();
- zerr("substitution failed");
- return 1;
- }
return 0;
}
@@ -354,6 +350,15 @@ getargc(Histent ehist)
return ehist->nwords ? ehist->nwords-1 : 0;
}
+/**/
+static int
+substfailed(void)
+{
+ herrflush();
+ zerr("substitution failed");
+ return -1;
+}
+
/* Perform history substitution, returning the next character afterwards. */
/**/
@@ -376,10 +381,15 @@ histsubchar(int c)
isfirstch = 0;
inungetc(hatchar);
if (!(ehist = gethist(defev))
- || !(sline = getargs(ehist, 0, getargc(ehist)))
- || getsubsargs(sline, &gbal, &cflag) || !hsubl)
+ || !(sline = getargs(ehist, 0, getargc(ehist))))
return -1;
- subst(&sline, hsubl, hsubr, gbal);
+
+ if (getsubsargs(sline, &gbal, &cflag))
+ return substfailed();
+ if (!hsubl)
+ return -1;
+ if (subst(&sline, hsubl, hsubr, gbal))
+ return substfailed();
} else {
/* Line doesn't begin ^foo^bar */
if (c != ' ')
@@ -608,9 +618,10 @@ histsubchar(int c)
if (getsubsargs(sline, &gbal, &cflag))
return -1; /* fall through */
case '&':
- if (hsubl && hsubr)
- subst(&sline, hsubl, hsubr, gbal);
- else {
+ if (hsubl && hsubr) {
+ if (subst(&sline, hsubl, hsubr, gbal))
+ return substfailed();
+ } else {
herrflush();
zerr("no previous substitution");
return -1;
@@ -1629,30 +1640,72 @@ casemodify(char *str, int how)
return str2;
}
+
+/*
+ * Substitute "out" for "in" in "*strptr" and update "*strptr".
+ * If "gbal", do global substitution.
+ *
+ * This returns a result from the heap. There seems to have
+ * been some confusion on this point.
+ */
+
/**/
-void
+int
subst(char **strptr, char *in, char *out, int gbal)
{
- char *str = *strptr, *instr = *strptr, *substcut, *sptr, *oldstr;
+ char *str = *strptr, *substcut, *sptr;
int off, inlen, outlen;
if (!*in)
in = str, gbal = 0;
- if (!(substcut = (char *)strstr(str, in)))
- return;
- inlen = strlen(in);
- sptr = convamps(out, in, inlen);
- outlen = strlen(sptr);
- do {
- *substcut = '\0';
- off = substcut - *strptr + outlen;
- substcut += inlen;
- *strptr = tricat(oldstr = *strptr, sptr, substcut);
- if (oldstr != instr)
- zsfree(oldstr);
- str = (char *)*strptr + off;
- } while (gbal && (substcut = (char *)strstr(str, in)));
+ if (isset(HISTSUBSTPATTERN)) {
+ int fl = SUB_LONG|SUB_REST|SUB_RETFAIL;
+ char *oldin = in;
+ if (gbal)
+ fl |= SUB_GLOBAL;
+ if (*in == '#' || *in == Pound) {
+ /* anchor at head, flag needed if SUB_END is also set */
+ fl |= SUB_START;
+ in++;
+ }
+ if (*in == '%') {
+ /* anchor at tail */
+ in++;
+ fl |= SUB_END;
+ }
+ if (in == oldin) {
+ /* no anchor, substring match */
+ fl |= SUB_SUBSTR;
+ }
+ if (in == str)
+ in = dupstring(in);
+ if (parse_subst_string(in) || errflag)
+ return 1;
+ if (parse_subst_string(out) || errflag)
+ return 1;
+ singsub(&in);
+ if (getmatch(strptr, in, fl, 1, out))
+ return 0;
+ } else {
+ if ((substcut = (char *)strstr(str, in))) {
+ inlen = strlen(in);
+ sptr = convamps(out, in, inlen);
+ outlen = strlen(sptr);
+
+ do {
+ *substcut = '\0';
+ off = substcut - *strptr + outlen;
+ substcut += inlen;
+ *strptr = zhtricat(*strptr, sptr, substcut);
+ str = (char *)*strptr + off;
+ } while (gbal && (substcut = (char *)strstr(str, in)));
+
+ return 0;
+ }
+ }
+
+ return 1;
}
/**/
View
1 Src/options.c
@@ -137,6 +137,7 @@ static struct optname optns[] = {
{{NULL, "histignorespace", 0}, HISTIGNORESPACE},
{{NULL, "histnofunctions", 0}, HISTNOFUNCTIONS},
{{NULL, "histnostore", 0}, HISTNOSTORE},
+{{NULL, "histsubstpattern", OPT_EMULATE}, HISTSUBSTPATTERN},
{{NULL, "histreduceblanks", 0}, HISTREDUCEBLANKS},
{{NULL, "histsavebycopy", OPT_ALL}, HISTSAVEBYCOPY},
{{NULL, "histsavenodups", 0}, HISTSAVENODUPS},
View
34 Src/subst.c
@@ -2355,15 +2355,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
c = *++s;
}
/* Check for anchored substitution */
- if (c == '%') {
+ if (c == '#' || c == Pound) {
+ /*
+ * anchor at head: this is the `normal' case in
+ * getmatch and we only require the flag if SUB_END
+ * is also present.
+ */
+ flags |= SUB_START;
+ s++;
+ }
+ if (*s == '%') {
/* anchor at tail */
flags |= SUB_END;
s++;
- } else if (c == '#' || c == Pound) {
- /* anchor at head: this is the `normal' case in getmatch */
- s++;
- } else
+ }
+ if (!(flags & (SUB_START|SUB_END))) {
+ /* No anchor, so substring */
flags |= SUB_SUBSTR;
+ }
/*
* Find the / marking the end of the search pattern.
* If there isn't one, we're just going to delete that,
@@ -2526,7 +2535,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
/* This once was executed only `if (qt) ...'. But with that
* patterns in a expansion resulting from a ${(e)...} aren't
* tokenized even though this function thinks they are (it thinks
- * they are because subst_parse_str() turns Qstring tokens
+ * they are because parse_subst_string() turns Qstring tokens
* into String tokens and for unquoted parameter expansions the
* lexer normally does tokenize patterns inside parameter
* expansions). */
@@ -3273,6 +3282,7 @@ modify(char **str, char **ptr)
break;
case 's':
+ /* TODO: multibyte delimiter */
c = **ptr;
(*ptr)++;
ptr1 = *ptr;
@@ -3298,7 +3308,8 @@ modify(char **str, char **ptr)
for (tt = hsubl; *tt; tt++)
if (inull(*tt) && *tt != Bnullkeep)
chuck(tt--);
- untokenize(hsubl);
+ if (!isset(HISTSUBSTPATTERN))
+ untokenize(hsubl);
for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
if (inull(*tt) && *tt != Bnullkeep)
chuck(tt--);
@@ -3444,15 +3455,8 @@ modify(char **str, char **ptr)
*str = casemodify(*str, CASMOD_UPPER);
break;
case 's':
- if (hsubl && hsubr) {
- char *oldstr = *str;
-
+ if (hsubl && hsubr)
subst(str, hsubl, hsubr, gbal);
- if (*str != oldstr) {
- *str = dupstring(oldstr = *str);
- zsfree(oldstr);
- }
- }
break;
case 'q':
*str = quotestring(*str, NULL, QT_BACKSLASH);
View
4 Src/zsh.h
@@ -1405,6 +1405,9 @@ struct tieddata {
#define SUB_ALL 0x0100 /* match complete string */
#define SUB_GLOBAL 0x0200 /* global substitution ${..//all/these} */
#define SUB_DOSUBST 0x0400 /* replacement string needs substituting */
+#define SUB_RETFAIL 0x0800 /* return status 0 if no match */
+#define SUB_START 0x1000 /* force match at start with SUB_END
+ * and no SUB_SUBSTR */
/* Flags as the second argument to prefork */
#define PF_TYPESET 0x01 /* argument handled like typeset foo=bar */
@@ -1631,6 +1634,7 @@ enum {
HISTREDUCEBLANKS,
HISTSAVEBYCOPY,
HISTSAVENODUPS,
+ HISTSUBSTPATTERN,
HISTVERIFY,
HUP,
IGNOREBRACES,
View
14 Test/E01options.ztst
@@ -487,6 +487,20 @@
>tmpcd tmpfile1 tmpfile2
>tmp*
+ setopt histsubstpattern
+ print *(:s/t??/TING/)
+ foo=(tmp*)
+ print ${foo:s/??p/THUMP/}
+ foo=(one.c two.c three.c)
+ print ${foo:s/#%(#b)t(*).c/T${match[1]}.X/}
+ print *(#q:s/#(#b)tmp(*e)/'scrunchy${match[1]}'/)
+ unsetopt histsubstpattern
+0:HIST_SUBST_PATTERN option
+>TINGcd TINGfile1 TINGfile2
+>THUMPcd THUMPfile1 THUMPfile2
+>one.c Two.X Three.X
+>scrunchyfile1 scrunchyfile2 tmpcd
+
setopt ignorebraces
echo X{a,b}Y
unsetopt ignorebraces

0 comments on commit 9471bbc

Please sign in to comment.