Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

import today's FreeBSD sed.

  • Loading branch information...
commit 20d93dc76c242871e6e6a8bea429643aca533b0a 1 parent 780cf21
christos authored
9 usr.bin/sed/Makefile
View
@@ -1,6 +1,15 @@
# @(#)Makefile 8.1 (Berkeley) 6/6/93
+# $FreeBSD: head/usr.bin/sed/Makefile 265420 2014-05-06 04:22:01Z imp $
+
+.include <src.opts.mk>
PROG= sed
SRCS= compile.c main.c misc.c process.c
+WARNS?= 2
+
+.if ${MK_TESTS} != "no"
+SUBDIR+= tests
+.endif
+
.include <bsd.prog.mk>
14 usr.bin/sed/POSIX
View
@@ -1,4 +1,5 @@
# @(#)POSIX 8.1 (Berkeley) 6/6/93
+# $FreeBSD: head/usr.bin/sed/POSIX 168417 2007-04-06 08:43:30Z yar $
Comments on the IEEE P1003.2 Draft 12
Part 2: Shell and Utilities
@@ -117,10 +118,15 @@ All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2.
1,3c\
text
- Historic implementations, and this implementation, do not output
- the text in the above example. The general rule, therefore,
- is that a range whose second address is never matched extends to
- the end of the input.
+ Historic implementations did not output the text in the above
+ example. Therefore it was believed that a range whose second
+ address was never matched extended to the end of the input.
+ However, the current practice adopted by this implementation,
+ as well as by those from GNU and SUN, is as follows: The text
+ from the 'c' command still isn't output because the second address
+ isn't actually matched; but the range is reset after all if its
+ second address is a line number. In the above example, only the
+ first line of the input will be deleted.
13. Historical implementations allow an output suppressing #n at the
beginning of -e arguments as well as in a script file. POSIX
596 usr.bin/sed/compile.c
View
@@ -14,10 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -35,14 +31,18 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
+
#ifndef lint
-static char sccsid[] = "@(#)compile.c 8.2 (Berkeley) 4/28/95";
-#endif /* not lint */
+static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93";
+#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
+#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@@ -50,6 +50,7 @@ static char sccsid[] = "@(#)compile.c 8.2 (Berkeley) 4/28/95";
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
#include "defs.h"
#include "extern.h"
@@ -63,21 +64,22 @@ static struct labhash {
int lh_ref;
} *labels[LHSZ];
-static char *compile_addr __P((char *, struct s_addr *));
-static char *compile_delimited __P((char *, char *));
-static char *compile_flags __P((char *, struct s_subst *));
-static char *compile_re __P((char *, regex_t **));
-static char *compile_subst __P((char *, struct s_subst *));
-static char *compile_text __P((void));
-static char *compile_tr __P((char *, char **));
+static char *compile_addr(char *, struct s_addr *);
+static char *compile_ccl(char **, char *);
+static char *compile_delimited(char *, char *, int);
+static char *compile_flags(char *, struct s_subst *);
+static regex_t *compile_re(char *, int);
+static char *compile_subst(char *, struct s_subst *);
+static char *compile_text(void);
+static char *compile_tr(char *, struct s_tr **);
static struct s_command
- **compile_stream __P((char *, struct s_command **, char *));
-static char *duptoeol __P((char *, char *));
-static void enterlabel __P((struct s_command *));
+ **compile_stream(struct s_command **);
+static char *duptoeol(char *, const char *);
+static void enterlabel(struct s_command *);
static struct s_command
- *findlabel __P((char *));
-static void fixuplabel __P((struct s_command *, struct s_command *));
-static void uselabel __P((void));
+ *findlabel(char *);
+static void fixuplabel(struct s_command *, struct s_command *);
+static void uselabel(void);
/*
* Command specification. This is used to drive the command parser.
@@ -90,6 +92,7 @@ struct s_format {
static struct s_format cmd_fmts[] = {
{'{', 2, GROUP},
+ {'}', 0, ENDGROUP},
{'a', 1, TEXT},
{'b', 2, BRANCH},
{'c', 2, TEXT},
@@ -127,52 +130,58 @@ struct s_command *prog;
* Initialise appends.
*/
void
-compile()
+compile(void)
{
- *compile_stream(NULL, &prog, NULL) = NULL;
+ *compile_stream(&prog) = NULL;
fixuplabel(prog, NULL);
uselabel();
- appends = xmalloc(sizeof(struct s_appends) * appendnum);
- match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
+ if (appendnum == 0)
+ appends = NULL;
+ else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) ==
+ NULL)
+ err(1, "malloc");
+ if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL)
+ err(1, "malloc");
}
#define EATSPACE() do { \
if (p) \
- while (*p && isascii(*p) && isspace(*p)) \
+ while (*p && isspace((unsigned char)*p)) \
p++; \
} while (0)
static struct s_command **
-compile_stream(terminator, link, p)
- char *terminator;
- struct s_command **link;
- register char *p;
+compile_stream(struct s_command **link)
{
+ char *p;
static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
- struct s_command *cmd, *cmd2;
+ struct s_command *cmd, *cmd2, *stack;
struct s_format *fp;
+ char re[_POSIX2_LINE_MAX + 1];
int naddr; /* Number of addresses */
- if (p != NULL)
- goto semicolon;
+ stack = 0;
for (;;) {
- if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
- if (terminator != NULL)
- err(COMPILE, "unexpected EOF (pending }'s)");
+ if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
+ if (stack != 0)
+ errx(1, "%lu: %s: unexpected EOF (pending }'s)",
+ linenum, fname);
return (link);
}
semicolon: EATSPACE();
- if (p && (*p == '#' || *p == '\0'))
- continue;
- if (*p == '}') {
- if (terminator == NULL)
- err(COMPILE, "unexpected }");
- return (link);
+ if (p) {
+ if (*p == '#' || *p == '\0')
+ continue;
+ else if (*p == ';') {
+ p++;
+ goto semicolon;
+ }
}
- *link = cmd = xmalloc(sizeof(struct s_command));
+ if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL)
+ err(1, "malloc");
link = &cmd->next;
- cmd->nonsel = cmd->inrange = 0;
+ cmd->startline = cmd->nonsel = 0;
/* First parse the addresses */
naddr = 0;
@@ -180,14 +189,17 @@ semicolon: EATSPACE();
#define addrchar(c) (strchr("0123456789/\\$", (c)))
if (addrchar(*p)) {
naddr++;
- cmd->a1 = xmalloc(sizeof(struct s_addr));
+ if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL)
+ err(1, "malloc");
p = compile_addr(p, cmd->a1);
EATSPACE(); /* EXTENSION */
if (*p == ',') {
p++;
EATSPACE(); /* EXTENSION */
naddr++;
- cmd->a2 = xmalloc(sizeof(struct s_addr));
+ if ((cmd->a2 = malloc(sizeof(struct s_addr)))
+ == NULL)
+ err(1, "malloc");
p = compile_addr(p, cmd->a2);
EATSPACE();
} else
@@ -197,39 +209,44 @@ semicolon: EATSPACE();
nonsel: /* Now parse the command */
if (!*p)
- err(COMPILE, "command expected");
+ errx(1, "%lu: %s: command expected", linenum, fname);
cmd->code = *p;
for (fp = cmd_fmts; fp->code; fp++)
if (fp->code == *p)
break;
if (!fp->code)
- err(COMPILE, "invalid command code %c", *p);
+ errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
if (naddr > fp->naddr)
- err(COMPILE,
-"command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
+ errx(1,
+ "%lu: %s: command %c expects up to %d address(es), found %d",
+ linenum, fname, *p, fp->naddr, naddr);
switch (fp->args) {
case NONSEL: /* ! */
p++;
EATSPACE();
- cmd->nonsel = ! cmd->nonsel;
+ cmd->nonsel = 1;
goto nonsel;
case GROUP: /* { */
p++;
EATSPACE();
- if (!*p)
- p = NULL;
- cmd2 = xmalloc(sizeof(struct s_command));
- cmd2->code = '}';
- *compile_stream("}", &cmd->u.c, p) = cmd2;
- cmd->next = cmd2;
- link = &cmd2->next;
+ cmd->next = stack;
+ stack = cmd;
+ link = &cmd->u.c;
+ if (*p)
+ goto semicolon;
+ break;
+ case ENDGROUP:
/*
* Short-circuit command processing, since end of
* group is really just a noop.
*/
- cmd2->nonsel = 1;
- cmd2->a1 = cmd2->a2 = 0;
- break;
+ cmd->nonsel = 1;
+ if (stack == 0)
+ errx(1, "%lu: %s: unexpected }", linenum, fname);
+ cmd2 = stack;
+ stack = cmd2->next;
+ cmd2->next = cmd;
+ /*FALLTHROUGH*/
case EMPTY: /* d D g G h H l n N p P q x = \0 */
p++;
EATSPACE();
@@ -239,20 +256,21 @@ semicolon: EATSPACE();
goto semicolon;
}
if (*p)
- err(COMPILE,
-"extra characters at the end of %c command", cmd->code);
+ errx(1, "%lu: %s: extra characters at the end of %c command",
+ linenum, fname, cmd->code);
break;
case TEXT: /* a c i */
p++;
EATSPACE();
if (*p != '\\')
- err(COMPILE,
-"command %c expects \\ followed by text", cmd->code);
+ errx(1,
+"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
p++;
EATSPACE();
if (*p)
- err(COMPILE,
-"extra characters after \\ at the end of %c command", cmd->code);
+ errx(1,
+ "%lu: %s: extra characters after \\ at the end of %c command",
+ linenum, fname, cmd->code);
cmd->t = compile_text();
break;
case COMMENT: /* \0 # */
@@ -261,20 +279,20 @@ semicolon: EATSPACE();
p++;
EATSPACE();
if (*p == '\0')
- err(COMPILE, "filename expected");
+ errx(1, "%lu: %s: filename expected", linenum, fname);
cmd->t = duptoeol(p, "w command");
if (aflag)
cmd->u.fd = -1;
- else if ((cmd->u.fd = open(p,
+ else if ((cmd->u.fd = open(p,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
DEFFILEMODE)) == -1)
- err(FATAL, "%s: %s\n", p, strerror(errno));
+ err(1, "%s", p);
break;
case RFILE: /* r */
p++;
EATSPACE();
if (*p == '\0')
- err(COMPILE, "filename expected");
+ errx(1, "%lu: %s: filename expected", linenum, fname);
else
cmd->t = duptoeol(p, "read command");
break;
@@ -291,21 +309,36 @@ semicolon: EATSPACE();
EATSPACE();
cmd->t = duptoeol(p, "label");
if (strlen(p) == 0)
- err(COMPILE, "empty label");
+ errx(1, "%lu: %s: empty label", linenum, fname);
enterlabel(cmd);
break;
case SUBST: /* s */
p++;
if (*p == '\0' || *p == '\\')
- err(COMPILE,
-"substitute pattern can not be delimited by newline or backslash");
- cmd->u.s = xmalloc(sizeof(struct s_subst));
- p = compile_re(p, &cmd->u.s->re);
+ errx(1,
+"%lu: %s: substitute pattern can not be delimited by newline or backslash",
+ linenum, fname);
+ if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL)
+ err(1, "malloc");
+ p = compile_delimited(p, re, 0);
if (p == NULL)
- err(COMPILE, "unterminated substitute pattern");
+ errx(1,
+ "%lu: %s: unterminated substitute pattern", linenum, fname);
+
+ /* Compile RE with no case sensitivity temporarily */
+ if (*re == '\0')
+ cmd->u.s->re = NULL;
+ else
+ cmd->u.s->re = compile_re(re, 0);
--p;
p = compile_subst(p, cmd->u.s);
p = compile_flags(p, cmd->u.s);
+
+ /* Recompile RE with case sensitivity from "I" flag if any */
+ if (*re == '\0')
+ cmd->u.s->re = NULL;
+ else
+ cmd->u.s->re = compile_re(re, cmd->u.s->icase);
EATSPACE();
if (*p == ';') {
p++;
@@ -315,7 +348,7 @@ semicolon: EATSPACE();
break;
case TR: /* y */
p++;
- p = compile_tr(p, (char **)&cmd->u.y);
+ p = compile_tr(p, &cmd->u.y);
EATSPACE();
if (*p == ';') {
p++;
@@ -323,8 +356,8 @@ semicolon: EATSPACE();
goto semicolon;
}
if (*p)
- err(COMPILE,
-"extra text at the end of a transform command");
+ errx(1,
+"%lu: %s: extra text at the end of a transform command", linenum, fname);
break;
}
}
@@ -340,8 +373,7 @@ semicolon: EATSPACE();
* with the processed string.
*/
static char *
-compile_delimited(p, d)
- char *p, *d;
+compile_delimited(char *p, char *d, int is_tr)
{
char c;
@@ -349,19 +381,30 @@ compile_delimited(p, d)
if (c == '\0')
return (NULL);
else if (c == '\\')
- err(COMPILE, "\\ can not be used as a string delimiter");
+ errx(1, "%lu: %s: \\ can not be used as a string delimiter",
+ linenum, fname);
else if (c == '\n')
- err(COMPILE, "newline can not be used as a string delimiter");
+ errx(1, "%lu: %s: newline can not be used as a string delimiter",
+ linenum, fname);
while (*p) {
- if (*p == '\\' && p[1] == c)
+ if (*p == '[' && *p != c) {
+ if ((d = compile_ccl(&p, d)) == NULL)
+ errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
+ continue;
+ } else if (*p == '\\' && p[1] == '[') {
+ *d++ = *p++;
+ } else if (*p == '\\' && p[1] == c)
p++;
else if (*p == '\\' && p[1] == 'n') {
*d++ = '\n';
p += 2;
continue;
- } else if (*p == '\\' && p[1] == '\\')
- *d++ = *p++;
- else if (*p == c) {
+ } else if (*p == '\\' && p[1] == '\\') {
+ if (is_tr)
+ p++;
+ else
+ *d++ = *p++;
+ } else if (*p == c) {
*d = '\0';
return (p + 1);
}
@@ -370,34 +413,52 @@ compile_delimited(p, d)
return (NULL);
}
+
+/* compile_ccl: copy a bracket expression ([...]) verbatim, including any nested [. .], [: :] or [= =] element */
+static char *
+compile_ccl(char **sp, char *t)
+{
+ int c, d;
+ char *s = *sp;
+
+ *t++ = *s++;
+ if (*s == '^')
+ *t++ = *s++;
+ if (*s == ']')
+ *t++ = *s++;
+ for (; *s && (*t = *s) != ']'; s++, t++)
+ if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) {
+ *++t = *++s, t++, s++;
+ for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
+ if ((c = *s) == '\0')
+ return NULL;
+ }
+ return (*s == ']') ? *sp = ++s, ++t : NULL;
+}
+
/*
- * Get a regular expression. P points to the delimiter of the regular
- * expression; repp points to the address of a regexp pointer. Newline
- * and delimiter escapes are processed; other escapes are ignored.
- * Returns a pointer to the first character after the final delimiter
- * or NULL in the case of a non terminated regular expression. The regexp
- * pointer is set to the compiled regular expression.
+ * Compiles the regular expression in RE and returns a pointer to the compiled
+ * regular expression.
* Cflags are passed to regcomp.
*/
-static char *
-compile_re(p, repp)
- char *p;
- regex_t **repp;
+static regex_t *
+compile_re(char *re, int case_insensitive)
{
- int eval;
- char re[_POSIX2_LINE_MAX + 1];
-
- p = compile_delimited(p, re);
- if (p && strlen(re) == 0) {
- *repp = NULL;
- return (p);
- }
- *repp = xmalloc(sizeof(regex_t));
- if (p && (eval = regcomp(*repp, re, 0)) != 0)
- err(COMPILE, "RE error: %s", strregerror(eval, *repp));
- if (maxnsub < (*repp)->re_nsub)
- maxnsub = (*repp)->re_nsub;
- return (p);
+ regex_t *rep;
+ int eval, flags;
+
+
+ flags = rflags;
+ if (case_insensitive)
+ flags |= REG_ICASE;
+ if ((rep = malloc(sizeof(regex_t))) == NULL)
+ err(1, "malloc");
+ if ((eval = regcomp(rep, re, flags)) != 0)
+ errx(1, "%lu: %s: RE error: %s",
+ linenum, fname, strregerror(eval, rep));
+ if (maxnsub < rep->re_nsub)
+ maxnsub = rep->re_nsub;
+ return (rep);
}
/*
@@ -406,13 +467,13 @@ compile_re(p, repp)
* expressions.
*/
static char *
-compile_subst(p, s)
- char *p;
- struct s_subst *s;
+compile_subst(char *p, struct s_subst *s)
{
static char lbuf[_POSIX2_LINE_MAX + 1];
- int asize, ref, size;
+ int asize, size;
+ u_char ref;
char c, *text, *op, *sp;
+ int more = 1, sawesc = 0;
c = *p++; /* Terminator character */
if (c == '\0')
@@ -421,33 +482,58 @@ compile_subst(p, s)
s->maxbref = 0;
s->linenum = linenum;
asize = 2 * _POSIX2_LINE_MAX + 1;
- text = xmalloc(asize);
+ if ((text = malloc(asize)) == NULL)
+ err(1, "malloc");
size = 0;
do {
op = sp = text + size;
for (; *p; p++) {
- if (*p == '\\') {
- p++;
- if (strchr("123456789", *p) != NULL) {
+ if (*p == '\\' || sawesc) {
+ /*
+ * If this is a continuation from the last
+ * buffer, we won't have a character to
+ * skip over.
+ */
+ if (sawesc)
+ sawesc = 0;
+ else
+ p++;
+
+ if (*p == '\0') {
+ /*
+ * This escaped character is continued
+ * in the next part of the line. Note
+ * this fact, then cause the loop to
+ * exit w/ normal EOL case and reenter
+ * above with the new buffer.
+ */
+ sawesc = 1;
+ p--;
+ continue;
+ } else if (strchr("123456789", *p) != NULL) {
*sp++ = '\\';
ref = *p - '0';
if (s->re != NULL &&
ref > s->re->re_nsub)
- err(COMPILE,
-"\\%c not defined in the RE", *p);
+ errx(1, "%lu: %s: \\%c not defined in the RE",
+ linenum, fname, *p);
if (s->maxbref < ref)
s->maxbref = ref;
} else if (*p == '&' || *p == '\\')
*sp++ = '\\';
} else if (*p == c) {
- p++;
+ if (*++p == '\0' && more) {
+ if (cu_fgets(lbuf, sizeof(lbuf), &more))
+ p = lbuf;
+ }
*sp++ = '\0';
size += sp - op;
- s->new = xrealloc(text, size);
+ if ((s->new = realloc(text, size)) == NULL)
+ err(1, "realloc");
return (p);
} else if (*p == '\n') {
- err(COMPILE,
-"unescaped newline inside substitute pattern");
+ errx(1,
+"%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
/* NOTREACHED */
}
*sp++ = *p;
@@ -455,10 +541,12 @@ compile_subst(p, s)
size += sp - op;
if (asize - size < _POSIX2_LINE_MAX + 1) {
asize *= 2;
- text = xmalloc(asize);
+ if ((text = realloc(text, asize)) == NULL)
+ err(1, "realloc");
}
- } while (cu_fgets(p = lbuf, sizeof(lbuf)));
- err(COMPILE, "unterminated substitute in regular expression");
+ } while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
+ errx(1, "%lu: %s: unterminated substitute in regular expression",
+ linenum, fname);
/* NOTREACHED */
}
@@ -466,24 +554,24 @@ compile_subst(p, s)
* Compile the flags of the s command
*/
static char *
-compile_flags(p, s)
- char *p;
- struct s_subst *s;
+compile_flags(char *p, struct s_subst *s)
{
int gn; /* True if we have seen g or n */
+ unsigned long nval;
char wfile[_POSIX2_LINE_MAX + 1], *q;
s->n = 1; /* Default */
s->p = 0;
s->wfile = NULL;
s->wfd = -1;
+ s->icase = 0;
for (gn = 0;;) {
EATSPACE(); /* EXTENSION */
switch (*p) {
case 'g':
if (gn)
- err(COMPILE,
-"more than one number or 'g' in substitute flags");
+ errx(1,
+"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
gn = 1;
s->n = 0;
break;
@@ -494,21 +582,30 @@ compile_flags(p, s)
case 'p':
s->p = 1;
break;
+ case 'i':
+ case 'I':
+ s->icase = 1;
+ break;
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
if (gn)
- err(COMPILE,
-"more than one number or 'g' in substitute flags");
+ errx(1,
+"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
gn = 1;
- /* XXX Check for overflow */
- s->n = (int)strtol(p, &p, 10);
+ errno = 0;
+ nval = strtol(p, &p, 10);
+ if (errno == ERANGE || nval > INT_MAX)
+ errx(1,
+"%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
+ s->n = nval;
+ p--;
break;
case 'w':
p++;
#ifdef HISTORIC_PRACTICE
if (*p != ' ') {
- err(WARNING, "space missing before w wfile");
+ warnx("%lu: %s: space missing before w wfile", linenum, fname);
return (p);
}
#endif
@@ -521,16 +618,16 @@ compile_flags(p, s)
}
*q = '\0';
if (q == wfile)
- err(COMPILE, "no wfile specified");
+ errx(1, "%lu: %s: no wfile specified", linenum, fname);
s->wfile = strdup(wfile);
if (!aflag && (s->wfd = open(wfile,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
DEFFILEMODE)) == -1)
- err(FATAL, "%s: %s\n", wfile, strerror(errno));
+ err(1, "%s", wfile);
return (p);
default:
- err(COMPILE,
- "bad flag in substitute command: '%c'", *p);
+ errx(1, "%lu: %s: bad flag in substitute command: '%c'",
+ linenum, fname, *p);
break;
}
p++;
@@ -541,40 +638,91 @@ compile_flags(p, s)
* Compile a translation set of strings into a lookup table.
*/
static char *
-compile_tr(p, transtab)
- char *p;
- char **transtab;
+compile_tr(char *p, struct s_tr **py)
{
+ struct s_tr *y;
int i;
- char *lt, *op, *np;
+ const char *op, *np;
char old[_POSIX2_LINE_MAX + 1];
char new[_POSIX2_LINE_MAX + 1];
+ size_t oclen, oldlen, nclen, newlen;
+ mbstate_t mbs1, mbs2;
+
+ if ((*py = y = malloc(sizeof(*y))) == NULL)
+ err(1, NULL);
+ y->multis = NULL;
+ y->nmultis = 0;
if (*p == '\0' || *p == '\\')
- err(COMPILE,
-"transform pattern can not be delimited by newline or backslash");
- p = compile_delimited(p, old);
- if (p == NULL) {
- err(COMPILE, "unterminated transform source string");
- return (NULL);
- }
- p = compile_delimited(--p, new);
- if (p == NULL) {
- err(COMPILE, "unterminated transform target string");
- return (NULL);
- }
+ errx(1,
+ "%lu: %s: transform pattern can not be delimited by newline or backslash",
+ linenum, fname);
+ p = compile_delimited(p, old, 1);
+ if (p == NULL)
+ errx(1, "%lu: %s: unterminated transform source string",
+ linenum, fname);
+ p = compile_delimited(p - 1, new, 1);
+ if (p == NULL)
+ errx(1, "%lu: %s: unterminated transform target string",
+ linenum, fname);
EATSPACE();
- if (strlen(new) != strlen(old)) {
- err(COMPILE, "transform strings are not the same length");
- return (NULL);
+ op = old;
+ oldlen = mbsrtowcs(NULL, &op, 0, NULL);
+ if (oldlen == (size_t)-1)
+ err(1, NULL);
+ np = new;
+ newlen = mbsrtowcs(NULL, &np, 0, NULL);
+ if (newlen == (size_t)-1)
+ err(1, NULL);
+ if (newlen != oldlen)
+ errx(1, "%lu: %s: transform strings are not the same length",
+ linenum, fname);
+ if (MB_CUR_MAX == 1) {
+ /*
+ * The single-byte encoding case is easy: generate a
+ * lookup table.
+ */
+ for (i = 0; i <= UCHAR_MAX; i++)
+ y->bytetab[i] = (char)i;
+ for (; *op; op++, np++)
+ y->bytetab[(u_char)*op] = *np;
+ } else {
+ /*
+ * Multi-byte encoding case: generate a lookup table as
+ * above, but only for single-byte characters. The first
+ * bytes of multi-byte characters have their lookup table
+ * entries set to 0, which causes do_tr() to search through
+ * an auxiliary vector of multi-byte mappings.
+ */
+ memset(&mbs1, 0, sizeof(mbs1));
+ memset(&mbs2, 0, sizeof(mbs2));
+ for (i = 0; i <= UCHAR_MAX; i++)
+ y->bytetab[i] = (btowc(i) != WEOF) ? i : 0;
+ while (*op != '\0') {
+ oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
+ if (oclen == (size_t)-1 || oclen == (size_t)-2)
+ errc(1, EILSEQ, NULL);
+ nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
+ if (nclen == (size_t)-1 || nclen == (size_t)-2)
+ errc(1, EILSEQ, NULL);
+ if (oclen == 1 && nclen == 1)
+ y->bytetab[(u_char)*op] = *np;
+ else {
+ y->bytetab[(u_char)*op] = 0;
+ y->multis = realloc(y->multis,
+ (y->nmultis + 1) * sizeof(*y->multis));
+ if (y->multis == NULL)
+ err(1, NULL);
+ i = y->nmultis++;
+ y->multis[i].fromlen = oclen;
+ memcpy(y->multis[i].from, op, oclen);
+ y->multis[i].tolen = nclen;
+ memcpy(y->multis[i].to, np, nclen);
+ }
+ op += oclen;
+ np += nclen;
+ }
}
- /* We assume characters are 8 bits */
- lt = xmalloc(UCHAR_MAX);
- for (i = 0; i <= UCHAR_MAX; i++)
- lt[i] = (char)i;
- for (op = old, np = new; *op; op++, np++)
- lt[(u_char)*op] = *np;
- *transtab = lt;
return (p);
}
@@ -582,35 +730,40 @@ compile_tr(p, transtab)
* Compile the text following an a, c, or i command.
*/
static char *
-compile_text()
+compile_text(void)
{
- int asize, size;
+ int asize, esc_nl, size;
char *text, *p, *op, *s;
char lbuf[_POSIX2_LINE_MAX + 1];
asize = 2 * _POSIX2_LINE_MAX + 1;
- text = xmalloc(asize);
+ if ((text = malloc(asize)) == NULL)
+ err(1, "malloc");
size = 0;
- while (cu_fgets(lbuf, sizeof(lbuf))) {
+ while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
op = s = text + size;
p = lbuf;
EATSPACE();
- for (; *p; p++) {
- if (*p == '\\')
- p++;
+ for (esc_nl = 0; *p != '\0'; p++) {
+ if (*p == '\\' && p[1] != '\0' && *++p == '\n')
+ esc_nl = 1;
*s++ = *p;
}
size += s - op;
- if (p[-2] != '\\') {
+ if (!esc_nl) {
*s = '\0';
break;
}
if (asize - size < _POSIX2_LINE_MAX + 1) {
asize *= 2;
- text = xmalloc(asize);
+ if ((text = realloc(text, asize)) == NULL)
+ err(1, "realloc");
}
}
- return (xrealloc(text, size + 1));
+ text[size] = '\0';
+ if ((p = realloc(text, size + 1)) == NULL)
+ err(1, "realloc");
+ return (p);
}
/*
@@ -618,34 +771,51 @@ compile_text()
* it. Fill the structure pointed to according to the address.
*/
static char *
-compile_addr(p, a)
- char *p;
- struct s_addr *a;
+compile_addr(char *p, struct s_addr *a)
{
- char *end;
+ char *end, re[_POSIX2_LINE_MAX + 1];
+ int icase;
+
+ icase = 0;
+ a->type = 0;
switch (*p) {
case '\\': /* Context address */
++p;
/* FALLTHROUGH */
case '/': /* Context address */
- p = compile_re(p, &a->u.r);
+ p = compile_delimited(p, re, 0);
if (p == NULL)
- err(COMPILE, "unterminated regular expression");
+ errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
+ /* Check for case insensitive regexp flag */
+ if (*p == 'I') {
+ icase = 1;
+ p++;
+ }
+ if (*re == '\0')
+ a->u.r = NULL;
+ else
+ a->u.r = compile_re(re, icase);
a->type = AT_RE;
return (p);
case '$': /* Last line */
a->type = AT_LAST;
return (p + 1);
+
+ case '+': /* Relative line number */
+ a->type = AT_RELLINE;
+ p++;
+ /* FALLTHROUGH */
/* Line number */
- case '0': case '1': case '2': case '3': case '4':
+ case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- a->type = AT_LINE;
+ if (a->type == 0)
+ a->type = AT_LINE;
a->u.l = strtol(p, &end, 10);
return (end);
default:
- err(COMPILE, "expected context address");
+ errx(1, "%lu: %s: expected context address", linenum, fname);
return (NULL);
}
}
@@ -655,22 +825,22 @@ compile_addr(p, a)
* Return a copy of all the characters up to \n or \0.
*/
static char *
-duptoeol(s, ctype)
- register char *s;
- char *ctype;
+duptoeol(char *s, const char *ctype)
{
size_t len;
int ws;
- char *start;
+ char *p, *start;
ws = 0;
for (start = s; *s != '\0' && *s != '\n'; ++s)
- ws = isspace(*s);
+ ws = isspace((unsigned char)*s);
*s = '\0';
if (ws)
- err(WARNING, "whitespace after %s", ctype);
+ warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
len = s - start + 1;
- return (memmove(xmalloc(len), start, len));
+ if ((p = malloc(len)) == NULL)
+ err(1, "malloc");
+ return (memmove(p, start, len));
}
/*
@@ -681,8 +851,7 @@ duptoeol(s, ctype)
* TODO: Remove } nodes
*/
static void
-fixuplabel(cp, end)
- struct s_command *cp, *end;
+fixuplabel(struct s_command *cp, struct s_command *end)
{
for (; cp != end; cp = cp->next)
@@ -699,7 +868,7 @@ fixuplabel(cp, end)
break;
}
if ((cp->u.c = findlabel(cp->t)) == NULL)
- err(COMPILE2, "undefined label '%s'", cp->t);
+ errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
free(cp->t);
break;
case '{':
@@ -713,20 +882,20 @@ fixuplabel(cp, end)
* Associate the given command label for later lookup.
*/
static void
-enterlabel(cp)
- struct s_command *cp;
+enterlabel(struct s_command *cp)
{
- register struct labhash **lhp, *lh;
- register u_char *p;
- register u_int h, c;
+ struct labhash **lhp, *lh;
+ u_char *p;
+ u_int h, c;
for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
h = (h << 5) + h + c;
lhp = &labels[h & LHMASK];
for (lh = *lhp; lh != NULL; lh = lh->lh_next)
if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
- err(COMPILE2, "duplicate label '%s'", cp->t);
- lh = xmalloc(sizeof *lh);
+ errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
+ if ((lh = malloc(sizeof *lh)) == NULL)
+ err(1, "malloc");
lh->lh_next = *lhp;
lh->lh_hash = h;
lh->lh_cmd = cp;
@@ -739,12 +908,11 @@ enterlabel(cp)
* list cp. L is excluded from the search. Return NULL if not found.
*/
static struct s_command *
-findlabel(name)
- char *name;
+findlabel(char *name)
{
- register struct labhash *lh;
- register u_char *p;
- register u_int h, c;
+ struct labhash *lh;
+ u_char *p;
+ u_int h, c;
for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
h = (h << 5) + h + c;
@@ -757,22 +925,22 @@ findlabel(name)
return (NULL);
}
-/*
+/*
* Warn about any unused labels. As a side effect, release the label hash
* table space.
*/
static void
-uselabel()
+uselabel(void)
{
- register struct labhash *lh, *next;
- register int i;
+ struct labhash *lh, *next;
+ int i;
for (i = 0; i < LHSZ; i++) {
for (lh = labels[i]; lh != NULL; lh = next) {
next = lh->lh_next;
if (!lh->lh_ref)
- err(WARNING, "unused label '%s'",
- lh->lh_cmd->t);
+ warnx("%lu: %s: unused label '%s'",
+ linenum, fname, lh->lh_cmd->t);
free(lh);
}
}
38 usr.bin/sed/defs.h
View
@@ -14,10 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -35,14 +31,16 @@
* SUCH DAMAGE.
*
* @(#)defs.h 8.1 (Berkeley) 6/6/93
+ * $FreeBSD: head/usr.bin/sed/defs.h 192732 2009-05-25 06:45:33Z brian $
*/
/*
* Types of address specifications
*/
enum e_atype {
- AT_RE, /* Line that match RE */
+ AT_RE = 1, /* Line that matches RE */
AT_LINE, /* Specific line */
+ AT_RELLINE, /* Relative line */
AT_LAST, /* Last line */
};
@@ -63,14 +61,28 @@ struct s_addr {
struct s_subst {
int n; /* Occurrence to subst. */
int p; /* True if p flag */
+ int icase; /* True if I flag */
char *wfile; /* NULL if no wfile */
int wfd; /* Cached file descriptor */
regex_t *re; /* Regular expression */
- int maxbref; /* Largest backreference. */
+ unsigned int maxbref; /* Largest backreference. */
u_long linenum; /* Line number. */
char *new; /* Replacement text */
};
+/*
+ * Translate command.
+ */
+struct s_tr {
+ unsigned char bytetab[256];
+ struct trmulti {
+ size_t fromlen;
+ char from[MB_LEN_MAX];
+ size_t tolen;
+ char to[MB_LEN_MAX];
+ } *multis;
+ int nmultis;
+};
/*
* An internally compiled command.
@@ -80,16 +92,16 @@ struct s_subst {
struct s_command {
struct s_command *next; /* Pointer to next command */
struct s_addr *a1, *a2; /* Start and end address */
+ u_long startline; /* Start line number or zero */
char *t; /* Text for : a c i r w */
union {
struct s_command *c; /* Command(s) for b t { */
struct s_subst *s; /* Substitute command */
- u_char *y; /* Replace command array */
+ struct s_tr *y; /* Replace command array */
int fd; /* File descriptor for w */
} u;
char code; /* Command code */
u_int nonsel:1; /* True if ! */
- u_int inrange:1; /* True if in range */
};
/*
@@ -100,6 +112,7 @@ enum e_args {
TEXT, /* a c i */
NONSEL, /* ! */
GROUP, /* { */
+ ENDGROUP, /* } */
COMMENT, /* # */
BRANCH, /* b t */
LABEL, /* : */
@@ -133,12 +146,3 @@ typedef struct {
char *back; /* Backing memory. */
size_t blen; /* Backing memory length. */
} SPACE;
-
-/*
- * Error severity codes:
- */
-#define FATAL 0 /* Exit immediately with 1 */
-#define ERROR 1 /* Continue, but change exit value */
-#define WARNING 2 /* Just print the warning */
-#define COMPILE 3 /* Print error, count and finish script */
-#define COMPILE2 3 /* Print error, count and finish script */
29 usr.bin/sed/extern.h
View
@@ -14,10 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -35,6 +31,7 @@
* SUCH DAMAGE.
*
* @(#)extern.h 8.1 (Berkeley) 6/6/93
+ * $FreeBSD: head/usr.bin/sed/extern.h 170608 2007-06-12 12:05:24Z yar $
*/
extern struct s_command *prog;
@@ -43,17 +40,17 @@ extern regmatch_t *match;
extern size_t maxnsub;
extern u_long linenum;
extern int appendnum;
-extern int lastline;
extern int aflag, eflag, nflag;
-extern char *fname;
+extern const char *fname, *outfname;
+extern FILE *infile, *outfile;
+extern int rflags; /* regex flags to use */
-void cfclose __P((struct s_command *, struct s_command *));
-void compile __P((void));
-void cspace __P((SPACE *, char *, size_t, enum e_spflag));
-char *cu_fgets __P((char *, int));
-void err __P((int, const char *, ...));
-int mf_fgets __P((SPACE *, enum e_spflag));
-void process __P((void));
-char *strregerror __P((int, regex_t *));
-void *xmalloc __P((u_int));
-void *xrealloc __P((void *, u_int));
+void cfclose(struct s_command *, struct s_command *);
+void compile(void);
+void cspace(SPACE *, const char *, size_t, enum e_spflag);
+char *cu_fgets(char *, int, int *);
+int mf_fgets(SPACE *, enum e_spflag);
+int lastline(void);
+void process(void);
+void resetstate(void);
+char *strregerror(int, regex_t *);
322 usr.bin/sed/main.c
View
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson.
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
@@ -14,10 +15,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -35,23 +32,33 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.bin/sed/main.c 252231 2013-06-26 04:14:19Z pfg $");
+
#ifndef lint
-static char copyright[] =
+static const char copyright[] =
"@(#) Copyright (c) 1992, 1993\n\
The Regents of the University of California. All rights reserved.\n";
-#endif /* not lint */
+#endif
#ifndef lint
-static char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94";
-#endif /* not lint */
+static const char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94";
+#endif
#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
-#include <ctype.h>
+#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <locale.h>
#include <regex.h>
#include <stddef.h>
+#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -89,48 +96,81 @@ struct s_flist {
*/
static struct s_flist *files, **fl_nextp = &files;
+FILE *infile; /* Current input file */
+FILE *outfile; /* Current output file */
+
int aflag, eflag, nflag;
+int rflags = 0;
+static int rval; /* Exit status */
+
+static int ispan; /* Whether inplace editing spans across files */
/*
* Current file and line number; line numbers restart across compilation
- * units, but span across input files.
+ * units, but span across input files. The latter is optional if editing
+ * in place.
*/
-char *fname; /* File name. */
+const char *fname; /* File name. */
+const char *outfname; /* Output file name */
+static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */
+static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */
+static const char *inplace; /* Inplace edit file extension. */
u_long linenum;
-int lastline; /* TRUE on the last line of the last file */
-static void add_compunit __P((enum e_cut, char *));
-static void add_file __P((char *));
+static void add_compunit(enum e_cut, char *);
+static void add_file(char *);
+static void usage(void);
int
-main(argc, argv)
- int argc;
- char *argv[];
+main(int argc, char *argv[])
{
int c, fflag;
+ char *temp_arg;
+
+ (void) setlocale(LC_ALL, "");
fflag = 0;
- while ((c = getopt(argc, argv, "ae:f:n")) != EOF)
+ inplace = NULL;
+
+ while ((c = getopt(argc, argv, "EI:ae:f:i:lnr")) != -1)
switch (c) {
+ case 'r': /* GNU sed compat */
+ case 'E':
+ rflags = REG_EXTENDED;
+ break;
+ case 'I':
+ inplace = optarg;
+ ispan = 1; /* span across input files */
+ break;
case 'a':
aflag = 1;
break;
case 'e':
eflag = 1;
- add_compunit(CU_STRING, optarg);
+ if ((temp_arg = malloc(strlen(optarg) + 2)) == NULL)
+ err(1, "malloc");
+ strcpy(temp_arg, optarg);
+ strcat(temp_arg, "\n");
+ add_compunit(CU_STRING, temp_arg);
break;
case 'f':
fflag = 1;
add_compunit(CU_FILE, optarg);
break;
+ case 'i':
+ inplace = optarg;
+ ispan = 0; /* don't span across input files */
+ break;
+ case 'l':
+ if(setlinebuf(stdout) != 0)
+ warnx("setlinebuf() failed");
+ break;
case 'n':
nflag = 1;
break;
default:
case '?':
- (void)fprintf(stderr,
-"usage:\tsed script [-an] [file ...]\n\tsed [-an] [-e script] ... [-f scipt_file] ... [file ...]\n");
- exit(1);
+ usage();
}
argc -= optind;
argv += optind;
@@ -152,8 +192,17 @@ main(argc, argv)
process();
cfclose(prog, NULL);
if (fclose(stdout))
- err(FATAL, "stdout: %s", strerror(errno));
- exit (0);
+ err(1, "stdout");
+ exit(rval);
+}
+
+static void
+usage(void)
+{
+ (void)fprintf(stderr, "%s\n%s\n",
+ "usage: sed script [-Ealn] [-i extension] [file ...]",
+ " sed [-Ealn] [-i extension] [-e script] ... [-f script_file] ... [file ...]");
+ exit(1);
}
/*
@@ -161,9 +210,7 @@ main(argc, argv)
* together. Empty strings and files are ignored.
*/
char *
-cu_fgets(buf, n)
- char *buf;
- int n;
+cu_fgets(char *buf, int n, int *more)
{
static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF;
static FILE *f; /* Current open file */
@@ -174,19 +221,21 @@ cu_fgets(buf, n)
again:
switch (state) {
case ST_EOF:
- if (script == NULL)
+ if (script == NULL) {
+ if (more != NULL)
+ *more = 0;
return (NULL);
+ }
linenum = 0;
switch (script->type) {
case CU_FILE:
if ((f = fopen(script->s, "r")) == NULL)
- err(FATAL,
- "%s: %s", script->s, strerror(errno));
+ err(1, "%s", script->s);
fname = script->s;
state = ST_FILE;
goto again;
case CU_STRING:
- if ((snprintf(string_ident,
+ if (((size_t)snprintf(string_ident,
sizeof(string_ident), "\"%s\"", script->s)) >=
sizeof(string_ident) - 1)
(void)strcpy(string_ident +
@@ -201,6 +250,8 @@ cu_fgets(buf, n)
linenum++;
if (linenum == 1 && buf[0] == '#' && buf[1] == 'n')
nflag = 1;
+ if (more != NULL)
+ *more = !feof(f);
return (p);
}
script = script->next;
@@ -215,6 +266,8 @@ cu_fgets(buf, n)
if (n-- <= 1) {
*p = '\0';
linenum++;
+ if (more != NULL)
+ *more = 1;
return (buf);
}
switch (*s) {
@@ -227,6 +280,8 @@ cu_fgets(buf, n)
script = script->next;
*p = '\0';
linenum++;
+ if (more != NULL)
+ *more = 0;
return (buf);
}
case '\n':
@@ -234,6 +289,8 @@ cu_fgets(buf, n)
*p = '\0';
s++;
linenum++;
+ if (more != NULL)
+ *more = 0;
return (buf);
default:
*p++ = *s++;
@@ -241,6 +298,7 @@ cu_fgets(buf, n)
}
}
/* NOTREACHED */
+ return (NULL);
}
/*
@@ -248,72 +306,140 @@ cu_fgets(buf, n)
* Set len to the length of the line.
*/
int
-mf_fgets(sp, spflag)
- SPACE *sp;
- enum e_spflag spflag;
+mf_fgets(SPACE *sp, enum e_spflag spflag)
{
- static FILE *f; /* Current open file */
- size_t len;
- char c, *p;
+ struct stat sb;
+ ssize_t len;
+ static char *p = NULL;
+ static size_t plen = 0;
+ int c;
+ static int firstfile;
- if (f == NULL)
- /* Advance to first non-empty file */
- for (;;) {
- if (files == NULL) {
- lastline = 1;
- return (0);
- }
- if (files->fname == NULL) {
- f = stdin;
- fname = "stdin";
- } else {
- fname = files->fname;
- if ((f = fopen(fname, "r")) == NULL)
- err(FATAL, "%s: %s",
- fname, strerror(errno));
+ if (infile == NULL) {
+ /* stdin? */
+ if (files->fname == NULL) {
+ if (inplace != NULL)
+ errx(1, "-I or -i may not be used with stdin");
+ infile = stdin;
+ fname = "stdin";
+ outfile = stdout;
+ outfname = "stdout";
+ }
+ firstfile = 1;
+ }
+
+ for (;;) {
+ if (infile != NULL && (c = getc(infile)) != EOF) {
+ (void)ungetc(c, infile);
+ break;
+ }
+ /* If we are here then either we hit EOF or no file is open yet */
+ if (infile == stdin) {
+ sp->len = 0;
+ return (0);
+ }
+ if (infile != NULL) {
+ fclose(infile);
+ if (*oldfname != '\0') {
+ /* if there was a backup file, remove it */
+ unlink(oldfname);
+ /*
+ * Back up the original. Note that hard links
+ * are not supported on all filesystems.
+ */
+ if ((link(fname, oldfname) != 0) &&
+ (rename(fname, oldfname) != 0)) {
+ warn("rename()");
+ if (*tmpfname)
+ unlink(tmpfname);
+ exit(1);
+ }
+ *oldfname = '\0';
}
- if ((c = getc(f)) != EOF) {
- (void)ungetc(c, f);
- break;
+ if (*tmpfname != '\0') {
+ if (outfile != NULL && outfile != stdout)
+ if (fclose(outfile) != 0) {
+ warn("fclose()");
+ unlink(tmpfname);
+ exit(1);
+ }
+ outfile = NULL;
+ if (rename(tmpfname, fname) != 0) {
+ /* this should not happen really! */
+ warn("rename()");
+ unlink(tmpfname);
+ exit(1);
+ }
+ *tmpfname = '\0';
}
- (void)fclose(f);
+ outfname = NULL;
+ }
+ if (firstfile == 0)
files = files->next;
+ else
+ firstfile = 0;
+ if (files == NULL) {
+ sp->len = 0;
+ return (0);
+ }
+ fname = files->fname;
+ if (inplace != NULL) {
+ if (lstat(fname, &sb) != 0)
+ err(1, "%s", fname);
+ if (!(sb.st_mode & S_IFREG))
+ errx(1, "%s: %s %s", fname,
+ "in-place editing only",
+ "works for regular files");
+ if (*inplace != '\0') {
+ strlcpy(oldfname, fname,
+ sizeof(oldfname));
+ len = strlcat(oldfname, inplace,
+ sizeof(oldfname));
+ if (len > sizeof(oldfname))
+ errx(1, "%s: name too long", fname);
+ }
+ len = snprintf(tmpfname, sizeof(tmpfname),
+ "%s/.!%ld!%s", dirname(fname), (long)getpid(),
+ basename(fname));
+ if (len >= sizeof(tmpfname))
+ errx(1, "%s: name too long", fname);
+ unlink(tmpfname);
+ if ((outfile = fopen(tmpfname, "w")) == NULL)
+ err(1, "%s", fname);
+ fchown(fileno(outfile), sb.st_uid, sb.st_gid);
+ fchmod(fileno(outfile), sb.st_mode & ALLPERMS);
+ outfname = tmpfname;
+ if (!ispan) {
+ linenum = 0;
+ resetstate();
+ }
+ } else {
+ outfile = stdout;
+ outfname = "stdout";
+ }
+ if ((infile = fopen(fname, "r")) == NULL) {
+ warn("%s", fname);
+ rval = 1;
+ continue;
}
-
- if (lastline) {
- sp->len = 0;
- return (0);
}
-
/*
- * Use fgetln so that we can handle essentially infinite input data.
- * Can't use the pointer into the stdio buffer as the process space
- * because the ungetc() can cause it to move.
+ * We are here only when infile is open and we still have something
+ * to read from it.
+ *
+ * Use getline() so that we can handle essentially infinite input
+ * data. The p and plen are static so each invocation gives
+ * getline() the same buffer which is expanded as needed.
*/
- p = fgetln(f, &len);
- if (ferror(f))
- err(FATAL, "%s: %s", fname, strerror(errno ? errno : EIO));
+ len = getline(&p, &plen, infile);
+ if (len == -1)
+ err(1, "%s", fname);
+ if (len != 0 && p[len - 1] == '\n')
+ len--;
cspace(sp, p, len, spflag);
linenum++;
- /* Advance to next non-empty file */
- while ((c = getc(f)) == EOF) {
- (void)fclose(f);
- files = files->next;
- if (files == NULL) {
- lastline = 1;
- return (1);
- }
- if (files->fname == NULL) {
- f = stdin;
- fname = "stdin";
- } else {
- fname = files->fname;
- if ((f = fopen(fname, "r")) == NULL)
- err(FATAL, "%s: %s", fname, strerror(errno));
- }
- }
- (void)ungetc(c, f);
+
return (1);
}
@@ -321,13 +447,12 @@ mf_fgets(sp, spflag)
* Add a compilation unit to the linked list
*/
static void
-add_compunit(type, s)
- enum e_cut type;
- char *s;
+add_compunit(enum e_cut type, char *s)
{
struct s_compunit *cu;
- cu = xmalloc(sizeof(struct s_compunit));
+ if ((cu = malloc(sizeof(struct s_compunit))) == NULL)
+ err(1, "malloc");
cu->type = type;
cu->s = s;
cu->next = NULL;
@@ -339,14 +464,27 @@ add_compunit(type, s)
* Add a file to the linked list
*/
static void
-add_file(s)
- char *s;
+add_file(char *s)
{
struct s_flist *fp;
- fp = xmalloc(sizeof(struct s_flist));
+ if ((fp = malloc(sizeof(struct s_flist))) == NULL)
+ err(1, "malloc");
fp->next = NULL;
*fl_nextp = fp;
fp->fname = s;
fl_nextp = &fp->next;
}
+
+int
+lastline(void)
+{
+ int ch;
+
+ if (files->next != NULL && (inplace == NULL || ispan))
+ return (0);
+ if ((ch = getc(infile)) == EOF)
+ return (1);
+ ungetc(ch, infile);
+ return (0);
+}
94 usr.bin/sed/misc.c
View
@@ -14,10 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -35,13 +31,17 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.bin/sed/misc.c 200462 2009-12-13 03:14:06Z delphij $");
+
#ifndef lint
-static char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93";
-#endif /* not lint */
+static const char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93";
+#endif
#include <sys/types.h>
-#include <errno.h>
+#include <err.h>
+#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
@@ -51,91 +51,21 @@ static char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93";
#include "extern.h"
/*
- * malloc with result test
- */
-void *
-xmalloc(size)
- u_int size;
-{
- void *p;
-
- if ((p = malloc(size)) == NULL)
- err(FATAL, "%s", strerror(errno));
- return (p);
-}
-
-/*
- * realloc with result test
- */
-void *
-xrealloc(p, size)
- void *p;
- u_int size;
-{
- if (p == NULL) /* Compatibility hack. */
- return (xmalloc(size));
-
- if ((p = realloc(p, size)) == NULL)
- err(FATAL, "%s", strerror(errno));
- return (p);
-}
-
-/*
- * Return a string for a regular expression error passed. This is a overkill,
+ * Return a string for a regular expression error passed. This is overkill,
* because of the silly semantics of regerror (we can never know the size of
* the buffer).
*/
char *
-strregerror(errcode, preg)
- int errcode;
- regex_t *preg;
+strregerror(int errcode, regex_t *preg)
{
static char *oe;
size_t s;
if (oe != NULL)
free(oe);
- s = regerror(errcode, preg, "", 0);
- oe = xmalloc(s);
+ s = regerror(errcode, preg, NULL, 0);
+ if ((oe = malloc(s)) == NULL)
+ err(1, "malloc");
(void)regerror(errcode, preg, oe, s);
return (oe);
}
-
-#if __STDC__
-#include <stdarg.h>
-#else
-#include <varargs.h>
-#endif
-/*
- * Error reporting function
- */
-void
-#if __STDC__
-err(int severity, const char *fmt, ...)
-#else
-err(severity, fmt, va_alist)
- int severity;
- char *fmt;
- va_dcl
-#endif
-{
- va_list ap;
-#if __STDC__
- va_start(ap, fmt);
-#else
- va_start(ap);
-#endif
- (void)fprintf(stderr, "sed: ");
- switch (severity) {
- case WARNING:
- case COMPILE:
- (void)fprintf(stderr, "%lu: %s: ", linenum, fname);
- }
- (void)vfprintf(stderr, fmt, ap);
- va_end(ap);
- (void)fprintf(stderr, "\n");
- if (severity == WARNING)
- return;
- exit(1);
- /* NOTREACHED */
-}
468 usr.bin/sed/process.c
View
@@ -14,10 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -35,9 +31,12 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
+
#ifndef lint
-static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
-#endif /* not lint */
+static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
+#endif
#include <sys/types.h>
#include <sys/stat.h>
@@ -45,6 +44,7 @@ static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
#include <sys/uio.h>
#include <ctype.h>
+#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@@ -53,23 +53,26 @@ static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
#include "defs.h"
#include "extern.h"
-static SPACE HS, PS, SS;
+static SPACE HS, PS, SS, YS;
#define pd PS.deleted
#define ps PS.space
#define psl PS.len
#define hs HS.space
#define hsl HS.len
-static inline int applies __P((struct s_command *));
-static void flush_appends __P((void));
-static void lputs __P((char *));
-static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
-static void regsub __P((SPACE *, char *, char *));
-static int substitute __P((struct s_command *));
+static __inline int applies(struct s_command *);
+static void do_tr(struct s_tr *);
+static void flush_appends(void);
+static void lputs(char *, size_t);
+static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
+static void regsub(SPACE *, char *, char *);
+static int substitute(struct s_command *);
struct s_appends *appends; /* Array of pointers to strings to append. */
static int appendx; /* Index into appends array. */
@@ -82,18 +85,21 @@ static regex_t *defpreg;
size_t maxnsub;
regmatch_t *match;
-#define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
+#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
void
-process()
+process(void)
{
struct s_command *cp;
SPACE tspace;
- size_t len;
- char oldc, *p;
+ size_t oldpsl = 0;
+ char *p;
+
+ p = NULL;
for (linenum = 0; mf_fgets(&PS, REPLACE);) {
pd = 0;
+top:
cp = prog;
redirect:
while (cp != NULL) {
@@ -107,9 +113,10 @@ process()
goto redirect;
case 'a':
if (appendx >= appendnum)
- appends = xrealloc(appends,
+ if ((appends = realloc(appends,
sizeof(struct s_appends) *
- (appendnum *= 2));
+ (appendnum *= 2))) == NULL)
+ err(1, "realloc");
appends[appendx].type = AP_STRING;
appends[appendx].s = cp->t;
appends[appendx].len = strlen(cp->t);
@@ -121,8 +128,8 @@ process()
case 'c':
pd = 1;
psl = 0;
- if (cp->a2 == NULL || lastaddr)
- (void)printf("%s", cp->t);
+ if (cp->a2 == NULL || lastaddr || lastline())
+ (void)fprintf(outfile, "%s", cp->t);
break;
case 'd':
pd = 1;
@@ -130,34 +137,38 @@ process()
case 'D':
if (pd)
goto new;
- if ((p = memchr(ps, '\n', psl)) == NULL)
+ if (psl == 0 ||
+ (p = memchr(ps, '\n', psl)) == NULL) {
pd = 1;
- else {
- psl -= (p - ps) + 1;
+ goto new;
+ } else {
+ psl -= (p + 1) - ps;
memmove(ps, p + 1, psl);
+ goto top;
}
- goto new;
case 'g':
cspace(&PS, hs, hsl, REPLACE);
break;
case 'G':
- cspace(&PS, hs, hsl, 0);
+ cspace(&PS, "\n", 1, APPEND);
+ cspace(&PS, hs, hsl, APPEND);
break;
case 'h':
cspace(&HS, ps, psl, REPLACE);
break;
case 'H':
- cspace(&HS, ps, psl, 0);
+ cspace(&HS, "\n", 1, APPEND);
+ cspace(&HS, ps, psl, APPEND);
break;
case 'i':
- (void)printf("%s", cp->t);
+ (void)fprintf(outfile, "%s", cp->t);
break;
case 'l':
- lputs(ps);
+ lputs(ps, psl);
break;
case 'n':
if (!nflag && !pd)
- OUT(ps)
+ OUT();
flush_appends();
if (!mf_fgets(&PS, REPLACE))
exit(0);
@@ -165,38 +176,37 @@ process()
break;
case 'N':
flush_appends();
- if (!mf_fgets(&PS, 0)) {
- if (!nflag && !pd)
- OUT(ps)
+ cspace(&PS, "\n", 1, APPEND);
+ if (!mf_fgets(&PS, APPEND))
exit(0);
- }
break;
case 'p':
if (pd)
break;
- OUT(ps)
+ OUT();
break;
case 'P':
if (pd)
break;
if ((p = memchr(ps, '\n', psl)) != NULL) {
- oldc = *p;
- *p = '\0';
+ oldpsl = psl;
+ psl = p - ps;
}
- OUT(ps)
+ OUT();
if (p != NULL)
- *p = oldc;
+ psl = oldpsl;
break;
case 'q':
if (!nflag && !pd)
- OUT(ps)
+ OUT();
flush_appends();
exit(0);
case 'r':
if (appendx >= appendnum)
- appends = xrealloc(appends,
+ if ((appends = realloc(appends,
sizeof(struct s_appends) *
- (appendnum *= 2));
+ (appendnum *= 2))) == NULL)
+ err(1, "realloc");
appends[appendx].type = AP_FILE;
appends[appendx].s = cp->t;
appends[appendx].len = strlen(cp->t);
@@ -218,13 +228,18 @@ process()
if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
DEFFILEMODE)) == -1)
- err(FATAL, "%s: %s\n",
- cp->t, strerror(errno));
- if (write(cp->u.fd, ps, psl) != psl)
- err(FATAL, "%s: %s\n",
- cp->t, strerror(errno));
+ err(1, "%s", cp->t);
+ if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
+ write(cp->u.fd, "\n", 1) != 1)
+ err(1, "%s", cp->t);
break;
case 'x':
+ /*
+ * If the hold space is null, make it empty
+ * but not null. Otherwise the pattern space
+ * will become null after the swap, which is
+ * an abnormal condition.
+ */
if (hs == NULL)
cspace(&HS, "", 0, REPLACE);
tspace = PS;
@@ -232,22 +247,21 @@ process()
HS = tspace;
break;
case 'y':
- if (pd)
+ if (pd || psl == 0)
break;
- for (p = ps, len = psl; --len; ++p)
- *p = cp->u.y[*p];
+ do_tr(cp->u.y);
break;
case ':':
case '}':
break;
case '=':
- (void)printf("%lu\n", linenum);
+ (void)fprintf(outfile, "%lu\n", linenum);
}
cp = cp->next;
} /* for all cp */
new: if (!nflag && !pd)
- OUT(ps)
+ OUT();
flush_appends();
} /* for all lines */
}
@@ -256,17 +270,16 @@ new: if (!nflag && !pd)
* TRUE if the address passed matches the current program state
* (lastline, linenumber, ps).
*/
-#define MATCH(a) \
- (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
- (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
+#define MATCH(a) \
+ ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
+ (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
/*
- * Return TRUE if the command applies to the current line. Sets the inrange
- * flag to process ranges. Interprets the non-select (``!'') flag.
+ * Return TRUE if the command applies to the current line. Records the start
+ * line of an active range so ranges can be processed. Interprets the
+ * non-select (``!'') flag.
*/
-static inline int
-applies(cp)
- struct s_command *cp;
+static __inline int
+applies(struct s_command *cp)
{
int r;
@@ -274,24 +287,40 @@ applies(cp)
if (cp->a1 == NULL && cp->a2 == NULL)
r = 1;
else if (cp->a2)
- if (cp->inrange) {
+ if (cp->startline > 0) {
if (MATCH(cp->a2)) {
- cp->inrange = 0;
+ cp->startline = 0;
lastaddr = 1;
- }
- r = 1;
+ r = 1;
+ } else if (linenum - cp->startline <= cp->a2->u.l)
+ r = 1;
+ else if ((cp->a2->type == AT_LINE &&
+ linenum > cp->a2->u.l) ||
+ (cp->a2->type == AT_RELLINE &&
+ linenum - cp->startline > cp->a2->u.l)) {
+ /*
+ * We missed the 2nd address due to a branch,
+ * so just close the range and return false.
+ */
+ cp->startline = 0;
+ r = 0;
+ } else
+ r = 1;
} else if (MATCH(cp->a1)) {
/*
* If the second address is a number less than or
* equal to the line number first selected, only
* one line shall be selected.
* -- POSIX 1003.2
+ * Likewise if the relative second line address is zero.
*/
- if (cp->a2->type == AT_LINE &&
- linenum >= cp->a2->u.l)
+ if ((cp->a2->type == AT_LINE &&
+ linenum >= cp->a2->u.l) ||
+ (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
lastaddr = 1;
- else
- cp->inrange = 1;
+ else {
+ cp->startline = linenum;
+ }
r = 1;
} else
r = 0;
@@ -301,18 +330,38 @@ applies(cp)
}
/*
+ * Reset the sed processor to its initial state.
+ */
+void
+resetstate(void)
+{
+ struct s_command *cp;
+
+ /*
+ * Reset all in-range markers.
+ */
+ for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
+ if (cp->a2)
+ cp->startline = 0;
+
+ /*
+ * Clear out the hold space.
+ */
+ cspace(&HS, "", 0, REPLACE);
+}
+
+/*
* substitute --
* Do substitutions in the pattern space. Currently, we build a
* copy of the new pattern space in the substitute space structure
* and then swap them.
*/
static int
-substitute(cp)
- struct s_command *cp;
+substitute(struct s_command *cp)
{
SPACE tspace;
regex_t *re;
- size_t re_off, slen;
+ regoff_t re_off, slen;
int lastempty, n;
char *s;
@@ -321,21 +370,21 @@ substitute(cp)
if (re == NULL) {
if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
linenum = cp->u.s->linenum;
- err(COMPILE, "\\%d not defined in the RE",
- cp->u.s->maxbref);
+ errx(1, "%lu: %s: \\%u not defined in the RE",
+ linenum, fname, cp->u.s->maxbref);
}
}
if (!regexec_e(re, s, 0, 0, psl))
return (0);
- SS.len = 0; /* Clean substitute space. */
- slen = psl;
- n = cp->u.s->n;
+ SS.len = 0; /* Clean substitute space. */
+ slen = psl;
+ n = cp->u.s->n;
lastempty = 1;
- switch (n) {
- case 0: /* Global */
- do {
+ switch (n) {
+ case 0: /* Global */
+ do {
if (lastempty || match[0].rm_so != match[0].rm_eo) {
/* Locate start of replaced string. */
re_off = match[0].rm_so;
@@ -345,31 +394,32 @@ substitute(cp)
regsub(&SS, s, cp->u.s->new);
}
- /* Move past this match. */
+ /* Move past this match. */
if (match[0].rm_so != match[0].rm_eo) {
s += match[0].rm_eo;
slen -= match[0].rm_eo;
lastempty = 0;
} else {
- if (match[0].rm_so == 0)
- cspace(&SS,
- s, match[0].rm_so + 1, APPEND);
- else
- cspace(&SS,
- s + match[0].rm_so, 1, APPEND);
+ if (match[0].rm_so < slen)
+ cspace(&SS, s + match[0].rm_so, 1,
+ APPEND);
s += match[0].rm_so + 1;
slen -= match[0].rm_so + 1;
lastempty = 1;
}
- } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
+ } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
/* Copy trailing retained string. */
if (slen > 0)
cspace(&SS, s, slen, APPEND);
- break;
+ break;
default: /* Nth occurrence */
while (--n) {
+ if (match[0].rm_eo == match[0].rm_so)
+ match[0].rm_eo = match[0].rm_so + 1;
s += match[0].rm_eo;
slen -= match[0].rm_eo;
+ if (slen < 0)
+ return (0);
if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
return (0);
}
@@ -399,122 +449,209 @@ substitute(cp)
/* Handle the 'p' flag. */
if (cp->u.s->p)
- OUT(ps)
+ OUT();
/* Handle the 'w' flag. */
if (cp->u.s->wfile && !pd) {
if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
- err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
- if (write(cp->u.s->wfd, ps, psl) != psl)
- err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
+ err(1, "%s", cp->u.s->wfile);
+ if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
+ write(cp->u.s->wfd, "\n", 1) != 1)
+ err(1, "%s", cp->u.s->wfile);
}
return (1);
}
/*
+ * do_tr --
+ * Perform translation ('y' command) in the pattern space.
+ */
+static void
+do_tr(struct s_tr *y)
+{
+ SPACE tmp;
+ char c, *p;
+ size_t clen, left;
+ int i;
+
+ if (MB_CUR_MAX == 1) {
+ /*
+ * Single-byte encoding: perform in-place translation
+ * of the pattern space.
+ */
+ for (p = ps; p < &ps[psl]; p++)
+ *p = y->bytetab[(u_char)*p];
+ } else {
+ /*
+ * Multi-byte encoding: perform translation into the
+ * translation space, then swap the translation and
+ * pattern spaces.
+ */
+ /* Clean translation space. */
+ YS.len =