Skip to content

Commit

Permalink
OS-7315 Update nawk(1) field splitting behaviour to match POSIX definition
Browse files Browse the repository at this point in the history

Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jason King <jbk@joyent.com>
  • Loading branch information
melloc committed Jun 13, 2019
1 parent b4acf02 commit 084c5c4
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 13 deletions.
1 change: 1 addition & 0 deletions usr/src/cmd/awk/awk.h
Expand Up @@ -338,6 +338,7 @@ extern void FATAL(const char *, ...) __attribute__((__noreturn__));
extern void WARNING(const char *, ...);
extern void error(void);
extern void nextfile(void);
extern void savefs(void);

extern int isclvar(const char *);
extern int is_number(const char *);
Expand Down
24 changes: 18 additions & 6 deletions usr/src/cmd/awk/lib.c
Expand Up @@ -144,6 +144,23 @@ initgetrec(void)
infile = stdin; /* no filenames, so use stdin */
}

/*
 * Snapshot the current value of FS into inputFS.
 *
 * Field splitting is performed lazily, but POSIX requires fields to be
 * evaluated as though they had been split with the FS that was in effect
 * when the record ($0) obtained its value. Callers therefore invoke this
 * each time $0 is given a new value: when a record is read (implicitly or
 * through getline) and when $0 is assigned to directly.
 *
 * If the separator (including its terminating NUL) does not fit in
 * inputFS, a fatal error is reported instead of copying.
 */
void
savefs(void)
{
	size_t fslen = strlen(getsval(fsloc));

	if (fslen >= sizeof (inputFS)) {
		FATAL("field separator %.10s... is too long", *FS);
	}

	(void) strcpy(inputFS, *FS);
}

static int firsttime = 1;

/*
Expand All @@ -167,6 +184,7 @@ getrec(char **pbuf, size_t *pbufsize, int isrecord)
if (isrecord) {
donefld = 0;
donerec = 1;
savefs();
}
saveb0 = buf[0];
buf[0] = '\0';
Expand Down Expand Up @@ -242,9 +260,6 @@ readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */
size_t bufsize = *pbufsize;
char *rs = getsval(rsloc);

if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
(void) strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = *rs) == 0) {
sep = '\n';
/* skip leading \n's */
Expand Down Expand Up @@ -342,9 +357,6 @@ fldbld(void) /* create fields from current record */
fr = fields;

i = 0; /* number of fields accumulated here */
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
(void) strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
Expand Down
2 changes: 2 additions & 0 deletions usr/src/cmd/awk/tran.c
Expand Up @@ -377,6 +377,7 @@ setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
savefs();
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
Expand Down Expand Up @@ -424,6 +425,7 @@ setsval(Cell *vp, const char *s) /* set string val of a Cell */
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
savefs();
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
Expand Down
@@ -1,3 +1,3 @@
$AWK: field separator cccccccccc... is too long
source line number 12
source line number 11
EXIT CODE: 2
75 changes: 69 additions & 6 deletions usr/src/test/util-tests/tests/awk/tests/T.split
Expand Up @@ -18,14 +18,31 @@ fail() {

echo T.split: misc tests of field splitting and split command

echo a:bc:def > $TEMP0
echo a > $TEMP1
$AWK '{ FS = ":"; print $1 }' $TEMP0 > $TEMP2
$AWK 'BEGIN {
# Assign string to $0, then change FS.
FS = ":";
$0="a:bc:def";
FS = "-";
print FS, $1, NF;
# Assign number to $0, then change FS.
FS = "2";
$0=1212121;
FS="3";
print FS, $1, NF;
}' > $TEMP1
echo '- a 3
3 1 4' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.1'

echo a:bc:def > $TEMP0
echo 3 > $TEMP1
$AWK '{ FS = ":"; print NF }' $TEMP0 > $TEMP2
$AWK 'BEGIN {
# FS changes after getline.
FS = ":";
"echo a:bc:def" | getline;
FS = "-";
print FS, $1, NF;
}' > $TEMP1
echo '- a 3' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.2'

echo '
Expand All @@ -45,6 +62,52 @@ echo '0
4' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.3'

# T.split 0.4: "getline var < file" shouldn't impact fields.
#
# The record 'f b a' has already been read by the time the rule body
# assigns FS = ":" and reads a line from another file into the variable a
# (not into $0).  The expected output 'f' checks that $1 is still split
# from the original record rather than with the newly assigned FS.

echo 'f b a' > $TEMP0
$AWK '{
FS = ":";
getline a < "/etc/passwd";
print $1;
}' $TEMP0 > $TEMP1
echo 'f' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.4'

# T.split 0.5: plain "getline var" shouldn't impact fields either.
#
# For each record the rule sets FS to ":", reads the following input line
# ('foo', then 'bar') into v, prints $2 and NF, and finally restores FS to
# " " before the next record is read.  The expected output ('b 4', 'f 5')
# checks that the fields of the current record are unaffected by both the
# FS change and the getline into a variable.
echo 'a b c d
foo
e f g h i
bar' > $TEMP0
$AWK '{
FS=":";
getline v;
print $2, NF;
FS=" ";
}' $TEMP0 > $TEMP1
echo 'b 4
f 5' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.5'

# T.split (record assignment 1): assigning to $0 re-splits with the FS in
# effect at the time of the assignment.
#
# Each record is read with FS "=".  The rule then sets FS to ".", assigns
# $1 to $0 and prints $2.  The expected output (b, h, n) requires $1 to be
# taken from the "="-split record (e.g. 'a.b.c') and the newly assigned $0
# to be split on ".".  FS is reset to "=" before the next record is read.
echo 'a.b.c=d.e.f
g.h.i=j.k.l
m.n.o=p.q.r' > $TEMP0
echo 'b
h
n' > $TEMP1
$AWK 'BEGIN { FS="=" } { FS="."; $0=$1; print $2; FS="="; }' $TEMP0 > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split (record assignment 1)'

# T.split (record assignment 2): same scenario as record assignment 1, but
# $2 is also printed before FS is changed, so the fields of the record as
# read have already been referenced when $0 is reassigned.
#
# For each record the expected output is first the "="-split $2 (e.g.
# 'd.e.f') and then, after FS="." and $0=$1, the "."-split $2 (e.g. 'b').
echo 'a.b.c=d.e.f
g.h.i=j.k.l
m.n.o=p.q.r' > $TEMP0
echo 'd.e.f
b
j.k.l
h
p.q.r
n' > $TEMP1
$AWK 'BEGIN { FS="=" } { print $2; FS="."; $0=$1; print $2; FS="="; }' $TEMP0 > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split (record assignment 2)'

echo 'abc
de
f
Expand Down

0 comments on commit 084c5c4

Please sign in to comment.