Skip to content

Commit

Permalink
OS-7261 Want a more modern nawk(1)
Browse files Browse the repository at this point in the history
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jason King <jbk@joyent.com>
  • Loading branch information
melloc committed Jun 13, 2019
1 parent d0ca0fd commit dc71967
Show file tree
Hide file tree
Showing 148 changed files with 5,914 additions and 1,033 deletions.
5 changes: 5 additions & 0 deletions usr/src/cmd/awk/DIVERGENCES
@@ -0,0 +1,5 @@
The illumos nawk(1) is slightly divergent from upstream:
- We allow an unlimited number of input program files
- We allow an unlimited number of "/pat/, /pat/" expressions
- Some of the code has been altered to track the length of strings
better so that we can avoid repeatedly calling strlen(3C)
2 changes: 1 addition & 1 deletion usr/src/cmd/awk/Makefile
Expand Up @@ -55,7 +55,7 @@ XGETFLAGS += -a -x awk.xcl

CPPFLAGS += -D_FILE_OFFSET_BITS=64
YFLAGS += -d
LDLIBS += -lm
LDLIBS += -lm -lumem
LINTFLAGS += -u
CLEANFILES= maketab proctab.c awk.g.c y.tab.h

Expand Down
109 changes: 72 additions & 37 deletions usr/src/cmd/awk/awk.g.y
Expand Up @@ -55,11 +55,14 @@

%{
#include "awk.h"

void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }

Node *beginloc = NULL;
Node *endloc = NULL;
int infunc = 0; /* = 1 if in arglist or body of func */
int infunc = 0; /* = 1 if in arglist or body of func */
int inloop = 0; /* = 1 if in while, for, do */
char *curfname = NULL; /* current function name */
Node *arglist = NULL; /* list of args for current function */
static void setfname(Cell *);
Expand All @@ -80,10 +83,10 @@ static Node *notnull(Node *);
%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token <i> ARRAY
%token <i> MATCH NOTMATCH MATCHOP
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
Expand All @@ -92,13 +95,14 @@ static Node *notnull(Node *);
%token <cp> VAR IVAR VARNF CALL NUMBER STRING
%token <s> REGEXPR

%type <p> pas pattern ppattern plist pplist patlist prarg term
%type <p> pas pattern ppattern plist pplist patlist prarg term re
%type <p> pa_pat pa_stat pa_stats
%type <s> reg_expr
%type <p> simple_stmt opt_simple_stmt stmt stmtlist
%type <p> var varname funcname varlist
%type <p> for if while
%type <i> pst opt_pst lbrace rparen comma nl opt_nl and bor
%type <p> for if else while
%type <i> do st
%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type <i> subop print

%right ASGNOP
Expand All @@ -115,7 +119,7 @@ static Node *notnull(Node *);
%left CAT
%left '+' '-'
%left '*' '/' '%'
%left NOT UMINUS
%left NOT UMINUS UPLUS
%right POWER
%right DECR INCR
%left INDIRECT
Expand Down Expand Up @@ -150,12 +154,12 @@ else:
;

for:
FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
{ $$ = stat4(FOR, $3, notnull($5), $7, $9); }
| FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt
{ $$ = stat4(FOR, $3, NIL, $6, $8); }
| FOR '(' varname IN varname rparen stmt
{ $$ = stat3(IN, $3, makearr($5), $7); }
FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
| FOR '(' varname IN varname rparen {inloop++;} stmt
{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
;

funcname:
Expand Down Expand Up @@ -203,8 +207,8 @@ pa_pat:
pa_stat:
pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
| pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
| pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
| pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
| pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
| lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
| XBEGIN lbrace stmtlist '}'
{ beginloc = linkum(beginloc, $3); $$ = 0; }
Expand Down Expand Up @@ -232,8 +236,6 @@ ppattern:
{ $$ = op2(BOR, notnull($1), notnull($3)); }
| ppattern and ppattern %prec AND
{ $$ = op2(AND, notnull($1), notnull($3)); }
| NOT ppattern
{ $$ = op1(NOT, notnull($2)); }
| ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
| ppattern MATCHOP ppattern
{ if (constnode($3))
Expand All @@ -243,8 +245,7 @@ ppattern:
| ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
| ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
| reg_expr
{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
| re
| term
;

Expand All @@ -256,8 +257,6 @@ pattern:
{ $$ = op2(BOR, notnull($1), notnull($3)); }
| pattern and pattern %prec AND
{ $$ = op2(AND, notnull($1), notnull($3)); }
| NOT pattern
{ $$ = op1(NOT, op2(NE,$2,celltonode(lookup("$zero&null",symtab),CCON))); }
| pattern EQ pattern { $$ = op2($2, $1, $3); }
| pattern GE pattern { $$ = op2($2, $1, $3); }
| pattern GT pattern { $$ = op2($2, $1, $3); }
Expand All @@ -272,11 +271,14 @@ pattern:
$$ = op3($2, (Node *)1, $1, $3); }
| pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
| pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
| pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
| pattern '|' GETLINE var {
if (safe) SYNTAX("cmd | getline is unsafe");
else $$ = op3(GETLINE, $4, itonp($2), $1); }
| pattern '|' GETLINE {
if (safe) SYNTAX("cmd | getline is unsafe");
else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
| pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
| reg_expr
{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
| re
| term
;

Expand Down Expand Up @@ -308,6 +310,12 @@ rbrace:
'}' | rbrace NL
;

re:
reg_expr
{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
| NOT re { $$ = op1(NOT, notnull($2)); }
;

reg_expr:
'/' {startreg();} REGEXPR '/' { $$ = $3; }
;
Expand All @@ -317,12 +325,18 @@ rparen:
;

simple_stmt:
print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); }
| print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); }
| print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); }
print prarg '|' term {
if (safe) SYNTAX("print | is unsafe");
else $$ = stat3($1, $2, itonp($3), $4); }
| print prarg APPEND term {
if (safe) SYNTAX("print >> is unsafe");
else $$ = stat3($1, $2, itonp($3), $4); }
| print prarg GT term {
if (safe) SYNTAX("print > is unsafe");
else $$ = stat3($1, $2, itonp($3), $4); }
| print prarg { $$ = stat3($1, $2, NIL, NIL); }
| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
| DELETE varname { yyclearin; SYNTAX("you can only delete array[element]"); $$ = stat1(DELETE, $2); }
| DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
| pattern { $$ = exptostat($1); }
| error { yyclearin; SYNTAX("illegal statement"); }
;
Expand All @@ -333,11 +347,12 @@ st:
;

stmt:
BREAK st { $$ = stat1(BREAK, NIL); }
| CLOSE pattern st { $$ = stat1(CLOSE, $2); }
| CONTINUE st { $$ = stat1(CONTINUE, NIL); }
| do stmt WHILE '(' pattern ')' st
{ $$ = stat2(DO, $2, notnull($5)); }
BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
$$ = stat1(BREAK, NIL); }
| CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
$$ = stat1(CONTINUE, NIL); }
| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
{ $$ = stat2(DO, $3, notnull($7)); }
| EXIT pattern st { $$ = stat1(EXIT, $2); }
| EXIT st { $$ = stat1(EXIT, NIL); }
| for
Expand All @@ -347,10 +362,13 @@ stmt:
| NEXT st { if (infunc)
SYNTAX("next is illegal inside a function");
$$ = stat1(NEXT, NIL); }
| NEXTFILE st { if (infunc)
SYNTAX("nextfile is illegal inside a function");
$$ = stat1(NEXTFILE, NIL); }
| RETURN pattern st { $$ = stat1(RETURN, $2); }
| RETURN st { $$ = stat1(RETURN, NIL); }
| simple_stmt st
| while stmt { $$ = stat2(WHILE, $1, $2); }
| while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
| ';' opt_nl { $$ = 0; }
;

Expand All @@ -372,12 +390,14 @@ term:
| term '%' term { $$ = op2(MOD, $1, $3); }
| term POWER term { $$ = op2(POWER, $1, $3); }
| '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
| '+' term %prec UMINUS { $$ = $2; }
| '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
| NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
| BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
| BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
| BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
| CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
| CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
| CLOSE term { $$ = op1(CLOSE, $2); }
| DECR var { $$ = op1(PREDECR, $2); }
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
Expand Down Expand Up @@ -439,7 +459,9 @@ var:
varlist:
/* nothing */ { arglist = $$ = 0; }
| VAR { arglist = $$ = celltonode($1,CVAR); }
| varlist comma VAR { arglist = $$ = linkum($1,celltonode($3,CVAR)); }
| varlist comma VAR {
checkdup($1, $3);
arglist = $$ = linkum($1,celltonode($3,CVAR)); }
;

varname:
Expand All @@ -463,6 +485,7 @@ setfname(Cell *p)
else if (isfcn(p))
SYNTAX("you can't define function %s more than once", p->nval);
curfname = p->nval;
p->tval |= FCN;
}

static int
Expand All @@ -488,3 +511,15 @@ notnull(Node *n)
return op2(NE, n, nullnode);
}
}

void
checkdup(Node *vl, Cell *cp) /* check if name already in list */
{
char *s = cp->nval;
for (; vl; vl = vl->nnext) {
if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
SYNTAX("duplicate argument %s", s);
break;
}
}
}

0 comments on commit dc71967

Please sign in to comment.