Permalink
Switch branches/tags
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
468 lines (390 sloc) 11.8 KB
/* Parse ABNF to locate dangling references and undefined symbols
* does limited syntax checking
*
* This is a quick&dirty hack donated to public domain by Chris Newman
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ERROR 0
#define COMMENT 1
#define RULEDEF 2
#define AMENDRULE 3
#define HEXMODE 4
#define BINMODE 5
#define DECMODE 6
#define ALTERNATE 7
#define REPEAT 8
#define BEGINGROUP 9
#define ENDGROUP 10
#define BEGINOPTION 11
#define ENDOPTION 12
#define RANGE 13
#define LITLIST 14
#define NUMBER 15
#define HEXNUMBER 16
#define QUOTED 17
#define PROSE 18
#define RULENAME 19
#define NEWLINE 20
static const char *token_name[] = {
"error", "comment", "ruledef", "amendrule", "hexmode",
"binmode", "decmode", "alternate", "repeat", "begingroup",
"endgroup", "beginoption", "endoption", "range", "litlist",
"number", "hexnumber", "quoted", "prose", "rulename",
"newline"};
/* simple rulename datastructure
*/
struct rulename {
struct rulename *next;
int defined, referenced;
int len;
char data[1];
};
#define HTSIZE 1023
static struct rulename *htable[HTSIZE];
static unsigned char hex_table[256];
const static unsigned char hexchars[] = "0123456789ABCDEF";
static const char *baserules[] = {"ALPHA", "BIT", "CHAR", "CR", "CRLF",
"CTL", "DIGIT", "DQUOTE", "HEXDIG", "HTAB",
"LF", "LWSP", "OCTET", "SP", "VCHAR",
"WSP", NULL};
const unsigned char cvt_to_lowercase[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
0x3c, 0x3d, 0x3e, 0x3f, 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
't', 'u', 'v', 'w', 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,
0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3,
0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff};
#define TOLOWER(c) (cvt_to_lowercase[(unsigned char)(c)])
/* returns pointer to end of token, NULL if no tokens left in line
*/
char *gettoken(char *ptr, int *tokentype, int hexmode) {
*tokentype = ERROR;
switch (*ptr++) {
case '\0':
return (NULL);
case '\r':
if (*ptr == '\n') ++ptr;
case '\n':
*tokentype = NEWLINE;
break;
case ';':
*tokentype = COMMENT;
while (*ptr != '\0' && *ptr != '\n' && *ptr != '\r') ++ptr;
break;
case '/':
*tokentype = ALTERNATE;
break;
case '*':
*tokentype = REPEAT;
break;
case '(':
*tokentype = BEGINGROUP;
break;
case ')':
*tokentype = ENDGROUP;
break;
case '[':
*tokentype = BEGINOPTION;
break;
case ']':
*tokentype = ENDOPTION;
break;
case '-':
*tokentype = RANGE;
break;
case '.':
*tokentype = LITLIST;
break;
case '=':
*tokentype = RULEDEF;
if (*ptr == '/') {
*tokentype = AMENDRULE;
++ptr;
}
break;
case '%':
switch (*ptr++) {
case 'x':
case 'X':
*tokentype = HEXMODE;
break;
case 'b':
case 'B':
*tokentype = BINMODE;
break;
case 'd':
case 'D':
*tokentype = DECMODE;
break;
default:
return ("Invalid literal");
}
break;
case '"':
while (*ptr && *ptr != '"') ++ptr;
if (*ptr != '"') {
return ("Unbalanced double quotes");
}
++ptr;
*tokentype = QUOTED;
break;
case '<':
while (*ptr && *ptr != '>') ++ptr;
if (*ptr != '>') {
return ("Unbalanced angle brakets");
}
++ptr;
*tokentype = PROSE;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (hexmode) goto GETHEX;
while (isdigit(*ptr)) ++ptr;
*tokentype = NUMBER;
break;
default:
/* deal with hex digits */
if (hexmode && isxdigit(ptr[-1])) {
GETHEX:
while (isxdigit(*ptr)) ++ptr;
*tokentype = HEXNUMBER;
break;
}
if (!isalpha(ptr[-1])) {
return ("Invalid character");
}
/* deal with rule names */
while (isalnum(*ptr) || *ptr == '-') ++ptr;
*tokentype = RULENAME;
break;
}
return (ptr);
}
/* find a rule in a hash table
*/
struct rulename *hashfind(const char *ptr, int len) {
int i, hash;
struct rulename **pptr;
/* lookup in hash table */
hash = len;
for (i = 0; i < len; ++i) {
hash = (hash * 13 + TOLOWER(ptr[i])) % HTSIZE;
}
pptr = &htable[hash];
while (*pptr && ((*pptr)->len != len || strncasecmp(ptr, (*pptr)->data, len) != 0)) {
pptr = &(*pptr)->next;
}
/* create new entry, if needed */
if (!*pptr) {
*pptr = calloc(sizeof(struct rulename) + len, 1);
if (!*pptr) {
fprintf(stderr, "Out of memory\n");
exit(1);
}
(*pptr)->len = len;
memcpy((*pptr)->data, ptr, len);
}
return (*pptr);
}
int main(int argc, char **argv) {
int verbose = 0;
const unsigned char *uscan;
char *buf, *name, *src, *dst, *linestart;
char *ptr, *nptr;
const char **ruleptr;
int hexmode, tokentype, line;
int plevel = 0, blevel = 0;
int bused, bsize, amount;
struct rulename *lastrule;
int has_err = 0;
struct rulename *first_rule = NULL;
/* initialize hex table */
memset(hex_table, 16, sizeof(hex_table));
for (uscan = hexchars; *uscan; ++uscan) {
hex_table[(unsigned)*uscan] = uscan - hexchars;
}
/* initialize default rules */
for (ruleptr = baserules; *ruleptr != NULL; ++ruleptr) {
lastrule = hashfind(*ruleptr, strlen(*ruleptr));
lastrule->defined = lastrule->referenced = 1;
}
lastrule = NULL;
/* get program name */
name = strrchr(*argv, '/');
name = name == NULL ? *argv : name + 1;
/* parse args */
while (*++argv != NULL && **argv == '-') switch ((*argv)[1]) {
case 'v':
verbose = 1;
break;
case 'h':
printf("%s [-h] [-v]\n", name);
printf(" -v enable verbose output\n");
printf(" -h display this help\n");
exit(0);
default:
fprintf(stderr, "Invalid arguments, type `%s -h' for usage\n", name);
exit(1);
}
/* read the data into memory */
bsize = 4096;
buf = malloc(bsize);
if (buf != NULL) {
for (bused = 0;
(amount = fread(buf + bused, 1, bsize - bused, stdin)) != 0;
bused += amount) {
if (bused + amount == bsize) {
if (bsize > 10 * 1024 * 1024) {
fprintf(stderr, "Size limit exceeded\n");
exit(1);
}
buf = realloc(buf, bsize *= 2);
if (buf == NULL) break;
}
}
}
if (buf == NULL) {
fprintf(stderr, "Out of memory\n");
exit(1);
}
if (!feof(stdin)) {
perror("input");
exit(1);
}
buf[bused] = '\0';
/* process lines */
ptr = linestart = buf;
hexmode = 0;
line = 1;
/* get tokens */
for (;;) {
/* skip whitespace */
while (*ptr == ' ' || *ptr == '\t') {
++ptr;
hexmode = 0;
}
/* get token, done if at end of line */
if ((nptr = gettoken(ptr, &tokentype, hexmode)) == NULL) break;
/* handle errors */
if (!tokentype) {
printf("Error (line %d, col %d): %s\n", line, (int)(ptr - linestart), nptr);
exit(1);
}
/* reset hexmode */
if (hexmode && tokentype != HEXNUMBER && tokentype != LITLIST && tokentype != RANGE) {
hexmode = 0;
}
/* handle last rulename */
if (lastrule != NULL) {
if (tokentype == RULEDEF) {
lastrule->defined = 1;
} else if (tokentype == AMENDRULE) {
lastrule->defined = lastrule->referenced = 1;
} else {
lastrule->referenced = 1;
}
lastrule = NULL;
}
if (verbose && tokentype != NEWLINE) {
printf("line %d, col %d, %s", line, (int)(ptr - linestart), token_name[tokentype]);
if (nptr != NULL) {
printf(": %.*s", (int)(nptr - ptr), ptr);
}
putchar('\n');
}
switch (tokentype) {
case NEWLINE:
hexmode = 0;
++line;
ptr = linestart = nptr;
continue;
case RULENAME:
lastrule = hashfind(ptr, nptr - ptr);
if (first_rule == NULL) first_rule = lastrule;
break;
case HEXMODE:
hexmode = 1;
break;
case RULEDEF:
case AMENDRULE:
if (plevel || blevel) {
UNBALANCED:
printf("Error (line %d): Unbalanced %s\n", line, plevel ? "parenthesis" : "square brackets");
exit(1);
}
break;
case BEGINGROUP:
++plevel;
break;
case ENDGROUP:
if (--plevel < 0) goto UNBALANCED;
break;
case BEGINOPTION:
++blevel;
break;
case ENDOPTION:
if (--blevel < 0) goto UNBALANCED;
break;
}
/* advance */
ptr = nptr;
}
/* final warnings */
if (plevel) {
printf("Error at end of text: Unbalanced parenthesis\n");
has_err = 1;
}
if (blevel) {
printf("Error at end of text: Unbalanced square brackets\n");
has_err = 1;
}
/* the first rule always counts as referenced */
if (first_rule) first_rule->referenced = 1;
/* list undefined and unreferenced rules */
for (line = 0; line < HTSIZE; ++line) {
lastrule = htable[line];
while (lastrule) {
if (!lastrule->defined) {
printf("undefined rule: %s\n", lastrule->data);
has_err = 1;
}
if (!lastrule->referenced) {
printf("unreferenced rule: %s\n", lastrule->data);
has_err = 1;
}
lastrule = lastrule->next;
}
}
if (has_err == 1) {
exit(1);
}
printf("ABNF validation (version 1.0) completed\n");
exit(0);
}