Permalink
Cannot retrieve contributors at this time
467 lines (390 sloc)
11.8 KB
| /* Parse ABNF to locate dangling references and undefined symbols | |
| * does limited syntax checking | |
| * | |
| * This is a quick&dirty hack donated to public domain by Chris Newman | |
| */ | |
| #include <ctype.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #define ERROR 0 | |
| #define COMMENT 1 | |
| #define RULEDEF 2 | |
| #define AMENDRULE 3 | |
| #define HEXMODE 4 | |
| #define BINMODE 5 | |
| #define DECMODE 6 | |
| #define ALTERNATE 7 | |
| #define REPEAT 8 | |
| #define BEGINGROUP 9 | |
| #define ENDGROUP 10 | |
| #define BEGINOPTION 11 | |
| #define ENDOPTION 12 | |
| #define RANGE 13 | |
| #define LITLIST 14 | |
| #define NUMBER 15 | |
| #define HEXNUMBER 16 | |
| #define QUOTED 17 | |
| #define PROSE 18 | |
| #define RULENAME 19 | |
| #define NEWLINE 20 | |
| static const char *token_name[] = { | |
| "error", "comment", "ruledef", "amendrule", "hexmode", | |
| "binmode", "decmode", "alternate", "repeat", "begingroup", | |
| "endgroup", "beginoption", "endoption", "range", "litlist", | |
| "number", "hexnumber", "quoted", "prose", "rulename", | |
| "newline"}; | |
| /* simple rulename datastructure | |
| */ | |
| struct rulename { | |
| struct rulename *next; | |
| int defined, referenced; | |
| int len; | |
| char data[1]; | |
| }; | |
| #define HTSIZE 1023 | |
| static struct rulename *htable[HTSIZE]; | |
| static unsigned char hex_table[256]; | |
| const static unsigned char hexchars[] = "0123456789ABCDEF"; | |
| static const char *baserules[] = {"ALPHA", "BIT", "CHAR", "CR", "CRLF", | |
| "CTL", "DIGIT", "DQUOTE", "HEXDIG", "HTAB", | |
| "LF", "LWSP", "OCTET", "SP", "VCHAR", | |
| "WSP", NULL}; | |
| const unsigned char cvt_to_lowercase[256] = { | |
| 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, | |
| 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
| 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, | |
| 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, | |
| 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, | |
| 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', | |
| 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', | |
| 't', 'u', 'v', 'w', 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, | |
| 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', | |
| 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', | |
| 'x', 'y', 'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, | |
| 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, | |
| 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, | |
| 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, | |
| 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, | |
| 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, | |
| 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, | |
| 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, | |
| 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, | |
| 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, | |
| 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, | |
| 0xfc, 0xfd, 0xfe, 0xff}; | |
| #define TOLOWER(c) (cvt_to_lowercase[(unsigned char)(c)]) | |
| /* returns pointer to end of token, NULL if no tokens left in line | |
| */ | |
| char *gettoken(char *ptr, int *tokentype, int hexmode) { | |
| *tokentype = ERROR; | |
| switch (*ptr++) { | |
| case '\0': | |
| return (NULL); | |
| case '\r': | |
| if (*ptr == '\n') ++ptr; | |
| case '\n': | |
| *tokentype = NEWLINE; | |
| break; | |
| case ';': | |
| *tokentype = COMMENT; | |
| while (*ptr != '\0' && *ptr != '\n' && *ptr != '\r') ++ptr; | |
| break; | |
| case '/': | |
| *tokentype = ALTERNATE; | |
| break; | |
| case '*': | |
| *tokentype = REPEAT; | |
| break; | |
| case '(': | |
| *tokentype = BEGINGROUP; | |
| break; | |
| case ')': | |
| *tokentype = ENDGROUP; | |
| break; | |
| case '[': | |
| *tokentype = BEGINOPTION; | |
| break; | |
| case ']': | |
| *tokentype = ENDOPTION; | |
| break; | |
| case '-': | |
| *tokentype = RANGE; | |
| break; | |
| case '.': | |
| *tokentype = LITLIST; | |
| break; | |
| case '=': | |
| *tokentype = RULEDEF; | |
| if (*ptr == '/') { | |
| *tokentype = AMENDRULE; | |
| ++ptr; | |
| } | |
| break; | |
| case '%': | |
| switch (*ptr++) { | |
| case 'x': | |
| case 'X': | |
| *tokentype = HEXMODE; | |
| break; | |
| case 'b': | |
| case 'B': | |
| *tokentype = BINMODE; | |
| break; | |
| case 'd': | |
| case 'D': | |
| *tokentype = DECMODE; | |
| break; | |
| default: | |
| return ("Invalid literal"); | |
| } | |
| break; | |
| case '"': | |
| while (*ptr && *ptr != '"') ++ptr; | |
| if (*ptr != '"') { | |
| return ("Unbalanced double quotes"); | |
| } | |
| ++ptr; | |
| *tokentype = QUOTED; | |
| break; | |
| case '<': | |
| while (*ptr && *ptr != '>') ++ptr; | |
| if (*ptr != '>') { | |
| return ("Unbalanced angle brakets"); | |
| } | |
| ++ptr; | |
| *tokentype = PROSE; | |
| break; | |
| case '0': | |
| case '1': | |
| case '2': | |
| case '3': | |
| case '4': | |
| case '5': | |
| case '6': | |
| case '7': | |
| case '8': | |
| case '9': | |
| if (hexmode) goto GETHEX; | |
| while (isdigit(*ptr)) ++ptr; | |
| *tokentype = NUMBER; | |
| break; | |
| default: | |
| /* deal with hex digits */ | |
| if (hexmode && isxdigit(ptr[-1])) { | |
| GETHEX: | |
| while (isxdigit(*ptr)) ++ptr; | |
| *tokentype = HEXNUMBER; | |
| break; | |
| } | |
| if (!isalpha(ptr[-1])) { | |
| return ("Invalid character"); | |
| } | |
| /* deal with rule names */ | |
| while (isalnum(*ptr) || *ptr == '-') ++ptr; | |
| *tokentype = RULENAME; | |
| break; | |
| } | |
| return (ptr); | |
| } | |
| /* find a rule in a hash table | |
| */ | |
| struct rulename *hashfind(const char *ptr, int len) { | |
| int i, hash; | |
| struct rulename **pptr; | |
| /* lookup in hash table */ | |
| hash = len; | |
| for (i = 0; i < len; ++i) { | |
| hash = (hash * 13 + TOLOWER(ptr[i])) % HTSIZE; | |
| } | |
| pptr = &htable[hash]; | |
| while (*pptr && ((*pptr)->len != len || strncasecmp(ptr, (*pptr)->data, len) != 0)) { | |
| pptr = &(*pptr)->next; | |
| } | |
| /* create new entry, if needed */ | |
| if (!*pptr) { | |
| *pptr = calloc(sizeof(struct rulename) + len, 1); | |
| if (!*pptr) { | |
| fprintf(stderr, "Out of memory\n"); | |
| exit(1); | |
| } | |
| (*pptr)->len = len; | |
| memcpy((*pptr)->data, ptr, len); | |
| } | |
| return (*pptr); | |
| } | |
| int main(int argc, char **argv) { | |
| int verbose = 0; | |
| const unsigned char *uscan; | |
| char *buf, *name, *src, *dst, *linestart; | |
| char *ptr, *nptr; | |
| const char **ruleptr; | |
| int hexmode, tokentype, line; | |
| int plevel = 0, blevel = 0; | |
| int bused, bsize, amount; | |
| struct rulename *lastrule; | |
| int has_err = 0; | |
| struct rulename *first_rule = NULL; | |
| /* initialize hex table */ | |
| memset(hex_table, 16, sizeof(hex_table)); | |
| for (uscan = hexchars; *uscan; ++uscan) { | |
| hex_table[(unsigned)*uscan] = uscan - hexchars; | |
| } | |
| /* initialize default rules */ | |
| for (ruleptr = baserules; *ruleptr != NULL; ++ruleptr) { | |
| lastrule = hashfind(*ruleptr, strlen(*ruleptr)); | |
| lastrule->defined = lastrule->referenced = 1; | |
| } | |
| lastrule = NULL; | |
| /* get program name */ | |
| name = strrchr(*argv, '/'); | |
| name = name == NULL ? *argv : name + 1; | |
| /* parse args */ | |
| while (*++argv != NULL && **argv == '-') switch ((*argv)[1]) { | |
| case 'v': | |
| verbose = 1; | |
| break; | |
| case 'h': | |
| printf("%s [-h] [-v]\n", name); | |
| printf(" -v enable verbose output\n"); | |
| printf(" -h display this help\n"); | |
| exit(0); | |
| default: | |
| fprintf(stderr, "Invalid arguments, type `%s -h' for usage\n", name); | |
| exit(1); | |
| } | |
| /* read the data into memory */ | |
| bsize = 4096; | |
| buf = malloc(bsize); | |
| if (buf != NULL) { | |
| for (bused = 0; | |
| (amount = fread(buf + bused, 1, bsize - bused, stdin)) != 0; | |
| bused += amount) { | |
| if (bused + amount == bsize) { | |
| if (bsize > 10 * 1024 * 1024) { | |
| fprintf(stderr, "Size limit exceeded\n"); | |
| exit(1); | |
| } | |
| buf = realloc(buf, bsize *= 2); | |
| if (buf == NULL) break; | |
| } | |
| } | |
| } | |
| if (buf == NULL) { | |
| fprintf(stderr, "Out of memory\n"); | |
| exit(1); | |
| } | |
| if (!feof(stdin)) { | |
| perror("input"); | |
| exit(1); | |
| } | |
| buf[bused] = '\0'; | |
| /* process lines */ | |
| ptr = linestart = buf; | |
| hexmode = 0; | |
| line = 1; | |
| /* get tokens */ | |
| for (;;) { | |
| /* skip whitespace */ | |
| while (*ptr == ' ' || *ptr == '\t') { | |
| ++ptr; | |
| hexmode = 0; | |
| } | |
| /* get token, done if at end of line */ | |
| if ((nptr = gettoken(ptr, &tokentype, hexmode)) == NULL) break; | |
| /* handle errors */ | |
| if (!tokentype) { | |
| printf("Error (line %d, col %d): %s\n", line, (int)(ptr - linestart), nptr); | |
| exit(1); | |
| } | |
| /* reset hexmode */ | |
| if (hexmode && tokentype != HEXNUMBER && tokentype != LITLIST && tokentype != RANGE) { | |
| hexmode = 0; | |
| } | |
| /* handle last rulename */ | |
| if (lastrule != NULL) { | |
| if (tokentype == RULEDEF) { | |
| lastrule->defined = 1; | |
| } else if (tokentype == AMENDRULE) { | |
| lastrule->defined = lastrule->referenced = 1; | |
| } else { | |
| lastrule->referenced = 1; | |
| } | |
| lastrule = NULL; | |
| } | |
| if (verbose && tokentype != NEWLINE) { | |
| printf("line %d, col %d, %s", line, (int)(ptr - linestart), token_name[tokentype]); | |
| if (nptr != NULL) { | |
| printf(": %.*s", (int)(nptr - ptr), ptr); | |
| } | |
| putchar('\n'); | |
| } | |
| switch (tokentype) { | |
| case NEWLINE: | |
| hexmode = 0; | |
| ++line; | |
| ptr = linestart = nptr; | |
| continue; | |
| case RULENAME: | |
| lastrule = hashfind(ptr, nptr - ptr); | |
| if (first_rule == NULL) first_rule = lastrule; | |
| break; | |
| case HEXMODE: | |
| hexmode = 1; | |
| break; | |
| case RULEDEF: | |
| case AMENDRULE: | |
| if (plevel || blevel) { | |
| UNBALANCED: | |
| printf("Error (line %d): Unbalanced %s\n", line, plevel ? "parenthesis" : "square brackets"); | |
| exit(1); | |
| } | |
| break; | |
| case BEGINGROUP: | |
| ++plevel; | |
| break; | |
| case ENDGROUP: | |
| if (--plevel < 0) goto UNBALANCED; | |
| break; | |
| case BEGINOPTION: | |
| ++blevel; | |
| break; | |
| case ENDOPTION: | |
| if (--blevel < 0) goto UNBALANCED; | |
| break; | |
| } | |
| /* advance */ | |
| ptr = nptr; | |
| } | |
| /* final warnings */ | |
| if (plevel) { | |
| printf("Error at end of text: Unbalanced parenthesis\n"); | |
| has_err = 1; | |
| } | |
| if (blevel) { | |
| printf("Error at end of text: Unbalanced square brackets\n"); | |
| has_err = 1; | |
| } | |
| /* the first rule always counts as referenced */ | |
| if (first_rule) first_rule->referenced = 1; | |
| /* list undefined and unreferenced rules */ | |
| for (line = 0; line < HTSIZE; ++line) { | |
| lastrule = htable[line]; | |
| while (lastrule) { | |
| if (!lastrule->defined) { | |
| printf("undefined rule: %s\n", lastrule->data); | |
| has_err = 1; | |
| } | |
| if (!lastrule->referenced) { | |
| printf("unreferenced rule: %s\n", lastrule->data); | |
| has_err = 1; | |
| } | |
| lastrule = lastrule->next; | |
| } | |
| } | |
| if (has_err == 1) { | |
| exit(1); | |
| } | |
| printf("ABNF validation (version 1.0) completed\n"); | |
| exit(0); | |
| } |