Skip to content

Commit

Permalink
Implement "#include".
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Aug 25, 2018
1 parent e188ffd commit a382606
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 62 deletions.
12 changes: 7 additions & 5 deletions 9cc.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,18 @@ typedef struct {
char len;

// For error reporting
char *buf;
char *filename;
char *start;
} Token;

Vector *tokenize(char *p);
Vector *tokenize(char *path, bool add_eof);
noreturn void bad_token(Token *t, char *msg);

/// preprocess.c

Vector *preprocess(Vector *tokens);

/// parse.c

enum {
Expand Down Expand Up @@ -353,7 +359,3 @@ extern char *regs32[];
extern int num_regs;

void gen_x86(Vector *globals, Vector *fns);

/// main.c

char *filename;
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ test: 9cc test/test.c
./9cc -test

@gcc -E -P test/test.c | ./9cc - > tmp-test1.s
@./9cc test/token.c > tmp-test2.s
@gcc -c -o tmp-test2.o test/gcc.c
@gcc -static -o tmp-test tmp-test1.s tmp-test2.s tmp-test2.o
@./tmp-test
@gcc -static -o tmp-test1 tmp-test1.s tmp-test2.o
@./tmp-test1

@./9cc test/token.c > tmp-test2.s
@gcc -static -o tmp-test2 tmp-test2.s
@./tmp-test2

clean:
rm -f 9cc *.o *~ tmp* a.out test/*~
Expand Down
36 changes: 5 additions & 31 deletions main.c
Original file line number Diff line number Diff line change
@@ -1,31 +1,5 @@
#include "9cc.h"

char *filename;

// Reads the entire contents of `path` into a freshly allocated
// buffer. "-" means stdin. The result is guaranteed to end with a
// newline so downstream line-oriented code never runs off the end.
// Exits with a message if the file cannot be opened.
//
// The parameter is named `path` (not `filename`) to avoid shadowing
// the file-scope `char *filename` global.
static char *read_file(char *path) {
  FILE *fp = stdin;
  if (strcmp(path, "-")) {
    fp = fopen(path, "r");
    if (!fp) {
      perror(path);
      exit(1);
    }
  }

  StringBuilder *sb = new_sb();
  char buf[4096];
  for (;;) {
    int nread = fread(buf, 1, sizeof(buf), fp);
    if (nread == 0)
      break;
    sb_append_n(sb, buf, nread);
  }

  // Don't leak the FILE handle for a real file.
  if (fp != stdin)
    fclose(fp);

  // Ensure a trailing newline. The previous check read
  // sb->data[sb->len], one byte past the written contents, and
  // misbehaved on an empty input.
  if (sb->len == 0 || sb->data[sb->len - 1] != '\n')
    sb_add(sb, '\n');
  return sb_get(sb);
}

void usage() { error("Usage: 9cc [-test] [-dump-ir1] [-dump-ir2] <file>"); }

int main(int argc, char **argv) {
Expand All @@ -37,24 +11,24 @@ int main(int argc, char **argv) {
return 0;
}

char *path;
bool dump_ir1 = false;
bool dump_ir2 = false;

if (argc == 3 && !strcmp(argv[1], "-dump-ir1")) {
dump_ir1 = true;
filename = argv[2];
path = argv[2];
} else if (argc == 3 && !strcmp(argv[1], "-dump-ir2")) {
dump_ir2 = true;
filename = argv[2];
path = argv[2];
} else {
if (argc != 2)
usage();
filename = argv[1];
path = argv[1];
}

// Tokenize and parse.
char *input = read_file(filename);
Vector *tokens = tokenize(input);
Vector *tokens = tokenize(path, true);
Vector *nodes = parse(tokens);
Vector *globals = sema(nodes);
Vector *fns = gen_ir(nodes);
Expand Down
35 changes: 35 additions & 0 deletions preprocess.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// C preprocessor

#include "9cc.h"

// Expands preprocessor directives in a token stream.
//
// Currently only `#include "file"` is supported: the named file is
// tokenized (without a trailing TK_EOF token) and its tokens are
// spliced in place of the directive. All other tokens are copied
// through unchanged. The '\n' tokens that delimit directives are kept
// here; the tokenizer strips them afterwards.
Vector *preprocess(Vector *tokens) {
  Vector *v = new_vec();

  for (int i = 0; i < tokens->len;) {
    Token *t = tokens->data[i];

    // Ordinary token: pass it through.
    if (t->ty != '#') {
      i++;
      vec_push(v, t);
      continue;
    }

    // A directive needs three more tokens: `include`, "path", '\n'.
    // Guard against a directive truncated at the very end of the
    // stream (e.g. a stray '#' at the end of an #include'd file,
    // which is tokenized without a trailing EOF token) — the
    // unchecked `++i` accesses below would read past tokens->len.
    if (i + 3 >= tokens->len)
      bad_token(t, "premature end of input after '#'");

    t = tokens->data[++i];
    if (t->ty != TK_IDENT || strcmp(t->name, "include"))
      bad_token(t, "'include' expected");

    t = tokens->data[++i];
    if (t->ty != TK_STR)
      bad_token(t, "string expected");

    char *path = t->str;

    t = tokens->data[++i];
    if (t->ty != '\n')
      bad_token(t, "newline expected");

    // Splice in the included file's tokens. Use a distinct index so
    // it does not shadow the outer loop variable `i` (-Wshadow).
    Vector *nv = tokenize(path, false);
    for (int j = 0; j < nv->len; j++)
      vec_push(v, nv->data[j]);
  }
  return v;
}
7 changes: 7 additions & 0 deletions test/test1.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Fixture for the preprocessor test: the #include directive below is
// expanded to the contents of test/test2.inc (a printf("OK\n") call),
// so the tokenizer/preprocessor must splice tokens mid-function.
int printf();

int main() {
#include "test/test2.inc"
1; 2;
return 0;
}
1 change: 1 addition & 0 deletions test/test2.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
printf("OK\n");
8 changes: 3 additions & 5 deletions test/token.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
// This file contains tests for the tokenizer.
//
// Note that we don't actually use the function defined by this file
// because we are interested only in knowing whether the tokenizer can
// tokenize this file or not.
// This file contains tests for the tokenizer and the preprocessor.

// a line comment\
continues\
Expand All @@ -12,3 +8,5 @@ to this line
/* block comment
**
*/

#include "test/test1.inc"
Expand Down
96 changes: 78 additions & 18 deletions token.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

// Error reporting

static char *input_file;
static char *buf;
static char *filename;

// Finds a line pointed by a given pointer from the input file
// to print it out.
static void print_line(char *pos) {
char *start = input_file;
static void print_line(char *start, char *path, char *pos) {
int line = 0;
int col = 0;

for (char *p = input_file; p; p++) {
for (char *p = start; p; p++) {
if (*p == '\n') {
start = p + 1;
line++;
Expand All @@ -24,7 +24,7 @@ static void print_line(char *pos) {
continue;
}

fprintf(stderr, "error at %s:%d:%d\n\n", filename, line + 1, col + 1);
fprintf(stderr, "error at %s:%d:%d\n\n", path, line + 1, col + 1);

int linelen = strchr(p, '\n') - start;
fprintf(stderr, "%.*s\n", linelen, start);
Expand All @@ -37,7 +37,7 @@ static void print_line(char *pos) {
}

// Reports an error at the location of token `t` and exits: prints
// the offending source line (using the buffer and filename recorded
// on the token, so errors in #include'd files point at the right
// file), then the message.
noreturn void bad_token(Token *t, char *msg) {
  print_line(t->buf, t->filename, t->start);
  error(msg);
}

Expand All @@ -53,6 +53,8 @@ static Token *add(int ty, char *start) {
Token *t = calloc(1, sizeof(Token));
t->ty = ty;
t->start = start;
t->filename = filename;
t->buf = buf;
vec_push(tokens, t);
return t;
}
Expand Down Expand Up @@ -80,6 +82,30 @@ static char escaped[256] = {
['v'] = '\v', ['e'] = '\033', ['E'] = '\033',
};

// Reads the entire contents of `path` into a freshly allocated
// buffer. "-" means stdin. The result is guaranteed to end with a
// newline so the tokenizer never runs off the end of the last line.
// Exits with a message if the file cannot be opened.
static char *read_file(char *path) {
  FILE *fp = stdin;
  if (strcmp(path, "-")) {
    fp = fopen(path, "r");
    if (!fp) {
      perror(path);
      exit(1);
    }
  }

  StringBuilder *sb = new_sb();
  char buf[4096];
  for (;;) {
    int nread = fread(buf, 1, sizeof(buf), fp);
    if (nread == 0)
      break;
    sb_append_n(sb, buf, nread);
  }

  // Don't leak the FILE handle for a real file.
  if (fp != stdin)
    fclose(fp);

  // Ensure a trailing newline. The previous check read
  // sb->data[sb->len], one byte past the written contents, and
  // misbehaved on an empty input.
  if (sb->len == 0 || sb->data[sb->len - 1] != '\n')
    sb_add(sb, '\n');
  return sb_get(sb);
}

static Map *keyword_map() {
Map *map = new_map();
map_puti(map, "_Alignof", TK_ALIGNOF);
Expand All @@ -104,7 +130,7 @@ static char *block_comment(char *pos) {
for (char *p = pos + 2; *p; p++)
if (!strncmp(p, "*/", 2))
return p + 2;
print_line(pos);
print_line(buf, filename, pos);
error("unclosed comment");
}

Expand Down Expand Up @@ -215,10 +241,18 @@ static char *number(char *p) {

// Tokenized input is stored to this array.
static void scan() {
char *p = input_file;
char *p = buf;

loop:
while (*p) {
// New line (preprocessor-only token)
if (*p == '\n') {
add(*p, p);
p++;
continue;
}

// Whitespace
if (isspace(*p)) {
p++;
continue;
Expand Down Expand Up @@ -262,7 +296,7 @@ static void scan() {
}

// Single-letter symbol
if (strchr("+-*/;=(),{}<>[]&.!?:|^%~", *p)) {
if (strchr("+-*/;=(),{}<>[]&.!?:|^%~#", *p)) {
add(*p, p);
p++;
continue;
Expand All @@ -280,15 +314,13 @@ static void scan() {
continue;
}

print_line(p);
print_line(buf, filename, p);
error("cannot tokenize");
}

add(TK_EOF, p);
}

static void canonicalize_newline() {
char *p = input_file;
char *p = buf;
for (char *q = p; *q;) {
if (q[0] == '\r' && q[1] == '\n')
q++;
Expand All @@ -298,7 +330,7 @@ static void canonicalize_newline() {
}

static void remove_backslash_newline() {
char *p = input_file;
char *p = buf;
for (char *q = p; *q;) {
if (q[0] == '\\' && q[1] == '\n')
q += 2;
Expand All @@ -308,6 +340,16 @@ static void remove_backslash_newline() {
*p = '\0';
}

// Drops the '\n' tokens that exist only for the preprocessor's
// benefit, leaving a stream the parser can consume.
static void strip_newlines() {
  Vector *kept = new_vec();
  for (int i = 0; i < tokens->len; i++) {
    Token *tok = tokens->data[i];
    if (tok->ty == '\n')
      continue;
    vec_push(kept, tok);
  }
  tokens = kept;
}

static void append(Token *x, Token *y) {
StringBuilder *sb = new_sb();
sb_append_n(sb, x->str, x->len - 1);
Expand All @@ -333,14 +375,32 @@ static void join_string_literals() {
tokens = v;
}

// Tokenizes the file at `path` ("-" means stdin) and returns the
// resulting token vector, already preprocessed: #include directives
// are expanded, preprocessor-only '\n' tokens are stripped, and
// adjacent string literals are joined. `add_eof` controls whether a
// TK_EOF token is appended; it is false when tokenizing an
// #include'd file so no EOF lands in the middle of the including
// file's stream.
//
// The tokenizer state (tokens, filename, buf) lives in file-scope
// globals; it is saved on entry and restored before returning so
// that preprocess() can call tokenize() recursively.
Vector *tokenize(char *path, bool add_eof) {
  if (!keywords)
    keywords = keyword_map();

  // Save globals for re-entrancy.
  Vector *tokens_ = tokens;
  char *filename_ = filename;
  char *buf_ = buf;

  tokens = new_vec();
  filename = path;
  buf = read_file(path);

  canonicalize_newline();
  remove_backslash_newline();

  scan();
  if (add_eof)
    add(TK_EOF, buf);

  tokens = preprocess(tokens);
  strip_newlines();
  join_string_literals();

  // Restore the saved globals before handing back the result.
  Vector *ret = tokens;
  buf = buf_;
  tokens = tokens_;
  filename = filename_;
  return ret;
}

0 comments on commit a382606

Please sign in to comment.