diff --git a/COMPLIANCE.md b/COMPLIANCE.md index 91ad7b0b..8b1c4ca7 100644 --- a/COMPLIANCE.md +++ b/COMPLIANCE.md @@ -17,6 +17,7 @@ This document tracks compliance gaps and non-standard behaviors. ### Control Flow - `if`/`else` statements +- `goto` and label statements - `while`, `do-while`, `for` loops - `switch`/`case`/`default` statements - `break`, `continue`, `return` statements @@ -96,7 +97,6 @@ This document tracks compliance gaps and non-standard behaviors. | Feature | Status | Description | |---------|--------|-------------| -| `goto` and labels | Missing | No arbitrary jumps | | Designated initializers | Missing | No `.field = value` syntax | | Compound literals | Partial | Limited support | | Flexible array members | Missing | No `[]` at struct end | @@ -115,6 +115,7 @@ This document tracks compliance gaps and non-standard behaviors. - Escape sequence: `\e` for ESC character - `void*` arithmetic (treated as `char*`) - `sizeof(void)` returns 0 (should be error) +- Computed goto ### Implementation-Specific - Array compound literals in scalar context use first element diff --git a/src/defs.h b/src/defs.h index 49361b7b..3000e6b1 100644 --- a/src/defs.h +++ b/src/defs.h @@ -20,6 +20,7 @@ #define MAX_LOCALS 1600 #define MAX_FIELDS 64 #define MAX_TYPES 256 +#define MAX_LABELS 256 #define MAX_IR_INSTR 80000 #define MAX_BB_PRED 128 #define MAX_BB_DOM_SUCC 64 @@ -179,6 +180,7 @@ typedef enum { T_break, T_default, T_continue, + T_goto, T_const, /* const qualifier */ /* C pre-processor directives */ T_cppd_include, @@ -270,6 +272,7 @@ typedef enum { OP_branch, /* conditional jump */ OP_jump, /* unconditional jump */ OP_func_ret, /* returned value */ + OP_label, /* for goto label */ /* function pointer */ OP_address_of_func, /* resolve function entry */ @@ -567,6 +570,13 @@ struct ref_block { * type, parameters) with SSA-related information (e.g., basic blocks, control * flow) to support parsing, analysis, optimization, and code generation. 
*/ + +typedef struct { + char label_name[MAX_ID_LEN]; /* label identifier; find_label() matches on it */ + basic_block_t *bb; /* block created at the 'name:' statement; goto target */ + bool used; /* set once a goto resolves to this label */ +} label_t; + struct func { /* Syntatic info */ var_t return_def; diff --git a/src/globals.c b/src/globals.c index efb4609c..998ae862 100644 --- a/src/globals.c +++ b/src/globals.c @@ -1481,6 +1481,14 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start) printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name, bb->else_->bb_label_name); break; + case OP_jump: + print_indent(1); + printf("jmp %s", bb->next->bb_label_name); + break; + case OP_label: + print_indent(0); + printf("%s:", insn->str); + break; case OP_push: print_indent(1); printf("push %%%s", rs1->var_name); diff --git a/src/lexer.c b/src/lexer.c index 5861ac43..111b668b 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -12,7 +12,7 @@ /* Hash table constants */ #define NUM_DIRECTIVES 11 -#define NUM_KEYWORDS 17 +#define NUM_KEYWORDS 18 /* Token mapping structure for elegant initialization */ typedef struct { @@ -85,6 +85,7 @@ void lex_init_keywords() {"break", T_break}, {"default", T_default}, {"continue", T_continue}, + {"goto", T_goto}, {"union", T_union}, {"const", T_const}, }; @@ -786,6 +787,8 @@ token_t lex_token_impl(bool aliasing) keyword = T_enum; } else if (!memcmp(token_str, "case", 4)) keyword = T_case; + else if (!memcmp(token_str, "goto", 4)) + keyword = T_goto; break; case 5: /* 5-letter keywords: while, break, union, const */ diff --git a/src/parser.c b/src/parser.c index 40f788b9..b6d7ed21 100644 --- a/src/parser.c +++ b/src/parser.c @@ -26,6 +26,12 @@ int break_exit_idx = 0; basic_block_t *continue_bb[MAX_NESTING]; int continue_pos_idx = 0; +/* Label utilities */ +label_t labels[MAX_LABELS]; +int label_idx = 0; +basic_block_t *backpatch_bb[MAX_LABELS]; +int backpatch_bb_idx = 0; + /* stack of the operands of 3AC */ var_t *operand_stack[MAX_OPERAND_STACK_SIZE]; int operand_stack_idx = 0; @@ -40,6 +46,26 @@ void parse_array_init(var_t *var, basic_block_t **bb, bool emit_code); 
+ +label_t *find_label(char *name) +{ + for (int i = 0; i < label_idx; i++) { + if (!strcmp(name, labels[i].label_name)) + return &labels[i]; + } + return NULL; +} + +void add_label(char *name, basic_block_t *bb) +{ + if (label_idx > MAX_LABELS - 1) + error("Too many labels in function"); + + label_t *l = &labels[label_idx++]; + strncpy(l->label_name, name, MAX_ID_LEN); + l->bb = bb; +} + char *gen_name_to(char *buf) { sprintf(buf, ".t%d", global_var_idx++); @@ -997,6 +1023,61 @@ basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb) return else_; } +basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb) +{ + /* Since a goto splits the current program into two basic blocks and makes + * the subsequent basic block unreachable, this causes problems for later + * CFG operations. Therefore, we create a fake if that always executes to + * wrap the goto, and connect the unreachable basic block to the else + * branch. Finally, return this else block. + * + * before: + * a = b + c; + * goto label; + * c *= d; + * + * after: + * a = b + c; + * if (1) + * goto label; + * c *= d; + */ + + char token[MAX_ID_LEN]; + if (!lex_peek(T_identifier, token)) + error("Expected identifier after 'goto'"); + + lex_expect(T_identifier); + lex_expect(T_semicolon); + + basic_block_t *fake_if = bb_create(parent); + bb_connect(bb, fake_if, NEXT); + var_t *val = require_var(parent); + gen_name_to(val->var_name); + val->init_val = 1; + add_insn(parent, fake_if, OP_load_constant, val, NULL, NULL, 0, NULL); + add_insn(parent, fake_if, OP_branch, NULL, val, NULL, 0, NULL); + + basic_block_t *then_ = bb_create(parent); + basic_block_t *else_ = bb_create(parent); + bb_connect(fake_if, then_, THEN); + bb_connect(fake_if, else_, ELSE); + + add_insn(parent, then_, OP_jump, NULL, NULL, NULL, 0, token); + label_t *label = find_label(token); + if (label) { + label->used = true; + bb_connect(then_, label->bb, NEXT); + return else_; + } + + if (backpatch_bb_idx > 
MAX_LABELS - 1) + error("Too many forward-referenced labels"); + + backpatch_bb[backpatch_bb_idx++] = then_; + return else_; +} + basic_block_t *handle_struct_variable_decl(block_t *parent, basic_block_t *bb, char *token) @@ -4169,6 +4250,9 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) return do_while_end; } + if (lex_accept(T_goto)) + return handle_goto_statement(parent, bb); + /* empty statement */ if (lex_accept(T_semicolon)) return bb; @@ -4753,6 +4837,21 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) return bb; } + if (lex_peek(T_identifier, token)) { + lex_accept(T_identifier); + if (lex_accept(T_colon)) { + label_t *l = find_label(token); + if (l) + error("label redefinition"); + + basic_block_t *n = bb_create(parent); + bb_connect(bb, n, NEXT); + add_label(token, n); + add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token); + return n; + } + } + error("Unrecognized statement token"); return NULL; } @@ -4794,6 +4893,28 @@ void read_func_body(func_t *func) basic_block_t *body = read_code_block(func, NULL, NULL, func->bbs); if (body) bb_connect(body, func->exit, NEXT); + /* Resolve gotos that were parsed before their label was defined. */ + for (int i = 0; i < backpatch_bb_idx; i++) { + basic_block_t *bb = backpatch_bb[i]; + insn_t *g = bb->insn_list.tail; /* OP_jump; str = label name */ + label_t *label = find_label(g->str); + if (!label) + error("goto label undefined"); + + label->used = true; + bb_connect(bb, label->bb, NEXT); + } + /* Warn about labels no goto referenced, then clear every flag. */ + for (int i = 0; i < label_idx; i++) { + label_t *label = &labels[i]; + if (!label->used) + printf("Warning: unused label %s\n", label->label_name); + /* labels[] slots are reused by the next function. */ + label->used = false; + } + /* Label and backpatch tables are per function. */ + backpatch_bb_idx = 0; + label_idx = 0; } /* if first token is type */ diff --git a/src/ssa.c b/src/ssa.c index 59cf647d..68351935 100644 --- a/src/ssa.c +++ b/src/ssa.c @@ -929,6 +929,82 @@ void unwind_phi(void) } } +bool is_dominate(basic_block_t *pred, basic_block_t *succ) +{ + int i; + bool found = false; + for (i = 0; i < MAX_BB_DOM_SUCC; i++) { + if (!pred->dom_next[i]) + break; + if 
(pred->dom_next[i] == succ) { + found = true; + break; + } + found |= is_dominate(pred->dom_next[i], succ); + } + + return found; +} + +/* + * For any variable, the basic block that defines it must dominate all the + * basic blocks where it is used; otherwise, it is an invalid cross-block + * initialization. + */ +void bb_check_var_cross_init(func_t *func, basic_block_t *bb) +{ + UNUSED(func); + + for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { + if (insn->opcode != OP_allocat) + continue; + + var_t *var = insn->rd; + ref_block_t *ref; + for (ref = var->ref_block_list.head; ref; ref = ref->next) { + if (ref->bb == bb) + continue; + + if (!is_dominate(bb, ref->bb)) + printf("Warning: Variable '%s' cross-initialized\n", + var->var_name); + } + } +} + +/** + * A variable's initialization lives in a basic block that does not dominate + * all of its uses, so control flow can reach a use without first passing + * through its initialization (i.e., a possibly-uninitialized use). + * + * For Example: + * // Jumps directly to 'label', skipping the declaration below + * goto label; + * if (1) { + * // This line is never executed when 'goto' is taken + * int x; + * label: + * // Uses 'x' after its declaration was bypassed + * x = 5; + * } + */ +void check_var_cross_init() +{ + bb_traversal_args_t *args = arena_alloc_traversal_args(); + for (func_t *func = FUNC_LIST.head; func; func = func->next) { + /* Skip function declarations without bodies */ + if (!func->bbs) + continue; + + args->func = func; + args->bb = func->bbs; + + func->visited++; + args->postorder_cb = bb_check_var_cross_init; + bb_forward_traversal(args); + } +} + #ifdef __SHECC__ #else void bb_dump_connection(FILE *fd, @@ -1112,6 +1188,12 @@ void bb_dump(FILE *fd, func_t *func, basic_block_t *bb) sprintf(str, "%d>", insn->rs1->var_name, insn->rs1->subscript); break; + case OP_jump: + sprintf(str, ""); + break; + case OP_label: + sprintf(str, "