Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion COMPLIANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This document tracks compliance gaps and non-standard behaviors.

### Control Flow
- `if`/`else` statements
- `goto` and label statements
- `while`, `do-while`, `for` loops
- `switch`/`case`/`default` statements
- `break`, `continue`, `return` statements
Expand Down Expand Up @@ -96,7 +97,6 @@ This document tracks compliance gaps and non-standard behaviors.

| Feature | Status | Description |
|---------|--------|-------------|
| `goto` and labels | Missing | No arbitrary jumps |
| Designated initializers | Missing | No `.field = value` syntax |
| Compound literals | Partial | Limited support |
| Flexible array members | Missing | No `[]` at struct end |
Expand All @@ -115,6 +115,7 @@ This document tracks compliance gaps and non-standard behaviors.
- Escape sequence: `\e` for ESC character
- `void*` arithmetic (treated as `char*`)
- `sizeof(void)` returns 0 (should be error)
- Computed goto

### Implementation-Specific
- Array compound literals in scalar context use first element
Expand Down
10 changes: 10 additions & 0 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#define MAX_LOCALS 1600
#define MAX_FIELDS 64
#define MAX_TYPES 256
#define MAX_LABELS 256
#define MAX_IR_INSTR 80000
#define MAX_BB_PRED 128
#define MAX_BB_DOM_SUCC 64
Expand Down Expand Up @@ -179,6 +180,7 @@ typedef enum {
T_break,
T_default,
T_continue,
T_goto,
T_const, /* const qualifier */
/* C pre-processor directives */
T_cppd_include,
Expand Down Expand Up @@ -270,6 +272,7 @@ typedef enum {
OP_branch, /* conditional jump */
OP_jump, /* unconditional jump */
OP_func_ret, /* returned value */
OP_label, /* for goto label */

/* function pointer */
OP_address_of_func, /* resolve function entry */
Expand Down Expand Up @@ -567,6 +570,13 @@ struct ref_block {
* type, parameters) with SSA-related information (e.g., basic blocks, control
* flow) to support parsing, analysis, optimization, and code generation.
*/

/* One goto label recorded while a function body is being parsed. */
typedef struct {
/* Identifier of the label, i.e. the name written before the ':'. */
char label_name[MAX_ID_LEN];
/* Basic block created for the label; goto statements are connected to it. */
basic_block_t *bb;
/* Set to true once a goto targets this label; labels left false are
 * reported as unused when the function body has been read.
 */
bool used;
} label_t;

struct func {
/* Syntactic info */
var_t return_def;
Expand Down
8 changes: 8 additions & 0 deletions src/globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,14 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name,
bb->else_->bb_label_name);
break;
case OP_jump:
print_indent(1);
printf("jmp %s", bb->next->bb_label_name);
break;
case OP_label:
print_indent(0);
printf("%s:", insn->str);
break;
case OP_push:
print_indent(1);
printf("push %%%s", rs1->var_name);
Expand Down
5 changes: 4 additions & 1 deletion src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

/* Hash table constants */
#define NUM_DIRECTIVES 11
#define NUM_KEYWORDS 17
#define NUM_KEYWORDS 18

/* Token mapping structure for elegant initialization */
typedef struct {
Expand Down Expand Up @@ -85,6 +85,7 @@ void lex_init_keywords()
{"break", T_break},
{"default", T_default},
{"continue", T_continue},
{"goto", T_goto},
{"union", T_union},
{"const", T_const},
};
Expand Down Expand Up @@ -786,6 +787,8 @@ token_t lex_token_impl(bool aliasing)
keyword = T_enum;
} else if (!memcmp(token_str, "case", 4))
keyword = T_case;
else if (!memcmp(token_str, "goto", 4))
keyword = T_goto;
break;

case 5: /* 5-letter keywords: while, break, union, const */
Expand Down
121 changes: 121 additions & 0 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ int break_exit_idx = 0;
basic_block_t *continue_bb[MAX_NESTING];
int continue_pos_idx = 0;

/* Label utilities
 *
 * labels[0 .. label_idx) holds the labels seen so far in the function being
 * parsed. backpatch_bb[0 .. backpatch_bb_idx) holds the blocks of goto
 * statements whose label had not been defined yet when the goto was parsed;
 * they are connected to their targets after the function body has been read.
 * Both counters are reset to 0 at the end of each function body.
 */
label_t labels[MAX_LABELS];
int label_idx = 0;
basic_block_t *backpatch_bb[MAX_LABELS];
int backpatch_bb_idx = 0;

/* stack of the operands of 3AC */
var_t *operand_stack[MAX_OPERAND_STACK_SIZE];
int operand_stack_idx = 0;
Expand All @@ -40,6 +46,26 @@ void parse_array_init(var_t *var,
basic_block_t **bb,
bool emit_code);


label_t *find_label(char *name)
{
for (int i = 0; i < label_idx; i++) {
if (!strcmp(name, labels[i].label_name))
return &labels[i];
}
return NULL;
}

/* Register a label for the function currently being parsed.
 *
 * name: identifier of the label; names of MAX_ID_LEN characters or more are
 *       truncated so that the stored copy is always NUL-terminated.
 * bb:   basic block that begins at the label.
 *
 * Reports "Too many labels in function" when the label table is full.
 */
void add_label(char *name, basic_block_t *bb)
{
    if (label_idx >= MAX_LABELS)
        error("Too many labels in function");

    label_t *l = &labels[label_idx++];

    /* strncpy() leaves the destination unterminated when the source fills
     * the whole buffer, so copy one byte less and terminate explicitly.
     */
    strncpy(l->label_name, name, MAX_ID_LEN - 1);
    l->label_name[MAX_ID_LEN - 1] = 0;
    l->bb = bb;

    /* Table slots are recycled for every function (label_idx is rewound to
     * 0), so a slot may still carry 'used' from an earlier label. Clear it,
     * otherwise the unused-label warning would be suppressed by mistake.
     */
    l->used = false;
}

char *gen_name_to(char *buf)
{
sprintf(buf, ".t%d", global_var_idx++);
Expand Down Expand Up @@ -997,6 +1023,61 @@ basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb)
return else_;
}

/* Parse the rest of a 'goto <label>;' statement and build its control flow.
 *
 * The caller has already consumed the 'goto' keyword. Returns the basic
 * block in which the statements following the goto are to be emitted.
 */
basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb)
{
    /* A plain goto would end the current basic block and leave the block
     * that follows it without any predecessor, which causes problems for
     * the later CFG operations. To avoid that, the goto is wrapped in a fake
     * 'if' whose condition is the constant 1: the jump sits in the 'then'
     * branch and the code after the goto hangs off the 'else' branch, which
     * is the block returned to the caller.
     *
     * before:
     *     a = b + c;
     *     goto label;
     *     c *= d;
     *
     * after:
     *     a = b + c;
     *     if (1)
     *         goto label;
     *     c *= d;
     */
    char target[MAX_ID_LEN];
    if (!lex_peek(T_identifier, target))
        error("Expected identifier after 'goto'");

    lex_expect(T_identifier);
    lex_expect(T_semicolon);

    /* Condition block: load the constant 1 and branch on it. */
    basic_block_t *cond_bb = bb_create(parent);
    bb_connect(bb, cond_bb, NEXT);
    var_t *always = require_var(parent);
    gen_name_to(always->var_name);
    always->init_val = 1;
    add_insn(parent, cond_bb, OP_load_constant, always, NULL, NULL, 0, NULL);
    add_insn(parent, cond_bb, OP_branch, NULL, always, NULL, 0, NULL);

    basic_block_t *jump_bb = bb_create(parent);
    basic_block_t *after_bb = bb_create(parent);
    bb_connect(cond_bb, jump_bb, THEN);
    bb_connect(cond_bb, after_bb, ELSE);

    /* The jump carries the label name and must remain the last instruction
     * of its block: forward references are resolved later by reading the
     * name back from the block's tail instruction.
     */
    add_insn(parent, jump_bb, OP_jump, NULL, NULL, NULL, 0, target);

    label_t *dest = find_label(target);
    if (dest) {
        /* Label already defined: wire the jump up right away. */
        dest->used = true;
        bb_connect(jump_bb, dest->bb, NEXT);
    } else {
        /* Label not seen yet: remember the block for later patching. */
        if (backpatch_bb_idx >= MAX_LABELS)
            error("Too many forward-referenced labels");

        backpatch_bb[backpatch_bb_idx++] = jump_bb;
    }

    return after_bb;
}

basic_block_t *handle_struct_variable_decl(block_t *parent,
basic_block_t *bb,
char *token)
Expand Down Expand Up @@ -4169,6 +4250,9 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
return do_while_end;
}

if (lex_accept(T_goto))
return handle_goto_statement(parent, bb);

/* empty statement */
if (lex_accept(T_semicolon))
return bb;
Expand Down Expand Up @@ -4753,6 +4837,21 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
return bb;
}

if (lex_peek(T_identifier, token)) {
lex_accept(T_identifier);
if (lex_accept(T_colon)) {
label_t *l = find_label(token);
if (l)
error("label redefinition");

basic_block_t *n = bb_create(parent);
bb_connect(bb, n, NEXT);
add_label(token, n);
add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token);
return n;
}
}

error("Unrecognized statement token");
return NULL;
}
Expand Down Expand Up @@ -4794,6 +4893,28 @@ void read_func_body(func_t *func)
basic_block_t *body = read_code_block(func, NULL, NULL, func->bbs);
if (body)
bb_connect(body, func->exit, NEXT);

for (int i = 0; i < backpatch_bb_idx; i++) {
basic_block_t *bb = backpatch_bb[i];
insn_t *g = bb->insn_list.tail;
label_t *label = find_label(g->str);
if (!label)
error("goto label undefined");

label->used = true;
bb_connect(bb, label->bb, NEXT);
}

for (int i = 0; i < label_idx; i++) {
label_t *label = &labels[i];
if (label->used)
continue;

printf("Warning: unused label %s\n", label->label_name);
}

backpatch_bb_idx = 0;
label_idx = 0;
}

/* if first token is type */
Expand Down
85 changes: 85 additions & 0 deletions src/ssa.c
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,82 @@ void unwind_phi(void)
}
}

/* Report whether 'succ' can be reached from 'pred' by following dom_next
 * links, searching recursively through every listed block.
 *
 * The dom_next array is read up to its first NULL entry or up to
 * MAX_BB_DOM_SUCC entries, whichever comes first. 'pred' itself is never
 * compared against 'succ', so is_dominate(x, x) is true only if 'x' shows
 * up again further down its own dom_next links.
 */
bool is_dominate(basic_block_t *pred, basic_block_t *succ)
{
    bool dominated = false;

    for (int k = 0; k < MAX_BB_DOM_SUCC && pred->dom_next[k]; k++) {
        basic_block_t *child = pred->dom_next[k];

        /* Direct hit: nothing more to search. */
        if (child == succ)
            return true;

        if (is_dominate(child, succ))
            dominated = true;
    }

    return dominated;
}

/*
 * Per-block check for invalid cross-block initialization.
 *
 * The basic block that defines a variable must dominate every basic block
 * in which the variable is used. For each variable allocated in 'bb' (an
 * OP_allocat instruction), every other block on the variable's reference
 * list is tested with is_dominate(); a warning is printed for each one that
 * 'bb' does not dominate. 'func' is not used.
 */
void bb_check_var_cross_init(func_t *func, basic_block_t *bb)
{
    UNUSED(func);

    insn_t *cur = bb->insn_list.head;
    while (cur) {
        if (cur->opcode == OP_allocat) {
            var_t *decl = cur->rd;

            for (ref_block_t *use = decl->ref_block_list.head; use;
                 use = use->next) {
                /* References inside the defining block are always fine. */
                if (use->bb != bb && !is_dominate(bb, use->bb))
                    printf("Warning: Variable '%s' cross-initialized\n",
                           decl->var_name);
            }
        }
        cur = cur->next;
    }
}

/**
 * Run the cross-initialization check over every function that has a body.
 *
 * The check looks for variables whose initialization lives in a basic block
 * that does not dominate all of their uses: control flow can then reach a
 * use without first passing through the initialization (i.e., a
 * possibly-uninitialized use).
 *
 * Example:
 *   // Jumps directly to 'label', skipping the declaration below
 *   goto label;
 *   if (1) {
 *       // This line is never executed when 'goto' is taken
 *       int x;
 *   label:
 *       // Uses 'x' after its declaration was bypassed
 *       x = 5;
 *   }
 */
void check_var_cross_init()
{
    bb_traversal_args_t *args = arena_alloc_traversal_args();
    func_t *fn = FUNC_LIST.head;

    while (fn) {
        /* Function declarations without bodies have nothing to check. */
        if (fn->bbs) {
            /* NOTE(review): the counter is bumped before each traversal,
             * presumably so bb_forward_traversal can tell unvisited blocks
             * apart - confirm against its implementation.
             */
            fn->visited++;

            args->func = fn;
            args->bb = fn->bbs;
            args->postorder_cb = bb_check_var_cross_init;
            bb_forward_traversal(args);
        }
        fn = fn->next;
    }
}

#ifdef __SHECC__
#else
void bb_dump_connection(FILE *fd,
Expand Down Expand Up @@ -1112,6 +1188,12 @@ void bb_dump(FILE *fd, func_t *func, basic_block_t *bb)
sprintf(str, "<BRANCH %s<SUB>%d</SUB>>", insn->rs1->var_name,
insn->rs1->subscript);
break;
case OP_jump:
sprintf(str, "<JUMP>");
break;
case OP_label:
sprintf(str, "<LABEL>");
break;
case OP_push:
sprintf(str, "<PUSH %s<SUB>%d</SUB>>", insn->rs1->var_name,
insn->rs1->subscript);
Expand Down Expand Up @@ -1281,6 +1363,9 @@ void ssa_build(void)
build_df();

solve_globals();

check_var_cross_init();

solve_phi_insertion();
solve_phi_params();

Expand Down
Loading