From 208ad57bce55a31bc6834609fdff1b62abc454a9 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 17 Aug 2025 13:40:31 +0800 Subject: [PATCH 1/5] Add sanitizer support to Makefile Add infrastructure for building and testing shecc with AddressSanitizer and UndefinedBehaviorSanitizer to detect memory safety issues. - Add 'sanitizer' target that builds stage 0 with sanitizers enabled - Add 'check-sanitizer' target for running tests with sanitizer build - Use -fsanitize=address -fsanitize=undefined flags - Add -fno-omit-frame-pointer for better stack traces --- Makefile | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 73f2bc0d..98f28818 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,11 @@ SNAPSHOTS := $(foreach SNAPSHOT_ARCH,$(ARCHS), $(patsubst tests/%.c, tests/snaps all: config bootstrap +sanitizer: CFLAGS += -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer -O0 +sanitizer: LDFLAGS += -fsanitize=address -fsanitize=undefined +sanitizer: config $(OUT)/$(STAGE0)-sanitizer + $(VECHO) " Built stage 0 compiler with sanitizers\n" + ifeq (,$(filter $(ARCH),$(ARCHS))) $(error Support ARM and RISC-V only. 
Select the target with "ARCH=arm" or "ARCH=riscv") endif @@ -81,6 +86,12 @@ check-stage2: $(OUT)/$(STAGE2) $(TESTBINS) tests/driver.sh $(VECHO) " TEST STAGE 2\n" tests/driver.sh 2 +check-sanitizer: $(OUT)/$(STAGE0)-sanitizer tests/driver.sh + $(VECHO) " TEST STAGE 0 (with sanitizers)\n" + $(Q)cp $(OUT)/$(STAGE0)-sanitizer $(OUT)/shecc + tests/driver.sh 0 + $(Q)rm $(OUT)/shecc + check-snapshots: $(OUT)/$(STAGE0) $(SNAPSHOTS) tests/check-snapshots.sh $(Q)$(foreach SNAPSHOT_ARCH, $(ARCHS), $(MAKE) distclean config check-snapshot ARCH=$(SNAPSHOT_ARCH) --silent;) $(VECHO) "Switching backend back to %s\n" $(ARCH) @@ -123,7 +134,11 @@ $(OUT)/inliner: tools/inliner.c $(OUT)/$(STAGE0): $(OUT)/libc.inc $(OBJS) $(VECHO) " LD\t$@\n" - $(Q)$(CC) $(OBJS) -o $@ + $(Q)$(CC) $(OBJS) $(LDFLAGS) -o $@ + +$(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS) + $(VECHO) " LD\t$@ (with sanitizers)\n" + $(Q)$(CC) $(OBJS) $(LDFLAGS) -o $@ $(OUT)/$(STAGE1): $(OUT)/$(STAGE0) $(Q)$(STAGE1_CHECK_CMD) From 61bc355c0e89e85a93da6ea82c2dd84f476f64fb Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 17 Aug 2025 13:43:11 +0800 Subject: [PATCH 2/5] Fix arena allocator alignment for 64-bit hosts The arena allocator was using a hardcoded PTR_SIZE (4 bytes) for memory alignment, which caused misalignment issues on 64-bit host systems where pointers are 8 bytes. This led to UndefinedBehaviorSanitizer errors. Round each allocation size up to a multiple of sizeof(void *) instead, so the alignment follows the host pointer size. 
--- src/globals.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/globals.c b/src/globals.c index df8abefe..5ce85063 100644 --- a/src/globals.c +++ b/src/globals.c @@ -164,8 +164,9 @@ void *arena_alloc(arena_t *arena, int size) abort(); } - /* Align to PTR_SIZE bytes */ - size = (size + PTR_SIZE - 1) & ~(PTR_SIZE - 1); + /* Align to sizeof(void*) bytes for host compatibility */ + int alignment = sizeof(void *); + size = (size + alignment - 1) & ~(alignment - 1); if (!arena->head || arena->head->offset + size > arena->head->capacity) { /* Need a new block: choose capacity = max(DEFAULT_ARENA_SIZE, From ad7db31ae07df65487c76edf7a264c799eb39647 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 17 Aug 2025 13:45:01 +0800 Subject: [PATCH 3/5] Replace arena_alloc with arena_calloc Changed all structure allocations to use arena_calloc instead of arena_alloc to ensure zero-initialization and eliminate undefined behavior from accessing uninitialized memory. This fixes sanitizer errors related to: - block_t, var_t, basic_block_t structures in parsing - insn_t structures in SSA passes - ph2_ir_t structures in register allocation - constant_t, alias_t, macro_t structures in symbol management --- src/globals.c | 14 +++++++------- src/parser.c | 6 +++--- src/reg-alloc.c | 2 +- src/ssa.c | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/globals.c b/src/globals.c index 5ce85063..ecc2f406 100644 --- a/src/globals.c +++ b/src/globals.c @@ -308,17 +308,17 @@ symbol_t *arena_alloc_symbol(void) constant_t *arena_alloc_constant(void) { - return arena_alloc(GENERAL_ARENA, sizeof(constant_t)); + return arena_calloc(GENERAL_ARENA, 1, sizeof(constant_t)); } alias_t *arena_alloc_alias(void) { - return arena_alloc(GENERAL_ARENA, sizeof(alias_t)); + return arena_calloc(GENERAL_ARENA, 1, sizeof(alias_t)); } macro_t *arena_alloc_macro(void) { - return arena_alloc(GENERAL_ARENA, sizeof(macro_t)); + return arena_calloc(GENERAL_ARENA, 1, 
sizeof(macro_t)); } bb_traversal_args_t *arena_alloc_traversal_args(void) @@ -614,7 +614,7 @@ ph2_ir_t *add_existed_ph2_ir(ph2_ir_t *ph2_ir) ph2_ir_t *add_ph2_ir(opcode_t op) { - ph2_ir_t *ph2_ir = arena_alloc(BB_ARENA, sizeof(ph2_ir_t)); + ph2_ir_t *ph2_ir = arena_calloc(BB_ARENA, 1, sizeof(ph2_ir_t)); ph2_ir->op = op; /* Set safe defaults; arch-lowering may annotate later */ ph2_ir->next = NULL; @@ -631,7 +631,7 @@ void set_var_liveout(var_t *var, int end) block_t *add_block(block_t *parent, func_t *func, macro_t *macro) { - block_t *blk = arena_alloc(BLOCK_ARENA, sizeof(block_t)); + block_t *blk = arena_calloc(BLOCK_ARENA, 1, sizeof(block_t)); blk->parent = parent; blk->func = func; @@ -885,7 +885,7 @@ func_t *find_func(char *func_name) /* Create a basic block and set the scope of variables to 'parent' block */ basic_block_t *bb_create(block_t *parent) { - basic_block_t *bb = arena_alloc(BB_ARENA, sizeof(basic_block_t)); + basic_block_t *bb = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t)); for (int i = 0; i < MAX_BB_PRED; i++) { bb->prev[i].bb = NULL; @@ -1001,7 +1001,7 @@ void add_insn(block_t *block, bb->scope = block; - insn_t *n = arena_alloc(INSN_ARENA, sizeof(insn_t)); + insn_t *n = arena_calloc(INSN_ARENA, 1, sizeof(insn_t)); n->opcode = op; n->rd = rd; n->rs1 = rs1; diff --git a/src/parser.c b/src/parser.c index 0471ab3a..4dab3dce 100644 --- a/src/parser.c +++ b/src/parser.c @@ -50,7 +50,7 @@ var_t *require_var(block_t *blk) var_list->elements = new_locals; } - var_t *var = arena_alloc(BLOCK_ARENA, sizeof(var_t)); + var_t *var = arena_calloc(BLOCK_ARENA, 1, sizeof(var_t)); var_list->elements[var_list->size++] = var; var->consumed = -1; var->base = var; @@ -3792,7 +3792,7 @@ void parse_internal(void) /* set starting point of global stack manually */ GLOBAL_FUNC = add_func("", true); GLOBAL_FUNC->stack_size = 4; - GLOBAL_FUNC->bbs = arena_alloc(BB_ARENA, sizeof(basic_block_t)); + GLOBAL_FUNC->bbs = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t)); /* 
built-in types */ TY_void = add_named_type("void"); @@ -3829,7 +3829,7 @@ void parse_internal(void) func->return_def.type = TY_int; func->num_params = 0; func->va_args = 1; - func->bbs = arena_alloc(BB_ARENA, sizeof(basic_block_t)); + func->bbs = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t)); /* lexer initialization */ SOURCE->size = 0; diff --git a/src/reg-alloc.c b/src/reg-alloc.c index fd025b48..200548f9 100644 --- a/src/reg-alloc.c +++ b/src/reg-alloc.c @@ -53,7 +53,7 @@ void refresh(basic_block_t *bb, insn_t *insn) ph2_ir_t *bb_add_ph2_ir(basic_block_t *bb, opcode_t op) { - ph2_ir_t *n = arena_alloc(BB_ARENA, sizeof(ph2_ir_t)); + ph2_ir_t *n = arena_calloc(BB_ARENA, 1, sizeof(ph2_ir_t)); n->op = op; /* Ensure deterministic defaults for newly created IR nodes */ n->next = NULL; /* well-formed singly linked list */ diff --git a/src/ssa.c b/src/ssa.c index b2262afe..391fa5eb 100644 --- a/src/ssa.c +++ b/src/ssa.c @@ -583,7 +583,7 @@ bool insert_phi_insn(basic_block_t *bb, var_t *var) return false; insn_t *head = bb->insn_list.head; - insn_t *n = arena_alloc(INSN_ARENA, sizeof(insn_t)); + insn_t *n = arena_calloc(INSN_ARENA, 1, sizeof(insn_t)); n->opcode = OP_phi; n->rd = var; n->rs1 = var; @@ -805,7 +805,7 @@ void solve_phi_params(void) void append_unwound_phi_insn(basic_block_t *bb, var_t *dest, var_t *rs) { - insn_t *n = arena_alloc(INSN_ARENA, sizeof(insn_t)); + insn_t *n = arena_calloc(INSN_ARENA, 1, sizeof(insn_t)); n->opcode = OP_unwound_phi; n->rd = dest; n->rs1 = rs; From c113d5eb897d32682fc87b89253be161991d4aab Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 17 Aug 2025 13:46:20 +0800 Subject: [PATCH 4/5] Fix parser crash on pointer dereference assignment Fixed a parser crash that occurred when encountering pointer dereference assignment statements like '*ap = 0;'. The issue was that the parser was trying to find a function named '*ap' before properly handling the pointer dereference syntax. 
The fix adds a has_asterisk flag to detect when the statement begins with an asterisk operator and skips the function call check in this case, allowing the existing pointer dereference handling code to process the statement correctly. --- src/parser.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/parser.c b/src/parser.c index 4dab3dce..a789b469 100644 --- a/src/parser.c +++ b/src/parser.c @@ -3232,7 +3232,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) else if (lex_accept(T_decrement)) prefix_op = OP_sub; /* must be an identifier or asterisk (for pointer dereference) */ - if (!lex_peek(T_identifier, token) && !lex_peek(T_asterisk, NULL)) + bool has_asterisk = lex_peek(T_asterisk, NULL); + if (!lex_peek(T_identifier, token) && !has_asterisk) error("Unexpected token"); /* handle macro parameter substitution for statements */ @@ -3350,14 +3351,16 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) return bb; } - /* is a function call? */ - func = find_func(token); - if (func) { - lex_expect(T_identifier); - read_func_call(func, parent, &bb); - perform_side_effect(parent, bb); - lex_expect(T_semicolon); - return bb; + /* is a function call? Skip function call check when has_asterisk is true */ + if (!has_asterisk) { + func = find_func(token); + if (func) { + lex_expect(T_identifier); + read_func_call(func, parent, &bb); + perform_side_effect(parent, bb); + lex_expect(T_semicolon); + return bb; + } } /* handle pointer dereference expressions like *ptr = value */ From ada731b2f75128e7a7d3f1555c7ddb9a187cbd23 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 17 Aug 2025 13:56:47 +0800 Subject: [PATCH 5/5] Add sanitizer validation to CI pipeline Integrate AddressSanitizer and UndefinedBehaviorSanitizer testing into the GitHub Actions workflow to catch memory safety and undefined behavior issues early in the development cycle. 
--- .github/workflows/main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9c4e25ff..4b30be7e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -26,6 +26,11 @@ jobs: - name: IR regression tests run: | make check-snapshot || exit 1 + - name: Sanitizer-enabled stage 0 tests + env: + CC: ${{ matrix.compiler }} + run: | + make check-sanitizer || exit 1 - name: Unit tests run: | make check || exit 1 @@ -48,6 +53,7 @@ jobs: apt-get install -yqq build-essential run: | make config ARCH=arm + make check-sanitizer || exit 1 make check || exit 1 coding-style: