From 13f90a9ab516e4369a35004f296a973d22d3389f Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 15:39:48 +0800 Subject: [PATCH 1/7] Add ARM BX instruction for function returns The BX (Branch and Exchange) instruction is needed for proper function returns in ARM code generation. Unlike BLX which saves the return address, BX simply branches to the address in the register. This fixes undefined reference errors during compilation. --- src/arm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/arm.c b/src/arm.c index 28422b03..124e2e8d 100644 --- a/src/arm.c +++ b/src/arm.c @@ -329,6 +329,12 @@ int __blx(arm_cond_t cond, arm_reg rd) return arm_encode(cond, 18, 15, 15, rd + 3888); } +int __bx(arm_cond_t cond, arm_reg rm) +{ + /* BX: Branch and Exchange */ + return (cond << 28) | 0x012FFF10 | rm; +} + int __mul(arm_cond_t cond, arm_reg rd, arm_reg r1, arm_reg r2) { return arm_encode(cond, 0, rd, 0, (r1 << 8) + 144 + r2); From 81fa621644b755cbaa9562192322b0870ad4e06d Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 15:41:40 +0800 Subject: [PATCH 2/7] Add constant folding for unary operators Optimize unary operations on constants at parse time: - Logical NOT (!x) when x is constant - Bitwise NOT (~x) when x is constant - Negation (-x) when x is constant This reduces the number of instructions generated and enables further optimizations in later passes. --- src/parser.c | 58 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/src/parser.c b/src/parser.c index dc81db92..4d1cd5da 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1750,18 +1750,40 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) read_expr_operand(parent, bb); rs1 = opstack_pop(); - vd = require_var(parent); - gen_name_to(vd->var_name); - opstack_push(vd); - add_insn(parent, *bb, OP_log_not, vd, rs1, NULL, 0, NULL); + + /* Constant folding for logical NOT */ + if (rs1 && rs1->is_const && !rs1->ptr_level && !rs1->is_global) { + vd = require_var(parent); + gen_name_to(vd->var_name); + vd->is_const = true; + vd->init_val = !rs1->init_val; + opstack_push(vd); + add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL); + } else { + vd = require_var(parent); + gen_name_to(vd->var_name); + opstack_push(vd); + add_insn(parent, *bb, OP_log_not, vd, rs1, NULL, 0, NULL); + } } else if (lex_accept(T_bit_not)) { read_expr_operand(parent, bb); rs1 = opstack_pop(); - vd = require_var(parent); - gen_name_to(vd->var_name); - opstack_push(vd); - add_insn(parent, *bb, OP_bit_not, vd, rs1, NULL, 0, NULL); + + /* Constant folding for bitwise NOT */ + if (rs1 && rs1->is_const && !rs1->ptr_level && !rs1->is_global) { + vd = require_var(parent); + gen_name_to(vd->var_name); + vd->is_const = true; + vd->init_val = ~rs1->init_val; + opstack_push(vd); + add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL); + } else { + vd = require_var(parent); + gen_name_to(vd->var_name); + opstack_push(vd); + add_insn(parent, *bb, OP_bit_not, vd, rs1, NULL, 0, NULL); + } } else if (lex_accept(T_ampersand)) { handle_address_of_operator(parent, bb); } else if (lex_accept(T_asterisk)) { @@ -2179,10 +2201,22 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) if (is_neg) { rs1 = opstack_pop(); - vd = require_var(parent); - gen_name_to(vd->var_name); - opstack_push(vd); - add_insn(parent, *bb, OP_negate, vd, rs1, NULL, 0, NULL); + + /* Constant folding for negation */ + if (rs1 && rs1->is_const && !rs1->ptr_level && !rs1->is_global) { + vd = require_var(parent); + gen_name_to(vd->var_name); + vd->is_const = true; + vd->init_val = -rs1->init_val; + opstack_push(vd); + add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, + NULL); + } else { + vd = require_var(parent); + gen_name_to(vd->var_name); + opstack_push(vd); + add_insn(parent, *bb, OP_negate, vd, rs1, NULL, 0, NULL); + } } } } From a06e3c6d282711c73a43bebf0ff709f470631e72 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 15:49:30 +0800 Subject: [PATCH 3/7] Improve dead code elimination Enhanced dead store elimination to detect stores overwritten within a small window (3 instructions). The optimization: - Checks for intervening uses before marking as dead - Stops at control flow boundaries for safety - Marks dead stores for removal by DCE sweep This catches common patterns like consecutive assignments while remaining conservative to avoid incorrect elimination. --- src/ssa.c | 60 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/src/ssa.c b/src/ssa.c index c6269cce..5407b8f6 100644 --- a/src/ssa.c +++ b/src/ssa.c @@ -18,6 +18,9 @@ #define PHI_WORKLIST_SIZE 64 #define DCE_WORKLIST_SIZE 2048 +/* Dead store elimination window size */ +#define OVERWRITE_WINDOW 3 + /* cfront does not accept structure as an argument, pass pointer */ void bb_forward_traversal(bb_traversal_args_t *args) { @@ -1836,24 +1839,47 @@ void optimize(void) continue; } - /* Dead store elimination - conservative */ - if (insn->opcode == OP_store || insn->opcode == OP_write) { - /* Only eliminate if target is local and immediately - * overwritten - */ - if (insn->rd && !insn->rd->is_global && insn->next) { - insn_t *next_insn = insn->next; + /* Improved dead store elimination */ + if (insn->opcode == OP_store || insn->opcode == OP_write || + insn->opcode == OP_global_store) { + if (insn->rd && !insn->rd->is_global) { + /* Look for overwrites within a small window */ + insn_t *check = insn->next; + int distance = 0; + bool found_overwrite = false; + + while (check && distance < OVERWRITE_WINDOW) { + /* Stop at control flow changes */ + if (check->opcode == OP_branch || + check->opcode == OP_jump || + check->opcode == OP_call || + check->opcode == OP_return) { + break; + } - /* Check for immediate overwrite with no intervening - * instructions - */ - if ((next_insn->opcode == OP_store || - next_insn->opcode == OP_write) && - next_insn->rd == insn->rd) { - /* Eliminate only immediate overwrites */ - insn->rd = NULL; - insn->rs1 = NULL; - insn->rs2 = NULL; + /* Check if there's a use of the stored location */ + if ((check->opcode == OP_load || + check->opcode == OP_read) && + check->rs1 == insn->rd) { + break; /* Store is needed */ + } + + /* Found overwrite */ + if ((check->opcode == OP_store || + check->opcode == OP_write || + check->opcode == OP_global_store) && + check->rd == insn->rd) { + found_overwrite = true; + break; + } + + check = check->next; + distance++; + } + + if (found_overwrite) { + /* Mark for removal by DCE */ + insn->useful = false; } } } From 400ec54509ebf5f3fe5150fb03d487910f30b637 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 15:57:28 +0800 Subject: [PATCH 4/7] Add safety guards for division and modulo optimizations Implement zero-check guards to prevent undefined behavior in: - x / x = 1 optimization (only when x is provably non-zero) - x % x = 0 optimization (only when x is provably non-zero) - x / 1 = x and x % 1 = 0 (always safe) These guards ensure correctness by only applying optimizations when operands are compile-time constants with non-zero values, addressing reviewer concerns about potential division by zero. --- src/ssa.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/ssa.c b/src/ssa.c index 5407b8f6..891783d9 100644 --- a/src/ssa.c +++ b/src/ssa.c @@ -1884,7 +1884,41 @@ void optimize(void) } } - /* TODO: Dead load elimination */ + /* Safety guards for division and modulo optimizations */ + if (insn->rs1 && insn->rs2 && insn->rs1 == insn->rs2) { + /* x / x = 1 (with zero-check guard) */ + if (insn->opcode == OP_div && insn->rd) { + /* Only optimize if we can prove x is non-zero */ + bool is_safe = false; + if (insn->rs1->is_const && insn->rs1->init_val != 0) { + is_safe = true; + } + + if (is_safe) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 1; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + } + /* x % x = 0 (with zero-check guard) */ + else if (insn->opcode == OP_mod && insn->rd) { + /* Only optimize if we can prove x is non-zero */ + bool is_safe = false; + if (insn->rs1->is_const && insn->rs1->init_val != 0) { + is_safe = true; + } + + if (is_safe) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + } + } /* more optimizations */ } From 9b42b2840081e2838204cd180e2376c7644370fb Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 16:22:26 +0800 Subject: [PATCH 5/7] Enhance algebraic simplifications Add comprehensive algebraic optimization patterns: - Self-operations: x-x=0, x^x=0, x&x=x, x|x=x - Comparisons: x==x=1, x!=x=0, xrs1 && insn->rs2 && insn->rs1 == insn->rs2) { + /* x - x = 0 */ + if (insn->opcode == OP_sub && insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x ^ x = 0 */ + else if (insn->opcode == OP_bit_xor && insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x & x = x */ + else if (insn->opcode == OP_bit_and && insn->rd) { + insn->opcode = OP_assign; + insn->rs2 = NULL; + } + /* x | x = x */ + else if (insn->opcode == OP_bit_or && insn->rd) { + insn->opcode = OP_assign; + insn->rs2 = NULL; + } + /* x == x = 1 */ + else if (insn->opcode == OP_eq && insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 1; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x != x = 0 */ + else if (insn->opcode == OP_neq && insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x < x = 0, x > x = 0 */ + else if ((insn->opcode == OP_lt || insn->opcode == OP_gt) && + insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x <= x = 1, x >= x = 1 */ + else if ((insn->opcode == OP_leq || + insn->opcode == OP_geq) && + insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 1; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + } + + /* Identity and constant optimizations */ + if (insn->rs2 && insn->rs2->is_const && insn->rd) { + int val = insn->rs2->init_val; + + /* x + 0 = x, x - 0 = x, x | 0 = x, x ^ 0 = x */ + if (val == 0) { + if (insn->opcode == OP_add || insn->opcode == OP_sub || + insn->opcode == OP_bit_or || + insn->opcode == OP_bit_xor) { + insn->opcode = OP_assign; + insn->rs2 = NULL; + } + /* x * 0 = 0, x & 0 = 0 */ + else if (insn->opcode == OP_mul || + insn->opcode == OP_bit_and) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x << 0 = x, x >> 0 = x */ + else if (insn->opcode == OP_lshift || + insn->opcode == OP_rshift) { + insn->opcode = OP_assign; + insn->rs2 = NULL; + } + } + /* x * 1 = x, x / 1 = x */ + else if (val == 1) { + if (insn->opcode == OP_mul || insn->opcode == OP_div) { + insn->opcode = OP_assign; + insn->rs2 = NULL; + } + /* x % 1 = 0 */ + else if (insn->opcode == OP_mod) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = 0; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + } + /* x & -1 = x (all bits set) */ + else if (val == -1) { + if (insn->opcode == OP_bit_and) { + insn->opcode = OP_assign; + insn->rs2 = NULL; + } + /* x | -1 = -1 */ + else if (insn->opcode == OP_bit_or) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = -1; + insn->rs1 = NULL; + insn->rs2 = NULL; + } + /* x * -1 = -x */ + else if (insn->opcode == OP_mul) { + insn->opcode = OP_negate; + insn->rs2 = NULL; + } + } + } + /* more optimizations */ } } From 474be1a94ac611dcc82cd5fb444efe2f21614b42 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 16:28:17 +0800 Subject: [PATCH 6/7] Add phi node optimization Implement trivial phi node elimination: - Remove phi nodes where all operands are the same variable - Replace with simple assignment (phi(x,x,x) = x) - Fold phi nodes with all same constant values - Convert to load_constant for compile-time evaluation This optimization reduces unnecessary phi operations in SSA form, improving both compile time and generated code quality by eliminating redundant merge points. --- src/ssa.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/ssa.c b/src/ssa.c index 7fd86cf1..69386985 100644 --- a/src/ssa.c +++ b/src/ssa.c @@ -2056,7 +2056,57 @@ void optimize(void) } } - /* TODO: Phi node optimization */ + /* Phi node optimization - eliminate trivial phi nodes */ + for (func_t *func = FUNC_LIST.head; func; func = func->next) { + for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) { + for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { + if (insn->opcode == OP_phi && insn->phi_ops) { + /* Count unique operands and check if all are the same */ + var_t *first_var = insn->phi_ops->var; + bool all_same = true; + bool all_const = true; + int const_val = 0; + int num_ops = 0; + + for (phi_operand_t *op = insn->phi_ops; op; op = op->next) { + num_ops++; + /* Check if all same variable */ + if (op->var != first_var) { + all_same = false; + } + /* Check if all same constant */ + if (op->var && op->var->is_const) { + if (op == insn->phi_ops) { + const_val = op->var->init_val; + } else if (op->var->init_val != const_val) { + all_const = false; + } + } else { + all_const = false; + } + } + + /* Trivial phi: all operands are the same variable */ + if (all_same && first_var && insn->rd) { + insn->opcode = OP_assign; + insn->rs1 = first_var; + insn->rs2 = NULL; + insn->phi_ops = NULL; + } + /* Constant phi: all operands have the same constant value + */ + else if (all_const && num_ops > 0 && insn->rd) { + insn->opcode = OP_load_constant; + insn->rd->is_const = true; + insn->rd->init_val = const_val; + insn->rs1 = NULL; + insn->rs2 = NULL; + insn->phi_ops = NULL; + } + } + } + } + } /* Mark useful instructions */ for (func_t *func = FUNC_LIST.head; func; func = func->next) { From 807f02f0cd8895d711240a90fe22361948f634f3 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 5 Sep 2025 16:37:12 +0800 Subject: [PATCH 7/7] Implement multi-instruction analysis and optimization Add sophisticated cross-instruction optimizations: Store-to-Load Forwarding: - Forward stored values directly to subsequent loads - Eliminate unnecessary memory round-trips - Validate no intervening calls or branches Redundant Load Elimination: - Reuse values from previous loads to same location - Check for no intervening stores or calls - Convert redundant loads to simple assignments Strength Reduction: - Convert multiply by power-of-2 to left shift - Convert divide by power-of-2 to right shift - Convert modulo by power-of-2 to bitwise AND These patterns analyze instruction sequences to find optimization opportunities that single-instruction analysis would miss. --- src/ssa.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/src/ssa.c b/src/ssa.c index 69386985..e6fc1bda 100644 --- a/src/ssa.c +++ b/src/ssa.c @@ -2051,6 +2051,130 @@ void optimize(void) } } + /* Multi-instruction analysis and optimization */ + /* Store-to-load forwarding */ + if (insn->opcode == OP_load && insn->rs1 && insn->rd) { + insn_t *search = insn->prev; + int search_limit = 10; /* Look back up to 10 instructions */ + + while (search && search_limit > 0) { + /* Found a recent store to the same location */ + if ((search->opcode == OP_store || + search->opcode == OP_write || + search->opcode == OP_global_store) && + search->rd == insn->rs1 && search->rs1) { + /* Check for intervening calls or branches */ + bool safe_to_forward = true; + insn_t *check = search->next; + + while (check && check != insn) { + if (check->opcode == OP_call || + check->opcode == OP_indirect || + check->opcode == OP_branch || + check->opcode == OP_jump) { + safe_to_forward = false; + break; + } + check = check->next; + } + + if (safe_to_forward) { + /* Forward the stored value */ + insn->opcode = OP_assign; + insn->rs1 = search->rs1; + insn->rs2 = NULL; + break; + } + } + + /* Stop at control flow changes */ + if (search->opcode == OP_call || + search->opcode == OP_branch || + search->opcode == OP_jump || + search->opcode == OP_indirect) { + break; + } + + search = search->prev; + search_limit--; + } + } + + /* Redundant load elimination */ + if (insn->opcode == OP_load && insn->rs1 && insn->rd) { + insn_t *search = bb->insn_list.head; + + while (search && search != insn) { + /* Found an earlier load from the same location */ + if (search->opcode == OP_load && + search->rs1 == insn->rs1 && search->rd) { + /* Check if location wasn't modified between loads + */ + bool safe_to_reuse = true; + insn_t *check = search->next; + + while (check && check != insn) { + /* Check for stores to the same location */ + if ((check->opcode == OP_store || + check->opcode == OP_global_store || + check->opcode == OP_write) && + check->rd == insn->rs1) { + safe_to_reuse = false; + break; + } + /* Function calls might modify memory */ + if (check->opcode == OP_call || + check->opcode == OP_indirect) { + safe_to_reuse = false; + break; + } + check = check->next; + } + + if (safe_to_reuse) { + /* Replace with assignment from previous load */ + insn->opcode = OP_assign; + insn->rs1 = search->rd; + insn->rs2 = NULL; + break; + } + } + search = search->next; + } + } + + /* Strength reduction for power-of-2 operations */ + if (insn->rs2 && insn->rs2->is_const && insn->rd) { + int val = insn->rs2->init_val; + + /* Check if value is power of 2 */ + if (val > 0 && (val & (val - 1)) == 0) { + /* Count trailing zeros to get shift amount */ + int shift = 0; + int temp = val; + while ((temp & 1) == 0) { + temp >>= 1; + shift++; + } + + /* x * power_of_2 = x << shift */ + if (insn->opcode == OP_mul) { + insn->opcode = OP_lshift; + insn->rs2->init_val = shift; + } + /* x / power_of_2 = x >> shift (unsigned) */ + else if (insn->opcode == OP_div) { + insn->opcode = OP_rshift; + insn->rs2->init_val = shift; + } + /* x % power_of_2 = x & (power_of_2 - 1) */ + else if (insn->opcode == OP_mod) { + insn->opcode = OP_bit_and; + insn->rs2->init_val = val - 1; + } + } + } + /* more optimizations */ } }