Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ int fib(int n) def int @fib(int %n)

## Known Issues

1. The generated ELF lacks .bss and .rodata sections
2. Full `<stdarg.h>` support is not available. Variadic functions work via direct pointer arithmetic.
See the `printf` implementation in `lib/c.c` for the supported approach.
3. The C front-end operates directly on token streams without building a full AST.
Expand Down
17 changes: 12 additions & 5 deletions src/arm-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
elf_offset += 116;
return;
case OP_load_data_address:
case OP_load_rodata_address:
elf_offset += 8;
return;
case OP_address_of_func:
Expand Down Expand Up @@ -188,9 +189,9 @@ void emit(int code)
void emit_ph2_ir(ph2_ir_t *ph2_ir)
{
func_t *func;
int rd = ph2_ir->dest;
int rn = ph2_ir->src0;
int rm = ph2_ir->src1;
const int rd = ph2_ir->dest;
const int rn = ph2_ir->src0;
int rm = ph2_ir->src1; /* Not const because OP_trunc modifies it */
int ofs;

/* Prepare this variable to reuse code for:
Expand Down Expand Up @@ -288,6 +289,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
return;
case OP_load_rodata_address:
emit(__movw(__AL, rd, ph2_ir->src0 + elf_rodata_start));
emit(__movt(__AL, rd, ph2_ir->src0 + elf_rodata_start));
return;
case OP_address_of_func:
func = find_func(ph2_ir->func_name);
ofs = elf_code_start + func->bbs->elf_offset;
Expand All @@ -310,7 +315,7 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
emit(__movt(__AL, __r8, ph2_ir->src1 + 4));
emit(__add_r(__AL, __sp, __sp, __r8));
emit(__lw(__AL, __lr, __sp, -4));
emit(__blx(__AL, __lr));
emit(__bx(__AL, __lr));
return;
case OP_add:
emit(__add_r(__AL, rd, rn, rm));
Expand Down Expand Up @@ -450,6 +455,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
void code_generate(void)
{
elf_data_start = elf_code_start + elf_offset;
elf_rodata_start = elf_data_start + elf_data->size;
elf_bss_start = elf_rodata_start + elf_rodata->size;

/* start */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
Expand Down Expand Up @@ -477,7 +484,7 @@ void code_generate(void)
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__mov_r(__AL, __pc, __lr));
emit(__bx(__AL, __lr));

ph2_ir_t *ph2_ir;
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
Expand Down
10 changes: 5 additions & 5 deletions src/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,17 +324,17 @@ int __bl(arm_cond_t cond, int ofs)
return arm_encode(cond, 176, 0, 0, 0) + (o & 16777215);
}

int __blx(arm_cond_t cond, arm_reg rd)
{
    /* BLX (register form): branch with link to the address held in a
     * register. The low operand is the fixed value 0xF30 (3888) plus the
     * register number passed in 'rd'.
     * NOTE(review): arm_encode() is not visible here; presumably it packs
     * cond/opcode/register fields into the final instruction word - confirm.
     */
    int operand = 0xF30 + rd;
    return arm_encode(cond, 18, 15, 15, operand);
}

int __bx(arm_cond_t cond, arm_reg rm)
{
    /* BX: Branch and Exchange.
     * The condition code occupies the top four bits (shifted left by 28),
     * 0x012FFF10 is the fixed part of the instruction word, and the target
     * register number is OR-ed into the low bits.
     */
    int cond_field = cond << 28;
    int fixed_bits = 0x012FFF10;
    return cond_field | fixed_bits | rm;
}

int __blx(arm_cond_t cond, arm_reg rd)
{
    /* BLX (register form): branch with link to the address held in a
     * register. The low operand is the fixed value 0xF30 (3888) plus the
     * register number passed in 'rd'.
     * NOTE(review): arm_encode() is not visible here; presumably it packs
     * cond/opcode/register fields into the final instruction word - confirm.
     */
    int operand = 0xF30 + rd;
    return arm_encode(cond, 18, 15, 15, operand);
}

int __mul(arm_cond_t cond, arm_reg rd, arm_reg r1, arm_reg r2)
{
return arm_encode(cond, 0, rd, 0, (r1 << 8) + 144 + r2);
Expand Down
9 changes: 6 additions & 3 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ typedef enum {
T_break,
T_default,
T_continue,
T_const, /* const qualifier */
/* C pre-processor directives */
T_cppd_include,
T_cppd_define,
Expand Down Expand Up @@ -260,8 +261,9 @@ typedef enum {

OP_allocat, /* allocate space on stack */
OP_assign,
OP_load_constant, /* load constant */
OP_load_data_address, /* lookup address of a constant in data section */
OP_load_constant, /* load constant */
OP_load_data_address, /* lookup address of a constant in data section */
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment for OP_load_data_address is misleading now that constants reside in .rodata; clarify it refers to a writable data symbol, not a constant.

Prompt for AI agents
Address the following comment on src/defs.h at line 265:

<comment>Comment for OP_load_data_address is misleading now that constants reside in .rodata; clarify it refers to a writable data symbol, not a constant.</comment>

<file context>
@@ -260,8 +261,9 @@ typedef enum {
-    OP_load_constant,     /* load constant */
-    OP_load_data_address, /* lookup address of a constant in data section */
+    OP_load_constant,       /* load constant */
+    OP_load_data_address,   /* lookup address of a constant in data section */
+    OP_load_rodata_address, /* lookup address of a constant in rodata section */
 
</file context>
Suggested change
OP_load_data_address, /* lookup address of a constant in data section */
OP_load_data_address, /* lookup address of a symbol in data section */
Fix with Cubic

OP_load_rodata_address, /* lookup address of a constant in rodata section */

/* control flow */
OP_branch, /* conditional jump */
Expand Down Expand Up @@ -353,7 +355,8 @@ struct var {
int ptr_level;
bool is_func;
bool is_global;
bool address_taken; /* true if variable address was taken (&var) */
bool is_const_qualified; /* true if variable has const qualifier */
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar boolean names for different concepts can be confusing; distinguish qualifier-const from constant-value more clearly to avoid misuse.

Prompt for AI agents
Address the following comment on src/defs.h at line 358:

<comment>Similar boolean names for different concepts can be confusing; distinguish qualifier-const from constant-value more clearly to avoid misuse.</comment>

<file context>
@@ -353,11 +355,14 @@ struct var {
     bool is_func;
     bool is_global;
-    bool address_taken; /* true if variable address was taken (&var) */
+    bool is_const_qualified; /* true if variable has const qualifier */
+    bool address_taken;      /* true if variable address was taken (&var) */
     int array_size;
</file context>
Fix with Cubic

bool address_taken; /* true if variable address was taken (&var) */
int array_size;
int array_dim1, array_dim2; /* first/second dimension size for 2D arrays */
int offset; /* offset from stack or frame, index 0 is reserved */
Expand Down
113 changes: 85 additions & 28 deletions src/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

int elf_symbol_index;

void elf_write_str(strbuf_t *elf_array, char *vals)
void elf_write_str(strbuf_t *elf_array, const char *vals)
{
/*
* Note that strbuf_puts() does not push the null character.
Expand Down Expand Up @@ -64,8 +64,7 @@ void elf_generate_header(void)
}

elf32_hdr_t hdr;
/*
* The following table explains the meaning of each field in the
/* The following table explains the meaning of each field in the
* ELF32 file header.
*
* Notice that the following values are hexadecimal.
Expand Down Expand Up @@ -134,26 +133,34 @@ void elf_generate_header(void)
hdr.e_version = 1; /* ELF version */
hdr.e_entry = ELF_START + elf_header_len; /* entry point */
hdr.e_phoff = 0x34; /* program header offset */
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size + 39 +
elf_symtab->size +
elf_strtab->size; /* section header offset */
hdr.e_flags = ELF_FLAGS; /* flags */
hdr.e_ehsize[0] = (char) 0x34; /* header size */
/* Section header offset: The section headers come after symtab, strtab, and
* shstrtab which are all written as part of elf_section buffer.
* shstrtab size = 1 (null) + 10 (.shstrtab\0) + 6 (.text\0) + 6 (.data\0) +
* 8 (.rodata\0) + 5 (.bss\0) + 8 (.symtab\0) + 8
* (.strtab\0) + 1 (padding) = 53
*/
const int shstrtab_size = 53; /* section header string table with padding */
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size +
elf_rodata->size + elf_symtab->size + elf_strtab->size +
shstrtab_size;
hdr.e_flags = ELF_FLAGS; /* flags */
hdr.e_ehsize[0] = (char) 0x34; /* header size */
hdr.e_ehsize[1] = 0;
hdr.e_phentsize[0] = (char) 0x20; /* program header size */
hdr.e_phentsize[1] = 0;
hdr.e_phnum[0] = 1; /* number of program headers */
hdr.e_phnum[1] = 0;
hdr.e_shentsize[0] = (char) 0x28; /* section header size */
hdr.e_shentsize[1] = 0;
hdr.e_shnum[0] = 6; /* number of section headers */
/* number of section headers: .rodata and .bss included */
hdr.e_shnum[0] = 8;
hdr.e_shnum[1] = 0;
hdr.e_shstrndx[0] = 5; /* section index with names */
/* section index with names: updated for new sections */
hdr.e_shstrndx[0] = 7;
hdr.e_shstrndx[1] = 0;
elf_write_blk(elf_header, &hdr, sizeof(elf32_hdr_t));

/*
* Explain the meaning of each field in the ELF32 program header.
/* Explain the meaning of each field in the ELF32 program header.
*
* | Program | |
* & | Header bytes | Explanation |
Expand All @@ -176,14 +183,16 @@ void elf_generate_header(void)
*/
/* program header - code and data combined */
elf32_phdr_t phdr;
phdr.p_type = 1; /* PT_LOAD */
phdr.p_offset = elf_header_len; /* offset of segment */
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
phdr.p_filesz = elf_code->size + elf_data->size; /* size in file */
phdr.p_memsz = elf_code->size + elf_data->size; /* size in memory */
phdr.p_flags = 7; /* flags */
phdr.p_align = 4; /* alignment */
phdr.p_type = 1; /* PT_LOAD */
phdr.p_offset = elf_header_len; /* offset of segment */
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
phdr.p_filesz = elf_code->size + elf_data->size +
elf_rodata->size; /* size in file - includes .rodata */
phdr.p_memsz = elf_code->size + elf_data->size + elf_rodata->size +
elf_bss_size; /* size in memory - includes .bss */
phdr.p_flags = 7; /* flags */
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single RWX PT_LOAD segment means .rodata isn’t actually read-only; split into RX and RW PT_LOAD segments to enforce protection.

Prompt for AI agents
Address the following comment on src/elf.c at line 190:

<comment>Single RWX PT_LOAD segment means .rodata isn’t actually read-only; split into RX and RW PT_LOAD segments to enforce protection.</comment>

<file context>
@@ -176,14 +179,16 @@ void elf_generate_header(void)
+                    elf_rodata->size; /* size in file - includes .rodata */
+    phdr.p_memsz = elf_code->size + elf_data->size + elf_rodata->size +
+                   elf_bss_size; /* size in memory - includes .bss */
+    phdr.p_flags = 7;            /* flags */
+    phdr.p_align = 4;            /* alignment */
     elf_write_blk(elf_header, &phdr, sizeof(elf32_phdr_t));
</file context>
Fix with Cubic

phdr.p_align = 4; /* alignment */
elf_write_blk(elf_header, &phdr, sizeof(elf32_phdr_t));
}

Expand All @@ -195,26 +204,39 @@ void elf_generate_sections(void)
return;
}

int section_data_size = 0;
int shstrtab_start = 0; /* Track start of shstrtab */

/* symtab section */
for (int b = 0; b < elf_symtab->size; b++)
elf_write_byte(elf_section, elf_symtab->elements[b]);
section_data_size += elf_symtab->size;

/* strtab section */
for (int b = 0; b < elf_strtab->size; b++)
elf_write_byte(elf_section, elf_strtab->elements[b]);
section_data_size += elf_strtab->size;

/* shstr section; len = 39 */
/* shstr section - compute size dynamically */
shstrtab_start = elf_section->size;
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".shstrtab");
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".text");
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".data");
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".rodata");
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".bss");
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".symtab");
elf_write_byte(elf_section, 0);
elf_write_str(elf_section, ".strtab");
elf_write_byte(elf_section, 0);
/* Add padding byte for alignment - some tools expect this */
elf_write_byte(elf_section, 0);
int shstrtab_size = elf_section->size - shstrtab_start;

/* section header table */
elf32_shdr_t shdr;
Expand Down Expand Up @@ -288,22 +310,51 @@ void elf_generate_sections(void)
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
ofs += elf_data->size;

/* .rodata */
shdr.sh_name = 0x17; /* Offset in shstrtab for ".rodata" */
shdr.sh_type = 1; /* SHT_PROGBITS */
shdr.sh_flags = 2; /* SHF_ALLOC only (read-only) */
shdr.sh_addr = elf_code_start + elf_code->size + elf_data->size;
shdr.sh_offset = ofs;
shdr.sh_size = elf_rodata->size;
shdr.sh_link = 0;
shdr.sh_info = 0;
shdr.sh_addralign = 4;
shdr.sh_entsize = 0;
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
ofs += elf_rodata->size;

/* .bss */
shdr.sh_name = 0x1f; /* Offset in shstrtab for ".bss" */
shdr.sh_type = 8; /* SHT_NOBITS */
shdr.sh_flags = 3; /* SHF_ALLOC | SHF_WRITE */
shdr.sh_addr =
elf_code_start + elf_code->size + elf_data->size + elf_rodata->size;
shdr.sh_offset = ofs; /* File offset (not actually used for NOBITS) */
shdr.sh_size = elf_bss_size;
shdr.sh_link = 0;
shdr.sh_info = 0;
shdr.sh_addralign = 4;
shdr.sh_entsize = 0;
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
/* Note: .bss is not written to file (SHT_NOBITS) */

/* .symtab */
shdr.sh_name = 0x17;
shdr.sh_name = 0x24; /* Updated offset for ".symtab" */
shdr.sh_type = 2;
shdr.sh_flags = 0;
shdr.sh_addr = 0;
shdr.sh_offset = ofs;
shdr.sh_size = elf_symtab->size;
shdr.sh_link = 4;
shdr.sh_link = 6; /* Link to .strtab (section 6) */
shdr.sh_info = elf_symbol_index;
shdr.sh_addralign = 4;
shdr.sh_entsize = 16;
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
ofs += elf_symtab->size;

/* .strtab */
shdr.sh_name = 0x1f;
shdr.sh_name = 0x2c; /* Updated offset for ".strtab" */
shdr.sh_type = 3;
shdr.sh_flags = 0;
shdr.sh_addr = 0;
Expand All @@ -322,7 +373,7 @@ void elf_generate_sections(void)
shdr.sh_flags = 0;
shdr.sh_addr = 0;
shdr.sh_offset = ofs;
shdr.sh_size = 39;
shdr.sh_size = shstrtab_size; /* Computed dynamically */
shdr.sh_link = 0;
shdr.sh_info = 0;
shdr.sh_addralign = 1;
Expand All @@ -333,22 +384,25 @@ void elf_generate_sections(void)
/* Pad the ELF buffers handled here with zero bytes until each buffer's size
 * is a multiple of 4 (the loops run while the low two bits of the size are
 * non-zero). Reports an error and returns early if a required buffer pointer
 * is NULL.
 */
void elf_align(void)
{
/* Check for null pointers to prevent crashes */
if (!elf_data || !elf_symtab || !elf_strtab) {
if (!elf_data || !elf_rodata || !elf_symtab || !elf_strtab) {
error("ELF buffers not initialized for alignment");
return;
}

/* pad .data contents to a 4-byte boundary */
while (elf_data->size & 3)
elf_write_byte(elf_data, 0);

/* pad .rodata contents to a 4-byte boundary */
while (elf_rodata->size & 3)
elf_write_byte(elf_rodata, 0);

/* pad the symbol table to a 4-byte boundary */
while (elf_symtab->size & 3)
elf_write_byte(elf_symtab, 0);

/* pad the string table to a 4-byte boundary */
while (elf_strtab->size & 3)
elf_write_byte(elf_strtab, 0);
}

void elf_add_symbol(char *symbol, int pc)
void elf_add_symbol(const char *symbol, int pc)
{
/* Check for null pointers to prevent crashes */
if (!symbol || !elf_symtab || !elf_strtab) {
Expand All @@ -366,7 +420,7 @@ void elf_add_symbol(char *symbol, int pc)
elf_symbol_index++;
}

void elf_generate(char *outfile)
void elf_generate(const char *outfile)
{
elf_align();
elf_generate_header();
Expand All @@ -387,6 +441,9 @@ void elf_generate(char *outfile)
fputc(elf_code->elements[i], fp);
for (int i = 0; i < elf_data->size; i++)
fputc(elf_data->elements[i], fp);
for (int i = 0; i < elf_rodata->size; i++)
fputc(elf_rodata->elements[i], fp);
/* Note: .bss is not written to file (SHT_NOBITS) */
for (int i = 0; i < elf_section->size; i++)
fputc(elf_section->elements[i], fp);
fclose(fp);
Expand Down
Loading