From bc8e5c15d58a6df0a69045f0f75f9edc1b969cdd Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 22 May 2024 13:14:51 +0100 Subject: [PATCH 01/88] Attempt to make vec_median a binary. --- cli-rs/src/main.rs | 67 ++++++++++++++++++++ riscv/Cargo.toml | 2 + riscv/src/compiler.rs | 4 +- riscv/src/lib.rs | 15 +++++ riscv/tests/riscv_data/vec_median/Cargo.toml | 3 + riscv/tests/riscv_data/vec_median/src/lib.rs | 39 ------------ 6 files changed, 89 insertions(+), 41 deletions(-) delete mode 100644 riscv/tests/riscv_data/vec_median/src/lib.rs diff --git a/cli-rs/src/main.rs b/cli-rs/src/main.rs index ec2897b23..13b0e0734 100644 --- a/cli-rs/src/main.rs +++ b/cli-rs/src/main.rs @@ -92,6 +92,33 @@ enum Commands { #[arg(default_value_t = false)] continuations: bool, }, + /// Translates a RISC-V statically liked executable to powdr assembly and + /// then to PIL and generates fixed and witness columns. + RiscvElf { + /// Input file + #[arg(required = true)] + file: String, + + /// The field to use + #[arg(long)] + #[arg(default_value_t = FieldArgument::Gl)] + #[arg(value_parser = clap_enum_variants!(FieldArgument))] + field: FieldArgument, + + /// Directory for output files. + #[arg(short, long)] + #[arg(default_value_t = String::from("."))] + output_directory: String, + + /// Comma-separated list of coprocessors. + #[arg(long)] + coprocessors: Option, + + /// Run a long execution in chunks (Experimental and not sound!) + #[arg(short, long)] + #[arg(default_value_t = false)] + continuations: bool, + }, /// Executes a powdr-asm file with given inputs. 
Execute { /// input powdr-asm code compiled from Rust/RISCV @@ -213,6 +240,20 @@ fn run_command(command: Commands) { continuations )) } + Commands::RiscvElf { + file, + field, + output_directory, + coprocessors, + continuations, + } => { + call_with_field!(compile_riscv_elf::( + &file, + Path::new(&output_directory), + coprocessors, + continuations + )) + } Commands::Execute { file, field, @@ -290,6 +331,32 @@ fn compile_riscv_asm( Ok(()) } +fn compile_riscv_elf( + input_file: &str, + output_dir: &Path, + coprocessors: Option, + continuations: bool, +) -> Result<(), Vec> { + let runtime = match coprocessors { + Some(list) => { + powdr_riscv::Runtime::try_from(list.split(',').collect::>().as_ref()).unwrap() + } + None => powdr_riscv::Runtime::base(), + }; + + powdr_riscv::compile_riscv_elf::( + input_file, + input_file, + output_dir, + true, + &runtime, + continuations, + ) + .ok_or_else(|| vec!["could not translate RISC-V executable".to_string()])?; + + Ok(()) +} + #[allow(clippy::too_many_arguments)] fn execute( file_name: &Path, diff --git a/riscv/Cargo.toml b/riscv/Cargo.toml index d75b3b706..d4d4b498d 100644 --- a/riscv/Cargo.toml +++ b/riscv/Cargo.toml @@ -24,12 +24,14 @@ powdr-pipeline.workspace = true powdr-riscv-executor.workspace = true powdr-riscv-syscalls.workspace = true +goblin = { version = "0.8" } lazy_static = "1.4.0" itertools = "^0.10" lalrpop-util = { version = "^0.19", features = ["lexer"] } log = "0.4.17" mktemp = "0.5.0" num-traits = "0.2.15" +raki = "0.1.3" serde_json = "1.0" # This is only here to work around https://github.com/lalrpop/lalrpop/issues/750 # It should be removed once that workaround is no longer needed. 
diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 2e25b6a3a..5f96a2b0e 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -109,9 +109,9 @@ pub fn compile( with_bootloader: bool, ) -> String { // stack grows towards zero - let stack_start = 0x10000; + let stack_start = 0x10000000; // data grows away from zero - let data_start = 0x10100; + let data_start = 0x10000100; assert!(assemblies .insert("__runtime".to_string(), runtime.global_declarations()) diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index 5bc61fb5e..dd119685b 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -19,6 +19,7 @@ pub use crate::runtime::Runtime; pub mod compiler; pub mod continuations; mod disambiguator; +mod elf_translate; pub mod parser; pub mod runtime; @@ -138,6 +139,20 @@ pub fn compile_riscv_asm( ) } +/// Translates a RISC-V ELF file all the way down to PIL and generates fixed and +/// witness columns. +pub fn compile_riscv_elf( + original_file_name: &str, + input_file: &str, + output_dir: &Path, + force_overwrite: bool, + runtime: &Runtime, + with_bootloader: bool, +) -> Option<(PathBuf, String)> { + elf_translate::elf_translate(input_file); + todo!() +} + macro_rules! as_ref [ ($t:ty; $($x:expr),* $(,)?) => { [$(AsRef::<$t>::as_ref(&$x)),+] diff --git a/riscv/tests/riscv_data/vec_median/Cargo.toml b/riscv/tests/riscv_data/vec_median/Cargo.toml index b4b17227e..a074d42b8 100644 --- a/riscv/tests/riscv_data/vec_median/Cargo.toml +++ b/riscv/tests/riscv_data/vec_median/Cargo.toml @@ -6,4 +6,7 @@ edition = "2021" [dependencies] powdr-riscv-runtime = { path = "../../../../riscv-runtime" } +[profile.release] +panic = "abort" + [workspace] diff --git a/riscv/tests/riscv_data/vec_median/src/lib.rs b/riscv/tests/riscv_data/vec_median/src/lib.rs deleted file mode 100644 index 88ae2a369..000000000 --- a/riscv/tests/riscv_data/vec_median/src/lib.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! 
Calculates the round-down median of a given input vector, and compares with -//! the expected value. -//! -//! First argument is the expected value, second is the number of elements, the -//! other are the elements. -//! -//! For example, the following will calculate the median of the 11 elements -//! vector [15,75,6,5,1,4,7,3,2,9,2] and compare the result with the expected -//! value of 5: -//! ``` -//! cargo run --release rust riscv/tests/riscv_data/vec_median -o tmp -f -i 5,11,15,75,6,5,1,4,7,3,2,9,2 -//! ``` - -#![no_std] - -extern crate alloc; - -use alloc::vec::Vec; -use powdr_riscv_runtime::input::get_prover_input; -use powdr_riscv_runtime::print; - -#[no_mangle] -fn main() { - let expected = get_prover_input(0); - let len = get_prover_input(1); - - let mut vec: Vec<_> = (2..(len + 2)).map(|idx| get_prover_input(idx)).collect(); - vec.sort(); - - let half = (len / 2) as usize; - let median = if len & 1 == 1 { - vec[half] - } else { - (vec[half - 1] + vec[half]) / 2 - }; - - print!("Found median of {median}\n"); - assert_eq!(median, expected); -} From 2544ec30cd61a9dcc4fd059a6987ba3629e69a33 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 22 May 2024 13:17:23 +0100 Subject: [PATCH 02/88] Missing moved file. --- riscv/tests/riscv_data/vec_median/src/main.rs | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 riscv/tests/riscv_data/vec_median/src/main.rs diff --git a/riscv/tests/riscv_data/vec_median/src/main.rs b/riscv/tests/riscv_data/vec_median/src/main.rs new file mode 100644 index 000000000..334b436b6 --- /dev/null +++ b/riscv/tests/riscv_data/vec_median/src/main.rs @@ -0,0 +1,39 @@ +//! Calculates the round-down median of a given input vector, and compares with +//! the expected value. +//! +//! First argument is the expected value, second is the number of elements, the +//! other are the elements. +//! +//! For example, the following will calculate the median of the 11 elements +//! 
vector [15,75,6,5,1,4,7,3,2,9,2] and compare the result with the expected +//! value of 5: +//! ``` +//! cargo run --release rust riscv/tests/riscv_data/vec_median -o tmp -f -i 5,11,15,75,6,5,1,4,7,3,2,9,2 +//! ``` +#![no_main] +#![no_std] + +extern crate alloc; + +use alloc::vec::Vec; +use powdr_riscv_runtime::input::get_prover_input; +use powdr_riscv_runtime::print; + +/// entry point called by the runtime +pub fn main() { + let expected = get_prover_input(0); + let len = get_prover_input(1); + + let mut vec: Vec<_> = (2..(len + 2)).map(|idx| get_prover_input(idx)).collect(); + vec.sort(); + + let half = (len / 2) as usize; + let median = if len & 1 == 1 { + vec[half] + } else { + (vec[half - 1] + vec[half]) / 2 + }; + + print!("Found median of {median}\n"); + assert_eq!(median, expected); +} From e750285d5f2191d341dbf8fe2a163da14fe27561 Mon Sep 17 00:00:00 2001 From: Leo Alt Date: Wed, 22 May 2024 14:24:08 +0200 Subject: [PATCH 03/88] works --- riscv-runtime/src/lib.rs | 2 +- riscv/tests/riscv_data/vec_median/src/main.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/riscv-runtime/src/lib.rs b/riscv-runtime/src/lib.rs index 8da9758d1..c59f24a96 100644 --- a/riscv-runtime/src/lib.rs +++ b/riscv-runtime/src/lib.rs @@ -39,7 +39,7 @@ extern "Rust" { } #[no_mangle] #[start] -pub unsafe extern "C" fn __runtime_start() { +pub unsafe extern "C" fn _start() { unsafe { main(); } diff --git a/riscv/tests/riscv_data/vec_median/src/main.rs b/riscv/tests/riscv_data/vec_median/src/main.rs index 334b436b6..c55a5ed13 100644 --- a/riscv/tests/riscv_data/vec_median/src/main.rs +++ b/riscv/tests/riscv_data/vec_median/src/main.rs @@ -20,6 +20,7 @@ use powdr_riscv_runtime::input::get_prover_input; use powdr_riscv_runtime::print; /// entry point called by the runtime +#[no_mangle] pub fn main() { let expected = get_prover_input(0); let len = get_prover_input(1); From 60505929c23cace4c46aa83ac6b4357d63667437 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: 
Wed, 22 May 2024 21:26:22 +0100 Subject: [PATCH 04/88] Starting implementation of instructions lifting. --- riscv/src/compiler.rs | 8 +- riscv/src/elf_translate.rs | 339 +++++++++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+), 4 deletions(-) create mode 100644 riscv/src/elf_translate.rs diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 5f96a2b0e..d6a2c1bbf 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -74,9 +74,9 @@ impl Architecture for RiscvArchitecture { | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" - | "sh" | "sb" | "nop" | "fence" | "fence.i" | "amoadd.w" | "amoadd.w.aq" - | "amoadd.w.rl" | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" - | "sc.w" | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, + | "sh" | "sb" | "nop" | "fence" | "amoadd.w" | "amoadd.w.aq" | "amoadd.w.rl" + | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" | "sc.w" + | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, "j" | "jr" | "tail" | "ret" | "unimp" => true, _ => { panic!("Unknown instruction: {instr}"); @@ -1444,7 +1444,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { format!("mstore {rd} + {off} - tmp2, tmp1;"), ] } - "fence" | "fence.i" | "nop" => vec![], + "fence" | "nop" => vec![], "unimp" => vec!["fail;".to_string()], // atomic instructions diff --git a/riscv/src/elf_translate.rs b/riscv/src/elf_translate.rs new file mode 100644 index 000000000..97d12820d --- /dev/null +++ b/riscv/src/elf_translate.rs @@ -0,0 +1,339 @@ +use std::{ + collections::{BTreeMap, HashSet}, + fs, +}; + +use goblin::elf::{program_header, Elf}; +use raki::{ + decode::Decode, + instruction::{Extensions, Instruction as Ins, OpcodeKind as Op}, + Isa, +}; + +pub fn elf_translate(file_name: &str) { + let file_buffer = 
fs::read(file_name).unwrap(); + + let elf = Elf::parse(&file_buffer).unwrap(); + println!("{:#?}", elf); + /*for p in elf.program_headers { + println!("{:?}", p); + }*/ + + // Index the sections by their virtual address + let mut text_sections = BTreeMap::new(); + let mut data_sections = BTreeMap::new(); + + for p in elf.program_headers.iter() { + if p.p_type == program_header::PT_LOAD { + // Test if executable + if p.p_flags & 1 == 1 { + text_sections.insert( + p.p_vaddr as u32, + // Slice containing the section data. Since this is a + // text section, we assume any zeroed part beyond + // p_filesz (if any) is not relevant. + &file_buffer[p.p_offset as usize..(p.p_offset + p.p_filesz) as usize], + ); + } else { + data_sections.insert(p.p_vaddr, p); + } + } + } + + extract_reachable_code(elf.entry.try_into().unwrap(), &text_sections); +} + +fn extract_reachable_code(entry_point: u32, text_sections: &BTreeMap) { + // Helper function to find the section containing the address + let find_section_of_address = |addr| { + let (§ion_addr, &data) = text_sections + .range(..=addr) + .next_back() + .expect("Jump address not found in any .text section"); + (section_addr, data) + }; + + let mut visited = HashSet::new(); + let mut to_visit = vec![find_section_of_address(entry_point)]; + + while let Some((addr, section_data)) = to_visit.pop() { + // Sanity check of the alignment + assert_eq!(addr % 2, 0); + + visited.insert(addr); + + // We assume the entire section is code, so decode and translate it from + // start. 
+ let code = convert_to_pseudoinstructions(addr, section_data); + } +} + +struct PseudoInstruction<'a> { + op: &'a str, + rd: Option, + rs1: Option, + rs2: Option, + imm: Option, +} + +struct PseudoInstructionConverter { + base_addr: u32, +} + +impl TwoOrOneMapper> for PseudoInstructionConverter { + fn try_map_two(&mut self, insn1: &Ins, insn2: &Ins) -> Option> { + let result = match (insn1, insn2) { + ( + Ins { + opc: Op::AUIPC, + rd: Some(rd_auipc), + imm: Some(hi), + .. + }, + Ins { + opc: Op::ADDI, + rd: Some(rd_addi), + rs1: Some(rs1_addi), + imm: Some(lo), + .. + }, + ) if rd_auipc == rd_addi && rd_auipc == rs1_addi => PseudoInstruction { + op: "la", + rd: Some(*rd_auipc as u32), + rs1: None, + rs2: None, + imm: Some((((*hi as i32) << 12) | (*lo as i32)) + self.base_addr as i32), + }, + // TODO: add more pseudoinstructions + // TODO: undo linker relaxation relative to "gp" register + // TODO: transform relative addresses to absolute addresses + _ => return None, + }; + + self.base_addr += [insn1, insn2].map(ins_size).into_iter().sum::(); + + Some(result) + } + + fn map_one(&mut self, insn: Ins) -> PseudoInstruction<'static> { + self.base_addr += ins_size(&insn); + + PseudoInstruction { + op: insn.opc.to_string(), + rd: insn.rd.map(|x| x as u32), + rs1: insn.rs1.map(|x| x as u32), + rs2: insn.rs2.map(|x| x as u32), + imm: insn.imm, + } + } +} + +/// Lift the instructions back to higher-level pseudoinstructions. Just pass +/// throught instruction sets that don't have a pseudoinstruction equivalent. 
+fn convert_to_pseudoinstructions(base_addr: u32, data: &[u8]) -> Vec { + let instructions = RiscVInstructionIterator::new(data); + + let pseudo_converter = PseudoInstructionConverter { base_addr }; + try_map_two_by_two(instructions, PseudoInstructionConverter { base_addr }) +} + +struct RiscVInstructionIterator<'a> { + remaining_data: &'a [u8], +} + +impl RiscVInstructionIterator<'_> { + fn new(data: &[u8]) -> RiscVInstructionIterator { + RiscVInstructionIterator { + remaining_data: data, + } + } +} + +impl Iterator for RiscVInstructionIterator<'_> { + type Item = Ins; + + fn next(&mut self) -> Option { + if self.remaining_data.is_empty() { + return None; + } + + // Decide if the next instruction is 32 bits or 16 bits ("C" extension): + let advance; + let insn; + if self.remaining_data[0] & 0b11 == 0b11 { + // 32 bits + advance = 4; + insn = u32::from_le_bytes( + self.remaining_data[0..4] + .try_into() + .expect("Not enough bytes to complete a 32-bit instruction!"), + ) + .decode(Isa::Rv32) + .expect("Failed to decode instruction.") + } else { + // 16 bits + advance = 2; + let c_insn = u16::from_le_bytes( + self.remaining_data[0..2] + .try_into() + .expect("Not enough bytes to complete a 16-bit instruction!"), + ) + .decode(Isa::Rv32) + .expect("Failed to decode instruction."); + + insn = to_32bit_equivalent(c_insn); + } + + // Advance the iterator + self.remaining_data = &self.remaining_data[advance..]; + + Some(insn) + } +} + +/// Get the size, in bytes, of an instruction. +fn ins_size(ins: &Ins) -> u32 { + match ins.extension { + Extensions::C => 2, + _ => 4, + } +} + +/// Translates an extension "C" instruction to the equivalent 32-bit instruction. 
+fn to_32bit_equivalent(mut insn: Ins) -> Ins { + let new_opc = match insn.opc { + Op::C_LW => Op::LW, + Op::C_SW => Op::SW, + Op::C_NOP => { + return Ins { + opc: Op::C_ADDI, + rd: Some(0), + rs1: Some(0), + ..insn + } + } + Op::C_ADDI | Op::C_ADDI16SP => Op::ADDI, + Op::C_LI => { + return Ins { + opc: Op::ADDI, + rs1: Some(0), + ..insn + } + } + Op::C_JAL => { + return Ins { + opc: Op::JAL, + rd: Some(1), // output to x1 (return address) + ..insn + }; + } + Op::C_LUI => Op::LUI, + Op::C_SRLI => Op::SRLI, + Op::C_SRAI => Op::SRAI, + Op::C_ANDI => Op::ANDI, + Op::C_SUB => Op::SUB, + Op::C_XOR => Op::XOR, + Op::C_OR => Op::OR, + Op::C_AND => Op::AND, + Op::C_J => { + return Ins { + opc: Op::JAL, + rd: Some(0), // discard output + ..insn + }; + } + Op::C_BEQZ => { + return Ins { + opc: Op::BEQ, + rs2: Some(0), // compare with zero + ..insn + }; + } + Op::C_BNEZ => { + return Ins { + opc: Op::BNE, + rs2: Some(0), // compare with zero + ..insn + }; + } + Op::C_SLLI => Op::C_SLLI, + Op::C_LWSP => { + return Ins { + opc: Op::LW, + rs1: Some(2), // load relative to x2 (stack pointer) + ..insn + }; + } + Op::C_JR => { + return Ins { + opc: Op::JALR, + rd: Some(0), // discard the return address + imm: Some(0), // jump to the exact address + ..insn + }; + } + Op::C_MV => { + return Ins { + opc: Op::ADD, + rs1: Some(0), // add to zero + ..insn + }; + } + Op::C_EBREAK => Op::EBREAK, + Op::C_JALR => { + return Ins { + opc: Op::JALR, + rd: Some(1), // output to x1 (return address) + imm: Some(0), // jump to the exact address + ..insn + }; + } + Op::C_ADD => Op::ADD, + Op::C_SWSP => { + return Ins { + opc: Op::SW, + rs1: Some(2), // store relative to x2 (stack pointer) + ..insn + }; + } + Op::C_LD | Op::C_SD | Op::C_ADDIW | Op::C_SUBW | Op::C_ADDW | Op::C_LDSP | Op::C_SDSP => { + unreachable!("not a riscv32 instruction") + } + _ => unreachable!("not a RISC-V \"C\" extension instruction"), + }; + + insn.opc = new_opc; + insn +} + +trait TwoOrOneMapper { + fn try_map_two(&mut self, 
first: &E, second: &E) -> Option; + fn map_one(&mut self, element: E) -> R; +} + +/// Takes an iterator, and maps the elements two by two. If fails, maps +/// individually. +/// +/// TODO: this would be more elegant as a generator, but they are unstable. +fn try_map_two_by_two( + input: impl Iterator, + mut mapper: impl TwoOrOneMapper, +) -> Vec { + let mut result = Vec::new(); + let mut iter = input.peekable(); + + while let Some(first) = iter.next() { + if let Some(second) = iter.peek() { + if let Some(mapped) = mapper.try_map_two(&first, second) { + result.push(mapped); + iter.next(); + } else { + result.push(mapper.map_one(first)); + } + } else { + result.push(mapper.map_one(first)); + } + } + + result +} From 424115f2e41d0eb8d634147afea440341d799c08 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 29 May 2024 17:06:46 +0200 Subject: [PATCH 05/88] Refactoring instruction processor to be more generic. --- asm-utils/src/utils.rs | 20 +-- riscv/src/compiler.rs | 335 ++++++++++++++++++++++------------------- 2 files changed, 195 insertions(+), 160 deletions(-) diff --git a/asm-utils/src/utils.rs b/asm-utils/src/utils.rs index 771a684e1..e1ea98585 100644 --- a/asm-utils/src/utils.rs +++ b/asm-utils/src/utils.rs @@ -41,26 +41,28 @@ pub fn escape_label(l: &str) -> String { l.replace('.', "_dot_").replace('/', "_slash_") } -pub fn argument_to_escaped_symbol(x: &Argument) -> String { +pub fn argument_to_escaped_symbol( + x: &Argument, +) -> Option { if let Argument::Expression(Expression::Symbol(symbol)) = x { - escape_label(symbol) + Some(escape_label(symbol)) } else { - panic!("Expected a symbol, got {x}"); + None } } -pub fn argument_to_number(x: &Argument) -> u32 { +pub fn argument_to_number(x: &Argument) -> Option { if let Argument::Expression(expr) = x { - expression_to_number(expr) + Some(expression_to_number(expr)?) 
} else { - panic!("Expected numeric expression, got {x}") + None } } -pub fn expression_to_number(expr: &Expression) -> u32 { +pub fn expression_to_number(expr: &Expression) -> Option { if let Expression::Number(n) = expr { - *n as u32 + Some(*n as u32) } else { - panic!("Constant expression could not be fully resolved to a number during preprocessing: {expr}"); + None } } diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 2e25b6a3a..59a396b1d 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -841,95 +841,141 @@ fn process_statement(s: Statement) -> Vec { // remove indentation and trailing newline let stmt_str = &stmt_str[2..(stmt_str.len() - 1)]; let mut ret = vec![format!(" .debug insn \"{stmt_str}\";")]; - ret.extend( - process_instruction(instr, args) - .into_iter() - .map(|s| " ".to_string() + &s), - ); + let processed_instr = match process_instruction(instr, &args[..]) { + Ok(s) => s, + Err(e) => panic!("Failed to process instruction '{instr}'. {e}"), + }; + ret.extend(processed_instr.into_iter().map(|s| " ".to_string() + &s)); ret } } } -fn r(args: &[Argument]) -> Register { - match args { - [Argument::Register(r1)] => *r1, - _ => panic!(), - } +trait Args { + type Error; + + fn l(&self) -> Result; + fn r(&self) -> Result; + fn rri(&self) -> Result<(Register, Register, u32), Self::Error>; + fn rrr(&self) -> Result<(Register, Register, Register), Self::Error>; + fn ri(&self) -> Result<(Register, u32), Self::Error>; + fn rr(&self) -> Result<(Register, Register), Self::Error>; + fn rrl(&self) -> Result<(Register, Register, String), Self::Error>; + fn rl(&self) -> Result<(Register, String), Self::Error>; + fn rro(&self) -> Result<(Register, Register, u32), Self::Error>; + fn rrro(&self) -> Result<(Register, Register, Register, u32), Self::Error>; + fn empty(&self) -> Result<(), Self::Error>; } -fn rri(args: &[Argument]) -> (Register, Register, u32) { - match args { - [Argument::Register(r1), Argument::Register(r2), n] => (*r1, *r2, 
argument_to_number(n)), - _ => panic!(), +impl Args for [Argument] { + type Error = &'static str; + + fn l(&self) -> Result { + const ERR: &str = "Expected: label"; + match self { + [l] => Ok(argument_to_escaped_symbol(l).ok_or(ERR)?), + _ => Err(ERR), + } } -} -fn rrr(args: &[Argument]) -> (Register, Register, Register) { - match args { - [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => (*r1, *r2, *r3), - _ => panic!(), + fn r(&self) -> Result { + match self { + [Argument::Register(r1)] => Ok(*r1), + _ => Err("Expected: register"), + } } -} -fn ri(args: &[Argument]) -> (Register, u32) { - match args { - [Argument::Register(r1), n] => (*r1, argument_to_number(n)), - _ => panic!(), + fn rri(&self) -> Result<(Register, Register, u32), &'static str> { + const ERR: &str = "Expected: register, register, immediate"; + match self { + [Argument::Register(r1), Argument::Register(r2), n] => { + Ok((*r1, *r2, argument_to_number(n).ok_or(ERR)?)) + } + _ => Err(ERR), + } } -} -fn rr(args: &[Argument]) -> (Register, Register) { - match args { - [Argument::Register(r1), Argument::Register(r2)] => (*r1, *r2), - _ => panic!(), + fn rrr(&self) -> Result<(Register, Register, Register), &'static str> { + match self { + [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => { + Ok((*r1, *r2, *r3)) + } + _ => Err("Expected: register, register, register"), + } } -} -fn rrl(args: &[Argument]) -> (Register, Register, String) { - match args { - [Argument::Register(r1), Argument::Register(r2), l] => { - (*r1, *r2, argument_to_escaped_symbol(l)) + fn ri(&self) -> Result<(Register, u32), &'static str> { + const ERR: &str = "Expected: register, immediate"; + match self { + [Argument::Register(r1), n] => Ok((*r1, argument_to_number(n).ok_or(ERR)?)), + _ => Err(ERR), } - _ => panic!(), } -} -fn rl(args: &[Argument]) -> (Register, String) { - match args { - [Argument::Register(r1), l] => (*r1, argument_to_escaped_symbol(l)), - _ => panic!(), + fn 
rr(&self) -> Result<(Register, Register), &'static str> { + match self { + [Argument::Register(r1), Argument::Register(r2)] => Ok((*r1, *r2)), + _ => Err("Expected: register, register"), + } + } + + fn rrl(&self) -> Result<(Register, Register, String), &'static str> { + const ERR: &str = "Expected: register, register, label"; + match self { + [Argument::Register(r1), Argument::Register(r2), l] => { + Ok((*r1, *r2, argument_to_escaped_symbol(l).ok_or(ERR)?)) + } + _ => Err(ERR), + } } -} -fn rro(args: &[Argument]) -> (Register, Register, u32) { - match args { - [Argument::Register(r1), Argument::RegOffset(off, r2)] => ( - *r1, - *r2, - expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))), - ), - [Argument::Register(r1), Argument::Expression(off)] => { - // If the register is not specified, it defaults to x0 - (*r1, Register::new(0), expression_to_number(off)) - } - _ => panic!(), + fn rl(&self) -> Result<(Register, String), &'static str> { + const ERR: &str = "Expected: register, label"; + match self { + [Argument::Register(r1), l] => Ok((*r1, argument_to_escaped_symbol(l).ok_or(ERR)?)), + _ => Err(ERR), + } } -} -fn rrro(args: &[Argument]) -> (Register, Register, Register, u32) { - match args { - [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] => ( - *r1, - *r2, - *r3, - expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))), - ), - [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] => { - // If the register is not specified, it defaults to x0 - (*r1, *r2, Register::new(0), expression_to_number(off)) - } - _ => panic!(), + fn rro(&self) -> Result<(Register, Register, u32), &'static str> { + if let [Argument::Register(r1), Argument::RegOffset(off, r2)] = self { + if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) + { + return Ok((*r1, *r2, off)); + } + } + if let [Argument::Register(r1), Argument::Expression(off)] = self { + if let Some(off) = 
expression_to_number(off) { + // If the register is not specified, it defaults to x0 + return Ok((*r1, Register::new(0), off)); + } + } + + Err("Expected: register, offset(register)") + } + + fn rrro(&self) -> Result<(Register, Register, Register, u32), &'static str> { + if let [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] = self + { + if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) + { + return Ok((*r1, *r2, *r3, off)); + } + } + if let [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] = self { + if let Some(off) = expression_to_number(off) { + // If the register is not specified, it defaults to x0 + return Ok((*r1, *r2, Register::new(0), off)); + } + } + Err("Expected: register, register, offset(register)") + } + + fn empty(&self) -> Result<(), &'static str> { + match self { + [] => Ok(()), + _ => Err("Expected: no arguments"), + } } } @@ -961,59 +1007,58 @@ pub fn pop_register(name: &str) -> [String; 2] { ] } -fn process_instruction(instr: &str, args: &[Argument]) -> Vec { - match instr { +fn process_instruction(instr: &str, args: &A) -> Result, A::Error> { + Ok(match instr { // load/store registers "li" | "la" => { // The difference between "li" and "la" in RISC-V is that the former // is for loading values as is, and the later is for loading PC // relative values. But since we work on a higher abstraction level, // for us they are the same thing. 
- if let [_, Argument::Expression(Expression::Symbol(_))] = args { - let (rd, label) = rl(args); + if let Ok((rd, label)) = args.rl() { only_if_no_write_to_zero(format!("{rd} <== load_label({label});"), rd) } else { - let (rd, imm) = ri(args); + let (rd, imm) = args.ri()?; only_if_no_write_to_zero(format!("{rd} <=X= {imm};"), rd) } } // TODO check if it is OK to clear the lower order bits "lui" => { - let (rd, imm) = ri(args); + let (rd, imm) = args.ri()?; only_if_no_write_to_zero(format!("{rd} <=X= {};", imm << 12), rd) } "mv" => { - let (rd, rs) = rr(args); + let (rd, rs) = args.rr()?; only_if_no_write_to_zero(format!("{rd} <=X= {rs};"), rd) } // Arithmetic "add" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd} <== wrap({r1} + {r2});"), rd) } "addi" => { - let (rd, rs, imm) = rri(args); + let (rd, rs, imm) = args.rri()?; only_if_no_write_to_zero(format!("{rd} <== wrap({rs} + {imm});"), rd) } "sub" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd} <== wrap_signed({r1} - {r2});"), rd) } "neg" => { - let (rd, r1) = rr(args); + let (rd, r1) = args.rr()?; only_if_no_write_to_zero(format!("{rd} <== wrap_signed(0 - {r1});"), rd) } "mul" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd}, tmp1 <== mul({r1}, {r2});"), rd) } "mulhu" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("tmp1, {rd} <== mul({r1}, {r2});"), rd) } "mulh" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== to_signed({r1});"), @@ -1040,7 +1085,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { ) } "mulhsu" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== to_signed({r1});"), @@ -1061,47 +1106,47 @@ fn 
process_instruction(instr: &str, args: &[Argument]) -> Vec { ) } "divu" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd}, tmp1 <== divremu({r1}, {r2});"), rd) } "remu" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("tmp1, {rd} <== divremu({r1}, {r2});"), rd) } // bitwise "xor" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd} <== xor({r1}, {r2});"), rd) } "xori" => { - let (rd, r1, imm) = rri(args); + let (rd, r1, imm) = args.rri()?; only_if_no_write_to_zero(format!("{rd} <== xor({r1}, {imm});"), rd) } "and" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd} <== and({r1}, {r2});"), rd) } "andi" => { - let (rd, r1, imm) = rri(args); + let (rd, r1, imm) = args.rri()?; only_if_no_write_to_zero(format!("{rd} <== and({r1}, {imm});"), rd) } "or" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd} <== or({r1}, {r2});"), rd) } "ori" => { - let (rd, r1, imm) = rri(args); + let (rd, r1, imm) = args.rri()?; only_if_no_write_to_zero(format!("{rd} <== or({r1}, {imm});"), rd) } "not" => { - let (rd, rs) = rr(args); + let (rd, rs) = args.rr()?; only_if_no_write_to_zero(format!("{rd} <== wrap_signed(-{rs} - 1);"), rd) } // shift "slli" => { - let (rd, rs, amount) = rri(args); + let (rd, rs, amount) = args.rri()?; assert!(amount <= 31); only_if_no_write_to_zero_vec( if amount <= 16 { @@ -1116,7 +1161,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { ) } "sll" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== and({r2}, 0x1f);"), @@ -1127,13 +1172,13 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { } "srli" => { // logical shift right - let (rd, rs, amount) = rri(args); + let (rd, rs, 
amount) = args.rri()?; assert!(amount <= 31); only_if_no_write_to_zero(format!("{rd} <== shr({rs}, {amount});"), rd) } "srl" => { // logical shift right - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== and({r2}, 0x1f);"), @@ -1147,7 +1192,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { // TODO see if we can implement this directly with a machine. // Now we are using the equivalence // a >>> b = (a >= 0 ? a >> b : ~(~a >> b)) - let (rd, rs, amount) = rri(args); + let (rd, rs, amount) = args.rri()?; assert!(amount <= 31); only_if_no_write_to_zero_vec( vec![ @@ -1166,15 +1211,15 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { // comparison "seqz" => { - let (rd, rs) = rr(args); + let (rd, rs) = args.rr()?; only_if_no_write_to_zero(format!("{rd} <=Y= is_equal_zero({rs});"), rd) } "snez" => { - let (rd, rs) = rr(args); + let (rd, rs) = args.rr()?; only_if_no_write_to_zero(format!("{rd} <=Y= is_not_equal_zero({rs});"), rd) } "slti" => { - let (rd, rs, imm) = rri(args); + let (rd, rs, imm) = args.rri()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== to_signed({rs});"), @@ -1184,7 +1229,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { ) } "slt" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== to_signed({r1});"), @@ -1195,15 +1240,15 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { ) } "sltiu" => { - let (rd, rs, imm) = rri(args); + let (rd, rs, imm) = args.rri()?; only_if_no_write_to_zero(format!("{rd} <=Y= is_positive({imm} - {rs});"), rd) } "sltu" => { - let (rd, r1, r2) = rrr(args); + let (rd, r1, r2) = args.rrr()?; only_if_no_write_to_zero(format!("{rd} <=Y= is_positive({r2} - {r1});"), rd) } "sgtz" => { - let (rd, rs) = rr(args); + let (rd, rs) = args.rr()?; only_if_no_write_to_zero_vec( vec![ format!("tmp1 <== to_signed({rs});"), @@ 
-1215,31 +1260,31 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { // branching "beq" => { - let (r1, r2, label) = rrl(args); + let (r1, r2, label) = args.rrl()?; vec![format!("branch_if_zero {r1} - {r2}, {label};")] } "beqz" => { - let (r1, label) = rl(args); + let (r1, label) = args.rl()?; vec![format!("branch_if_zero {r1}, {label};")] } "bgeu" => { - let (r1, r2, label) = rrl(args); + let (r1, r2, label) = args.rrl()?; // TODO does this fulfill the input requirements for branch_if_positive? vec![format!("branch_if_positive {r1} - {r2} + 1, {label};")] } "bgez" => { - let (r1, label) = rl(args); + let (r1, label) = args.rl()?; vec![ format!("tmp1 <== to_signed({r1});"), format!("branch_if_positive tmp1 + 1, {label};"), ] } "bltu" => { - let (r1, r2, label) = rrl(args); + let (r1, r2, label) = args.rrl()?; vec![format!("branch_if_positive {r2} - {r1}, {label};")] } "blt" => { - let (r1, r2, label) = rrl(args); + let (r1, r2, label) = args.rrl()?; // Branch if r1 < r2 (signed). // TODO does this fulfill the input requirements for branch_if_positive? vec![ @@ -1249,7 +1294,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { ] } "bge" => { - let (r1, r2, label) = rrl(args); + let (r1, r2, label) = args.rrl()?; // Branch if r1 >= r2 (signed). // TODO does this fulfill the input requirements for branch_if_positive? 
vec![ @@ -1260,13 +1305,12 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { } "bltz" => { // branch if 2**31 <= r1 < 2**32 - let (r1, label) = rl(args); + let (r1, label) = args.rl()?; vec![format!("branch_if_positive {r1} - 2**31 + 1, {label};")] } - "blez" => { // branch less or equal zero - let (r1, label) = rl(args); + let (r1, label) = args.rl()?; vec![ format!("tmp1 <== to_signed({r1});"), format!("branch_if_positive -tmp1 + 1, {label};"), @@ -1274,44 +1318,35 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { } "bgtz" => { // branch if 0 < r1 < 2**31 - let (r1, label) = rl(args); + let (r1, label) = args.rl()?; vec![ format!("tmp1 <== to_signed({r1});"), format!("branch_if_positive tmp1, {label};"), ] } "bne" => { - let (r1, r2, label) = rrl(args); + let (r1, r2, label) = args.rrl()?; vec![format!("branch_if_nonzero {r1} - {r2}, {label};")] } "bnez" => { - let (r1, label) = rl(args); + let (r1, label) = args.rl()?; vec![format!("branch_if_nonzero {r1}, {label};")] } // jump and call "j" => { - if let [label] = args { - vec![format!( - "tmp1 <== jump({});", - argument_to_escaped_symbol(label) - )] - } else { - panic!() - } + let label = args.l()?; + vec![format!("tmp1 <== jump({label});",)] } "jr" => { - let rs = r(args); + let rs = args.r()?; vec![format!("tmp1 <== jump_dyn({rs});")] } "jal" => { - if let [label] = args { - vec![format!( - "x1 <== jump({});", - argument_to_escaped_symbol(label) - )] + if let Ok(label) = args.l() { + vec![format!("x1 <== jump({label});")] } else { - let (rd, label) = rl(args); + let (rd, label) = args.rl()?; let statement = if rd.is_zero() { format!("tmp1 <== jump({label});") } else { @@ -1322,18 +1357,16 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { } "jalr" => { // TODO there is also a form that takes more arguments - let rs = r(args); + let rs = args.r()?; vec![format!("x1 <== jump_dyn({rs});")] } "call" | "tail" => { - assert_eq!(args.len(), 1); - let label = 
&args[0]; - let arg = argument_to_escaped_symbol(label); + let label = args.l()?; let dest = if instr == "tail" { "tmp1" } else { "x1" }; - vec![format!("{dest} <== jump({arg});")] + vec![format!("{dest} <== jump({label});")] } "ecall" => { - assert!(args.is_empty()); + args.empty()?; // save ra/x1 push_register("x1") .into_iter() @@ -1344,24 +1377,24 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { .collect() } "ebreak" => { - assert!(args.is_empty()); + args.empty()?; // we don't use ebreak for anything, ignore vec![] } "ret" => { - assert!(args.is_empty()); + args.empty()?; vec!["tmp1 <== jump_dyn(x1);".to_string()] } // memory access "lw" => { - let (rd, rs, off) = rro(args); + let (rd, rs, off) = args.rro()?; // TODO we need to consider misaligned loads / stores only_if_no_write_to_zero_vec(vec![format!("{rd}, tmp1 <== mload({rs} + {off});")], rd) } "lb" => { // load byte and sign-extend. the memory is little-endian. - let (rd, rs, off) = rro(args); + let (rd, rs, off) = args.rro()?; only_if_no_write_to_zero_vec( vec![ format!("{rd}, tmp2 <== mload({rs} + {off});"), @@ -1373,7 +1406,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { } "lbu" => { // load byte and zero-extend. the memory is little-endian. - let (rd, rs, off) = rro(args); + let (rd, rs, off) = args.rro()?; only_if_no_write_to_zero_vec( vec![ format!("{rd}, tmp2 <== mload({rs} + {off});"), @@ -1386,7 +1419,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { "lh" => { // Load two bytes and sign-extend. // Assumes the address is a multiple of two. - let (rd, rs, off) = rro(args); + let (rd, rs, off) = args.rro()?; only_if_no_write_to_zero_vec( vec![ format!("{rd}, tmp2 <== mload({rs} + {off});"), @@ -1399,7 +1432,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { "lhu" => { // Load two bytes and zero-extend. // Assumes the address is a multiple of two. 
- let (rd, rs, off) = rro(args); + let (rd, rs, off) = args.rro()?; only_if_no_write_to_zero_vec( vec![ format!("{rd}, tmp2 <== mload({rs} + {off});"), @@ -1410,7 +1443,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { ) } "sw" => { - let (r1, r2, off) = rro(args); + let (r1, r2, off) = args.rro()?; vec![format!("mstore {r2} + {off}, {r1};")] } "sh" => { @@ -1418,7 +1451,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { // TODO this code assumes it is at least aligned on // a two-byte boundary - let (rs, rd, off) = rro(args); + let (rs, rd, off) = args.rro()?; vec![ format!("tmp1, tmp2 <== mload({rd} + {off});"), "tmp3 <== shl(0xffff, 8 * tmp2);".to_string(), @@ -1432,7 +1465,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { } "sb" => { // store byte - let (rs, rd, off) = rro(args); + let (rs, rd, off) = args.rro()?; vec![ format!("tmp1, tmp2 <== mload({rd} + {off});"), "tmp3 <== shl(0xff, 8 * tmp2);".to_string(), @@ -1449,7 +1482,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { // atomic instructions insn if insn.starts_with("amoadd.w") => { - let (rd, rs2, rs1, off) = rrro(args); + let (rd, rs2, rs1, off) = args.rrro()?; assert_eq!(off, 0); [ @@ -1465,7 +1498,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { insn if insn.starts_with("lr.w") => { // Very similar to "lw": - let (rd, rs, off) = rro(args); + let (rd, rs, off) = args.rro()?; assert_eq!(off, 0); // TODO misaligned access should raise misaligned address exceptions let mut statements = @@ -1476,7 +1509,7 @@ fn process_instruction(instr: &str, args: &[Argument]) -> Vec { insn if insn.starts_with("sc.w") => { // Some overlap with "sw", but also writes 0 to rd on success - let (rd, rs2, rs1, off) = rrro(args); + let (rd, rs2, rs1, off) = args.rrro()?; assert_eq!(off, 0); // TODO: misaligned access should raise misaligned address exceptions let mut statements = vec![ @@ -1493,5 +1526,5 @@ fn 
process_instruction(instr: &str, args: &[Argument]) -> Vec { _ => { panic!("Unknown instruction: {instr}"); } - } + }) } From d362de79a7d0b6d199df456011bdc97972c77b36 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 29 May 2024 17:17:14 +0200 Subject: [PATCH 06/88] Implementing more general jalr. --- riscv/src/compiler.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 59a396b1d..ba53530be 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -1355,11 +1355,17 @@ fn process_instruction(instr: &str, args: &A) -> Result { - // TODO there is also a form that takes more arguments - let rs = args.r()?; - vec![format!("x1 <== jump_dyn({rs});")] - } + "jalr" => vec![if let Ok(rs) = args.r() { + format!("x1 <== jump_dyn({rs});") + } else { + let (rd, rs, off) = args.rro()?; + assert_eq!(off, 0, "jalr with non-zero offset is not supported"); + if rd.is_zero() { + format!("tmp1 <== jump_dyn({rs});") + } else { + format!("{rd} <== jump_dyn({rs});") + } + }], "call" | "tail" => { let label = args.l()?; let dest = if instr == "tail" { "tmp1" } else { "x1" }; From 2a0920ace0c660a236026e723709b9f60355ccd0 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 29 May 2024 19:23:18 +0200 Subject: [PATCH 07/88] Lifting sets of instructions into high level (pseudo)instructions. 
--- riscv/src/elf_translate.rs | 203 ++++++++++++++++++++++++++++++++----- 1 file changed, 175 insertions(+), 28 deletions(-) diff --git a/riscv/src/elf_translate.rs b/riscv/src/elf_translate.rs index 97d12820d..5a7f123c3 100644 --- a/riscv/src/elf_translate.rs +++ b/riscv/src/elf_translate.rs @@ -15,9 +15,6 @@ pub fn elf_translate(file_name: &str) { let elf = Elf::parse(&file_buffer).unwrap(); println!("{:#?}", elf); - /*for p in elf.program_headers { - println!("{:?}", p); - }*/ // Index the sections by their virtual address let mut text_sections = BTreeMap::new(); @@ -41,6 +38,7 @@ pub fn elf_translate(file_name: &str) { } extract_reachable_code(elf.entry.try_into().unwrap(), &text_sections); + todo!(); } fn extract_reachable_code(entry_point: u32, text_sections: &BTreeMap) { @@ -62,31 +60,38 @@ fn extract_reachable_code(entry_point: u32, text_sections: &BTreeMap visited.insert(addr); - // We assume the entire section is code, so decode and translate it from - // start. - let code = convert_to_pseudoinstructions(addr, section_data); + // She entire section should be code, so decode and translate it from start. + let code = lift_instructions(addr, section_data); } } -struct PseudoInstruction<'a> { +enum HighLevelImmediate { + None, + CodeLabel(i32), // The value is the original address + Value(i32), +} + +struct HighLevelInsn<'a> { + original_address: u32, op: &'a str, rd: Option, rs1: Option, rs2: Option, - imm: Option, + imm: HighLevelImmediate, } struct PseudoInstructionConverter { base_addr: u32, } -impl TwoOrOneMapper> for PseudoInstructionConverter { - fn try_map_two(&mut self, insn1: &Ins, insn2: &Ins) -> Option> { +impl TwoOrOneMapper> for PseudoInstructionConverter { + fn try_map_two(&mut self, insn1: &Ins, insn2: &Ins) -> Option> { let result = match (insn1, insn2) { ( + // li rd, immediate Ins { - opc: Op::AUIPC, - rd: Some(rd_auipc), + opc: Op::LUI, + rd: Some(rd_lui), imm: Some(hi), .. 
}, @@ -97,16 +102,126 @@ impl TwoOrOneMapper> for PseudoInstructionConver imm: Some(lo), .. }, - ) if rd_auipc == rd_addi && rd_auipc == rs1_addi => PseudoInstruction { - op: "la", - rd: Some(*rd_auipc as u32), + ) if rd_lui == rd_addi && rd_lui == rs1_addi => HighLevelInsn { + op: "li", + rd: Some(*rd_lui as u32), rs1: None, rs2: None, - imm: Some((((*hi as i32) << 12) | (*lo as i32)) + self.base_addr as i32), + imm: HighLevelImmediate::Value((*hi << 12) | *lo), + original_address: self.base_addr, }, - // TODO: add more pseudoinstructions - // TODO: undo linker relaxation relative to "gp" register - // TODO: transform relative addresses to absolute addresses + ( + // All other double instructions we can lift starts with auipc. + // Furthermore, we have to join every auipc, as we don't support + // it independently. + Ins { + opc: Op::AUIPC, + rd: Some(rd_auipc), + imm: Some(hi), + .. + }, + insn2, + ) => { + let hi = self.base_addr as i32 + (*hi << 12); + match insn2 { + // la rd, symbol + Ins { + opc: Op::ADDI, + rd: Some(rd_addi), + rs1: Some(rs1_addi), + imm: Some(lo), + .. + } if rd_auipc == rd_addi && rd_auipc == rs1_addi => HighLevelInsn { + op: "la", + rd: Some(*rd_auipc as u32), + rs1: None, + rs2: None, + imm: HighLevelImmediate::CodeLabel(hi + lo), + original_address: self.base_addr, + }, + // TODO: uncomment when powdr supports the pseudoinstruction + // version of l{b|h|w} and s{b|h|w}. For now, it is better + // to just fail here if we encounter this usage of auipc. + /* + // l{b|h|w} rd, symbol + Ins { + opc: l_op, + rd: Some(rd_l), + rs1: Some(rs1_l), + rs2: None, + imm: Some(lo), + .. 
+ } if matches!(l_op, Op::LB | Op::LH | Op::LW) + && rd_auipc == rd_l + && rd_l == rs1_l => + { + HighLevelInsn { + op: l_op.to_string(), + rd: Some(*rd_l as u32), + rs1: None, + rs2: None, + imm: HighLevelImmediate::Value(hi + lo), + original_address: self.base_addr, + } + } + // s{b|h|w} rd, symbol, rt + Ins { + opc: l_op, + rd: None, + rs1: Some(rt_l), + rs2: Some(rd), + imm: Some(lo), + .. + } if matches!(l_op, Op::LB | Op::LH | Op::LW) && rd_auipc == rt_l => { + HighLevelInsn { + op: l_op.to_string(), + rd: None, + // TODO: If this pseudoinstruction is ever + // implemented in powdr, rs1 should end up + // containing the output of auipc, a value which + // doen't make sense in powdr. + rs1: Some(*rd_auipc as u32), + rs2: Some(*rd as u32), + imm: HighLevelImmediate::Value(hi + lo), + original_address: self.base_addr, + } + } + */ + // call offset + Ins { + opc: Op::JALR, + rd: Some(1), + rs1: Some(1), + rs2: None, + imm: Some(lo), + .. + } if *rd_auipc == 1 => HighLevelInsn { + op: "call", + rd: None, + rs1: None, + rs2: None, + imm: HighLevelImmediate::CodeLabel(hi + lo), + original_address: self.base_addr, + }, + // tail offset + Ins { + opc: Op::JALR, + rd: Some(0), + rs1: Some(6), + rs2: None, + imm: Some(lo), + .. 
+ } if *rd_auipc == 6 => HighLevelInsn { + op: "tail", + rd: None, + rs1: None, + rs2: None, + imm: HighLevelImmediate::CodeLabel(hi + lo), + original_address: self.base_addr, + }, + _ => panic!("auipc could not be joined!"), + } + } _ => return None, }; @@ -115,26 +230,58 @@ impl TwoOrOneMapper> for PseudoInstructionConver Some(result) } - fn map_one(&mut self, insn: Ins) -> PseudoInstruction<'static> { - self.base_addr += ins_size(&insn); + fn map_one(&mut self, insn: Ins) -> HighLevelInsn<'static> { + let imm = match insn.opc { + // All jump instructions that have the immediate as an address + Op::JAL | Op::BEQ | Op::BNE | Op::BLT | Op::BGE | Op::BLTU | Op::BGEU => { + HighLevelImmediate::CodeLabel(insn.imm.unwrap() + self.base_addr as i32) + } + // We currently only support standalone jalr if offset is zero + Op::JALR => { + assert!( + insn.imm.unwrap() == 0, + "jalr with non-zero offset is not supported" + ); + + HighLevelImmediate::Value(0) + } + // We currently don't support auipc by itself + Op::AUIPC => panic!("auipc could not be joined!"), + // All other instructions, which have the immediate as a value + _ => match insn.imm { + Some(imm) => HighLevelImmediate::Value(imm as i32), + None => HighLevelImmediate::None, + }, + }; + + // We don't need to lift the branch instructions to their Z versions, + // because powdr's optimizer should be able to figure out the comparison is + // against a constant. But if needed, we could do it here... - PseudoInstruction { + let result = HighLevelInsn { op: insn.opc.to_string(), rd: insn.rd.map(|x| x as u32), rs1: insn.rs1.map(|x| x as u32), rs2: insn.rs2.map(|x| x as u32), - imm: insn.imm, - } + imm, + original_address: self.base_addr, + }; + + self.base_addr += ins_size(&insn); + + result } } -/// Lift the instructions back to higher-level pseudoinstructions. Just pass -/// throught instruction sets that don't have a pseudoinstruction equivalent. 
-fn convert_to_pseudoinstructions(base_addr: u32, data: &[u8]) -> Vec { +/// Lift the instructions back to higher-level instructions. +/// +/// Turn addresses into labels and and merge instructions into +/// pseudoinstructions. +fn lift_instructions(base_addr: u32, data: &[u8]) -> Vec { let instructions = RiscVInstructionIterator::new(data); let pseudo_converter = PseudoInstructionConverter { base_addr }; - try_map_two_by_two(instructions, PseudoInstructionConverter { base_addr }) + try_map_two_by_two(instructions, pseudo_converter) } struct RiscVInstructionIterator<'a> { From 6879d7427dc119b5f77765b9b32457f7aa77c1c0 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Thu, 30 May 2024 17:32:53 +0200 Subject: [PATCH 08/88] Instruction lifting possibly done. --- riscv/src/elf_translate.rs | 140 ++++++++++++------ riscv/src/lib.rs | 3 +- .../riscv_data/function_pointer/src/lib.rs | 30 ---- riscv/tests/riscv_data/keccak/src/lib.rs | 19 --- 4 files changed, 92 insertions(+), 100 deletions(-) delete mode 100644 riscv/tests/riscv_data/function_pointer/src/lib.rs delete mode 100644 riscv/tests/riscv_data/keccak/src/lib.rs diff --git a/riscv/src/elf_translate.rs b/riscv/src/elf_translate.rs index 5a7f123c3..288d3d0c9 100644 --- a/riscv/src/elf_translate.rs +++ b/riscv/src/elf_translate.rs @@ -16,52 +16,42 @@ pub fn elf_translate(file_name: &str) { let elf = Elf::parse(&file_buffer).unwrap(); println!("{:#?}", elf); - // Index the sections by their virtual address - let mut text_sections = BTreeMap::new(); - let mut data_sections = BTreeMap::new(); + // We simply extract all the text and data sections. There is no need to + // perform reachability search, because we trust the linker to have done + // that already. + let mut text_sections = Vec::new(); + //let mut data_map = BTreeMap::new(); + + // Keep a list of referenced text addresses, so we can generate the labels. 
+ let mut referenced_text_addrs = HashSet::from([elf.entry.try_into().unwrap()]); for p in elf.program_headers.iter() { if p.p_type == program_header::PT_LOAD { + let addr = p.p_vaddr as u32; + let section_data = + &file_buffer[p.p_offset as usize..(p.p_offset + p.p_filesz) as usize]; + // Test if executable if p.p_flags & 1 == 1 { - text_sections.insert( - p.p_vaddr as u32, - // Slice containing the section data. Since this is a - // text section, we assume any zeroed part beyond - // p_filesz (if any) is not relevant. - &file_buffer[p.p_offset as usize..(p.p_offset + p.p_filesz) as usize], - ); + let insns = lift_instructions(addr, section_data, &mut referenced_text_addrs); + text_sections.push(insns); } else { - data_sections.insert(p.p_vaddr, p); + //load_data_section(addr, section_data, &mut data_map, elf); } } } - extract_reachable_code(elf.entry.try_into().unwrap(), &text_sections); todo!(); } -fn extract_reachable_code(entry_point: u32, text_sections: &BTreeMap) { - // Helper function to find the section containing the address - let find_section_of_address = |addr| { - let (§ion_addr, &data) = text_sections - .range(..=addr) - .next_back() - .expect("Jump address not found in any .text section"); - (section_addr, data) - }; - - let mut visited = HashSet::new(); - let mut to_visit = vec![find_section_of_address(entry_point)]; - - while let Some((addr, section_data)) = to_visit.pop() { - // Sanity check of the alignment - assert_eq!(addr % 2, 0); - - visited.insert(addr); +enum MaybeInstruction { + Unimplemented, + Valid(Ins), +} - // She entire section should be code, so decode and translate it from start. 
- let code = lift_instructions(addr, section_data); +impl From for MaybeInstruction { + fn from(insn: Ins) -> Self { + MaybeInstruction::Valid(insn) } } @@ -71,21 +61,31 @@ enum HighLevelImmediate { Value(i32), } -struct HighLevelInsn<'a> { +struct HighLevelInsn { original_address: u32, - op: &'a str, + op: &'static str, rd: Option, rs1: Option, rs2: Option, imm: HighLevelImmediate, } -struct PseudoInstructionConverter { +struct InstructionLifter<'a> { base_addr: u32, + referenced_text_addrs: &'a mut HashSet, } -impl TwoOrOneMapper> for PseudoInstructionConverter { - fn try_map_two(&mut self, insn1: &Ins, insn2: &Ins) -> Option> { +impl TwoOrOneMapper for InstructionLifter<'_> { + fn try_map_two( + &mut self, + insn1: &MaybeInstruction, + insn2: &MaybeInstruction, + ) -> Option { + let (insn1, insn2) = match (insn1, insn2) { + (MaybeInstruction::Valid(insn1), MaybeInstruction::Valid(insn2)) => (insn1, insn2), + _ => return None, + }; + let result = match (insn1, insn2) { ( // li rd, immediate @@ -227,14 +227,32 @@ impl TwoOrOneMapper> for PseudoInstructionConverter self.base_addr += [insn1, insn2].map(ins_size).into_iter().sum::(); + if let HighLevelImmediate::CodeLabel(addr) = &result.imm { + self.referenced_text_addrs.insert(*addr as u32); + } + Some(result) } - fn map_one(&mut self, insn: Ins) -> HighLevelInsn<'static> { + fn map_one(&mut self, insn: MaybeInstruction) -> HighLevelInsn { + let MaybeInstruction::Valid(insn) = insn else { + return HighLevelInsn { + op: "unimp", + rd: None, + rs1: None, + rs2: None, + imm: HighLevelImmediate::None, + original_address: self.base_addr, + }; + }; + let imm = match insn.opc { // All jump instructions that have the immediate as an address Op::JAL | Op::BEQ | Op::BNE | Op::BLT | Op::BGE | Op::BLTU | Op::BGEU => { - HighLevelImmediate::CodeLabel(insn.imm.unwrap() + self.base_addr as i32) + let addr = insn.imm.unwrap() + self.base_addr as i32; + self.referenced_text_addrs.insert(addr as u32); + + 
HighLevelImmediate::CodeLabel(addr) } // We currently only support standalone jalr if offset is zero Op::JALR => { @@ -277,10 +295,17 @@ impl TwoOrOneMapper> for PseudoInstructionConverter /// /// Turn addresses into labels and and merge instructions into /// pseudoinstructions. -fn lift_instructions(base_addr: u32, data: &[u8]) -> Vec { +fn lift_instructions( + base_addr: u32, + data: &[u8], + referenced_text_addrs: &mut HashSet, +) -> Vec { let instructions = RiscVInstructionIterator::new(data); - let pseudo_converter = PseudoInstructionConverter { base_addr }; + let pseudo_converter = InstructionLifter { + base_addr, + referenced_text_addrs, + }; try_map_two_by_two(instructions, pseudo_converter) } @@ -297,7 +322,7 @@ impl RiscVInstructionIterator<'_> { } impl Iterator for RiscVInstructionIterator<'_> { - type Item = Ins; + type Item = MaybeInstruction; fn next(&mut self) -> Option { if self.remaining_data.is_empty() { @@ -317,18 +342,28 @@ impl Iterator for RiscVInstructionIterator<'_> { ) .decode(Isa::Rv32) .expect("Failed to decode instruction.") + .into() } else { // 16 bits advance = 2; - let c_insn = u16::from_le_bytes( + let bin_instruction = u16::from_le_bytes( self.remaining_data[0..2] .try_into() .expect("Not enough bytes to complete a 16-bit instruction!"), - ) - .decode(Isa::Rv32) - .expect("Failed to decode instruction."); - - insn = to_32bit_equivalent(c_insn); + ); + insn = match bin_instruction.decode(Isa::Rv32) { + Ok(c_insn) => to_32bit_equivalent(c_insn).into(), + Err(raki::decode::DecodingError::IllegalInstruction) => { + // Although not a real RISC-V instruction, sometimes 0x0000 + // is used on purpose as an illegal instruction (it even has + // its own mnemonic "unimp"), so we support it here. + // Otherwise, there is something more fishy going on, and we + // panic. 
+ assert_eq!(bin_instruction, 0, "Illegal instruction found!"); + MaybeInstruction::Unimplemented + } + Err(err) => panic!("Unexpected decoding error: {err:?}"), + }; } // Advance the iterator @@ -353,13 +388,20 @@ fn to_32bit_equivalent(mut insn: Ins) -> Ins { Op::C_SW => Op::SW, Op::C_NOP => { return Ins { - opc: Op::C_ADDI, + opc: Op::ADDI, rd: Some(0), rs1: Some(0), ..insn } } Op::C_ADDI | Op::C_ADDI16SP => Op::ADDI, + Op::C_ADDI4SPN => { + return Ins { + opc: Op::ADDI, + rs1: Some(2), // add to x2 (stack pointer) + ..insn + }; + } Op::C_LI => { return Ins { opc: Op::ADDI, diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index dd119685b..1445e7207 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -201,7 +201,7 @@ pub fn compile_rust_crate_to_riscv_asm( fn build_cargo_command(input_dir: &str, target_dir: &Path, produce_build_plan: bool) -> Command { let mut cmd = Command::new("cargo"); - cmd.env("RUSTFLAGS", "--emit=asm -g"); + cmd.env("RUSTFLAGS", "--emit=asm -g -C link-args=-pie"); let args = as_ref![ OsStr; @@ -212,7 +212,6 @@ fn build_cargo_command(input_dir: &str, target_dir: &Path, produce_build_plan: b "build-std=core,alloc", "--target", "riscv32imac-unknown-none-elf", - "--lib", "--target-dir", target_dir, "--manifest-path", diff --git a/riscv/tests/riscv_data/function_pointer/src/lib.rs b/riscv/tests/riscv_data/function_pointer/src/lib.rs deleted file mode 100644 index deabb2ef3..000000000 --- a/riscv/tests/riscv_data/function_pointer/src/lib.rs +++ /dev/null @@ -1,30 +0,0 @@ -#![no_std] - -use powdr_riscv_runtime::io::read_u32; - -// Never inline to make sure the function is not optimized away, and public to -// make sure op argument is not removed, forcing a function pointer to be loaded -// and passed. 
-#[inline(never)] -pub fn apply_op(op: fn(u32, u32) -> u32, a: u32, b: u32) -> u32 { - op(a, b) -} - -fn sub(a: u32, b: u32) -> u32 { - a - b -} - -#[no_mangle] -fn main() { - let a = read_u32(0); - let b = read_u32(1); - let expected = read_u32(2); - - // As of this writing, this will generate a dynamic load of sub's label into - // a0 register. If we had other functions to choose dynamically depending on - // input, their labels would be placed on a jump table in a data section, - // which is a more common case that we want to avoid in this test. - let result = apply_op(sub, a, b); - - assert_eq!(result, expected); -} diff --git a/riscv/tests/riscv_data/keccak/src/lib.rs b/riscv/tests/riscv_data/keccak/src/lib.rs deleted file mode 100644 index 45afe0520..000000000 --- a/riscv/tests/riscv_data/keccak/src/lib.rs +++ /dev/null @@ -1,19 +0,0 @@ -#![no_std] - -use tiny_keccak::{Hasher, Keccak}; - -#[no_mangle] -pub fn main() { - let input = b"Solidity"; - let mut output = [0u8; 32]; - let mut hasher = Keccak::v256(); - hasher.update(input); - hasher.finalize(&mut output); - assert_eq!( - output, - [ - 96, 41, 143, 120, 204, 11, 71, 23, 11, 167, 156, 16, 170, 56, 81, 215, 100, 139, 217, - 111, 47, 142, 70, 161, 157, 188, 119, 124, 54, 251, 12, 0, - ], - ); -} From 5866a4b639899ef5ba3c169d7c47a351ac14808b Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Thu, 30 May 2024 18:32:50 +0200 Subject: [PATCH 09/88] Loading data and lifting references to text labels. --- riscv/src/elf_translate.rs | 48 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/riscv/src/elf_translate.rs b/riscv/src/elf_translate.rs index 288d3d0c9..27117072a 100644 --- a/riscv/src/elf_translate.rs +++ b/riscv/src/elf_translate.rs @@ -19,8 +19,12 @@ pub fn elf_translate(file_name: &str) { // We simply extract all the text and data sections. 
There is no need to // perform reachability search, because we trust the linker to have done // that already. + // + // TODO: maybe exclude the section mapping the ELF header itself? There is + // also this dynamic section that I think we can ignore. But with + // continuations on, unused memory is practically free. let mut text_sections = Vec::new(); - //let mut data_map = BTreeMap::new(); + let mut data_map = BTreeMap::new(); // Keep a list of referenced text addresses, so we can generate the labels. let mut referenced_text_addrs = HashSet::from([elf.entry.try_into().unwrap()]); @@ -36,14 +40,54 @@ pub fn elf_translate(file_name: &str) { let insns = lift_instructions(addr, section_data, &mut referenced_text_addrs); text_sections.push(insns); } else { - //load_data_section(addr, section_data, &mut data_map, elf); + load_data_section(addr, section_data, &mut data_map); } } } + // All the references to code address have been lifted into labels in the + // instructions, but not the data sections. Luckily, that is just a matter of + // reading the dynamic relocation table. + for r in elf.dynrelas.iter() { + // We only support the R_RISCV_RELATIVE relocation type: + assert_eq!(r.r_type, 3, "Unsupported relocation type!"); + + let addr = r.r_offset as u32; + let original_addr = r.r_addend.unwrap() as u32; + + data_map.insert(addr, Data::TextLabel(original_addr)); + + // We also need to add the referenced address to the list of text + // addresses, so we can generate the label. 
+ referenced_text_addrs.insert(original_addr); + } + + assert_eq!(elf.dynrels.len(), 0, "Unsupported relocation type!"); + todo!(); } +enum Data { + TextLabel(u32), + Value(u32), +} + +fn load_data_section(mut addr: u32, data: &[u8], data_map: &mut BTreeMap) { + for chunk in data.chunks(4) { + let mut padded = [0; 4]; + padded[..chunk.len()].copy_from_slice(chunk); + + let value = u32::from_le_bytes(padded); + if value != 0 { + data_map.insert(addr, Data::Value(value)); + } else { + // We don't need to store zero values, as they are implicit. + } + + addr += 4; + } +} + enum MaybeInstruction { Unimplemented, Valid(Ins), From 69130e1216f3749edb400971ab2d4f7a7b04d78a Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 31 May 2024 14:05:33 +0200 Subject: [PATCH 10/88] Relocation apparently working. --- riscv/src/elf_translate.rs | 119 ++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 22 deletions(-) diff --git a/riscv/src/elf_translate.rs b/riscv/src/elf_translate.rs index 27117072a..d64efdda0 100644 --- a/riscv/src/elf_translate.rs +++ b/riscv/src/elf_translate.rs @@ -14,7 +14,10 @@ pub fn elf_translate(file_name: &str) { let file_buffer = fs::read(file_name).unwrap(); let elf = Elf::parse(&file_buffer).unwrap(); - println!("{:#?}", elf); + + // Map of addresses into memory sections, so we can know what address belong + // in what section. + let mut address_map = AddressMap(BTreeMap::new()); // We simply extract all the text and data sections. There is no need to // perform reachability search, because we trust the linker to have done @@ -23,11 +26,12 @@ pub fn elf_translate(file_name: &str) { // TODO: maybe exclude the section mapping the ELF header itself? There is // also this dynamic section that I think we can ignore. But with // continuations on, unused memory is practically free. 
- let mut text_sections = Vec::new(); + let mut lifted_text_sections = Vec::new(); let mut data_map = BTreeMap::new(); // Keep a list of referenced text addresses, so we can generate the labels. let mut referenced_text_addrs = HashSet::from([elf.entry.try_into().unwrap()]); + println!("entry: {:08x}:", elf.entry); for p in elf.program_headers.iter() { if p.p_type == program_header::PT_LOAD { @@ -35,10 +39,13 @@ pub fn elf_translate(file_name: &str) { let section_data = &file_buffer[p.p_offset as usize..(p.p_offset + p.p_filesz) as usize]; + address_map.0.insert(addr, p); + // Test if executable if p.p_flags & 1 == 1 { - let insns = lift_instructions(addr, section_data, &mut referenced_text_addrs); - text_sections.push(insns); + let insns = + lift_instructions(addr, section_data, &address_map, &mut referenced_text_addrs); + lifted_text_sections.push(insns); } else { load_data_section(addr, section_data, &mut data_map); } @@ -49,24 +56,80 @@ pub fn elf_translate(file_name: &str) { // instructions, but not the data sections. Luckily, that is just a matter of // reading the dynamic relocation table. for r in elf.dynrelas.iter() { - // We only support the R_RISCV_RELATIVE relocation type: - assert_eq!(r.r_type, 3, "Unsupported relocation type!"); - let addr = r.r_offset as u32; - let original_addr = r.r_addend.unwrap() as u32; + if address_map.is_in_data_section(addr) { + // We only support the R_RISCV_RELATIVE relocation type: + assert_eq!(r.r_type, 3, "Unsupported relocation type!"); - data_map.insert(addr, Data::TextLabel(original_addr)); + let original_addr = r.r_addend.unwrap() as u32; - // We also need to add the referenced address to the list of text - // addresses, so we can generate the label. - referenced_text_addrs.insert(original_addr); + if address_map.is_in_text_section(original_addr) { + data_map.insert(addr, Data::TextLabel(original_addr)); + + // We also need to add the referenced address to the list of text + // addresses, so we can generate the label. 
+ referenced_text_addrs.insert(original_addr); + println!("reloc: {:08x}:", original_addr); + } else { + data_map.insert(addr, Data::Value(original_addr)); + } + } else { + panic!("Unsupported relocation in non-data section!\nTODO: maybe this is fine. Maybe the lifting of instructions have already taken care of this."); + } } assert_eq!(elf.dynrels.len(), 0, "Unsupported relocation type!"); + println!("Text labels:"); + for label in referenced_text_addrs { + println!(" label_{:08x}:", label); + } + + println!("Non-zero data:"); + for (addr, data) in data_map { + println!(" {addr:08x}: {data:?}:"); + } + todo!(); } +struct AddressMap<'a>(BTreeMap); + +impl AddressMap<'_> { + fn is_in_data_section(&self, addr: u32) -> bool { + if let Some(section) = self.get_section_of_addr(addr) { + section.p_flags & 1 != 1 + } else { + false + } + } + + fn is_in_text_section(&self, addr: u32) -> bool { + if let Some(section) = self.get_section_of_addr(addr) { + section.p_flags & 1 == 1 + } else { + false + } + } + + fn get_section_of_addr(&self, addr: u32) -> Option<&program_header::ProgramHeader> { + // Get the latest section that starts before the address. + let section = self + .0 + .range(..=addr) + .next_back() + .map(|(_, §ion)| section)?; + + if addr > section.p_vaddr as u32 + section.p_memsz as u32 { + // The address is after the end of the section. 
+ None + } else { + Some(section) + } + } +} + +#[derive(Debug)] enum Data { TextLabel(u32), Value(u32), @@ -116,6 +179,7 @@ struct HighLevelInsn { struct InstructionLifter<'a> { base_addr: u32, + address_map: &'a AddressMap<'a>, referenced_text_addrs: &'a mut HashSet, } @@ -151,7 +215,7 @@ impl TwoOrOneMapper for InstructionLifter<'_> { rd: Some(*rd_lui as u32), rs1: None, rs2: None, - imm: HighLevelImmediate::Value((*hi << 12) | *lo), + imm: HighLevelImmediate::Value(*hi | *lo), original_address: self.base_addr, }, ( @@ -166,7 +230,7 @@ impl TwoOrOneMapper for InstructionLifter<'_> { }, insn2, ) => { - let hi = self.base_addr as i32 + (*hi << 12); + let hi = self.base_addr as i32 + *hi; match insn2 { // la rd, symbol Ins { @@ -175,14 +239,22 @@ impl TwoOrOneMapper for InstructionLifter<'_> { rs1: Some(rs1_addi), imm: Some(lo), .. - } if rd_auipc == rd_addi && rd_auipc == rs1_addi => HighLevelInsn { - op: "la", - rd: Some(*rd_auipc as u32), - rs1: None, - rs2: None, - imm: HighLevelImmediate::CodeLabel(hi + lo), - original_address: self.base_addr, - }, + } if rd_auipc == rd_addi && rd_auipc == rs1_addi => { + let imm_addr = hi + lo; + let imm = if self.address_map.is_in_text_section(imm_addr as u32) { + HighLevelImmediate::CodeLabel(imm_addr) + } else { + HighLevelImmediate::Value(imm_addr) + }; + HighLevelInsn { + op: "la", + rd: Some(*rd_auipc as u32), + rs1: None, + rs2: None, + imm, + original_address: self.base_addr, + } + } // TODO: uncomment when powdr supports the pseudoinstruction // version of l{b|h|w} and s{b|h|w}. For now, it is better // to just fail here if we encounter this usage of auipc. 
@@ -273,6 +345,7 @@ impl TwoOrOneMapper for InstructionLifter<'_> { if let HighLevelImmediate::CodeLabel(addr) = &result.imm { self.referenced_text_addrs.insert(*addr as u32); + println!("insn {}: {:08x}", result.op, addr); } Some(result) @@ -342,12 +415,14 @@ impl TwoOrOneMapper for InstructionLifter<'_> { fn lift_instructions( base_addr: u32, data: &[u8], + address_map: &AddressMap, referenced_text_addrs: &mut HashSet, ) -> Vec { let instructions = RiscVInstructionIterator::new(data); let pseudo_converter = InstructionLifter { base_addr, + address_map, referenced_text_addrs, }; try_map_two_by_two(instructions, pseudo_converter) From d92987e5f3b3123e45c4324024b5d3629f8171a8 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Tue, 4 Jun 2024 20:04:12 +0200 Subject: [PATCH 11/88] Refactoring powdr asm generator to make it source agnostic. --- asm-utils/src/data_storage.rs | 32 +- riscv/benches/executor_benchmark.rs | 8 +- .../src/{ => asm_translate}/disambiguator.rs | 2 +- riscv/src/asm_translate/mod.rs | 529 ++++++++++++++++ riscv/src/{ => asm_translate}/parser.rs | 9 +- .../src/{ => asm_translate}/riscv_asm.lalrpop | 2 +- riscv/src/{compiler.rs => code_gen.rs} | 586 ++++-------------- riscv/src/lib.rs | 83 +-- riscv/src/runtime.rs | 10 +- riscv/tests/instructions.rs | 2 +- riscv/tests/riscv.rs | 11 +- 11 files changed, 708 insertions(+), 566 deletions(-) rename riscv/src/{ => asm_translate}/disambiguator.rs (99%) create mode 100644 riscv/src/asm_translate/mod.rs rename riscv/src/{ => asm_translate}/parser.rs (87%) rename riscv/src/{ => asm_translate}/riscv_asm.lalrpop (98%) rename riscv/src/{compiler.rs => code_gen.rs} (69%) diff --git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs index f588d1924..93188ddc1 100644 --- a/asm-utils/src/data_storage.rs +++ b/asm-utils/src/data_storage.rs @@ -7,23 +7,23 @@ use crate::{ utils::{alignment_size, next_aligned}, }; -pub enum SingleDataValue<'a> { +pub enum SingleDataValue { Value(u32), - 
LabelReference(&'a str), - Offset(&'a str, &'a str), + LabelReference(String), + Offset(String, String), } -struct WordWriter<'a, 'b> { - data_writer: &'a mut dyn FnMut(Option<&str>, u32, SingleDataValue), +struct WordWriter<'a> { + data_writer: &'a mut dyn FnMut(Option, u32, SingleDataValue), partial: u32, current_pos: u32, - latest_label: Option<&'b str>, + latest_label: Option, } -impl<'a, 'b> WordWriter<'a, 'b> { +impl<'a> WordWriter<'a> { fn new( starting_pos: u32, - data_writer: &'a mut dyn FnMut(Option<&str>, u32, SingleDataValue), + data_writer: &'a mut dyn FnMut(Option, u32, SingleDataValue), ) -> Self { // sanitary alignment to 8 bytes let current_pos = next_aligned(starting_pos as usize, 8) as u32; @@ -39,7 +39,7 @@ impl<'a, 'b> WordWriter<'a, 'b> { self.current_pos } - fn set_label(&mut self, label: &'b str) { + fn set_label(&mut self, label: String) { self.latest_label = Some(label) } @@ -77,7 +77,7 @@ impl<'a, 'b> WordWriter<'a, 'b> { } } - fn write_label_reference(&mut self, label: &str) { + fn write_label_reference(&mut self, label: String) { assert_eq!( self.current_pos % 4, 0, @@ -103,7 +103,7 @@ impl<'a, 'b> WordWriter<'a, 'b> { pub fn store_data_objects( sections: Vec, Vec)>>, memory_start: u32, - code_gen: &mut dyn FnMut(Option<&str>, u32, SingleDataValue), + code_gen: &mut dyn FnMut(Option, u32, SingleDataValue), ) -> BTreeMap { let mut writer = WordWriter::new(memory_start, code_gen); @@ -121,22 +121,22 @@ pub fn store_data_objects( positions }; - for (name, data) in sections.iter().flatten() { + for (name, data) in sections.into_iter().flatten() { if let Some(name) = name { writer.set_label(name); } for item in data { - match &item { + match item { DataValue::Zero(length) => { // We can assume memory to be zero-initialized, so we // just have to advance. 
- writer.advance(*length as u32); + writer.advance(length as u32); } DataValue::Direct(bytes) => { writer.write_bytes(bytes.iter().copied()); } DataValue::Reference(sym) => { - if let Some(p) = positions.get(sym) { + if let Some(p) = positions.get(&sym) { writer.write_bytes(p.to_le_bytes().iter().copied()); } else { // code reference @@ -144,7 +144,7 @@ pub fn store_data_objects( } } DataValue::Alignment(bytes, pad_value) => { - writer.align(*bytes as u32, *pad_value); + writer.align(bytes as u32, pad_value); } DataValue::Offset(_l, _r) => unimplemented!(), } diff --git a/riscv/benches/executor_benchmark.rs b/riscv/benches/executor_benchmark.rs index 0d79343c9..6c43daf0f 100644 --- a/riscv/benches/executor_benchmark.rs +++ b/riscv/benches/executor_benchmark.rs @@ -2,7 +2,8 @@ use ::powdr_pipeline::Pipeline; use powdr_number::GoldilocksField; use powdr_riscv::{ - compile_rust_crate_to_riscv_asm, compiler, continuations::bootloader::default_input, Runtime, + asm_translate, compile_rust_crate_to_riscv_asm, continuations::bootloader::default_input, + Runtime, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -18,7 +19,7 @@ fn executor_benchmark(c: &mut Criterion) { let tmp_dir = Temp::new_dir().unwrap(); let riscv_asm_files = compile_rust_crate_to_riscv_asm("./tests/riscv_data/keccak/Cargo.toml", &tmp_dir); - let contents = compiler::compile::(riscv_asm_files, &Runtime::base(), false); + let contents = asm_translate::compile::(riscv_asm_files, &Runtime::base(), false); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); @@ -30,7 +31,8 @@ fn executor_benchmark(c: &mut Criterion) { // The first chunk of `many_chunks`, with Poseidon co-processor & bootloader let riscv_asm_files = compile_rust_crate_to_riscv_asm("./tests/riscv_data/many_chunks/Cargo.toml", &tmp_dir); - let contents = compiler::compile::(riscv_asm_files, &Runtime::base().with_poseidon(), 
true); + let contents = + asm_translate::compile::(riscv_asm_files, &Runtime::base().with_poseidon(), true); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); diff --git a/riscv/src/disambiguator.rs b/riscv/src/asm_translate/disambiguator.rs similarity index 99% rename from riscv/src/disambiguator.rs rename to riscv/src/asm_translate/disambiguator.rs index b2899be5c..db33bb162 100644 --- a/riscv/src/disambiguator.rs +++ b/riscv/src/asm_translate/disambiguator.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use itertools::Itertools; -use crate::{Argument, Expression, Statement}; +use super::{Argument, Expression, Statement}; /// Disambiguates the collection of assembly files and concatenates it to a single list of statements. /// Also disambiguates file ids (debugging information) and returns a list of all files with new IDs. diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm_translate/mod.rs new file mode 100644 index 000000000..48a102aff --- /dev/null +++ b/riscv/src/asm_translate/mod.rs @@ -0,0 +1,529 @@ +use std::{ + cell::Cell, + collections::{BTreeMap, BTreeSet, HashSet}, +}; + +use itertools::Itertools; +use parser::RiscParser; +use powdr_asm_utils::{ + ast::{BinaryOpKind, UnaryOpKind}, + data_parser, + data_storage::store_data_objects, + parser::parse_asm, + reachability::{self, symbols_in_args}, + utils::{argument_to_escaped_symbol, argument_to_number, expression_to_number}, + Architecture, +}; +use powdr_number::FieldElement; + +use crate::{ + code_gen::{self, Args, FunctionKind, MemEntry, Register, RiscVProgram, SourceFileInfo}, + Runtime, +}; + +mod disambiguator; +mod parser; + +type Statement = powdr_asm_utils::ast::Statement; +type Argument = powdr_asm_utils::ast::Argument; +type Expression = powdr_asm_utils::ast::Expression; + +struct AsmProgram { + file_ids: Vec<(i64, String, String)>, + mem_entries: Cell>>, + statements: 
Vec, +} + +impl RiscVProgram for AsmProgram { + type InstructionArgs = [Argument]; + type Label = String; + + fn source_files_info(&self) -> impl Iterator { + self.file_ids.iter().map(|(id, dir, file)| SourceFileInfo { + id: *id as u32, + dir, + file, + }) + } + + fn initial_mem(&self) -> impl Iterator { + self.mem_entries.take().into_iter().flatten() + } + + fn executable_statements( + &self, + ) -> impl Iterator> { + self.statements.iter().filter_map(process_statement) + } + + fn start_function(&self) -> &str { + "_start" + } +} + +impl Args for [Argument] { + type Error = &'static str; + + fn l(&self) -> Result { + const ERR: &str = "Expected: label"; + match self { + [l] => Ok(argument_to_escaped_symbol(l).ok_or(ERR)?), + _ => Err(ERR), + } + } + + fn r(&self) -> Result { + match self { + [Argument::Register(r1)] => Ok(*r1), + _ => Err("Expected: register"), + } + } + + fn rri(&self) -> Result<(Register, Register, u32), &'static str> { + const ERR: &str = "Expected: register, register, immediate"; + match self { + [Argument::Register(r1), Argument::Register(r2), n] => { + Ok((*r1, *r2, argument_to_number(n).ok_or(ERR)?)) + } + _ => Err(ERR), + } + } + + fn rrr(&self) -> Result<(Register, Register, Register), &'static str> { + match self { + [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => { + Ok((*r1, *r2, *r3)) + } + _ => Err("Expected: register, register, register"), + } + } + + fn ri(&self) -> Result<(Register, u32), &'static str> { + const ERR: &str = "Expected: register, immediate"; + match self { + [Argument::Register(r1), n] => Ok((*r1, argument_to_number(n).ok_or(ERR)?)), + _ => Err(ERR), + } + } + + fn rr(&self) -> Result<(Register, Register), &'static str> { + match self { + [Argument::Register(r1), Argument::Register(r2)] => Ok((*r1, *r2)), + _ => Err("Expected: register, register"), + } + } + + fn rrl(&self) -> Result<(Register, Register, String), &'static str> { + const ERR: &str = "Expected: register, register, label"; + 
match self { + [Argument::Register(r1), Argument::Register(r2), l] => { + Ok((*r1, *r2, argument_to_escaped_symbol(l).ok_or(ERR)?)) + } + _ => Err(ERR), + } + } + + fn rl(&self) -> Result<(Register, String), &'static str> { + const ERR: &str = "Expected: register, label"; + match self { + [Argument::Register(r1), l] => Ok((*r1, argument_to_escaped_symbol(l).ok_or(ERR)?)), + _ => Err(ERR), + } + } + + fn rro(&self) -> Result<(Register, Register, u32), &'static str> { + if let [Argument::Register(r1), Argument::RegOffset(off, r2)] = self { + if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) + { + return Ok((*r1, *r2, off)); + } + } + if let [Argument::Register(r1), Argument::Expression(off)] = self { + if let Some(off) = expression_to_number(off) { + // If the register is not specified, it defaults to x0 + return Ok((*r1, Register::new(0), off)); + } + } + + Err("Expected: register, offset(register)") + } + + fn rrro(&self) -> Result<(Register, Register, Register, u32), &'static str> { + if let [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] = self + { + if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) + { + return Ok((*r1, *r2, *r3, off)); + } + } + if let [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] = self { + if let Some(off) = expression_to_number(off) { + // If the register is not specified, it defaults to x0 + return Ok((*r1, *r2, Register::new(0), off)); + } + } + Err("Expected: register, register, offset(register)") + } + + fn empty(&self) -> Result<(), &'static str> { + match self { + [] => Ok(()), + _ => Err("Expected: no arguments"), + } + } +} + +/// Compiles riscv assembly to a powdr assembly file. Adds required library routines. 
+pub fn compile( + assemblies: BTreeMap, + runtime: &Runtime, + with_bootloader: bool, +) -> String { + let asm_program = compile_internal(assemblies, runtime); + + code_gen::translate_program::(&asm_program, runtime, with_bootloader) +} + +fn compile_internal(mut assemblies: BTreeMap, runtime: &Runtime) -> AsmProgram { + // stack grows towards zero + let stack_start = 0x10000000; + // data grows away from zero + let data_start = 0x10000100; + + assert!(assemblies + .insert( + "__runtime".to_string(), + runtime.global_declarations(stack_start) + ) + .is_none()); + + // TODO remove unreferenced files. + let (mut statements, file_ids) = disambiguator::disambiguate( + assemblies + .into_iter() + .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) + .collect(), + ); + let mut data_sections = data_parser::extract_data_objects(&statements); + + // Reduce to the code that is actually reachable from main + // (and the objects that are referred from there) + let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( + "_start", + &mut statements, + &mut data_sections, + ); + + // Replace dynamic references to code labels + replace_dynamic_label_references(&mut statements, &data_labels); + + let mut mem_entries = Vec::new(); + let data_positions = + store_data_objects(data_sections, data_start, &mut |label, addr, value| { + mem_entries.push(MemEntry { label, addr, value }); + }); + + let statements = substitute_symbols_with_values(statements, &data_positions); + + AsmProgram { + file_ids, + mem_entries: Cell::new(Some(mem_entries)), + statements, + } +} + +/// Replace certain patterns of references to code labels by +/// special instructions. We ignore any references to data objects +/// because they will be handled differently. 
+fn replace_dynamic_label_references(statements: &mut Vec, data_labels: &HashSet<&str>) { + /* + Find patterns of the form + lui a0, %hi(LABEL) + addi s10, a0, %lo(LABEL) + - + turn this into the pseudoinstruction + li s10, LABEL + which is then turned into + + s10 <== load_label(LABEL) + + It gets complicated by the fact that sometimes, labels + and debugging directives occur between the two statements + matching that pattern... + */ + let instruction_indices = statements + .iter() + .enumerate() + .filter_map(|(i, s)| match s { + Statement::Instruction(_, _) => Some(i), + _ => None, + }) + .collect::>(); + + let mut to_delete = BTreeSet::default(); + for (i1, i2) in instruction_indices.into_iter().tuple_windows() { + if let Some(r) = + replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels) + { + to_delete.insert(i1); + statements[i2] = r; + } + } + + let mut i = 0; + statements.retain(|_| (!to_delete.contains(&i), i += 1).0); +} + +fn replace_dynamic_label_reference( + s1: &Statement, + s2: &Statement, + data_labels: &HashSet<&str>, +) -> Option { + let Statement::Instruction(instr1, args1) = s1 else { + return None; + }; + let Statement::Instruction(instr2, args2) = s2 else { + return None; + }; + if instr1.as_str() != "lui" || instr2.as_str() != "addi" { + return None; + }; + let [Argument::Register(r1), Argument::Expression(Expression::FunctionOp(FunctionKind::HiDataRef, expr1))] = + &args1[..] + else { + return None; + }; + // Maybe should try to reduce expr1 and expr2 before comparing deciding it is a pure symbol? + let Expression::Symbol(label1) = expr1.as_ref() else { + return None; + }; + let [Argument::Register(r2), Argument::Register(r3), Argument::Expression(Expression::FunctionOp(FunctionKind::LoDataRef, expr2))] = + &args2[..] 
+ else { + return None; + }; + let Expression::Symbol(label2) = expr2.as_ref() else { + return None; + }; + if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) { + return None; + } + Some(Statement::Instruction( + "li".to_string(), + vec![ + Argument::Register(*r2), + Argument::Expression(Expression::Symbol(label1.clone())), + ], + )) +} + +fn substitute_symbols_with_values( + mut statements: Vec, + data_positions: &BTreeMap, +) -> Vec { + for s in &mut statements { + let Statement::Instruction(_name, args) = s else { + continue; + }; + for arg in args { + arg.post_visit_expressions_mut(&mut |expression| match expression { + Expression::Number(_) => {} + Expression::Symbol(symb) => { + if let Some(pos) = data_positions.get(symb) { + *expression = Expression::Number(*pos as i64) + } + } + Expression::UnaryOp(op, subexpr) => { + if let Expression::Number(num) = subexpr.as_ref() { + let result = match op { + UnaryOpKind::BitwiseNot => !num, + UnaryOpKind::Negation => -num, + }; + *expression = Expression::Number(result); + }; + } + Expression::BinaryOp(op, subexprs) => { + if let (Expression::Number(a), Expression::Number(b)) = + (&subexprs[0], &subexprs[1]) + { + let result = match op { + BinaryOpKind::Or => a | b, + BinaryOpKind::Xor => a ^ b, + BinaryOpKind::And => a & b, + BinaryOpKind::LeftShift => a << b, + BinaryOpKind::RightShift => a >> b, + BinaryOpKind::Add => a + b, + BinaryOpKind::Sub => a - b, + BinaryOpKind::Mul => a * b, + BinaryOpKind::Div => a / b, + BinaryOpKind::Mod => a % b, + }; + *expression = Expression::Number(result); + } + } + Expression::FunctionOp(op, subexpr) => { + if let Expression::Number(num) = subexpr.as_ref() { + let result = match op { + FunctionKind::HiDataRef => num >> 12, + FunctionKind::LoDataRef => num & 0xfff, + }; + *expression = Expression::Number(result); + }; + } + }); + } + } + statements +} + +fn process_statement(s: &Statement) -> Option> { + match s { + Statement::Label(l) => 
Some(code_gen::Statement::Label(l)), + Statement::Directive(directive, args) => match (directive.as_str(), &args[..]) { + ( + ".loc", + [Argument::Expression(Expression::Number(file)), Argument::Expression(Expression::Number(line)), Argument::Expression(Expression::Number(column)), ..], + ) => Some(code_gen::Statement::DebugLoc { + file: *file as u64, + line: *line as u64, + col: *column as u64, + }), + (".file", _) => { + // We ignore ".file" directives because they have been extracted to the top. + None + } + (".size", _) => { + // We ignore ".size" directives + None + } + _ if directive.starts_with(".cfi_") => None, + _ => panic!( + "Leftover directive in code: {directive} {}", + args.iter().format(", ") + ), + }, + Statement::Instruction(instr, args) => { + // TODO: maybe restore this debug info + /* + let stmt_str = format!("{s}"); + // remove indentation and trailing newline + let stmt_str = &stmt_str[2..(stmt_str.len() - 1)]; + let mut ret = vec![format!(" .debug insn \"{stmt_str}\";")]; + */ + Some(code_gen::Statement::Instruction { + op: instr, + args: args.as_slice(), + }) + } + } +} + +struct RiscvArchitecture {} + +impl Architecture for RiscvArchitecture { + fn instruction_ends_control_flow(instr: &str) -> bool { + match instr { + "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulh" + | "mulhu" | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" + | "ori" | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" + | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" + | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" + | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" + | "sh" | "sb" | "nop" | "fence" | "amoadd.w" | "amoadd.w.aq" | "amoadd.w.rl" + | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" | "sc.w" + | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, + "j" | "jr" | "tail" | "ret" | "unimp" => 
true, + _ => { + panic!("Unknown instruction: {instr}"); + } + } + } + + fn get_references< + 'a, + R: powdr_asm_utils::ast::Register, + F: powdr_asm_utils::ast::FunctionOpKind, + >( + instr: &str, + args: &'a [powdr_asm_utils::ast::Argument], + ) -> Vec<&'a str> { + // fence arguments are not symbols, they are like reserved + // keywords affecting the instruction behavior + if instr.starts_with("fence") { + Vec::new() + } else { + symbols_in_args(args) + } + } +} + +/// Maps an instruction in .insn syntax to Statement::Instruction() in the expected format. +/// +/// See https://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dFormats.html +pub fn map_insn_i( + opcode6: Expression, + func3: Expression, + rd: Register, + rs1: Register, + simm12: Expression, +) -> Statement { + let (Expression::Number(opcode6), Expression::Number(func3)) = (opcode6, func3) else { + panic!("Only literal opcode and function are supported in .insn syntax"); + }; + + // These are almost all instructions in the RISC-V Instruction Set Manual that + // we are supposed to implement and that roughly fit the pattern of the I-type + // instruction. Only "csr*i" instructions are missing. + + // First we try to match the instructions that use the I-type encoding + // ordinarily, i.e. 
where all fields are what they are supposed to be: + let name = match (opcode6, func3) { + (0b1100111, 0b000) => "jalr", + (0b0000011, 0b000) => "lb", + (0b0000011, 0b001) => "lh", + (0b0000011, 0b010) => "lw", + (0b0000011, 0b100) => "lbu", + (0b0000011, 0b101) => "lhu", + (0b0010011, 0b000) => "addi", + (0b0010011, 0b010) => "slti", + (0b0010011, 0b011) => "sltiu", + (0b0010011, 0b100) => "xori", + (0b0010011, 0b110) => "ori", + (0b0010011, 0b111) => "andi", + (0b1110011, 0b001) => "csrrw", + (0b1110011, 0b010) => "csrrs", + (0b1110011, 0b011) => "csrrc", + // won't interpret "csr*i" instructions because it is too weird to + // encode an immediate as a register + opfunc => { + // We now try the instructions that take certain liberties with the + // I-type encoding, and don't use the standard arguments for it. + let name = match opfunc { + (0b0001111, 0b000) => "fence", + (0b0001111, 0b001) => "fence.i", + (0b1110011, 0b000) => { + let Expression::Number(simm12) = simm12 else { + panic!( + "Only literal simm12 is supported for ecall and ebreak instructions" + ); + }; + match simm12 { + 0 => "ecall", + 1 => "ebreak", + _ => panic!("unknown instruction"), + } + } + _ => panic!("unsupported .insn instruction"), + }; + return Statement::Instruction(name.to_string(), Vec::new()); + } + }; + + let args = vec![ + Argument::Register(rd), + Argument::Register(rs1), + Argument::Expression(simm12), + ]; + + Statement::Instruction(name.to_string(), args) +} diff --git a/riscv/src/parser.rs b/riscv/src/asm_translate/parser.rs similarity index 87% rename from riscv/src/parser.rs rename to riscv/src/asm_translate/parser.rs index 88dd5f162..c5d1345ce 100644 --- a/riscv/src/parser.rs +++ b/riscv/src/asm_translate/parser.rs @@ -1,16 +1,15 @@ use lalrpop_util::*; -use crate::{ - compiler::{FunctionKind, Register}, - Statement, -}; +use crate::code_gen::{FunctionKind, Register}; use powdr_parser_util::handle_parse_error; +use super::Statement; + lalrpop_mod!( #[allow(clippy::all)] 
#[allow(clippy::uninlined_format_args)] riscv_asm, - "/riscv_asm.rs" + "/asm_translate/riscv_asm.rs" ); pub struct RiscParser { diff --git a/riscv/src/riscv_asm.lalrpop b/riscv/src/asm_translate/riscv_asm.lalrpop similarity index 98% rename from riscv/src/riscv_asm.lalrpop rename to riscv/src/asm_translate/riscv_asm.lalrpop index f876f30bb..50a8f9896 100644 --- a/riscv/src/riscv_asm.lalrpop +++ b/riscv/src/asm_translate/riscv_asm.lalrpop @@ -13,7 +13,7 @@ use std::str::FromStr; use powdr_asm_utils::ast::{unescape_string, BinaryOpKind as BOp, UnaryOpKind as UOp, new_binary_op as bin_op, new_unary_op as un_op, new_function_op as fn_op}; -use crate::{Argument, Register, Statement, FunctionKind as FOp, Expression, map_insn_i}; +use super::super::{Argument, Register, Statement, FunctionKind as FOp, Expression, map_insn_i}; grammar; diff --git a/riscv/src/compiler.rs b/riscv/src/code_gen.rs similarity index 69% rename from riscv/src/compiler.rs rename to riscv/src/code_gen.rs index ba53530be..aad2a7277 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/code_gen.rs @@ -1,27 +1,12 @@ -use std::{ - collections::{BTreeMap, BTreeSet, HashSet}, - fmt, -}; +use std::fmt; use itertools::Itertools; -use powdr_asm_utils::{ - ast::{BinaryOpKind, UnaryOpKind}, - data_parser, - data_storage::{store_data_objects, SingleDataValue}, - parser::parse_asm, - reachability::{self, symbols_in_args}, - utils::{ - argument_to_escaped_symbol, argument_to_number, escape_label, expression_to_number, quote, - }, - Architecture, -}; +use powdr_asm_utils::data_storage::SingleDataValue; +use powdr_asm_utils::utils::{escape_label, quote}; use powdr_number::{FieldElement, KnownField}; use crate::continuations::bootloader::{bootloader_and_shutdown_routine, bootloader_preamble}; -use crate::disambiguator; -use crate::parser::RiscParser; use crate::runtime::Runtime; -use crate::{Argument, Expression, Statement}; #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct Register { @@ -63,135 +48,115 @@ impl 
fmt::Display for FunctionKind { } } -struct RiscvArchitecture {} - -impl Architecture for RiscvArchitecture { - fn instruction_ends_control_flow(instr: &str) -> bool { - match instr { - "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulh" - | "mulhu" | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" - | "ori" | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" - | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" - | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" - | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" - | "sh" | "sb" | "nop" | "fence" | "fence.i" | "amoadd.w" | "amoadd.w.aq" - | "amoadd.w.rl" | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" - | "sc.w" | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, - "j" | "jr" | "tail" | "ret" | "unimp" => true, - _ => { - panic!("Unknown instruction: {instr}"); - } - } - } +pub enum Statement<'a, A: Args + ?Sized> { + DebugLoc { file: u64, line: u64, col: u64 }, + Label(&'a str), + Instruction { op: &'a str, args: &'a A }, +} - fn get_references< - 'a, - R: powdr_asm_utils::ast::Register, - F: powdr_asm_utils::ast::FunctionOpKind, - >( - instr: &str, - args: &'a [powdr_asm_utils::ast::Argument], - ) -> Vec<&'a str> { - // fence arguments are not symbols, they are like reserved - // keywords affecting the instruction behavior - if instr.starts_with("fence") { - Vec::new() - } else { - symbols_in_args(args) - } - } +pub struct MemEntry { + pub label: Option, + pub addr: u32, + pub value: SingleDataValue, +} + +pub struct SourceFileInfo<'a> { + pub id: u32, + pub dir: &'a str, + pub file: &'a str, } -/// Compiles riscv assembly to a powdr assembly file. Adds required library routines. -pub fn compile( - mut assemblies: BTreeMap, +/// A RISC-V program that can be translated to POWDR ASM. 
+pub trait RiscVProgram { + type InstructionArgs: Args + ?Sized; + type Label; + + // Source files to be used by the debug statements. + fn source_files_info(&self) -> impl Iterator; + + fn initial_mem(&self) -> impl Iterator; + fn executable_statements(&self) -> impl Iterator>; + fn start_function(&self) -> &str; +} + +pub fn translate_program( + program: &impl RiscVProgram, runtime: &Runtime, with_bootloader: bool, ) -> String { - // stack grows towards zero - let stack_start = 0x10000; - // data grows away from zero - let data_start = 0x10100; - - assert!(assemblies - .insert("__runtime".to_string(), runtime.global_declarations()) - .is_none()); - - // TODO remove unreferenced files. - let (mut statements, file_ids) = disambiguator::disambiguate( - assemblies - .into_iter() - .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) - .collect(), - ); - let mut data_sections = data_parser::extract_data_objects(&statements); - - // Reduce to the code that is actually reachable from main - // (and the objects that are referred from there) - let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( - "__runtime_start", - &mut statements, - &mut data_sections, - ); - - // Replace dynamic references to code labels - replace_dynamic_label_references(&mut statements, &data_labels); + // Do this in a separate function to avoid most of the code being generic on F. 
+ let (initial_mem, instructions, degree) = + translate_program_impl(program, runtime, with_bootloader); + riscv_machine( + runtime, + degree, + &preamble::(runtime, with_bootloader), + initial_mem, + instructions, + ) +} + +fn translate_program_impl( + program: &impl RiscVProgram, + runtime: &Runtime, + with_bootloader: bool, +) -> (Vec, Vec, u64) { let mut initial_mem = Vec::new(); let mut data_code = Vec::new(); - let data_positions = - store_data_objects(data_sections, data_start, &mut |label, addr, value| { - if let Some(label) = label { - let comment = format!(" // data {label}"); - if with_bootloader && !matches!(value, SingleDataValue::LabelReference(_)) { - &mut initial_mem + for MemEntry { label, addr, value } in program.initial_mem() { + if let Some(label) = label { + // This is a comment, so we don't need to escape the label. + let comment = format!(" // data {label}"); + if with_bootloader && !matches!(value, SingleDataValue::LabelReference(_)) { + &mut initial_mem + } else { + &mut data_code + } + .push(comment); + } + match value { + SingleDataValue::Value(v) => { + if with_bootloader { + // Instead of generating the data loading code, we store it + // in the variable that will be used as the initial memory + // snapshot, committed by the bootloader. + initial_mem.push(format!("(0x{addr:x}, 0x{v:x})")); } else { - &mut data_code + // There is no bootloader to commit to memory, so we have to + // load it explicitly. + data_code.push(format!("mstore 0x{addr:x}, 0x{v:x};")); } - .push(comment); } - match value { - SingleDataValue::Value(v) => { - if with_bootloader { - // Instead of generating the data loading code, we store it - // in the variable that will be used as the initial memory - // snapshot, committed by the bootloader. - initial_mem.push(format!("(0x{addr:x}, 0x{v:x})")); - } else { - // There is no bootloader to commit to memory, so we have to - // load it explicitly. 
- data_code.push(format!("mstore 0x{addr:x}, 0x{v:x};")); - } - } - SingleDataValue::LabelReference(sym) => { - // The label value is not known at this point, so we have to - // load it via code, irrespectively of bootloader availability. - // - // TODO should be possible without temporary - data_code.extend([ - format!("tmp1 <== load_label({});", escape_label(sym)), - format!("mstore 0x{addr:x}, tmp1;"), - ]); - } - SingleDataValue::Offset(_, _) => { - unimplemented!(); - /* - object_code.push(format!("addr <=X= 0x{pos:x};")); - - I think this solution should be fine but hard to say without - an actual code snippet that uses it. - - // TODO should be possible without temporary - object_code.extend([ - format!("tmp1 <== load_label({});", escape_label(a)), - format!("tmp2 <== load_label({});", escape_label(b)), - // TODO check if registers match - "mstore wrap(tmp1 - tmp2);".to_string(), - ]); - */ - } + SingleDataValue::LabelReference(sym) => { + // The label value is not known at this point, so we have to + // load it via code, irrespectively of bootloader availability. + // + // TODO should be possible without temporary + data_code.extend([ + format!("tmp1 <== load_label({});", escape_label(&sym)), + format!("mstore 0x{addr:x}, tmp1;"), + ]); + } + SingleDataValue::Offset(_, _) => { + unimplemented!(); + /* + object_code.push(format!("addr <=X= 0x{pos:x};")); + + I think this solution should be fine but hard to say without + an actual code snippet that uses it. 
+ + // TODO should be possible without temporary + object_code.extend([ + format!("tmp1 <== load_label({});", escape_label(a)), + format!("tmp2 <== load_label({});", escape_label(b)), + // TODO check if registers match + "mstore wrap(tmp1 - tmp2);".to_string(), + ]); + */ } - }); + } + } let submachines_init = runtime.submachines_init(); let bootloader_and_shutdown_routine_lines = if with_bootloader { @@ -205,26 +170,42 @@ pub fn compile( submachines_init }; - let mut program: Vec = file_ids - .into_iter() - .map(|(id, dir, file)| format!(".debug file {id} {} {};", quote(&dir), quote(&file))) + let mut statements: Vec = program + .source_files_info() + .map( + |SourceFileInfo { + id, + dir, + file: name, + }| { format!(".debug file {id} {} {};", quote(dir), quote(name)) }, + ) .chain(bootloader_and_shutdown_routine_lines) .collect(); if !data_code.is_empty() { - program.push("x1 <== jump(__data_init);".to_string()); + statements.push("x1 <== jump(__data_init);".to_string()); } - program.extend([ - format!("// Set stack pointer\nx2 <=X= {stack_start};"), - "x1 <== jump(__runtime_start);".to_string(), + statements.extend([ + format!("x1 <== jump({});", program.start_function()), "return;".to_string(), // This is not "riscv ret", but "return from powdr asm function". ]); - program.extend( - substitute_symbols_with_values(statements, &data_positions) - .into_iter() - .flat_map(process_statement), - ); + for s in program.executable_statements() { + match s { + Statement::DebugLoc { file, line, col } => { + statements.push(format!(".debug loc {file} {line} {col};")) + } + Statement::Label(l) => statements.push(format!("{}:", escape_label(l))), + Statement::Instruction { op, args } => { + let processed_instr = match process_instruction(op, args) { + Ok(s) => s, + Err(e) => panic!("Failed to process instruction '{op}'. 
{e}"), + }; + statements.extend(processed_instr.into_iter().map(|s| " ".to_string() + &s)) + } + } + } + if !data_code.is_empty() { - program.extend( + statements.extend( ["// This is the data initialization routine.\n__data_init:".to_string()].into_iter() .chain(data_code) .chain([ @@ -232,10 +213,10 @@ pub fn compile( .to_string(), ])); } - program.extend(runtime.ecall_handler()); + statements.extend(runtime.ecall_handler()); // The program ROM needs to fit the degree, so we use the next power of 2. - let degree = program.len().ilog2() + 1; + let degree = statements.len().ilog2() + 1; let degree = std::cmp::max(degree, 18); log::info!("Inferred degree 2^{degree}"); @@ -250,157 +231,7 @@ pub fn compile( assert!((18..=20).contains(°ree)); let degree = 1 << degree; - riscv_machine( - runtime, - degree, - &preamble::(runtime, with_bootloader), - initial_mem, - program, - ) -} - -/// Replace certain patterns of references to code labels by -/// special instructions. We ignore any references to data objects -/// because they will be handled differently. -fn replace_dynamic_label_references(statements: &mut Vec, data_labels: &HashSet<&str>) { - /* - Find patterns of the form - lui a0, %hi(LABEL) - addi s10, a0, %lo(LABEL) - - - turn this into the pseudoinstruction - li s10, LABEL - which is then turned into - - s10 <== load_label(LABEL) - - It gets complicated by the fact that sometimes, labels - and debugging directives occur between the two statements - matching that pattern... 
- */ - let instruction_indices = statements - .iter() - .enumerate() - .filter_map(|(i, s)| match s { - Statement::Instruction(_, _) => Some(i), - _ => None, - }) - .collect::>(); - - let mut to_delete = BTreeSet::default(); - for (i1, i2) in instruction_indices.into_iter().tuple_windows() { - if let Some(r) = - replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels) - { - to_delete.insert(i1); - statements[i2] = r; - } - } - - let mut i = 0; - statements.retain(|_| (!to_delete.contains(&i), i += 1).0); -} - -fn replace_dynamic_label_reference( - s1: &Statement, - s2: &Statement, - data_labels: &HashSet<&str>, -) -> Option { - let Statement::Instruction(instr1, args1) = s1 else { - return None; - }; - let Statement::Instruction(instr2, args2) = s2 else { - return None; - }; - if instr1.as_str() != "lui" || instr2.as_str() != "addi" { - return None; - }; - let [Argument::Register(r1), Argument::Expression(Expression::FunctionOp(FunctionKind::HiDataRef, expr1))] = - &args1[..] - else { - return None; - }; - // Maybe should try to reduce expr1 and expr2 before comparing deciding it is a pure symbol? - let Expression::Symbol(label1) = expr1.as_ref() else { - return None; - }; - let [Argument::Register(r2), Argument::Register(r3), Argument::Expression(Expression::FunctionOp(FunctionKind::LoDataRef, expr2))] = - &args2[..] 
- else { - return None; - }; - let Expression::Symbol(label2) = expr2.as_ref() else { - return None; - }; - if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) { - return None; - } - Some(Statement::Instruction( - "li".to_string(), - vec![ - Argument::Register(*r2), - Argument::Expression(Expression::Symbol(label1.clone())), - ], - )) -} - -fn substitute_symbols_with_values( - mut statements: Vec, - data_positions: &BTreeMap, -) -> Vec { - for s in &mut statements { - let Statement::Instruction(_name, args) = s else { - continue; - }; - for arg in args { - arg.post_visit_expressions_mut(&mut |expression| match expression { - Expression::Number(_) => {} - Expression::Symbol(symb) => { - if let Some(pos) = data_positions.get(symb) { - *expression = Expression::Number(*pos as i64) - } - } - Expression::UnaryOp(op, subexpr) => { - if let Expression::Number(num) = subexpr.as_ref() { - let result = match op { - UnaryOpKind::BitwiseNot => !num, - UnaryOpKind::Negation => -num, - }; - *expression = Expression::Number(result); - }; - } - Expression::BinaryOp(op, subexprs) => { - if let (Expression::Number(a), Expression::Number(b)) = - (&subexprs[0], &subexprs[1]) - { - let result = match op { - BinaryOpKind::Or => a | b, - BinaryOpKind::Xor => a ^ b, - BinaryOpKind::And => a & b, - BinaryOpKind::LeftShift => a << b, - BinaryOpKind::RightShift => a >> b, - BinaryOpKind::Add => a + b, - BinaryOpKind::Sub => a - b, - BinaryOpKind::Mul => a * b, - BinaryOpKind::Div => a / b, - BinaryOpKind::Mod => a % b, - }; - *expression = Expression::Number(result); - } - } - Expression::FunctionOp(op, subexpr) => { - if let Expression::Number(num) = subexpr.as_ref() { - let result = match op { - FunctionKind::HiDataRef => num >> 12, - FunctionKind::LoDataRef => num & 0xfff, - }; - *expression = Expression::Number(result); - }; - } - }); - } - } - statements + (initial_mem, statements, degree) } fn riscv_machine( @@ -812,47 +643,8 @@ fn memory(with_bootloader: bool) -> 
String { "# } -fn process_statement(s: Statement) -> Vec { - match &s { - Statement::Label(l) => vec![format!("{}:", escape_label(l))], - Statement::Directive(directive, args) => match (directive.as_str(), &args[..]) { - ( - ".loc", - [Argument::Expression(Expression::Number(file)), Argument::Expression(Expression::Number(line)), Argument::Expression(Expression::Number(column)), ..], - ) => { - vec![format!(" .debug loc {file} {line} {column};")] - } - (".file", _) => { - // We ignore ".file" directives because they have been extracted to the top. - vec![] - } - (".size", _) => { - // We ignore ".size" directives - vec![] - } - _ if directive.starts_with(".cfi_") => vec![], - _ => panic!( - "Leftover directive in code: {directive} {}", - args.iter().format(", ") - ), - }, - Statement::Instruction(instr, args) => { - let stmt_str = format!("{s}"); - // remove indentation and trailing newline - let stmt_str = &stmt_str[2..(stmt_str.len() - 1)]; - let mut ret = vec![format!(" .debug insn \"{stmt_str}\";")]; - let processed_instr = match process_instruction(instr, &args[..]) { - Ok(s) => s, - Err(e) => panic!("Failed to process instruction '{instr}'. 
{e}"), - }; - ret.extend(processed_instr.into_iter().map(|s| " ".to_string() + &s)); - ret - } - } -} - -trait Args { - type Error; +pub trait Args { + type Error: fmt::Display; fn l(&self) -> Result; fn r(&self) -> Result; @@ -867,118 +659,6 @@ trait Args { fn empty(&self) -> Result<(), Self::Error>; } -impl Args for [Argument] { - type Error = &'static str; - - fn l(&self) -> Result { - const ERR: &str = "Expected: label"; - match self { - [l] => Ok(argument_to_escaped_symbol(l).ok_or(ERR)?), - _ => Err(ERR), - } - } - - fn r(&self) -> Result { - match self { - [Argument::Register(r1)] => Ok(*r1), - _ => Err("Expected: register"), - } - } - - fn rri(&self) -> Result<(Register, Register, u32), &'static str> { - const ERR: &str = "Expected: register, register, immediate"; - match self { - [Argument::Register(r1), Argument::Register(r2), n] => { - Ok((*r1, *r2, argument_to_number(n).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn rrr(&self) -> Result<(Register, Register, Register), &'static str> { - match self { - [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => { - Ok((*r1, *r2, *r3)) - } - _ => Err("Expected: register, register, register"), - } - } - - fn ri(&self) -> Result<(Register, u32), &'static str> { - const ERR: &str = "Expected: register, immediate"; - match self { - [Argument::Register(r1), n] => Ok((*r1, argument_to_number(n).ok_or(ERR)?)), - _ => Err(ERR), - } - } - - fn rr(&self) -> Result<(Register, Register), &'static str> { - match self { - [Argument::Register(r1), Argument::Register(r2)] => Ok((*r1, *r2)), - _ => Err("Expected: register, register"), - } - } - - fn rrl(&self) -> Result<(Register, Register, String), &'static str> { - const ERR: &str = "Expected: register, register, label"; - match self { - [Argument::Register(r1), Argument::Register(r2), l] => { - Ok((*r1, *r2, argument_to_escaped_symbol(l).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn rl(&self) -> Result<(Register, String), &'static str> { - const 
ERR: &str = "Expected: register, label"; - match self { - [Argument::Register(r1), l] => Ok((*r1, argument_to_escaped_symbol(l).ok_or(ERR)?)), - _ => Err(ERR), - } - } - - fn rro(&self) -> Result<(Register, Register, u32), &'static str> { - if let [Argument::Register(r1), Argument::RegOffset(off, r2)] = self { - if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) - { - return Ok((*r1, *r2, off)); - } - } - if let [Argument::Register(r1), Argument::Expression(off)] = self { - if let Some(off) = expression_to_number(off) { - // If the register is not specified, it defaults to x0 - return Ok((*r1, Register::new(0), off)); - } - } - - Err("Expected: register, offset(register)") - } - - fn rrro(&self) -> Result<(Register, Register, Register, u32), &'static str> { - if let [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] = self - { - if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) - { - return Ok((*r1, *r2, *r3, off)); - } - } - if let [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] = self { - if let Some(off) = expression_to_number(off) { - // If the register is not specified, it defaults to x0 - return Ok((*r1, *r2, Register::new(0), off)); - } - } - Err("Expected: register, register, offset(register)") - } - - fn empty(&self) -> Result<(), &'static str> { - match self { - [] => Ok(()), - _ => Err("Expected: no arguments"), - } - } -} - fn only_if_no_write_to_zero(statement: String, reg: Register) -> Vec { only_if_no_write_to_zero_vec(vec![statement], reg) } diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index 5bc61fb5e..e97ce513c 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -13,19 +13,13 @@ use powdr_number::FieldElement; use serde_json::Value as JsonValue; use std::fs; -use crate::compiler::{FunctionKind, Register}; pub use crate::runtime::Runtime; -pub mod compiler; +pub mod asm_translate; +mod code_gen; pub mod 
continuations; -mod disambiguator; -pub mod parser; pub mod runtime; -type Statement = powdr_asm_utils::ast::Statement; -type Argument = powdr_asm_utils::ast::Argument; -type Expression = powdr_asm_utils::ast::Expression; - /// Compiles a rust file all the way down to PIL and generates /// fixed and witness columns. #[allow(clippy::print_stderr)] @@ -105,7 +99,7 @@ pub fn compile_riscv_asm_bundle( return None; } - let powdr_asm = compiler::compile::(riscv_asm_files, runtime, with_bootloader); + let powdr_asm = asm_translate::compile::(riscv_asm_files, runtime, with_bootloader); fs::write(powdr_asm_file_name.clone(), &powdr_asm).unwrap(); log::info!("Wrote {}", powdr_asm_file_name.to_str().unwrap()); @@ -263,74 +257,3 @@ fn output_files_from_cargo_build_plan( assemblies } - -/// Maps an instruction in .insn syntax to Statement::Instruction() in the expected format. -/// -/// See https://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dFormats.html -pub fn map_insn_i( - opcode6: Expression, - func3: Expression, - rd: Register, - rs1: Register, - simm12: Expression, -) -> Statement { - let (Expression::Number(opcode6), Expression::Number(func3)) = (opcode6, func3) else { - panic!("Only literal opcode and function are supported in .insn syntax"); - }; - - // These are almost all instructions in RISC-V Instruction Set Manual that - // we are supposed to implement and roughly fits the pattern of the I-type - // instruction. Only "csr*i" instructions are missing. - - // First we try to match the instructions that uses the I-type encoding - // ordinarily, i.e. 
where all fields are what they are supposed to be: - let name = match (opcode6, func3) { - (0b1100111, 0b000) => "jalr", - (0b0000011, 0b000) => "lb", - (0b0000011, 0b001) => "lh", - (0b0000011, 0b010) => "lw", - (0b0000011, 0b100) => "lbu", - (0b0000011, 0b101) => "lhu", - (0b0010011, 0b000) => "addi", - (0b0010011, 0b010) => "slti", - (0b0010011, 0b011) => "sltiu", - (0b0010011, 0b100) => "xori", - (0b0010011, 0b110) => "ori", - (0b0010011, 0b111) => "andi", - (0b1110011, 0b001) => "csrrw", - (0b1110011, 0b010) => "csrrs", - (0b1110011, 0b011) => "csrrc", - // won't interpret "csr*i" instructions because it is too weird to - // encode an immediate as a register - opfunc => { - // We now try the instructions that take certain liberties with the - // I-type encoding, and don't use the standard arguments for it. - let name = match opfunc { - (0b0001111, 0b000) => "fence", - (0b0001111, 0b001) => "fence.i", - (0b1110011, 0b000) => { - let Expression::Number(simm12) = simm12 else { - panic!( - "Only literal simm12 is supported for ecall and ebreak instructions" - ); - }; - match simm12 { - 0 => "ecall", - 1 => "ebreak", - _ => panic!("unknown instruction"), - } - } - _ => panic!("unsupported .insn instruction"), - }; - return Statement::Instruction(name.to_string(), Vec::new()); - } - }; - - let args = vec![ - Argument::Register(rd), - Argument::Register(rs1), - Argument::Expression(simm12), - ]; - - Statement::Instruction(name.to_string(), args) -} diff --git a/riscv/src/runtime.rs b/riscv/src/runtime.rs index 3b08f7264..60ac65f6d 100644 --- a/riscv/src/runtime.rs +++ b/riscv/src/runtime.rs @@ -7,7 +7,7 @@ use powdr_ast::parsed::asm::{FunctionStatement, MachineStatement, SymbolPath}; use itertools::Itertools; use powdr_parser::ParserContext; -use crate::compiler::{pop_register, push_register}; +use crate::code_gen::{pop_register, push_register}; static EXTRA_REG_PREFIX: &str = "xtra"; @@ -436,7 +436,7 @@ impl Runtime { .collect() } - pub fn global_declarations(&self) 
-> String { + pub fn global_declarations(&self, stack_start: u32) -> String { [ "__divdi3", "__udivdi3", @@ -475,6 +475,12 @@ impl Runtime { __rust_alloc_error_handler_should_panic: .byte 0 .globl __rust_no_alloc_shim_is_unstable __rust_no_alloc_shim_is_unstable: .byte 0 +.globl __powdr_stack_start +" + + &format!( + ".set __powdr_stack_start, {stack_start}", + ) + + r" .text " } diff --git a/riscv/tests/instructions.rs b/riscv/tests/instructions.rs index 1476631a7..692162ca8 100644 --- a/riscv/tests/instructions.rs +++ b/riscv/tests/instructions.rs @@ -4,7 +4,7 @@ mod instruction_tests { use crate::common::verify_riscv_asm_string; use powdr_backend::BackendType; use powdr_number::GoldilocksField; - use powdr_riscv::compiler::compile; + use powdr_riscv::asm_translate::compile; use powdr_riscv::Runtime; use test_log::test; diff --git a/riscv/tests/riscv.rs b/riscv/tests/riscv.rs index e6689cdd8..a8fdc4eab 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -22,7 +22,8 @@ pub fn test_continuations(case: &str) { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = powdr_riscv::compiler::compile::(riscv_asm, &runtime, true); + let powdr_asm = + powdr_riscv::asm_translate::compile::(riscv_asm, &runtime, true); // Manually create tmp dir, so that it is the same in all chunks. 
let tmp_dir = mktemp::Temp::new_dir().unwrap(); @@ -274,7 +275,8 @@ fn many_chunks_dry() { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = powdr_riscv::compiler::compile::(riscv_asm, &runtime, true); + let powdr_asm = + powdr_riscv::asm_translate::compile::(riscv_asm, &runtime, true); let mut pipeline = Pipeline::default() .from_asm_string(powdr_asm, Some(PathBuf::from(case))) @@ -299,7 +301,8 @@ fn output_syscall() { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = powdr_riscv::compiler::compile::(riscv_asm, &runtime, false); + let powdr_asm = + powdr_riscv::asm_translate::compile::(riscv_asm, &runtime, false); let inputs = vec![1u32, 2, 3] .into_iter() @@ -373,5 +376,5 @@ fn compile_riscv_crate(case: &str, runtime: &Runtime) -> String &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - powdr_riscv::compiler::compile::(riscv_asm, runtime, false) + powdr_riscv::asm_translate::compile::(riscv_asm, runtime, false) } From 3c71255a96bc2ec755446f678a1db4e821a68e8f Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 5 Jun 2024 16:43:12 +0200 Subject: [PATCH 12/88] Make Label generic. 
--- riscv/src/asm_translate/mod.rs | 10 +++++----- riscv/src/code_gen.rs | 22 ++++++++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm_translate/mod.rs index 48a102aff..60eab96f5 100644 --- a/riscv/src/asm_translate/mod.rs +++ b/riscv/src/asm_translate/mod.rs @@ -34,9 +34,9 @@ struct AsmProgram { statements: Vec, } -impl RiscVProgram for AsmProgram { +impl<'a> RiscVProgram<'a> for AsmProgram { type InstructionArgs = [Argument]; - type Label = String; + type Label = &'a str; fn source_files_info(&self) -> impl Iterator { self.file_ids.iter().map(|(id, dir, file)| SourceFileInfo { @@ -51,8 +51,8 @@ impl RiscVProgram for AsmProgram { } fn executable_statements( - &self, - ) -> impl Iterator> { + &'a self, + ) -> impl Iterator> { self.statements.iter().filter_map(process_statement) } @@ -376,7 +376,7 @@ fn substitute_symbols_with_values( statements } -fn process_statement(s: &Statement) -> Option> { +fn process_statement(s: &Statement) -> Option> { match s { Statement::Label(l) => Some(code_gen::Statement::Label(l)), Statement::Directive(directive, args) => match (directive.as_str(), &args[..]) { diff --git a/riscv/src/code_gen.rs b/riscv/src/code_gen.rs index aad2a7277..c54304c5a 100644 --- a/riscv/src/code_gen.rs +++ b/riscv/src/code_gen.rs @@ -48,9 +48,9 @@ impl fmt::Display for FunctionKind { } } -pub enum Statement<'a, A: Args + ?Sized> { +pub enum Statement<'a, L: AsRef, A: Args + ?Sized> { DebugLoc { file: u64, line: u64, col: u64 }, - Label(&'a str), + Label(L), Instruction { op: &'a str, args: &'a A }, } @@ -67,20 +67,22 @@ pub struct SourceFileInfo<'a> { } /// A RISC-V program that can be translated to POWDR ASM. -pub trait RiscVProgram { +pub trait RiscVProgram<'a> { type InstructionArgs: Args + ?Sized; - type Label; + type Label: AsRef + 'a; // Source files to be used by the debug statements. 
fn source_files_info(&self) -> impl Iterator; fn initial_mem(&self) -> impl Iterator; - fn executable_statements(&self) -> impl Iterator>; + fn executable_statements( + &'a self, + ) -> impl Iterator>; fn start_function(&self) -> &str; } -pub fn translate_program( - program: &impl RiscVProgram, +pub fn translate_program<'a, F: FieldElement>( + program: &'a impl RiscVProgram<'a>, runtime: &Runtime, with_bootloader: bool, ) -> String { @@ -97,8 +99,8 @@ pub fn translate_program( ) } -fn translate_program_impl( - program: &impl RiscVProgram, +fn translate_program_impl<'a>( + program: &'a impl RiscVProgram<'a>, runtime: &Runtime, with_bootloader: bool, ) -> (Vec, Vec, u64) { @@ -193,7 +195,7 @@ fn translate_program_impl( Statement::DebugLoc { file, line, col } => { statements.push(format!(".debug loc {file} {line} {col};")) } - Statement::Label(l) => statements.push(format!("{}:", escape_label(l))), + Statement::Label(l) => statements.push(format!("{}:", escape_label(l.as_ref()))), Statement::Instruction { op, args } => { let processed_instr = match process_instruction(op, args) { Ok(s) => s, From bc5365738ca80e67d8eaf541aac124de40f8051d Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 5 Jun 2024 17:39:10 +0200 Subject: [PATCH 13/88] Setting the stack pointer. --- riscv/src/runtime.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/riscv/src/runtime.rs b/riscv/src/runtime.rs index 60ac65f6d..d88f8edfa 100644 --- a/riscv/src/runtime.rs +++ b/riscv/src/runtime.rs @@ -470,19 +470,17 @@ impl Runtime { // - __rust_alloc_error_handler_should_panic: needed by the default alloc error handler, // not sure why it's not present in the asm. 
// https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 - r".data + &format!(r".data .globl __rust_alloc_error_handler_should_panic __rust_alloc_error_handler_should_panic: .byte 0 .globl __rust_no_alloc_shim_is_unstable __rust_no_alloc_shim_is_unstable: .byte 0 -.globl __powdr_stack_start -" - + &format!( - ".set __powdr_stack_start, {stack_start}", - ) - + r" .text -" +.globl _start +_start: + li sp, {stack_start} + tail __runtime_start +") } pub fn ecall_handler(&self) -> Vec { From 708dfee4bf73b60c6b321c482e37e915f28a9e74 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Wed, 5 Jun 2024 18:33:33 +0200 Subject: [PATCH 14/88] Using a non-conflicting start function name. --- riscv/src/asm_translate/mod.rs | 6 ++++-- riscv/src/runtime.rs | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm_translate/mod.rs index 60eab96f5..fdab1ce7c 100644 --- a/riscv/src/asm_translate/mod.rs +++ b/riscv/src/asm_translate/mod.rs @@ -34,6 +34,8 @@ struct AsmProgram { statements: Vec, } +const START_FUNCTION: &str = "__stack_setup"; + impl<'a> RiscVProgram<'a> for AsmProgram { type InstructionArgs = [Argument]; type Label = &'a str; @@ -57,7 +59,7 @@ impl<'a> RiscVProgram<'a> for AsmProgram { } fn start_function(&self) -> &str { - "_start" + START_FUNCTION } } @@ -209,7 +211,7 @@ fn compile_internal(mut assemblies: BTreeMap, runtime: &Runtime) // Reduce to the code that is actually reachable from main // (and the objects that are referred from there) let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( - "_start", + START_FUNCTION, &mut statements, &mut data_sections, ); diff --git a/riscv/src/runtime.rs b/riscv/src/runtime.rs index d88f8edfa..6f9875af7 100644 --- a/riscv/src/runtime.rs +++ b/riscv/src/runtime.rs @@ -476,8 +476,8 @@ __rust_alloc_error_handler_should_panic: .byte 0 .globl __rust_no_alloc_shim_is_unstable 
__rust_no_alloc_shim_is_unstable: .byte 0 .text -.globl _start -_start: +.globl __stack_setup +__stack_setup: li sp, {stack_start} tail __runtime_start ") From 856a128ee836d1861898120b572b78b0fa546011 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Thu, 6 Jun 2024 19:04:38 +0200 Subject: [PATCH 15/88] Moving global declarations to asm_translate. --- riscv/src/asm_translate/mod.rs | 56 ++++++++++++++++++++++++++++++---- riscv/src/runtime.rs | 47 ---------------------------- 2 files changed, 50 insertions(+), 53 deletions(-) diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm_translate/mod.rs index fdab1ce7c..e77940a5f 100644 --- a/riscv/src/asm_translate/mod.rs +++ b/riscv/src/asm_translate/mod.rs @@ -181,22 +181,19 @@ pub fn compile( runtime: &Runtime, with_bootloader: bool, ) -> String { - let asm_program = compile_internal(assemblies, runtime); + let asm_program = compile_internal(assemblies); code_gen::translate_program::(&asm_program, runtime, with_bootloader) } -fn compile_internal(mut assemblies: BTreeMap, runtime: &Runtime) -> AsmProgram { +fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { // stack grows towards zero let stack_start = 0x10000000; // data grows away from zero let data_start = 0x10000100; assert!(assemblies - .insert( - "__runtime".to_string(), - runtime.global_declarations(stack_start) - ) + .insert("__runtime".to_string(), global_declarations(stack_start)) .is_none()); // TODO remove unreferenced files. 
@@ -529,3 +526,50 @@ pub fn map_insn_i( Statement::Instruction(name.to_string(), args) } + +fn global_declarations(stack_start: u32) -> String { + [ + "__divdi3", + "__udivdi3", + "__udivti3", + "__divdf3", + "__muldf3", + "__moddi3", + "__umoddi3", + "__umodti3", + "__eqdf2", + "__ltdf2", + "__nedf2", + "__unorddf2", + "__floatundidf", + "__extendsfdf2", + "memcpy", + "memmove", + "memset", + "memcmp", + "bcmp", + "strlen", + ] + .map(|n| format!(".globl {n}@plt\n.globl {n}\n.set {n}@plt, {n}\n")) + .join("\n\n") + + &[("__rust_alloc_error_handler", "__rg_oom")] + .map(|(n, m)| format!(".globl {n}\n.set {n}, {m}\n")) + .join("\n\n") + + + // some extra symbols expected by rust code: + // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment that this feature is unstable. + // - __rust_alloc_error_handler_should_panic: needed by the default alloc error handler, + // not sure why it's not present in the asm. + // https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 + &format!(r".data +.globl __rust_alloc_error_handler_should_panic +__rust_alloc_error_handler_should_panic: .byte 0 +.globl __rust_no_alloc_shim_is_unstable +__rust_no_alloc_shim_is_unstable: .byte 0 +.text +.globl __stack_setup +__stack_setup: +li sp, {stack_start} +tail __runtime_start +") +} diff --git a/riscv/src/runtime.rs b/riscv/src/runtime.rs index 6f9875af7..564ef3813 100644 --- a/riscv/src/runtime.rs +++ b/riscv/src/runtime.rs @@ -436,53 +436,6 @@ impl Runtime { .collect() } - pub fn global_declarations(&self, stack_start: u32) -> String { - [ - "__divdi3", - "__udivdi3", - "__udivti3", - "__divdf3", - "__muldf3", - "__moddi3", - "__umoddi3", - "__umodti3", - "__eqdf2", - "__ltdf2", - "__nedf2", - "__unorddf2", - "__floatundidf", - "__extendsfdf2", - "memcpy", - "memmove", - "memset", - "memcmp", - "bcmp", - "strlen", - ] - .map(|n| format!(".globl {n}@plt\n.globl {n}\n.set {n}@plt, {n}\n")) - .join("\n\n") - + 
&[("__rust_alloc_error_handler", "__rg_oom")] - .map(|(n, m)| format!(".globl {n}\n.set {n}, {m}\n")) - .join("\n\n") - + - // some extra symbols expected by rust code: - // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment that this feature is unstable. - // - __rust_alloc_error_handler_should_panic: needed by the default alloc error handler, - // not sure why it's not present in the asm. - // https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 - &format!(r".data -.globl __rust_alloc_error_handler_should_panic -__rust_alloc_error_handler_should_panic: .byte 0 -.globl __rust_no_alloc_shim_is_unstable -__rust_no_alloc_shim_is_unstable: .byte 0 -.text -.globl __stack_setup -__stack_setup: - li sp, {stack_start} - tail __runtime_start -") - } - pub fn ecall_handler(&self) -> Vec { let ecall = [ "// ecall handler".to_string(), From adfc41d82b80dd0e8f13433ef4a71a9aec6982dc Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Thu, 6 Jun 2024 19:03:22 +0200 Subject: [PATCH 16/88] Doing RISC-V initializations on runtime lib, with linker support. 
--- asm-utils/src/data_parser.rs | 32 +++++++++++++++++++++++++-- asm-utils/src/data_storage.rs | 25 +++++++++------------ asm-utils/src/reachability.rs | 19 +++++++++++----- riscv-runtime/src/lib.rs | 40 ++++++++++++++++++++++++---------- riscv/src/asm_translate/mod.rs | 37 +++++++++++++++++++++---------- 5 files changed, 107 insertions(+), 46 deletions(-) diff --git a/asm-utils/src/data_parser.rs b/asm-utils/src/data_parser.rs index 60162b3af..d4bc4d51c 100644 --- a/asm-utils/src/data_parser.rs +++ b/asm-utils/src/data_parser.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use crate::{ ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement}, utils::{alignment_size, split_at_first}, @@ -63,6 +65,16 @@ impl DataSections { fn append_section(&mut self) { self.sections.push(Vec::new()) } + + fn add_empty_section(&mut self, label: String) { + self.sections.push(vec![(Some(label), Vec::new())]); + + // If there are other sections, the previous one is the active one, so we swap. + let len = self.sections.len(); + if len > 1 { + self.sections.swap(len - 1, len - 2); + } + } } /// Extract all data objects from the list of statements. @@ -70,7 +82,11 @@ impl DataSections { /// in the order in which they occur in the statements. pub fn extract_data_objects( statements: &[Statement], -) -> Vec, Vec)>> { +) -> ( + Vec, Vec)>>, + BTreeMap, +) { + let mut adhoc_symbols = BTreeMap::new(); let mut data = DataSections::new(); let mut is_in_data_section = false; @@ -142,6 +158,18 @@ pub fn extract_data_objects( )); } } + ( + ".set", + [Argument::Expression(Expression::Symbol(label)), Argument::Expression(Expression::Number(value))], + ) => { + // This is a directive that sets a symbol to a value. We + // create a phantom empty data section so reachability is + // happy, but we also save it so we can replace the symbol + // with the value when needed. 
+ data.add_empty_section(label.clone()); + adhoc_symbols.insert(label.clone(), *value as u32); + } + (n @ ".balign" | n @ ".p2align", arg) => { // TODO: implement last optional argument of .balign and .p2align unimplemented!("{n} {arg:?}"); @@ -151,7 +179,7 @@ pub fn extract_data_objects( _ => {} } } - data.sections + (data.sections, adhoc_symbols) } fn is_data_section(arg: &Argument) -> bool { diff --git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs index 93188ddc1..f8fb8183e 100644 --- a/asm-utils/src/data_storage.rs +++ b/asm-utils/src/data_storage.rs @@ -104,22 +104,19 @@ pub fn store_data_objects( sections: Vec, Vec)>>, memory_start: u32, code_gen: &mut dyn FnMut(Option, u32, SingleDataValue), -) -> BTreeMap { + positions: &mut BTreeMap, +) { let mut writer = WordWriter::new(memory_start, code_gen); - let positions = { - let mut positions = BTreeMap::new(); - let mut current_pos = writer.current_position(); - for (name, data) in sections.iter().flatten() { - if let Some(name) = name { - positions.insert(name.clone(), current_pos); - } - for d in data.iter() { - current_pos += d.size(current_pos as usize) as u32; - } + let mut current_pos = writer.current_position(); + for (name, data) in sections.iter().flatten() { + if let Some(name) = name { + positions.insert(name.clone(), current_pos); } - positions - }; + for d in data.iter() { + current_pos += d.size(current_pos as usize) as u32; + } + } for (name, data) in sections.into_iter().flatten() { if let Some(name) = name { @@ -151,6 +148,4 @@ pub fn store_data_objects( } } writer.finish(); - - positions } diff --git a/asm-utils/src/reachability.rs b/asm-utils/src/reachability.rs index ebe8de193..023c037d9 100644 --- a/asm-utils/src/reachability.rs +++ b/asm-utils/src/reachability.rs @@ -141,13 +141,20 @@ fn extract_replacements( .iter() .filter_map(|s| match s { Statement::Directive(dir, args) if dir.as_str() == ".set" => { - if let [Argument::Expression(Expression::Symbol(from)), 
Argument::Expression(Expression::Symbol(to))] = &args[..] - { - Some((from.to_string(), to.to_string())) - } else { - panic!(); + match &args[..] { + [Argument::Expression(Expression::Symbol(from)), Argument::Expression(Expression::Symbol(to))] => + { + Some((from.to_string(), to.to_string())) + }, + [Argument::Expression(Expression::Symbol(_)), Argument::Expression(Expression::Number(_))] => { + // Handled elsewhere... + None + } + _ =>{ + panic!(); + } } - } + }, _ => None, }) .fold(BTreeMap::new(), |mut acc, (from, to)| { diff --git a/riscv-runtime/src/lib.rs b/riscv-runtime/src/lib.rs index 799c6bb74..eeea1faef 100644 --- a/riscv-runtime/src/lib.rs +++ b/riscv-runtime/src/lib.rs @@ -6,7 +6,7 @@ round_char_boundary )] -use core::arch::asm; +use core::arch::{asm, global_asm}; use core::panic::PanicInfo; use crate::fmt::print_str; @@ -34,13 +34,31 @@ unsafe fn panic(panic: &PanicInfo<'_>) -> ! { loop {} } -extern "Rust" { - fn main(); -} -#[no_mangle] -#[start] -pub unsafe extern "C" fn __runtime_start() { - unsafe { - main(); - } -} +// Entry point function __runtime_start: +// 1. Sets the global pointer register (the symbol __global_pointer$ is standard +// in RISC-V, and it is set by the linker). +// 2. Sets the stack pointer to the extern symbol __powdr_stack_start (this must +// also be set by the linker, but the name is powdr specific). +// 3. Tail call the main function (in powdr, the return address register is already +// set, so that returning from the entry point function will cause the execution +// to succeed). 
+global_asm!( + r" +.global __runtime_start +__runtime_start: + .option push + .option norelax + lui gp, %hi(__global_pointer$) + addi gp, gp, %lo(__global_pointer$) + .option pop + lui sp, %hi(__powdr_stack_start) + addi sp, sp, %lo(__powdr_stack_start) + tail main +" +); + +// TODO: ideally, the above code would use `la` instead of `lui` + `addi`, but +// for some reason rustc automatically expands it to `auipc %pcrel_hi(...)` +// + `addi %pcrel_lo(...)`, which our asm converter doesn't support on multiple +// levels. We can't use `li` either, because rustc doesn't like `li` with +// symbols. diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm_translate/mod.rs index e77940a5f..38967ee68 100644 --- a/riscv/src/asm_translate/mod.rs +++ b/riscv/src/asm_translate/mod.rs @@ -34,7 +34,7 @@ struct AsmProgram { statements: Vec, } -const START_FUNCTION: &str = "__stack_setup"; +const START_FUNCTION: &str = "__runtime_start"; impl<'a> RiscVProgram<'a> for AsmProgram { type InstructionArgs = [Argument]; @@ -203,7 +203,7 @@ fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) .collect(), ); - let mut data_sections = data_parser::extract_data_objects(&statements); + let (mut data_sections, mut data_positions) = data_parser::extract_data_objects(&statements); // Reduce to the code that is actually reachable from main // (and the objects that are referred from there) @@ -217,10 +217,14 @@ fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { replace_dynamic_label_references(&mut statements, &data_labels); let mut mem_entries = Vec::new(); - let data_positions = - store_data_objects(data_sections, data_start, &mut |label, addr, value| { + store_data_objects( + data_sections, + data_start, + &mut |label, addr, value| { mem_entries.push(MemEntry { label, addr, value }); - }); + }, + &mut data_positions, + ); let statements = substitute_symbols_with_values(statements, 
&data_positions); @@ -395,6 +399,10 @@ fn process_statement(s: &Statement) -> Option { + // We ignore ".option" directives + None + } _ if directive.starts_with(".cfi_") => None, _ => panic!( "Leftover directive in code: {directive} {}", @@ -557,19 +565,24 @@ fn global_declarations(stack_start: u32) -> String { .join("\n\n") + // some extra symbols expected by rust code: - // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment that this feature is unstable. - // - __rust_alloc_error_handler_should_panic: needed by the default alloc error handler, - // not sure why it's not present in the asm. + // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment + // that this feature is unstable. + // - __rust_alloc_error_handler_should_panic: needed by the default + // alloc error handler, not sure why it's not present in the asm. // https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 + // - __stack_start: the start of the stack + // - __global_pointer$: a RISC-V special symbol that we actually don't + // use, but we define for compatibility with programs that expect it. &format!(r".data .globl __rust_alloc_error_handler_should_panic __rust_alloc_error_handler_should_panic: .byte 0 .globl __rust_no_alloc_shim_is_unstable __rust_no_alloc_shim_is_unstable: .byte 0 +.globl __powdr_stack_start +.set __powdr_stack_start, {stack_start} +.globl __global_pointer$ +.set __global_pointer$, 0 + .text -.globl __stack_setup -__stack_setup: -li sp, {stack_start} -tail __runtime_start ") } From d04c23fee0b2981eecefc7a6d34432bc7baefd5a Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Tue, 4 Jun 2024 20:04:12 +0200 Subject: [PATCH 17/88] Modify RISC-V compilation to build binaries instead of libraries. 
--- riscv-runtime/build.rs | 7 ++++ riscv-runtime/powdr.x | 26 ++++++++++++ .../dynamic_relocation/dynamic_relocation.s | 41 +++++++++++++++++++ .../function_pointer/src/{lib.rs => main.rs} | 3 +- .../riscv_data/keccak/src/{lib.rs => main.rs} | 3 ++ 5 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 riscv-runtime/build.rs create mode 100644 riscv-runtime/powdr.x create mode 100644 riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s rename riscv/tests/riscv_data/function_pointer/src/{lib.rs => main.rs} (96%) rename riscv/tests/riscv_data/keccak/src/{lib.rs => main.rs} (90%) diff --git a/riscv-runtime/build.rs b/riscv-runtime/build.rs new file mode 100644 index 000000000..fe48d3107 --- /dev/null +++ b/riscv-runtime/build.rs @@ -0,0 +1,7 @@ +use std::{env, path::PathBuf}; + +fn main() { + // Output the linker script to somewhere the linker can find. + let out_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + println!("cargo:rustc-link-search={}", out_dir.to_str().unwrap()); +} diff --git a/riscv-runtime/powdr.x b/riscv-runtime/powdr.x new file mode 100644 index 000000000..671c0e9b4 --- /dev/null +++ b/riscv-runtime/powdr.x @@ -0,0 +1,26 @@ +# Powdr linker script. +# +# If you are using powdr-riscv-runtime, it expects the symbols +# "__global_pointer$" and "__powdr_stack_start" to be defined. +# +# This is an usable version with a 256 MB stack. If you are not building via powdr-rs, +# you must manually specify "-C link-arg=-Tpowdr.x -C link-arg=pie" in rustc +# (e.g. via RUSTFLAGS). + +SECTIONS +{ + # Data starts here, before is the stack. + . = 0x10000100; + .data : { + *(.data) + PROVIDE( __global_pointer$ = . + 0x800 ); + } + .bss : { *(.bss) } + + # Text addresses are fake in powdr, we use a different address space. 
+ .text : { *(.text) } + + __powdr_stack_start = 0x10000000; +} + +ENTRY(__runtime_start) diff --git a/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s b/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s new file mode 100644 index 000000000..403d05ea6 --- /dev/null +++ b/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s @@ -0,0 +1,41 @@ +# TODO: turn this into an actual test, with fail conditions, and put on the testsuite + + .section .data + .align 4 + .global data_section + +data_section: + .word text_label1 # Pointer to text_label1 + .word text_label2 # Pointer to text_label2 + + .section .text + .align 4 + .global _start + +_start: + # Load the address of data_section + la a0, data_section + + # Load the first pointer from data_section + lw a1, 0(a0) + # Call the function pointer + jalr ra, a1 + + # Load the second pointer from data_section + lw a1, 4(a0) + # Call the function pointer + jalr ra, a1 + + # Finish in an infinite loop +final_loop: + j final_loop + +text_label1: + # Function at text_label1 + li a0, 1 # For example, setting a0 to 1 + ret + +text_label2: + # Function at text_label2 + li a0, 2 # For example, setting a0 to 2 + ret diff --git a/riscv/tests/riscv_data/function_pointer/src/lib.rs b/riscv/tests/riscv_data/function_pointer/src/main.rs similarity index 96% rename from riscv/tests/riscv_data/function_pointer/src/lib.rs rename to riscv/tests/riscv_data/function_pointer/src/main.rs index deabb2ef3..1d579a14e 100644 --- a/riscv/tests/riscv_data/function_pointer/src/lib.rs +++ b/riscv/tests/riscv_data/function_pointer/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use powdr_riscv_runtime::io::read_u32; @@ -15,7 +16,7 @@ fn sub(a: u32, b: u32) -> u32 { } #[no_mangle] -fn main() { +pub fn main() { let a = read_u32(0); let b = read_u32(1); let expected = read_u32(2); diff --git a/riscv/tests/riscv_data/keccak/src/lib.rs b/riscv/tests/riscv_data/keccak/src/main.rs similarity index 90% rename from 
riscv/tests/riscv_data/keccak/src/lib.rs rename to riscv/tests/riscv_data/keccak/src/main.rs index 45afe0520..8a785d821 100644 --- a/riscv/tests/riscv_data/keccak/src/lib.rs +++ b/riscv/tests/riscv_data/keccak/src/main.rs @@ -1,7 +1,10 @@ +#![no_main] #![no_std] use tiny_keccak::{Hasher, Keccak}; +extern crate powdr_riscv_runtime; + #[no_mangle] pub fn main() { let input = b"Solidity"; From 7ce34462f70654e9760087f93d0a585196e07b86 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 15:46:23 +0200 Subject: [PATCH 18/88] Correctly finding all the assembly files. --- riscv/src/lib.rs | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index e97ce513c..c78866d8b 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -191,7 +191,6 @@ fn build_cargo_command(input_dir: &str, target_dir: &Path, produce_build_plan: b "build-std=core,alloc", "--target", "riscv32imac-unknown-none-elf", - "--lib", "--target-dir", target_dir, "--manifest-path", @@ -225,6 +224,8 @@ fn output_files_from_cargo_build_plan( panic!("no invocations in cargo build plan"); }; + let mut executable_found = false; + log::debug!("RISC-V assembly files of this build:"); for i in invocations { let JsonValue::Array(outputs) = &i["outputs"] else { @@ -234,17 +235,24 @@ fn output_files_from_cargo_build_plan( let output = Path::new(output.as_str().unwrap()); // Strip the target_dir, so that the path becomes relative. 
let parent = output.parent().unwrap().strip_prefix(target_dir).unwrap(); - if Some(OsStr::new("rmeta")) == output.extension() - && parent.ends_with("riscv32imac-unknown-none-elf/release/deps") - { - // Have to convert to string to remove the "lib" prefix: - let name_stem = output - .file_stem() - .unwrap() - .to_str() - .unwrap() - .strip_prefix("lib") - .unwrap(); + if parent.ends_with("riscv32imac-unknown-none-elf/release/deps") { + let extension = output.extension(); + let name_stem = if Some(OsStr::new("rmeta")) == extension { + // Have to convert to string to remove the "lib" prefix: + output + .file_stem() + .unwrap() + .to_str() + .unwrap() + .strip_prefix("lib") + .unwrap() + } else if None == extension { + assert!(!executable_found, "Multiple executables found"); + executable_found = true; + output.file_stem().unwrap().to_str().unwrap() + } else { + continue; + }; let mut asm_name = parent.join(name_stem); asm_name.set_extension("s"); From 890f529b2452934b0faa6f4e68686ae88b4bfdad Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 15:53:22 +0200 Subject: [PATCH 19/88] Linker settings for the ELF file to be usable. --- riscv/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index c78866d8b..dc31c4b21 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -180,7 +180,10 @@ pub fn compile_rust_crate_to_riscv_asm( fn build_cargo_command(input_dir: &str, target_dir: &Path, produce_build_plan: bool) -> Command { let mut cmd = Command::new("cargo"); - cmd.env("RUSTFLAGS", "--emit=asm -g"); + cmd.env( + "RUSTFLAGS", + "--emit=asm -g -C link-args=-Tpowdr.x -C link-args=--emit-relocs", + ); let args = as_ref![ OsStr; From ce334a1a16063c7327c89ba856e10464e7718490 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 17:07:04 +0200 Subject: [PATCH 20/88] Relocation table test. 
--- .../dynamic_relocation/dynamic_relocation.s | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s diff --git a/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s b/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s new file mode 100644 index 000000000..403d05ea6 --- /dev/null +++ b/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s @@ -0,0 +1,41 @@ +# TODO: turn this into an actual test, with fail conditions, and put on the testsuite + + .section .data + .align 4 + .global data_section + +data_section: + .word text_label1 # Pointer to text_label1 + .word text_label2 # Pointer to text_label2 + + .section .text + .align 4 + .global _start + +_start: + # Load the address of data_section + la a0, data_section + + # Load the first pointer from data_section + lw a1, 0(a0) + # Call the function pointer + jalr ra, a1 + + # Load the second pointer from data_section + lw a1, 4(a0) + # Call the function pointer + jalr ra, a1 + + # Finish in an infinite loop +final_loop: + j final_loop + +text_label1: + # Function at text_label1 + li a0, 1 # For example, setting a0 to 1 + ret + +text_label2: + # Function at text_label2 + li a0, 2 # For example, setting a0 to 2 + ret From 42af5f7e2cad7099201efb61479f63168da5fda1 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 17:08:02 +0200 Subject: [PATCH 21/88] Removing unrelated test. 
--- .../dynamic_relocation/dynamic_relocation.s | 41 ------------------- 1 file changed, 41 deletions(-) delete mode 100644 riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s diff --git a/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s b/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s deleted file mode 100644 index 403d05ea6..000000000 --- a/riscv/tests/riscv_data/dynamic_relocation/dynamic_relocation.s +++ /dev/null @@ -1,41 +0,0 @@ -# TODO: turn this into an actual test, with fail conditions, and put on the testsuite - - .section .data - .align 4 - .global data_section - -data_section: - .word text_label1 # Pointer to text_label1 - .word text_label2 # Pointer to text_label2 - - .section .text - .align 4 - .global _start - -_start: - # Load the address of data_section - la a0, data_section - - # Load the first pointer from data_section - lw a1, 0(a0) - # Call the function pointer - jalr ra, a1 - - # Load the second pointer from data_section - lw a1, 4(a0) - # Call the function pointer - jalr ra, a1 - - # Finish in an infinite loop -final_loop: - j final_loop - -text_label1: - # Function at text_label1 - li a0, 1 # For example, setting a0 to 1 - ret - -text_label2: - # Function at text_label2 - li a0, 2 # For example, setting a0 to 2 - ret From 9f509a8362a5b472b50dbf9f98b65918221c2046 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 17:18:43 +0200 Subject: [PATCH 22/88] Fixing lint. --- asm-utils/src/data_parser.rs | 15 ++++++++++----- riscv/src/asm_translate/mod.rs | 7 +++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/asm-utils/src/data_parser.rs b/asm-utils/src/data_parser.rs index d4bc4d51c..03bd6ebf0 100644 --- a/asm-utils/src/data_parser.rs +++ b/asm-utils/src/data_parser.rs @@ -77,15 +77,17 @@ impl DataSections { } } +pub struct DataObjects { + pub sections: Vec, Vec)>>, + pub adhoc_symbols: BTreeMap, +} + /// Extract all data objects from the list of statements. 
/// Returns the named data objects themselves and a vector of the names /// in the order in which they occur in the statements. pub fn extract_data_objects( statements: &[Statement], -) -> ( - Vec, Vec)>>, - BTreeMap, -) { +) -> DataObjects { let mut adhoc_symbols = BTreeMap::new(); let mut data = DataSections::new(); @@ -179,7 +181,10 @@ pub fn extract_data_objects( _ => {} } } - (data.sections, adhoc_symbols) + DataObjects { + sections: data.sections, + adhoc_symbols, + } } fn is_data_section(arg: &Argument) -> bool { diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm_translate/mod.rs index 38967ee68..75bb3b376 100644 --- a/riscv/src/asm_translate/mod.rs +++ b/riscv/src/asm_translate/mod.rs @@ -7,7 +7,7 @@ use itertools::Itertools; use parser::RiscParser; use powdr_asm_utils::{ ast::{BinaryOpKind, UnaryOpKind}, - data_parser, + data_parser::{self, DataObjects}, data_storage::store_data_objects, parser::parse_asm, reachability::{self, symbols_in_args}, @@ -203,7 +203,10 @@ fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) .collect(), ); - let (mut data_sections, mut data_positions) = data_parser::extract_data_objects(&statements); + let DataObjects { + sections: mut data_sections, + adhoc_symbols: mut data_positions, + } = data_parser::extract_data_objects(&statements); // Reduce to the code that is actually reachable from main // (and the objects that are referred from there) From bad34de623f3d241dbe724754fa478eea68237b3 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 17:27:54 +0200 Subject: [PATCH 23/88] Updating comments. --- riscv-runtime/powdr.x | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv-runtime/powdr.x b/riscv-runtime/powdr.x index 671c0e9b4..0704e2bfe 100644 --- a/riscv-runtime/powdr.x +++ b/riscv-runtime/powdr.x @@ -4,7 +4,7 @@ # "__global_pointer$" and "__powdr_stack_start" to be defined. 
# # This is an usable version with a 256 MB stack. If you are not building via powdr-rs, -# you must manually specify "-C link-arg=-Tpowdr.x -C link-arg=pie" in rustc +# to use this linker script you must manually specify "-C link-arg=-Tpowdr.x" in rustc # (e.g. via RUSTFLAGS). SECTIONS From 5c703daffc993973b6bcc55e5ccc6182e274fa09 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 17:29:59 +0200 Subject: [PATCH 24/88] Bumping version because the runtime changes are incompatible. (I think.) --- riscv-runtime/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv-runtime/Cargo.toml b/riscv-runtime/Cargo.toml index 56d37aa18..1b749c75c 100644 --- a/riscv-runtime/Cargo.toml +++ b/riscv-runtime/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "powdr-riscv-runtime" description = "powdr runtime provider for RISCV programs" -version = "0.1.0-alpha.2" +version = "0.2.0-alpha.0" edition = "2021" license = "MIT" homepage = "https://powdr.org" From 531e8ee4ff9b74bc70fb69eb329c4a9034433e8e Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Fri, 7 Jun 2024 18:35:51 +0200 Subject: [PATCH 25/88] Fixing lint. --- riscv/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index dc31c4b21..f46d3161d 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -249,7 +249,7 @@ fn output_files_from_cargo_build_plan( .unwrap() .strip_prefix("lib") .unwrap() - } else if None == extension { + } else if extension.is_none() { assert!(!executable_found, "Multiple executables found"); executable_found = true; output.file_stem().unwrap().to_str().unwrap() From d138007600bbe54f342d06996ea30f1b99bf7687 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 10 Jun 2024 13:46:56 +0200 Subject: [PATCH 26/88] Addressing reviews. 
--- asm-utils/src/data_storage.rs | 6 +++ riscv/benches/executor_benchmark.rs | 8 ++-- .../{asm_translate => asm}/disambiguator.rs | 0 riscv/src/{asm_translate => asm}/mod.rs | 34 +++++++------ riscv/src/{asm_translate => asm}/parser.rs | 2 +- .../{asm_translate => asm}/riscv_asm.lalrpop | 0 riscv/src/code_gen.rs | 48 +++++++++++-------- riscv/src/lib.rs | 4 +- riscv/tests/instructions.rs | 2 +- riscv/tests/riscv.rs | 11 ++--- 10 files changed, 62 insertions(+), 53 deletions(-) rename riscv/src/{asm_translate => asm}/disambiguator.rs (100%) rename riscv/src/{asm_translate => asm}/mod.rs (96%) rename riscv/src/{asm_translate => asm}/parser.rs (95%) rename riscv/src/{asm_translate => asm}/riscv_asm.lalrpop (100%) diff --git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs index 93188ddc1..fbe43603d 100644 --- a/asm-utils/src/data_storage.rs +++ b/asm-utils/src/data_storage.rs @@ -7,9 +7,15 @@ use crate::{ utils::{alignment_size, next_aligned}, }; +/// A single 32-bit data value. pub enum SingleDataValue { + /// A literal value. Value(u32), + /// The value of a pointer to a text label. Since there is no 1-to-1 + /// correspondence between RISC-V and Powdr ASM instructions, this is + /// passed unresolved to the code generator. LabelReference(String), + /// Currently not supported. 
Offset(String, String), } diff --git a/riscv/benches/executor_benchmark.rs b/riscv/benches/executor_benchmark.rs index 6c43daf0f..777865748 100644 --- a/riscv/benches/executor_benchmark.rs +++ b/riscv/benches/executor_benchmark.rs @@ -2,8 +2,7 @@ use ::powdr_pipeline::Pipeline; use powdr_number::GoldilocksField; use powdr_riscv::{ - asm_translate, compile_rust_crate_to_riscv_asm, continuations::bootloader::default_input, - Runtime, + asm, compile_rust_crate_to_riscv_asm, continuations::bootloader::default_input, Runtime, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -19,7 +18,7 @@ fn executor_benchmark(c: &mut Criterion) { let tmp_dir = Temp::new_dir().unwrap(); let riscv_asm_files = compile_rust_crate_to_riscv_asm("./tests/riscv_data/keccak/Cargo.toml", &tmp_dir); - let contents = asm_translate::compile::(riscv_asm_files, &Runtime::base(), false); + let contents = asm::compile::(riscv_asm_files, &Runtime::base(), false); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); @@ -31,8 +30,7 @@ fn executor_benchmark(c: &mut Criterion) { // The first chunk of `many_chunks`, with Poseidon co-processor & bootloader let riscv_asm_files = compile_rust_crate_to_riscv_asm("./tests/riscv_data/many_chunks/Cargo.toml", &tmp_dir); - let contents = - asm_translate::compile::(riscv_asm_files, &Runtime::base().with_poseidon(), true); + let contents = asm::compile::(riscv_asm_files, &Runtime::base().with_poseidon(), true); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); diff --git a/riscv/src/asm_translate/disambiguator.rs b/riscv/src/asm/disambiguator.rs similarity index 100% rename from riscv/src/asm_translate/disambiguator.rs rename to riscv/src/asm/disambiguator.rs diff --git a/riscv/src/asm_translate/mod.rs b/riscv/src/asm/mod.rs similarity index 
96% rename from riscv/src/asm_translate/mod.rs rename to riscv/src/asm/mod.rs index e77940a5f..6d165c839 100644 --- a/riscv/src/asm_translate/mod.rs +++ b/riscv/src/asm/mod.rs @@ -1,7 +1,4 @@ -use std::{ - cell::Cell, - collections::{BTreeMap, BTreeSet, HashSet}, -}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use itertools::Itertools; use parser::RiscParser; @@ -17,7 +14,9 @@ use powdr_asm_utils::{ use powdr_number::FieldElement; use crate::{ - code_gen::{self, Args, FunctionKind, MemEntry, Register, RiscVProgram, SourceFileInfo}, + code_gen::{ + self, FunctionKind, InstructionArgs, MemEntry, Register, RiscVProgram, SourceFileInfo, + }, Runtime, }; @@ -30,17 +29,16 @@ type Expression = powdr_asm_utils::ast::Expression; struct AsmProgram { file_ids: Vec<(i64, String, String)>, - mem_entries: Cell>>, + mem_entries: Option>, statements: Vec, } const START_FUNCTION: &str = "__stack_setup"; -impl<'a> RiscVProgram<'a> for AsmProgram { - type InstructionArgs = [Argument]; - type Label = &'a str; +impl RiscVProgram for AsmProgram { + type Args = [Argument]; - fn source_files_info(&self) -> impl Iterator { + fn take_source_files_info(&mut self) -> impl Iterator { self.file_ids.iter().map(|(id, dir, file)| SourceFileInfo { id: *id as u32, dir, @@ -48,13 +46,13 @@ impl<'a> RiscVProgram<'a> for AsmProgram { }) } - fn initial_mem(&self) -> impl Iterator { - self.mem_entries.take().into_iter().flatten() + fn take_initial_mem(&mut self) -> impl Iterator { + std::mem::take(&mut self.mem_entries).unwrap().into_iter() } - fn executable_statements( - &'a self, - ) -> impl Iterator> { + fn take_executable_statements( + &mut self, + ) -> impl Iterator> { self.statements.iter().filter_map(process_statement) } @@ -63,7 +61,7 @@ impl<'a> RiscVProgram<'a> for AsmProgram { } } -impl Args for [Argument] { +impl InstructionArgs for [Argument] { type Error = &'static str; fn l(&self) -> Result { @@ -183,7 +181,7 @@ pub fn compile( ) -> String { let asm_program = 
compile_internal(assemblies); - code_gen::translate_program::(&asm_program, runtime, with_bootloader) + code_gen::translate_program::(asm_program, runtime, with_bootloader) } fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { @@ -226,7 +224,7 @@ fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { AsmProgram { file_ids, - mem_entries: Cell::new(Some(mem_entries)), + mem_entries: Some(mem_entries), statements, } } diff --git a/riscv/src/asm_translate/parser.rs b/riscv/src/asm/parser.rs similarity index 95% rename from riscv/src/asm_translate/parser.rs rename to riscv/src/asm/parser.rs index c5d1345ce..0ca41b6f4 100644 --- a/riscv/src/asm_translate/parser.rs +++ b/riscv/src/asm/parser.rs @@ -9,7 +9,7 @@ lalrpop_mod!( #[allow(clippy::all)] #[allow(clippy::uninlined_format_args)] riscv_asm, - "/asm_translate/riscv_asm.rs" + "/asm/riscv_asm.rs" ); pub struct RiscParser { diff --git a/riscv/src/asm_translate/riscv_asm.lalrpop b/riscv/src/asm/riscv_asm.lalrpop similarity index 100% rename from riscv/src/asm_translate/riscv_asm.lalrpop rename to riscv/src/asm/riscv_asm.lalrpop diff --git a/riscv/src/code_gen.rs b/riscv/src/code_gen.rs index 1c0aff700..b77d2d9bb 100644 --- a/riscv/src/code_gen.rs +++ b/riscv/src/code_gen.rs @@ -48,7 +48,7 @@ impl fmt::Display for FunctionKind { } } -pub enum Statement<'a, L: AsRef, A: Args + ?Sized> { +pub enum Statement<'a, L: AsRef + 'a, A: InstructionArgs + ?Sized + 'a> { DebugLoc { file: u64, line: u64, col: u64 }, Label(L), Instruction { op: &'a str, args: &'a A }, @@ -67,22 +67,29 @@ pub struct SourceFileInfo<'a> { } /// A RISC-V program that can be translated to POWDR ASM. -pub trait RiscVProgram<'a> { - type InstructionArgs: Args + ?Sized; - type Label: AsRef + 'a; +pub trait RiscVProgram { + type Args: InstructionArgs + ?Sized; - // Source files to be used by the debug statements. - fn source_files_info(&self) -> impl Iterator; + /// Takes the listing of source files, to be used in the debug statements. 
+ fn take_source_files_info(&mut self) -> impl Iterator; - fn initial_mem(&self) -> impl Iterator; - fn executable_statements( - &'a self, - ) -> impl Iterator>; + /// Takes the initial memory snapshot. + fn take_initial_mem(&mut self) -> impl Iterator; + + /// Takes the executable statements and labels. + fn take_executable_statements( + &mut self, + ) -> impl Iterator, Self::Args>>; + + /// The name of the function that should be called to start the program. fn start_function(&self) -> &str; } -pub fn translate_program<'a, F: FieldElement>( - program: &'a impl RiscVProgram<'a>, +/// Translates a RISC-V program to POWDR ASM. +/// +/// Will call each of the methods in the `RiscVProgram` just once. +pub fn translate_program( + program: impl RiscVProgram, runtime: &Runtime, with_bootloader: bool, ) -> String { @@ -99,14 +106,14 @@ pub fn translate_program<'a, F: FieldElement>( ) } -fn translate_program_impl<'a>( - program: &'a impl RiscVProgram<'a>, +fn translate_program_impl( + mut program: impl RiscVProgram, runtime: &Runtime, with_bootloader: bool, ) -> (Vec, Vec, u64) { let mut initial_mem = Vec::new(); let mut data_code = Vec::new(); - for MemEntry { label, addr, value } in program.initial_mem() { + for MemEntry { label, addr, value } in program.take_initial_mem() { if let Some(label) = label { // This is a comment, so we don't need to escape the label. let comment = format!(" // data {label}"); @@ -173,7 +180,7 @@ fn translate_program_impl<'a>( }; let mut statements: Vec = program - .source_files_info() + .take_source_files_info() .map( |SourceFileInfo { id, @@ -190,7 +197,7 @@ fn translate_program_impl<'a>( format!("x1 <== jump({});", program.start_function()), "return;".to_string(), // This is not "riscv ret", but "return from powdr asm function". 
]); - for s in program.executable_statements() { + for s in program.take_executable_statements() { match s { Statement::DebugLoc { file, line, col } => { statements.push(format!(".debug loc {file} {line} {col};")) @@ -645,7 +652,7 @@ fn memory(with_bootloader: bool) -> String { "# } -pub trait Args { +pub trait InstructionArgs { type Error: fmt::Display; fn l(&self) -> Result; @@ -689,7 +696,10 @@ pub fn pop_register(name: &str) -> [String; 2] { ] } -fn process_instruction(instr: &str, args: &A) -> Result, A::Error> { +fn process_instruction( + instr: &str, + args: &A, +) -> Result, A::Error> { Ok(match instr { // load/store registers "li" | "la" => { diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index e97ce513c..18a3ad255 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -15,7 +15,7 @@ use std::fs; pub use crate::runtime::Runtime; -pub mod asm_translate; +pub mod asm; mod code_gen; pub mod continuations; pub mod runtime; @@ -99,7 +99,7 @@ pub fn compile_riscv_asm_bundle( return None; } - let powdr_asm = asm_translate::compile::(riscv_asm_files, runtime, with_bootloader); + let powdr_asm = asm::compile::(riscv_asm_files, runtime, with_bootloader); fs::write(powdr_asm_file_name.clone(), &powdr_asm).unwrap(); log::info!("Wrote {}", powdr_asm_file_name.to_str().unwrap()); diff --git a/riscv/tests/instructions.rs b/riscv/tests/instructions.rs index 692162ca8..32a5c39c9 100644 --- a/riscv/tests/instructions.rs +++ b/riscv/tests/instructions.rs @@ -4,7 +4,7 @@ mod instruction_tests { use crate::common::verify_riscv_asm_string; use powdr_backend::BackendType; use powdr_number::GoldilocksField; - use powdr_riscv::asm_translate::compile; + use powdr_riscv::asm::compile; use powdr_riscv::Runtime; use test_log::test; diff --git a/riscv/tests/riscv.rs b/riscv/tests/riscv.rs index a8fdc4eab..8e53f052e 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -22,8 +22,7 @@ pub fn test_continuations(case: &str) { &format!("tests/riscv_data/{case}/Cargo.toml"), 
&temp_dir, ); - let powdr_asm = - powdr_riscv::asm_translate::compile::(riscv_asm, &runtime, true); + let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, true); // Manually create tmp dir, so that it is the same in all chunks. let tmp_dir = mktemp::Temp::new_dir().unwrap(); @@ -275,8 +274,7 @@ fn many_chunks_dry() { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = - powdr_riscv::asm_translate::compile::(riscv_asm, &runtime, true); + let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, true); let mut pipeline = Pipeline::default() .from_asm_string(powdr_asm, Some(PathBuf::from(case))) @@ -301,8 +299,7 @@ fn output_syscall() { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = - powdr_riscv::asm_translate::compile::(riscv_asm, &runtime, false); + let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, false); let inputs = vec![1u32, 2, 3] .into_iter() @@ -376,5 +373,5 @@ fn compile_riscv_crate(case: &str, runtime: &Runtime) -> String &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - powdr_riscv::asm_translate::compile::(riscv_asm, runtime, false) + powdr_riscv::asm::compile::(riscv_asm, runtime, false) } From 5ab5959b608d6db1fe19392b98550f5dc2c74644 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 10 Jun 2024 17:53:24 +0200 Subject: [PATCH 27/88] Building rust into executable instead of library. 
--- riscv/tests/riscv_data/affine_256/src/{lib.rs => main.rs} | 0 .../tests/riscv_data/byte_access/src/{lib.rs => main.rs} | 1 + .../tests/riscv_data/double_word/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/ec_add/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/ec_double/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/evm/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/keccak/src/main.rs | 3 +-- .../tests/riscv_data/many_chunks/src/{lib.rs => main.rs} | 2 ++ .../riscv_data/many_chunks_memory/src/{lib.rs => main.rs} | 2 ++ riscv/tests/riscv_data/memfuncs/src/{lib.rs => main.rs} | 3 +++ riscv/tests/riscv_data/modmul_256/src/{lib.rs => main.rs} | 8 +++++--- riscv/tests/riscv_data/output/src/{lib.rs => main.rs} | 3 ++- .../riscv_data/password_checker/src/{lib.rs => main.rs} | 5 ++++- .../poseidon_gl_via_coprocessor/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/print/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/sum/src/{lib.rs => main.rs} | 5 ++--- riscv/tests/riscv_data/sum_serde/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/trivial/src/lib.rs | 4 ---- riscv/tests/riscv_data/trivial/src/main.rs | 7 +++++++ .../riscv_data/two_sums_serde/src/{lib.rs => main.rs} | 1 + riscv/tests/riscv_data/vec_median/src/{lib.rs => main.rs} | 1 + .../riscv_data/zero_with_values/src/{lib.rs => main.rs} | 3 +++ 22 files changed, 41 insertions(+), 14 deletions(-) rename riscv/tests/riscv_data/affine_256/src/{lib.rs => main.rs} (100%) rename riscv/tests/riscv_data/byte_access/src/{lib.rs => main.rs} (97%) rename riscv/tests/riscv_data/double_word/src/{lib.rs => main.rs} (97%) rename riscv/tests/riscv_data/ec_add/src/{lib.rs => main.rs} (99%) rename riscv/tests/riscv_data/ec_double/src/{lib.rs => main.rs} (99%) rename riscv/tests/riscv_data/evm/src/{lib.rs => main.rs} (99%) rename riscv/tests/riscv_data/many_chunks/src/{lib.rs => main.rs} (90%) rename riscv/tests/riscv_data/many_chunks_memory/src/{lib.rs => main.rs} (93%) rename 
riscv/tests/riscv_data/memfuncs/src/{lib.rs => main.rs} (96%) rename riscv/tests/riscv_data/modmul_256/src/{lib.rs => main.rs} (93%) rename riscv/tests/riscv_data/output/src/{lib.rs => main.rs} (80%) rename riscv/tests/riscv_data/password_checker/src/{lib.rs => main.rs} (97%) rename riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/{lib.rs => main.rs} (99%) rename riscv/tests/riscv_data/print/src/{lib.rs => main.rs} (94%) rename riscv/tests/riscv_data/sum/src/{lib.rs => main.rs} (83%) rename riscv/tests/riscv_data/sum_serde/src/{lib.rs => main.rs} (95%) delete mode 100644 riscv/tests/riscv_data/trivial/src/lib.rs create mode 100644 riscv/tests/riscv_data/trivial/src/main.rs rename riscv/tests/riscv_data/two_sums_serde/src/{lib.rs => main.rs} (96%) rename riscv/tests/riscv_data/vec_median/src/{lib.rs => main.rs} (98%) rename riscv/tests/riscv_data/zero_with_values/src/{lib.rs => main.rs} (84%) diff --git a/riscv/tests/riscv_data/affine_256/src/lib.rs b/riscv/tests/riscv_data/affine_256/src/main.rs similarity index 100% rename from riscv/tests/riscv_data/affine_256/src/lib.rs rename to riscv/tests/riscv_data/affine_256/src/main.rs diff --git a/riscv/tests/riscv_data/byte_access/src/lib.rs b/riscv/tests/riscv_data/byte_access/src/main.rs similarity index 97% rename from riscv/tests/riscv_data/byte_access/src/lib.rs rename to riscv/tests/riscv_data/byte_access/src/main.rs index 54efba76c..314248697 100644 --- a/riscv/tests/riscv_data/byte_access/src/lib.rs +++ b/riscv/tests/riscv_data/byte_access/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use powdr_riscv_runtime::io::read_u32; diff --git a/riscv/tests/riscv_data/double_word/src/lib.rs b/riscv/tests/riscv_data/double_word/src/main.rs similarity index 97% rename from riscv/tests/riscv_data/double_word/src/lib.rs rename to riscv/tests/riscv_data/double_word/src/main.rs index 22e7090ae..9a984218c 100644 --- a/riscv/tests/riscv_data/double_word/src/lib.rs +++ b/riscv/tests/riscv_data/double_word/src/main.rs @@ 
-1,3 +1,4 @@ +#![no_main] #![no_std] use powdr_riscv_runtime::io::read_u32; diff --git a/riscv/tests/riscv_data/ec_add/src/lib.rs b/riscv/tests/riscv_data/ec_add/src/main.rs similarity index 99% rename from riscv/tests/riscv_data/ec_add/src/lib.rs rename to riscv/tests/riscv_data/ec_add/src/main.rs index 2f6d57f1b..f2daeb1f1 100644 --- a/riscv/tests/riscv_data/ec_add/src/lib.rs +++ b/riscv/tests/riscv_data/ec_add/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use hex_literal::hex; diff --git a/riscv/tests/riscv_data/ec_double/src/lib.rs b/riscv/tests/riscv_data/ec_double/src/main.rs similarity index 99% rename from riscv/tests/riscv_data/ec_double/src/lib.rs rename to riscv/tests/riscv_data/ec_double/src/main.rs index f21142312..0f3927d00 100644 --- a/riscv/tests/riscv_data/ec_double/src/lib.rs +++ b/riscv/tests/riscv_data/ec_double/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use hex_literal::hex; diff --git a/riscv/tests/riscv_data/evm/src/lib.rs b/riscv/tests/riscv_data/evm/src/main.rs similarity index 99% rename from riscv/tests/riscv_data/evm/src/lib.rs rename to riscv/tests/riscv_data/evm/src/main.rs index da36f7cf3..4917d2ac5 100644 --- a/riscv/tests/riscv_data/evm/src/lib.rs +++ b/riscv/tests/riscv_data/evm/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use powdr_riscv_runtime::io::read; diff --git a/riscv/tests/riscv_data/keccak/src/main.rs b/riscv/tests/riscv_data/keccak/src/main.rs index 8a785d821..be3675f59 100644 --- a/riscv/tests/riscv_data/keccak/src/main.rs +++ b/riscv/tests/riscv_data/keccak/src/main.rs @@ -1,9 +1,8 @@ #![no_main] #![no_std] -use tiny_keccak::{Hasher, Keccak}; - extern crate powdr_riscv_runtime; +use tiny_keccak::{Hasher, Keccak}; #[no_mangle] pub fn main() { diff --git a/riscv/tests/riscv_data/many_chunks/src/lib.rs b/riscv/tests/riscv_data/many_chunks/src/main.rs similarity index 90% rename from riscv/tests/riscv_data/many_chunks/src/lib.rs rename to riscv/tests/riscv_data/many_chunks/src/main.rs index 
77e5aa626..707abdafc 100644 --- a/riscv/tests/riscv_data/many_chunks/src/lib.rs +++ b/riscv/tests/riscv_data/many_chunks/src/main.rs @@ -1,6 +1,8 @@ +#![no_main] #![no_std] extern crate alloc; +extern crate powdr_riscv_runtime; use alloc::vec::Vec; #[no_mangle] diff --git a/riscv/tests/riscv_data/many_chunks_memory/src/lib.rs b/riscv/tests/riscv_data/many_chunks_memory/src/main.rs similarity index 93% rename from riscv/tests/riscv_data/many_chunks_memory/src/lib.rs rename to riscv/tests/riscv_data/many_chunks_memory/src/main.rs index 3bfe7357c..dc515aaf0 100644 --- a/riscv/tests/riscv_data/many_chunks_memory/src/lib.rs +++ b/riscv/tests/riscv_data/many_chunks_memory/src/main.rs @@ -1,6 +1,8 @@ +#![no_main] #![no_std] extern crate alloc; +extern crate powdr_riscv_runtime; use alloc::vec::Vec; const N: usize = 20000; diff --git a/riscv/tests/riscv_data/memfuncs/src/lib.rs b/riscv/tests/riscv_data/memfuncs/src/main.rs similarity index 96% rename from riscv/tests/riscv_data/memfuncs/src/lib.rs rename to riscv/tests/riscv_data/memfuncs/src/main.rs index 5190d1dc7..5103b60dc 100644 --- a/riscv/tests/riscv_data/memfuncs/src/lib.rs +++ b/riscv/tests/riscv_data/memfuncs/src/main.rs @@ -1,5 +1,8 @@ +#![no_main] #![no_std] +extern crate powdr_riscv_runtime; + extern "C" { fn memset(s: *mut u8, c: core::ffi::c_int, n: usize) -> *mut u8; fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8; diff --git a/riscv/tests/riscv_data/modmul_256/src/lib.rs b/riscv/tests/riscv_data/modmul_256/src/main.rs similarity index 93% rename from riscv/tests/riscv_data/modmul_256/src/lib.rs rename to riscv/tests/riscv_data/modmul_256/src/main.rs index 7e0faf502..c3a7e2655 100644 --- a/riscv/tests/riscv_data/modmul_256/src/lib.rs +++ b/riscv/tests/riscv_data/modmul_256/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use hex_literal::hex; @@ -36,7 +37,7 @@ pub fn main() { let r = [0, 0, 0, 0, 0, 0, 0, 0]; assert_eq!(modmul_256(a, b, m), r); - // 
(0xffffffffeeeeeeeeddddddddccccccccbbbbbbbbaaaaaaaa0000000099999999 * + // (0xffffffffeeeeeeeeddddddddccccccccbbbbbbbbaaaaaaaa0000000099999999 * // 0x8888888877777777666666665555555544444444333333332222222211111111 % // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f) = // 0x 30eca800 38e38dd9 54320f92 93e93e3d 091a2ae9 72ea6053 69d03be7 2229e43e @@ -50,10 +51,11 @@ pub fn main() { ]; // secp modulus let m = [ - 0xfffffc2f, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, + 0xfffffc2f, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, ]; - let r = [0x2229e43e, 0x69d03be7, 0x72ea6053, 0x091a2ae9, 0x93e93e3d, 0x54320f92, 0x38e38dd9, + let r = [ + 0x2229e43e, 0x69d03be7, 0x72ea6053, 0x091a2ae9, 0x93e93e3d, 0x54320f92, 0x38e38dd9, 0x30eca800, ]; assert_eq!(modmul_256(a, b, m), r); diff --git a/riscv/tests/riscv_data/output/src/lib.rs b/riscv/tests/riscv_data/output/src/main.rs similarity index 80% rename from riscv/tests/riscv_data/output/src/lib.rs rename to riscv/tests/riscv_data/output/src/main.rs index c45de0585..0f8ae892f 100644 --- a/riscv/tests/riscv_data/output/src/lib.rs +++ b/riscv/tests/riscv_data/output/src/main.rs @@ -1,6 +1,7 @@ +#![no_main] #![no_std] -use powdr_riscv_runtime::io::{read_u32, write_u8, write_slice, write}; +use powdr_riscv_runtime::io::{read_u32, write, write_slice, write_u8}; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize)] diff --git a/riscv/tests/riscv_data/password_checker/src/lib.rs b/riscv/tests/riscv_data/password_checker/src/main.rs similarity index 97% rename from riscv/tests/riscv_data/password_checker/src/lib.rs rename to riscv/tests/riscv_data/password_checker/src/main.rs index 3a0af9b41..73121125c 100644 --- a/riscv/tests/riscv_data/password_checker/src/lib.rs +++ b/riscv/tests/riscv_data/password_checker/src/main.rs @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the 
License. +#![no_main] #![no_std] +extern crate powdr_riscv_runtime; + pub struct PasswordRequest { pub password: &'static str, pub salt: [u8; 32], @@ -25,7 +28,7 @@ pub fn main() { // Uncomment \/ to see it fail //password: "12345678", password: "S00perSecr1t!!!", - salt: [0xaa; 32] + salt: [0xaa; 32], }; let policy = PasswordPolicy { diff --git a/riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/lib.rs b/riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/main.rs similarity index 99% rename from riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/lib.rs rename to riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/main.rs index 7c76d1514..1aa15bfa4 100644 --- a/riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/lib.rs +++ b/riscv/tests/riscv_data/poseidon_gl_via_coprocessor/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use powdr_riscv_runtime::hash::{poseidon_gl, poseidon_gl_unsafe}; diff --git a/riscv/tests/riscv_data/print/src/lib.rs b/riscv/tests/riscv_data/print/src/main.rs similarity index 94% rename from riscv/tests/riscv_data/print/src/lib.rs rename to riscv/tests/riscv_data/print/src/main.rs index f91470a36..226f691df 100644 --- a/riscv/tests/riscv_data/print/src/lib.rs +++ b/riscv/tests/riscv_data/print/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use powdr_riscv_runtime::io::read_u32; diff --git a/riscv/tests/riscv_data/sum/src/lib.rs b/riscv/tests/riscv_data/sum/src/main.rs similarity index 83% rename from riscv/tests/riscv_data/sum/src/lib.rs rename to riscv/tests/riscv_data/sum/src/main.rs index f4a1a36ca..c53629731 100644 --- a/riscv/tests/riscv_data/sum/src/lib.rs +++ b/riscv/tests/riscv_data/sum/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] extern crate alloc; @@ -13,9 +14,7 @@ pub fn main() { let len = read_u32(1) as usize; // Read the numbers from the prover and store them // in a vector. 
- let data: Vec<_> = (2..(len + 2)) - .map(|idx| read_u32(idx as u32)) - .collect(); + let data: Vec<_> = (2..(len + 2)).map(|idx| read_u32(idx as u32)).collect(); // Compute the sum. let sum: u32 = data.iter().sum(); // Check that our sum matches the prover's. diff --git a/riscv/tests/riscv_data/sum_serde/src/lib.rs b/riscv/tests/riscv_data/sum_serde/src/main.rs similarity index 95% rename from riscv/tests/riscv_data/sum_serde/src/lib.rs rename to riscv/tests/riscv_data/sum_serde/src/main.rs index 83c147944..b7a275615 100644 --- a/riscv/tests/riscv_data/sum_serde/src/lib.rs +++ b/riscv/tests/riscv_data/sum_serde/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] extern crate alloc; diff --git a/riscv/tests/riscv_data/trivial/src/lib.rs b/riscv/tests/riscv_data/trivial/src/lib.rs deleted file mode 100644 index b8eeca29d..000000000 --- a/riscv/tests/riscv_data/trivial/src/lib.rs +++ /dev/null @@ -1,4 +0,0 @@ -#![no_std] - -#[no_mangle] -pub fn main() {} diff --git a/riscv/tests/riscv_data/trivial/src/main.rs b/riscv/tests/riscv_data/trivial/src/main.rs new file mode 100644 index 000000000..f5356cba5 --- /dev/null +++ b/riscv/tests/riscv_data/trivial/src/main.rs @@ -0,0 +1,7 @@ +#![no_main] +#![no_std] + +extern crate powdr_riscv_runtime; + +#[no_mangle] +pub fn main() {} diff --git a/riscv/tests/riscv_data/two_sums_serde/src/lib.rs b/riscv/tests/riscv_data/two_sums_serde/src/main.rs similarity index 96% rename from riscv/tests/riscv_data/two_sums_serde/src/lib.rs rename to riscv/tests/riscv_data/two_sums_serde/src/main.rs index 2d49f257c..e4ee83bd5 100644 --- a/riscv/tests/riscv_data/two_sums_serde/src/lib.rs +++ b/riscv/tests/riscv_data/two_sums_serde/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] extern crate alloc; diff --git a/riscv/tests/riscv_data/vec_median/src/lib.rs b/riscv/tests/riscv_data/vec_median/src/main.rs similarity index 98% rename from riscv/tests/riscv_data/vec_median/src/lib.rs rename to riscv/tests/riscv_data/vec_median/src/main.rs index 
c5adbacff..51fb1e639 100644 --- a/riscv/tests/riscv_data/vec_median/src/lib.rs +++ b/riscv/tests/riscv_data/vec_median/src/main.rs @@ -11,6 +11,7 @@ //! cargo run --release rust riscv/tests/riscv_data/vec_median -o tmp -f -i 5,11,15,75,6,5,1,4,7,3,2,9,2 //! ``` +#![no_main] #![no_std] extern crate alloc; diff --git a/riscv/tests/riscv_data/zero_with_values/src/lib.rs b/riscv/tests/riscv_data/zero_with_values/src/main.rs similarity index 84% rename from riscv/tests/riscv_data/zero_with_values/src/lib.rs rename to riscv/tests/riscv_data/zero_with_values/src/main.rs index 797649dc4..e255faee5 100644 --- a/riscv/tests/riscv_data/zero_with_values/src/lib.rs +++ b/riscv/tests/riscv_data/zero_with_values/src/main.rs @@ -1,5 +1,8 @@ +#![no_main] #![no_std] +extern crate powdr_riscv_runtime; + // This is stored as a data with the ".zero" directive, // but in the variant where it repeats something else than zero. const DATA: &str = "1111111111111111"; From 18989e471a2f407361aaf487f18fc3244e95cc53 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 10 Jun 2024 18:05:39 +0200 Subject: [PATCH 28/88] Fixing test and improving comments. --- riscv-runtime/powdr.x | 8 +++++--- riscv/tests/riscv_data/affine_256/src/main.rs | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/riscv-runtime/powdr.x b/riscv-runtime/powdr.x index 0704e2bfe..e20c1abee 100644 --- a/riscv-runtime/powdr.x +++ b/riscv-runtime/powdr.x @@ -3,9 +3,10 @@ # If you are using powdr-riscv-runtime, it expects the symbols # "__global_pointer$" and "__powdr_stack_start" to be defined. # -# This is an usable version with a 256 MB stack. If you are not building via powdr-rs, -# to use this linker script you must manually specify "-C link-arg=-Tpowdr.x" in rustc -# (e.g. via RUSTFLAGS). +# This linker script provides usable definitions to these +# symbols, with a 256 MB stack. 
If you are not building via +# powdr-rs, you must manually specify "-C link-arg=-Tpowdr.x" +# in rustc to use this linker script (e.g. via RUSTFLAGS). SECTIONS { @@ -23,4 +24,5 @@ SECTIONS __powdr_stack_start = 0x10000000; } +# Specify the entry point function provided by powdr-riscv-runtime: ENTRY(__runtime_start) diff --git a/riscv/tests/riscv_data/affine_256/src/main.rs b/riscv/tests/riscv_data/affine_256/src/main.rs index 601b6c667..a1682eb4d 100644 --- a/riscv/tests/riscv_data/affine_256/src/main.rs +++ b/riscv/tests/riscv_data/affine_256/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] use hex_literal::hex; From 8c5a23bda0a2daffa86c82f6a610a3a15bcac6b3 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 10 Jun 2024 19:14:14 +0200 Subject: [PATCH 29/88] Reverting useless change. --- riscv/tests/riscv_data/vec_median/Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/riscv/tests/riscv_data/vec_median/Cargo.toml b/riscv/tests/riscv_data/vec_median/Cargo.toml index a074d42b8..b4b17227e 100644 --- a/riscv/tests/riscv_data/vec_median/Cargo.toml +++ b/riscv/tests/riscv_data/vec_median/Cargo.toml @@ -6,7 +6,4 @@ edition = "2021" [dependencies] powdr-riscv-runtime = { path = "../../../../riscv-runtime" } -[profile.release] -panic = "abort" - [workspace] From 71c56263c5c65d50a911c1480c773e5c006fc93c Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Tue, 11 Jun 2024 11:50:14 +0100 Subject: [PATCH 30/88] Fixing comments. --- cli-rs/src/main.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cli-rs/src/main.rs b/cli-rs/src/main.rs index 18dc9bd5a..c370da392 100644 --- a/cli-rs/src/main.rs +++ b/cli-rs/src/main.rs @@ -67,8 +67,7 @@ enum Commands { #[arg(default_value_t = false)] continuations: bool, }, - /// Compiles riscv assembly to powdr assembly and then to PIL - /// and generates fixed and witness columns. + /// Compiles riscv assembly to powdr assembly. 
RiscvAsm { /// Input files #[arg(required = true)] @@ -94,8 +93,7 @@ enum Commands { #[arg(default_value_t = false)] continuations: bool, }, - /// Translates a RISC-V statically liked executable to powdr assembly and - /// then to PIL and generates fixed and witness columns. + /// Translates a RISC-V statically liked executable to powdr assembly. RiscvElf { /// Input file #[arg(required = true)] From bfad2ae31def45f91550a4628aaaaf4e586235c2 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Tue, 11 Jun 2024 11:59:21 +0100 Subject: [PATCH 31/88] Addressing review. --- asm-utils/src/reachability.rs | 2 +- riscv-runtime/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/asm-utils/src/reachability.rs b/asm-utils/src/reachability.rs index 023c037d9..d33172892 100644 --- a/asm-utils/src/reachability.rs +++ b/asm-utils/src/reachability.rs @@ -147,7 +147,7 @@ fn extract_replacements( Some((from.to_string(), to.to_string())) }, [Argument::Expression(Expression::Symbol(_)), Argument::Expression(Expression::Number(_))] => { - // Handled elsewhere... + // Not a replacement, but not an error either, so ignore. None } _ =>{ diff --git a/riscv-runtime/Cargo.toml b/riscv-runtime/Cargo.toml index 1b749c75c..56d37aa18 100644 --- a/riscv-runtime/Cargo.toml +++ b/riscv-runtime/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "powdr-riscv-runtime" description = "powdr runtime provider for RISCV programs" -version = "0.2.0-alpha.0" +version = "0.1.0-alpha.2" edition = "2021" license = "MIT" homepage = "https://powdr.org" From 69ee478a260f48ea2aac0587b6b691877fa5275e Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Tue, 11 Jun 2024 14:37:09 +0100 Subject: [PATCH 32/88] Implementing args interface. 
--- riscv/src/asm/mod.rs | 33 +-- riscv/src/code_gen.rs | 23 +- riscv/src/{elf_translate.rs => elf.rs} | 334 +++++++++++++++++++++---- riscv/src/lib.rs | 4 +- 4 files changed, 303 insertions(+), 91 deletions(-) rename riscv/src/{elf_translate.rs => elf.rs} (68%) diff --git a/riscv/src/asm/mod.rs b/riscv/src/asm/mod.rs index 1a6a4fb51..6a2c612ee 100644 --- a/riscv/src/asm/mod.rs +++ b/riscv/src/asm/mod.rs @@ -94,6 +94,11 @@ impl InstructionArgs for [Argument] { [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => { Ok((*r1, *r2, *r3)) } + [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(None, r3)] => { + // Special syntax used by sc.w + Ok((*r1, *r2, *r3)) + } + _ => Err("Expected: register, register, register"), } } @@ -109,6 +114,10 @@ impl InstructionArgs for [Argument] { fn rr(&self) -> Result<(Register, Register), &'static str> { match self { [Argument::Register(r1), Argument::Register(r2)] => Ok((*r1, *r2)), + [Argument::Register(r1), Argument::RegOffset(None, r2)] => { + // Special syntax used by lr.w + Ok((*r1, *r2)) + } _ => Err("Expected: register, register"), } } @@ -132,11 +141,8 @@ impl InstructionArgs for [Argument] { } fn rro(&self) -> Result<(Register, Register, u32), &'static str> { - if let [Argument::Register(r1), Argument::RegOffset(off, r2)] = self { - if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) - { - return Ok((*r1, *r2, off)); - } + if let [Argument::Register(r1), Argument::RegOffset(Some(off), r2)] = self { + return Ok((*r1, *r2, expression_to_number(off))); } if let [Argument::Register(r1), Argument::Expression(off)] = self { if let Some(off) = expression_to_number(off) { @@ -148,23 +154,6 @@ impl InstructionArgs for [Argument] { Err("Expected: register, offset(register)") } - fn rrro(&self) -> Result<(Register, Register, Register, u32), &'static str> { - if let [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] = self - { - if 
let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) - { - return Ok((*r1, *r2, *r3, off)); - } - } - if let [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] = self { - if let Some(off) = expression_to_number(off) { - // If the register is not specified, it defaults to x0 - return Ok((*r1, *r2, Register::new(0), off)); - } - } - Err("Expected: register, register, offset(register)") - } - fn empty(&self) -> Result<(), &'static str> { match self { [] => Ok(()), diff --git a/riscv/src/code_gen.rs b/riscv/src/code_gen.rs index b63b203ed..bf32408b6 100644 --- a/riscv/src/code_gen.rs +++ b/riscv/src/code_gen.rs @@ -664,7 +664,6 @@ pub trait InstructionArgs { fn rrl(&self) -> Result<(Register, Register, String), Self::Error>; fn rl(&self) -> Result<(Register, String), Self::Error>; fn rro(&self) -> Result<(Register, Register, u32), Self::Error>; - fn rrro(&self) -> Result<(Register, Register, Register, u32), Self::Error>; fn empty(&self) -> Result<(), Self::Error>; } @@ -1148,30 +1147,30 @@ fn process_instruction( // TODO this code assumes it is at least aligned on // a two-byte boundary - let (rs, rd, off) = args.rro()?; + let (r1, r2, off) = args.rro()?; vec![ - format!("tmp1, tmp2 <== mload({rd} + {off});"), + format!("tmp1, tmp2 <== mload({r2} + {off});"), "tmp3 <== shl(0xffff, 8 * tmp2);".to_string(), "tmp3 <== xor(tmp3, 0xffffffff);".to_string(), "tmp1 <== and(tmp1, tmp3);".to_string(), - format!("tmp3 <== and({rs}, 0xffff);"), + format!("tmp3 <== and({r1}, 0xffff);"), "tmp3 <== shl(tmp3, 8 * tmp2);".to_string(), "tmp1 <== or(tmp1, tmp3);".to_string(), - format!("mstore {rd} + {off} - tmp2, tmp1;"), + format!("mstore {r2} + {off} - tmp2, tmp1;"), ] } "sb" => { // store byte - let (rs, rd, off) = args.rro()?; + let (r1, r2, off) = args.rro()?; vec![ - format!("tmp1, tmp2 <== mload({rd} + {off});"), + format!("tmp1, tmp2 <== mload({r2} + {off});"), "tmp3 <== shl(0xff, 8 * tmp2);".to_string(), "tmp3 <== 
xor(tmp3, 0xffffffff);".to_string(), "tmp1 <== and(tmp1, tmp3);".to_string(), - format!("tmp3 <== and({rs}, 0xff);"), + format!("tmp3 <== and({r1}, 0xff);"), "tmp3 <== shl(tmp3, 8 * tmp2);".to_string(), "tmp1 <== or(tmp1, tmp3);".to_string(), - format!("mstore {rd} + {off} - tmp2, tmp1;"), + format!("mstore {r2} + {off} - tmp2, tmp1;"), ] } "fence" | "nop" => vec![], @@ -1195,8 +1194,7 @@ fn process_instruction( insn if insn.starts_with("lr.w") => { // Very similar to "lw": - let (rd, rs, off) = args.rro()?; - assert_eq!(off, 0); + let (rd, rs) = args.rr()?; // TODO misaligned access should raise misaligned address exceptions let mut statements = only_if_no_write_to_zero_vec(vec![format!("{rd}, tmp1 <== mload({rs});")], rd); @@ -1206,8 +1204,7 @@ fn process_instruction( insn if insn.starts_with("sc.w") => { // Some overlap with "sw", but also writes 0 to rd on success - let (rd, rs2, rs1, off) = args.rrro()?; - assert_eq!(off, 0); + let (rd, rs2, rs1) = args.rrr()?; // TODO: misaligned access should raise misaligned address exceptions let mut statements = vec![ "skip_if_zero lr_sc_reservation, 1;".into(), diff --git a/riscv/src/elf_translate.rs b/riscv/src/elf.rs similarity index 68% rename from riscv/src/elf_translate.rs rename to riscv/src/elf.rs index d64efdda0..69c273043 100644 --- a/riscv/src/elf_translate.rs +++ b/riscv/src/elf.rs @@ -1,5 +1,6 @@ use std::{ collections::{BTreeMap, HashSet}, + fmt::Display, fs, }; @@ -10,6 +11,8 @@ use raki::{ Isa, }; +use crate::code_gen::{InstructionArgs, Register, RiscVProgram}; + pub fn elf_translate(file_name: &str) { let file_buffer = fs::read(file_name).unwrap(); @@ -30,7 +33,7 @@ pub fn elf_translate(file_name: &str) { let mut data_map = BTreeMap::new(); // Keep a list of referenced text addresses, so we can generate the labels. 
- let mut referenced_text_addrs = HashSet::from([elf.entry.try_into().unwrap()]); + let mut referenced_text_addrs = HashSet::from([Label(u32::try_from(elf.entry).unwrap())]); println!("entry: {:08x}:", elf.entry); for p in elf.program_headers.iter() { @@ -68,7 +71,7 @@ pub fn elf_translate(file_name: &str) { // We also need to add the referenced address to the list of text // addresses, so we can generate the label. - referenced_text_addrs.insert(original_addr); + referenced_text_addrs.insert(Label(original_addr)); println!("reloc: {:08x}:", original_addr); } else { data_map.insert(addr, Data::Value(original_addr)); @@ -82,7 +85,7 @@ pub fn elf_translate(file_name: &str) { println!("Text labels:"); for label in referenced_text_addrs { - println!(" label_{:08x}:", label); + println!(" {label}:"); } println!("Non-zero data:"); @@ -93,6 +96,191 @@ pub fn elf_translate(file_name: &str) { todo!(); } +struct ElfProgram { + entry_point: String, +} + +impl RiscVProgram for ElfProgram { + type Args = HighLevelArgs; + + fn take_source_files_info(&mut self) -> impl Iterator { + todo!() + } + + fn take_initial_mem(&mut self) -> impl Iterator { + todo!() + } + + fn take_executable_statements( + &mut self, + ) -> impl Iterator, Self::Args>> { + todo!() + } + + fn start_function(&self) -> &str { + &self.entry_point + } +} + +impl InstructionArgs for HighLevelArgs { + type Error = String; + + fn l(&self) -> Result { + match self { + HighLevelArgs { + imm: HighLevelImmediate::CodeLabel(addr), + rd: None, + rs1: None, + rs2: None, + } => Ok(addr.to_string()), + _ => Err(format!("Expected: label, got {:?}", self)), + } + } + + fn r(&self) -> Result { + match self { + HighLevelArgs { + imm: HighLevelImmediate::None, + rd: None, + rs1: Some(rs1), + rs2: None, + } => Ok(Register::new(*rs1 as u8)), + _ => Err(format!("Expected: rs1, got {:?}", self)), + } + } + + fn rri(&self) -> Result<(Register, Register, u32), Self::Error> { + match self { + HighLevelArgs { + imm: 
HighLevelImmediate::Value(imm), + rd: Some(rd), + rs1: Some(rs1), + rs2: None, + } => Ok(( + Register::new(*rd as u8), + Register::new(*rs1 as u8), + *imm as u32, + )), + _ => Err(format!("Expected: rd, rs1, imm, got {:?}", self)), + } + } + + fn rrr(&self) -> Result<(Register, Register, Register), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::None, + rd: Some(rd), + rs1: Some(rs1), + rs2: Some(rs2), + } => Ok(( + Register::new(*rd as u8), + Register::new(*rs1 as u8), + Register::new(*rs2 as u8), + )), + _ => Err(format!("Expected: rd, rs1, rs2, got {:?}", self)), + } + } + + fn ri(&self) -> Result<(Register, u32), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::Value(imm), + rd: Some(rd), + rs1: None, + rs2: None, + } => Ok((Register::new(*rd as u8), *imm as u32)), + _ => Err(format!("Expected: rd, imm, got {:?}", self)), + } + } + + fn rr(&self) -> Result<(Register, Register), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::None, + rd: Some(rd), + rs1: Some(rs1), + rs2: None, + } => Ok((Register::new(*rd as u8), Register::new(*rs1 as u8))), + _ => Err(format!("Expected: rd, rs1, got {:?}", self)), + } + } + + fn rrl(&self) -> Result<(Register, Register, String), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::CodeLabel(addr), + rd: None, + rs1: Some(rs1), + rs2: Some(rs2), + } => Ok(( + Register::new(*rs1 as u8), + Register::new(*rs2 as u8), + addr.to_string(), + )), + _ => Err(format!("Expected: rs1, rs2, label, got {:?}", self)), + } + } + + fn rl(&self) -> Result<(Register, String), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::CodeLabel(addr), + rd: None, + rs1: Some(rs1), + rs2: None, + } => Ok((Register::new(*rs1 as u8), addr.to_string())), + HighLevelArgs { + imm: HighLevelImmediate::None, + rd: Some(rd), + rs1: None, + rs2: None, + } => Ok((Register::new(*rd as u8), "".to_string())), + _ => Err(format!("Expected: 
{{rs1|rd}}, label, got {:?}", self)), + } + } + + fn rro(&self) -> Result<(Register, Register, u32), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::Value(imm), + rd: Some(rd), + rs1: Some(rs1), + rs2: None, + } => Ok(( + Register::new(*rd as u8), + Register::new(*rs1 as u8), + *imm as u32, + )), + HighLevelArgs { + imm: HighLevelImmediate::Value(imm), + rd: None, + rs1: Some(rs1), + rs2: Some(rs2), + } => Ok(( + Register::new(*rs1 as u8), + Register::new(*rs2 as u8), + *imm as u32, + )), + _ => Err(format!( + "Expected: {{rd, rs1 | rs1, rs2}}, imm, got {:?}", + self + )), + } + } + + fn empty(&self) -> Result<(), Self::Error> { + match self { + HighLevelArgs { + imm: HighLevelImmediate::None, + rd: None, + rs1: None, + rs2: None, + } => Ok(()), + _ => Err(format!("Expected: no args, got {:?}", self)), + } + } +} + struct AddressMap<'a>(BTreeMap); impl AddressMap<'_> { @@ -162,25 +350,59 @@ impl From for MaybeInstruction { } } +/// The value is the original address +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct Label(u32); + +impl From for Label { + fn from(addr: i32) -> Self { + Label(addr as u32) + } +} + +impl Display for Label { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "L{:08x}", self.0) + } +} + +#[derive(Debug)] enum HighLevelImmediate { None, - CodeLabel(i32), // The value is the original address + CodeLabel(Label), Value(i32), } -struct HighLevelInsn { - original_address: u32, - op: &'static str, +#[derive(Debug)] +struct HighLevelArgs { rd: Option, rs1: Option, rs2: Option, imm: HighLevelImmediate, } +/// The default args are all empty. 
+impl Default for HighLevelArgs { + fn default() -> Self { + HighLevelArgs { + rd: None, + rs1: None, + rs2: None, + imm: HighLevelImmediate::None, + } + } +} + +struct HighLevelInsn { + original_address: u32, + op: &'static str, + args: HighLevelArgs, +} + struct InstructionLifter<'a> { base_addr: u32, address_map: &'a AddressMap<'a>, - referenced_text_addrs: &'a mut HashSet, + referenced_text_addrs: &'a mut HashSet