Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EOF container assembler prototype #148

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 186 additions & 3 deletions etk-asm/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,22 @@ mod error {
/// The location of the error.
backtrace: Backtrace,
},

/// Code containing sections does not start with a section declaration.
#[snafu(display("EOF code does not start with section declaration"))]
#[non_exhaustive]
EOFCodeDoesNotStartWithSection,

/// The data section is followed by another section, but it must be the last one.
#[snafu(display("EOF data section is not the last section"))]
#[non_exhaustive]
EOFDataSectionIsNotTheLast,
}
}

pub use self::error::Error;
use crate::ops::expression::Error::{UndefinedVariable, UnknownLabel, UnknownMacro};
use crate::ops::{self, AbstractOp, Assemble, Expression, MacroDefinition};
use crate::ops::{self, AbstractOp, Assemble, EOFSectionKind, Expression, MacroDefinition};
use indexmap::IndexMap;
use num_bigint::BigInt;
use rand::Rng;
Expand Down Expand Up @@ -219,6 +229,9 @@ pub struct Assembler {

/// Pushes that are variable-sized and need to be backpatched.
variable_sized_push: Vec<PushDef>,

/// Positions of sections, if assembling EOF
sections: Vec<SectionDef>,
}

/// A label definition.
Expand Down Expand Up @@ -267,6 +280,13 @@ impl PushDef {
}
}

/// An EOF section definition.
///
/// One entry is recorded for every `AbstractOp::EOFSection` the assembler
/// encounters, in declaration order.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct SectionDef {
/// Offset at which the section starts. It is taken from the assembler's
/// `concrete_len` when the declaration is seen and may be shifted later
/// while variable-sized pushes are backpatched.
position: usize,
/// Whether this is a code section (with its type information) or the data
/// section.
kind: EOFSectionKind,
}

impl Assembler {
/// Create a new `Assembler`.
pub fn new() -> Self {
Expand Down Expand Up @@ -343,6 +363,10 @@ impl Assembler {
RawOp::Op(AbstractOp::Macro(ref m)) => {
self.expand_macro(&m.name, &m.parameters)?;
}
RawOp::Op(AbstractOp::EOFSection(kind)) => self.sections.push(SectionDef {
position: self.concrete_len,
kind,
}),
RawOp::Op(ref op) => {
match op
.clone()
Expand Down Expand Up @@ -416,7 +440,7 @@ impl Assembler {
Ok(self.concrete_len)
}

fn backpatch_labels(&mut self) -> Result<(), Error> {
fn backpatch_labels_and_sections(&mut self) -> Result<(), Error> {
for pushdef in self.variable_sized_push.iter() {
if let AbstractOp::Push(imm) = &pushdef.op {
let exp = imm
Expand All @@ -441,6 +465,15 @@ impl Assembler {
updated: true,
});
}
for section in self.sections.iter_mut() {
if section.position < pushdef.position {
// don't move sections that are declared earlier than this push
continue;
};

section.position += imm_size as usize - 1;
// TODO check whether `updated` flag is needed
}
}
}
}
Expand Down Expand Up @@ -473,7 +506,7 @@ impl Assembler {
}
.fail();
}
self.backpatch_labels()?;
self.backpatch_labels_and_sections()?;
let output = match self.emit_bytecode() {
Ok(value) => value,
Err(value) => return value,
Expand All @@ -484,6 +517,12 @@ impl Assembler {

fn emit_bytecode(&mut self) -> Result<Vec<u8>, Result<Vec<u8>, Error>> {
let mut output = Vec::new();
if !self.sections.is_empty() {
if let Err(err) = self.emit_eof_header(&mut output) {
return Err(Err(err)); // Convert the error to the nested `Result` type
}
}

for op in self.ready.iter() {
let op = match op {
RawOp::Op(ref op) => op,
Expand Down Expand Up @@ -523,6 +562,103 @@ impl Assembler {
Ok(output)
}

/// Writes the EOF container header, followed by the types section, to `output`.
///
/// Section sizes are derived from the recorded section start positions: every
/// section runs up to the start of the next one, and the last section runs up
/// to `self.concrete_len`.
///
/// Nothing is written when no section was declared.
///
/// # Errors
///
/// * `EOFCodeDoesNotStartWithSection` if the first section does not start at
///   position 0, i.e. some code precedes the first section declaration.
/// * `EOFDataSectionIsNotTheLast` if a data section is followed by another
///   section.
fn emit_eof_header(&self, output: &mut Vec<u8>) -> Result<(), Error> {
    /// Header and type information of a single code section.
    #[derive(Clone, Copy, Debug, PartialEq)]
    struct EOFCodeSection {
        size: u16,
        inputs: u8,
        outputs: u8,
        max_stack_height: u16,
    }

    // Without any section declaration there is no container to describe.
    let first = match self.sections.first() {
        Some(first) => first,
        None => return Ok(()),
    };

    // Error if some code precedes the 0th section declaration
    if first.position != 0 {
        return error::EOFCodeDoesNotStartWithSection.fail();
    }

    // Error if a data section is declared anywhere but in the last position
    let last_index = self.sections.len() - 1;
    if self.sections[..last_index]
        .iter()
        .any(|section| section.kind == EOFSectionKind::Data)
    {
        return error::EOFDataSectionIsNotTheLast.fail();
    }

    // Every section ends where the next one starts; the last one ends at the
    // end of the assembled program.
    let section_ends = self
        .sections
        .iter()
        .skip(1)
        .map(|next| next.position)
        .chain(std::iter::once(self.concrete_len));

    // Calculate section sizes
    let mut code_sections = Vec::with_capacity(self.sections.len());
    let mut data_section_size: u16 = 0;
    for (section, end) in self.sections.iter().zip(section_ends) {
        // NOTE(review): sizes above u16::MAX are silently truncated by this
        // cast; reporting that properly needs a dedicated error variant.
        let size = (end - section.position) as u16;
        match section.kind {
            EOFSectionKind::Code {
                inputs,
                outputs,
                max_stack_height,
            } => code_sections.push(EOFCodeSection {
                size,
                inputs,
                outputs,
                max_stack_height,
            }),
            // Only the last section can be a data section (validated above).
            EOFSectionKind::Data => data_section_size = size,
        }
    }

    // Magic and version
    output.extend_from_slice(&[0xef, 0x00, 0x01]);

    // Type section header: four bytes of type information per code section
    // NOTE(review): truncating cast, same caveat as for the section sizes.
    output.push(0x01);
    let type_section_size = (code_sections.len() * 4) as u16;
    output.extend_from_slice(&type_section_size.to_be_bytes());

    // Code section headers: number of code sections, then the size of each
    output.push(0x02);
    let code_section_num = code_sections.len() as u16;
    output.extend_from_slice(&code_section_num.to_be_bytes());
    for code_section in &code_sections {
        output.extend_from_slice(&code_section.size.to_be_bytes());
    }

    // Data section header
    output.push(0x04);
    output.extend_from_slice(&data_section_size.to_be_bytes());

    // Header terminator
    output.push(0x00);

    // Types section
    for code_section in code_sections {
        output.push(code_section.inputs);
        output.push(code_section.outputs);
        output.extend_from_slice(&code_section.max_stack_height.to_be_bytes());
    }

    Ok(())
}

fn declare_label(&mut self, rop: &RawOp) -> Result<(), Error> {
if let RawOp::Op(AbstractOp::Label(label)) = rop {
if self.declared_labels.contains_key(label) {
Expand Down Expand Up @@ -1537,4 +1673,51 @@ mod tests {

Ok(())
}

#[test]
fn assemble_eof_not_starting_with_section() {
    // Two instructions are emitted before the first section declaration, so
    // the first section cannot start at offset zero.
    let late_section = EOFSectionKind::Code {
        inputs: 0,
        outputs: 0,
        max_stack_height: 0,
    };
    let program = vec![
        AbstractOp::new(Push0),
        AbstractOp::new(Stop),
        AbstractOp::EOFSection(late_section),
        AbstractOp::new(Stop),
    ];

    let mut assembler = Assembler::new();
    let failure = assembler.assemble(&program).unwrap_err();

    assert_matches!(failure, Error::EOFCodeDoesNotStartWithSection {});
}

#[test]
fn assemble_eof_data_section_not_the_last() {
    // Layout under test: code section, data section, code section. The data
    // section is followed by another section, which must be rejected.
    let leading_code = EOFSectionKind::Code {
        inputs: 0,
        outputs: 0x80,
        max_stack_height: 1,
    };
    let trailing_code = EOFSectionKind::Code {
        inputs: 0,
        outputs: 0,
        max_stack_height: 0,
    };
    let program = vec![
        AbstractOp::EOFSection(leading_code),
        AbstractOp::new(Push0),
        AbstractOp::new(Stop),
        AbstractOp::EOFSection(EOFSectionKind::Data),
        AbstractOp::new(JumpDest),
        AbstractOp::EOFSection(trailing_code),
        AbstractOp::new(Stop),
    ];

    let mut assembler = Assembler::new();
    let failure = assembler.assemble(&program).unwrap_err();

    assert_matches!(failure, Error::EOFDataSectionIsNotTheLast {});
}
}
36 changes: 36 additions & 0 deletions etk-asm/src/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,22 @@ impl Access {
}
}

/// Kind of an EOF section, as declared by an `AbstractOp::EOFSection`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum EOFSectionKind {
/// Code section, together with the type information declared for it
Code {
/// Code section's inputs
inputs: u8,
/// Code section's outputs, or 0x80 if the section is non-returning
outputs: u8,
/// Code section's max stack height
max_stack_height: u16,
},
/// Data section
Data,
}

/// Like an [`Op`], except it also supports virtual instructions.
///
/// In addition to the real EVM instructions, `AbstractOp` also supports defining
Expand All @@ -185,6 +201,9 @@ pub enum AbstractOp {

/// A user-defined macro, which is a virtual instruction.
Macro(InstructionMacroInvocation),

/// EOF Section
EOFSection(EOFSectionKind),
}

impl AbstractOp {
Expand Down Expand Up @@ -232,6 +251,7 @@ impl AbstractOp {
Self::Label(_) => panic!("labels cannot be concretized"),
Self::Macro(_) => panic!("macros cannot be concretized"),
Self::MacroDefinition(_) => panic!("macro definitions cannot be concretized"),
Self::EOFSection(_) => panic!("EOF sections cannot be concretized"),
}
}

Expand Down Expand Up @@ -265,6 +285,7 @@ impl AbstractOp {
Self::Push(_) => None,
Self::Macro(_) => None,
Self::MacroDefinition(_) => None,
Self::EOFSection(_) => None,
}
}

Expand Down Expand Up @@ -300,6 +321,20 @@ impl From<ExpressionMacroDefinition> for AbstractOp {
}
}

impl fmt::Display for EOFSectionKind {
    /// Writes the lowercase name of the section kind (`code` or `data`).
    ///
    /// The attributes of a code section are not part of the output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::Code { .. } => "code",
            Self::Data => "data",
        };
        f.write_str(name)
    }
}

impl fmt::Display for AbstractOp {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expand All @@ -314,6 +349,7 @@ impl fmt::Display for AbstractOp {
Self::Label(lbl) => write!(f, r#"{}:"#, lbl),
Self::Macro(m) => write!(f, "{}", m),
Self::MacroDefinition(defn) => write!(f, "{}", defn),
Self::EOFSection(kind) => write!(f, "EOF {} section", kind),
}
}
}
Expand Down
14 changes: 13 additions & 1 deletion etk-asm/src/parse/asm.pest
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
///////////////////////
program = _{ SOI ~ inner ~ EOI }
inner = _{ NEWLINE* ~ (stmt ~ (NEWLINE+|";"))* ~ stmt? }
stmt = _{ label_definition | builtin | local_macro | push | op }
stmt = _{ label_definition | builtin | local_macro | push | op | section }

//////////////////////
// opcode mnemonics //
Expand Down Expand Up @@ -68,6 +68,18 @@ function_invocation = _{ function_name ~ "(" ~ expression* ~ ("," ~ expression)*
function_name = @{ ( ASCII_ALPHA | "_" ) ~ ( ASCII_ALPHANUMERIC | "_" )* }
function_parameter = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }

//////////////
// sections //
//////////////
// A section declaration: the keyword `section`, a section kind and, optionally,
// a list of attributes. `$` makes the rule compound-atomic, so whitespace is
// only accepted where `WHITESPACE` is written explicitly.
section = ${ "section" ~ WHITESPACE ~ section_kind ~ (WHITESPACE ~ section_attributes)? }
// Either `.code` or `.data`.
section_kind = { ".code" | ".data" }
// Zero or more attributes, each optionally followed by whitespace.
section_attributes = { (section_attribute ~WHITESPACE?)* }
// A single `name=value` attribute.
section_attribute = { inputs_attribute | outputs_attribute | max_stack_height_attribute }
max_stack_height_attribute = { "max_stack_height" ~ "=" ~ number }
inputs_attribute = { "inputs" ~ "=" ~ number }
// `outputs` takes either a number or the `nonret` keyword.
outputs_attribute = { "outputs" ~ "=" ~ (number | nonreturning) }
// Keyword for a non-returning code section.
// NOTE(review): presumably translated to outputs = 0x80 by the parser; confirm.
nonreturning = { "nonret" }

//////////////
// operands //
//////////////
Expand Down
Loading
Loading