# Building a Statically Typed Forth-like DSL in Rust

## Implement a type-erased stack

### Determine maximum alignment


In [None]:
:dep static_assertions = "1.1.0"
:dep smallbox = "0.8"
extern crate static_assertions;
extern crate smallbox;

Error: `#![feature]` may not be used on the stable release channel

In [None]:
/// Yield a `&mut` to the largest of the given place expressions. When several
/// arguments compare equal to the maximum, the right-most one wins.
///
/// Arguments are compared with `<`; an argument may be expanded (and therefore
/// evaluated) more than once.
#[macro_export]
macro_rules! max_mut {
    // A single argument is trivially the maximum.
    ($only:expr) => {
        &mut $only
    };
    // Resolve the tail first, then keep it unless the head is strictly greater.
    ($head:expr, $($tail:expr),+) => {{
        let best_of_tail = max_mut!($($tail),+);
        if !(*best_of_tail < $head) {
            best_of_tail
        } else {
            &mut $head
        }
    }};
}

/// Yield a shared reference to the largest of the given expressions. When
/// several arguments compare equal to the maximum, the right-most one wins.
///
/// Arguments are compared with `<`; an argument may be expanded (and therefore
/// evaluated) more than once.
#[macro_export]
macro_rules! max {
    // A single argument is trivially the maximum.
    ($only:expr) => {
        &$only
    };
    // Resolve the tail first, then keep it unless the head is strictly greater.
    ($head:expr, $($tail:expr),+) => {{
        let best_of_tail = max!($($tail),+);
        if !(*best_of_tail < $head) {
            best_of_tail
        } else {
            &$head
        }
    }};
}

max!(1, 2, 3)

3

In [None]:

/// Exercises `max!` and `max_mut!`: checks that ties resolve to the last
/// argument and that the `&mut` returned by `max_mut!` writes through to it.
fn test() {
    // Shared-reference variant.
    {
        let (a, b, c) = (10, 1, 10);

        let max_value = max!(a, b, c);
        // `a` and `c` tie; the reference must point at `c`.
        assert_eq!(max_value, &c);
        println!("max!(1, 2, 3): {}", max!(1, 2, 3)); // This will print 3
        println!("The maximum value is: {}", max_value);
    }

    // Mutable-reference variant.
    let (mut a, mut b, mut c) = (10, 1, 10);

    *max_mut!(a, b, c) = 0;
    println!("c is now: {}", c); // This will print 0
}

test();


max!(1, 2, 3): 3
The maximum value is: 10
c is now: 0


In [None]:
/// Finds the position of the largest element of `arr`. Ties resolve to the
/// highest index. An empty slice yields `0`.
///
/// Written with an explicit index loop so it stays usable as a `const fn`.
const fn max_pos_usize(arr: &[usize]) -> usize {
    let mut best = 0usize;
    let mut idx = 0usize;
    loop {
        if idx >= arr.len() {
            break;
        }
        // `>=` (rather than `>`) makes a later equal element replace an earlier one.
        if arr[idx] >= arr[best] {
            best = idx;
        }
        idx += 1;
    }
    best
}

/// Returns the largest alignment among `usize`, `u128` and `f64`.
///
/// This is the maximum over those primitive types only; other types (for
/// example ones declared with `#[repr(align(..))]`) may require more.
///
/// Implemented as a plain index loop over `std::mem::align_of` values so that it
/// is a self-contained `const fn`: it does not borrow temporaries through a
/// macro and does not rely on `align_of` being in the prelude.
const fn max_align() -> usize {
    let candidates = [
        std::mem::align_of::<usize>(),
        std::mem::align_of::<u128>(),
        std::mem::align_of::<f64>(),
    ];
    let mut best = candidates[0];
    let mut i = 1;
    while i < candidates.len() {
        if candidates[i] > best {
            best = candidates[i];
        }
        i += 1;
    }
    best
}

max_align()

16

In [None]:
/// Expands to the literal `true`, discarding its single expression argument.
/// The argument is never evaluated; it only has to parse as an expression.
#[macro_export]
macro_rules! true_ {
    ($ignored:expr) => { true };
}

true_!("Hello, world!")

true

### Static assert that a type's alignment requirement does not exceed `max_align()`


In [None]:


/// Static assert that the alignment of the type is less than or equal to the
/// maximum alignment of the primitive types (`max_align()`).
///
/// Expands to an anonymous `const` item, so it can be used at item level. When
/// the check fails, compilation stops and the message below is part of the
/// error. Uses the standard `assert!` in a const context, so no helper crate or
/// message-carrying helper macro is needed.
#[macro_export]
macro_rules! assert_alignable {
    ($t:ty) => {
        const _: () = assert!(
            std::mem::align_of::<$t>() <= max_align(),
            "Alignment of type is greater than maximum alignment of primitive types"
        );
    };
}

assert_alignable!(String);

In [None]:
/// Expands to the literal `true`; the single expression argument is matched
/// under the placeholder name `$_` and then dropped without being evaluated.
macro_rules! true_ {
    ($_:expr) => { true };
}

true_!("Hello, world!")

true

A stack that can hold any type as raw bytes. Each value is stored at an offset that is a multiple of the maximum alignment. The stack is type-erased, so to retrieve a value from the stack the caller must know its type.


In [None]:
use std::mem;

/// A type-erased stack: pushed values are stored as raw bytes in one growable
/// buffer, and the caller names the type again when popping.
pub struct RawStack {
    // Backing bytes; each pushed value occupies `padded_size(size_of::<T>())` bytes at the end.
    buffer: Vec<u8>,
}

// Rounds `size` up to the nearest multiple of `max_align()` (sizes that are
// already a multiple are returned unchanged).
const fn padded_size(size: usize) -> usize {
    const ALIGN: usize = max_align();
    // Overshoot by ALIGN - 1, then drop the remainder to land on a multiple.
    let bumped = size + ALIGN - 1;
    bumped - bumped % ALIGN
}

impl RawStack {
    /// Creates an empty stack.
    pub fn new() -> Self {
        RawStack { buffer: Vec::new() }
    }

    /// Pushes `value` onto the stack, moving its bytes into the buffer.
    ///
    /// Each value occupies `padded_size(size_of::<T>())` bytes, so every value
    /// starts at a buffer offset that is a multiple of `max_align()`. The
    /// buffer's base address itself comes from a `Vec<u8>` allocation, which
    /// only guarantees 1-byte alignment, so the bytes are written with an
    /// unaligned store.
    ///
    /// Values still on the stack when the `RawStack` is dropped are not
    /// dropped themselves (the stack does not know their types).
    ///
    /// # Panics
    /// Panics if `T` requires a larger alignment than `max_align()`.
    pub fn push<T>(&mut self, value: T) {
        // REVISIT: This is a runtime check. Can we do this at compile time?
        if max_align() < mem::align_of::<T>() {
            panic!("Alignment of type is greater than maximum alignment of primitive types");
        }

        let padded = padded_size(mem::size_of::<T>());
        let start = self.buffer.len();
        self.buffer.resize(start + padded, 0u8);

        // SAFETY: after `resize`, `start..start + padded` is inside the buffer
        // and `padded >= size_of::<T>()`, so the destination is valid for a
        // write of one `T`. `write_unaligned` places no alignment requirement
        // on the destination pointer.
        unsafe {
            let slot = self.buffer.as_mut_ptr().add(start) as *mut T;
            std::ptr::write_unaligned(slot, value);
        }
    }

    /// Pops the top value from the stack, interpreting its bytes as a `T`.
    ///
    /// The caller must specify the same type `T` that was pushed last. Nothing
    /// here verifies that; popping a different type reinterprets raw bytes.
    ///
    /// # Panics
    /// Panics with "Stack underflow" if fewer than
    /// `padded_size(size_of::<T>())` bytes remain on the stack.
    pub fn pop<T>(&mut self) -> T {
        let padded = padded_size(mem::size_of::<T>());

        if self.buffer.len() < padded {
            panic!("Stack underflow");
        }

        let start = self.buffer.len() - padded;

        // SAFETY: `start..start + padded` is inside the buffer (checked above)
        // and covers at least `size_of::<T>()` bytes. The bytes are only a
        // valid `T` if the caller names the type that was pushed. The read is
        // unaligned because the `Vec<u8>` base address is only 1-byte aligned.
        let value = unsafe { std::ptr::read_unaligned(self.buffer.as_ptr().add(start) as *const T) };

        // Ownership of the value has moved out; discard its bytes.
        self.buffer.truncate(start);
        value
    }
}

/// Demo: push two `u32` values and pop them back in LIFO order.
fn main() {
    let mut stack = RawStack::new();

    // Push 100 first, then 200.
    for value in [100u32, 200u32] {
        stack.push(value);
    }

    // Pop twice; the caller must name the type. Prints 200, then 100.
    for _ in 0..2 {
        let popped: u32 = stack.pop();
        println!("Popped value: {}", popped);
    }
}

main();


Popped value: 200
Popped value: 100


A `Vec` of boxed closures. Each closure takes a raw stack, pops its arguments from it, invokes a lambda with them, and pushes the result back onto the stack.


In [83]:
// Alternative representation using the `smallbox` crate, currently disabled:
// pub type StackOperation = smallbox::SmallBox<dyn Fn(&mut RawStack), smallbox::space::S1>;
/// One step of a program: a heap-allocated closure that reads from and/or
/// writes to the `RawStack` it is handed.
pub type StackOperation = Box<dyn Fn(&mut RawStack)>;

/// Extension trait for appending a nullary operation to a program.
pub trait OperationPush0 {
    /// Appends a step that calls `op` and pushes its result onto the stack.
    fn push_op0<R, F>(&mut self, op: F)
    where
        F: Fn() -> R + 'static,
        R: 'static;
}

impl OperationPush0 for Vec<StackOperation> {
    fn push_op0<R, F>(&mut self, op: F)
    where
        F: Fn() -> R + 'static,
        R: 'static,
    {
        // No arguments to pop: produce the value and push it.
        let step: StackOperation = Box::new(move |stack: &mut RawStack| stack.push(op()));
        self.push(step);
    }
}

// For unary functions.
/// Extension trait for appending a unary operation to a program.
pub trait OperationPush1 {
    /// Appends a step that pops one `T`, calls `op` on it and pushes the result.
    fn push_op1<T, R, F>(&mut self, op: F)
    where
        F: Fn(T) -> R + 'static,
        T: 'static,
        R: 'static;
}

impl OperationPush1 for Vec<StackOperation> {
    fn push_op1<T, R, F>(&mut self, op: F)
    where
        F: Fn(T) -> R + 'static,
        T: 'static,
        R: 'static,
    {
        let step: StackOperation = Box::new(move |stack: &mut RawStack| {
            let arg = stack.pop::<T>();
            stack.push(op(arg));
        });
        self.push(step);
    }
}

// For binary functions.
/// Extension trait for appending a binary operation to a program.
pub trait OperationPush2 {
    /// Appends a step that pops a `U` and then a `T`, calls `op(t, u)` and
    /// pushes the result.
    fn push_op2<T, U, R, F>(&mut self, op: F)
    where
        F: Fn(T, U) -> R + 'static,
        T: 'static,
        U: 'static,
        R: 'static;
}

impl OperationPush2 for Vec<StackOperation> {
    fn push_op2<T, U, R, F>(&mut self, op: F)
    where
        F: Fn(T, U) -> R + 'static,
        T: 'static,
        U: 'static,
        R: 'static,
    {
        let step: StackOperation = Box::new(move |stack: &mut RawStack| {
            // The last argument was pushed last, so it comes off first.
            let second = stack.pop::<U>();
            let first = stack.pop::<T>();
            stack.push(op(first, second));
        });
        self.push(step);
    }
}

// For ternary functions.
/// Extension trait for appending a ternary operation to a program.
pub trait OperationPush3 {
    /// Appends a step that pops a `V`, a `U` and then a `T`, calls
    /// `op(t, u, v)` and pushes the result.
    fn push_op3<T, U, V, R, F>(&mut self, op: F)
    where
        F: Fn(T, U, V) -> R + 'static,
        T: 'static,
        U: 'static,
        V: 'static,
        R: 'static;
}

impl OperationPush3 for Vec<StackOperation> {
    fn push_op3<T, U, V, R, F>(&mut self, op: F)
    where
        F: Fn(T, U, V) -> R + 'static,
        T: 'static,
        U: 'static,
        V: 'static,
        R: 'static,
    {
        let step: StackOperation = Box::new(move |stack: &mut RawStack| {
            // Arguments come off in the reverse of the order they were pushed.
            let third = stack.pop::<V>();
            let second = stack.pop::<U>();
            let first = stack.pop::<T>();
            stack.push(op(first, second, third));
        });
        self.push(step);
    }
}

/// Demo: build a small program out of stack operations, run it against a
/// fresh `RawStack` and print the single `String` it leaves behind.
fn main() {
    let mut program: Vec<StackOperation> = Vec::new();

    // 30 12 +
    program.push_op0(|| 30u32);
    program.push_op0(|| 12u32);
    program.push_op2(|x: u32, y: u32| x + y);
    // 100 10, then the ternary x + y - z over (42, 100, 10)
    program.push_op0(|| 100u32);
    program.push_op0(|| 10u32);
    program.push_op3(|x: u32, y: u32, z: u32| x + y - z);
    // Render the number as text.
    program.push_op1(|x: u32| format!("result: {}", x));

    // Run the machine.
    let mut stack = RawStack::new();
    program.iter().for_each(|step| step(&mut stack));

    let final_result: String = stack.pop();
    println!("{}", final_result);
}

main();

result: 132


In [None]:
use std::any::TypeId;

/// A program of stack operations together with a build-time model of the
/// value types the program will leave on the runtime stack.
pub struct OperationStack {
    /// The recorded operations, in execution order.
    pub ops: Vec<StackOperation>,
    /// `TypeId`s of the values expected on the runtime stack after all
    /// recorded operations have run (bottom of the stack first).
    pub type_ids: Vec<TypeId>,
}

impl OperationStack {
    /// Creates an empty program.
    pub fn new() -> Self {
        OperationStack {
            ops: Vec::new(),
            type_ids: Vec::new(),
        }
    }

    /// Pops the top modelled type and checks that it is `A`.
    ///
    /// `caller` is the name of the public method performing the check; it only
    /// appears in the panic message.
    fn expect_arg<A: 'static>(&mut self, caller: &str) {
        match self.type_ids.pop() {
            Some(tid) if tid == TypeId::of::<A>() => {}
            _ => panic!(
                "Type mismatch in {}: expected {}",
                caller,
                std::any::type_name::<A>()
            ),
        }
    }

    /// Records a nullary operation that pushes an `R`.
    pub fn push_op0<R, F>(&mut self, op: F)
    where
        F: Fn() -> R + 'static,
        R: 'static,
    {
        self.ops.push(Box::new(move |stack: &mut RawStack| {
            let result = op();
            stack.push(result);
        }));
        self.type_ids.push(TypeId::of::<R>());
    }

    /// Records a unary operation that consumes a `T` and pushes an `R`.
    ///
    /// # Panics
    /// Panics if the modelled stack is empty or its top is not a `T`.
    pub fn push_op1<T, R, F>(&mut self, op: F)
    where
        F: Fn(T) -> R + 'static,
        T: 'static,
        R: 'static,
    {
        self.expect_arg::<T>("push_op1");
        self.ops.push(Box::new(move |stack: &mut RawStack| {
            let x: T = stack.pop();
            let result = op(x);
            stack.push(result);
        }));
        self.type_ids.push(TypeId::of::<R>());
    }

    /// Records a binary operation that consumes a `T` and a `U` (pushed in
    /// that order) and pushes an `R`.
    ///
    /// # Panics
    /// Panics if the top two modelled types are not `T` then `U`.
    pub fn push_op2<T, U, R, F>(&mut self, op: F)
    where
        F: Fn(T, U) -> R + 'static,
        T: 'static,
        U: 'static,
        R: 'static,
    {
        // Check in reverse order: first the last argument U, then T.
        self.expect_arg::<U>("push_op2");
        self.expect_arg::<T>("push_op2");
        self.ops.push(Box::new(move |stack: &mut RawStack| {
            let y: U = stack.pop();
            let x: T = stack.pop();
            let result = op(x, y);
            stack.push(result);
        }));
        self.type_ids.push(TypeId::of::<R>());
    }

    /// Records a ternary operation that consumes a `T`, a `U` and a `V`
    /// (pushed in that order) and pushes an `R`.
    ///
    /// # Panics
    /// Panics if the top three modelled types are not `T`, `U`, `V`.
    pub fn push_op3<T, U, V, R, F>(&mut self, op: F)
    where
        F: Fn(T, U, V) -> R + 'static,
        T: 'static,
        U: 'static,
        V: 'static,
        R: 'static,
    {
        // Check in reverse order: first V, then U, then T.
        self.expect_arg::<V>("push_op3");
        self.expect_arg::<U>("push_op3");
        self.expect_arg::<T>("push_op3");
        self.ops.push(Box::new(move |stack: &mut RawStack| {
            let z: V = stack.pop();
            let y: U = stack.pop();
            let x: T = stack.pop();
            let result = op(x, y, z);
            stack.push(result);
        }));
        self.type_ids.push(TypeId::of::<R>());
    }

    /// Executes every recorded operation against a fresh `RawStack` and
    /// returns the single value of type `T` that remains.
    ///
    /// The modelled types are validated before anything is executed and are
    /// left untouched, so a failing program never runs and a valid program can
    /// be run more than once.
    ///
    /// # Panics
    /// Panics if the modelled stack is empty or its top is not a `T`, or if
    /// more than one value would remain on the stack.
    pub fn run<T>(&mut self) -> T
    where
        T: 'static,
    {
        match self.type_ids.last() {
            Some(tid) if *tid == TypeId::of::<T>() => {}
            _ => {
                panic!(
                    "Type mismatch in run: expected {}",
                    std::any::type_name::<T>()
                );
            }
        }
        if self.type_ids.len() != 1 {
            panic!("Value(s) left on execution stack");
        }

        let mut stack = RawStack::new();
        for op in self.ops.iter() {
            op(&mut stack);
        }
        stack.pop()
    }
}

/// Demo: the same program as before, built through the type-checked
/// `OperationStack` and executed with `run`.
fn main() {
    let mut program = OperationStack::new();

    // 30 12 +
    program.push_op0(|| 30u32);
    program.push_op0(|| 12u32);
    program.push_op2(|x: u32, y: u32| x + y);
    // 100 10, then the ternary x + y - z over (42, 100, 10)
    program.push_op0(|| 100u32);
    program.push_op0(|| 10u32);
    program.push_op3(|x: u32, y: u32, z: u32| x + y - z);
    // Render the number as text.
    program.push_op1(|x: u32| format!("result: {}", x));

    let final_result = program.run::<String>();
    println!("{}", final_result);
}

main();

result: 132


A simple parser for the following grammar in Rust:

```ebnf
expression = number, {("+" | "-"), number};
number = digit, {digit};
```


In [None]:
use std::mem::MaybeUninit;

/// Small example type used to demonstrate in-place construction.
struct MyStruct {
    x: i32,
}

impl MyStruct {
    /// Builds a `MyStruct` holding `x`, logging the construction to stdout.
    fn new(x: i32) -> Self {
        println!("Constructing MyStruct with value: {}", x);
        Self { x }
    }
}

/// Demo: construct a `MyStruct` inside uninitialized storage ("placement new").
fn main() {
    // Reserve uninitialized storage for one MyStruct.
    let mut slot = MaybeUninit::<MyStruct>::uninit();

    // Construct the value directly in that storage.
    slot.write(MyStruct::new(10));

    // SAFETY: `slot` was fully initialized by the `write` just above.
    let my_obj = unsafe { slot.assume_init() };
    println!("MyStruct.x = {}", my_obj.x);
}

main();
{
    // Pre-allocated array of bytes; ensure it has enough size.
    let mut buffer = [0u8; std::mem::size_of::<MyStruct>()];
    // Cast the byte array's pointer to a pointer to MyStruct.
    // A `[u8; N]` only guarantees 1-byte alignment, so this pointer may be
    // misaligned for MyStruct and must only be used with unaligned accessors.
    let my_ptr = buffer.as_mut_ptr() as *mut MyStruct;

    // SAFETY: `buffer` is exactly `size_of::<MyStruct>()` bytes, so `my_ptr` is
    // valid for one unaligned write and one unaligned read of a MyStruct. No
    // reference to the (possibly misaligned) value is ever formed.
    unsafe {
        // Use placement new to write the MyStruct instance into the allocated memory.
        my_ptr.write_unaligned(MyStruct::new(20));
        // Move the constructed object back out to access it.
        let my_obj = my_ptr.read_unaligned();
        println!("MyStruct.x = {}", my_obj.x);
    }
}

Constructing MyStruct with value: 10
MyStruct.x = 10
Constructing MyStruct with value: 20
MyStruct.x = 20


()

In [None]:
use std::iter::Peekable;
use std::str::Chars;

/// Lexical tokens of the expression grammar.
#[derive(Debug, PartialEq)]
enum Token {
    /// A run of decimal digits, parsed as an `i32`.
    Number(i32),
    /// The `+` character.
    Plus,
    /// The `-` character.
    Minus,
}

/// Tokenizer over a borrowed string, using one character of lookahead.
struct Lexer<'a> {
    input: Peekable<Chars<'a>>,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the start of `expr`.
    fn new(expr: &'a str) -> Self {
        let input = expr.chars().peekable();
        Self { input }
    }

    /// Skips leading whitespace and returns the next token.
    ///
    /// Returns `None` at end of input, and also after consuming a character
    /// that is neither a digit, `+` nor `-`.
    fn next_token(&mut self) -> Option<Token> {
        self.skip_whitespace();
        if self.input.peek()?.is_ascii_digit() {
            return Some(Token::Number(self.next_number()));
        }
        match self.input.next()? {
            '+' => Some(Token::Plus),
            '-' => Some(Token::Minus),
            _ => None,
        }
    }

    /// Consumes a maximal run of ASCII digits and parses it as an `i32`.
    ///
    /// Panics if the digits do not form a valid `i32` (for example when the
    /// run is empty or the value overflows).
    fn next_number(&mut self) -> i32 {
        let mut digits = String::new();
        while let Some(d) = self.input.next_if(char::is_ascii_digit) {
            digits.push(d);
        }
        digits.parse().unwrap()
    }

    /// Consumes characters for as long as they are whitespace.
    fn skip_whitespace(&mut self) {
        while self.input.next_if(|c| c.is_whitespace()).is_some() {}
    }
}

/// Parser and evaluator for `expression = number, {("+" | "-"), number};`.
struct Parser<'a> {
    lexer: Lexer<'a>,
    // The first token of the input, read eagerly by `new`.
    current_token: Option<Token>,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `expr` and reads its first token.
    fn new(expr: &'a str) -> Self {
        let mut lexer = Lexer::new(expr);
        let current_token = lexer.next_token();
        Parser { lexer, current_token }
    }

    /// Parses and evaluates the expression, left to right.
    ///
    /// Handles any number of `+`/`-` operations after the leading number, as
    /// the grammar requires, instead of stopping after the first operator.
    /// Parsing ends when the lexer returns `None`.
    ///
    /// # Panics
    /// Panics if the input does not start with a number, if an operator is not
    /// followed by a number, or if a number appears where an operator is
    /// expected.
    fn parse_expression(&mut self) -> i32 {
        // Parse the leading number.
        let mut total = match self.current_token.take() {
            Some(Token::Number(n)) => n,
            _ => panic!("Expected a number at the beginning"),
        };

        // Zero or more (operator, number) pairs.
        while let Some(op) = self.lexer.next_token() {
            let rhs = match self.lexer.next_token() {
                Some(Token::Number(n)) => n,
                _ => panic!("Expected a number after operator"),
            };

            match op {
                Token::Plus => total += rhs,
                Token::Minus => total -= rhs,
                Token::Number(_) => panic!("Unexpected token"),
            }
        }

        total
    }
}

/// Demo: evaluate a fixed expression and print the result.
fn main() {
    let expr = "12+34";
    let result = Parser::new(expr).parse_expression();
    println!("Result of {} = {}", expr, result);
}

main();

Result of 12+34 = 46


In [None]:
// Bind a value, then leave it as the cell's trailing expression; the output
// recorded below this cell shows the value 42.
let x = 42;
x

42

```cpp
vm.add_function([](my_struct& foo) { return foo; }, "add"_name);
vm.add_function([]() { return my_struct{}; }, "make_my_struct"_name);

"make_my_struct() + 3";
```


In [None]:
// Choose an arbitrary offset and allocate a buffer
let mut buffer = [0u8; 64];
let arbitrary_offset = 5;
// SAFETY: offset 5 is within the 64-byte buffer, so the result stays in bounds.
let base_ptr = unsafe { buffer.as_mut_ptr().add(arbitrary_offset) };

// Get the alignment for MyStruct
let align = std::mem::align_of::<MyStruct>();
// Compute how many bytes to add to get an aligned pointer
let adjustment = base_ptr.align_offset(align);
// `align_offset` reports `usize::MAX` when it cannot produce an aligned pointer.
if adjustment == usize::MAX {
    panic!("Unable to align pointer for MyStruct");
}
// SAFETY: assumes `arbitrary_offset + adjustment + size_of::<MyStruct>()` still
// fits in the 64-byte buffer; that bound is not checked here. TODO confirm.
let aligned_ptr = unsafe { base_ptr.add(adjustment) as *mut MyStruct };

// Use placement new with write() to construct MyStruct at the aligned address
// SAFETY: `aligned_ptr` was aligned for MyStruct above and (per the assumption
// noted there) points at enough writable bytes inside `buffer`.
unsafe {
    aligned_ptr.write(MyStruct::new(42));
    let my_obj = &*aligned_ptr;
    println!("Placed MyStruct with value: {}", my_obj.x);
}

Constructing MyStruct with value: 42
Placed MyStruct with value: 42


()

In [None]:
/// Returns 42.
async fn foo() -> u32 {
    42
}
foo().await

The type of the variable aligned_ptr was redefined, so was lost.


42

In [None]:
use std::str::Chars;

/// Lexical tokens of the expression grammar.
#[derive(Debug, PartialEq)]
enum Token {
    /// A run of decimal digits, parsed as an `i32`.
    Number(i32),
    /// The `+` character.
    Plus,
    /// The `-` character.
    Minus,
}

/// Tokenizer that keeps the character under the cursor in `current_char`.
struct Lexer<'a> {
    input: Chars<'a>,
    // Character under the cursor; `None` once the input is exhausted.
    current_char: Option<char>,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer over `input` and loads its first character.
    fn new(input: &'a str) -> Self {
        let mut lexer = Lexer {
            input: input.chars(),
            current_char: None,
        };
        lexer.advance();
        lexer
    }

    /// Moves the cursor to the next character (or to `None` at end of input).
    fn advance(&mut self) {
        self.current_char = self.input.next();
    }

    /// Consumes a maximal run of ASCII digits starting at the cursor and
    /// parses it as an `i32`.
    ///
    /// Panics if the digits do not form a valid `i32` (for example when the
    /// run is empty or the value overflows).
    fn get_number(&mut self) -> i32 {
        let mut number = String::new();
        while let Some(c) = self.current_char {
            if c.is_ascii_digit() {
                number.push(c);
                self.advance();
            } else {
                break;
            }
        }
        number.parse::<i32>().unwrap()
    }

    /// Returns the next token, skipping any whitespace in front of it.
    ///
    /// Returns `None` at end of input. Any other unrecognized character is
    /// consumed and also reported as `None`.
    fn next_token(&mut self) -> Option<Token> {
        while let Some(c) = self.current_char {
            match c {
                '0'..='9' => return Some(Token::Number(self.get_number())),
                '+' => {
                    self.advance();
                    return Some(Token::Plus);
                }
                '-' => {
                    self.advance();
                    return Some(Token::Minus);
                }
                // Whitespace separates tokens; keep scanning instead of
                // reporting it as the end of the token stream.
                c if c.is_whitespace() => self.advance(),
                _ => {
                    self.advance();
                    return None;
                }
            }
        }
        None
    }
}

/// Parser and evaluator for `expression = number, {("+" | "-"), number};`
/// with one token of lookahead held in `current_token`.
struct Parser<'a> {
    lexer: Lexer<'a>,
    current_token: Option<Token>,
}

impl<'a> Parser<'a> {
    /// Wraps `lexer` and loads the first token.
    fn new(lexer: Lexer<'a>) -> Self {
        let mut parser = Parser {
            lexer,
            current_token: None,
        };
        parser.advance();
        parser
    }

    /// Replaces the lookahead token with the next one from the lexer.
    fn advance(&mut self) {
        self.current_token = self.lexer.next_token();
    }

    /// Returns the value of the lookahead token, which must be a number.
    /// Does not advance. Panics with "Expected a number" otherwise.
    fn expect_number(&self) -> i32 {
        match self.current_token {
            Some(Token::Number(value)) => value,
            _ => panic!("Expected a number"),
        }
    }

    /// Parses and evaluates the expression left to right.
    ///
    /// Stops at end of input or at the first token that is not `+` or `-`
    /// where an operator is expected. Panics with "Expected a number" when a
    /// number is required but the lookahead is something else.
    fn parse_expression(&mut self) -> i32 {
        let mut total = self.expect_number();
        self.advance();

        loop {
            // Decide the operation from the lookahead; anything else ends the expression.
            let subtract = match self.current_token {
                Some(Token::Plus) => false,
                Some(Token::Minus) => true,
                _ => break,
            };
            self.advance();

            let operand = self.expect_number();
            if subtract {
                total -= operand;
            } else {
                total += operand;
            }
            self.advance();
        }

        total
    }
}

/// Demo: evaluate a fixed multi-operator expression and print the result.
fn main() {
    let input = "12+34-5+100";
    let mut parser = Parser::new(Lexer::new(input));
    println!("Result: {}", parser.parse_expression());
}

main();

Result: 141
