Permalink
Browse files

Initial checkin of the beginnings of a tiny compiler.

  • Loading branch information...
0 parents commit 9e35de84b09c81b0d8b224e215690dcc8a959532 @kragen committed Oct 4, 2008
Showing with 297 additions and 0 deletions.
  1. +63 −0 hello42.tbf1
  2. +47 −0 tiny.asm
  3. +146 −0 tinyboot.py
  4. +41 −0 tinyboot.s
@@ -0,0 +1,63 @@
+( First step toward a tiny bootstrap Forth compiler written in itself.
+
+Generates an ELF executable for x86 Linux that exits with return code
+42, following Brian Raiter’s lead.
+
+Compile-time primitives:
+( — defines a comment that extends to the next right-paren
+v — defines a dataspace label, like for a variable.
+b — compiles a literal byte, numerically, into data space.
+# — compiles a literal four-byte little-endian number into data space.
+^ — the location where the program should start executing [everything else
+ is just definitions]
+space, newline — ignored
+
+Run-time primitives:
+W — given an address and size on the stack, writes the specified number of
+ bytes to stdout
+Q — exits the program with a return code of 0
++ — adds two numbers
+@ — fetches a word from memory
+! — stores to a word in memory
+~ — bitwise-negates a word
+
+Defined in this file, all necessarily run-time:
+h — the ELF header
+e — the location of the entry point word in the ELF header
+o — the location of the word in the ELF program header that specifies the .ORG
+S — the location of the word in the ELF program header that specifies filesize
+$ — the end of the program
+)
+
+v h ( ELF header, Elf32_Ehdr; h labels the first byte of data space )
+( ELF info from http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html)
+( but I'm using 4096 for my origin rather than 0x08048000)
+ ( e_ident is 16 bytes: the 7 below plus 9 bytes of zero padding )
+ ( e_ident:) b 127 b 69 ( E) b 76 ( L) b 70 ( F) b 1 b 1 b 1
+ b 0 b 0 b 0 b 0 b 0 b 0 b 0 b 0 b 0 ( 9 bytes of padding)
+ ( e_type:) b 2 b 0 ( e_machine:) b 3 b 0
+ ( The entry point is compiled as a placeholder 0 under the label e;
+ the run-time code at the end of this file stores the real value there )
+ ( e_version:) # 1 ( e_entry:) v e # 0
+ ( The program header offset is 52, the same as the header size,
+ since the program header immediately follows this ELF header )
+ ( e_phoff:) # 52 ( e_shoff:) # 0
+ ( e_flags:) # 0 ( e_ehsize:) b 52 b 0
+( e_phentsize:) b 32 b 0 ( e_phnum:) b 1 b 0
+( e_shentsize:) b 40 b 0 ( e_shnum:) b 0 b 0
+ ( e_shstrndx:) b 0 b 0
+
+( program header, Elf32_Phdr; note that we are now 52 bytes from 'h' )
+ ( p_type:) # 1 ( p_offset:) # 0
+ ( The label o marks the word holding the origin, 4096; the run-time code
+ fetches it from here when it computes the entry point )
+ ( p_vaddr:) v o # 4096 ( p_paddr:) # 0 ( should be 0, not org as Brian has)
+( Note that you can only make p_memsz as large as you want if p_flags has a
+ 2 = PF_W in it. Otherwise, even one extra byte results in a segfault. )
+ ( The file size is compiled as a placeholder 0 under the label S and is
+ filled in at run time, once the end of the program is known )
+ ( p_filesz:) v S # 0 ( p_memsz:) # 655360 ( should be enough for anyone)
+ ( p_flags:) # 7 ( p_align:) # 4096
+
+( The program proper: five bytes of machine code placed directly after the
+ two headers. Presumably this relies on eax being 0 at process entry, so
+ that inc eax yields 1, the exit system call number -- TODO confirm )
+( mov bl, 42) b 179 b 42
+ ( inc eax) b 64
+ ( int 0x80) b 205 b 128
+
+v $
+
+( Everything from the caret onward is executed at run time. The two headers
+ take 52 + 32 = 84 bytes, so the entry point is the origin plus 84 )
+^ o @ 52 + 32 + e ! ( set entry point correctly wrt origin )
+$ h ~ 1 + + S ! ( subtract h from $ to get size of whole program )
+( Write the image, filesize bytes starting at h, to stdout, then quit )
+h S @ W Q
@@ -0,0 +1,47 @@
+; from http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
+; A complete ELF executable laid out by hand: ELF header, one program
+; header, then the code.  The order of the db/dw/dd lines IS the file layout.
+BITS 32
+
+; org 0x08048000 I'm using a different org
+ org 4096
+
+
+ehdr: ; Elf32_Ehdr
+ db 0x7F, "ELF", 1, 1, 1 ; e_ident
+; 7 bytes above + 9 bytes of zeros = the 16 bytes of e_ident
+ times 9 db 0
+ dw 2 ; e_type
+ dw 3 ; e_machine
+ dd 1 ; e_version
+ dd _start ; e_entry
+; $$ is the start of the section, so this is the file offset of phdr
+ dd phdr - $$ ; e_phoff
+ dd 0 ; e_shoff
+ dd 0 ; e_flags
+ dw ehdrsize ; e_ehsize
+ dw phdrsize ; e_phentsize
+ dw 1 ; e_phnum
+ dw 0 ; e_shentsize
+ dw 0 ; e_shnum
+ dw 0 ; e_shstrndx
+
+ehdrsize equ $ - ehdr
+
+phdr: ; Elf32_Phdr
+ dd 1 ; p_type
+ dd 0 ; p_offset
+ dd $$ ; p_vaddr
+ dd $$ ; p_paddr
+ dd filesize ; p_filesz
+ dd filesize ; p_memsz
+ dd 5 ; p_flags
+ dd 0x1000 ; p_align
+
+phdrsize equ $ - phdr
+
+_start:
+
+; your program here
+; Sets bl to 42 and eax to eax+1, then makes a Linux system call.
+; NOTE(review): presumably relies on eax and ebx being 0 at process entry,
+; making this exit(42) -- confirm.
+ mov bl, 42
+ inc eax
+ int 0x80
+
+; total number of bytes emitted, measured from the start of the section
+filesize equ $ - $$
+
@@ -0,0 +1,146 @@
+#!/usr/bin/python
+"""Tiny bootstrapping interpreter for the first bootstrap stage.
+
+Implements an extremely minimal Forth-like language, used to write
+tinyboot1.tbf1.
+
+The theory is that first we 'compile' the program by reading through
+it to find compile-time definitions and actions, which sets up the
+initial state of memory; then we 'run' the program by directly
+interpreting its text, given that initial state.
+
+"""
+import sys, cgitb
+cgitb.enable(format='text')
+
def debug(text):
    """Emit one line of diagnostic text on stderr."""
    line = text + "\n"
    sys.stderr.write(line)
+
# Read position at which execution begins; stays None until the compile
# pass has seen a '^'.
start_address = None
# Data space: a flat list of byte values held as plain integers.
memory = list()

### Compile-time actions.
# Each of these must leave program_counter pointing just past the
# last byte it consumed.

# Index of the next unread character of the program text.
program_counter = 0
+
def eat_byte():
    """Advance the global read position by exactly one character."""
    global program_counter
    program_counter = program_counter + 1
def eat_comment():
    """Skip the rest of a comment, up to and including the closing ')'.

    Expects program_counter to be just past the opening '('.  Comments do
    not nest: the first ')' found ends the comment.

    Raises:
        IndexError: if the program text ends before any ')' is found.  The
            exception type is the one the original character-by-character
            scan produced; only the message is new.
    """
    global program_counter
    closing = program.find(')', program_counter)
    if closing < 0:
        raise IndexError('unterminated comment: no ")" after offset %d'
                         % program_counter)
    program_counter = closing + 1
def advance_past_whitespace():
    """Move program_counter forward over any run of spaces and newlines.

    Stops at the first other character, or at the end of the program text
    (where the unguarded scan used to fail with an IndexError).
    """
    global program_counter
    end = len(program)
    while program_counter < end and program[program_counter] in ' \n':
        program_counter += 1
def push_dataspace_label(n):
    """Build the run-time action for a label: a callable that pushes n."""
    def push_address():
        return stack.append(n)
    return push_address
def dataspace_label():
    """Bind the next non-blank character to the current end of data space.

    The character becomes a run-time word that pushes that address.
    """
    advance_past_whitespace()
    address = len(memory)
    label = program[program_counter]
    run_time_dispatch[label] = push_dataspace_label(address)
    eat_byte()
def read_number():
    """Parse the unsigned decimal number at program_counter and consume it.

    Returns:
        The number as an int.  program_counter is left just past its last
        digit, which may be the very end of the program text.

    Raises:
        ValueError: if there is no digit at program_counter.
    """
    global program_counter
    start = program_counter
    end = len(program)
    # The bounds check lets a number be the last thing in the input.
    while program_counter < end and program[program_counter] in '0123456789':
        program_counter += 1
    if program_counter == start:
        raise ValueError('expected a decimal number at offset %d' % start)
    return int(program[start:program_counter])
def literal_byte():
    """Compile the decimal number that follows 'b' as one byte of data space.

    Raises:
        ValueError: if the number does not fit in a byte.  Checking here
            reports the problem at the literal itself instead of much later,
            when the byte is written out.
    """
    advance_past_whitespace()
    where = program_counter
    value = read_number()
    if value > 255:
        raise ValueError('byte literal %d at offset %d is larger than 255'
                         % (value, where))
    memory.append(value)
def as_bytes(num):
    """Split the low 32 bits of num into four bytes, least significant first."""
    return [(num >> shift) & 0xFF for shift in (0, 8, 16, 24)]
def literal_word():
    """Compile the decimal number that follows '#' as four little-endian bytes."""
    advance_past_whitespace()
    value = read_number()
    memory.extend(as_bytes(value))
def set_start_address():
    """Remember the current read position as the place execution begins."""
    global start_address
    entry_point = program_counter
    start_address = entry_point
def nop():
    """Do nothing."""
    return None
+
# Maps each compile-time primitive (a single character) to its action.
compile_time_dispatch = dict([
    ('(', eat_comment),
    ('v', dataspace_label),
    ('b', literal_byte),
    ('#', literal_word),
    ('^', set_start_address),
    (' ', nop),
    ('\n', nop),
])
+
def tbfcompile():
    """Run the compile pass over the whole program text.

    Each character is consumed and, if it is a compile-time primitive, its
    action is run (the action may consume more text).  Characters that are
    only run-time words are skipped during this pass.

    Raises:
        AssertionError: on a character that is neither a compile-time
            primitive nor a known run-time word.
    """
    while program_counter < len(program):
        byte = program[program_counter]
        eat_byte()
        if byte in compile_time_dispatch:
            compile_time_dispatch[byte]()
        elif byte in run_time_dispatch:
            continue  # run-time words are ignored for now
        else:
            excerpt_beginning = max(0, program_counter - 10)
            # Raised explicitly instead of through `assert`, so that the
            # check is not stripped when Python runs with -O.
            raise AssertionError(
                '%r not defined at %r (%r)'
                % (byte, program[excerpt_beginning:program_counter],
                   run_time_dispatch))
+
+### Run-time actions.
+# Execution should pretty much stay inside of functions, and we
+# shouldn't run into any compile-time actions there, right?
+# Except maybe comments.
+
# Run-time operand stack; the top of the stack is the end of the list.
stack = list()
def write_out():
    """Pop a count, then an address, and write that span of memory to stdout.

    The bytes are written raw, so values of 128 and above come out as
    single bytes rather than being re-encoded as text.

    Raises:
        IndexError: if a non-empty span is not entirely inside memory.
    """
    count = stack.pop()
    address = stack.pop()
    debug('writing address %d, count %d' % (address, count))
    if count > 0 and (address < 0 or address + count > len(memory)):
        raise IndexError('write of %d bytes at address %d is outside the '
                         '%d bytes of memory' % (count, address, len(memory)))
    # On Python 3 the text layer of sys.stdout would encode each value
    # >= 128 as a multi-byte sequence and corrupt binary output, so use the
    # underlying binary stream when there is one.  Python 2's sys.stdout has
    # no `buffer` attribute and already accepts byte strings.
    binary_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    binary_stdout.write(bytes(bytearray(memory[address:address + count])))
def quit():
    """Terminate the interpreter with exit status 0."""
    raise SystemExit(0)
def add():
    """Replace the top two stack entries with their sum, modulo 2**32.

    The result is wrapped to 32 bits so that it agrees with bitwise_not,
    which already works on 32-bit values.  Without the wrap, negating a
    number with '~ 1 +' and adding it leaves a value that is too large by
    2**32 on the stack.
    """
    right = stack.pop()
    left = stack.pop()
    stack.append((left + right) & 0xffffffff)
def push_literal():
    """Push the decimal literal whose first digit has just been consumed."""
    global program_counter
    # Step back onto the first digit so that read_number sees the whole
    # literal.
    program_counter = program_counter - 1
    value = read_number()
    stack.append(value)
def decode(bytes):
    """Combine the first four items of bytes, least significant first, into one int."""
    value = 0
    for position in range(4):
        value |= bytes[position] << (8 * position)
    return value
def fetch():
    """Replace the address on top of the stack with the four-byte word stored there."""
    addr = stack.pop()
    word = memory[addr:addr + 4]
    stack.append(decode(word))
def store():
    """Pop an address, then a value, and store the value there as four bytes.

    The low 32 bits of the value are written little-endian.  A store that
    starts at or just before the end of memory grows memory, as it always
    did.

    Raises:
        IndexError: if the address is negative or lies beyond the end of
            memory.  Slice assignment would otherwise silently put the bytes
            somewhere other than the requested address.
    """
    addr = stack.pop()
    value = stack.pop()
    if addr < 0 or addr > len(memory):
        raise IndexError('store to address %d is outside the %d bytes of '
                         'memory' % (addr, len(memory)))
    memory[addr:addr + 4] = as_bytes(value)
def bitwise_not():
    """Flip the low 32 bits of the value on top of the stack."""
    all_ones = 0xffffffff
    value = stack.pop()
    stack.append(value ^ all_ones)
+
# Maps each run-time word (a single character) to the action that runs it.
# Label definitions add further entries during the compile pass.
run_time_dispatch = dict([
    ('(', eat_comment),
    ('W', write_out),
    ('Q', quit),
    ('+', add),
    (' ', nop),
    ('\n', nop),
    ('@', fetch),
    ('!', store),
    ('~', bitwise_not),
])
# Any decimal digit starts a numeric literal.
for digit in '0123456789':
    run_time_dispatch.update({digit: push_literal})
+
def tbfrun():
    """Interpret the program text, starting at the recorded start address.

    Each character is consumed and its run-time action is run.  The loop has
    no normal exit: it ends only when an action raises, which for a
    well-formed program is the SystemExit raised by 'Q'.

    Raises:
        AssertionError: if the compile pass never recorded a start address.
        IndexError: if execution runs off the end of the program text.
        KeyError: if a character has no run-time action.
    """
    global program_counter
    # Raised explicitly instead of through `assert`, so that the check is
    # not stripped when Python runs with -O.
    if start_address is None:
        raise AssertionError("no '^' start marker was found in the program")
    program_counter = start_address
    while True:
        if program_counter >= len(program):
            raise IndexError('ran off the end of the program at offset %d '
                             'without exiting' % program_counter)
        byte = program[program_counter]
        eat_byte()
        try:
            action = run_time_dispatch[byte]
        except KeyError:
            raise KeyError('%r is not a run-time word (offset %d)'
                           % (byte, program_counter - 1))
        # Called outside the try so a KeyError raised inside an action is
        # not mistaken for an unknown word.
        action()
+
def main():
    """Read a program from stdin, compile it into memory, then run it.

    The compiled memory image is dumped through debug() before execution
    starts.

    Raises:
        AssertionError: if tbfrun ever returns, which it is not meant to do.
    """
    global program
    program = sys.stdin.read()
    tbfcompile()
    debug(str(memory))
    tbfrun()
    # Raised explicitly instead of through `assert False`, so that the
    # check is not stripped when Python runs with -O.
    raise AssertionError("tbfrun returned")

if __name__ == '__main__':
    main()
@@ -0,0 +1,41 @@
+ ## sample code snippets for the tiny bootstrap forth compiler
+ ## NOTE(review): these snippets appear to keep the top of the data stack
+ ## in %eax and the rest on the machine stack, with %ebp swapped into
+ ## %esp around call/ret, presumably as a return-stack pointer -- confirm.
+return: xchg %ebp, %esp
+ ret
+ ## subroutine call
+call: xchg %ebp, %esp
+ call foo
+ xchg %ebp, %esp
+ ## push a constant
+push: push %eax
+ mov $1234567, %eax
+ ## replace the address in %eax with the word it points to
+fetch: mov (%eax), %eax
+ ## pop a word into the memory at address %eax, then reload %eax
+store: pop (%eax)
+ pop %eax
+ ## pop %eax, %edx, %ecx and %ebx in that order, then trap to the kernel
+syscall:
+ pop %eax
+ pop %edx
+ pop %ecx
+ pop %ebx
+ int $0x80
+ ## add the popped word to %eax; foo labels the add itself so that the
+ ## call and jump samples have a target
+plus: pop %ecx
+foo: add %ecx, %eax
+ ## leaves -1 in %eax if the old %eax was less than the word on top of
+ ## the machine stack [signed compare], else 0; the pop does not alter
+ ## the flags set by sub, so setge still sees the comparison
+lessthan:
+ sub (%esp), %eax
+ pop %eax
+ setge %al
+ dec %al
+ movsbl %al, %eax
+ ## bitwise complement of %eax
+not: not %eax
+ ## replace the address in %eax with the zero-extended byte it points to
+bytefetch:
+ movzbl (%eax), %eax
+ ## store the low byte of the popped word at address %eax, then reload %eax
+bytestore:
+ pop %ecx
+ movb %cl, (%eax)
+ pop %eax
+ ## branch if the old %eax was nonzero; %eax is reloaded before the jump
+ ## NOTE(review): the two jnz lines look like alternatives, one to a
+ ## backward target and one to a forward target -- confirm.
+jump: test %eax, %eax
+ pop %eax
+ jnz foo
+ jnz bar
+bar: nop
+ ## arithmetic right shift of %eax by the popped count
+rshift: pop %ecx
+ sar %cl, %eax

0 comments on commit 9e35de8

Please sign in to comment.