In [18]:
# Simple assembler for an 8-bit computer

from sre_constants import IN


# Mnemonic -> opcode byte. Every opcode occupies the high nibble of the byte
# and leaves the low nibble zero; the assembler emits an instruction's operand
# (when it has one) as a separate byte following the opcode.
INSTRUCTIONS = {
    "nop": 0x00,
    "lda": 0x10,
    "add": 0x20,
    "sub": 0x30,
    "sta": 0x40,
    "ldi": 0x50,
    "jmp": 0x60,
    "jc": 0x70,
    "jz": 0x80,
    "out": 0xE0,
    "hlt": 0xF0,
    # Assembler directive, not a machine instruction: reserves and
    # initialises a data byte, so it has no opcode.
    "var": None,
}

# Number of bytes in the memory image produced by the assembler.
RAM_SIZE = 24


def _split_statement(line):
    """Return ``(label, tokens)`` for one source line.

    Everything from ``//`` to the end of the line is a comment and is dropped.
    ``label`` is the text before the first ``:`` of the leading token, or
    ``None`` when the line carries no label; ``tokens`` are the remaining
    whitespace-separated words (possibly empty).
    """
    tokens = line.split("//", 1)[0].split()
    if not tokens or ":" not in tokens[0]:
        return None, tokens
    label, _, tail = tokens[0].partition(":")
    # Accept "loop:lda x" as well as "loop: lda x".
    return label, ([tail] if tail else []) + tokens[1:]


def assemble(assembly_code):
    """Assemble source text into a memory image of ``RAM_SIZE`` bytes.

    Source format, one statement per line:

    * ``name:`` defines a label whose value is the address of the next
      emitted byte; an instruction may follow on the same line.
    * ``var name value`` reserves one data byte initialised to ``value``.
      Variables are allocated downwards from the last RAM address, in the
      order they appear.
    * ``mnemonic [symbol]`` emits the opcode byte and, when a symbol is
      given, a second byte holding the address of that label or variable.
    * ``//`` starts a comment that runs to the end of the line.

    Args:
        assembly_code: The complete program as a single string.

    Returns:
        A list of ``RAM_SIZE`` ints: code from address 0 upwards, variables
        at the top, and zeros in between.

    Raises:
        ValueError: Unknown mnemonic, malformed ``var`` directive, variable
            value that does not fit in one byte, or a program whose code and
            variables do not fit in RAM together.
        KeyError: An operand names a label or variable that is never defined.
    """
    max_byte = 0xFF  # variables hold one 8-bit data byte, not an address
    statements = [_split_statement(line) for line in assembly_code.splitlines()]

    machine_code = [0] * RAM_SIZE
    symbols = {}  # label / variable name -> address
    var_idx = RAM_SIZE - 1  # next free variable slot, growing downwards
    pc = 0

    # Pass 1: validate, place variables, and compute label addresses. The
    # program counter must advance here exactly as it does in pass 2,
    # otherwise every label would resolve to address 0.
    for label, tokens in statements:
        if label is not None:
            symbols[label] = pc
        if not tokens:
            continue

        instr = tokens[0]
        if instr not in INSTRUCTIONS:
            raise ValueError(f"Unknown instruction: {instr}")

        if instr == "var":
            if len(tokens) != 3:
                raise ValueError("'var' requires a name and a value")
            value = int(tokens[2])
            if value < 0 or value > max_byte:
                raise ValueError(f"Value out of range (0-{max_byte}): {value}")
            if var_idx < 0:
                # A negative index would silently wrap to the end of the list.
                raise ValueError(
                    f"Too many variables for {RAM_SIZE} bytes of RAM"
                )
            symbols[tokens[1]] = var_idx
            machine_code[var_idx] = value
            var_idx -= 1
        else:
            # One byte for the opcode plus one for the operand, if present.
            pc += 2 if len(tokens) > 1 else 1

    var_count = RAM_SIZE - 1 - var_idx
    if pc + var_count > RAM_SIZE:
        # Emitting the code would overwrite variable storage or run off the
        # end of memory.
        raise ValueError(
            f"Program does not fit in {RAM_SIZE} bytes of RAM: "
            f"{pc} code byte(s) + {var_count} variable(s)"
        )

    # Pass 2: emit opcodes and resolved operand addresses.
    pc = 0
    for _, tokens in statements:
        if not tokens:
            continue
        opcode = INSTRUCTIONS[tokens[0]]
        if opcode is None:  # directive such as 'var': nothing to emit
            continue

        machine_code[pc] = opcode
        pc += 1

        if len(tokens) > 1:
            name = tokens[1]
            try:
                operand = symbols[name]
            except KeyError:
                raise KeyError(f"Undefined symbol: {name}") from None
            if operand < 0 or operand > RAM_SIZE - 1:
                raise ValueError(
                    f"Operand out of range (0-{RAM_SIZE - 1}): {operand}"
                )
            machine_code[pc] = operand
            pc += 1

    return machine_code

def print_memory(machine_code):
    """Print a memory image as annotated binary literals, one byte per line.

    Each line has the form ``0b<8 bits>, // <address> - <annotation>``.
    The image is decoded heuristically from the byte values alone:

    * a zero byte is labelled ``NOP``;
    * a byte equal to a known opcode is labelled with its mnemonic, and for
      mnemonics that take an operand the following byte is printed as
      ``MEM`` and skipped;
    * any other byte is labelled ``DATA``.
    """
    takes_operand = ("lda", "add", "sub", "sta", "ldi", "jmp", "jc", "jz")
    size = len(machine_code)
    addr = 0

    while addr < size:
        byte = machine_code[addr]

        if byte == 0:
            print(f"0b{byte:08b}, // {addr} - NOP")
            addr += 1
            continue

        # Reverse lookup: first mnemonic whose opcode equals this byte.
        mnemonic = None
        for name, opcode in INSTRUCTIONS.items():
            if opcode == byte:
                mnemonic = name
                break

        if not mnemonic:
            print(f"0b{byte:08b}, // {addr} - DATA")
            addr += 1
            continue

        if mnemonic in takes_operand and addr + 1 < size:
            # Opcode and its operand byte are printed as a pair.
            following = machine_code[addr + 1]
            print(f"0b{byte:08b}, // {addr} - ISTR: {mnemonic}")
            print(f"0b{following:08b}, // {addr + 1} - MEM: {following}")
            addr += 2
        else:
            print(f"0b{byte:08b}, // {addr} - {mnemonic}")
            addr += 1


if __name__ == "__main__":
    # Script entry point: read the assembly source at `filepath`, assemble it,
    # and print the resulting memory image to stdout.
    # The path is relative to the current working directory.
    filepath = "programs/fibonacci.asm"
    with open(filepath, "r") as f:
        sample_code = f.read()
    
    # assemble() returns the full memory image; print_memory() writes one
    # annotated line per byte.
    machine_code = assemble(sample_code)
    print_memory(machine_code)


0b00010000, // 0 - ISTR: lda
0b00010111, // 1 - MEM: 23
0b00100000, // 2 - ISTR: add
0b00010110, // 3 - MEM: 22
0b01000000, // 4 - ISTR: sta
0b00010101, // 5 - MEM: 21
0b00010000, // 6 - ISTR: lda
0b00010110, // 7 - MEM: 22
0b01000000, // 8 - ISTR: sta
0b00010111, // 9 - MEM: 23
0b00010000, // 10 - ISTR: lda
0b00010101, // 11 - MEM: 21
0b01000000, // 12 - ISTR: sta
0b00010110, // 13 - MEM: 22
0b11100000, // 14 - out
0b01100000, // 15 - ISTR: jmp
0b00000000, // 16 - MEM: 0
0b00000000, // 17 - NOP
0b00000000, // 18 - NOP
0b00000000, // 19 - NOP
0b00000000, // 20 - NOP
0b00000000, // 21 - NOP
0b00000001, // 22 - DATA
0b00000000, // 23 - NOP
