In [71]:
import numpy as np
# Number of byte-sized memory cells in the assembled image (size of the array
# that assemble() returns).
MAX_MEMORY = 16
# Index of the most recently emitted instruction; -1 means none emitted yet.
# assemble() increments it once per instruction.
instructionStackPointer = -1

# Names of the variables defined so far, in definition order.
variableDefinitions = []
# Memory address of each variable; parallel to variableDefinitions.
definitionPointer = []
# Number of entries in the two parallel lists above.
definitionBufferSize = 0
# Address of the most recently allocated variable. defineVariable() decrements
# it per variable, so variables are placed from the top of memory downwards.
variableStackPointer = MAX_MEMORY

import re
# Compiled once at import time. `\Z` anchors at the true end of the string;
# `$` would also match just before a trailing newline and let "x\n" through.
_VARIABLE_NAME_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*\Z")


def is_valid_variable_name(name, line):
    """Check that *name* is a legal variable identifier.

    A legal identifier starts with an ASCII letter or underscore and continues
    with ASCII letters, digits or underscores.

    Args:
        name: Candidate identifier.
        line: Source line (or line number) quoted in the error message.

    Returns:
        None when the name is valid.

    Raises:
        Exception: If *name* is not a legal identifier.
    """
    if _VARIABLE_NAME_PATTERN.match(name):
        return
    # str() keeps the intended message even if the caller passes a line number.
    raise Exception("Variable has invalid name at line : " + str(line))

def defineVariable(identifier, line):
    """Allocate the next free memory cell (top of memory downwards) to a variable.

    All checks run against a candidate address before any global state is
    touched, so a rejected definition leaves the variable table and the
    variable stack pointer exactly as they were.

    Args:
        identifier: Name of the variable being defined.
        line: Source line text, quoted in error messages.

    Returns:
        The memory address assigned to the variable.

    Raises:
        Exception: If the name is invalid, memory is exhausted or would run
            into the instruction area, or the variable is already defined.
    """
    global variableDefinitions
    global definitionPointer
    global definitionBufferSize
    global variableStackPointer

    is_valid_variable_name(identifier, line)

    # Candidate address only; committed below once every check has passed.
    address = variableStackPointer - 1

    # `<=` (rather than `==`) also catches the case where the two pointers
    # have already crossed, not just the moment they meet.
    if address < 0 or address <= instructionStackPointer:
        raise Exception("Variable stack overflow/collision at line : " + line)
    if identifier in variableDefinitions:
        raise Exception("Redefinition of variable at line : " + line)

    variableStackPointer = address
    variableDefinitions.append(identifier)
    definitionPointer.append(address)
    definitionBufferSize = definitionBufferSize + 1
    # Trace of the definition table after each successful definition.
    print(variableDefinitions)
    return address

    
    
def resolveVariable(identifier, line):
    """Look up the memory address previously assigned to a variable.

    Args:
        identifier: Variable name to look up.
        line: Source line text, quoted in error messages.

    Returns:
        The address recorded for the first definition matching *identifier*.

    Raises:
        Exception: If the name is invalid or no such variable was defined.
    """
    is_valid_variable_name(identifier, line)

    # Linear scan over the first definitionBufferSize entries of the two
    # parallel tables.
    slot = 0
    while slot < definitionBufferSize:
        if identifier == variableDefinitions[slot]:
            return definitionPointer[slot]
        slot += 1

    raise Exception("Undefined variable at line : " + line)
        
        
    
    
# Mnemonics whose operand is a variable name, resolved to its memory address.
_MEMORY_OPCODES = {"LDA": 1, "ADD": 2, "SUB": 3, "STA": 4}
# Mnemonics whose operand is an integer literal written in the source.
_LITERAL_OPCODES = {"LDI": 5, "JMP": 6, "JC": 7, "JZ": 8}
# Mnemonics that take no operand; the low nibble is left as zero.
_IMPLIED_OPCODES = {"OUT": 14, "HLT": 15}
# The operand shares one byte with the opcode, so it only has the low 4 bits.
_OPERAND_MAX = 0x0F


def _operand_token(parts, line):
    """Return the operand token of an instruction, or raise if it is missing."""
    if len(parts) < 2:
        raise Exception("Missing operand at line : " + line)
    return parts[1]


def _encode(opcode, operand, line):
    """Pack a 4-bit opcode (high nibble) and a 4-bit operand (low nibble)."""
    if operand < 0 or operand > _OPERAND_MAX:
        # Without this check a too-large operand would spill into the opcode
        # nibble and silently turn into a different instruction.
        raise Exception("Operand is not 4 bit unsigned at line : " + line)
    return (opcode << 4) | operand


def assemble(program):
    """Translate assembly source lines into a MAX_MEMORY-byte memory image.

    Blank lines and lines starting with ';' are skipped. A line starting with
    "VAR" has the form ``VAR <name> <value>`` and stores an unsigned 8-bit
    value in the next free cell from the top of memory. Every other line is an
    instruction, encoded as ``opcode << 4 | operand`` and placed in the next
    free cell from the bottom of memory.

    The module-level assembler state is reset on entry so the function can be
    called repeatedly (for example when re-running a notebook cell).

    Args:
        program: Iterable of source line strings.

    Returns:
        A numpy uint8 array of length MAX_MEMORY holding the image.

    Raises:
        Exception: On malformed lines, unknown opcodes, invalid, duplicate or
            undefined variables, out-of-range values or operands, and when
            instructions and variables would overlap or exceed memory.
        ValueError: If a value or literal operand is not an integer.
    """
    global instructionStackPointer
    global variableDefinitions
    global definitionPointer
    global definitionBufferSize
    global variableStackPointer

    # Start every assembly from a clean slate; otherwise a second call would
    # see the previous program's variables and instruction pointer.
    instructionStackPointer = -1
    variableDefinitions = []
    definitionPointer = []
    definitionBufferSize = 0
    variableStackPointer = MAX_MEMORY

    machine_code = np.zeros(MAX_MEMORY, dtype=np.uint8)
    for raw_line in program:
        line = raw_line.strip()
        if line == "" or line.startswith(";"):
            continue

        parts = line.split()

        # Setting variables/constants in memory
        if line.startswith("VAR"):
            if len(parts) < 3:
                raise Exception("Malformed VAR directive at line : " + line)
            address = defineVariable(parts[1], line)
            try:
                value = int(parts[2])
            except ValueError:
                raise ValueError(
                    "Variable value is not an integer at line : " + line)
            if address >= MAX_MEMORY:
                raise Exception(
                    "Variable exceeds memory size at line : " + line)
            if value > 255 or value < 0:
                raise Exception(
                    "Variable is not 8 bit unsigned at line : " + line)
            machine_code[address] = value
            continue

        # Instructions to machine code
        opcode = parts[0]
        if opcode in _MEMORY_OPCODES:
            operand = resolveVariable(_operand_token(parts, line), line)
            encoded = _encode(_MEMORY_OPCODES[opcode], operand, line)
        elif opcode in _LITERAL_OPCODES:
            operand = int(_operand_token(parts, line))
            encoded = _encode(_LITERAL_OPCODES[opcode], operand, line)
        elif opcode in _IMPLIED_OPCODES:
            encoded = _encode(_IMPLIED_OPCODES[opcode], 0, line)
        else:
            raise Exception("Unknown opcode: " + opcode)

        # Check the target cell BEFORE writing, so an overflow is reported
        # with the assembler's own message and a colliding instruction never
        # overwrites a variable.
        slot = instructionStackPointer + 1
        if slot >= MAX_MEMORY or slot >= variableStackPointer:
            raise Exception(
                "Instruction stack overflow/collision at line:" + line)
        machine_code[slot] = encoded
        instructionStackPointer = slot
    return machine_code

def reverse_hex_no_0x(byte_list):
    """Render *byte_list* as upper-case hex digits, last element first.

    Each element is formatted as at least two hex digits with no "0x" prefix,
    and the pieces are concatenated without separators.
    """
    pieces = []
    for value in reversed(byte_list):
        pieces.append(format(value, '02X'))
    return ''.join(pieces)

def main():
    """Assemble assembly_program_example.txt and print the resulting image.

    Prints the raw memory array, then an ``INIT_RAM_00 => X"..."`` line whose
    hex digits are the memory bytes in reverse order behind a fixed run of
    zero digits.
    """
    with open("assembly_program_example.txt", "r") as source_file:
        source_lines = source_file.readlines()

    image = assemble(source_lines)
    print(image)

    init_hex = reverse_hex_no_0x(image)
    print("INIT_RAM_00 => X\"000000000000000000000000000000" + str(init_hex) + "\"")

# Run the assembler only when executed as a script (or notebook cell), not
# when this module is imported.
if __name__ == "__main__":
    main()
    
# Notes
# Error messages currently quote only the contents of the offending line;
# they could be extended to report the line number as well.

['y']
['y', 'x']
['y', 'x', 'product']
['y', 'x', 'product', 'one']
[ 30  60 118  29 224 240  78  29  47  77  96   0   1   1   3   5]
INIT_RAM_00 => X"0000000000000000000000000000000503010100604D2F1D4EF0E01D763C1E"
