# Assembler per principle of computing in Jack, Hack

In [1]:
# here we have
# A: register for address
# D: register for data
# @symbol: load symbol into A
# (symbol): psuedo command for labeling the next address in ROM holding instructions
# // comment

assembly_example = '''
// comment
@a
D=D+M
@100
D=A // load A into D
'''

In [2]:
from io import StringIO
from typing import Tuple
from enum import Enum

CommandType = Enum('CommandType', 
                   ['A_COMMAND', # @symbol
                    'C_COMMAND', # dest=comp;jump
                    'L_COMMAND'] # (symbol)
                  )

def dec2bin(dec, n_digits=3)->str:
    # decimal to binary, assume positive
    ret = []
    i = 0
    while i < n_digits and dec > 0:
        i += 1
        ret.append(dec % 2)
        dec //= 2
    ret += [0] * (n_digits - len(ret))
    return ''.join(map(str, ret[::-1]))

class Code:
    _dest_lists = ['null', 'M', 'D', 'MD', 'A', 'AM', 'AD', 'AMD']
    _jump_lists = ['null', 'JGT', 'JEQ', 'JGE', 'JLT', 'JNE', 'JLE', 'JMP']
    dest = {name:dec2bin(i, 3) for i, name in enumerate(_dest_lists)}
    jump = {name:dec2bin(i, 3) for i, name in enumerate(_jump_lists)}
    comp = {
        '0': '0101010',
        '1': '0111111',
        '-1': '0111010',
        'D': '0001100',
        'A': '0110000',
        '!D': '0001101',
        '!A': '0110001',
        '-D': '0001111',
        '-A': '0110011',
        'D+1': '0011111',
        'A+1': '0110111',
        'D-1': '0001110',
        'A-1': '0110010',
        'D+A': '0000010',
        'D-A': '0010011',
        'A-D': '0000111',
        'D&A': '0000000',
        'D|A': '0010101',
        'M': '1110000',
        '!M': '1110001',
        '-M': '1110011',
        'M+1': '1110111',
        'M-1': '1110010',
        'D+M': '1000010',
        'D-M': '1010011',
        'M-D': '1000111',
        'D&M': '1000000',
        'D|M': '1010101'
    }

class SymbolTable:
    def __init__(self):
        self.d = {
            # page 110 of 'the elements of computing systems'
            # symbol to RAM address
            'SP': 0,
            'LCL': 1,
            'ARG': 2,
            'THIS': 3,
            'THAT': 4, 
            'SCREEN': 16384,
            'KBD': 24576
        }
        for i in range(16): # R0-R15
            self.d[f'R{i}'] = i

    def addEntry(self, symbol:str, address:int):
        assert symbol not in self.d, f'{symbol} already in symbol table'
        self.d[symbol] = address

    def contains(self, symbol:str):
        return symbol in self.d

    def getAddress(self, symbol:str):
        return self.d[symbol]

class Parser:

    def __init__(self, fs: StringIO):
        self.filestream = fs
        self.lineno = 0
        
    def advance(self)->Tuple[bool, str]:
        'return next command'
        l = self.filestream.readline()
        if l == '':
            # EOF error
            return False, ''

        l = l.strip()
        if '//' in l:
            l = l[:l.index('//')].strip()
            
        if l == '':    
            return self.advance()
        else:
            if not l.startswith('('):
                # pseudo command doesn't advance lineno
                self.lineno += 1
            return True, l

    def commandType(self, command):
        if command.startswith('@'):
            return CommandType.A_COMMAND
        elif command.startswith('('):
            return CommandType.L_COMMAND
        else:
            return CommandType.C_COMMAND

    def symbol(self, command):
        assert self.commandType(command) != CommandType.C_COMMAND, 'symbol does not apply to C command'
        if command.startswith('@'):
            return command[1:].strip()
        else:
            assert command[-1] == ')', 'L_COMMAND must end i )'
            return command[1:-1].strip()

    def dest(self, command):
        assert self.commandType(command) == CommandType.C_COMMAND, 'dest only apply to C command'
        o = 'null'
        if '=' in command:
            o = command.split('=')[0]
            assert o in Code.dest, f'{o} not in dest {Code.dest.keys()}'
        return o

    def comp(self, command):
        assert self.commandType(command) == CommandType.C_COMMAND, 'comp only apply to C command'
        o = command
        if ';' in command:
            o = o.split(';')[0]
        if '=' in command:
            o = o.split('=')[1]
        assert o in Code.comp, f'{o} not in comp {Code.comp.keys()}'
        return o

    def jump(self, command):
        assert self.commandType(command) == CommandType.C_COMMAND, 'jump only apply to C command'
        o = 'null'
        if ';' in command:
            o = o.split(';')[1]
        assert o in Code.jump, f'{o} not in jump {Code.jump.keys()}'
        return o    

def isInt(s:str):
    try:
        int(s)
        return True
    except:
        return False
        
p = Parser(StringIO(assembly_example))
while True:
    ok, command = p.advance()
    if not ok: break
    ct = p.commandType(command)
    if ct == CommandType.C_COMMAND:
        print(f'{command:10s} {ct:10s} (dest: {p.dest(command)} = comp: {p.comp(command)}; jump: {p.jump(command)})')
    else:
        print(f'{command:10s} {ct:10s} (symbol: {p.symbol(command)})')

@a         CommandType.A_COMMAND (symbol: a)
D=D+M      CommandType.C_COMMAND (dest: D = comp: D+M; jump: null)
@100       CommandType.A_COMMAND (symbol: 100)
D=A        CommandType.C_COMMAND (dest: D = comp: A; jump: null)


In [3]:
       
def assembler(assembly_code:str)->str:
    # translate from assembly code to hack machine code
    free_address = 16 # after R0-R15
    fstream = StringIO(assembly_code)
    p = Parser(fstream)
    st = SymbolTable()
    ret = []
    while True:
        ok, command = p.advance()
        if not ok:
            break
        ct = p.commandType(command)
        if ct == CommandType.C_COMMAND:
            dest, comp, jump = p.dest(command), p.comp(command), p.jump(command)
            ret.append(f'111{Code.dest[dest]}{Code.comp[comp]}{Code.jump[jump]}')
        else:
            symbol = p.symbol(command)
            if CommandType.A_COMMAND:
                if isInt(symbol): # e.g., @128
                    address = int(symbol)
                else: # @i
                    if st.contains(symbol):
                        address = st.getAddress(symbol)
                    else: # allocate new memory
                        address = free_address
                        free_address += 1
                        st.addEntry(symbol, address)

                ret.append(f'0{dec2bin(address, 15)}')
            else: # (symbol)
                st.addEntry(symbol, p.lineno + 1)
            
    return "\n".join(ret)

print(assembler(assembly_example))

0000000000010000
1110101000010000
0000000001100100
1110100110000000


In [None]:
# TODO: simulate the effect of the assembly language
# e.g., show memory before and execute each command, one at a time
class Assembler:
    def advance(self):
        'get to the next command'
        pass

    def showMemory(self):
        'print out the meory content at the current state'
        pass