# Assembler per principle of computing in Jack, Hack

In [1]:
# here we have
# A: register for address
# D: register for data
# @symbol: load symbol into A
# (symbol): psuedo command for labeling the next address in ROM holding instructions
# // comment

assembly_example = '''
// comment
@a
D=D+M
@100
D=A // load A into D
'''

In [70]:
from io import StringIO
from typing import Tuple
from enum import Enum

CommandType = Enum('CommandType', 
                   ['A_COMMAND', # @symbol
                    'C_COMMAND', # dest=comp;jump
                    'L_COMMAND'] # (symbol)
                  )

WORD_SIZE = 16 # 16 width for hack machine

def dec2bin(dec:int, n_digits:int)->str:
    # decimal to binary, assume positive
    ret = []
    i = 0
    while i < n_digits and dec > 0:
        i += 1
        ret.append(dec % 2)
        dec //= 2
    ret += [0] * (n_digits - len(ret))
    return ''.join(map(str, ret[::-1]))

def bin2dec(b:str)->int:
    # convert binary number str to dec int
    ret = 0
    for s in b:
        assert s in '01', f'{b} is not binary'
        ret = 2 * ret + int(s)
    return ret

class Code:
    _dest_lists = ['null', 'M', 'D', 'MD', 'A', 'AM', 'AD', 'AMD']
    _jump_lists = ['null', 'JGT', 'JEQ', 'JGE', 'JLT', 'JNE', 'JLE', 'JMP']
    dest = {name:dec2bin(i, 3) for i, name in enumerate(_dest_lists)}
    jump = {name:dec2bin(i, 3) for i, name in enumerate(_jump_lists)}
    comp = {
        '0': '0101010',
        '1': '0111111',
        '-1': '0111010',
        'D': '0001100',
        'A': '0110000',
        '!D': '0001101',
        '!A': '0110001',
        '-D': '0001111',
        '-A': '0110011',
        'D+1': '0011111',
        'A+1': '0110111',
        'D-1': '0001110',
        'A-1': '0110010',
        'D+A': '0000010',
        'D-A': '0010011',
        'A-D': '0000111',
        'D&A': '0000000',
        'D|A': '0010101',
        'M': '1110000',
        '!M': '1110001',
        '-M': '1110011',
        'M+1': '1110111',
        'M-1': '1110010',
        'D+M': '1000010',
        'D-M': '1010011',
        'M-D': '1000111',
        'D&M': '1000000',
        'D|M': '1010101',
    }
    def compFun(comp_code):

        assert comp_code in Code.comp.values()

        def _binary_flip(d:int):
            b:str = dec2bin(d, WORD_SIZE)
            return ''.join([('0' if i=='1' else '1') for i in b])

        def _binary_and(a:int, b:int):
            ab, bb = dec2bin(a, WORD_SIZE), dec2bin(b, WORD_SIZE)
            return ''.join([('1' if i == j == '1' else '0')for i, j in zip(ab, bb)])

        def _binary_or(a:int, b:int):
            ab, bb = dec2bin(a, WORD_SIZE), dec2bin(b, WORD_SIZE)
            return ''.join([('1' if (i == '1' or j == '1') else '0')for i, j in zip(ab, bb)])
            
        def _f(A:int, M:int, D:int):
            'A/D: A/D register value, M: RAM[A] value'
            # TODO: this version doesn't simulate overflow as it can compute value more than 1 word
            # TODO: replace this with a ALU to really simulate the hardware
            mnemonic2output = {
                '0': 0,
                '1': 1,
                '-1': -1,
                'D': D,
                'A': A,
                '!D': _binary_flip(D),
                '!A': _binary_flip(A),
                '-D': -D,
                '-A': -A,
                'D+1': D+1,
                'A+1': A+1,
                'D-1': D-1,
                'A-1': A-1,
                'D+A': D+A,
                'D-A': D-1,
                'A-D': A-D,
                'D&A': _binary_and(D, A),
                'D|A': _binary_or(D, A),
                'M': M,
                '!M': _binary_flip(M),
                '-M': -M,
                'M+1': M+1,
                'M-1': M-1,
                'D+M': D+M,
                'D-M': D-M,
                'M-D': M-D,
                'D&M': _binary_and(D, M),
                'D|M': _binary_or(D, M),
            }
            compCode2mnemonic = {v: k for k, v in Code.comp.items()}
            return mnemonic2output[compCode2mnemonic[comp_code]]

        return _f
        
class SymbolTable:
    def __init__(self):
        self.d = {
            # page 110 of 'the elements of computing systems'
            # symbol to RAM address
            'SP': 0,
            'LCL': 1,
            'ARG': 2,
            'THIS': 3,
            'THAT': 4, 
            'SCREEN': 16384,
            'KBD': 24576
        }
        for i in range(16): # R0-R15
            self.d[f'R{i}'] = i

    def addEntry(self, symbol:str, address:int):
        assert symbol not in self.d, f'{symbol} already in symbol table'
        self.d[symbol] = address

    def contains(self, symbol:str):
        return symbol in self.d

    def getAddress(self, symbol:str):
        return self.d[symbol]

    def __repr__(self):
        ret = f'{"symbol":10s}|{"address":10s}\n' + '-' * 21
        for k, v in self.d.items():
            ret += f'\n{k:10s}|{str(v):10s}'
        return ret

class Parser:

    def __init__(self, fs: StringIO):
        self.filestream = fs
        self.lineno = 0
        
    def advance(self)->Tuple[bool, str]:
        'return next command'
        l = self.filestream.readline()
        if l == '':
            # EOF error
            return False, ''

        l = l.strip()
        if '//' in l:
            l = l[:l.index('//')].strip()
            
        if l == '':    
            return self.advance()
        else:
            if not l.startswith('('):
                # pseudo command doesn't advance lineno
                self.lineno += 1
            return True, l

    def commandType(self, command):
        if command.startswith('@'):
            return CommandType.A_COMMAND
        elif command.startswith('('):
            return CommandType.L_COMMAND
        else:
            return CommandType.C_COMMAND

    def symbol(self, command):
        assert self.commandType(command) != CommandType.C_COMMAND, 'symbol does not apply to C command'
        if command.startswith('@'):
            return command[1:].strip()
        else:
            assert command[-1] == ')', 'L_COMMAND must end i )'
            return command[1:-1].strip()

    def dest(self, command):
        assert self.commandType(command) == CommandType.C_COMMAND, 'dest only apply to C command'
        o = 'null'
        if '=' in command:
            o = command.split('=')[0]
            assert o in Code.dest, f'{o} not in dest {Code.dest.keys()}'
        return o

    def comp(self, command):
        assert self.commandType(command) == CommandType.C_COMMAND, 'comp only apply to C command'
        o = command
        if ';' in command:
            o = o.split(';')[0]
        if '=' in command:
            o = o.split('=')[1]
        assert o in Code.comp, f'{o} not in comp {Code.comp.keys()}'
        return o

    def jump(self, command):
        assert self.commandType(command) == CommandType.C_COMMAND, 'jump only apply to C command'
        o = 'null'
        if ';' in command:
            o = o.split(';')[1]
        assert o in Code.jump, f'{o} not in jump {Code.jump.keys()}'
        return o    

def isInt(s:str):
    try:
        int(s)
        return True
    except:
        return False
        
p = Parser(StringIO(assembly_example))
while True:
    ok, command = p.advance()
    if not ok: break
    ct = p.commandType(command)
    if ct == CommandType.C_COMMAND:
        print(f'{command:10s} {ct:10s} (dest: {p.dest(command)} = comp: {p.comp(command)}; jump: {p.jump(command)})')
    else:
        print(f'{command:10s} {ct:10s} (symbol: {p.symbol(command)})')

@a         CommandType.A_COMMAND (symbol: a)
D=D+M      CommandType.C_COMMAND (dest: D = comp: D+M; jump: null)
@100       CommandType.A_COMMAND (symbol: 100)
D=A        CommandType.C_COMMAND (dest: D = comp: A; jump: null)


In [82]:
# simulate the effect of the assembly language
# e.g., show memory before and execute each command, one at a time

class Machine: # hack machine
    def __init__(self, memory_size=100, max_steps=100):
        self.memory_size = memory_size
        self.max_steps = max_steps # max runtime allowed
        
    def load(self, machine_code):
        codes = machine_code.split('\n')
        self.memory_size = max(self.memory_size, len(codes))
        self.machine = {
            'PC': 0, # program counter
            'A': 0, # A register
            'D': 0, # D register
            'ROM': codes, # code memory
            'RAM': [0] * self.memory_size,
        }

    def _isA(self, command:str):
        # is A command, if False then C command
        # see specification on page 109
        return command[0] == '0'

    def _comp(self, command:str):
        # see specification on page 109
        assert not self._isA(command)
        return command[3:-6]

    def _dest(self, command:str):
        # see specification on page 109
        assert not self._isA(command)
        return command[-6:-3]

    def _jump(self, command:str):
        # see specification on page 109
        assert not self._isA(command)
        return command[-3:]    
        
    def advance(self)->bool:
        ## fetch instruction
        if self.machine['PC'] >= len(self.machine['ROM']):
            # finishes execution
            return False
        command = self.machine['ROM'][self.machine['PC']]
        self.machine['PC'] += 1
        
        ## instruction decoding
        if self._isA(command): # A command: @symbol
            address = command[1:]
            ## instruction execution
            self.machine['A'] = bin2dec(address)
        else: # C command: dest=comp;jump
            d, c, j = self._dest(command), self._comp(command), self._jump(command)
            oldA = self.machine['A']

            ## instruction execution
            # comp: page 67
            o = Code.compFun(c)(self.machine['A'], self.machine['RAM'][oldA], self.machine['D'])

            # dest: page 68
            if d[0] == '1':
                self.machine['A'] = o
            if d[1] == '1':
                self.machine['D'] = o
            if d[2] == '1':
                self.machine['RAM'][old_A] = o
                
            # jump: page 69
            if j[0] == '1' and o < 0:
                self.machine['PC'] = oldA
            if j[1] == '1' and o == 0:
                self.machine['PC'] = oldA
            if j[2] == '1' and o > 0:
                self.machine['PC'] = oldA
        return True
            
    def __call__(self, machine_code:str):
        self.load(machine_code)
        steps = 0
        while steps <= self.max_steps:
            ok = self.advance()
            if not ok:
                print('Finished execution')
        print(f'Program terminated b/c exceeding max step of {self.max_steps}')

    def __repr__(self):
        ret = 'HackMachine(\n'
        ret += f'{"part":10s}|{"value":10s}\n' + '_' * 21
        for k, v in self.machine.items():
            ret += f'\n{k:10s}|{v}'
        return ret + '\n)'

class Assembler:

    def __init__(self, free_address=16):
        self.free_address = free_address
    
    def load(self, assembly_code:str):
        self.code = assembly_code
        fstream = StringIO(self.code)
        self.parser = Parser(fstream)
        self.symbol_table = SymbolTable()        
        
    def advance(self)->Tuple[bool, str]:
        'get to the next command, return ok, machine_code'
        ok, command = self.parser.advance()
        if not ok:
            return False, "no more input"
        ct = self.parser.commandType(command)
        if ct == CommandType.C_COMMAND:
            dest, comp, jump = self.parser.dest(command), self.parser.comp(command), self.parser.jump(command)
            return True, f'111{Code.comp[comp]}{Code.dest[dest]}{Code.jump[jump]}'
        else:
            symbol = self.parser.symbol(command)
            if CommandType.A_COMMAND:
                if isInt(symbol): # e.g., @128
                    address = int(symbol)
                else: # @i
                    if self.symbol_table.contains(symbol):
                        address = self.symbol_table.getAddress(symbol)
                    else: # allocate new memory
                        address = self.free_address
                        self.free_address += 1
                        self.symbol_table.addEntry(symbol, address)

                return True, f'0{dec2bin(address, 15)}'
            else: # (symbol)
                self.symbol_table.addEntry(symbol, p.lineno + 1)
                return self.advance()

    def __call__(self, assembly_code:str):
        self.load(assembly_code)
        ret = []
        while True:
            ok, code = self.advance()
            if not ok: break
            ret.append(code)
        return '\n'.join(ret)

    def __repr__(self):
        return f'Assembler(free_address={self.free_address}, symbol_table=\n{self.symbol_table}\n)'


In [84]:
# assembly program examples
class AssemblyExamples:
    simple_memory = '''
// A=2, D=1, M[2]=1
@2
D=1
M=D
    '''

assembly_example = AssemblyExamples.simple_memory
print('Assembly code')
print('='*30)
print(assembly_example)
print('='*30)


# translate assembly to machine code
ass = Assembler()
machine_codes = ass(assembly_example)

print('Machine code')
print('='*30)
print(machine_codes)
print('='*30)

# execute machine code
machine = Machine()
machine.load(machine_codes)

while True:
    ok = machine.advance()
    if not ok: break

print('Machine execution')
print('='*30)
print(machine)
print('='*30)


Assembly code

// A=2, D=1, M[2]=1
@2
D=1
M=D
    
Machine code
0000000000000010
1110111111010000
1110001100001000


NameError: name 'old_A' is not defined