In [None]:
class Parser:
    def __init__(self, filename):
        with open(filename, 'r') as f:
            self.lines = f.readlines()
        self.index = 0
        self.commands = list()
        
        for line in self.lines:
            processed_line = line.strip().replace(' ', '').split('//')[0]
            if processed_line:
                self.commands.append(processed_line)

        self.current_line = None

    def has_more_commands(self):
        return self.index < len(self.commands)
    
    def advance(self):
        if self.has_more_commands():
            self.current_line = self.commands[self.index]
            self.index += 1

    def command_type(self):
        if self.current_line.startswith('@'):
            return 'A_COMMAND'
        elif any(char in self.current_line for char in ['=', ';']):
            return 'C_COMMAND'
        else:
            return 'L_COMMAND'
        
    def symbol(self) -> str:
        if self.command_type() == 'A_COMMAND':
            return self.current_line.strip('@')
        elif self.command_type() == 'L_COMMAND':
            return self.current_line.strip('()')
        
    def dest(self) -> str | None:
        if self.command_type() == 'C_COMMAND':
            if '=' in self.current_line:
                return self.current_line.split('=')[0]
            else:
                return None
            
    def comp(self) -> str:
        if self.command_type() == 'C_COMMAND':
            if ';' and '=' in self.current_line:
                return self.current_line.split('=')[1].split(';')[0]
            elif '=' in self.current_line:
                return self.current_line.split('=')[1]
            elif ';' in self.current_line:
                return self.current_line.split(';')[0]
            else:
                return self.current_line
            
    def jump(self) -> str | None:
        if self.command_type() == 'C_COMMAND':
            if ';' in self.current_line:
                return self.current_line.split(';')[1]
            else:
                return None
            
    def reset(self):
        self.index = 0

In [None]:
class Code:
    def __init__(self):
        self.dest_map = {
            None: '000',
            'M': '001',
            'D': '010',
            'MD': '011',
            'A': '100',
            'AM': '101',
            'AD': '110',
            'AMD': '111'
        }

        self.comp_map = {
            '0': '0101010',
            '1': '0111111',
            '-1': '0111010',
            'D': '0001100',
            'A': '0110000',
            'M': '1110000',
            '!D': '0001101',
            '!A': '0110001',
            '!M': '1110001',
            '-D': '0001111',
            '-A': '0110011',
            '-M': '1110011',
            'D+1': '0011111',
            'A+1': '0110111',
            'M+1': '1110111',
            'D-1': '0001110',
            'A-1': '0110010',
            'M-1': '1110010',
            'D+A': '0000010',
            'D+M': '1000010',
            'D-A': '0010011',
            'D-M': '1010011',
            'A-D': '0000111',
            'M-D': '1000111',
            'D&A': '0000000',
            'D&M': '1000000',
            'D|A': '0010101',
            'D|M': '1010101'
        }

        self.jump_map = {
            None: '000',
            'JGT': '001',
            'JEQ': '010',
            'JGE': '011',
            'JLT': '100',
            'JNE': '101',
            'JLE': '110',
            'JMP': '111'
        }

    def dest(self, symbol: str | None) -> str:
        return self.dest_map[symbol]
    
    def comp(self, symbol: str) -> str:
        return self.comp_map[symbol]
    
    def jump(self, symbol: str | None) -> str:
        return self.jump_map[symbol]


In [None]:
class SymbolTable:
    """Symbol-to-address mapping, pre-loaded with the predefined symbols."""

    def __init__(self):
        # R0..R15 map to addresses 0..15.
        self.table = {f'R{number}': number for number in range(16)}
        # Memory-mapped I/O symbols, then the named aliases for addresses 0-4.
        self.table.update(
            SCREEN=16384,
            KBD=24576,
            SP=0,
            LCL=1,
            ARG=2,
            THIS=3,
            THAT=4,
        )

    def add_entry(self, symbol: str, address: int):
        """Bind ``symbol`` to ``address``, replacing any existing binding."""
        self.table.update({symbol: address})

    def contains(self, symbol: str) -> bool:
        """Return True if ``symbol`` has an address in the table."""
        return symbol in self.table

    def get_address(self, symbol: str) -> int:
        """Return the address of ``symbol``; KeyError if it is not present."""
        address = self.table[symbol]
        return address


In [6]:
def main(basename):
    """Assemble ``{basename}.asm`` into ``{basename}.hack``.

    The translation uses two passes over the parsed commands: the first
    records the instruction address of every label declaration, the second
    emits one 16-character binary line per A- or C-instruction and allocates
    variables (from address 16 upwards) for symbols not seen before.

    Args:
        basename: path of the assembly file without its ``.asm`` extension;
            the output is written next to it with a ``.hack`` extension.

    Raises:
        ValueError: if an A-instruction's value does not fit in 15 bits.
        KeyError: if a C-instruction uses a mnemonic unknown to ``Code``.
    """
    max_address = 2 ** 15 - 1   # an A-instruction carries a 15-bit value

    parser = Parser(f'{basename}.asm')
    coder = Code()
    symbol_table = SymbolTable()

    # First pass - look for label declarations
    instruction_address = 0
    while parser.has_more_commands():
        parser.advance()
        if parser.command_type() == 'L_COMMAND':
            # The instruction after the label declaration is where the label block starts
            symbol_table.add_entry(parser.symbol(), instruction_address)
        else:
            # Label declarations are not real instructions, so only A- and
            # C-commands advance the instruction address.
            instruction_address += 1

    parser.reset()

    # Second pass - translate assembly into machine code
    machine_instructions = []
    # Memory starting at register 16 is dedicated to variables; the index
    # grows by one every time a new variable is created.
    memory_idx = 16

    while parser.has_more_commands():
        parser.advance()
        command_type = parser.command_type()

        if command_type == 'C_COMMAND':
            dest_field = coder.dest(parser.dest())
            comp_field = coder.comp(parser.comp())
            jump_field = coder.jump(parser.jump())
            machine_instructions.append(f'111{comp_field}{dest_field}{jump_field}')

        elif command_type == 'A_COMMAND':
            symbol = parser.symbol()
            # isdecimal() accepts exactly the digit strings int() can parse,
            # unlike isnumeric(), which also accepts e.g. superscript digits.
            if symbol.isdecimal():
                value = int(symbol)
            elif symbol_table.contains(symbol):
                value = symbol_table.get_address(symbol)
            else:
                symbol_table.add_entry(symbol, memory_idx)
                value = memory_idx
                memory_idx += 1

            if value > max_address:
                # Without this check the formatted value would be wider than
                # 15 bits and the emitted line would not be a valid instruction.
                raise ValueError(
                    f'A-instruction value {value} (from @{symbol}) '
                    f'does not fit in 15 bits'
                )

            # 0 = opcode, followed by the 15-bit address
            machine_instructions.append(f'0{value:015b}')

    with open(f'{basename}.hack', 'w') as file:
        file.write('\n'.join(machine_instructions))

In [8]:
# Assemble Pong.asm into Pong.hack (both paths are relative to the working directory).
main('Pong')