In [66]:
class SymbolTable(object):
    
    def __init__(self):
        """ Create new symbol table. """
        self.table = {}
    
    def add_entry(self, symbol, address):
        """
        Adds the pair to the table.
        """
        self.table[symbol] = address
    
    def contains(self, symbol):
        """ Does the symbol table contain this value? """
        b = False
        if symbol in self.table:
            b = True
        return b
    
    def get_address(self, symbol):
        """
        Returns the address associated with the symbol.
        """
        return self.table.get(symbol, 0)

In [67]:
class Code(object):
    """ Translates Hack assembly language mnemonics into binary codes. """

    def __init__(self):
        self.ml = bin

    def dest(self, mnemonic):
        """
        in: command
        out: 3 bits
        returns the binary code of the dest mnemonic.
        """
        choices = {"null": 0,
                    "M": 1,
                    "D": 2,
                    "MD": 3,
                    "A": 4,
                    "AM": 5,
                    "AD": 6,
                    "AMD": 7}
#         print "dest ML: ", self.ml(choices[mnemonic])[2:]
        cmd = choices[mnemonic]
        if cmd == 0:
            return "000"
        return self.ml(choices[mnemonic])[2:]

    def comp(self, mnemonic):
        """
        in: command
        out: 7 bits
        returns the binary code of the comp mnemonic.
        """
        a = {}
        if "M" in mnemonic:
            if "-" in mnemonic:
                choices = {"M-D": 7, "D-M": 19, "M-1": 50, "-M": 51}
            elif "+" in mnemonic:
                choices = {"M+1": 55, "D+M": 2}
            elif "|" in mnemonic:
                choices = {"D|M": 21}
            elif "&" in mnemonic:
                choices = {"D&M": 0}
            elif "!" in mnemonic:
                choices = {"!M": 49}
            else:
                choices = {"M": 48}
        else:
            if "-" in mnemonic:
                choices = {"-1": 58, "D-1": 14, "A-1": 50, "D-A": 19, "A-D": 7, "-D": 15}
            elif "+" in mnemonic:
                choices = {"D+1": 13, "A+1": 55, "D+A": 2}
            elif "|" in mnemonic:
                choices = {"D|A": 21}
            elif "&" in mnemonic:
                choices = {"D&A": 0}
            elif "!" in mnemonic:
                choices = {"!A": 49}
            else:
                choices = {"0": 58, "1": 14, "D": 12, "A": 48}
#         print "comp ML: ", self.ml(choices[mnemonic])[2:]
        cmd = choices[mnemonic]
        if cmd == 0:
            return "000000"
        return self.ml(choices[mnemonic])[2:]

    def jump(self, mnemonic):
        """
        in: command
        out: 3 bits
        returns the binary code of the jump mnemonic.
        """
        choices = {
            "null": 0,
            "JGT": 1,
            "JEQ": 2,
            "JGE": 3,
            "JLT": 4,
            "JNE": 5,
            "JLE": 6,
            "JMP": 7,
        }
        cmd = self.ml(choices[mnemonic])
        if cmd == 0:
            return "000"
        else: return self.ml(choices[mnemonic])[2:]

In [68]:
import re


class Parser(object):
    """
    Encapsulates access to the input code. Reads an assembly
    language command, parses it, and provides convenient access to the
    command's components (fields and symbols). In addition,
    removes all white space and comments.
    """
    def __init__(self, i):
        """
        Input file is passed to this object
        thought a command line arg.
        Pass all commands into an array.
        """
        self.command_index = 0
        self.cmds = [line for line in i]
#         print self.cmds

    def cc(self):
        return self.cmds[self.command_index]

    def has_more_cmds(self):
        """
        Are there more commands in the input?
        :return: boolean
        """
        if len(self.cmds) >= (self.command_index + 1):
            return True
        else: return False

    def advance(self):
        """
        Reads the next command from the input and
        makes it the current command. Should be
        called only if `has_more_cmds` is true.
        Initially there is no command.
        :return: None
        """
#         print "cmd_index: ", self.command_index
#         print "current command: ", self.cmds[self.command_index]
        self.command_index += 1

    def reset(self):
        """
        Self-explainitory.
        """
        self.command_index = 0
        
    def command_type(self):
        """
        :return: type of current cmd:
        - A_COMMAND for @xxx where xxx is either a symbol
        or a decimal number.
        - C_COMMAND for dest=comp;jump
        - L_COMMAND for where xxx is a symbol.
        """
        cc = self.cmds[self.command_index]
        l_command = lambda i : i.find('(') != -1 and i.find(')') != -1
        a_command = lambda i : i.find('@') != -1
        c_command = lambda i : i.find("(") == -1 and i.find("@") == -1
        if l_command(cc):
            return "L_COMMAND"
        elif a_command(cc):
            return "A_COMMAND"
        elif c_command(cc):
            return "C_COMMAND"
            

    def cc_is_symbol(self):
        """
        True if current command is symbol else false
        """
        cc = self.cmds[self.command_index]
        m = re.search(r'\d+$', cc)
        jump = ";" in cc
        # if the string ends in digits
        # or has a semi
        if m or jump:
            return False
        else: return True


    def cc_is_int(self):
        cc = self.cmds[self.command_index]
        m = re.search(r'\d+$', cc)
        # if the string ends in digits
        if m:
            return True
        else: return False

    def symbol(self):
        """
        :return: the symbol or decimal xxx of
        current cmd @xxx or (xxx). Should be called only when
        command_type() returns A_COMMAND or L_COMMAND.
        ie. "LOOP" or "i"
        """
        cc = self.cmds[self.command_index]
        cc = re.sub(r"\(", "", cc)
        cc = re.sub(r"\)", "", cc)
        cc = re.sub(r"@", "", cc)
#         print "resulting symbol: ", "".join(cc)
        return str(cc)

    def dest(self):
        """
        :return: the dest mnemonic in the current C-command
        (8 possibilites). Should be called only when
        command_type() is C.
        """
        cc = self.cmds[self.command_index]
        find = re.compile(r"^[^=]*") # everything before "="
        dest = re.search(find, cc).group(0)
#         print "resulting dest: ", "".join(dest)
        return dest

    def comp(self):
        """
        :return: the comp mnemonic in the current C-command
        (28 possibilites). Should only be called when
        command_type() returns C.
        """
        cc = self.cmds[self.command_index]
        equals, semi = 0, 0
        comp = []
        for i, v in enumerate(cc):
            if v == "=":
                equals += (i + 1)
            elif v == ";":
                semi += i
        if "=" in cc and ";" in cc:
            for v in cc[equals:semi]:
                comp.append(v)
        elif "=" in cc:
            for v in cc[equals:]:
                comp.append(v)
#         print "resulting comp: ", "".join(comp)
        return "".join(comp)

    def jump(self):
        """
        :return: the jump mnemonic in the current C-command
        (8 possibilites). Should only be called when
        command_type() returns C.
        """
        cc = self.cmds[self.command_index]
        there = ";" in cc
        if not there:
            return False
        else:
            jump = []
            indextostart = cc.index(";") + 1
            for v in cc[indextostart:]:
                jump.append(v)
#             print "resulting jump: ", "".join(jump)
            return "".join(jump)


In [91]:
i = ["@0", 
     "D=M", 
     "@1", 
     "D=D-M", 
     "@OUTPUT_FIRST", 
     "D;JGT", 
     "@1", 
     "D=M",
     "@OUTPUT_D",
     "0;JMP",
     "(OUTPUT_FIRST)",
     "@0",
     "D=M",
     "(OUTPUT_D)",
     "@2",
     "M=D",
     "(INFINITE_LOOP)",
     "@INFINITE_LOOP",
     "0;JMP"]
ML = {}
code_obj = Code()
st = SymbolTable()
# input_file = sys.argv[1]
parsed = Parser(i)
rom_address = 0

# with open("{0}.hack".format(input_file), "w") as resulting_file:
for incrementer in range(2):
    if incrementer == 0: # first pass
        while parsed.has_more_cmds():
            if parsed.command_type() == "C_COMMAND" or parsed.command_type() == "A_COMMAND":
                rom_address += 1
            elif parsed.command_type() == "L_COMMAND":
                st.add_entry(parsed.symbol(), rom_address + 1)
            parsed.advance()
        print st.table
        parsed.reset()
    elif incrementer == 1:
        i = 0
        while parsed.has_more_cmds():
            cc = parsed.cc()
            command_type = parsed.command_type()
            print cc
            print command_type
            if command_type == "A_COMMAND":
                if st.contains(cc):
                    ML[i] = "0" + str(bin(st.get_address(cc[1:])))[2:]
                else: ML[i] = "0" + str(bin(st.get_address(line)))[2:]
            else:
                symboltable = st
                ML[i] = to_binary(cc, command_type, symboltable)
            parsed.advance()
            i += 1
        for k, v in ML.items():
            print k, v

{'OUTPUT_FIRST': 11, 'OUTPUT_D': 13, 'INFINITE_LOOP': 15}
@0
A_COMMAND
D=M
C_COMMAND


KeyError: 'D=M'

In [92]:
c = Code()
def to_binary(line, cmdtype, symboltable):
    pad = "111"
    if cmdtype == "C_COMMAND":
        null = "000"
        line = "M;JGT"
        a_bit = "1" if "M" in line else "0"
        semi = ";" in line
        equals = "=" in line
        if semi and equals: # dest = comp ; jump
            semi = line.find(";") 
            equals = line.find("=")
            print pad + a_bit + c.comp(line[equals + 1: semi]) + c.dest(line[:equals]) + c.jump(line[semi + 1:])
        elif equals:
            equals = line.find("=")
            print pad + a_bit + c.comp(line[equals + 1:]) + c.dest(line[:equals]) + null
        elif semi:
            semi = line.find(";")
            print pad + a_bit + "000000" + c.comp(line[:semi]) + c.dest(line[semi + 1:])

        comp = c.comp(line)
        dest = c.dest(line)
        jump = c.jump(line)
        return pad + a_bit + comp + dest + jump
    
    

In [93]:
pad = "111"
null = "000"
line = "M;JGT"
a_bit = "1" if "M" in line else "0"
semi = ";" in line
equals = "=" in line
c = Code()

if semi and equals: # dest = comp ; jump
    semi = line.find(";") 
    equals = line.find("=")
    print pad + a_bit + c.comp(line[equals + 1: semi]) + c.dest(line[:equals]) + c.jump(line[semi + 1:])
elif equals:
    equals = line.find("=")
    print pad + a_bit + c.comp(line[equals + 1:]) + c.dest(line[:equals]) + null
elif semi:
    semi = line.find(";")
    print pad + a_bit + "000000" + c.comp(line[:semi]) + c.jump(line[semi + 1:])

KeyError: 'JGT'