In [43]:
from enum import Enum

class Modify(Enum):
    INCREMENT = 1
    DECREMENT = -1

def stringify(unquoted_string):
    return '"' + unquoted_string + '"'

# for all format classes, string values must be passed with quotes included 
# (i.e., single quote-wrapped)
# ex: instead of "happy" -> '"happy"'

def format_tab_string(num_tabs):
    return_string = ""
    for i in range(num_tabs):
        return_string += "\t"
    return return_string

def format_class_initialization(name):
    return "class " + name + ":\n"

def format_attribute_assignment(attribute_name, attribute_value, 
                                num_tabs, is_self):
    return_string = format_tab_string(num_tabs)
    if is_self:
        return_string += "self."
    return return_string + attribute_name + " = " + str(attribute_value) + "\n"

def format_attribute_modification(attribute_name, modification_type, 
                                  modifier_value, num_tabs, is_self):
    return_string = format_tab_string(num_tabs)
    if is_self:
        return_string += "self."
    if modification_type == Modify.INCREMENT:
        return return_string + attribute_name + " += " + str(modifier_value) + "\n"
    elif modification_type == Modify.DECREMENT:
        return return_string + attribute_name + " -= " + str(modifier_value) + "\n"
    
def format_return_statement(return_value, num_tabs):
    tabs_string = format_tab_string(num_tabs)
    return tabs_string + "return " + str(return_value) + "\n"

# pass print_values as a list, maintaining string quotes, if needed
def format_print_statement(print_values, num_tabs):
    tabs_string = format_tab_string(num_tabs)
    if print_values == None:
        return tab_string + "print\n"
    else:
        return_string = tabs_string + "print (" + str(print_values[0])
        for value in print_values[1:]:
            return_string += ", " + str(value)
        return_string += ")\n"
        return return_string

# pass method_parameters as a list, maintaining string quotes, if needed
def format_invoke_method(method_name, method_parameters, num_tabs, is_self):
    return_string = format_tab_string(num_tabs)
    if is_self:
        return_string += "self."
    if method_parameters == None:
        return return_string + method_name + "()\n"
    else:
        return_string = return_string + method_name + "(" + str(method_parameters[0])
        for parameter in method_parameters[1:]:
            return_string += ", " + str(parameter)
        return_string += ")\n"
        return return_string

# pass method_parameters as a list
def format_method_declaration(method_name, method_parameters):
    if method_parameters == None:
        return "\n\tdef " + method_name + "(self):\n"
    else:
        return_string = "\n\tdef " + method_name + "(self, " + str(method_parameters[0])
        for parameter in method_parameters[1:]:
            return_string += ", " + str(parameter)
        return_string += "):\n"
        return return_string

In [None]:
nouns = ["NN", "NNP"]

def build_code_string(tokens):
    current_token = 0
    code_string = ""
    while tokens[current_token][0] not in nouns:
        current_token += 1
    code_string += format_class_initialization(tokens[current_token][1])
    while tokens[current_token][0] != "PRP":
        current_token += 1
    current_token += 1
    while tokens[current_token][0] != "TO":
        if tokens[current_token][0] == "ASSIGN":
            if tokens[current_token + 1][0] == "CD":
                code_string += format_attribute_assignment(tokens[current_token - 1][1], tokens[current_token + 1][1], 1, False)
            elif tokens[current_token + 1][0] == "STRSTART":
                temp_string = ""
                temp_token_count = 2
                while tokens[current_token + temp_token_count][0] != "STRSTOP":
                    temp_string += tokens[current_token + temp_token_count][1]
                    temp_token_count += 1
                code_string += format_attribute_assignment(tokens[current_token - 1][1], stringify(temp_string), 1, False)
            elif tokens[current_token + 1][0] == "TRUE":
                code_string += format_attribute_assignment(tokens[current_token - 1][1], True, 1, False)
            elif tokens[current_token + 1][0] == "FALSE":
                code_string += format_attribute_assignment(tokens[current_token - 1][1], False, 1, False)
            elif tokens[current_token + 1][0] == "PARL" or "BRACKL":
                code_string += format_attribute_assignment(tokens[current_token - 1][1], tokens[current_token + 2][1], 1, False)
        current_token += 1
    while current_token < len(tokens):
        if tokens[current_token][0] == "TO":
            current_token += 1
            if tokens[current_token + 2][0] == "PARAMVERB":
                params = []
                temp_token_count = 3
                while tokens[current_token + temp_token_count][0] in nouns or tokens[current_token + temp_token_count][0] == ",":
                    if tokens[current_token + temp_token_count][0] in nouns:
                        params.append(tokens[current_token + temp_token_count][1])
                        temp_token_count += 1
                    else:
                        temp_token_count += 1
                code_string += format_method_declaration(tokens[current_token][1], params)
                current_token += temp_token_count
            while tokens[current_token][0] != "TO" and current_token < len(tokens):
                print code_string
                if tokens[current_token][0] == "BODYVERB":
                    params = []
                    temp_token_count = 3
                    while tokens[temp_token_count][0] != "PARR":
                        if tokens[temp_token_count][0] != ",":
                            params.append(tokens[temp_token_count][1])
                            temp_token_count += 1
                    code_string += format_invoke_method(tokens[current_token + 1][1], params, 2, True)
                    current_token += temp_token_count
                if tokens[current_token][0] == "ASSIGN":
                    if tokens[current_token + 1][0] == "CD":
                        code_string += format_attribute_assignment(tokens[current_token - 1][1], tokens[current_token + 1][1], 2, True)
                    elif tokens[current_token + 1][0] == "STRSTART":
                        temp_string = ""
                        temp_token_count = 2
                        while tokens[current_token + temp_token_count][0] != "STRSTOP":
                            temp_string += tokens[current_token + temp_token_count][1]
                            temp_token_count += 1
                        code_string += format_attribute_assignment(tokens[current_token - 1][1], stringify(temp_string), 2, True)
                    elif tokens[current_token + 1][0] == "TRUE":
                        code_string += format_attribute_assignment(tokens[current_token - 1][1], True, 2, True)
                    elif tokens[current_token + 1][0] == "FALSE":
                        code_string += format_attribute_assignment(tokens[current_token - 1][1], False, 2, True)
                    elif tokens[current_token + 1][0] == "PARL" or "BRACKL":
                        code_string += format_attribute_assignment(tokens[current_token - 1][1], tokens[current_token + 2][1], 2, True)
                elif tokens[current_token][0] == "SUBS":
                    if tokens[current_token - 1][0] == "BRACKR":
                        index_access = tokens[current_token - 4][1] + "[" + tokens[current_token - 2][1] + "]"
                        code_string += format_attribute_modification(index_access, Modify.DECREMENT, 
                                                                     tokens[current_token + 1][1], 2, True)
                    else:
                        code_string += format_attribute_modification(tokens[current_token - 1][1], 
                                                                 Modify.DECREMENT, tokens[current_token + 1][1], 2, True)
                elif tokens[current_token][0] == "ADD":
                    if tokens[current_token - 1][0] == "BRACKR":
                        index_access = tokens[current_token - 4][1] + "[" + tokens[current_token - 2][1] + "]"
                        code_string += format_attribute_modification(index_access, Modify.INCREMENT, 
                                                                     tokens[current_token + 1][1], 2, True)
                    else:
                        code_string += format_attribute_modification(tokens[current_token - 1][1], 
                                                                 Modify.INCREMENT, tokens[current_token + 1][1], 2, True)
                #elif tokens[current_token][0] == "PRINT":
                    #code_string += format_print_statement(tokens[current_token + 1])
                current_token += 1
        current_token += 1
    return code_string

In [None]:
def tokenize(file_name):
    file = open(file_name, "r")
    word = ""
    list1 = []
    list2 = []
    i = 0
    read = False
    with file as fileobj:
        for line in fileobj:
            for ch in line:
                if ch == "\'" and read == False:
                    read = True
                    i+=1
                    continue
                if read == True and ch == "\'":
                    read = False
                    if i % 2 == 0:
                        list1.append(word)
                    elif i % 2 != 0:
                        list2.append(word)
                    word = ""
                if(read == True):
                    word += ch
    zipped = zip(list1, list2)
    file.close()
    return list(zipped)

def print_tuple(tokens):
    for item in tokens:
        print item
    print "\n"

if __name__ == "__main__":
    file_name = "../../output/tokens.txt"
    tokens = tokenize(file_name)
    print_tuple(tokens)
    print build_code_string(tokens)

In [10]:
print_string = (format_class_initialization("Dog") +
               format_attribute_assignment("mood", '"HAPPY"', 1, False) +
               format_attribute_assignment("energy", 100, 1, False) +
               format_attribute_assignment("coordinatePosition", (0,0), 1, False) +
               format_method_declaration("Run", None) +
               format_invoke_method("MoveForward", [2], 2, True) +
               format_attribute_modification("energy", Modify.DECREMENT, 1, 2, True) +
               format_attribute_assignment("mood", '"PLAY"', 2, True) +
               format_return_statement(0, 2) +
               format_method_declaration("MoveForward", ["numberSteps"]) +
               format_attribute_modification("coordinatePosition[0]", Modify.INCREMENT, "numberSteps", 2, True) +
               format_attribute_assignment("mood", '"MOVING"', 2, True) +
               format_attribute_modification("energy", Modify.DECREMENT, 1, 2, True) +
               format_method_declaration("MoveLeft", ["numberSteps"]) +
               format_attribute_modification("coordinatePosition[1]", Modify.DECREMENT, "numberSteps", 2, True) +
               format_attribute_assignment("mood", '"MOVING"', 2, True) +
               format_attribute_modification("energy", Modify.DECREMENT, 1, 2, True) +
               format_method_declaration("MoveRight", ["numberSteps"]) +
               format_attribute_modification("coordinatePosition[1]", Modify.INCREMENT, "numberSteps", 2, True) +
               format_attribute_assignment("mood", '"MOVING"', 2, True) +
               format_attribute_modification("energy", Modify.DECREMENT, 1, 2, True) +
               format_method_declaration("Bark", None) +
                format_print_statement(['"barf, bark"'], 2) +
                format_attribute_modification("energy", Modify.DECREMENT, 1, 2, True) +
                format_attribute_assignment("mood", '"BARKING"', 2, True) +
                format_method_declaration("Lay", None) +
                format_print_statement(['"relax"'], 2) +
                format_print_statement(['"move the Booty"'], 2) +
                format_attribute_modification("energy", Modify.INCREMENT, 3, 2, True) +
                format_method_declaration("Check", None) +
                format_print_statement(['"mood: " + self.mood'], 2) +
                format_print_statement(['"energy: " + str(self.energy)'], 2) +
                format_print_statement(['"Position: " + str(self.coordinatePosition)'], 2)
               )

print print_string

class Dog:
	mood = "HAPPY"
	energy = 100
	coordinatePosition = (0, 0)

	def Run(self):
		self.MoveForward(2)
		self.energy -= 1
		self.mood = "PLAY"
		return 0

	def MoveForward(self, numberSteps):
		self.coordinatePosition[0] += numberSteps
		self.mood = "MOVING"
		self.energy -= 1

	def MoveLeft(self, numberSteps):
		self.coordinatePosition[1] -= numberSteps
		self.mood = "MOVING"
		self.energy -= 1

	def MoveRight(self, numberSteps):
		self.coordinatePosition[1] += numberSteps
		self.mood = "MOVING"
		self.energy -= 1

	def Bark(self):
		print ("barf, bark")
		self.energy -= 1
		self.mood = "BARKING"

	def Lay(self):
		print ("relax")
		print ("move the Booty")
		self.energy += 3

	def Check(self):
		print ("mood: " + self.mood)
		print ("energy: " + str(self.energy))
		print ("Position: " + str(self.coordinatePosition))

