In [25]:
from collections import deque
import json
from pprint import pprint
import re
import sys
import re


from isa import Opcode, write_code, STDIN, STDOUT,ops_gr


def pre_process(raw: str) -> str:
    lines = []
    for line in raw.split("\n"):
        # remove comments
        comment_idx = line.find("#")
        if comment_idx != -1:
            line = line[:comment_idx]
        # remove leading spaces
        line = line.strip()

        lines.append(line)

    text = " ".join(lines)
    # text = raw.replace("\n", " ")
    # избавляется от лишних пробелов и символов перехода строки
    text = re.sub("[ ][ ]*", " ", text)

    return text


def tokenize(text):
    tokens = re.sub(
        r"'.*'", lambda match: f'{",".join(map(lambda char:str(ord(char)),match.group()))}', text
    )
    tokens = re.split("[, ]", tokens)
    new_tokens = []
    for token in tokens:
        token = token.upper()
        if token[0] == ".":
            new_tokens.append(".")
            token = token[1:]
        if token[-1] == ':':
            token = token[:-1]
            new_tokens.extend([token, ":"])
        else:
            new_tokens.append(token)

    return new_tokens


def allocate(tokens):
    address = 0
    data = []
    labels = {}
    label_start = 0
    length = len(tokens)
    for i in range(length):
        token = tokens[i]
        if token==':':
            labels[tokens[i - 1]] = len(data)
        elif token.isdigit():
            data.append(token)

    return data,labels
def parse(tokens):
    labels = {}
    code = []
    length = len(tokens)
    code_address =0
    args_count = 0
    for i in range(length):
        token = tokens[i]
        if token==':':
            labels[tokens[i - 1]] = len(code)
        elif token in Opcode._value2member_map_:
            code.append({"opcode":token,"args":[]})
            if token in ops_gr['arith']:
                args_count = 3
            elif token in ops_gr['mem']:
                args_count = 2
            elif token == 'JMP':
                print("JMP")
                args_count = 1
            elif token in ops_gr['branch']:
                args_count = 3
            else:
                args_count = 0
        elif args_count > 0:
            code[-1]["args"].append(token)
            args_count -= 1
    

    return code,labels


def translate(text):
    processed_text = pre_process(text)
    tokens = tokenize(processed_text)
    # print(tokens)
    section_start = None
    data_tokens, code_tokens = [], []
    labels = {}
    for token_index, token in enumerate(tokens):
        if token == 'section'.upper() or (token_index == len(tokens) - 1):
            if section_start is None:
                section_start = token_index
            else:
                if tokens[section_start+1] == 'data'.upper():
                    data_tokens = tokens[section_start+3:token_index]
                    section_start = token_index
                elif tokens[section_start+1] == 'text'.upper():
                    code_tokens = tokens[section_start+3:token_index]
                    section_start = token_index
    # print(data_tokens, code_tokens)
    data,data_labels = allocate(data_tokens)
    code,code_labels = parse(code_tokens)
    labels = data_labels.copy()
    code_start_label = len(data)
    print(code_labels)
    for key,value in code_labels.items():
        labels[key] = value + code_start_label
    program = data + code
    for word_idx,word in enumerate(program):
        if isinstance(word,dict):
            for arg_idx,arg in enumerate(word["args"]):
                if arg in labels:
                    program[word_idx]["args"][arg_idx] = labels[arg]
    
    return program,labels
    #     return code, vars_mm


def main(args):
    assert len(args) == 2, \
    "Wrong arguments: translator.py <input_file> <target_file>"

    source,target = args

    with open(source, "rt", encoding="utf-8") as f:
        source = f.read()

    program,labels = translate(source)
    pprint(program)
    pprint(labels)
    print("source LoC:", len(source.split()), "code instr:",
      len(program))

    write_code(target, program)
main(["programs/asm/hello.asm","programs/asm/hello.json"])
main(["programs/asm/cat.asm","programs/asm/cat.json"])



JMP
{'_START': 0, 'WRITE': 2, 'END': 7}
['39',
 '72',
 '101',
 '108',
 '108',
 '111',
 '44',
 '32',
 '87',
 '111',
 '114',
 '108',
 '100',
 '33',
 '39',
 '0',
 '13',
 {'args': ['2', '0', 0], 'opcode': 'ADDI'},
 {'args': ['3', '0', '170'], 'opcode': 'ADDI'},
 {'args': ['1', '2'], 'opcode': 'LW'},
 {'args': ['1', '0', 24], 'opcode': 'BEQ'},
 {'args': ['3', '1'], 'opcode': 'SW'},
 {'args': ['2', '2', '1'], 'opcode': 'ADDI'},
 {'args': [19], 'opcode': 'JMP'},
 {'args': [], 'opcode': 'HALT'}]
{'END': 24, 'HELLO': 0, 'LENGHT': 16, 'WRITE': 19, '_START': 17}
source LoC: 34 code instr: 25
JMP
JMP
{'_START': 0, 'READ': 2, 'FINISH_READ': 7, 'WRITE': 9, 'END': 14}
['0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 {'args': ['2', '0', 0], 'opcode': 'ADDI'},
 {'args': ['3', '0', '169'], 'opcode': 'ADDI'},
 {'args': ['1', '3'], 'opcode': 'LW'},
 {'args': ['2', '1'],