In [3]:
from pathlib import Path


asm = Path("pass1.asm").read_text()

sym_tab = {}               # name -> {'addr', 'defined'}
lit_tab = []               # {'lit','addr','pool'}
pool_tab = [0]
ic = []                    # (lc,label,opcode,operands,remark)

def is_lit(t): return t.startswith("=")
def strip_c(l): return l.split(";",1)[0].rstrip()
def parse_ops(s): return [x.strip() for x in re.split(r',\s*', s) if x.strip()]

DIR = {"START","END","LTORG","ORIGIN","EQU","DS","DC"}
IMP = {"MOV","MOVER","MOVEM","ADD","SUB","MULT","DIV","COMP","BC","STOP","READ","PRINT"}

lc = 0
for line in asm.splitlines():
    line = strip_c(line)
    if not line.strip(): continue
    parts = re.split(r'\s+', line, maxsplit=2)
    label=opcode=operands_str=None
    if len(parts)==1: opcode=parts[0]
    elif len(parts)==2:
        if parts[1].upper() in DIR or parts[1].upper() in IMP or ',' in parts[1] or is_lit(parts[1]):
            opcode,operands_str = parts[0],parts[1]
        else:
            label,opcode = parts[0],parts[1]
    else:
        label,opcode,operands_str = parts[0],parts[1],parts[2]
    opcode_u = opcode.upper() if opcode else ""

    if label:
        sym_tab.setdefault(label,{'addr':None,'defined':False})
        sym_tab[label]['addr']=lc; sym_tab[label]['defined']=True

    if opcode_u=="START":
        lc = int(operands_str.strip()) if operands_str and operands_str.strip() else 0
        ic.append((None,label,opcode_u,(operands_str or "").strip(),f"Set LC = {lc}")); continue

    if opcode_u in ("END","LTORG"):
        p = pool_tab[-1]
        for i in range(p, len(lit_tab)):
            if lit_tab[i]['addr'] is None:
                lit_tab[i]['addr']=lc; lit_tab[i]['pool']=len(pool_tab)-1; lc+=1
        if opcode_u=="LTORG": pool_tab.append(len(lit_tab))
        ic.append((lc,label,opcode_u,(operands_str or "").strip(),"Process literals"))
        if opcode_u=="END": break
        continue

    if opcode_u=="ORIGIN":
        expr=(operands_str or "").strip()
        m=re.match(r'([A-Za-z_][A-Za-z0-9_]*)\s*([\+\-]\s*\d+)?$',expr)
        if m:
            base = sym_tab.get(m.group(1),{'addr':None})['addr'] or 0
            if m.group(2): base += int(m.group(2).replace(' ',''))
            lc=base
        else:
            try: lc=int(expr)
            except: pass
        ic.append((lc,label,opcode_u,expr,f"Set LC = {lc}")); continue

    if opcode_u=="EQU":
        expr=(operands_str or "").strip()
        try: val=int(expr)
        except: val=sym_tab.get(expr,{'addr':None})['addr']
        if label:
            sym_tab.setdefault(label,{'addr':None,'defined':False})
            sym_tab[label]['addr']=val; sym_tab[label]['defined']=True
        ic.append((lc,label,opcode_u,expr,f"Define {label} = {val}")); continue

    if opcode_u=="DS":
        n = int((operands_str or "1").strip())
        ic.append((lc,label,opcode_u,(operands_str or "").strip(),f"Reserve {n} words at {lc}")); lc+=n; continue

    if opcode_u=="DC":
        ic.append((lc,label,opcode_u,(operands_str or "").strip(),f"Define constant at {lc}")); lc+=1; continue

    ops = parse_ops(operands_str or "")
    for o in ops:
        if is_lit(o):
            if o not in (x['lit'] for x in lit_tab): lit_tab.append({'lit':o,'addr':None,'pool':None})
        else:
            if re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', o) and not o.upper().startswith('R'):
                sym_tab.setdefault(o,{'addr':None,'defined':False})
    ic.append((lc,label,opcode_u,','.join(ops),"")); lc+=1

# finalize literals
p = pool_tab[-1]
for i in range(p,len(lit_tab)):
    if lit_tab[i]['addr'] is None:
        lit_tab[i]['addr']=lc; lit_tab[i]['pool']=len(pool_tab)-1; lc+=1

# print results
print("SYMBOLS"); 
for s,v in sym_tab.items(): print(s, v)
print("\nLITERALS")
for l in lit_tab: print(l)
print("\nPOOLS", pool_tab)
print("\nINTERMEDIATE")
for e in ic: print(e)


SYMBOLS
START {'addr': 0, 'defined': True}
LABEL1 {'addr': 1, 'defined': True}
VAR1 {'addr': 4, 'defined': True}
LTORG {'addr': None, 'defined': False}
LABEL2 {'addr': 200, 'defined': True}
END {'addr': None, 'defined': False}

LITERALS
{'lit': "='5'", 'addr': 203, 'pool': 0}
{'lit': "='1'", 'addr': 204, 'pool': 0}
{'lit': "='10'", 'addr': 205, 'pool': 0}

POOLS [0]

INTERMEDIATE
(0, 'START', '100', '', '')
(1, 'LABEL1', 'MOVER', "R1,='5'", '')
(2, '', 'ADD', 'R1,VAR1', '')
(3, '', 'SUB', "R2,='1'", '')
(4, 'VAR1', 'DC', '10', 'Define constant at 4')
(5, None, '', 'LTORG', '')
(6, '', 'MOVER', "R2,='10'", '')
(200, '', 'ORIGIN', '200', 'Set LC = 200')
(200, 'LABEL2', 'DS', '2', 'Reserve 2 words at 200')
(202, None, '', 'END', '')
