In [6]:
import openpyxl

In [7]:
def parent_of_merged_cell(cell):
	"""Return the coordinate of the start cell of the merged range holding ``cell``.

	The worksheet is taken from ``cell.parent`` and each of its merged ranges
	is tested for membership of ``cell.coordinate``.

	Returns:
		The ``coordinate`` of the first matching range's ``start_cell``, or
		``None`` when no merged range contains the cell.
	"""
	worksheet = cell.parent
	wanted = cell.coordinate

	# Linear scan over every merged range: fine for small sheets, but it can
	# get slow when a large spreadsheet contains many merged ranges.
	return next(
		(
			merged_range.start_cell.coordinate
			for merged_range in worksheet.merged_cells.ranges
			if wanted in merged_range
		),
		None,
	)


def cell_value(cell):
	"""Read the value of a cell, resolving merged cells to their start cell.

	Args:
		cell: A worksheet cell object. For an ``openpyxl`` ``MergedCell`` the
			value is read from the start cell of the merged range that
			contains it (looked up with ``parent_of_merged_cell``). Any other
			cell object is read through its ``value`` attribute.

	Returns:
		The cell's value, or ``None`` when the cell is empty, when no merged
		range contains a merged cell, or when the object has no ``value``
		attribute.
	"""
	if isinstance(cell, openpyxl.cell.cell.MergedCell):
		coord = parent_of_merged_cell(cell)
		if coord is None:
			# No merged range claims this cell, so there is no start cell to
			# read from; report it as empty instead of indexing the sheet
			# with ``None``.
			return None
		return cell.parent[coord].value

	# Ordinary cells, and any other cell-like object, carry their value
	# directly. Falling back to ``None`` keeps the previous result for objects
	# without a ``value`` attribute.
	return getattr(cell, "value", None)

In [8]:
from itertools import count

# Open the workbook that holds the instruction tables.
wb = openpyxl.load_workbook(filename="instruction excel.xlsx")

print("Parsing Instruction Operation")
opr_sheet = wb["Instruction Operation"]

# Column titles are in row 1; read left to right until the first empty cell.
headers = []
print("Headers:")
header_col = 1
while (title := cell_value(opr_sheet.cell(row=1, column=header_col))) is not None:
	headers.append(title)
	print(f"\t{title}")
	header_col += 1

# Data starts on row 3 and ends at the first row whose first column is empty.
# NOTE(review): row 2 is skipped on this sheet, presumably a second header
# row; confirm against the workbook.
inst_data = []
print("Rows:")
data_row = 3
while cell_value(opr_sheet.cell(row=data_row, column=1)) is not None:
	# One dict per row, keyed by the column titles collected above.
	record = {
		name: cell_value(opr_sheet.cell(row=data_row, column=col_no))
		for col_no, name in enumerate(headers, start=1)
	}
	inst_data.append(record)
	print(f"\t{record}")
	data_row += 1

Parsing Instruction Operation
Headers:
	Mnemonic
	Operand
	Byte
	Clock
	Operation
	Flag
Rows:
	{'Mnemonic': 'MOV', 'Operand': 'r, #byte', 'Byte': 3, 'Clock': 6, 'Operation': 'r \uf0ac byte', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'saddr, #byte', 'Byte': 3, 'Clock': 6, 'Operation': '(saddr) \uf0ac byte', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'sfr, #byte', 'Byte': 3, 'Clock': 6, 'Operation': 'sfr \uf0ac byte', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'A, r', 'Byte': 2, 'Clock': 4, 'Operation': 'A \uf0ac r', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'r, A', 'Byte': 2, 'Clock': 4, 'Operation': 'r \uf0ac A', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'A, saddr', 'Byte': 2, 'Clock': 4, 'Operation': 'A \uf0ac (saddr)', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'saddr, A', 'Byte': 2, 'Clock': 4, 'Operation': '(saddr) \uf0ac A', 'Flag': None}
	{'Mnemonic': 'MOV', 'Operand': 'A, sfr', 'Byte': 2, 'Clock': 4, 'Operation': 'A \uf0ac sfr', 'Flag': None}
	{'Mnemonic': '

In [9]:
print("Parsing Instruction Encoding")
opr_sheet = wb["Instruction encoding"]

# Column titles are in row 1; read left to right until the first empty cell.
headers = []
print("Headers:")
header_col = 1
while (title := cell_value(opr_sheet.cell(row=1, column=header_col))) is not None:
	headers.append(title)
	print(f"\t{title}")
	header_col += 1

# Data starts on row 2 and ends at the first row whose first column is empty.
inst_enc = []
print("Rows:")
data_row = 2
while cell_value(opr_sheet.cell(row=data_row, column=1)) is not None:
	# One dict per row, keyed by the column titles collected above.
	record = {
		name: cell_value(opr_sheet.cell(row=data_row, column=col_no))
		for col_no, name in enumerate(headers, start=1)
	}
	inst_enc.append(record)
	print(f"\t{record}")
	data_row += 1

Parsing Instruction Encoding
Headers:
	Mnemonic
	Operand
	B1
	B2
	B3
	B4
Rows:
	{'Mnemonic': 'MOV', 'Operand': 'r, #byte', 'B1': '00001  0  10', 'B2': '1  1  1  1   R2 R1 R0 1', 'B3': 'Data', 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'saddr, #byte', 'B1': '11110  1  01', 'B2': 'Saddr-offset', 'B3': 'Data', 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'sfr, #byte', 'B1': '11110  1  11', 'B2': 'Sfr-offset', 'B3': 'Data', 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'A, r', 'B1': '00001  0  10', 'B2': '0  0  1  0   R2 R1 R0 1', 'B3': None, 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'r, A', 'B1': '00001  0  10', 'B2': '1  1  1  0   R2 R1 R0 1', 'B3': None, 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'A, saddr', 'B1': '00100  1  01', 'B2': 'Saddr-offset', 'B3': None, 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'saddr, A', 'B1': '11100  1  01', 'B2': 'Saddr-offset', 'B3': None, 'B4': None}
	{'Mnemonic': 'MOV', 'Operand': 'A, sfr', 'B1': '00100  1  11', 'B2': 'Sfr-offset', 'B3': None, 'B4': 

In [10]:
from enum import Enum
from dataclasses import dataclass, field
import string

# InstructFormatter objects are appended here, one per instruction.
inst_formatters = []

# Example of the class source this cell prints for each instruction:
#
# class CmpwAxWord(Instruction):
# 	"""CMPW AX, #word."""
#
# 	mnemonic: ClassVar[str] = "CMPW AX, #word"
# 	match: ClassVar[int] = 0b11100010_00000000_00000000
# 	mmask: ClassVar[int] = 0b11111111_00000000_00000000
# 	bytecount: ClassVar[int] = 3
# 	field_defs: ClassVar[Sequence["Field"]] = (field.Imm16(name="word"),)
# 	format: ClassVar[str] = "CMPW AX, {0}"


class FlagVal(Enum):
	"""Effect an instruction has on a single status flag.

	Each member's value is a one-character code.
	NOTE(review): the codes look like display symbols for a flag column;
	nothing in this notebook reads them back, so confirm their intended use.
	"""

	# Default for every flag in FlagConfig.
	NotAffected = " "
	# Used by InstructFormatter.flag for CY when the Flag cell is the number 0.
	ClearToZero = "0"
	# Used by InstructFormatter.flag for CY when the Flag cell is the number 1.
	SetToOne = "1"
	# Used by InstructFormatter.flag for the "\uf0b4" glyph in the Flag cell
	# (presumably "set or cleared according to the result"; confirm).
	Result = "x"
	# Used by InstructFormatter.flag for the letter "R" in the Flag cell
	# (presumably "previously saved value is restored"; confirm).
	Restore = "R"


@dataclass
class FlagConfig:
	"""Effect of one instruction on each of the three tracked flags.

	Every flag defaults to ``FlagVal.NotAffected``, so ``FlagConfig()``
	describes an instruction that leaves all flags alone.
	"""

	CY: FlagVal = FlagVal.NotAffected  #: Carry flag
	AC: FlagVal = FlagVal.NotAffected  #: Auxiliary carry flag
	Z: FlagVal = FlagVal.NotAffected  #: Zero flag


@dataclass
class InstructFormatter:
	"""Pair one "operation" table row with its "encoding" table row.

	Attributes:
		i_data: Row dict with the keys ``Mnemonic``, ``Operand``, ``Byte``,
			``Clock``, ``Operation`` and ``Flag``.
		i_enc: Row dict with the keys ``Mnemonic``, ``Operand`` and
			``B1`` .. ``B4`` (one encoding string, or ``None``, per byte).
		reg_types: Variable fields found in the encoding, filled in by
			``__post_init__``. Each entry is a dict with ``Name``, ``BitLen``
			and ``BitPos``. ``BitPos`` is 1-based and counts from the first
			bit of byte 1.
	"""

	i_data: dict
	i_enc: dict

	reg_types: list = field(default_factory=list)

	# Placeholder tokens that may appear inside an encoding byte, as
	# (token, field name, bit length). Only the first token found in a byte is
	# substituted. Un-annotated, so the dataclass does not treat it as a field.
	_PLACEHOLDERS = (
		("P1P0", "P1P0 reg-pair", 2),
		("ta4to0", "5 bits of immediate data corresponding to addr5", 5),
		("R2R1R0", "R2R1R0 - 8 bit reg", 3),
		("B2B1B0", "B2B1B0 - bit address", 3),
	)

	# Operand tokens replaced by a "{}" slot, applied strictly in this order.
	# Longer tokens come before their substrings: "saddrp" before "rp" and
	# "saddr", "#byte" before "byte".
	_OPERAND_TOKENS = (
		("$addr16", "{}"),
		("[addr5]", "{}"),
		("#word", "{}"),
		("saddrp", "{}"),
		("rp", "{}"),
		("saddr", "{}"),
		("!addr16", "{}"),
		("sfr", "{}"),
		(" r,", " {},"),
		(" r", " {}"),
		("#byte", "{}"),
		("byte", "{}"),
		(".bit", "{}"),
	)

	# Field name (as stored in ``reg_types``) -> name of the ``field.*`` class
	# to emit. ``None`` means no field is emitted for that byte.
	_FIELD_CLASSES = {
		"R2R1R0 - 8 bit reg": "Reg8",
		"Data": "Imm8",
		"Saddr-offset": "SAddr",
		"Sfr-offset": "SFR",
		"Lowaddr": None,
		"Lowbyte": None,
		"Highaddr": "Addr16",
		"Highbyte": "Imm16",
		"P1P0 reg-pair": "Reg16",
		"B2B1B0 - bit address": "BitIdx3",
		"5 bits of immediate data corresponding to addr5": "Addr5",
		"jdisp": "JAddrRel",
	}

	# 1-based character position inside the Flag cell text -> flag name.
	# NOTE(review): AC and CY each appear at two positions, presumably because
	# the spacing of the cell text varies between rows; confirm.
	_FLAG_COLUMNS = {1: "Z", 7: "AC", 8: "AC", 13: "CY", 15: "CY"}

	def __post_init__(self):
		"""Scan bytes B1..B4 once and record their variable fields in ``reg_types``."""
		for b_num in range(1, 5):
			self.get_b(b_num, store=True)

	@staticmethod
	def _row_mnemonic(row):
		"""Join a row's stripped ``Mnemonic`` and ``Operand`` (which may be ``None``)."""
		operand = row["Operand"]
		return f'{row["Mnemonic"].strip()} {operand.strip() if operand is not None else ""}'.strip()

	@property
	def mnemonic(self):
		"""Full mnemonic text, e.g. ``"MOV r, #byte"``.

		Raises:
			AssertionError: If the operation row and the encoding row do not
				describe the same instruction.
		"""
		dat = self._row_mnemonic(self.i_data)
		enc = self._row_mnemonic(self.i_enc)

		assert dat == enc, f"operation row {dat!r} does not match encoding row {enc!r}"

		return dat

	@property
	def bytecount(self):
		"""Instruction length in bytes (the ``Byte`` column as an int)."""
		return int(self.i_data["Byte"])

	@property
	def clock_cycles(self):
		"""Clock count (the ``Clock`` column as an int)."""
		return int(self.i_data["Clock"])

	@property
	def text_opp(self):
		"""Raw text of the ``Operation`` column."""
		return self.i_data["Operation"]

	@property
	def flag(self):
		"""Decode the ``Flag`` column into a ``FlagConfig``.

		* ``None`` gives a ``FlagConfig`` with every flag not affected.
		* The numbers 1 and 0 set or clear CY respectively.
		* Text is read character by character: spaces are skipped,
		  ``"\\uf0b4"`` means ``FlagVal.Result`` and ``"R"`` means
		  ``FlagVal.Restore``. The character's position selects the flag.

		Raises:
			ValueError: For an unknown character, an unknown character
				position, or a non-text value other than 0 or 1.
		"""
		raw = self.i_data["Flag"]
		if raw is None:
			return FlagConfig()

		try:
			marks = [(ch, pos) for pos, ch in enumerate(raw, start=1) if ch != " "]
		except TypeError:
			# Not iterable, so the cell held a number rather than text.
			if raw == 1:
				return FlagConfig(CY=FlagVal.SetToOne)
			if raw == 0:
				return FlagConfig(CY=FlagVal.ClearToZero)
			# Previously this fell through and failed on an unbound local;
			# report the bad cell value explicitly instead.
			raise ValueError(f"Unknown val {raw!r}") from None

		effects = dict.fromkeys(("CY", "AC", "Z"), FlagVal.NotAffected)

		for ch, pos in marks:
			# Validate the character before its position, as the error
			# reported for a doubly-bad cell depends on this order.
			if ch == "\uf0b4":
				effect = FlagVal.Result
			elif ch == "R":
				effect = FlagVal.Restore
			else:
				raise ValueError(f"Unknown val {ch}")

			if pos not in self._FLAG_COLUMNS:
				raise ValueError(f"Unknown val {pos}")
			effects[self._FLAG_COLUMNS[pos]] = effect

		return FlagConfig(**effects)

	def get_b(self, b_num, store=False):
		"""Decode encoding byte ``B<b_num>`` into a ``(match, mask)`` pair.

		Args:
			b_num: 1-based byte number; the text is read from
				``i_enc["B<b_num>"]``.
			store: When true, append a description of any variable field
				found in this byte to ``reg_types``.

		Returns:
			``None`` when the byte is absent. Otherwise ``(match, mask)``:
			``match`` holds the fixed bits (variable bits are 0) and ``mask``
			has a 1 for every fixed bit and a 0 for every variable bit. A byte
			that is entirely one named field yields ``(0, 0)``.

		Raises:
			ValueError: If the byte mixes bits with an unrecognised token.
			AssertionError: If the byte is not 8 bits wide after placeholder
				substitution.
		"""
		raw = self.i_enc[f"B{b_num}"]

		if raw is None:
			return None

		bits = raw.replace(" ", "")

		# Fully fixed byte: every bit takes part in matching.
		try:
			return int(bits, 2), 0b11111111
		except ValueError:
			pass

		# 1-based position of this byte's first bit within the instruction.
		first_bit_pos = (b_num - 1) * 8 + 1

		# No binary digits at all: the whole byte is one named field. Every
		# placeholder token contains a "0", so this test cannot shadow them.
		if "1" not in bits and "0" not in bits:
			if store:
				self.reg_types.append(
					{"Name": bits, "BitLen": 8, "BitPos": first_bit_pos}
				)
			return (0b00000000, 0b00000000)

		for token, name, bit_len in self._PLACEHOLDERS:
			if token not in bits:
				continue
			marker = "X" * bit_len
			bits = bits.replace(token, marker)
			if store:
				self.reg_types.append(
					{
						"Name": name,
						"BitLen": bit_len,
						"BitPos": bits.find(marker) + first_bit_pos,
					}
				)
			break
		else:
			raise ValueError(f"Unknown val {bits}")

		assert len(bits) == 8

		match = int(bits.replace("X", "0"), 2)
		# Fixed bits (0 or 1) become 1, variable bits (X) become 0.
		mask = int(bits.replace("0", "1").replace("X", "0"), 2)
		return match, mask

	def make_format_string(self):
		"""Return the mnemonic with operands replaced by ``{0}``, ``{1}``, ...

		Operand tokens are first replaced by anonymous ``{}`` slots, which are
		then numbered from left to right.
		"""
		text = self.mnemonic
		for token, replacement in self._OPERAND_TOKENS:
			text = text.replace(token, replacement)

		head, *rest = text.split("{}")
		return head + "".join(f"{{{idx}}}{piece}" for idx, piece in enumerate(rest))

	def make_field(self, dat_dict):
		"""Render one ``reg_types`` entry as ``field.<Class>(offset=N)`` source.

		Args:
			dat_dict: A ``reg_types`` entry (``Name``, ``BitLen``, ``BitPos``).

		Returns:
			The source text, or ``None`` when the field name maps to no class.
			``offset`` is the number of bits to the right of the field within
			the ``bytecount``-byte instruction.

		Raises:
			KeyError: If ``dat_dict["Name"]`` is not a known field name.
		"""
		field_cls = self._FIELD_CLASSES[dat_dict["Name"]]
		if field_cls is None:
			return None
		offset = 8 * self.bytecount - dat_dict["BitPos"] - dat_dict["BitLen"] + 1
		return f"field.{field_cls}(offset={offset})"


# Pair each operation row with its encoding row, keep the formatter, and print
# the source of the generated Instruction subclass.
for dat, enc in zip(inst_data, inst_enc):
	t = InstructFormatter(dat, enc)
	inst_formatters.append(t)

	mnemonic = t.mnemonic
	# Class name: the mnemonic with "$" spelled out as "Rel" and every
	# character that is not an ASCII letter dropped.
	class_name = "".join(
		[ch for ch in mnemonic.replace("$", "Rel") if ch in string.ascii_letters]
	)

	# One (match, mask) pair per instruction byte.
	bytecount = t.bytecount
	byte_codes = [t.get_b(byte_no) for byte_no in range(1, bytecount + 1)]
	match_bits = "_".join(bin(code[0])[2:].zfill(8) for code in byte_codes)
	mask_bits = "_".join(bin(code[1])[2:].zfill(8) for code in byte_codes)

	# Fields whose name maps to no class come back as None and are left out.
	field_sources = [src for src in map(t.make_field, t.reg_types) if src is not None]
	if t.reg_types:
		field_defs = "(" + ",".join(field_sources) + ",)"
	else:
		field_defs = "tuple()"

	format_str = t.make_format_string()

	# The leading empty entry and the trailing four spaces reproduce the
	# blank first line and indented last line of the printed snippet.
	print(
		"\n".join(
			[
				"",
				f"    class {class_name}(Instruction):",
				f'        """{mnemonic}."""',
				"",
				f'        mnemonic: ClassVar[str] = "{mnemonic}"',
				f"        match: ClassVar[int] = 0b{match_bits}",
				f"        mmask: ClassVar[int] = 0b{mask_bits}",
				f"        bytecount: ClassVar[int] = {bytecount}",
				f'        field_defs: ClassVar[Sequence["Field"]] = {field_defs}',
				f'        format: ClassVar[str] = "{format_str}"',
				"    ",
			]
		)
	)


    class MOVrbyte(Instruction):
        """MOV r, #byte."""

        mnemonic: ClassVar[str] = "MOV r, #byte"
        match: ClassVar[int] = 0b00001010_11110001_00000000
        mmask: ClassVar[int] = 0b11111111_11110001_00000000
        bytecount: ClassVar[int] = 3
        field_defs: ClassVar[Sequence["Field"]] = (field.Reg8(offset=9),field.Imm8(offset=0),)
        format: ClassVar[str] = "MOV {0}, {1}"
    

    class MOVsaddrbyte(Instruction):
        """MOV saddr, #byte."""

        mnemonic: ClassVar[str] = "MOV saddr, #byte"
        match: ClassVar[int] = 0b11110101_00000000_00000000
        mmask: ClassVar[int] = 0b11111111_00000000_00000000
        bytecount: ClassVar[int] = 3
        field_defs: ClassVar[Sequence["Field"]] = (field.SAddr(offset=8),field.Imm8(offset=0),)
        format: ClassVar[str] = "MOV {0}, {1}"
    

    class MOVsfrbyte(Instruction):
        """MOV sfr, #byte."""

        mnemonic: ClassVar[str] = "MOV sfr, #byte"
        match: ClassVar[int] = 0b1111