In [1]:
import time
from cffi import FFI
from array2gif import write_gif
import numpy as np
import random

# options
# Notebook-wide settings; `execute` and `init` below read these as globals.
# this program requires ~ 5000 cycles
cycles      = 5000 # total cpu cycles to run; execute() records one frame per 40 cycles
binary      = 'sierp0.bin' # program image, read as raw bytes into `prog`
outputfile  = 'sierp0.gif' # GIF file name; when empty, execute() returns only the final framebuffer
framebuffer = 0x200 # framebuffer offset: first of the 1024 bytes shown as the 'screen'
pc0         = 0x600 # program counter on reset; the program is also loaded at this address
debug       = 0 # show register state during execution (copied to the library's show_debug)

# see below for assembler code

# Byte listing kept for reference.
# NOTE(review): presumably the same bytes as the `binary` file -- confirm.
# prog = [
#  0xa2,0x00,0xa9,0x00,0x85,0x00,0xa9,0x02,
#  0x85,0x01,0x20,0x1f,0x06,0x81,0x00,0xe6,
#  0x00,0xf0,0x03,0x4c,0x0a,0x06,0xe6,0x01,
#  0xa4,0x01,0xc0,0x06,0xd0,0xec,0x60,0xa5,
#  0x00,0x29,0x1f,0x85,0x02,0xa5,0x00,0x4a,
#  0x4a,0x4a,0x4a,0x4a,0x85,0x03,0xa5,0x01,
#  0x38,0xe9,0x02,0x0a,0x0a,0x0a,0x05,0x03,
#  0x25,0x02,0xf0,0x03,0xa9,0x02,0x60,0xa9,
#  0x0d,0x60]

# Load the program image; iterating the resulting bytes object yields ints.
with open(binary, "rb") as f:
    prog = f.read()
    
# init 6502
def init(ffi, PC, SP):
    """Open the emulator library and bring the CPU to its reset state.

    Args:
        ffi: object providing ``dlopen``; its cdef must already declare
            ``reset`` and ``show_debug``.
        PC: program counter value passed to ``reset``.
        SP: stack pointer value passed to ``reset``.

    Returns:
        The library handle returned by ``ffi.dlopen("./6502.so")``.
    """
    cpu = ffi.dlopen("./6502.so")
    cpu.reset(PC, SP)
    # the tracing switch comes from the notebook-level `debug` option
    cpu.show_debug = debug
    return cpu

In [2]:
def execute(prog, pc):
    """Run `prog` on the emulated 6502 and capture what memory looks like.

    Reads the notebook-level options `cycles`, `framebuffer` and `outputfile`.

    Args:
        prog: sequence of byte values (``bytes`` or list of ints); it is copied
            into emulator memory starting at address `pc`.
        pc: load address, also used as the program counter on reset.

    Returns:
        If `outputfile` is truthy: a list of ``cycles // 40`` frames, each a
        float array of shape (3, 32, 64) (one 32x64 image repeated over three
        channels). The left 32x32 half shows ``mem[0:512]`` followed by
        ``mem[pc:pc+512]``; the right 32x32 half shows the 1024 framebuffer
        bytes, each multiplied by 255 modulo 256.
        Otherwise: a uint8 array of shape (1, 32, 32) holding the raw
        framebuffer bytes after `cycles` cycles.
    """
    ffi = FFI()

    #C header stuff
    ffi.cdef("""
       typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64;
       void reset(u16,u8); u8 show_debug;
       void cpu_step(u32);
       extern u8 mem[0x10000];
       extern u8 show_debug;
    """)

    #initial program counter and stack pointer
    PC = pc
    SP = 0xff
    FB = framebuffer

    C = init(ffi, PC, SP)
    n_frames = cycles // 40

    # copy the program into emulator memory at the load address
    for offset, byte in enumerate(prog):
        C.mem[PC + offset] = byte

    if outputfile:
        imgs = []
        for _ in range(n_frames):
            C.cpu_step(cycles // n_frames)

            snapshot = np.zeros((2048, 1), dtype='uint8')
            for i in range(512):
                snapshot[i] = C.mem[i]  # zero pg,stack
            for i in range(512):
                snapshot[i + 512] = C.mem[PC + i]  # program
            for i in range(1024):
                # 255 * value exceeds the uint8 range for any value > 1.
                # NumPy >= 2 raises OverflowError when such a Python int is
                # stored into a uint8 array; masking keeps the wrap-around
                # result that older NumPy versions produced.
                snapshot[i + 1024] = (255 * C.mem[FB + i]) & 0xff  # 'screen'

            # 64 rows of 32 bytes: rows 0-31 are zero page/stack + program,
            # rows 32-63 are the screen. Place the two halves side by side.
            rows = snapshot.reshape(1, 64, 32)
            frame = np.zeros((1, 32, 64))
            frame[:, :, 0:32] = rows[:, 0:32, :]
            frame[:, :, 32:64] = rows[:, 32:64, :]

            # replicate the single plane three times along axis 0
            imgs.append(frame.repeat(3, axis=0))
        return imgs

    else:
        C.cpu_step(cycles) # just run for n cycles

        # todo, this is temporary
        screen = np.zeros((1024, 1), dtype='uint8')
        for i in range(1024):
            screen[i] = C.mem[FB + i]

        return screen.reshape(1, 32, 32)

In [3]:
# Run the program loaded from `binary` and save the captured frames as a GIF.
frames = execute(prog, pc0)
write_gif(frames, outputfile, fps=25)

from IPython.display import HTML
# Show the file that was just written: build the tag from `outputfile` so the
# displayed image follows the option instead of a second hard-coded name.
HTML(f'<img src="{outputfile}" style="width:500px;height:300px;">')



In [4]:
# entire assembler

import re

# addressing modes
#  id   name    arg format, number of bytes
# Each row is [id, name, operand regex, operand size in bytes]. The id is the
# column index into the per-mnemonic lists in `opcodes`. Every regex except
# 'impl' captures group(1) = '$' (hex marker, may be empty) and
# group(2) = the digits. Hex digits are matched in lower case only.
# Raw strings are used so that \s, \$ and \( reach the regex engine without
# triggering Python's invalid-escape-sequence warning.
modes = [
  [ 0, 'imm',  r'^\s*#(\$?)([0-9a-f]{1,2})\s*$', 1 ],
  [ 1, 'zp',   r'^\s*(\$?)0?0?([0-9a-f]{1,2})\s*$', 1 ],
  [ 2, 'zpx',  r'^\s*(\$?)([0-9a-f]{1,2})\s*,\s*[xX]\s*$', 1 ],
  [ 3, 'zpy',  r'^\s*(\$?)([0-9a-f]{1,2})\s*,\s*[yY]\s*$', 1 ],
  [ 4, 'abs',  r'^\s*(\$?)([0-9a-f]{1,4})\s*$', 2 ],
  [ 5, 'absx', r'^\s*(\$?)([0-9a-f]{3,4})\s*,\s*[xX]\s*$', 2 ],
  [ 6, 'absy', r'^\s*(\$?)([0-9a-f]{3,4})\s*,\s*[yY]\s*$', 2 ],
  [ 7, 'ind',  r'^\s*\((\$?)([0-9a-f]*)\)\s*$', 2 ],
  [ 8, 'indx', r'^\s*\((\$?)([0-9a-f]{1,2}),\s*[xX]\)\s*$', 1 ],
  [ 9, 'indy', r'^\s*\((\$?)([0-9a-f]{1,2})\s*\),\s*[yY]\s*$', 1 ],
  [10, 'impl', r'^\s*$', 0 ],
  [11, 'rel',  r'^\s*(\$?)0?0?([0-9a-f]{1,4})\s*$', 1 ],
]

# Opcode table: lower-case mnemonic -> list of 12 opcode bytes, one per
# addressing mode. The list position is the mode id from `modes` (column order
# shown in the header row below). None marks a mnemonic/mode combination that
# has no encoding; find_mode() only considers the non-None columns.
opcodes = {
  #Name,   IMM,   ZP,  ZPX,  ZPY,  ABS, ABSX, ABSY,  IND, INDX, INDY, IMPL, REL
  'adc': [0x69, 0x65, 0x75, None, 0x6d, 0x7d, 0x79, None, 0x61, 0x71, None, None],
  'and': [0x29, 0x25, 0x35, None, 0x2d, 0x3d, 0x39, None, 0x21, 0x31, None, None],
  'asl': [None, 0x06, 0x16, None, 0x0e, 0x1e, None, None, None, None, 0x0a, None],
  'bit': [None, 0x24, None, None, 0x2c, None, None, None, None, None, None, None],
  'bpl': [None, None, None, None, None, None, None, None, None, None, None, 0x10],
  'bmi': [None, None, None, None, None, None, None, None, None, None, None, 0x30],
  'bvc': [None, None, None, None, None, None, None, None, None, None, None, 0x50],
  'bvs': [None, None, None, None, None, None, None, None, None, None, None, 0x70],
  'bcc': [None, None, None, None, None, None, None, None, None, None, None, 0x90],
  'bcs': [None, None, None, None, None, None, None, None, None, None, None, 0xb0],
  'bne': [None, None, None, None, None, None, None, None, None, None, None, 0xd0],
  'beq': [None, None, None, None, None, None, None, None, None, None, None, 0xf0],
  'brk': [None, None, None, None, None, None, None, None, None, None, 0x00, None],
  'cmp': [0xc9, 0xc5, 0xd5, None, 0xcd, 0xdd, 0xd9, None, 0xc1, 0xd1, None, None],
  'cpx': [0xe0, 0xe4, None, None, 0xec, None, None, None, None, None, None, None],
  'cpy': [0xc0, 0xc4, None, None, 0xcc, None, None, None, None, None, None, None],
  'dec': [None, 0xc6, 0xd6, None, 0xce, 0xde, None, None, None, None, None, None],
  'eor': [0x49, 0x45, 0x55, None, 0x4d, 0x5d, 0x59, None, 0x41, 0x51, None, None],
  'clc': [None, None, None, None, None, None, None, None, None, None, 0x18, None],
  'sec': [None, None, None, None, None, None, None, None, None, None, 0x38, None],
  'cli': [None, None, None, None, None, None, None, None, None, None, 0x58, None],
  'sei': [None, None, None, None, None, None, None, None, None, None, 0x78, None],
  'clv': [None, None, None, None, None, None, None, None, None, None, 0xb8, None],
  'cld': [None, None, None, None, None, None, None, None, None, None, 0xd8, None],
  'sed': [None, None, None, None, None, None, None, None, None, None, 0xf8, None],
  'inc': [None, 0xe6, 0xf6, None, 0xee, 0xfe, None, None, None, None, None, None],
  'jmp': [None, None, None, None, 0x4c, None, None, 0x6c, None, None, None, None],
  'jsr': [None, None, None, None, 0x20, None, None, None, None, None, None, None],
  'lda': [0xa9, 0xa5, 0xb5, None, 0xad, 0xbd, 0xb9, None, 0xa1, 0xb1, None, None],
  'ldx': [0xa2, 0xa6, None, 0xb6, 0xae, None, 0xbe, None, None, None, None, None],
  'ldy': [0xa0, 0xa4, 0xb4, None, 0xac, 0xbc, None, None, None, None, None, None],
  'lsr': [None, 0x46, 0x56, None, 0x4e, 0x5e, None, None, None, None, 0x4a, None],
  'nop': [None, None, None, None, None, None, None, None, None, None, 0xea, None],
  'ora': [0x09, 0x05, 0x15, None, 0x0d, 0x1d, 0x19, None, 0x01, 0x11, None, None],
  'tax': [None, None, None, None, None, None, None, None, None, None, 0xaa, None],
  'txa': [None, None, None, None, None, None, None, None, None, None, 0x8a, None],
  'dex': [None, None, None, None, None, None, None, None, None, None, 0xca, None],
  'inx': [None, None, None, None, None, None, None, None, None, None, 0xe8, None],
  'tay': [None, None, None, None, None, None, None, None, None, None, 0xa8, None],
  'tya': [None, None, None, None, None, None, None, None, None, None, 0x98, None],
  'dey': [None, None, None, None, None, None, None, None, None, None, 0x88, None],
  'iny': [None, None, None, None, None, None, None, None, None, None, 0xc8, None],
  'ror': [None, 0x66, 0x76, None, 0x6e, 0x7e, None, None, None, None, 0x6a, None],
  'rol': [None, 0x26, 0x36, None, 0x2e, 0x3e, None, None, None, None, 0x2a, None],
  'rti': [None, None, None, None, None, None, None, None, None, None, 0x40, None],
  'rts': [None, None, None, None, None, None, None, None, None, None, 0x60, None],
  'sbc': [0xe9, 0xe5, 0xf5, None, 0xed, 0xfd, 0xf9, None, 0xe1, 0xf1, None, None],
  'sta': [None, 0x85, 0x95, None, 0x8d, 0x9d, 0x99, None, 0x81, 0x91, None, None],
  'txs': [None, None, None, None, None, None, None, None, None, None, 0x9a, None],
  'tsx': [None, None, None, None, None, None, None, None, None, None, 0xba, None],
  'pha': [None, None, None, None, None, None, None, None, None, None, 0x48, None],
  'pla': [None, None, None, None, None, None, None, None, None, None, 0x68, None],
  'php': [None, None, None, None, None, None, None, None, None, None, 0x08, None],
  'plp': [None, None, None, None, None, None, None, None, None, None, 0x28, None],
  'stx': [None, 0x86, None, 0x96, 0x8e, None, None, None, None, None, None, None],
  'sty': [None, 0x84, 0x94, None, 0x8c, None, None, None, None, None, None, None],
  # row with no encodings at all
  # NOTE(review): looks like a template/placeholder entry -- confirm it is intentional
  '---': [None, None, None, None, None, None, None, None, None, None, None, None]
}

def encode(op, mode, rawval, is_hex, pc, verbose=False):
  """Build the instruction record for one mnemonic/operand pair.

  Args:
    op: mnemonic; looked up in `opcodes` in lower case.
    mode: a row of `modes` ([id, name, regex, operand byte count]).
    rawval: operand digits as text, or None/'' when there is no operand.
    is_hex: truthy when `rawval` is hexadecimal, otherwise it is read as decimal.
    pc: address of this instruction; only used for 'rel' mode.
    verbose: print the chosen encoding and the parsed operand.

  Returns:
    dict with 'src' (the text '<op> <rawval>') and 'obj' (list of ints:
    the opcode byte followed by the operand bytes, low byte first).
  """
  operand = None
  if rawval:
    base = 16 if is_hex else 10
    operand = int(rawval, base)

  n_operand_bytes = mode[3]
  opcode_byte = opcodes[op.lower()][mode[0]]

  if verbose:
    print('  * mode [{}], opcode [0x{:02x}], argbytes [{}] '.format(
      mode[1], opcode_byte, n_operand_bytes))
    print('  * rawval [{}], ishex [{}], val [{}] '.format(
      rawval, is_hex, operand))

  # relative mode stores the distance from the end of this 2-byte instruction
  if mode[1] == 'rel':
    operand = operand - pc - 2

  # little-endian operand; the mask also folds negative offsets into one byte
  obj = [opcode_byte] + [
    (operand >> (8 * k)) & 0xff for k in range(n_operand_bytes)
  ]

  return {'src': '{} {}'.format(op, rawval), 'obj': obj}

def find_mode(op, args):
  """Pick the addressing mode for the expression (op, args).

  Every mode that has an encoding for `op` is tried against `args`; among the
  modes whose regex matches, the one with the fewest operand bytes wins
  (ties keep the earlier mode in `modes` order).

  Args:
    op: mnemonic, any case.
    args: operand text, or None for no operand.

  Returns:
    (mode, is_hex, rawval): `mode` is a row of `modes`; `is_hex` is the
    captured '$' marker ('' when absent) and `rawval` the captured digits,
    both taken from the regex match of the selected mode. For implied mode
    both are None. Returns (None, None, None) when the mnemonic is unknown
    or no mode accepts `args`.
  """
  row = opcodes.get(op.lower())
  if row is None:
    # unknown mnemonic: report "no mode" instead of raising KeyError
    return None, None, None

  if args is None:
    args = ""

  mode = None
  is_hex = None
  rawval = None

  for idx, opcode_byte in enumerate(row):
    if opcode_byte is None:
      continue  # this mnemonic has no encoding in this mode
    match = re.search(modes[idx][2], args)
    if match is None:
      continue
    candidate = modes[idx]
    if mode is None or candidate[3] < mode[3]:
      mode = candidate
      # Take the captures from the match that was actually selected, so a
      # later, rejected mode cannot overwrite them.
      if candidate[0] != 10:
        is_hex = match.group(1)
        rawval = match.group(2)
      else:
        # implied mode has no operand groups
        is_hex = None
        rawval = None

  return mode, is_hex, rawval

def assemble(lines, pc0, verbose=False):
  """Assemble 6502 source lines into encoded instructions.

  Each line has the form ``[label:] [OPC [args]] [; comment]``. An operand
  that is exactly the name of an already defined label is replaced by that
  label's address. An operand that matches no addressing mode is treated as a
  forward reference: the instruction is first encoded with a placeholder
  operand and re-encoded when the label is defined.

  Args:
    lines: sequence of source-line strings.
    pc0: address of the first instruction.
    verbose: print a trace of parsing and encoding.

  Returns:
    (instr, n_bytes, labels) where `instr` maps address -> dict produced by
    encode(), `n_bytes` is the end address minus `pc0`, and `labels` maps
    label name -> address formatted as '$xxxx'.
    On a fatal error a message is printed and (None, None, None) is returned.
    Fatal errors are: an unparseable line, an unknown mnemonic, an operand
    with no addressing mode, a forward reference whose patched size differs
    from the placeholder, or a label that is never defined.
  """
  pc = pc0

  # a regex which accepts lines
  # line = [label] [expr] [comment]
  # expr = OPC [args]
  # label = alphanum + ':'
  # comment = ';' + anything
  pattern = r'^\s*(((?P<label>[A-Za-z0-9]+):)?' \
            r'\s*(?P<expr>(?P<op>[a-zA-Z]{3})' \
            r'(\s+(?P<args>.*?))?)?)?' \
            r'(?P<comment>;.*)?$'

  # Operand used while a label is still unknown. A full 16-bit value makes
  # find_mode choose 'abs' (or 'rel' for branches), which is the size the
  # instruction will have once a '$xxxx' label address is patched in.
  placeholder = '$ffff'

  instr, labels = {}, {}
  unresolved = {}  # label name -> list of (address, op) awaiting that label

  for i, l in enumerate(lines):
    m = re.search(pattern, l)
    if verbose:
      print('\nline {}=[{}]'.format(i, l))
      print('labels={}'.format(labels))
      print('unresolved={}'.format(unresolved))

    if m is None:
      print('uh-oh: could not parse line {} [{}]'.format(i, l))
      return None, None, None

    # extract tokens
    expr = m.group('expr')        # entire expression
    op = m.group('op')            # op
    label = m.group('label')      # label
    args = m.group('args')        # args
    comment = m.group('comment')  # comment

    if verbose:
      print(' label=[{}], expr=[{}], ' \
            'op=[{}], args=[{}], comm=[{}]'.format(
            label, expr, op, args, comment))

    # do we have a label?
    if label:
      # insert into our dictionary
      labels[label] = '${:04x}'.format(pc)
      if verbose:
        print('  * lab [{}] pc={:x}'.format(label, pc))
      # patch every instruction that referenced this label before it existed;
      # pop() removes the entry so it cannot be patched twice
      for ref_pc, ref_op in unresolved.pop(label, []):
        ref_mode, ref_hex, ref_raw = find_mode(ref_op, labels[label])
        if ref_mode is None:
          print('uh-oh: could not find addressing mode')
          return None, None, None
        patched = encode(ref_op, ref_mode, ref_raw, ref_hex, ref_pc, verbose)
        if len(patched['obj']) != len(instr[ref_pc]['obj']):
          # addresses after ref_pc were laid out for the placeholder size
          print('uh-oh: reference to [{}] at ${:04x} changes size when resolved'.format(
            label, ref_pc))
          return None, None, None
        instr[ref_pc] = patched

    # is arg a label ?
    if args:
      args_str = args.strip()
      if args_str in labels:
        args = labels[args_str]
        if verbose:
          print('  * args found as a label [{}] -> [{}]'.format(args_str, args))

    if op is None:  # empty or comments
      continue

    if op.lower() not in opcodes:
      print('uh-oh: unknown instruction [{}]'.format(op))
      return None, None, None

    # determine the mode
    mode, is_hex, rawval = find_mode(op, args)
    if mode is None:
      # something went wrong, maybe it's a label that is defined further down
      target = args.strip() if args else ''
      if target:
        mode, is_hex, rawval = find_mode(op, placeholder)
      if mode is None:
        # still not working: FATAL
        print('uh-oh: could not find addressing mode')
        return None, None, None
      # the placeholder worked, we need to patch it later
      unresolved.setdefault(target, []).append((pc, op))

    instr[pc] = encode(op, mode, rawval, is_hex, pc, verbose)
    pc += len(instr[pc]['obj'])

  if unresolved:
    # these instructions still carry the placeholder operand
    print('uh-oh: undefined label(s): {}'.format(', '.join(sorted(unresolved))))
    return None, None, None

  return instr, pc - pc0, labels

In [5]:
source = """
start:  
  lda #$e1    
  sta $0   
  lda #$01
  sta $1 
  ldy #$20
                         
write:    
  ldx #$00 
  eor ($0, x)
  sta ($0),y
                         
  inc $0
  bne write
  inc $1   
  ldx $1
  cpx #$06
  bne write
                         
  rts
"""

lines = source.split('\n')
instr, n_bytes, labels = assemble(lines, pc0, verbose=False)

if instr is None:
    print('Assembly failed!')
else:
    # write a binary: `instr` is keyed by absolute address, `mem` is relative to pc0
    mem = [0] * n_bytes
    for addr, ins in instr.items():
        for n, byte in enumerate(ins['obj']):
            mem[addr - pc0 + n] = byte

    #for k,i in enumerate(instr):
    #    print('[{}] -> [0x{:04x}] {:}'.format(instr[i]['src'], i, ['0x{:02x}'.format(x) for x in instr[i]['obj']]))

    # print only on success: `mem` does not exist when assembly failed
    print('binary')
    print(''.join([f'{b:02x}' for b in mem]))

binary
a9e18500a9018501a020a20041009100e600d0f6e601a601e006d0ee60


In [6]:
# Run the bytes assembled in the previous cell (`mem`), loaded at pc0,
# and save the captured frames as a second GIF.
frames1 = execute(mem, pc0)
write_gif(frames1, 'assembled.gif', fps=25)

from IPython.display import HTML
# the tag's file name must match the one passed to write_gif above
HTML('<img src="assembled.gif" style="width:500px;height:300px;">')