In [1]:
pwd

'/Users/netcan/Workspace/Programming/FCEmulator/src/tools'

## 爬取Instruction Reference的指令信息
[http://obelisk.me.uk/6502/reference.html](http://obelisk.me.uk/6502/reference.html)

In [2]:
import InstructionReference
import pandas as pd

In [3]:
# Fetch the instruction data through the project-local InstructionReference
# helper and load the result into a DataFrame.
# NOTE(review): presumably get_inst_ref() scrapes the reference page linked
# above; confirm against the helper module.
ret = InstructionReference.get_inst_ref()
df = pd.DataFrame(ret)
# Preview the raw table before any cleaning is applied.
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,$69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,Zero Page,$65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,"Zero Page,X",$75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,$6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,"Absolute,X",$7D,3,4 (+1 if page crossed),0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


## 对爬取的指令细节进行清洗

In [4]:
# Rewrite the scraped addressing-mode labels as identifier-style names
# (e.g. 'Zero Page,X' -> 'ZeroPageX', '(Indirect),Y' -> 'IndirectIndex').
# The rules run in order: 'Zero Page' has to be collapsed to 'ZeroPage'
# before the ',X' / ',Y' variants can match.
# Patterns are raw strings with an explicit regex=True because
#   * '\s' and '\(' in a normal string literal are invalid escape sequences;
#   * pandas >= 2.0 treats str.replace patterns as literals by default, which
#     would silently skip the regex-based rules.
df['addressingMode'] = (
    df['addressingMode']
    .str.replace(r'Implied', 'Implicit', regex=True)
    .str.replace(r'Zero\s+Page', 'ZeroPage', regex=True)
    .str.replace(r'ZeroPage,X', 'ZeroPageX', regex=True)
    .str.replace(r'ZeroPage,Y', 'ZeroPageY', regex=True)
    .str.replace(r'Absolute,X', 'AbsoluteX', regex=True)
    .str.replace(r'Absolute,Y', 'AbsoluteY', regex=True)
    .str.replace(r'\(Indirect,X\)', 'IndexIndirect', regex=True)
    .str.replace(r'\(Indirect\),Y', 'IndirectIndex', regex=True)
)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,$69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,$65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,$75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,$6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,$7D,3,4 (+1 if page crossed),0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


In [5]:
# Convert the '$xx' hex notation into C-style '0xXX' literals.
# regex=False makes the replacement literal on every pandas version: read as
# a regular expression, '$' is the end-of-string anchor rather than a dollar
# sign.
df['code'] = df['code'].str.replace('$', '0x', regex=False)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,0x69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,0x65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,0x75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,0x6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,0x7D,3,4 (+1 if page crossed),0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


In [6]:
# Record the conditional cycle penalty mentioned in the free-text 'cycles'
# column (e.g. "4 (+1 if page crossed)").  The '+2' rule runs last, so a row
# whose text mentions both '+1' and '+2' ends up with 2.
# Raw strings avoid the invalid '\+' escape sequence of a normal literal.
# NOTE(review): these assignments store ints, while a later cell compares
# extraCycles against the string '0' -- confirm the intended column type.
df.loc[df['cycles'].str.contains(r'\+1'), 'extraCycles'] = 1
df.loc[df['cycles'].str.contains(r'\+2'), 'extraCycles'] = 2
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,0x69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,0x65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,0x75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,0x6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,0x7D,3,4 (+1 if page crossed),1,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


In [7]:
# Reduce the 'cycles' text to its leading base cycle count:
#   1. remove every whitespace character (including embedded newlines);
#   2. keep only the leading run of digits, dropping notes such as
#      "(+1 if page crossed)".
# regex=True is explicit because pandas >= 2.0 defaults to literal matching
# and rejects a callable replacement in that mode; raw strings avoid the
# invalid '\s' / '\d' escape sequences.
df['cycles'] = (
    df['cycles']
    .str.replace(r'[\n\s]', '', regex=True)
    .str.replace(r'^(\d+).*', lambda match: match.group(1), regex=True)
)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,0x69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,0x65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,0x75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,0x6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,0x7D,3,4,1,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


## 导出指令信息结构体数组，供 C++ 实现使用

In [8]:
# Emit one C++ aggregate initialiser per instruction row; the trailing
# nullptr is the placeholder slot of each generated entry.
row_template = '\t{{{}, {}, {}, {}, {}, {}, nullptr}}, '
table_columns = ['name', 'addressingMode', 'code', 'bytes', 'cycles', 'extraCycles']
for fields in zip(*(df[column] for column in table_columns)):
    print(row_template.format(*fields))

	{ADC, Immediate, 0x69, 2, 2, 0, nullptr}, 
	{ADC, ZeroPage, 0x65, 2, 3, 0, nullptr}, 
	{ADC, ZeroPageX, 0x75, 2, 4, 0, nullptr}, 
	{ADC, Absolute, 0x6D, 3, 4, 0, nullptr}, 
	{ADC, AbsoluteX, 0x7D, 3, 4, 1, nullptr}, 
	{ADC, AbsoluteY, 0x79, 3, 4, 1, nullptr}, 
	{ADC, IndexIndirect, 0x61, 2, 6, 0, nullptr}, 
	{ADC, IndirectIndex, 0x71, 2, 5, 1, nullptr}, 
	{AND, Immediate, 0x29, 2, 2, 0, nullptr}, 
	{AND, ZeroPage, 0x25, 2, 3, 0, nullptr}, 
	{AND, ZeroPageX, 0x35, 2, 4, 0, nullptr}, 
	{AND, Absolute, 0x2D, 3, 4, 0, nullptr}, 
	{AND, AbsoluteX, 0x3D, 3, 4, 1, nullptr}, 
	{AND, AbsoluteY, 0x39, 3, 4, 1, nullptr}, 
	{AND, IndexIndirect, 0x21, 2, 6, 0, nullptr}, 
	{AND, IndirectIndex, 0x31, 2, 5, 1, nullptr}, 
	{ASL, Accumulator, 0x0A, 1, 2, 0, nullptr}, 
	{ASL, ZeroPage, 0x06, 2, 5, 0, nullptr}, 
	{ASL, ZeroPageX, 0x16, 2, 6, 0, nullptr}, 
	{ASL, Absolute, 0x0E, 3, 6, 0, nullptr}, 
	{ASL, AbsoluteX, 0x1E, 3, 7, 0, nullptr}, 
	{BCC, Relative, 0x90, 2, 2, 2, nullptr}, 
	{BCS, Relative, 0xB0

In [9]:
# Number of distinct instruction mnemonics in the scraped table.
len(df['name'].unique())

56

In [10]:
# Non-null value count per column; equal counts mean no column has gaps.
df.count()

name              151
addressingMode    151
code              151
bytes             151
cycles            151
extraCycles       151
description       151
dtype: int64

## 按指令实现难易程度排序

In [11]:
# Order the rows by byte length, then cycle count, then extra cycles, and
# keep only the first row of each mnemonic after sorting (drop_duplicates
# keeps the first occurrence by default).  The section heading uses this
# ordering as a proxy for implementation difficulty.
df_sorted = df.sort_values(by=['bytes', 'cycles', 'extraCycles']).drop_duplicates(subset='name')
df_sorted.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
16,ASL,Accumulator,0x0A,1,2,0,"[ASL - Arithmetic Shift Left, A,Z,C,N = M*2 or..."
32,CLC,Implicit,0x18,1,2,0,"[CLC - Clear Carry Flag, C = 0, Set the carry ..."
33,CLD,Implicit,0xD8,1,2,0,"[CLD - Clear Decimal Mode, D = 0, Sets the dec..."
34,CLI,Implicit,0x58,1,2,0,"[CLI - Clear Interrupt Disable, I = 0, Clears ..."
35,CLV,Implicit,0xB8,1,2,0,"[CLV - Clear Overflow Flag, V = 0, Clears the ..."


## 生成指令函数声明

In [12]:
# Print one OpExeFuncDecl(...) line per mnemonic, annotated with the first
# element of its description (the instruction title).
print('/**************** 指令声明区Begin ****************/')
for op_name, op_description in zip(df_sorted['name'], df_sorted['description']):
    print('OpExeFuncDecl(OP_{}); // {}'.format(op_name, op_description[0]))
print('/****************  指令声明区End  ****************/\n')

/**************** 指令声明区Begin ****************/
OpExeFuncDecl(OP_ASL); // ASL - Arithmetic Shift Left
OpExeFuncDecl(OP_CLC); // CLC - Clear Carry Flag
OpExeFuncDecl(OP_CLD); // CLD - Clear Decimal Mode
OpExeFuncDecl(OP_CLI); // CLI - Clear Interrupt Disable
OpExeFuncDecl(OP_CLV); // CLV - Clear Overflow Flag
OpExeFuncDecl(OP_DEX); // DEX - Decrement X Register
OpExeFuncDecl(OP_DEY); // DEY - Decrement Y Register
OpExeFuncDecl(OP_INX); // INX - Increment X Register
OpExeFuncDecl(OP_INY); // INY - Increment Y Register
OpExeFuncDecl(OP_LSR); // LSR - Logical Shift Right
OpExeFuncDecl(OP_NOP); // NOP - No Operation
OpExeFuncDecl(OP_ROL); // ROL - Rotate Left
OpExeFuncDecl(OP_ROR); // ROR - Rotate Right
OpExeFuncDecl(OP_SEC); // SEC - Set Carry Flag
OpExeFuncDecl(OP_SED); // SED - Set Decimal Flag
OpExeFuncDecl(OP_SEI); // SEI - Set Interrupt Disable
OpExeFuncDecl(OP_TAX); // TAX - Transfer Accumulator to X
OpExeFuncDecl(OP_TAY); // TAY - Transfer Accumulator to Y
OpExeFuncDecl(OP_TSX); // T

## 生成指令函数定义

In [13]:
# Look at the description entry of the first JSR row.
df.loc[df['name'] == 'JSR', 'description'].iloc[0]

['JSR - Jump to Subroutine',
 'The JSR instruction pushes the address (minus one) of the return\r\npoint on to the stack and then sets the program counter to the\r\ntarget memory address.']

In [14]:
# How many rows have a description made of 2 parts versus 3 parts.
df['description'].apply(len).value_counts()

3    122
2     29
Name: description, dtype: int64

In [15]:
# Mnemonics whose description has only two parts.
has_two_parts = df_sorted['description'].apply(len) == 2
df_sorted.loc[has_two_parts, 'name'].head()

96     NOP
109    ROL
114    ROR
105    PHA
106    PHP
Name: name, dtype: object

In [16]:
# Print a C++ stub per mnemonic: an OpExeFuncDefine(...) body holding a TODO
# marker, the scraped description as a block comment, and a default return.
definition_template = (
    'OpExeFuncDefine(OP_{}) {{\n'
    '\t// TODO: wait for implements: {}\n'
    '\t/**\n'
    '\t * {}\n'
    '\t * {}\n'
    '{}'
    '\t **/\n'
    '\n\treturn self.cycles;\n'
    '}}\n'
)

print('/**************** 指令实现区Begin ****************/')
for op_name, op_description in zip(df_sorted['name'], df_sorted['description']):
    title = op_description[0]
    # Re-indent wrapped text so continuation lines stay inside the comment.
    details = [text.replace('\r\n', '\n\t * ') for text in op_description[1:]]
    if len(details) < 2:
        # Only one detail line: the optional third comment slot stays empty.
        details.append('')
    else:
        details[1] = '\t * {}\n'.format(details[1])

    print(definition_template.format(op_name, op_name, title, *details))
print('/****************  指令实现区End  ****************/')

/**************** 指令实现区Begin ****************/
OpExeFuncDefine(OP_ASL) {
	// TODO: wait for implements: ASL
	/**
	 * ASL - Arithmetic Shift Left
	 * A,Z,C,N = M*2 or M,Z,C,N = M*2
	 * This operation shifts all the bits of the accumulator or memory
	 * contents one bit left. Bit 0 is set to 0 and bit 7 is placed in
	 * the carry flag. The effect of this operation is to multiply the
	 * memory contents by 2 (ignoring 2's complement considerations),
	 * setting the carry if the result will not fit in 8 bits.
	 **/

	return self.cycles;
}

OpExeFuncDefine(OP_CLC) {
	// TODO: wait for implements: CLC
	/**
	 * CLC - Clear Carry Flag
	 * C = 0
	 * Set the carry flag to zero.
	 **/

	return self.cycles;
}

OpExeFuncDefine(OP_CLD) {
	// TODO: wait for implements: CLD
	/**
	 * CLD - Clear Decimal Mode
	 * D = 0
	 * Sets the decimal mode flag to zero.
	 **/

	return self.cycles;
}

OpExeFuncDefine(OP_CLI) {
	// TODO: wait for implements: CLI
	/**
	 * CLI - Clear Interrupt Disable
	 * I = 0
	 * Cl

In [17]:
# Length of the longest addressing-mode name.
mode_name_lengths = df['addressingMode'].str.len()
mode_name_lengths.max()

13

In [241]:
# Check that no zero-page addressing mode carries a "+1" extra cycle.
is_zero_page = df['addressingMode'].str.contains('ZeroPage')
(df.loc[is_zero_page, 'extraCycles'] == '0').all()

True

In [242]:
# Check that the indexed-indirect ("(Indirect,X)") mode never carries a "+1"
# extra cycle.
# The cleaned mode name is 'IndexIndirect'.  The previous pattern
# 'IndexIndirected' matched no row at all, so .all() ran on an empty
# selection and was vacuously True.
is_index_indirect = df['addressingMode'].str.contains('IndexIndirect')
# .any() guards against another vacuous pass; astype(str) keeps the check
# valid whether extraCycles holds the string '0' or the int 0.
is_index_indirect.any() and (df.loc[is_index_indirect, 'extraCycles'].astype(str) == '0').all()

True

## Undocumented Opcodes

In [21]:
import re

import requests

# Download the plain-text list of undocumented opcodes.
# timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status stops later cells from parsing an HTTP error page as if
# it were the opcode list.
response = requests.get('http://nesdev.com/undocumented_opcodes.txt', timeout=30)
response.raise_for_status()
txt = response.text

In [49]:
# Collect the mnemonic from every section heading of the form
# "<NAME> (<alias>) [<alias>]" that is followed by an underline made of
# literal '=3D' sequences (the downloaded text contains them verbatim).
# The pattern is a raw string: '\w', '\(' and '\[' are invalid escape
# sequences in a normal string literal.
opName = [
    heading.group(1)
    for heading in re.finditer(
        r'(\w+) \(\w+\) \[\w+\]\s=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D\n',
        txt,
    )
]
opName

['AAC',
 'AAX',
 'ARR',
 'ASR',
 'ATX',
 'AXA',
 'AXS',
 'DCP',
 'DOP',
 'ISC',
 'KIL',
 'LAR',
 'LAX',
 'NOP',
 'RLA',
 'RRA',
 'SBC',
 'SLO',
 'SRE',
 'SXA',
 'SYA',
 'TOP',
 'XAA',
 'XAS']

In [64]:
# Probe: show the first heading underline together with the line after it.
# Raw string, because '\s' is an invalid escape in a normal string literal.
re.search(r'=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D\s.*?\n', txt).group()

'=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D\nAND byte with accumulator. If result is negative then carry is set.\n'

In [114]:
opDescription = []
for m in re.finditer('=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D\s(.*?)\n\n', txt, re.DOTALL):
    opDescription.append(m.group(1))
opDescription

['AND byte with accumulator. If result is negative then carry is set.\nStatus flags: N,Z,C',
 'AND X register with accumulator and store result in memory. Status\nflags: N,Z',
 'AND byte with accumulator, then rotate one bit right in accu-mulator and\ncheck bit 5 and 6:\nIf both bits are 1: set C, clear V.\nIf both bits are 0: clear C and V.\nIf only bit 5 is 1: set V, clear C.\nIf only bit 6 is 1: set C and V.\nStatus flags: N,V,Z,C',
 'AND byte with accumulator, then shift right one bit in accumu-lator.\nStatus flags: N,Z,C',
 'AND byte with accumulator, then transfer accumulator to X register.\nStatus flags: N,Z',
 'AND X register with accumulator then AND result with 7 and store in\nmemory. Status flags: -',
 'AND X register with accumulator and store result in X regis-ter, then\nsubtract byte from X register (without borrow).\nStatus flags: N,Z,C',
 'Subtract 1 from memory (without borrow).\nStatus flags: C',
 'No operation (double NOP). The argument has no signifi-cance. Status\n

In [89]:
# Sanity check: one description was extracted per opcode name, so the two
# lists can be paired up positionally.
len(opDescription) == len(opName)

True

In [156]:
# Pair each opcode name with the description found at the same position.
opMap = {op_name: op_text for op_name, op_text in zip(opName, opDescription)}
opMap

{'AAC': 'AND byte with accumulator. If result is negative then carry is set.\nStatus flags: N,Z,C',
 'AAX': 'AND X register with accumulator and store result in memory. Status\nflags: N,Z',
 'ARR': 'AND byte with accumulator, then rotate one bit right in accu-mulator and\ncheck bit 5 and 6:\nIf both bits are 1: set C, clear V.\nIf both bits are 0: clear C and V.\nIf only bit 5 is 1: set V, clear C.\nIf only bit 6 is 1: set C and V.\nStatus flags: N,V,Z,C',
 'ASR': 'AND byte with accumulator, then shift right one bit in accumu-lator.\nStatus flags: N,Z,C',
 'ATX': 'AND byte with accumulator, then transfer accumulator to X register.\nStatus flags: N,Z',
 'AXA': 'AND X register with accumulator then AND result with 7 and store in\nmemory. Status flags: -',
 'AXS': 'AND X register with accumulator and store result in X regis-ter, then\nsubtract byte from X register (without borrow).\nStatus flags: N,Z,C',
 'DCP': 'Subtract 1 from memory (without borrow).\nStatus flags: C',
 'DOP': 'No oper

In [136]:
# Probe: pull (addressingMode, name, code, bytes, cycles) out of every
# '|'-separated opcode table row.  Raw string, because '\w', '\s', '\|',
# '\$' and '\d' are invalid escapes in a normal string literal.
re.findall(r'(?P<addressingMode>[ \w()#$,]+)\s*\|(?P<name>\w+)\s.*?\|(?P<code>\$\w{2})\|\s(?P<bytes>\d)\s\|\s(?P<cycles>[\d-])', txt)

[('Immediate   ', 'AAC', '$0B', '2', '2'),
 ('Immediate   ', 'AAC', '$2B', '2', '2'),
 ('Zero Page   ', 'AAX', '$87', '2', '3'),
 ('Zero Page,Y ', 'AAX', '$97', '2', '4'),
 ('(Indirect,X)', 'AAX', '$83', '2', '6'),
 ('Absolute    ', 'AAX', '$8F', '3', '4'),
 ('Immediate   ', 'ARR', '$6B', '2', '2'),
 ('Immediate   ', 'ASR', '$4B', '2', '2'),
 ('Immediate   ', 'ATX', '$AB', '2', '2'),
 ('Absolute,Y  ', 'AXA', '$9F', '3', '5'),
 ('(Indirect),Y', 'AXA', '$93', '2', '6'),
 ('Immediate   ', 'AXS', '$CB', '2', '2'),
 ('Zero Page   ', 'DCP', '$C7', '2', '5'),
 ('Zero Page,X ', 'DCP', '$D7', '2', '6'),
 ('Absolute    ', 'DCP', '$CF', '3', '6'),
 ('Absolute,X  ', 'DCP', '$DF', '3', '7'),
 ('Absolute,Y  ', 'DCP', '$DB', '3', '7'),
 ('(Indirect,X)', 'DCP', '$C3', '2', '8'),
 ('(Indirect),Y', 'DCP', '$D3', '2', '8'),
 ('Zero Page   ', 'DOP', '$04', '2', '3'),
 ('Zero Page,X ', 'DOP', '$14', '2', '4'),
 ('Zero Page,X ', 'DOP', '$34', '2', '4'),
 ('Zero Page   ', 'DOP', '$44', '2', '3'),
 ('Zero Pag

In [158]:
opTable = []
for m in re.finditer('(?P<addressingMode>[ \w()#$,]+)\s*\|(?P<name>\w+)\s.*?\|(?P<code>\$\w{2})\|\s(?P<bytes>\d)\s\|\s(?P<cycles>[\d-])', txt):
    d = m.groupdict()
    d['description'] = opMap[d['name']]
    opTable.append(d)
opTable

[{'addressingMode': 'Immediate   ',
  'bytes': '2',
  'code': '$0B',
  'cycles': '2',
  'description': 'AND byte with accumulator. If result is negative then carry is set.\nStatus flags: N,Z,C',
  'name': 'AAC'},
 {'addressingMode': 'Immediate   ',
  'bytes': '2',
  'code': '$2B',
  'cycles': '2',
  'description': 'AND byte with accumulator. If result is negative then carry is set.\nStatus flags: N,Z,C',
  'name': 'AAC'},
 {'addressingMode': 'Zero Page   ',
  'bytes': '2',
  'code': '$87',
  'cycles': '3',
  'description': 'AND X register with accumulator and store result in memory. Status\nflags: N,Z',
  'name': 'AAX'},
 {'addressingMode': 'Zero Page,Y ',
  'bytes': '2',
  'code': '$97',
  'cycles': '4',
  'description': 'AND X register with accumulator and store result in memory. Status\nflags: N,Z',
  'name': 'AAX'},
 {'addressingMode': '(Indirect,X)',
  'bytes': '2',
  'code': '$83',
  'cycles': '6',
  'description': 'AND X register with accumulator and store result in memory. Stat

In [162]:
# Turn the list of per-opcode dicts into a DataFrame (one row per dict).
undocumented_op_df = pd.DataFrame(opTable)
undocumented_op_df.head()

Unnamed: 0,addressingMode,bytes,code,cycles,description,name
0,Immediate,2,$0B,2,AND byte with accumulator. If result is negati...,AAC
1,Immediate,2,$2B,2,AND byte with accumulator. If result is negati...,AAC
2,Zero Page,2,$87,3,AND X register with accumulator and store resu...,AAX
3,"Zero Page,Y",2,$97,4,AND X register with accumulator and store resu...,AAX
4,"(Indirect,X)",2,$83,6,AND X register with accumulator and store resu...,AAX


In [161]:
# Total row count across the documented and undocumented tables.
# NOTE(review): presumably expected to be 256 (every one-byte opcode value);
# confirm that no opcode appears in both tables.
len(undocumented_op_df) + len(df)

256

## 对爬取的指令进行清洗

In [163]:
# Addressing-mode names used by the documented table, for reference when
# cleaning the undocumented table below.
df['addressingMode'].unique()

array(['Immediate', 'ZeroPage', 'ZeroPageX', 'Absolute', 'AbsoluteX',
       'AbsoluteY', 'IndexIndirect', 'IndirectIndex', 'Accumulator',
       'Relative', 'Implicit', 'Indirect', 'ZeroPageY'], dtype=object)

In [168]:
# Normalise the undocumented table's addressing-mode labels to the same
# identifier-style names used by the documented table.
#   * str.strip() removes the column padding captured from the text table
#     (e.g. 'Immediate   '), which would otherwise survive the replacements.
#   * The rules run in order: 'Zero Page' is collapsed before ',X' / ',Y'.
#   * Raw strings with an explicit regex=True: pandas >= 2.0 treats
#     str.replace patterns as literals by default, and '\s' / '\(' are
#     invalid escape sequences in a normal string literal.
undocumented_op_df['addressingMode'] = (
    undocumented_op_df['addressingMode']
    .str.strip()
    .str.replace(r'Implied', 'Implicit', regex=True)
    .str.replace(r'Zero\s+Page', 'ZeroPage', regex=True)
    .str.replace(r'ZeroPage,X', 'ZeroPageX', regex=True)
    .str.replace(r'ZeroPage,Y', 'ZeroPageY', regex=True)
    .str.replace(r'Absolute,X', 'AbsoluteX', regex=True)
    .str.replace(r'Absolute,Y', 'AbsoluteY', regex=True)
    .str.replace(r'\(Indirect,X\)', 'IndexIndirect', regex=True)
    .str.replace(r'\(Indirect\),Y', 'IndirectIndex', regex=True)
)

In [167]:
# Convert the '$xx' hex notation into C-style '0xXX' literals.
# regex=False makes the replacement literal on every pandas version: read as
# a regular expression, '$' is the end-of-string anchor.
undocumented_op_df['code'] = undocumented_op_df['code'].str.replace('$', '0x', regex=False)

In [169]:
# Display the undocumented-opcode table after cleaning.
undocumented_op_df

Unnamed: 0,addressingMode,bytes,code,cycles,description,name
0,Immediate,2,0x0B,2,AND byte with accumulator. If result is negati...,AAC
1,Immediate,2,0x2B,2,AND byte with accumulator. If result is negati...,AAC
2,ZeroPage,2,0x87,3,AND X register with accumulator and store resu...,AAX
3,ZeroPageY,2,0x97,4,AND X register with accumulator and store resu...,AAX
4,IndexIndirect,2,0x83,6,AND X register with accumulator and store resu...,AAX
5,Absolute,3,0x8F,4,AND X register with accumulator and store resu...,AAX
6,Immediate,2,0x6B,2,"AND byte with accumulator, then rotate one bit...",ARR
7,Immediate,2,0x4B,2,"AND byte with accumulator, then shift right on...",ASR
8,Immediate,2,0xAB,2,"AND byte with accumulator, then transfer accum...",ATX
9,AbsoluteY,3,0x9F,5,AND X register with accumulator then AND resul...,AXA
