In [131]:
# Show the kernel's current working directory.
# os.getcwd() is used instead of the bare `pwd` automagic, which only works when
# IPython automagic is enabled and no variable named `pwd` shadows it.
import os

os.getcwd()

'/Users/netcan/Workspace/Programming/FCEmulator/src/tools'

## 爬取Instruction Reference的指令信息
[http://obelisk.me.uk/6502/reference.html](http://obelisk.me.uk/6502/reference.html)

In [153]:
import InstructionReference
import pandas as pd

In [155]:
# Fetch the instruction records from the project-local InstructionReference module.
# NOTE(review): presumably get_inst_ref() downloads and parses the reference page
# linked above, so this cell likely needs network access — confirm.
ret = InstructionReference.get_inst_ref()
# Tabulate the result; the cleaning cells below expect columns such as
# 'addressingMode', 'code' and 'cycles'.
df = pd.DataFrame(ret)

## 对爬取的指令细节进行清洗

In [156]:
# Normalise the scraped addressing-mode labels into identifier-style names.
# The patterns are regular expressions (`\s+`, escaped parentheses), so they are
# raw strings passed with an explicit regex=True: pandas >= 2.0 defaults
# Series.str.replace to literal matching, which would silently leave 'Zero Page'
# and both indirect modes unconverted.
# Order matters: 'Zero Page' must be collapsed before the ',X' / ',Y' rules run.
df['addressingMode'] = (
    df['addressingMode']
    .str.replace(r'Implied', 'Implicit', regex=True)
    .str.replace(r'Zero\s+Page', 'ZeroPage', regex=True)
    .str.replace(r'ZeroPage,X', 'ZeroPageX', regex=True)
    .str.replace(r'ZeroPage,Y', 'ZeroPageY', regex=True)
    .str.replace(r'Absolute,X', 'AbsoluteX', regex=True)
    .str.replace(r'Absolute,Y', 'AbsoluteY', regex=True)
    .str.replace(r'\(Indirect,X\)', 'IndexIndirect', regex=True)
    .str.replace(r'\(Indirect\),Y', 'IndirectIndex', regex=True)
)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles
0,ADC,Immediate,$69,2,2,0
1,ADC,ZeroPage,$65,2,3,0
2,ADC,ZeroPageX,$75,2,4,0
3,ADC,Absolute,$6D,3,4,0
4,ADC,AbsoluteX,$7D,3,4 (+1 if page crossed),0


In [157]:
# Rewrite the `$xx` opcode notation as a `0xXX` hex literal.
# '$' is the regex end-of-string anchor, so literal matching is requested
# explicitly instead of relying on the pandas-version-dependent default of `regex`.
df['code'] = df['code'].str.replace('$', '0x', regex=False)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles
0,ADC,Immediate,0x69,2,2,0
1,ADC,ZeroPage,0x65,2,3,0
2,ADC,ZeroPageX,0x75,2,4,0
3,ADC,Absolute,0x6D,3,4,0
4,ADC,AbsoluteX,0x7D,3,4 (+1 if page crossed),0


In [158]:
# Record the conditional cycle penalty while `cycles` still holds the descriptive
# text (the next cell reduces it to the leading number).
# Raw strings keep the escaped '+' a valid regex without an invalid Python escape.
# Order matters: a row whose text matches both '+1' and '+2' ends up with 2,
# because the second assignment overwrites the first.
df.loc[df['cycles'].str.contains(r'\+1', regex=True), 'extraCycles'] = 1
df.loc[df['cycles'].str.contains(r'\+2', regex=True), 'extraCycles'] = 2
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles
0,ADC,Immediate,0x69,2,2,0
1,ADC,ZeroPage,0x65,2,3,0
2,ADC,ZeroPageX,0x75,2,4,0
3,ADC,Absolute,0x6D,3,4,0
4,ADC,AbsoluteX,0x7D,3,4 (+1 if page crossed),1


In [159]:
# Reduce `cycles` to its number: strip all whitespace (including newlines), then
# replace everything from the first run of digits onward with just those digits.
# regex=True is explicit because pandas >= 2.0 defaults to literal matching and
# rejects a callable replacement when regex=False.
df['cycles'] = (
    df['cycles']
    .str.replace(r'\s', '', regex=True)
    .str.replace(r'(\d+).*', lambda match: match.group(1), regex=True)
)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles
0,ADC,Immediate,0x69,2,2,0
1,ADC,ZeroPage,0x65,2,3,0
2,ADC,ZeroPageX,0x75,2,4,0
3,ADC,Absolute,0x6D,3,4,0
4,ADC,AbsoluteX,0x7D,3,4,1


## 导出指令信息结构体数组，供 C++ 实现使用

In [160]:
# Print one brace-initialiser line per table row for pasting into the C++ source.
# NOTE(review): the trailing `nullptr` is presumably a slot filled in on the C++
# side — confirm against the struct definition.
entry_template = '\t{{{}, {}, {}, {}, {}, {}, nullptr}}, '
entry_columns = ['name', 'addressingMode', 'code', 'bytes', 'cycles', 'extraCycles']
for _, record in df.iterrows():
    print(entry_template.format(*(record[column] for column in entry_columns)))

	{ADC, Immediate, 0x69, 2, 2, 0, nullptr}, 
	{ADC, ZeroPage, 0x65, 2, 3, 0, nullptr}, 
	{ADC, ZeroPageX, 0x75, 2, 4, 0, nullptr}, 
	{ADC, Absolute, 0x6D, 3, 4, 0, nullptr}, 
	{ADC, AbsoluteX, 0x7D, 3, 4, 1, nullptr}, 
	{ADC, AbsoluteY, 0x79, 3, 4, 1, nullptr}, 
	{ADC, IndexIndirect, 0x61, 2, 6, 0, nullptr}, 
	{ADC, IndirectIndex, 0x71, 2, 5, 1, nullptr}, 
	{AND, Immediate, 0x29, 2, 2, 0, nullptr}, 
	{AND, ZeroPage, 0x25, 2, 3, 0, nullptr}, 
	{AND, ZeroPageX, 0x35, 2, 4, 0, nullptr}, 
	{AND, Absolute, 0x2D, 3, 4, 0, nullptr}, 
	{AND, AbsoluteX, 0x3D, 3, 4, 1, nullptr}, 
	{AND, AbsoluteY, 0x39, 3, 4, 1, nullptr}, 
	{AND, IndexIndirect, 0x21, 2, 6, 0, nullptr}, 
	{AND, IndirectIndex, 0x31, 2, 5, 1, nullptr}, 
	{ASL, Accumulator, 0x0A, 1, 2, 0, nullptr}, 
	{ASL, ZeroPage, 0x06, 2, 5, 0, nullptr}, 
	{ASL, ZeroPageX, 0x16, 2, 6, 0, nullptr}, 
	{ASL, Absolute, 0x0E, 3, 6, 0, nullptr}, 
	{ASL, AbsoluteX, 0x1E, 3, 7, 0, nullptr}, 
	{BCC, Relative, 0x90, 2, 2, 2, nullptr}, 
	{BCS, Relative, 0xB0

In [161]:
# Number of distinct instruction mnemonics in the table.
len(df['name'].unique())

56

In [166]:
# Non-null entries per column; equal counts mean no field is missing anywhere.
df.notna().sum()

name              151
addressingMode    151
code              151
bytes             151
cycles            151
extraCycles       151
dtype: int64

## 按指令实现难易程度排序

In [163]:
# Rank every variant by (bytes, cycles, extraCycles), then keep only the first
# row seen for each mnemonic, i.e. its lowest-ranked variant.
# NOTE(review): `cycles` was produced by string replacement, so it is compared as
# text here; that matches numeric order only while every value is a single digit.
by_cost = df.sort_values(by=['bytes', 'cycles', 'extraCycles'])
ordered_df = by_cost[~by_cost.duplicated(subset='name')]
ordered_df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles
16,ASL,Accumulator,0x0A,1,2,0
32,CLC,Implicit,0x18,1,2,0
33,CLD,Implicit,0xD8,1,2,0
34,CLI,Implicit,0x58,1,2,0
35,CLV,Implicit,0xB8,1,2,0


In [177]:
# Length, in characters, of the longest addressing-mode name.
mode_name_lengths = df['addressingMode'].str.len()
mode_name_lengths.max()

13