In [226]:
pwd

'/Users/netcan/Workspace/Programming/FCEmulator/src/tools'

## 爬取Instruction Reference的指令信息
[http://obelisk.me.uk/6502/reference.html](http://obelisk.me.uk/6502/reference.html)

In [227]:
import InstructionReference
import pandas as pd

In [230]:
# Scrape the instruction reference and load the scraped records into a
# DataFrame so the cleaning cells below can work on whole columns.
ret = InstructionReference.get_inst_ref()
df = pd.DataFrame(data=ret)
df.head(5)

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,$69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,Zero Page,$65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,"Zero Page,X",$75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,$6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,"Absolute,X",$7D,3,4 (+1 if page crossed),0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


## 对爬取的指令细节进行清洗

In [231]:
# Normalise the scraped addressing-mode labels into identifier-style names
# (no spaces, commas or parentheses).
#
# The rules are applied in order: "Zero Page" is collapsed to "ZeroPage"
# first so that the ",X" / ",Y" rules after it can match.
#
# regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a
# literal string by default, which would silently disable the regex rules.
# Raw strings avoid the invalid "\s" / "\(" escape sequences.
addressing_mode_rules = [
    (r'Implied', 'Implicit'),
    (r'Zero\s+Page', 'ZeroPage'),
    (r'ZeroPage,X', 'ZeroPageX'),
    (r'ZeroPage,Y', 'ZeroPageY'),
    (r'Absolute,X', 'AbsoluteX'),
    (r'Absolute,Y', 'AbsoluteY'),
    (r'\(Indirect,X\)', 'IndexIndirect'),
    (r'\(Indirect\),Y', 'IndirectIndex'),
]
cleaned_modes = df['addressingMode']
for mode_pattern, mode_label in addressing_mode_rules:
    cleaned_modes = cleaned_modes.str.replace(mode_pattern, mode_label, regex=True)
df['addressingMode'] = cleaned_modes
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,$69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,$65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,$75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,$6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,$7D,3,4 (+1 if page crossed),0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


In [232]:
# Turn the "$69"-style opcode notation into a C/C++ hex literal ("0x69").
# '$' is the end-of-string anchor in a regular expression, so request a
# literal (non-regex) replacement explicitly instead of relying on the
# pandas default, which differs between pandas versions.
df['code'] = df['code'].str.replace('$', '0x', regex=False)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,0x69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,0x65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,0x75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,0x6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,0x7D,3,4 (+1 if page crossed),0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


In [233]:
# Record the conditional cycle penalty mentioned in the 'cycles' text
# (e.g. "4 (+1 if page crossed)") in the 'extraCycles' column. This has to
# run before the 'cycles' text itself is reduced to its leading number.
#
# Raw strings are used because "\+" is an invalid escape sequence in a normal
# string literal. The penalties are written as strings so the column stays
# homogeneous with the '0' values it already holds (later cells compare this
# column against the string '0') instead of becoming a mixed int/str column
# that is subsequently used as a sort key.
df.loc[df['cycles'].str.contains(r'\+1'), 'extraCycles'] = '1'
df.loc[df['cycles'].str.contains(r'\+2'), 'extraCycles'] = '2'
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,0x69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,0x65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,0x75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,0x6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,0x7D,3,4 (+1 if page crossed),1,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


In [234]:
# Reduce the 'cycles' text to its leading base cycle count:
#   1. strip all whitespace (including embedded newlines),
#   2. keep only the leading run of digits, dropping any "(+1 if ...)" suffix.
#
# regex=True is explicit because pandas >= 2.0 defaults to literal matching
# (and rejects a callable/regex replacement in that mode). Raw strings avoid
# the invalid "\s" / "\d" escape sequences.
df['cycles'] = (
    df['cycles']
    .str.replace(r'[\n\s]', '', regex=True)
    .str.replace(r'^(\d+).*', r'\1', regex=True)
)
df.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
0,ADC,Immediate,0x69,2,2,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
1,ADC,ZeroPage,0x65,2,3,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
2,ADC,ZeroPageX,0x75,2,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
3,ADC,Absolute,0x6D,3,4,0,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."
4,ADC,AbsoluteX,0x7D,3,4,1,"[ADC - Add with Carry, A,Z,C,N = A+M+C, This i..."


## 导出指令信息结构体数组，供 C++ 实现使用

In [None]:
# Print one brace-initialiser line per instruction row, ready to paste into
# the C++ instruction table. The trailing nullptr is a fixed last field.
struct_fields = ['name', 'addressingMode', 'code', 'bytes', 'cycles', 'extraCycles']
entry_template = '\t{{{}, {}, {}, {}, {}, {}, nullptr}}, '
for _, inst in df.iterrows():
    print(entry_template.format(*(inst[field] for field in struct_fields)))

In [236]:
# Number of distinct instruction mnemonics in the table.
len(df['name'].unique())

56

In [237]:
# Non-null entry count for every column; identical counts across columns
# mean no column is missing values that another column has.
df.count()

name              151
addressingMode    151
code              151
bytes             151
cycles            151
extraCycles       151
description       151
dtype: int64

## 按指令实现难易程度排序

In [243]:
# Order the opcode variants by size and cycle cost, then keep only the first
# (i.e. lowest-ranked) variant of each mnemonic, giving one row per instruction.
difficulty_keys = ['bytes', 'cycles', 'extraCycles']
df_sorted = (
    df.sort_values(by=difficulty_keys)
      .drop_duplicates(subset='name', keep='first')
)
df_sorted.head()

Unnamed: 0,name,addressingMode,code,bytes,cycles,extraCycles,description
16,ASL,Accumulator,0x0A,1,2,0,"[ASL - Arithmetic Shift Left, A,Z,C,N = M*2 or..."
32,CLC,Implicit,0x18,1,2,0,"[CLC - Clear Carry Flag, C = 0, Set the carry ..."
33,CLD,Implicit,0xD8,1,2,0,"[CLD - Clear Decimal Mode, D = 0, Sets the dec..."
34,CLI,Implicit,0x58,1,2,0,"[CLI - Clear Interrupt Disable, I = 0, Clears ..."
35,CLV,Implicit,0xB8,1,2,0,"[CLV - Clear Overflow Flag, V = 0, Clears the ..."


## 生成指令函数声明

In [None]:
# Emit one OpExeFuncDecl(...) line per instruction, annotated with the first
# element of its description list, between begin/end marker comments.
print('/**************** 指令声明区Begin ****************/')
for op_name, description in zip(df_sorted['name'], df_sorted['description']):
    summary = description[0]
    print('OpExeFuncDecl(OP_{}); // {}'.format(op_name, summary))
print('/****************  指令声明区End  ****************/\n')

## 生成指令函数定义

In [337]:
# Look at one raw description entry (JSR) to see how the scraped list is laid out.
df.loc[df['name'] == 'JSR', 'description'].iloc[0]

['JSR - Jump to Subroutine',
 'The JSR instruction pushes the address (minus one) of the return\r\npoint on to the stack and then sets the program counter to the\r\ntarget memory address.']

In [268]:
# How many parts does each description list have, and how often does each
# length occur?
description_lengths = df['description'].apply(len)
description_lengths.value_counts()

3    122
2     29
Name: description, dtype: int64

In [306]:
# Sample of instructions whose description list has only two parts.
has_two_parts = df_sorted['description'].apply(len) == 2
df_sorted.loc[has_two_parts, 'name'].head()

96     NOP
109    ROL
114    ROR
105    PHA
106    PHP
Name: name, dtype: object

In [None]:
# Emit a stub OpExeFuncDefine(...) body per instruction, with the scraped
# description embedded as a block comment.
#
# The template consumes the instruction name twice and then three description
# slots: slots 1 and 2 sit on " * " comment lines, slot 3 is inserted verbatim
# (so it must carry its own prefix and newline, or be empty).
impl_template = (
    'OpExeFuncDefine(OP_{}) {{\n'
    '\t// TODO: wait for implements: {}\n'
    '\t/**\n'
    '\t * {}\n'
    '\t * {}\n'
    '{}'
    '\t **/\n'
    '\n\treturn self.cycles;\n'
    '}}\n'
)

print('/**************** 指令实现区Begin ****************/')
for op_name, description in zip(df_sorted['name'], df_sorted['description']):
    # Keep the first part as-is; re-wrap CRLF line breaks in the remaining
    # parts so continuation lines stay inside the block comment.
    parts = [
        part if position == 0 else part.replace('\r\n', '\n\t * ')
        for position, part in enumerate(description)
    ]
    if len(parts) >= 3:
        # Third part gets its own comment line.
        parts[2] = '\t * {}\n'.format(parts[2])
    else:
        # Shorter descriptions leave the third slot empty.
        parts.append('')

    print(impl_template.format(op_name, op_name, *parts))
print('/****************  指令实现区End  ****************/')

In [239]:
# Length of the longest addressing-mode label.
mode_label_lengths = df['addressingMode'].str.len()
mode_label_lengths.max()

13

In [240]:
# The ten mnemonics with the most opcode variants (ascending, largest last).
variants_per_name = df.groupby('name').size().sort_values()
variants_per_name.tail(10)

name
LDX    5
LDY    5
STA    7
EOR    8
ORA    8
LDA    8
CMP    8
AND    8
SBC    8
ADC    8
dtype: int64

In [241]:
# Sanity check: no zero-page addressing mode carries a "+1" extra-cycle penalty.
is_zero_page = df['addressingMode'].str.contains('ZeroPage')
df.loc[is_zero_page, 'extraCycles'].eq('0').all()

True

In [242]:
# Sanity check: the (Indirect,X) addressing mode never carries a "+1"
# extra-cycle penalty.
#
# The cleaned label for this mode is 'IndexIndirect'. The previous pattern
# 'IndexIndirected' matched no rows, so .all() over the empty selection was
# vacuously True and the check verified nothing. The assertion guards against
# the selection being empty again.
is_index_indirect = df['addressingMode'].str.contains('IndexIndirect')
assert is_index_indirect.any(), "no rows use the 'IndexIndirect' addressing mode"
(df.loc[is_index_indirect, 'extraCycles'] == '0').all()

True