# Byterun Python 解释器

[教程链接](http://qingyunha.github.io/taotao/)

根据输入表达式生成对应的指令集

例：
* 输入：7+5
* 输出：
```
what_to_execute = {
    "instructions": [("LOAD_VALUE", 0),  # the first number
                     ("LOAD_VALUE", 1),  # the second number
                     ("ADD_TWO_VALUES", None),
                     ("PRINT_ANSWER", None)],
    "numbers": [7, 5] }
```

以下是第一个版本的Interpreter

In [67]:
class Interpreter:
    """First version of the toy interpreter: a tiny stack machine.

    It understands three instructions: ``LOAD_VALUE``, ``ADD_TWO_VALUES``
    and ``PRINT_ANSWER``. Operands live on ``self.stack``.
    """

    def __init__(self):
        # Operand stack shared by all instructions.
        self.stack = []

    def LOAD_VALUE(self, number):
        """Push ``number`` onto the stack."""
        self.stack.append(number)

    def PRINT_ANSWER(self):
        """Pop the top of the stack and print it."""
        print(self.stack.pop())

    def ADD_TWO_VALUES(self):
        """Pop two values and push their sum.

        The value popped first (the former top of the stack) is the left
        operand of ``+``.
        """
        top = self.stack.pop()
        beneath = self.stack.pop()
        self.stack.append(top + beneath)

    def run_code(self, what_to_execute):
        """Execute a program.

        ``what_to_execute`` is a dict with an ``"instructions"`` list of
        ``(name, argument)`` pairs and a ``"numbers"`` list of constants.
        For ``LOAD_VALUE`` the argument is an index into ``"numbers"``.
        Instruction names other than the three known ones are skipped.
        """
        program = what_to_execute["instructions"]
        constants = what_to_execute["numbers"]
        for opname, operand in program:
            if opname == "LOAD_VALUE":
                self.LOAD_VALUE(constants[operand])
                continue
            if opname == "ADD_TWO_VALUES":
                self.ADD_TWO_VALUES()
                continue
            if opname == "PRINT_ANSWER":
                self.PRINT_ANSWER()

这里的what_to_execute用于测试Interpreter的功能，作为测试输入使用。

In [68]:
# Test program for the Interpreter above: computes and prints 7 + 5.
what_to_execute = {
    "instructions": [("LOAD_VALUE", 0),  # push numbers[0], i.e. 7
                     ("LOAD_VALUE", 1),  # push numbers[1], i.e. 5
                     ("ADD_TWO_VALUES", None),  # pop both values, push their sum
                     ("PRINT_ANSWER", None)],  # pop the sum and print it
    "numbers": [7, 5] }

开始测试

In [69]:
interpreter = Interpreter()
interpreter.run_code(what_to_execute)

12


更新更复杂的指令集，添加更多的加法组成连加

In [70]:
# Chained addition: computes and prints 7 + 5 + 8.
what_to_execute = {
        "instructions": [("LOAD_VALUE", 0),  # push 7
                         ("LOAD_VALUE", 1),  # push 5
                         ("ADD_TWO_VALUES", None),  # stack now holds 12
                         ("LOAD_VALUE", 2),  # push 8
                         ("ADD_TWO_VALUES", None),  # stack now holds 20
                         ("PRINT_ANSWER", None)],  # pop and print the total
        "numbers": [7, 5, 8] }

In [71]:
interpreter = Interpreter()
interpreter.run_code(what_to_execute)

20


添加变量支持

In [72]:
class Interpreter:
    """Second version of the toy interpreter: adds variable support.

    Besides the operand stack it keeps ``self.environment``, a dict that
    maps variable names to values, and understands the extra instructions
    ``STORE_NAME`` and ``LOAD_NAME``.
    """

    def __init__(self):
        self.stack = []  # operand stack
        self.environment = {}  # variable name -> value

    def STORE_NAME(self, name):
        """Pop the top of the stack and bind it to ``name``."""
        self.environment[name] = self.stack.pop()

    def LOAD_NAME(self, name):
        """Push the value bound to ``name`` (KeyError if it is unbound)."""
        self.stack.append(self.environment[name])

    def parse_argument(self, instruction, argument, what_to_execute):
        """Resolve a raw instruction argument to the value it refers to.

        ``LOAD_VALUE`` arguments index into ``what_to_execute["numbers"]``;
        ``LOAD_NAME`` / ``STORE_NAME`` arguments index into
        ``what_to_execute["names"]``. Any other instruction gets its
        argument back unchanged.
        """
        lookup_tables = (
            ("numbers", ("LOAD_VALUE",)),
            ("names", ("LOAD_NAME", "STORE_NAME")),
        )
        for table_key, opnames in lookup_tables:
            if instruction in opnames:
                return what_to_execute[table_key][argument]
        return argument

    def LOAD_VALUE(self, number):
        """Push ``number`` onto the stack."""
        self.stack.append(number)

    def PRINT_ANSWER(self):
        """Pop the top of the stack and print it."""
        print(self.stack.pop())

    def ADD_TWO_VALUES(self):
        """Pop two values and push their sum (first popped is the left operand)."""
        top = self.stack.pop()
        beneath = self.stack.pop()
        self.stack.append(top + beneath)

    def run_code(self, what_to_execute):
        """Execute every ``(name, argument)`` pair in ``"instructions"``.

        Each argument is first resolved through ``parse_argument``.
        Instruction names that are not recognised are skipped.
        """
        for opname, raw_operand in what_to_execute["instructions"]:
            operand = self.parse_argument(opname, raw_operand, what_to_execute)

            if opname == "LOAD_VALUE":
                self.LOAD_VALUE(operand)
                continue
            if opname == "ADD_TWO_VALUES":
                self.ADD_TWO_VALUES()
                continue
            if opname == "PRINT_ANSWER":
                self.PRINT_ANSWER()
                continue
            if opname == "STORE_NAME":
                self.STORE_NAME(operand)
                continue
            if opname == "LOAD_NAME":
                self.LOAD_NAME(operand)

测试新添加的两个指令STORE_NAME和LOAD_NAME

In [73]:
# Test program for STORE_NAME / LOAD_NAME: a = 1; b = 2; print(a + b).
what_to_execute = {
        "instructions": [("LOAD_VALUE", 0),  # push numbers[0], i.e. 1
                         ("STORE_NAME", 0),  # pop it into names[0], i.e. "a"
                         ("LOAD_VALUE", 1),  # push numbers[1], i.e. 2
                         ("STORE_NAME", 1),  # pop it into names[1], i.e. "b"
                         ("LOAD_NAME", 0),  # push the value of "a"
                         ("LOAD_NAME", 1),  # push the value of "b"
                         ("ADD_TWO_VALUES", None),  # pop both, push their sum
                         ("PRINT_ANSWER", None)],  # pop the sum and print it
        "numbers": [1, 2],
        "names":   ["a", "b"] }

# Run the program on a fresh interpreter.
interpreter = Interpreter()
interpreter.run_code(what_to_execute)

3


将多个if-elif语句合并为执行框架

In [74]:
class Interpreter:
    """Third version of the toy interpreter: dispatches by method name.

    Instead of an ``if``/``elif`` chain, ``execute`` looks up the handler
    for each instruction with ``getattr(self, instruction)``, so adding an
    instruction only requires adding a method with the same name.
    """

    def __init__(self):
        self.stack = []  # operand stack
        self.environment = {}  # variable name -> value

    def STORE_NAME(self, name):
        """Pop the top of the stack and bind it to ``name``."""
        val = self.stack.pop()
        self.environment[name] = val

    def LOAD_NAME(self, name):
        """Push the value bound to ``name`` (KeyError if it is unbound)."""
        val = self.environment[name]
        self.stack.append(val)

    def parse_argument(self, instruction, argument, what_to_execute):
        """Resolve a raw instruction argument to the value it refers to.

        ``LOAD_VALUE`` arguments index into ``what_to_execute["numbers"]``;
        ``LOAD_NAME`` / ``STORE_NAME`` arguments index into
        ``what_to_execute["names"]``. Any other instruction gets its
        argument back unchanged.
        """
        numbers = ["LOAD_VALUE"]
        names = ["LOAD_NAME", "STORE_NAME"]

        if instruction in numbers:
            argument = what_to_execute["numbers"][argument]
        elif instruction in names:
            argument = what_to_execute["names"][argument]

        return argument

    def LOAD_VALUE(self, number):
        """Push ``number`` onto the stack."""
        self.stack.append(number)

    def PRINT_ANSWER(self):
        """Pop the top of the stack and print it."""
        answer = self.stack.pop()
        print(answer)

    def ADD_TWO_VALUES(self):
        """Pop two values and push their sum (first popped is the left operand)."""
        first_num = self.stack.pop()
        second_num = self.stack.pop()
        total = first_num + second_num
        self.stack.append(total)

    def execute(self, what_to_execute):
        """Run every ``(name, argument)`` pair in ``"instructions"``.

        The handler is the method named after the instruction; an unknown
        instruction raises ``AttributeError``. An instruction whose raw
        argument is ``None`` is called without an operand; otherwise the
        argument is resolved through ``parse_argument`` and passed on.
        """
        instructions = what_to_execute["instructions"]
        for instruction, argument in instructions:
            # Decide on the RAW argument whether the instruction takes an
            # operand. Testing the resolved value instead would mistake a
            # constant (or name) that is literally None for "no operand".
            has_operand = argument is not None
            if has_operand:
                argument = self.parse_argument(instruction, argument, what_to_execute)

            bytecode_method = getattr(self, instruction)
            if has_operand:
                bytecode_method(argument)
            else:
                bytecode_method()

In [75]:
# Same program as before (a = 1; b = 2; print(a + b)), now run through execute().
what_to_execute = {
        "instructions": [("LOAD_VALUE", 0),  # push numbers[0], i.e. 1
                         ("STORE_NAME", 0),  # pop it into names[0], i.e. "a"
                         ("LOAD_VALUE", 1),  # push numbers[1], i.e. 2
                         ("STORE_NAME", 1),  # pop it into names[1], i.e. "b"
                         ("LOAD_NAME", 0),  # push the value of "a"
                         ("LOAD_NAME", 1),  # push the value of "b"
                         ("ADD_TWO_VALUES", None),  # pop both, push their sum
                         ("PRINT_ANSWER", None)],  # pop the sum and print it
        "numbers": [1, 2],
        "names":   ["a", "b"] }

# Run the program on a fresh interpreter via the getattr-based dispatcher.
interpreter = Interpreter()
interpreter.execute(what_to_execute)

3


## 真正的Python字节码

In [76]:
def cond():  # sample function; its compiled bytecode is inspected in the cells that follow
    x = 3
    if x < 5:  # 3 < 5 is always true, so the 'no' branch is compiled but never taken
        return 'yes'
    else:
        return 'no'

In [77]:
cond.__code__.co_code # func_name.__code__.co_code就是函数对象的字节码

b'd\x01}\x00|\x00d\x02k\x00r\x08d\x03S\x00d\x04S\x00'

In [78]:
list(cond.__code__.co_code) # 解析字节码

[100, 1, 125, 0, 124, 0, 100, 2, 107, 0, 114, 8, 100, 3, 83, 0, 100, 4, 83, 0]

In [79]:
import dis

dis.dis(cond) # 解析字节码

  2           0 LOAD_CONST               1 (3)
              2 STORE_FAST               0 (x)

  3           4 LOAD_FAST                0 (x)
              6 LOAD_CONST               2 (5)
              8 COMPARE_OP               0 (<)
             10 POP_JUMP_IF_FALSE        8 (to 16)

  4          12 LOAD_CONST               3 ('yes')
             14 RETURN_VALUE

  6     >>   16 LOAD_CONST               4 ('no')
             18 RETURN_VALUE


In [80]:
dis.opname[100]

'LOAD_CONST'

In [81]:
dis.opname[125]

'STORE_FAST'

## Byterun

* VirtualMachine类，它管理高层结构，尤其是frame调用栈，并包含指令到操作的映射。这是一个比前面Interpreter对象更复杂的版本。
* Frame类，每个Frame实例都有一个code object，并且管理着其他一些必要的状态信息，尤其是全局和局部命名空间、指向调用它的frame的指针，以及最后执行的字节码指令。
* Function类，它被用来代替真正的Python函数。回想一下，调用函数时会创建一个新的frame。我们自己实现Function，所以我们控制新frame的创建。
* Block类，它只是包装了代码块的3个属性。（代码块的细节不是解释器的核心，我们不会花时间在它身上，把它列在这里，是因为Byterun需要它。）

In [82]:
class Frame:
    """Plain record of per-call execution state; it has no methods of its own.

    Attributes:
        code_obj: the code object produced by the compiler for this frame.
        global_names / local_names: the global and local namespaces.
        prev_frame: the parent frame, i.e. the frame that called this one.
        stack: this frame's data stack (starts empty).
        buildin_names: namespace of builtins (attribute name kept as spelled).
        last_instruction: starts at 0.
        block_stack: starts empty.
    """

    def __init__(self, code_obj, global_names, local_names, prev_frame):
        self.code_obj = code_obj
        self.global_names = global_names
        self.local_names = local_names
        self.prev_frame = prev_frame
        self.stack = []

        if prev_frame:
            # A called frame shares the builtins namespace of its caller.
            builtins_ns = prev_frame.buildin_names
        else:
            # The outermost frame reads it from its locals; if the entry is
            # an object with a __dict__ (e.g. a module), use that dict.
            builtins_ns = local_names['__buildins__']
            builtins_ns = getattr(builtins_ns, '__dict__', builtins_ns)
        self.buildin_names = builtins_ns

        self.last_instruction = 0
        self.block_stack = []

In [83]:
class VirtualMachineError(Exception):
    """Exception type for the virtual machine (nothing visible here raises it yet)."""


class VirtualMachine:
    """Top-level interpreter state: the stack of frames and the current frame."""

    def __init__(self):
        self.frames = []  # call stack of frames
        self.frame = None  # the frame currently being executed
        self.return_value = None  # value returned by a frame (from a return statement)
        self.last_exception = None

    def run_code(self, code, global_names=None, local_names=None):
        """Entry point: build a frame for ``code`` and hand it to ``run_frame``."""
        frame = self.make_frame(code, global_names=global_names, local_names=local_names)
        self.run_frame(frame)

    # Frame management

    def make_frame(self, code, callargs=None, global_names=None, local_names=None):
        """Create a new ``Frame`` for ``code``, with the current frame as its parent.

        Args:
            code: the code object the frame will execute.
            callargs: optional mapping of call arguments, merged into the
                frame's local namespace.
            global_names: global namespace; when given, ``local_names``
                defaults to the same dict.
            local_names: local namespace; only honoured together with
                ``global_names``.

        When ``global_names`` is omitted, a nested frame inherits the
        current frame's globals and gets fresh locals, while the first
        (module-level) frame gets one fresh dict serving as both.
        """
        if global_names is not None:
            # Explicit namespaces win; locals fall back to the globals only
            # when the caller did not supply any.
            if local_names is None:
                local_names = global_names
        elif self.frames:  # not the first, module-level frame
            global_names = self.frame.global_names
            local_names = {}
        else:  # module-level frame
            global_names = local_names = {
                # The key spelling matches the lookup done in Frame.__init__;
                # the value is Python's real builtins namespace.
                '__buildins__': __builtins__,
                '__name__': '__main__',
                '__doc__': None,
                '__package__': None,
            }
        if callargs:
            # Expose the call arguments as local variables of the new frame.
            local_names.update(callargs)
        return Frame(code, global_names, local_names, self.frame)

    def push_frame(self, frame):
        """Push ``frame`` onto the frame stack and make it the current frame."""
        self.frames.append(frame)
        self.frame = frame

    def pop_frame(self):
        """Pop the top frame; the current frame becomes the new top, or None."""
        self.frames.pop()
        self.frame = self.frames[-1] if self.frames else None

    def run_frame(self, frame=None):
        """Placeholder for the execution loop; not implemented yet.

        Accepts the frame that ``run_code`` passes in and does nothing.
        """

In [None]:
class Function:
    __slots__ = [
        'func_code', 'func_name', 'func_defaults', 'func_globals',
        'func_locals', 'func_dict', 'func_closure',
        '__name__', '__dict__', '__doc__',
        '_vm', '_func',
    ]

    def __init__(self, name, code, globs, defaults, closure, vm):
        self._vm = vm
        self.func_code = code
        self.func_name = self.__name__ = name or code.co_name
        self.func_defaults = tuple(defaults)
        self.func_globals = globs
        self.func_locals = self._vm.frame.f_locals
        self.__dict__ = {}
        self.func_closure = closure
        self.__doc__ = code.co_consts[0] if code.co_consts else None

        kw = {
            'argdefs': self.func_defaults,
        }
        if closure:
            kw['closure'] = tuple(make_cell(0) for _ in closure)
        self._func = types.FunctionType(code, globs, **kw)

    def __call__(self, *args, **kwargs):
        callargs = inspect.getcallargs(self._func, *args, **kwargs)

        frame = self._vm.make_frame(
            self.func_code, callargs, self.func_globals, {}
        )