def inplace_encode(asm):
    """Encode an assembly listing in place and return it.

    ``asm`` is a mutable sequence in which each instruction starts with its
    mnemonic, followed by that instruction's operand slots.  Every mnemonic
    is looked up in ``iset.INSN_MAP`` and overwritten with the matching
    opcode; operand slots are stepped over untouched.  A mnemonic missing
    from ``iset.INSN_MAP`` propagates the lookup error.
    """
    pos = 0
    end = len(asm)  # only items are reassigned, so the length is stable
    while pos < end:
        entry = iset.INSN_MAP[asm[pos]]
        asm[pos] = entry.opcode
        # ``length`` covers the opcode slot plus its operands.
        pos += entry.length
    return asm
ctx.vm.stk_pop() - if cond is not True: - ctx.ip = ip - else: - ctx.ip += 2 # insn length = 2 - -def op_push_local(ctx): - idx = get_param(ctx, 1) - loc = ctx.proc.locals[idx] - ctx.vm.stk_push(loc) - -def op_set_local(ctx): - idx = get_param(ctx, 1) - val = ctx.vm.stk_pop() - ctx.proc.locals[idx] = val - -# Instruction Tags -TAG_NORMAL = 0 -TAG_CTL_FLOW = 1 - -INSNS = [ - # length name action tag - ( 1, "ret", op_ret, TAG_NORMAL), - ( 3, "call", op_call, TAG_NORMAL), - ( 2, "goto", op_goto, TAG_NORMAL), - ( 2, "push_local", op_push_local, TAG_NORMAL), - ( 1, "push_0", op_push_0, TAG_NORMAL), - ( 1, "push_1", op_push_1, TAG_NORMAL), - ( 2, "push_literal", op_push_literal, TAG_NORMAL), - ( 1, "dup", op_dup, TAG_NORMAL), - ( 1, "plus", op_plus, TAG_NORMAL), - ( 1, "minus", op_minus, TAG_NORMAL), - ( 1, "multiply", op_multiply, TAG_NORMAL), - ( 1, "divide", op_divide, TAG_NORMAL), - ( 1, "equal", op_equal, TAG_NORMAL), - ( 2, "goto", op_goto, TAG_CTL_FLOW), - ( 2, "goto_if_true", op_goto_if_true, TAG_CTL_FLOW), - ( 2, "goto_if_not_true", op_goto_if_not_true, TAG_CTL_FLOW), - ( 2, "push_local", op_push_local, TAG_NORMAL), - ( 2, "set_local", op_set_local, TAG_NORMAL) - ] - -def run(vm): - ctx = vm.ctx - while ctx.ip < len(ctx.bytecode): - opcode = ctx.bytecode[ctx.ip] - insn_action(opcode)(ctx) - if not has_tag(opcode, TAG_CTL_FLOW): - ctx.ip = ctx.ip + insn_length(opcode) - - - -def insn_length(insn): - return INSNS[insn][0] - -def insn_action(insn): - return INSNS[insn][2] - -def insn_tag(insn): - return INSNS[insn][3] - -def get_param(ctx, n): - return ctx.bytecode[ctx.ip+n] - -def has_tag(opcode, tag): - return (insn_tag(opcode) & tag) == tag diff --git a/iset.yml b/iset.yml new file mode 100644 index 0000000..5f44acf --- /dev/null +++ b/iset.yml @@ -0,0 +1,204 @@ +tags: + - ctrl_flow + - ctx_switch + +# Instructions +# +# The opcode is the index of each instruction. Stack before and after +# instruction is only for documenting purpose. 
The elements are +# specified from bottom to top. E.g: +# +# stack_before: [a, b] +# stack_after: [b-a] +# +# will be +# +# +---------+ +# | b | +# +---------+ ==> +---------+ +# | a | | b-a | +# +---------+ +---------+ +# + +instructions: + - + name: ret + tags: [ctx_switch, ctrl_flow] + desc: Return from a procedure. + operands: [] + stack_before: [return_value] + stack_after: [] + code: | + pass + + - + name: call + tags: [ctx_switch, ctrl_flow] + desc: Call a procedure. + operands: [] + stack_before: [] + stack_after: [] + code: | + pass + + - + name: push_local + tags: [] + desc: Push value of a local variable to operand stack. + operands: [local] + stack_before: [] + stack_after: [value] + code: | + idx = get_param(ctx, 1) + loc = ctx.proc.locals[idx] + ctx.vm.stk_push(loc) + + - + name: set_local + tags: [] + desc: Pop the stack top and assign it to a local variable. + operands: [local] + stack_before: [value] + stack_after: [] + code: | + idx = get_param(ctx, 1) + val = ctx.vm.stk_pop() + ctx.proc.locals[idx] = val + + - + name: push_literal + tags: [] + desc: Push a literal to operand stack. + operands: [literal] + stack_before: [] + stack_after: [value] + code: | + idx = get_param(ctx, 1) + lit = ctx.proc.literals[idx] + ctx.vm.stk_push(lit) + + - + name: push_0 + tags: [] + desc: Push 0 to operand stack. + operands: [] + stack_before: [] + stack_after: [0] + code: | + ctx.vm.stk_push(0) + + - + name: push_1 + tags: [] + desc: Push 1 to operand stack. + operands: [] + stack_before: [] + stack_after: [1] + code: | + ctx.vm.stk_push(1) + + - + name: dup + tags: [] + desc: Duplicate the stack top object.
+ operands: [] + stack_before: [value] + stack_after: [value, value] + code: | + ctx.vm.stk_push(ctx.vm.stk_top()) + + - + name: plus + tags: [] + desc: Perform arithmetic + + operands: [] + stack_before: [a, b] + stack_after: [b+a] + code: | + res = ctx.vm.stk_pop() + ctx.vm.stk_pop() + ctx.vm.stk_push(res) + + - + name: minus + tags: [] + desc: Perform arithmetic - + operands: [] + stack_before: [a, b] + stack_after: [b-a] + code: | + res = ctx.vm.stk_pop() - ctx.vm.stk_pop() + ctx.vm.stk_push(res) + + - + name: multiply + tags: [] + desc: Perform arithmetic * + operands: [] + stack_before: [a, b] + stack_after: [b*a] + code: | + res = ctx.vm.stk_pop() * ctx.vm.stk_pop() + ctx.vm.stk_push(res) + + - + name: divide + tags: [] + desc: Perform arithmetic / + operands: [] + stack_before: [a, b] + stack_after: [b/a] + code: | + res = ctx.vm.stk_pop() / ctx.vm.stk_pop() + ctx.vm.stk_push(res) + + - + name: equal + tags: [] + desc: Test equality. + operands: [] + stack_before: [a, b] + stack_after: [b==a] + code: | + res = (ctx.vm.stk_pop() == ctx.vm.stk_pop()) + ctx.vm.stk_push(res) + + - + name: goto + tags: [ctrl_flow] + desc: Unconditional jump. + operands: [ip] + stack_before: [] + stack_after: [] + code: | + ip = get_param(ctx, 1) + ctx.ip = ip + + - + name: goto_if_true + tags: [ctrl_flow] + desc: Jump if the stack top is true. + operands: [ip] + stack_before: [condition] + stack_after: [] + code: | + ip = get_param(ctx, 1) + cond = ctx.vm.stk_pop() + if cond is True: + ctx.ip = ip + else: + ctx.ip += $(insn_len) + + - + name: goto_if_not_true + tags: [ctrl_flow] + desc: Jump if the stack top is not true.
"""Generate ``iset.py`` and ``insns.py`` from the ``iset.yml`` description.

``iset.yml`` lists the tag names and the instructions of the VM.  The opcode
of an instruction is its index in that list.  This script renders two Python
modules from it:

* ``iset.py``  -- the ``Instruction`` class, ``INSTRUCTIONS`` and ``INSN_MAP``
* ``insns.py`` -- tag constants, one ``op_<name>`` action per instruction,
  the ``INSN_ACTION`` / ``INSN_TAGS`` tables and the ``run`` loop
"""

import re

# Matches a template variable such as $(key); group 1 is the key.
_TMPL_VAR = re.compile(r"\$\(([^)]+)\)")

# Indentation used for every generated function body and table entry.
_INDENT = '    '


def _indent(code, prefix=_INDENT):
    """Return ``code`` with ``prefix`` added to every non-blank line."""
    return ''.join(prefix + line if line.strip() else line
                   for line in code.splitlines(True))


def gen_tags(tags):
    """Return source text defining one ``TAG_<NAME>`` bit flag per tag.

    The i-th tag gets the value ``1 << i`` so tags can be OR-ed together.
    """
    return '\n'.join(['TAG_%-12s = %d' % (tag.upper(), 1 << i)
                      for i, tag in enumerate(tags)])


def gen_actions(instructions):
    """Return source text defining an ``op_<name>(ctx)`` function per insn.

    The body is the instruction's ``code`` snippet with ``$(insn_len)``
    expanded to ``1 + len(operands)``.  Instructions that are not tagged
    ``ctrl_flow`` get a trailing ``ctx.ip += <insn_len>``; ``ctrl_flow``
    instructions are responsible for updating ``ctx.ip`` themselves.
    """
    def gen_action(insn):
        header = "def op_%s(ctx):\n" % insn['name']
        env = {'insn_len': 1 + len(insn['operands'])}

        body = insn['code']
        # A snippet written without a trailing newline must not get the
        # ip update glued onto its last statement.
        if not body.endswith('\n'):
            body += '\n'
        if 'ctrl_flow' not in insn['tags']:
            body += 'ctx.ip += $(insn_len)\n'

        return header + _indent(process_tmpl(body, env))

    return '\n'.join([gen_action(insn) for insn in instructions])


def gen_action_table(instructions):
    """Return source text for ``INSN_ACTION``, indexed by opcode."""
    entries = ',\n'.join([_INDENT + 'op_' + insn['name']
                          for insn in instructions])
    return 'INSN_ACTION = [\n' + entries + '\n]\n'


def gen_tags_table(instructions):
    """Return source text for ``INSN_TAGS``, indexed by opcode.

    Each entry is the OR of the instruction's ``TAG_*`` constants, or ``0``
    when the instruction has no tags.
    """
    def gen_tag(insn):
        if not insn['tags']:
            return '0'
        return ' | '.join(['TAG_%s' % tag.upper() for tag in insn['tags']])

    entries = ',\n'.join([_INDENT + gen_tag(insn) for insn in instructions])
    return 'INSN_TAGS = [\n' + entries + '\n]\n'


def gen_insn_table(instructions):
    """Return source text for ``INSTRUCTIONS`` and ``INSN_MAP``.

    ``INSTRUCTIONS`` holds one ``Instruction(...)`` per entry, in opcode
    order; ``INSN_MAP`` maps each instruction name to that same object.
    """
    fields = ('name', 'tags', 'desc', 'operands',
              'stack_before', 'stack_after', 'code')

    def gen_insn(opcode, insn):
        args = ',\n'.join([_INDENT + repr(insn[key]) for key in fields])
        return 'Instruction(%d,\n%s)' % (opcode, args)

    # enumerate() is called once per table: a shared zip() object would be
    # exhausted after the first pass on Python 3, leaving INSN_MAP empty.
    table = ',\n'.join([gen_insn(opcode, insn)
                        for opcode, insn in enumerate(instructions)])
    mapping = ',\n'.join([_INDENT + '%r : INSTRUCTIONS[%d]'
                          % (insn['name'], opcode)
                          for opcode, insn in enumerate(instructions)])

    return ('INSTRUCTIONS = [\n' + table + '\n]\n\n'
            'INSN_MAP = {\n' + mapping + '\n}\n')


def process_tmpl(tmpl, env):
    """\
    Process template. Special variables like $(key) in the template
    will be replaced by the value found in env (env['key']).

    The substituted text is not rescanned.  A variable missing from env
    raises KeyError.
    """
    return _TMPL_VAR.sub(lambda m: str(env[m.group(1)]), tmpl)


TMPL_INSNS = """\
# Don't edit this file. This is generated by iset_gen.py

$(tags)

$(actions)

$(action_table)

$(tags_table)


def has_tag(opcode, tag):
    return (INSN_TAGS[opcode] & tag) == tag

def get_param(ctx, n):
    return ctx.bytecode[ctx.ip+n]


def run(vm):
    ctx = vm.ctx
    while ctx.ip < len(ctx.bytecode):
        opcode = ctx.bytecode[ctx.ip]
        INSN_ACTION[opcode](ctx)
"""

TMPL_ISET = """\
# Don't edit this file. This is generated by iset_gen.py

class Instruction(object):
    __slots__ = ('opcode',
                 'name',
                 'tags',
                 'desc',
                 'operands',
                 'stack_before',
                 'stack_after',
                 'code')
    def __init__(self, opcode, name, tags, desc, operands,
                 stack_before, stack_after, code):
        self.opcode = opcode
        self.name = name
        self.tags = tags
        self.desc = desc
        self.operands = operands
        self.stack_before = stack_before
        self.stack_after = stack_after
        self.code = code

    def length_get(self):
        return len(self.operands)+1
    def length_set(self, value):
        raise AttributeError('length attribute is read only')
    length = property(length_get, length_set, 'length of the instruction')

$(instruction_table)
"""


def main():
    """Read ``iset.yml`` and write ``iset.py`` and ``insns.py``."""
    # Imported here so the generator helpers above stay importable on
    # machines without PyYAML; only regeneration needs it.
    import yaml

    # safe_load: the description is plain lists/dicts/strings, so there is
    # no reason to allow arbitrary object construction.
    with open("iset.yml") as src:
        iset = yaml.safe_load(src)

    env = {
        'tags': gen_tags(iset['tags']),
        'actions': gen_actions(iset['instructions']),
        'instruction_table': gen_insn_table(iset['instructions']),
        'action_table': gen_action_table(iset['instructions']),
        'tags_table': gen_tags_table(iset['instructions']),
    }

    with open("iset.py", "w") as out:
        out.write(process_tmpl(TMPL_ISET, env))

    with open("insns.py", "w") as out:
        out.write(process_tmpl(TMPL_INSNS, env))


if __name__ == '__main__':
    main()
Context -import iseq +import insns class VM(object): @@ -8,7 +8,7 @@ def __init__(self): self.ctx = None def run(self): - iseq.run(self) + insns.run(self) def call(self, proc, argc): ctx = Context(self.ctx, proc)