## Understanding Python Bytecode

#### Reza Bagheri

In [448]:
import dis, types
import marshal, sys, timeit

#### compile()

In [449]:
s='''
a=5
a+=1
print(a)
'''
compile(s, "", "exec")

<code object <module> at 0x000002004B742F60, file "", line 2>

In [450]:
compile("a=5 \na+=1 \nprint(a)", "", "exec")

<code object <module> at 0x000002004B87A0C0, file "", line 1>

In [451]:
compile("a+7", "", "eval")

<code object <module> at 0x000002004B87A270, file "", line 1>

In [452]:
# This does not work:
#compile("a=a+1", "", "eval")

In [453]:
compile("a=a+1", "", "single")

<code object <module> at 0x000002004B87A300, file "", line 1>

In [454]:
c = compile("a=a+1", "", "single")
type(c)  # Output is: code

code

In [455]:
exec(compile("print(5)", "", "single"))

5


In [456]:
def f(n):
    return n
f.__code__

<code object f at 0x000002004B742ED0, file "<ipython-input-456-2b9217854dc7>", line 1>

In [457]:
c = compile("print(5)", "", "single")
c.co_code

b'e\x00d\x00\x83\x01F\x00d\x01S\x00'

In [458]:
print(c.co_code[0])
chr(c.co_code[0])

101


'e'

In [459]:
print(c.co_code[4])
chr(c.co_code[4])

131


'\x83'

#### Bytecode details

In [460]:
bytecode = b'd\x00Z\x00d\x01S\x00'
for byte in bytecode:
    print(byte, end=' ')

100 0 90 0 100 1 83 0 

In [461]:
dis.opname[100]

'LOAD_CONST'

In [462]:
dis.HAVE_ARGUMENT

90

In [463]:
dis.EXTENDED_ARG

144

#### Code object attributes

In [464]:
# Listing 1
s = '''
a = 5
b = 'text'
def f(x):
    return x
f(5)
'''
c=compile(s, "", "exec")

In [465]:
c.co_consts

(5, 'text', <code object f at 0x000002004B87A5D0, file "", line 4>, 'f', None)

In [466]:
c.co_names

('a', 'b', 'f')

In [467]:
s = "3 * a"
c1 = compile(s, "", "eval")
c1.co_consts

(3,)

In [468]:
def f(x):
    a = x * 2
    return a
f.__code__.co_consts

(None, 2)

In [469]:
# Listing 2
def f(x):
    z = 3
    t = 5
    def g(y):
        return t*x + y
    return g
a = 5
b = 1
h = f(a)

In [470]:
f.__code__.co_varnames

('x', 'z', 'g')

In [471]:
f.__code__.co_cellvars

('t', 'x')

In [472]:
h.__code__.co_freevars

('t', 'x')

#### dis module categories

In [473]:
dis.hasconst

[100]

In [474]:
dis.hasname

[90, 91, 95, 96, 97, 98, 101, 106, 108, 109, 116, 160]

In [475]:
dis.haslocal

[124, 125, 126]

In [476]:
dis.hasfree

[135, 136, 137, 138, 148]

In [477]:
dis.hascompare

[107]

In [478]:
dis.hasjrel

[93, 110, 120, 121, 122, 143, 154]

### Disassembling the bytecode

In [479]:
def unpack_op(bytecode):
    '''Walks the bytecode two bytes at a time and decodes each instruction.

    Parameters
    =========================================
    bytecode (bytes): the bytecode of a code object

    Generates
    =========================================
    tuple: (offset, opcode, oparg) for each pair of bytes in the bytecode;
    oparg is None for opcodes below dis.HAVE_ARGUMENT
    '''
    pending_high_bits = 0
    for offset in range(0, len(bytecode), 2):
        opcode = bytecode[offset]
        if opcode < dis.HAVE_ARGUMENT:
            # The second byte of the pair is ignored for these opcodes.
            yield (offset, opcode, None)
            continue
        oparg = bytecode[offset + 1] | pending_high_bits
        if opcode == dis.EXTENDED_ARG:
            # Carry this value, shifted up by one byte, into the next oparg.
            pending_high_bits = oparg << 8
        else:
            pending_high_bits = 0
        yield (offset, opcode, oparg)

In [480]:
def find_linestarts(codeobj):
    """Finds the offsets in the bytecode which are the start of a line in the source code.

    Parameters
    =========================================
    codeobj (code): code object (its co_lnotab and co_firstlineno are read)

    Returns
    =========================================
    dict: a dictionary with bytecode offsets as the keys and the source line
    number that starts at each offset as the values
    """
    lnotab = codeobj.co_lnotab
    byte = 0
    line = codeobj.co_firstlineno
    linestart_dict = {byte: line}
    # co_lnotab is a flat sequence of (byte increment, line increment) pairs.
    for byte_incr, line_incr in zip(lnotab[0::2], lnotab[1::2]):
        byte += byte_incr
        # Line increments are stored as signed bytes.
        if line_incr >= 0x80:
            line_incr -= 0x100
        if line_incr == 0:
            # A pair that does not change the line only advances the offset
            # (e.g. the (255, 0) pairs emitted when one line's bytecode is
            # longer than 255 bytes); it must not be reported as a line start.
            continue
        line += line_incr
        linestart_dict[byte] = line
    return linestart_dict

In [481]:
def get_argvalue(offset, codeobj, opcode, oparg):
    '''Finds the human friendly meaning of the oparg of an instruction.

    Parameters
    =========================================
    offset (int): offset of the instruction
    codeobj (code): code object
    opcode (int): opcode of the instruction
    oparg (int): oparg of the instruction

    Returns
    =========================================
    argval: the human friendly meaning of the oparg, or None when the opcode
    belongs to none of the categories below:
      - constant (dis.hasconst): the constant itself; strings and None are
        returned as their repr()
      - name (dis.hasname): the entry of co_names
      - relative jump (dis.hasjrel): the string "to <target offset>"
      - local variable (dis.haslocal): the entry of co_varnames
      - comparison (dis.hascompare): the entry of dis.cmp_op
      - cell/free variable (dis.hasfree): the entry of co_cellvars + co_freevars
    '''
    constants = codeobj.co_consts
    varnames = codeobj.co_varnames
    names = codeobj.co_names
    cell_names = codeobj.co_cellvars + codeobj.co_freevars
    argval = None
    if opcode in dis.hasconst:
        if constants is not None:
            argval = constants[oparg]
            if isinstance(argval, str) or argval is None:
                argval = repr(argval)
    elif opcode in dis.hasname:
        if names is not None:
            argval = names[oparg]
    elif opcode in dis.hasjrel:
        # The oparg is added to the offset of the following instruction.
        argval = "to " + repr(offset + 2 + oparg)
    elif opcode in dis.haslocal:
        if varnames is not None:
            argval = varnames[oparg]
    elif opcode in dis.hascompare:
        argval = dis.cmp_op[oparg]
    elif opcode in dis.hasfree:
        if cell_names is not None:
            argval = cell_names[oparg]
    return argval

In [482]:
def findlabels(codeobj):
    '''Collects every offset in the bytecode that some jump instruction targets.

    Parameters
    =========================================
    codeobj (code): code object

    Returns
    =========================================
    list: list of target offsets, without duplicates, in the order in which
    they are first encountered
    '''
    def jump_targets():
        for position, op, arg in unpack_op(codeobj.co_code):
            if op in dis.hasjrel:
                # Relative jump: the oparg is added to the offset of the next instruction.
                yield position + 2 + arg
            elif op in dis.hasjabs:
                # Absolute jump: the oparg is used as the target directly.
                yield arg

    # dict keys keep first-seen order while dropping repeated targets.
    return list(dict.fromkeys(jump_targets()))

In [483]:
def disassemble(c):
    '''Prints a human readable listing of the bytecode of a code object.

    Each row shows the source line number (only where a new line starts), a
    ">>" marker for jump targets, the offset, the opcode name, the oparg and,
    in parentheses, the meaning of the oparg. Code objects found among the
    arguments are disassembled afterwards, in the order they were encountered.

    Parameters
    =========================================
    c (code): code object

    Raises
    =========================================
    TypeError: if c has no co_code attribute
    '''
    if not hasattr(c, 'co_code'):
        raise TypeError("The argument should be a code object")
    linestarts = find_linestarts(c)
    labels = findlabels(c)
    nested_code_objects = []
    for offset, opcode, oparg in unpack_op(c.co_code):
        argvalue = get_argvalue(offset, c, opcode, oparg)
        if hasattr(argvalue, 'co_code'):
            nested_code_objects.append(argvalue)
        line_start = linestarts.get(offset)
        line_field = str(line_start or '')
        label_field = ">>" if offset in labels else ""
        oparg_field = '' if oparg is None else oparg
        argvalue_field = '' if argvalue is None else "(" + str(argvalue) + ")"
        row = "{0:4}{1:2}{2:5} {3:<22} {4:3} {5}".format(
            line_field, label_field, offset, dis.opname[opcode],
            oparg_field, argvalue_field)
        # Separate source lines with a blank row, except before the very first one.
        if line_start and offset:
            print()
        print(row)
    for nested in nested_code_objects:
        print("\nDisassembly of{}:\n".format(nested))
        disassemble(nested)

In [484]:
s='''a=0
while a<10:
    print(a)
    a += 1
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (0)
          2 STORE_NAME               0 (a)

2         4 SETUP_LOOP              28 (to 34)
    >>    6 LOAD_NAME                0 (a)
          8 LOAD_CONST               1 (10)
         10 COMPARE_OP               0 (<)
         12 POP_JUMP_IF_FALSE       32 

3        14 LOAD_NAME                1 (print)
         16 LOAD_NAME                0 (a)
         18 CALL_FUNCTION            1 
         20 POP_TOP                    

4        22 LOAD_NAME                0 (a)
         24 LOAD_CONST               2 (1)
         26 INPLACE_ADD                
         28 STORE_NAME               0 (a)
         30 JUMP_ABSOLUTE            6 
    >>   32 POP_BLOCK                  
    >>   34 LOAD_CONST               3 (None)
         36 RETURN_VALUE               


In [485]:
# You can also use the dis() function in dis module
dis.dis(c)

  1           0 LOAD_CONST               0 (0)
              2 STORE_NAME               0 (a)

  2           4 SETUP_LOOP              28 (to 34)
        >>    6 LOAD_NAME                0 (a)
              8 LOAD_CONST               1 (10)
             10 COMPARE_OP               0 (<)
             12 POP_JUMP_IF_FALSE       32

  3          14 LOAD_NAME                1 (print)
             16 LOAD_NAME                0 (a)
             18 CALL_FUNCTION            1
             20 POP_TOP

  4          22 LOAD_NAME                0 (a)
             24 LOAD_CONST               2 (1)
             26 INPLACE_ADD
             28 STORE_NAME               0 (a)
             30 JUMP_ABSOLUTE            6
        >>   32 POP_BLOCK
        >>   34 LOAD_CONST               3 (None)
             36 RETURN_VALUE


#### Disassembling a pyc file

In [486]:
# To compile the sample.py file in this folder, write in the terminal:
# python -m compileall sample.py

In [487]:
# Size of the header that precedes the marshalled code object in a .pyc file:
#   Python < 3.3    ->  8 bytes (magic number + source modification time)
#   Python 3.3-3.6  -> 12 bytes (the source size was added)
#   Python >= 3.7   -> 16 bytes (a flags field was added by PEP 552)
header_size = 8
if sys.version_info >= (3, 3):
    header_size = 12
if sys.version_info >= (3, 7):
    header_size = 16
# A forward slash works on every platform, including Windows.
# NOTE(review): the file name is tied to CPython 3.7; marshal data is only
# readable by the interpreter version that wrote it.
with open("__pycache__/sample.cpython-37.pyc", "rb") as f:
    metadata = f.read(header_size)
    code_obj = marshal.load(f)
# The file is no longer needed once the code object has been loaded.
disassemble(code_obj)

1         0 LOAD_CONST               0 (0)
          2 STORE_NAME               0 (a)

2         4 SETUP_LOOP              28 (to 34)
    >>    6 LOAD_NAME                0 (a)
          8 LOAD_CONST               1 (10)
         10 COMPARE_OP               0 (<)
         12 POP_JUMP_IF_FALSE       32 

3        14 LOAD_NAME                1 (print)
         16 LOAD_NAME                0 (a)
         18 CALL_FUNCTION            1 
         20 POP_TOP                    

4        22 LOAD_NAME                0 (a)
         24 LOAD_CONST               2 (1)
         26 INPLACE_ADD                
         28 STORE_NAME               0 (a)
         30 JUMP_ABSOLUTE            6 
    >>   32 POP_BLOCK                  
    >>   34 LOAD_CONST               3 (None)
         36 RETURN_VALUE               


### Bytecode operations

In [488]:
s='''a=1
b=2
c=a+b
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (1)
          2 STORE_NAME               0 (a)

2         4 LOAD_CONST               1 (2)
          6 STORE_NAME               1 (b)

3         8 LOAD_NAME                0 (a)
         10 LOAD_NAME                1 (b)
         12 BINARY_ADD                 
         14 STORE_NAME               2 (c)
         16 LOAD_CONST               2 (None)
         18 RETURN_VALUE               


In [489]:
c.co_consts

(1, 2, None)

In [490]:
c.co_names

('a', 'b', 'c')

#### Functions, global and local variables

In [491]:
s='''a = 1
b = 2
def f(x):
    global b
    b = 3
    y = x + 1
    return y 
f(4)
print(a)
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (1)
          2 STORE_NAME               0 (a)

2         4 LOAD_CONST               1 (2)
          6 STORE_GLOBAL             1 (b)

3         8 LOAD_CONST               2 (<code object f at 0x000002004B87AE40, file "", line 3>)
         10 LOAD_CONST               3 ('f')
         12 MAKE_FUNCTION            0 
         14 STORE_NAME               2 (f)

8        16 LOAD_NAME                2 (f)
         18 LOAD_CONST               4 (4)
         20 CALL_FUNCTION            1 
         22 POP_TOP                    

9        24 LOAD_NAME                3 (print)
         26 LOAD_NAME                0 (a)
         28 CALL_FUNCTION            1 
         30 POP_TOP                    
         32 LOAD_CONST               5 (None)
         34 RETURN_VALUE               

Disassembly of<code object f at 0x000002004B87AE40, file "", line 3>:

5         0 LOAD_CONST               1 (3)
          2 STORE_GLOBAL             0 (b)

6         4 LOAD_FA

In [492]:
c.co_consts

(1, 2, <code object f at 0x000002004B87AE40, file "", line 3>, 'f', 4, None)

In [493]:
c.co_names

('a', 'b', 'f', 'print')

#### Function with a keyword argument

In [494]:
s='''a = 1
b = 2
def f(x=5):
    global b
    b = 3
    y = x + 1
    return y 
f(4)
print(a)
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (1)
          2 STORE_NAME               0 (a)

2         4 LOAD_CONST               1 (2)
          6 STORE_GLOBAL             1 (b)

3         8 LOAD_CONST               7 ((5,))
         10 LOAD_CONST               3 (<code object f at 0x000002004B87A6F0, file "", line 3>)
         12 LOAD_CONST               4 ('f')
         14 MAKE_FUNCTION            1 
         16 STORE_NAME               2 (f)

8        18 LOAD_NAME                2 (f)
         20 LOAD_CONST               5 (4)
         22 CALL_FUNCTION            1 
         24 POP_TOP                    

9        26 LOAD_NAME                3 (print)
         28 LOAD_NAME                0 (a)
         30 CALL_FUNCTION            1 
         32 POP_TOP                    
         34 LOAD_CONST               6 (None)
         36 RETURN_VALUE               

Disassembly of<code object f at 0x000002004B87A6F0, file "", line 3>:

5         0 LOAD_CONST               1 (3)
          2 STORE

#### Built-in functions

In [495]:
s='''a = [1, 2, 3]
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (1)
          2 LOAD_CONST               1 (2)
          4 LOAD_CONST               2 (3)
          6 BUILD_LIST               3 
          8 STORE_NAME               0 (a)
         10 LOAD_CONST               3 (None)
         12 RETURN_VALUE               


#### EXTENDED_ARG

In [496]:
s= 'print(' + '"*",' * 260 + ')'
c = compile(s, "", "exec")
disassemble(c)

1         0 LOAD_NAME                0 (print)
          2 LOAD_CONST               0 ('*')
          4 LOAD_CONST               0 ('*')
          6 LOAD_CONST               0 ('*')
          8 LOAD_CONST               0 ('*')
         10 LOAD_CONST               0 ('*')
         12 LOAD_CONST               0 ('*')
         14 LOAD_CONST               0 ('*')
         16 LOAD_CONST               0 ('*')
         18 LOAD_CONST               0 ('*')
         20 LOAD_CONST               0 ('*')
         22 LOAD_CONST               0 ('*')
         24 LOAD_CONST               0 ('*')
         26 LOAD_CONST               0 ('*')
         28 LOAD_CONST               0 ('*')
         30 LOAD_CONST               0 ('*')
         32 LOAD_CONST               0 ('*')
         34 LOAD_CONST               0 ('*')
         36 LOAD_CONST               0 ('*')
         38 LOAD_CONST               0 ('*')
         40 LOAD_CONST               0 ('*')
         42 LOAD_CONST               0 ('*')
        

#### Conditional statements and jumps

In [497]:
s='''a = 1
if a>=0:
    b=a
else:
    b=-a
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (1)
          2 STORE_NAME               0 (a)

2         4 LOAD_NAME                0 (a)
          6 LOAD_CONST               1 (0)
          8 COMPARE_OP               5 (>=)
         10 POP_JUMP_IF_FALSE       18 

3        12 LOAD_NAME                0 (a)
         14 STORE_NAME               1 (b)
         16 JUMP_FORWARD             6 (to 24)

5   >>   18 LOAD_NAME                0 (a)
         20 UNARY_NEGATIVE             
         22 STORE_NAME               1 (b)
    >>   24 LOAD_CONST               2 (None)
         26 RETURN_VALUE               


In [498]:
s='''a = 1
c = 3
if a>=0 and c==3:
    b=a
else:
    b=-a
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 LOAD_CONST               0 (1)
          2 STORE_NAME               0 (a)

2         4 LOAD_CONST               1 (3)
          6 STORE_NAME               1 (c)

3         8 LOAD_NAME                0 (a)
         10 LOAD_CONST               2 (0)
         12 COMPARE_OP               5 (>=)
         14 POP_JUMP_IF_FALSE       30 
         16 LOAD_NAME                1 (c)
         18 LOAD_CONST               1 (3)
         20 COMPARE_OP               2 (==)
         22 POP_JUMP_IF_FALSE       30 

4        24 LOAD_NAME                0 (a)
         26 STORE_NAME               2 (b)
         28 JUMP_FORWARD             6 (to 36)

6   >>   30 LOAD_NAME                0 (a)
         32 UNARY_NEGATIVE             
         34 STORE_NAME               2 (b)
    >>   36 LOAD_CONST               3 (None)
         38 RETURN_VALUE               


#### Loops and block stack

In [499]:
s='''for i in range(3):
    print(i)
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 SETUP_LOOP              24 (to 26)
          2 LOAD_NAME                0 (range)
          4 LOAD_CONST               0 (3)
          6 CALL_FUNCTION            1 
          8 GET_ITER                   
    >>   10 FOR_ITER                12 (to 24)
         12 STORE_NAME               1 (i)

2        14 LOAD_NAME                2 (print)
         16 LOAD_NAME                1 (i)
         18 CALL_FUNCTION            1 
         20 POP_TOP                    
         22 JUMP_ABSOLUTE           10 
    >>   24 POP_BLOCK                  
    >>   26 LOAD_CONST               1 (None)
         28 RETURN_VALUE               


In [500]:
s='''for i in range(3):
    break
    print(i)
'''
c=compile(s, "", "exec")
disassemble(c)

1         0 SETUP_LOOP              26 (to 28)
          2 LOAD_NAME                0 (range)
          4 LOAD_CONST               0 (3)
          6 CALL_FUNCTION            1 
          8 GET_ITER                   
    >>   10 FOR_ITER                14 (to 26)
         12 STORE_NAME               1 (i)

2        14 BREAK_LOOP                 

3        16 LOAD_NAME                2 (print)
         18 LOAD_NAME                1 (i)
         20 CALL_FUNCTION            1 
         22 POP_TOP                    
         24 JUMP_ABSOLUTE           10 
    >>   26 POP_BLOCK                  
    >>   28 LOAD_CONST               1 (None)
         30 RETURN_VALUE               


### Creating the code object

#### Code object attributes

In [501]:
def f(a, b, *args, c, **kwargs):
    d=1
    def g():
        return 1
    g()
    return 1

In [502]:
f.__code__.co_argcount

2

In [503]:
f.__code__.co_kwonlyargcount

1

In [504]:
f.__code__.co_nlocals

7

In [505]:
f.__code__.co_stacksize

2

In [506]:
f.__code__.co_flags

79

In [507]:
bin(f.__code__.co_flags)

'0b1001111'

In [508]:
f.__code__.co_filename

'<ipython-input-501-cb7dfbcc0072>'

In [509]:
f.__code__.co_name

'f'

#### Bytecode injection

In [510]:
def disassemble_to_list(c):
    '''Disassembles the bytecode of a code object and returns the result as a list.

    Parameters
    =========================================
    c (code): code object

    Returns
    =========================================
    list: one entry per instruction, in bytecode order:
      - [opname, argval] when the oparg has a human friendly meaning
        (for constants, argval is the constant object itself)
      - [opname, oparg] when the instruction has a plain numeric argument
      - [opname] when the instruction takes no argument
    '''
    code_list = []
    for offset, opcode, oparg in unpack_op(c.co_code):
        opname = dis.opname[opcode]
        if opcode in dis.hasconst and c.co_consts is not None:
            # Take the constant straight from co_consts. Going through its
            # repr() and stripping quotes corrupts strings that contain quotes
            # and turns the string 'None' into the None object.
            code_list.append([opname, c.co_consts[oparg]])
            continue
        argval = get_argvalue(offset, c, opcode, oparg)
        if argval is not None:
            code_list.append([opname, argval])
        elif oparg is not None:
            code_list.append([opname, oparg])
        else:
            code_list.append([opname])
    return code_list

In [511]:
def get_oparg(offset, opcode, argval, constants, varnames, names, cell_names):
    '''Finds the oparg of an instruction based on its human friendly meaning.

    Parameters
    =========================================
    offset (int): offset of the instruction
    opcode (int): opcode of the instruction
    argval: human friendly meaning of the oparg
    constants (tuple): co_consts attribute of the code object
    varnames (tuple): co_varnames attribute of the code object
    names (tuple): co_names attribute of the code object
    cell_names (tuple): co_cellvars + co_freevars attributes of the code object

    Returns
    =========================================
    int: the oparg of the instruction (argval itself when the opcode belongs
    to none of the dis.has* categories handled here)

    Raises
    =========================================
    ValueError: if argval is not found in the relevant tuple
    '''
    oparg = argval
    if opcode in dis.hasconst:
        if constants is not None:
            # tuple.index() compares with ==, so asking for True would return
            # the slot of 1 (or 1.0). Prefer a constant of exactly the same type.
            for index, constant in enumerate(constants):
                if type(constant) is type(argval) and constant == argval:
                    return index
            oparg = constants.index(argval)
    elif opcode in dis.hasname:
        if names is not None:
            oparg = names.index(argval)
    elif opcode in dis.hasjrel:
        # argval has the form "to <target offset>"; the oparg is measured from
        # the offset of the following instruction.
        target = int(argval.split()[1])
        oparg = target - offset - 2
    elif opcode in dis.haslocal:
        if varnames is not None:
            oparg = varnames.index(argval)
    elif opcode in dis.hascompare:
        oparg = dis.cmp_op.index(argval)
    elif opcode in dis.hasfree:
        if cell_names is not None:
            oparg = cell_names.index(argval)
    return oparg

In [512]:
def assemble(code_list, constants, varnames, names, cell_names):
    '''Encodes a disassembled bytecode list back into raw bytecode.

    Every instruction becomes two bytes: the opcode followed by its oparg
    (0 for instructions given without an argument).

    Parameters
    =========================================
    code_list (list): disassembled bytecode list
    constants (tuple): co_consts attribute of the code object
    varnames (tuple): co_varnames attribute of the code object
    names (tuple): co_names attribute of the code object
    cell_names (tuple): names of the cell and free variables of the code object

    Returns
    =========================================
    bytes: the bytecode encoded from the disassembled bytecode list
    '''
    encoded = []
    for position, instruction in enumerate(code_list):
        opcode = dis.opname.index(instruction[0])
        if len(instruction) == 2:
            # Instructions are two bytes wide, so the offset is twice the index.
            oparg = get_oparg(position * 2, opcode, instruction[1],
                              constants, varnames, names, cell_names)
        else:
            oparg = 0
        encoded.extend((opcode, oparg))
    return bytes(encoded)

In [513]:
def f(x, y):
    return x + y
c = f.__code__

In [514]:
f(2,5)

7

In [515]:
disassembled_bytecode = disassemble_to_list(c)

In [516]:
disassembled_bytecode

[['LOAD_FAST', 'x'], ['LOAD_FAST', 'y'], ['BINARY_ADD'], ['RETURN_VALUE']]

In [517]:
disassembled_bytecode[2] = ['BINARY_MULTIPLY']

In [518]:
new_co_code= assemble(disassembled_bytecode, c.co_consts,
                      c.co_varnames, c.co_names,
                      c.co_cellvars+c.co_freevars)

In [519]:
nc = types.CodeType(c.co_argcount, c.co_kwonlyargcount,
                    c.co_nlocals, c.co_stacksize, c.co_flags,
                    new_co_code, c.co_consts, c.co_names, 
                    c.co_varnames, c.co_filename, c.co_name, 
                    c.co_firstlineno, c.co_lnotab, 
                    cellvars = c.co_cellvars, freevars = c.co_freevars)
f.__code__ = nc

In [520]:
f(2,5)

10

#### Code object for closures

In [521]:
def func(x):
    def g(y):
        return x + y 
    return g
c = func.__code__

In [522]:
disassemble(c)

2         0 LOAD_CLOSURE             0 (x)
          2 BUILD_TUPLE              1 
          4 LOAD_CONST               1 (<code object g at 0x000002004C8A21E0, file "<ipython-input-521-98e6b6af1a1c>", line 2>)
          6 LOAD_CONST               2 ('func.<locals>.g')
          8 MAKE_FUNCTION            8 
         10 STORE_FAST               1 (g)

4        12 LOAD_FAST                1 (g)
         14 RETURN_VALUE               

Disassembly of<code object g at 0x000002004C8A21E0, file "<ipython-input-521-98e6b6af1a1c>", line 2>:

3         0 LOAD_DEREF               0 (x)
          2 LOAD_FAST                0 (y)
          4 BINARY_ADD                 
          6 RETURN_VALUE               


In [523]:
c.co_varnames

('x', 'g')

In [524]:
c.co_cellvars, c.co_freevars

(('x',), ())

In [525]:
nc = types.CodeType(c.co_argcount, c.co_kwonlyargcount,
                    c.co_nlocals, c.co_stacksize, c.co_flags,
                    new_co_code, c.co_consts, c.co_names, 
                    c.co_varnames, c.co_filename, c.co_name, 
                    c.co_firstlineno, c.co_lnotab, 
                    cellvars = c.co_cellvars, freevars = c.co_freevars)

In [526]:
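# The new code object did not capture the cellvars: the cellvars=/freevars= keyword
# arguments in the previous cell were not applied (see the empty tuples below).
# Pass them positionally after co_lnotab instead (freevars first, then cellvars).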
nc.co_cellvars, nc.co_freevars

((), ())

#### Code optimization

In [527]:
setup1='''import math
mult = 2
def f():
    total = 0
    i = 1
    for i in range(1, 200):
        total += mult * math.log(i)
    return total
'''

setup2='''import math
def f(): 
    log = math.log
    mult = 2
    total = 0
    for i in range(1, 200):
        total += mult * log(i)
    return total
'''

t1 = timeit.timeit(stmt="f()", setup=setup1, number=100000)
t2 = timeit.timeit(stmt="f()", setup=setup2, number=100000)
print("t1=", t1)
print("t2=", t2)

t1= 3.8076129000110086
t2= 3.2230119000014383


In [528]:
c1=compile(setup1, "", "exec")
disassemble(c1)

1         0 LOAD_CONST               0 (0)
          2 LOAD_CONST               1 (None)
          4 IMPORT_NAME              0 (math)
          6 STORE_NAME               0 (math)

2         8 LOAD_CONST               2 (2)
         10 STORE_NAME               1 (mult)

3        12 LOAD_CONST               3 (<code object f at 0x000002004C8A20C0, file "", line 3>)
         14 LOAD_CONST               4 ('f')
         16 MAKE_FUNCTION            0 
         18 STORE_NAME               2 (f)
         20 LOAD_CONST               1 (None)
         22 RETURN_VALUE               

Disassembly of<code object f at 0x000002004C8A20C0, file "", line 3>:

4         0 LOAD_CONST               1 (0)
          2 STORE_FAST               0 (total)

5         4 LOAD_CONST               2 (1)
          6 STORE_FAST               1 (i)

6         8 SETUP_LOOP              36 (to 46)
         10 LOAD_GLOBAL              0 (range)
         12 LOAD_CONST               2 (1)
         14 LOAD_CONST         

In [529]:
c2=compile(setup2, "", "exec")
disassemble(c2)

1         0 LOAD_CONST               0 (0)
          2 LOAD_CONST               1 (None)
          4 IMPORT_NAME              0 (math)
          6 STORE_NAME               0 (math)

2         8 LOAD_CONST               2 (<code object f at 0x000002004C8A25D0, file "", line 2>)
         10 LOAD_CONST               3 ('f')
         12 MAKE_FUNCTION            0 
         14 STORE_NAME               1 (f)
         16 LOAD_CONST               1 (None)
         18 RETURN_VALUE               

Disassembly of<code object f at 0x000002004C8A25D0, file "", line 2>:

3         0 LOAD_GLOBAL              0 (math)
          2 LOAD_ATTR                1 (log)
          4 STORE_FAST               0 (log)

4         6 LOAD_CONST               1 (2)
          8 STORE_FAST               1 (mult)

5        10 LOAD_CONST               2 (0)
         12 STORE_FAST               2 (total)

6        14 SETUP_LOOP              34 (to 50)
         16 LOAD_GLOBAL              2 (range)
         18 LOAD_CONST  

### Example: Defining constants in Python

In [530]:
class ConstError(Exception):
    '''Raised when a constant is declared incorrectly or is reassigned.'''
    pass

def add_const(cl):
    '''Detects the declared constants and modifies the disassembled bytecode list accordingly.

    A declaration is the pair ['LOAD_GLOBAL', 'const'] followed by
    ['STORE_ATTR', name]. The first instruction is replaced by ['NOP'] and the
    second becomes ['STORE_FAST', name]. Every ['LOAD_GLOBAL', name] of a
    declared constant becomes ['LOAD_FAST', name]. The last instruction of the
    list is never inspected. The input list is left untouched.

    Parameters
    =========================================
    cl (list): disassembled bytecode list

    Returns
    =========================================
    tuple: a tuple of the modified bytecode list plus the tuple of constant variables

    Raises
    =========================================
    ConstError: if a constant is declared twice, is declared without being
    assigned right away, or is reassigned after its declaration
    '''
    # Copy every instruction as well as the outer list: the rewrites below
    # change instructions in place and must not leak into the caller's list.
    code_list = [list(instruction) for instruction in cl]
    constants = []
    indices = []

    # Find the variables declared as const. Add their names and the indices of
    # their declaring stores to the constants and indices lists.
    for index, instruction in enumerate(code_list[:-1]):
        if instruction != ['LOAD_GLOBAL', 'const']:
            continue
        code_list[index] = ['NOP']
        next_instruction = code_list[index + 1]
        if next_instruction[0] != 'STORE_ATTR':
            raise ConstError("The constant variable should be assigned after declaration!")
        if next_instruction[1] in constants:
            raise ConstError("You cannot declare a constant variable twice!")
        constants.append(next_instruction[1])
        indices.append(index + 1)
        next_instruction[0] = 'STORE_FAST'

    # Turn global loads of the constants into local loads, and raise an
    # exception if a constant is stored to anywhere but in its declaration.
    for index, instruction in enumerate(code_list[:-1]):
        if instruction[0] == 'LOAD_GLOBAL' and instruction[1] in constants:
            instruction[0] = 'LOAD_FAST'
        if (instruction[0] in ('STORE_GLOBAL', 'STORE_FAST')
                and instruction[1] in constants and index not in indices):
            raise ConstError("'"+instruction[1]+"' is a constant and cannot be reassigned!")

    return code_list, tuple(constants)

In [531]:
def const(f):
    '''
    A decorator to apply add_const() to the bytecode of a target function.

    The constants detected by add_const() are appended to the local variable
    names of the function; they and the name 'const' are removed from co_names.

    Parameters
    =========================================
    f (function): target function

    Returns
    =========================================
    function: the input function with a new code object and modified bytecode

    Raises
    =========================================
    ConstError: propagated from add_const() when a constant is misused
    '''
    c = f.__code__
    code_list = disassemble_to_list(c)
    new_code_list, const_vars = add_const(code_list)
    removed_names = const_vars + ('const',)
    new_co_names = tuple(i for i in c.co_names if i not in removed_names)
    new_varnames = c.co_varnames + const_vars
    # Cell variables come before free variables, the same order get_argvalue()
    # uses when it turns an oparg into a name.
    new_bytecode = assemble(new_code_list, c.co_consts, new_varnames, new_co_names,
                            c.co_cellvars + c.co_freevars)

    # freevars and cellvars are passed positionally (freevars first) so that a
    # decorated function that is, or contains, a closure keeps its variables.
    # NOTE(review): this argument list matches the Python 3.7 types.CodeType
    # signature; newer versions take additional arguments - confirm before
    # running on another version.
    nc = types.CodeType(c.co_argcount, c.co_kwonlyargcount, c.co_nlocals + len(const_vars),
                        c.co_stacksize, c.co_flags, new_bytecode, c.co_consts, new_co_names,
                        new_varnames, c.co_filename, c.co_name,
                        c.co_firstlineno, c.co_lnotab,
                        c.co_freevars, c.co_cellvars)
    f.__code__ = nc
    return f

In [532]:
def f(x):
    const. A=5
    A = A + 1
    return A*x

In [533]:
# This will raise an exception:
# @const
# def f(x):
#     const. A=5
#     A = A + 1
#     return A*x