## Inspecting the memory layout of a Python list

Based on Jake VanderPlas's blog post [Why Python is Slow: Looking Under the Hood](https://jakevdp.github.io/blog/2014/05/09/why-python-is-slow/).

In [None]:
import sys
import ctypes
import numpy as np

In [None]:
# Create the sample objects up front: a nested list and a float.
x, y = [1, [2, 3]], 1.2
i = x[0]  # first element of the list, a plain int

# Report each object's type and, for the two scalars, sys.getsizeof() in bytes.
print("x = {} has type {}".format(x, type(x)))
print("i = {} has type {} and uses {} bytes".format(i, type(i), sys.getsizeof(i)))
print("y = {} has type {} and uses {} bytes".format(y, type(y), sys.getsizeof(y)))

In [None]:
# Which type does Python assign to an integer literal?
N = 10000
print("N={} has type {}".format(N, type(N)))

In [None]:
# Python ints are dynamically sized: compare the storage of a tiny value
# with that of a very large one.
a = 46560000000000000000000000000000000000000
print("a has type", type(a))

# sys.getsizeof() reports the size of each int object in bytes.
print("bytes used to store 1:", sys.getsizeof(1))
print("bytes used to store a:", sys.getsizeof(a))

In [None]:
class IntStruct(ctypes.Structure): # PyLongObject
    """ctypes overlay for the leading fields of CPython's ``PyLongObject``.

    Use ``IntStruct.from_address(id(some_int))`` to read the header of a live
    int object. This relies on CPython, where ``id()`` is the object's address.

    Fields:
        ob_refcnt: reference count (``Py_ssize_t``).
        ob_type:   address of the type object.
        ob_size:   ``Py_ssize_t`` size field of the variable-size header.
                   NOTE(review): on CPython 3.12+ this slot holds ``lv_tag``
                   rather than a plain digit count -- confirm for your version.
        ob_digit:  the first digit word only; larger ints have more digits
                   after it that this struct does not expose.
    """
    # Py_ssize_t fields are declared as c_ssize_t rather than c_long/c_ulong:
    # c_long is only 32 bits on 64-bit Windows (which would shift every
    # following field), and the size field is signed, not unsigned.
    _fields_ = [("ob_refcnt", ctypes.c_ssize_t),
                ("ob_type", ctypes.c_void_p),
                ("ob_size", ctypes.c_ssize_t),
                ("ob_digit", ctypes.c_uint32)]

This is how an int is represented in memory:

In [None]:
# Overlay IntStruct on a live int object and print its header fields.
x = 10000 # change to a really large number
y = x  # second name bound to the same object as x
# z = x       # optional experiment: bind a third name and re-run
# help(id)    # optional: show the documentation of id()

# id(x) and id(y) belong to the same object; the two literals are shown for comparison.
print("ids:", id(x), id(y), id(100), id(1234567))

# id(x) is used as the memory address of the object (CPython-specific).
info=IntStruct.from_address(id(x))
# ob_digit is declared as a single c_uint32 in IntStruct, so only one
# 32-bit word is printed here, even when x is a really large number.
print("refcount =", info.ob_refcnt, 
      "type =", info.ob_type,
      "size =", info.ob_size, 
      "digits =", info.ob_digit)

How about a list?

In [None]:
class ListStruct(ctypes.Structure): # struct PyListObject
    """ctypes overlay for CPython's ``PyListObject``.

    Use ``ListStruct.from_address(id(some_list))`` to read the header of a
    live list. This relies on CPython, where ``id()`` is the object's address.

    Fields:
        ob_refcnt: reference count (``Py_ssize_t``).
        ob_type:   address of the type object.
        ob_size:   number of elements currently stored (``Py_ssize_t``).
        ob_item:   address of the array of element pointers; ctypes returns
                   ``None`` for a NULL pointer.
        allocated: number of element slots reserved in ``ob_item``
                   (``Py_ssize_t``).
    """
    # Py_ssize_t fields are declared as c_ssize_t rather than c_long/c_ulong:
    # c_long is only 32 bits on 64-bit Windows (which would shift every
    # following field), and these counters are signed, not unsigned.
    _fields_ = [("ob_refcnt", ctypes.c_ssize_t),
                ("ob_type", ctypes.c_void_p),
                ("ob_size", ctypes.c_ssize_t),
                ("ob_item", ctypes.c_void_p),
                ("allocated", ctypes.c_ssize_t)]

# A two-element list whose second element is itself a list.
x = [1,[2,3]]

# Overlay ListStruct on the list object; id(x) is used as its memory address (CPython-specific).
info = ListStruct.from_address(id(x))
# ob_item is shown in hex because it is an address; ob_size and allocated are counts.
print("refcount =", info.ob_refcnt, 
      "type =", info.ob_type,
      "size =", info.ob_size, 
      "item =", hex(info.ob_item),
      "allocated = ", info.allocated)


What happens if we append to the list x?

In [None]:
# NOTE: this cell mutates x, so re-running it appends another element each time.
y = x # this increases reference count
x.append(4) # this will resize the array
# Read the header again from the same list object so the fields can be compared with the previous output.
info = ListStruct.from_address(id(x))
print("refcount =", info.ob_refcnt, 
      "type =", info.ob_type,
      "size =", info.ob_size, 
      "item =", hex(info.ob_item),
      "allocated = ", info.allocated)
