In [1]:
# Python is slower than Fortran and C for a variety of reasons

In [2]:
#  Python is Dynamically Typed rather than Statically Typed.

# At the time the program executes, 
# the interpreter  know the type of the variables that are defined. 

# The difference between a C variable (I'm using C as a stand-in for compiled languages) and a Python variable 



In [2]:
# python code
a = 1

In [4]:
#   a->PyObject_HEAD->typecode to integer
#   Set a->val = 1

type(a )

int

In [5]:
import sys

print("Python version =",                  sys.version[:5])

('Python version =', '2.7.1')


In [28]:
x = 42
print(x)

42


In [7]:
type(x)

int

In [None]:
#  C:\Users\jvt\Anaconda3\include/object.h

/* Object and type object interface */

/*
Objects are structures allocated on the heap.  Special rules apply to
the use of objects to ensure they are properly garbage-collected.
Objects are never allocated statically or on the stack; they must be
accessed through special macros and functions only.  (Type objects are
exceptions to the first rule; the standard types are represented by
statically initialized type objects, although work on type/class unification
for Python 2.2 made it possible to have heap-allocated type objects too).

An object has a 'reference count' that is increased or decreased when a
pointer to the object is copied or deleted; when the reference count
reaches zero there are no references to the object left and it can be
removed from the heap.

An object has a 'type' that determines what it represents and what kind
of data it contains.  An object's type is fixed when it is created.
Types themselves are represented as objects; an object contains a
pointer to the corresponding type object.  The type itself has a type
pointer pointing to the object representing the type 'type', which
contains a pointer to itself!).

Objects do not float around in memory; once allocated an object keeps
the same size and address.  Objects that must hold variable-size data
can contain pointers to variable-size parts of the object.  Not all
objects of the same type have the same size; but the size cannot change
after allocation.  (These restrictions are made so a reference to an
object can be simply a pointer -- moving an object would require
updating all the pointers, and changing an object's size would require
moving it if there was another object right next to it.)

Objects are always accessed through pointers of the type 'PyObject *'.
The type 'PyObject' is a structure that only contains the reference count
and the type pointer.  The actual memory allocated for an object
contains other data that can only be accessed after casting the pointer
to a pointer to a longer structure type.  This longer type must start
with the reference count and type fields; the macro PyObject_HEAD should be
used for this (to accommodate for future changes).  The implementation
of a particular object type can cast the object pointer to the proper
type and back.

A standard interface exists for objects that contain an array of items
whose size is determined when the object is allocated.
*/

In [21]:
# How Python manage its objects in memory, that leads me to PyObject.

# Let’s take a look at how PyObject being defined in Python first.

#  C:\Users\jvt\Anaconda3\include/object.h

#define _PyObject_HEAD_EXTRA            \
    struct _object *_ob_next;           \
    struct _object *_ob_prev;


typedef struct _object {
    _PyObject_HEAD_EXTRA
    Py_ssize_t ob_refcnt;
    struct _typeobject *ob_type;
} PyObject;



typedef struct _typeobject {
    
    PyObject_VAR_HEAD
    const char *tp_name; /* For printing, in format "<module>.<name>" */
    Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */

    .....
} PyTypeObject;
#endif


PyTypeObject holds the type information of PyObject, so we can use PyObject pointer as 
the only type of pointers in Python.
So how can we know the object’s type

In [None]:
# C:\Users\jvt\Anaconda3\include/object.h

typedef struct _typeobject PyTypeObject

typedef struct _object {
    _PyObject_HEAD_EXTRA
    Py_ssize_t ob_refcnt;
    struct _typeobject *ob_type;
} PyObject;

typedef struct {
    PyObject ob_base;
    Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;

PyObject is the basic structure for fixed length objects

PyVarObject  is the basis for various length objects

In [9]:
#The actual x variable in CPython is stored in a structure which is defined in the CPython source code,
# in Include/longintrepr.h

# C:\Users\jvt\Anaconda3\include/longintrepr

In [8]:
# https://hg.python.org/cpython/file/3.4/Include/longintrepr.h/#l89

#define PyObject_VAR_HEAD      PyVarObject ob_base;

# struct _longobject {
# 	PyObject_VAR_HEAD
# 	digit ob_digit[1];
# };



In [None]:
# Our integer object works out to something like the following structure:

#C:\Users\jvt\.julia\v0.6\Conda\deps\usr\pkgs\python-2.7.14-h8c3f1cb_23\include/intobject.h

typedef struct _object {
    _PyObject_HEAD_EXTRA
    Py_ssize_t ob_refcnt;
    struct _typeobject *ob_type;
} PyObject;
   
#define PyObject_HEAD                   PyObject ob_base;
    
 typedef struct {
    PyObject_HEAD  # ob_base variable is a object to the structure containing all the type information and method definitions for the object
    long ob_ival; # ob_digit holds the actual numerical value.
} PyIntObject;
    
    
struct _longobject {
    long ob_refcnt;
    PyTypeObject *ob_type; 
    # ob_type variable is a pointer to the structure containing all the type information and method definitions for the object
    size_t ob_size;
    long ob_digit[1]; # ob_digit holds the actual numerical value.
};

In [3]:
# We start with defining a Python representation of the C structure:
    
import ctypes

class IntStruct(ctypes.Structure):
    _fields_ = [("ob_refcnt", ctypes.c_long),
                ("ob_type", ctypes.c_void_p),
                ("ob_size", ctypes.c_ulong),
                ("ob_digit", ctypes.c_long)]
    
    def __repr__(self):
        return ("IntStruct(ob_digit={self.ob_digit}, "
                "refcount={self.ob_refcnt})").format(self=self)

In [4]:
# Now let's look at the internal representation for some number, say 42. 
# We'll use the fact that in CPython, the id function gives the memory location of the object:

num = 42
IntStruct.from_address(id(num))

# CPython, the id function gives the memory location of the object:
# The ob_digit attribute points to the correct location in memory!

IntStruct(ob_digit=0, refcount=29)

In [None]:
But what about refcount? We've only created a single value: why is the reference count so much greater than one?

Well it turns out that Python uses small integers a lot. 

If a new PyObject were created for each of these integers, it would take a lot of memory.

Because of this, Python implements common integer values as singletons: that is, only one 
copy of these numbers exist in memory. In other words, every time you create a new Python integer
in this range, you're simply creating a reference to the singleton with that value:

In [12]:
x = 42
y = 42
id(x) == id(y)

True

In [13]:
# When you get to much bigger integers (larger than 255 in Python 3.4), this is no longer true:

x = 1234
y = 1234
id(x) == id(y)

False

In [82]:
# C:\Users\jvt\Anaconda3\include/listobject.h

# Digging into Python Lists

/*

Another generally useful object type is a list of object pointers.
This is a mutable type: the list items can be changed, and items can be
added or removed.  Out-of-range indices or non-list objects are ignored.

*** WARNING *** PyList_SetItem does not increment the new item's reference
count, but does decrement the reference count of the item it replaces,
if not nil.  It does *decrement* the reference count if it is *not*
inserted in the list.  Similarly, PyList_GetItem does not increment the
returned item's reference count.

*/


typedef struct {
    
    
    PyObject_VAR_HEAD
    
    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    
    PyObject **ob_item;

    /* ob_item contains space for 'allocated' elements.  The number
     * currently in use is ob_size.
     * Invariants:
     *     0 <= ob_size <= allocated
     *     len(list) == ob_size
     *     ob_item == NULL implies ob_size == allocated == 0
     * list.sort() temporarily sets allocated to -1 to detect mutations.
     *
     * Items must normally not be NULL, except during construction when
     * the list is not yet visible outside the function that builds it.
     */
    
    Py_ssize_t allocated;
    
} PyListObject;

SyntaxError: invalid syntax (<ipython-input-82-2b71266a9cbd>, line 5)

In [None]:
# Int type list object

typedef struct {
    long ob_refcnt;
    PyTypeObject *ob_type;
    Py_ssize_t ob_size;
    PyObject **ob_item;
    long allocated;
} PyListObject;

PyObject **ob_item is what points to the contents of the list, 
ob_size value tells us how many items are in the list.

In [14]:
class ListStruct(ctypes.Structure):
    _fields_ = [("ob_refcnt", ctypes.c_long),
                ("ob_type", ctypes.c_void_p),
                ("ob_size", ctypes.c_ulong),
                ("ob_item", ctypes.c_long),  # PyObject** pointer cast to long
                ("allocated", ctypes.c_ulong)]
    
    def __repr__(self):
        return ("ListStruct(len={self.ob_size}, "
                "refcount={self.ob_refcnt})").format(self=self)

In [15]:
L = [1,2,3,4,5]
ListStruct.from_address(id(L))

ListStruct(len=5, refcount=1)

In [19]:
tup = [L, L]  # two more references to L
ListStruct.from_address(id(L))

# let's create a few extra references to the list, and see how it affects the reference count:

ListStruct(len=5, refcount=3)

In [20]:
# Elements are stored via a contiguous array of PyObject pointers. ctypes

# get a raw pointer to our list
Lstruct = ListStruct.from_address(id(L))

type(Lstruct)

# create a type which is an array of integer pointers the same length as L
PtrArray = Lstruct.ob_size * ctypes.POINTER(IntStruct)

# instantiate this type using the ob_item pointer
L_values = PtrArray.from_address(Lstruct.ob_item)

In [22]:
type(Lstruct)

# [ptr[0] for ptr in L_values]  # ptr[0] dereferences the pointer crash

__main__.ListStruct

In [24]:
class NumpyStruct(ctypes.Structure):
    _fields_ = [("ob_refcnt", ctypes.c_long),
                ("ob_type", ctypes.c_void_p),
                ("ob_data", ctypes.c_long),  # char* pointer cast to long
                ("ob_ndim", ctypes.c_int),
                ("ob_shape", ctypes.c_voidp),
                ("ob_strides", ctypes.c_voidp)]
    
    @property
    def shape(self):
        return tuple((self.ob_ndim * ctypes.c_int64).from_address(self.ob_shape))
    
    @property
    def strides(self):
        return tuple((self.ob_ndim * ctypes.c_int64).from_address(self.ob_strides))
    
    def __repr__(self):
        return ("NumpyStruct(shape={self.shape}, "
                "refcount={self.ob_refcnt})").format(self=self)

In [30]:
# WARNNG: never do this!
id113 = id(113)
iptr = IntStruct.from_address(id113)
iptr.ob_digit = 4  # now Python's 113 contains a 4!

113 == 4

False

In [31]:
iptr

IntStruct(ob_digit=4, refcount=10)

In [None]:
# WARNING: never do this!
L = [42]
Lwrapper = ListStruct.from_address(id(L))
item_address = ctypes.c_long.from_address(Lwrapper.ob_item)
print("before:", L)

# change the c-pointer of the list item
item_address.value = id(6)

# we need to update reference counts by hand
IntStruct.from_address(id(42)).ob_refcnt -= 1
IntStruct.from_address(id(6)).ob_refcnt += 1

print("after: ", L)

In [3]:
import ctypes

class CStructStruct(ctypes.Structure):
    _fields_ = [("ob_refcnt", ctypes.c_long),
                ("ob_type", ctypes.c_void_p),
                ("ob_ptr", ctypes.c_long),  # char* pointer cast to long
                    ]
    
    def __repr__(self):
        return ("CStructStruct(ptr=0x{self.ob_ptr:x}, "
                "refcnt={self.ob_refcnt})").format(self=self)

In [4]:
tmp = IntStruct.from_address(id(0))
meta = CStructStruct.from_address(id(tmp))

print(repr(meta))

NameError: name 'IntStruct' is not defined

In [6]:
meta_wrapper = CStructStruct.from_address(id(meta))
meta_wrapper.ob_ptr = id(meta)

print(meta.ob_ptr == id(meta))
print(repr(meta))

NameError: name 'meta' is not defined