# Extension Types

Wherein we learn about one of Cython's most powerful features -- easy creation of Python extension types.

In [None]:
%load_ext Cython

## Random number generation

We will implement a very simple random number generator, the [linear congruential generator (LCG)](https://en.wikipedia.org/wiki/Linear_congruential_generator):

$$X_{n+1} = (aX_n + c)\mod m$$

We'll use $a=1664525$, $c=1013904223$, and $m=2^{32}$.

*Caveat emptor*: Don't use these simple RNGs for anything real, especially with [much better implementations](https://numpy.org/doc/stable/reference/random/index.html) so readily available.

## Pure Python version

In [None]:
class PyLCG(object):
    """Pure-Python linear congruential generator (LCG).

    Successive states follow ``x_next = (a * x + c) % m``.

    Parameters
    ----------
    a : int
        Multiplier.
    c : int
        Increment.
    m : int
        Modulus; must be strictly positive.
    seed : int
        Initial state. It is reduced modulo ``m`` so that every value the
        generator hands out, including the very first one, lies in ``[0, m)``.

    Raises
    ------
    ValueError
        If ``m <= 0``.
    """

    def __init__(self, a=1664525, c=1013904223, m=2**32, seed=0):
        self.a = a
        self.c = c
        if m <= 0:
            raise ValueError("m must be > 0, given {}".format(m))
        self.m = m
        # The RNG state. Without the reduction an out-of-range seed (negative,
        # or >= m) would be returned verbatim by the first draw.
        self.x = seed % m

    def _advance(self):
        """Return the current state, then step the generator to its next state."""
        r = self.x
        self.x = (self.a * self.x + self.c) % self.m
        return r

    def randint(self, size=None):
        """Draw from the generator.

        With ``size=None`` a single integer is returned; otherwise a NumPy
        array built from ``size`` successive draws is returned.
        """
        if size is None:
            return self._advance()
        return np.asarray([self._advance() for _ in range(size)])

### Tests and timing

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
rng = PyLCG()
nums = rng.randint(size=10000) % 100
# `sns.distplot` is deprecated; `sns.histplot` draws the same count histogram
# (no KDE unless requested). Labels make the figure readable on its own.
sns.histplot(nums, bins=100).set(
    xlabel="draw modulo 100", ylabel="count", title="PyLCG: 10000 draws");

In [None]:
%%timeit rng = PyLCG()
# The statement on the magic line is timeit's setup code (run, but not timed);
# only the 10000-draw call below is measured.
nums = rng.randint(size=10000)

## Cython version, extension types, and `cdef class`

In [None]:
%%cython -a

import numpy as np
cimport numpy as cnp
cimport cython

# Creates a new extension type: https://docs.python.org/3/extending/newtypes.html
cdef class CyLCG:
    """Linear congruential generator implemented as a Cython extension type.

    Successive states follow ``x_next = (a * x + c) % m``. The constructor
    takes ``a`` (multiplier), ``c`` (increment), ``m`` (modulus, must be > 0)
    and ``seed`` (initial state, reduced modulo ``m``), and raises
    ``ValueError`` when ``m <= 0``.
    """

    # We declare the compile-time types of our *instance* attributes here.
    # This is similar to C++ class declaration syntax.
    # `long long` is at least 64 bits everywhere; a plain `long` is only 32 bits
    # on some platforms (notably Windows), where m=2**32 would not fit and the
    # 'i8' buffer used in randint() would not match.
    cdef long long a, c, m, x

    # Special Cython-defined initializer.
    # Called before __init__ to initialize all C-level attributes.
    def __cinit__(self, long long a=1664525, long long c=1013904223,
                  long long m=2**32, long long seed=0):
        self.a = a
        self.c = c
        if m <= 0:
            raise ValueError("m must be > 0, given {}".format(m))
        self.m = m
        # cdivision is only switched on for _advance(), so this `%` follows
        # Python semantics and folds any seed (even a negative one) into [0, m).
        self.x = seed % m

    # cdef / cpdef methods are supported
    @cython.cdivision(True)
    cpdef long long _advance(self):
        """Return the current state, then step the generator to its next state."""
        cdef long long r = self.x
        self.x = (self.a * self.x + self.c) % self.m
        return r

    # Regular def method
    @cython.boundscheck(False)
    @cython.wraparound(False)
    def randint(self, size=None):
        """Draw from the generator.

        With ``size=None`` a single integer is returned; otherwise a NumPy
        array of ``size`` successive draws is returned.
        """
        if size is None:
            # Call to self._advance() here is efficient and at the C level.
            return self._advance()
        # Coerce once, before allocating, so the loop bound and the buffer
        # length are guaranteed to be the same number (bounds checks are off).
        cdef Py_ssize_t n = size
        cdef long long[::1] out = np.empty((n,), dtype='i8')
        cdef Py_ssize_t i
        for i in range(n):
            out[i] = self._advance()
        return np.asarray(out)

### Tests and timing

In [None]:
rng = CyLCG()
nums = rng.randint(size=10000) % 100
# `sns.distplot` is deprecated; `sns.histplot` draws the same count histogram
# (no KDE unless requested). Labels make the figure readable on its own.
sns.histplot(nums, bins=100).set(
    xlabel="draw modulo 100", ylabel="count", title="CyLCG: 10000 draws");

In [None]:
%%timeit rng = CyLCG()
# The statement on the magic line is timeit's setup code (run, but not timed);
# only the 10000-draw call below is measured.
nums = rng.randint(size=10000)

### Pure-Python memory footprint
The sum of the number of bytes in:
* the `PyLCG()` object itself
* the instance `__dict__`
* and each key / value in the instance `__dict__`

In [None]:
import sys

In [None]:
pyrng = PyLCG()
# Footprint = the instance, plus its attribute dict, plus every key and value
# stored in that dict.
(sys.getsizeof(pyrng)
 + sys.getsizeof(vars(pyrng))
 + sum(sys.getsizeof(part) for pair in vars(pyrng).items() for part in pair))

We could improve this by using a `__slots__` attribute, but won't go down that road.

### Cython memory footprint
The sum of the number of bytes in:
* The `CyLCG()` object itself
* each instance `long` (stored inline in the object's C struct; there is no per-instance `__dict__`)

In [None]:
# The four C integers (a, c, m, x) are fields of the extension type's own
# object struct, so the size reported for the object already includes them;
# adding 4 * 8 on top would count them twice.
sys.getsizeof(CyLCG())

## Public / private instance attributes

### Extension type instance attributes are not visible to Python by default

In [None]:
# Instance used by the following cells to probe attribute access from Python.
cyrng = CyLCG(seed=42)

In [None]:
# Probe each C-level attribute separately: a single tuple expression would stop
# at the first failure, and an uncaught error would halt "Run All".
# => all give `AttributeError`
for name in ("a", "c", "m", "x"):
    try:
        getattr(cyrng, name)
    except AttributeError as err:
        print("cyrng.{}: AttributeError: {}".format(name, err))

### Extension type instances aren't open to new attributes

In [None]:
# The assignment is expected to fail; catch and show the error so the notebook
# still runs top to bottom.
try:
    cyrng.z = 15
except AttributeError as err:
    print("AttributeError: {}".format(err))

### But we can control that with `public` and `readonly` declarations

In [None]:
%%cython -a

import numpy as np
cimport cython

cdef class CyLCGOpen:
    """Linear congruential generator whose C attributes are exposed to Python.

    Successive states follow ``x_next = (a * x + c) % m``. The state ``x`` is
    declared ``public`` (readable and writable from Python); ``a``, ``c`` and
    ``m`` are ``readonly``. The constructor raises ``ValueError`` when
    ``m <= 0`` and reduces ``seed`` modulo ``m``.
    """

    # `long long` is at least 64 bits everywhere; a plain `long` is only 32 bits
    # on some platforms (notably Windows), where m=2**32 would not fit and the
    # 'i8' buffer used in randint() would not match.
    cdef public long long x
    cdef readonly long long a, c, m

    def __cinit__(self, long long a=1664525, long long c=1013904223,
                  long long m=2**32, long long seed=0):
        self.a = a
        self.c = c
        if m <= 0:
            raise ValueError("m must be > 0, given {}".format(m))
        self.m = m
        # cdivision is only switched on for _advance(), so this `%` follows
        # Python semantics and folds any seed (even a negative one) into [0, m).
        self.x = seed % m

    # cdef / cpdef methods are supported
    @cython.cdivision(True)
    cpdef long long _advance(self):
        """Return the current state, then step the generator to its next state."""
        cdef long long r = self.x
        self.x = (self.a * self.x + self.c) % self.m
        return r

    # Regular def method
    @cython.boundscheck(False)
    @cython.wraparound(False)
    def randint(self, size=None):
        """Draw from the generator.

        With ``size=None`` a single integer is returned; otherwise a NumPy
        array of ``size`` successive draws is returned.
        """
        if size is None:
            # Call to self._advance() here is efficient and at the C level.
            return self._advance()
        # Coerce once, before allocating, so the loop bound and the buffer
        # length are guaranteed to be the same number (bounds checks are off).
        cdef Py_ssize_t n = size
        cdef long long[::1] out = np.empty((n,), dtype='i8')
        cdef Py_ssize_t i
        for i in range(n):
            out[i] = self._advance()
        return np.asarray(out)

In [None]:
# Default-constructed instance used by the following cells to show attribute access.
lcg_open = CyLCGOpen()

In [None]:
# every instance attribute is now readable from Python
tuple(getattr(lcg_open, field) for field in ("a", "c", "m", "x"))

In [None]:
# and we can modify `x`: it is declared `cdef public`, so Python code may
# assign to it as well as read it
lcg_open.x = 42
lcg_open.x

In [None]:
# but we can't modify `readonly` attributes: the assignment raises
# AttributeError, which is caught and shown so the notebook still runs
# top to bottom
try:
    lcg_open.a = -1
except AttributeError as err:
    print("AttributeError: {}".format(err))

### Cython extension types in more depth
* [cython.org docs](http://cython.readthedocs.io/en/latest/src/userguide/extension_types.html)
* [_Cython_ Book, chapter 5](http://shop.oreilly.com/product/0636920033431.do)