### What is Cython?

- python has a VM on which it runs
- the C side of it has no VM: it runs native machine code
- cython bridges these and lets you bring in static typing from C into python

In [1]:
%load_ext Cython

In [2]:
def f(x):  # pure-Python baseline used for the timing comparisons in this notebook
    return 2.0*x  # multiply the argument by the float constant 2.0
print(id(f))
%timeit a=f(200)

4395401696
The slowest run took 16.23 times longer than the fastest. This could mean that an intermediate result is being cached 
1000000 loops, best of 3: 148 ns per loop


In [3]:
import dis
print(type(f))
dis.dis(f)

<class 'function'>
  2           0 LOAD_CONST               1 (2.0)
              3 LOAD_FAST                0 (x)
              6 BINARY_MULTIPLY
              7 RETURN_VALUE


#### Python functions in cython with `def`

This valid python code is valid cython code

In [4]:
%%cython --annotate
def f(x):  # same source text as the pure-Python f, but compiled through the %%cython magic
    return 2.0*x  # x is untyped here, so it is still handled as a Python object

In [5]:
print(id(f))
%timeit a=f(200)

4392970784
The slowest run took 20.66 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 98.5 ns per loop


In [6]:
print(type(f))
dis.dis(f)

<class 'builtin_function_or_method'>


TypeError: don't know how to disassemble builtin_function_or_method objects

- notice the difference in time
- it will run faster, as it's not running bytecode
- this really is a measure of the bytecode-interpretation overhead, since the work done is otherwise the same
- cython compiles python down to C, but makes it accessible from python
- still must box and unbox to `PyObject`s, especially on inputs and outputs as these must speak Python types

In [7]:
%%cython --annotate
def f(int x):  # the argument is converted to a C int on entry; a non-integer raises TypeError
    return 2.0*x  # arithmetic on the typed value; the result is handed back as a Python object


In [8]:
2*'a', f('a')

TypeError: an integer is required

In [9]:
print(id(f))
%timeit a=f(200)

4393175296
The slowest run took 19.98 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 85.3 ns per loop


- you can statically type inside a def!
- this is a python function
- cython will infer variable types when semantics don't change. 

#### C functions in cython with `cdef`

- generates a C function, entirely.
- no python objects anywhere in `_f`
- `_f` is not accessible from python, and must be wrapped
- can be called by any function (python or C) within the same cell or source file
- limited now to C integers; can't use python's arbitrary precision

In [10]:
%%cython --annotate
# C-only function: takes a C int and returns a C double; it is not visible from Python.
cdef double _f(int x):
    return 2.0*x

# Python-visible wrapper: the only way to reach _f from outside this cell.
def f(x):
    return _f(x)

In [11]:
print(id(_f))

NameError: name '_f' is not defined

In [12]:
print(id(f))
%timeit a=f(200)

4392971288
The slowest run took 15.56 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 66.5 ns per loop


- Inside a function `cdef`s are local

In [13]:
%%cython --annotate
def ff(int x):
    """Return x + 1, holding the constant in a function-local C int."""
    cdef int one = 1
    return one + x

In [15]:
ff(3)

4

####  `def` + `cdef` = `cpdef`

- get a C-only version and a python wrapper with the same name
- limited to types available in both python and C

In [16]:
%%cython --annotate
cpdef double f(int x):  # cpdef: one definition provides both a C function and a Python wrapper named f
    return 2.0*x

In [17]:
print(id(f))
%timeit a=f(200)

4393218624
The slowest run took 20.57 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 63.3 ns per loop


#### Exception Handling



In [18]:
%%cython
# No `except` clause is declared, so an exception raised inside this function
# is only reported ("Exception ignored in ...") and a default value is returned.
cpdef int divide_ints(int i, int j): 
    return i/j

In [19]:
divide_ints(1,2),divide_ints(1,0)

Exception ignored in: '_cython_magic_d7cf46a898230095cd196ff6cc6ad770.divide_ints'
ZeroDivisionError: integer division or modulo by zero


(0, 0)

In [20]:
%%cython
# `except? -1`: -1 marks a *possible* error; because -1 can also be a genuine
# result, the caller checks whether an exception is actually set before raising.
cpdef int divide_ints(int i, int j) except? -1: 
    return i/j

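Any value of the return type, like -1, can serve as the error marker. The question mark means -1 may also be returned legitimately, so when Cython sees -1 it checks whether an exception was actually raised.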

In [21]:
divide_ints(1,2),divide_ints(1,0)

ZeroDivisionError: integer division or modulo by zero

In [39]:
!ls -l /Users/rahul/.ipython/cython/

total 14976
drwxr-xr-x  3 rahul  staff     102 Mar 16 18:34 [1m[36mUsers[m[m
-rw-r--r--  1 rahul  staff  111044 Apr  2 16:55 _cython_magic_00fb9c28e2ecaf954e5d178589557b6f.c
-rwxr-xr-x  1 rahul  staff   23932 Apr  2 16:55 [31m_cython_magic_00fb9c28e2ecaf954e5d178589557b6f.cpython-35m-darwin.so[m[m
-rw-r--r--  1 rahul  staff   36886 Apr  2 16:55 _cython_magic_00fb9c28e2ecaf954e5d178589557b6f.html
-rw-r--r--  1 rahul  staff     277 Apr  2 16:55 _cython_magic_00fb9c28e2ecaf954e5d178589557b6f.pyx
-rw-r--r--  1 rahul  staff      78 Apr  2 16:54 _cython_magic_0157582144e5fafe077249f533eaf38c.c
-rw-r--r--  1 rahul  staff     280 Apr  2 16:54 _cython_magic_0157582144e5fafe077249f533eaf38c.pyx
-rw-r--r--  1 rahul  staff   69202 Mar 16 18:34 _cython_magic_07ae01bb035b86a3ce26375bb5465a38.c
-rwxr-xr-x  1 rahul  staff   17136 Mar 16 18:34 [31m_cython_magic_07ae01bb035b86a3ce26375bb5465a38.cpython-35m-darwin.so[m[m
-rw-r--r--  1 rahul  staff      27 Mar 16 18:34 _cython_magic_07

We can pass optional arguments to the %%cython magic command. The first set of options control the cython compilation stage:

-n, --name
Specifies the name of the generated .pyx file

--cplus
Instructs cython to generate C++ source

-a, --annotate
Instructs cython to output an annotated source file (see Chapter 9)

-f, --force
Forces cython to regenerate C or C++ source

The second set of options allows us to control the second pipeline stage:

-I, --include
Adds extra directories to search for file inclusions and cimports

-c, --compile-args
Allows inclusion of extra C compiler arguments

--link-args
Allows inclusion of extra link arguments

-L
Adds extra library search directories

-l
Adds extra library names to link against

#### Performance

Where do you think `typed_fact` may run slow?

In [22]:
%%cython

def typed_fact(long n):
    """Return n! recursively; any n <= 1 yields 1."""
    if n > 1:
        return n * typed_fact(n - 1)
    return 1

cpdef long c_fact(long n):
    """Return n! recursively as a C long; any n <= 1 yields 1."""
    if n > 1:
        return n * c_fact(n - 1)
    return 1

In [23]:
%timeit typed_fact(20)
%timeit c_fact(20)

1000000 loops, best of 3: 1.36 µs per loop
10000000 loops, best of 3: 94.6 ns per loop


### Fibonacci our old friend

In [24]:
#pure python
def fib(n):
    """Return the n-th Fibonacci number as a float (fib(0) == 0.0)."""
    current = 0.0
    previous = 1.0
    for _ in range(n):
        # Advance one step: the new value is the sum of the last two.
        total = current + previous
        previous = current
        current = total
    return current

In [25]:
%%cython
def fib_cython(int n):
    """Return the n-th Fibonacci number as a float, using C-typed locals."""
    cdef int step
    cdef double current = 0.0
    cdef double previous = 1.0
    cdef double total
    for step in range(n):
        # Advance one step: the new value is the sum of the last two.
        total = current + previous
        previous = current
        current = total
    return current

#### function call overhead and loop overhead

In [26]:
%timeit fib(0)

The slowest run took 9.10 times longer than the fastest. This could mean that an intermediate result is being cached 
1000000 loops, best of 3: 351 ns per loop


In [27]:
%timeit fib_cython(0)

The slowest run took 28.99 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 62.2 ns per loop


In [28]:
%timeit fib(90)

100000 loops, best of 3: 5.77 µs per loop


In [29]:
%timeit fib_cython(90)

The slowest run took 8.82 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 140 ns per loop


- in python, `+` must work out the operator overloading at run time
- here it is just one machine code instruction
- cython fib allocates everything on the stack, while python variables are allocated on the heap
- also, C floating-point values are mutable, unlike python's

In [30]:
%%cython
# C-only kernel: returns the n-th Fibonacci number as a C double.
cdef double _fib_cython2(int n):
    cdef int step
    cdef double current = 0.0
    cdef double previous = 1.0
    cdef double total
    for step in range(n):
        total = current + previous
        previous = current
        current = total
    return current

def fib_cython2(n):
    """Python-callable entry point that delegates to the C-only kernel."""
    return _fib_cython2(n)

In [31]:
%timeit fib_cython2(90)

The slowest run took 11.30 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 152 ns per loop


### Wrapping a function written in C

In [32]:
%%file cfib.h

#ifndef CFIB_H
#define CFIB_H

/* Return the n-th Fibonacci number (0 for n <= 0) as a double. */
double _cfib(int n);

#endif /* CFIB_H */


Overwriting cfib.h


In [33]:
%%file cfib.c

#include "cfib.h"

/* Return the n-th Fibonacci number as a double; n <= 0 yields 0.0. */
double _cfib(int n) {
    double current = 0.0;
    double previous = 1.0;
    int step = 0;

    while (step < n) {
        /* Advance one step: the new value is the sum of the last two. */
        double total = current + previous;
        previous = current;
        current = total;
        step++;
    }
    return current;
}

Overwriting cfib.c


In [34]:
%%file wrapfib.pyx
# Declare the C function from cfib.h so Cython knows its signature.
cdef extern from "cfib.h": 
    double _cfib(int n)
    
# Python-callable wrapper around the external C function.
def cfib(n):
    return _cfib(n)

Overwriting wrapfib.pyx


#### Using distutils

`python setup.py build_ext --inplace` is your friend.

In [35]:
%%file setup.py

from distutils.core import setup, Extension
from Cython.Build import cythonize

# Build one extension module, "wrapfib", from the hand-written C source plus
# the Cython wrapper.
wrapfib_ext = Extension("wrapfib", sources=["cfib.c", "wrapfib.pyx"])
exts = cythonize([wrapfib_ext])

setup(ext_modules=exts)

Overwriting setup.py


In [36]:
!export CC=gcc;python setup.py build_ext -if

Compiling wrapfib.pyx because it changed.
[1/1] Cythonizing wrapfib.pyx
running build_ext
building 'wrapfib' extension
gcc -fno-strict-aliasing -Wsign-compare -Wunreachable-code -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -I//anaconda/envs/py35/include -arch x86_64 -I//anaconda/envs/py35/include/python3.5m -c wrapfib.c -o build/temp.macosx-10.5-x86_64-3.5/wrapfib.o
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
[0;1;32m                           ^
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
[0;1;32m                               ^
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
[0;1;32m                         ^
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
[0;1;32m                                ^
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
[0;1;32m                                ^
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
[0;1;32m

In [37]:
!ls -l cfib* wrap*

-rw-r--r--  1 rahul  staff    157 Apr  4 13:24 cfib.c
-rw-r--r--  1 rahul  staff     21 Apr  4 13:24 cfib.h
-rw-r--r--  1 rahul  staff   2836 Apr  4 01:20 cfib.o
-rw-r--r--  1 rahul  staff  68101 Apr  4 13:25 wrapfib.c
-rwxr-xr-x  1 rahul  staff  17176 Apr  4 13:25 [31mwrapfib.cpython-35m-darwin.so[m[m
-rw-r--r--  1 rahul  staff     89 Apr  4 13:24 wrapfib.pyx


In [38]:
from wrapfib import cfib
%timeit cfib(90)

The slowest run took 9.14 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 144 ns per loop


Notice that even though we wrapped existing C code, cython actually generated code of comparable speed for us!

Which choice do we use? 

- If we have pre-existing code, wrap it!
- otherwise use `def` or `cdef`
- the latter is preferable if you have to call it in the file itself
- in all cases static typing is key

#### `pyximport` on-the-fly compilation

In [39]:
%%file c2fib.pyx
# Declare the C function from cfib.h so Cython knows its signature.
cdef extern from "cfib.h": 
    double _cfib(int n)
    
# Python-callable wrapper around the external C function.
def c2fib(n):
    return _cfib(n)

Overwriting c2fib.pyx


In [40]:
%%file c2fib.pyxbld

def make_ext(modname, pyxfilename):
    """Return the Extension describing how to build `modname`.

    The extension compiles the given .pyx file together with 'cfib.c' and
    searches the current directory for headers.
    """
    from distutils.extension import Extension

    ext_sources = [pyxfilename, 'cfib.c']
    return Extension(modname, sources=ext_sources, include_dirs=['.'])

Overwriting c2fib.pyxbld


In [41]:
import pyximport
pyximport.install()

(None, <pyximport.pyximport.PyxImporter at 0x105fba668>)

In [42]:
from c2fib import c2fib

In [43]:
c2fib(5)

5.0

#### Compiling yourself

In [44]:
%%file c3fib.pyx
# Declare the C function from cfib.h so Cython knows its signature.
cdef extern from "cfib.h": 
    double _cfib(int n)
    
# Python-callable wrapper around the external C function.
def c3fib(n):
    return _cfib(n)

Overwriting c3fib.pyx


In [45]:
!cython c3fib.pyx

In [46]:
!ls c3*

c3fib.c                     c3fib.o
[31mc3fib.cpython-35m-darwin.so[m[m c3fib.pyx


In [47]:
!cat c3fib.c

/* Generated by Cython 0.23.5 */

#define PY_SSIZE_T_CLEAN
#include "Python.h"
#ifndef Py_PYTHON_H
    #error Python headers needed to compile C extensions, please install development version of Python.
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000)
    #error Cython requires Python 2.6+ or Python 3.2+.
#else
#define CYTHON_ABI "0_23_5"
#include <stddef.h>
#ifndef offsetof
#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
#endif
#if !defined(WIN32) && !defined(MS_WINDOWS)
  #ifndef __stdcall
    #define __stdcall
  #endif
  #ifndef __cdecl
    #define __cdecl
  #endif
  #ifndef __fastcall
    #define __fastcall
  #endif
#endif
#ifndef DL_IMPORT
  #define DL_IMPORT(t) t
#endif
#ifndef DL_EXPORT
  #define DL_EXPORT(t) t
#endif
#ifndef PY_LONG_LONG
  #define PY_LONG_LONG LONG_LONG
#endif
#ifndef Py_HUGE_VAL
  #define Py_HUGE_VAL HUGE_VAL
#endif
#ifdef PYPY_VERSION
#define CYTHON_COMP

In [48]:
!python3-config --cflags

-I//anaconda/envs/py35/include/python3.5m -I//anaconda/envs/py35/include/python3.5m -fno-strict-aliasing -Wsign-compare -Wunreachable-code -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -I//anaconda/envs/py35/include -arch x86_64 -I//anaconda/envs/py35/include -arch x86_64


In [49]:
!gcc -c  c3fib.c $(python3-config --cflags)

static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
[0;1;32m                           ^
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
[0;1;32m                               ^
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
[0;1;32m                         ^
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
[0;1;32m                                ^
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
[0;1;32m                                ^
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
[0;1;32m                               ^
      [-Wunneeded-internal-declaration][0m
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
[0;1;32m                          ^


In [50]:
!gcc -c cfib.c $(python3-config --cflags)

Do not use the `--ldflags` output, as it's not meant for building shared libraries. For that, the incantation is:

```
gcc -bundle -undefined dynamic_lookup -L//anaconda/envs/py35/lib -arch x86_64 c3fib.o cfib.o -L//anaconda/envs/py35/lib -o c3fib.cpython-35m-darwin.so
```

The particular form of the library is important for immediate import on the mac

In [51]:
!python3-config --ldflags

-lpython3.5m -ldl -framework CoreFoundation -Wl,-stack_size,1000000 -framework CoreFoundation


In [52]:
!gcc -bundle -undefined dynamic_lookup -L//anaconda/envs/py35/lib -arch x86_64 c3fib.o cfib.o -L//anaconda/envs/py35/lib -o c3fib.cpython-35m-darwin.so

In [53]:
from c3fib import c3fib

In [54]:
c3fib(5)

5.0

### Extension types from C structs

This also shows in a bit more detail how to use C code

In [55]:
%%file pointy.h

#ifndef POINTY_H
#define POINTY_H

/* A point in the 2-D plane. */
typedef struct Point {
    double x;
    double y;
} Point;

/* Distance between the points *p1 and *p2. */
double distance(Point *p1, Point *p2);

#endif /* POINTY_H */

Overwriting pointy.h


In [56]:
%%file pointy.c
#include <math.h>
#include "pointy.h"

/* Euclidean distance between *p1 and *p2, computed with hypot() on the
   per-axis differences. */
double distance(Point *p1, Point *p2) {
    const double dx = p1->x - p2->x;
    const double dy = p1->y - p2->y;

    return hypot(dx, dy);
}


Overwriting pointy.c


In [57]:
%%file cpointy.pxd

cdef extern from "pointy.h":
    # Mirror of the C `Point` struct so Cython code can access its fields.
    ctypedef struct Point:
                 double x
                 double y
    # C function declared in pointy.h; takes pointers to two Point structs.
    double distance(Point *, Point *)


Overwriting cpointy.pxd


In [58]:
%%file pointclass.pyx
cimport cpointy
from libc.stdlib cimport malloc, free

cdef class Point:
    """Python-visible wrapper that owns one heap-allocated C ``Point``.

    The struct is allocated in ``__cinit__`` and released in ``__dealloc__``;
    the ``x`` and ``y`` properties read and write its fields.

    Raises:
        MemoryError: if the C struct cannot be allocated.
    """

    # Pointer to the wrapped C struct; allocated and freed by this object.
    cdef cpointy.Point *_c_point

    def __cinit__(self, double x, double y):
        self._c_point = <cpointy.Point *> malloc(sizeof(cpointy.Point))
        # malloc signals failure by returning NULL; raise a Python exception
        # instead of writing through a NULL pointer.
        if not self._c_point:
            raise MemoryError()
        self._c_point.x = x
        self._c_point.y = y

    def __dealloc__(self):
        print("deleting")
        # free(NULL) is a no-op, so this stays safe if allocation failed.
        free(self._c_point)

    # x coordinate, stored in the wrapped C struct.
    property x:
        def __get__(self):
            return self._c_point.x
        def __set__(self, value):
            self._c_point.x = value

    # y coordinate, stored in the wrapped C struct.
    property y:
        def __get__(self):
            return self._c_point.y
        def __set__(self, value):
            self._c_point.y = value

def distance(Point p1 not None, Point p2 not None):
    """Return the distance between two ``Point`` objects via the C routine.

    ``not None`` makes Cython reject ``None`` with a TypeError; without it a
    typed extension argument silently accepts ``None`` and the ``_c_point``
    access below would read invalid memory.
    """
    return cpointy.distance(p1._c_point, p2._c_point)

Overwriting pointclass.pyx


In [59]:
%%file setup.py
from distutils.core import setup
from distutils.extension import Extension 
from Cython.Distutils import build_ext
# One extension module, "pointclass", built from the Cython wrapper and the
# C implementation; headers are looked up in the current directory.
pointclass_ext = Extension(
    'pointclass',
    sources=['pointclass.pyx', 'pointy.c'],
    include_dirs=['.'],
)
ext_modules = [pointclass_ext]

setup(
    name='pointclass',
    cmdclass={'build_ext': build_ext},
    ext_modules=ext_modules,
)

Overwriting setup.py


In [60]:
!python setup.py build_ext --inplace

running build_ext
cythoning pointclass.pyx to pointclass.c
building 'pointclass' extension
gcc -fno-strict-aliasing -Wsign-compare -Wunreachable-code -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -I//anaconda/envs/py35/include -arch x86_64 -I. -I//anaconda/envs/py35/include/python3.5m -c pointclass.c -o build/temp.macosx-10.5-x86_64-3.5/pointclass.o
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
[0;1;32m                           ^
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
[0;1;32m                               ^
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
[0;1;32m                         ^
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
[0;1;32m                                ^
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
[0;1;32m                                ^
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
[0;1;32m                  

In [61]:
import pointclass as pc

In [62]:
p1 = pc.Point(2,3)
p2 = pc.Point(4,5)
p1,p2

(<pointclass.Point at 0x1058b5d68>, <pointclass.Point at 0x1058b5d80>)

In [63]:
print(p1.x)
print(p2.y)
pc.distance(p1,p2)

2.0
5.0


2.8284271247461903

In [64]:
del p1