# Cython Demo

In [51]:
import timeit

In [4]:
# There are two packages, one containing regular Python modules and
# the other containing corresponding Cython modules

## Cythonization

In [16]:
# Let's create a C extension from the `hello` module
ls awesome_cython_stuff/hello*

[0m[01;32mawesome_cython_stuff/hello.pyx[0m*


In [7]:
# The Cython module contains code that is unmodified Python code (in this case)
! more awesome_cython_stuff/hello.pyx

def say_hello():
    print("Hello World!")


## C Extensions
To generate the C extension, there are a couple ways:
1. Run `cython` manually to generate the `.c` file and then run `gcc` to generate the `.so`:
  ```
  cython -a awesome_cython_stuff/hello.pyx
  ```
  ```
  gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing -I${PYTHON_HEADER_DIR} -o awesome_cython_stuff/hello.so awesome_cython_stuff/hello.c
  ```

2. Run `setup.py` (using `distutils.extension.Extension` and `Cython.Distutils.build_ext` or `Cython.Build.cythonize`, etc. to generate the extensions):
   ```
   python setup.py install
   ```

In [35]:
# Example setup.py
! head -25 setup.py

from os.path import join
from subprocess import getoutput
from setuptools import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

# Hackish way of doing this. Find better way...
root_env = getoutput('conda info | grep "package cache :"'
                     ' | awk \'{print $4}\'')
python_header_dir = join(root_env, 'python-3.4.3-2/include/python3.4m')

setup(name = 'spaCy and Cython demo',
      description='Repository storing spaCy demo + mini-Cython demo.',
      url='https://github.com/mulhod/spaCy_demo',
      author='Matt Mulholland',
      author_email='mulhodm@gmail.com',
      packages=['awesome_cython_stuff', 'regular_old_yet_fine_python_stuff'],
      cmdclass={'build_ext': build_ext},
      ext_modules=[Extension('awesome_cython_stuff.hello',
                             ['awesome_cython_stuff/hello.pyx'],
                             include_dirs=[python_header_dir])],
      keywords='spacy cython',
      classif

In [20]:
# After generating the C extension, there will be a .c file and a .so file,
# the latter being the more important since that is what actually gets
# imported

In [55]:
%%bash
ROOTENV=$(conda info | grep "root environment :" | awk '{print $4}')
PYTHON_HEADER_DIR=${ROOTENV}/pkgs/python-3.4.3-2/include/python3.4m
cython -a awesome_cython_stuff/hello.pyx
gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing -I${PYTHON_HEADER_DIR} -o awesome_cython_stuff/hello.so awesome_cython_stuff/hello.c
ls awesome_cython_stuff/hello*

awesome_cython_stuff/hello.c
awesome_cython_stuff/hello.html
awesome_cython_stuff/hello.pyx
awesome_cython_stuff/hello.so


## Hello World in Python vs. Cython

In [56]:
from regular_old_yet_fine_python_stuff.hello import say_hello as say_hello_python
from awesome_cython_stuff.hello import say_hello as say_hello_cython

In [57]:
say_hello_python()

Hello World!


In [58]:
say_hello_cython()

Hello World!


In [59]:
# Let's see what kind of difference there is in terms of speed

In [70]:
t = timeit.Timer("say_hello_python()", "from regular_old_yet_fine_python_stuff.hello import say_hello as say_hello_python")
print("Cython function: {} seconds".format(t.timeit(100000)))

Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!

In [71]:
t = timeit.Timer("say_hello_cython()", "from awesome_cython_stuff.hello import say_hello as say_hello_cython")
print("Cython function: {} seconds".format(t.timeit(100000)))

Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!
Hello World!

In [73]:
# It seems like the C extension version is at least marginally faster even
# when the original code is unchanged

## "Great Circle" Function in Python vs. Cython

In [66]:
# The "great circle" function calculate the distance between two points on
# the surface of the earth

In [68]:
# Python version
! cat regular_old_yet_fine_python_stuff/great_circle.py

import math

def great_circle(lon1, lat1, lon2, lat2):
    radius = 3956 #miles
    x = math.pi/180.0

    a = (90.0-lat1)*(x)
    b = (90.0-lat2)*(x)
    theta = (lon2-lon1)*(x)
    c = math.acos((math.cos(a)*math.cos(b)) +
                  (math.sin(a)*math.sin(b)*math.cos(theta)))
    return radius*c


In [69]:
# Cython version (with modified Python code)
! cat awesome_cython_stuff/great_circle.pyx

import math

def great_circle(float lon1,float lat1,float lon2,float lat2):
    cdef float radius = 3956.0 
    cdef float pi = 3.14159265
    cdef float x = pi/180.0
    cdef float a,b,theta,c

    a = (90.0-lat1)*(x)
    b = (90.0-lat2)*(x)
    theta = (lon2-lon1)*(x)
    c = math.acos((math.cos(a)*math.cos(b)) + (math.sin(a)*math.sin(b)*math.cos(theta)))
    return radius*c

In [80]:
from regular_old_yet_fine_python_stuff.great_circle import great_circle as great_circle_python
from awesome_cython_stuff.great_circle import great_circle as great_circle_cython

lon1, lat1, lon2, lat2 = -72.345, 34.323, -61.823, 54.826
args = "lon1, lat1, lon2, lat2"

print("great_circle_python({1}) = {0}".format(great_circle_python(lon1, lat1, lon2, lat2),
                                              args))
print("great_circle_cython({1}) = {0}".format(great_circle_cython(lon1, lat1, lon2, lat2),
                                              args))

great_circle_python(lon1, lat1, lon2, lat2) = 1503.3934477840162
great_circle_cython(lon1, lat1, lon2, lat2) = 1503.393310546875


In [87]:
# Test
num = 100000

t1 = timeit.Timer("great_circle_python(%f, %f, %f, %f)" % (lon1, lat1, lon2, lat2), 
                  "from regular_old_yet_fine_python_stuff.great_circle import great_circle as great_circle_python")
t2 = timeit.Timer("great_circle_cython(%f, %f, %f, %f)" % (lon1, lat1, lon2, lat2), 
                  "from awesome_cython_stuff.great_circle import great_circle as great_circle_cython")
print("Pure python function: {} seconds".format(t.timeit(num)))
print("Cython function: {} seconds".format(t.timeit(num)))

Pure python function: 0.10714197100605816 seconds
Cython function: 0.08735901501495391 seconds


In [None]:
# The C extension function is faster by a small, but consistent amount
# (though the difference decreases as num increases)