# Cython and NumPy, buffers, fused types

<img src="https://imgs.xkcd.com/comics/password_strength.png" alt="xkcd 936: Password Strength" width="600" />

In [1]:
%load_ext Cython
import numpy as np

## Pure Python implementation

In [2]:
# https://en.wiktionary.org/wiki/Shannon_entropy

def shannon_entropy_py(p_x):
    """Return the Shannon entropy (in nats) of a discrete distribution.

    Computes ``-sum(p * log(p))`` over all entries of ``p_x`` with vectorized
    NumPy operations.

    Parameters
    ----------
    p_x : array_like
        Probabilities of the outcomes. They are neither normalized nor
        validated here.

    Returns
    -------
    numpy floating scalar
        The entropy, using the natural logarithm.

    Notes
    -----
    Entries that are exactly zero are skipped, following the convention
    ``0 * log(0) = 0``. Evaluating them directly would give
    ``0 * -inf = nan`` together with a RuntimeWarning. Negative or NaN
    entries are not filtered and still propagate NaN.
    """
    p = np.asarray(p_x)
    # Boolean mask drops exact zeros only; everything else is summed as before.
    nonzero = p[p != 0]
    return -np.sum(nonzero * np.log(nonzero))

## Cythonized version

In [15]:
%%cython -a

import numpy as np
cimport numpy as cnp

def shannon_entropy_cy(cnp.ndarray p_x):
    """Return the Shannon entropy (in nats) of ``p_x``: ``-sum(p * log(p))``.

    Same vectorized NumPy expression as the pure Python version, compiled by
    Cython. The ``cnp.ndarray`` annotation only types the argument; the
    arithmetic still goes through NumPy calls, and the recorded timings in
    this notebook show essentially no speed-up over the Python function.

    Entries that are exactly zero are skipped (convention ``0 * log(0) = 0``);
    evaluating them directly would give ``0 * -inf = nan``. Negative or NaN
    entries are not filtered and still propagate NaN.
    """
    nonzero = p_x[p_x != 0]
    return - np.sum(nonzero * np.log(nonzero))

## Scipy.stats comparison

In [49]:
# Test data: a Poisson distribution with mean 10, evaluated on k = 0 .. n-1.
from scipy.stats import poisson
poi = poisson(10.0)  # frozen distribution; also supplies the reference value poi.entropy()
n = 100  # number of support points kept
pmf = poi.pmf(np.arange(n))  # probabilities passed to the shannon_entropy_* functions in later cells

In [50]:
# Sanity check: scipy's own entropy next to our sums over the truncated pmf.
print(poi.entropy())
print(shannon_entropy_py(pmf))
print(shannon_entropy_cy(pmf))

2.5614099352749125
2.56140993527
2.56140993527


In [51]:
%%timeit
poi.entropy()

804 µs ± 12 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [52]:
%%timeit
shannon_entropy_py(pmf)

6.2 µs ± 101 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [53]:
%%timeit
shannon_entropy_cy(pmf)

6.12 µs ± 82.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Explicit `for` loop

In [17]:
%%cython -a

from libc.math cimport log as clog

def shannon_entropy_v1(p_x):
    """Return the Shannon entropy (in nats) of ``p_x`` with an explicit loop.

    ``p_x`` is left untyped, so every ``p_x[i]`` is a generic Python indexing
    call; only the accumulator, the counters and the ``log`` call are C-level.

    Parameters
    ----------
    p_x : object exposing ``.shape`` and integer indexing
        Probabilities of the outcomes; each element must convert to a C
        double.

    Returns
    -------
    float
        ``-sum(p * log(p))`` using the natural logarithm.

    Notes
    -----
    Entries that are exactly zero are skipped (convention ``0 * log(0) = 0``);
    in C, ``0 * log(0)`` is ``0 * -inf = nan``. Negative or NaN entries still
    propagate NaN.
    """
    cdef double res = 0.0
    # Py_ssize_t is the native index type; a C int cannot hold the length
    # of arrays with more than 2**31 - 1 elements.
    cdef Py_ssize_t n = p_x.shape[0]
    cdef Py_ssize_t i
    cdef double p
    for i in range(n):
        p = p_x[i]  # single Python-level lookup per element
        if p != 0.0:
            res += p * clog(p)
    return -res

In [18]:
%%timeit
shannon_entropy_v1(pmf)

21.4 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## NumPy buffer special declaration

In [20]:
%%cython -a

cimport numpy as cnp
from libc.math cimport log as clog

def shannon_entropy_v2(cnp.ndarray[double] p_x):
    """Return the Shannon entropy (in nats) of a 1-D float64 array.

    Uses the NumPy buffer declaration ``cnp.ndarray[double]`` so that
    ``p_x[i]`` compiles to direct buffer access instead of a Python
    indexing call.

    Parameters
    ----------
    p_x : numpy.ndarray of float64, one-dimensional
        Probabilities of the outcomes.

    Returns
    -------
    float
        ``-sum(p * log(p))`` using the natural logarithm.

    Notes
    -----
    Entries that are exactly zero are skipped (convention ``0 * log(0) = 0``);
    in C, ``0 * log(0)`` is ``0 * -inf = nan``. Negative or NaN entries still
    propagate NaN.
    """
    cdef double res = 0.0
    # Py_ssize_t is the native index type; a C int would silently truncate
    # the length of arrays with more than 2**31 - 1 elements.
    cdef Py_ssize_t n = p_x.shape[0]
    cdef Py_ssize_t i
    cdef double p
    for i in range(n):
        p = p_x[i]
        if p != 0.0:
            res += p * clog(p)
    return -res

In [21]:
%%timeit
shannon_entropy_v2(pmf)

2.95 µs ± 110 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Turn off bounds checking and wraparound checking

In [22]:
%%cython -a

cimport cython
cimport numpy as cnp
from libc.math cimport log

@cython.boundscheck(False)
@cython.wraparound(False)
def shannon_entropy_v3(cnp.ndarray[double] p_x):
    """Return the Shannon entropy (in nats) of a 1-D float64 array.

    Same loop as the buffer-typed version, with Cython's bounds checking and
    negative-index wraparound disabled for the indexing inside the function.
    The loop only visits ``0 <= i < p_x.shape[0]``, so no index is out of
    range.

    Parameters
    ----------
    p_x : numpy.ndarray of float64, one-dimensional
        Probabilities of the outcomes.

    Returns
    -------
    float
        ``-sum(p * log(p))`` using the natural logarithm.

    Notes
    -----
    Entries that are exactly zero are skipped (convention ``0 * log(0) = 0``);
    in C, ``0 * log(0)`` is ``0 * -inf = nan``. Negative or NaN entries still
    propagate NaN.
    """
    cdef double res = 0.0
    # Py_ssize_t is the native index type; a C int would silently truncate
    # the length of arrays with more than 2**31 - 1 elements.
    cdef Py_ssize_t n = p_x.shape[0]
    cdef Py_ssize_t i
    cdef double p
    for i in range(n):
        p = p_x[i]
        if p != 0.0:
            res += p * log(p)
    return -res

In [23]:
%%timeit
shannon_entropy_v3(pmf)

2.91 µs ± 141 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Typed memoryview syntax

In [27]:
%%cython -a

cimport cython
from libc.math cimport log

@cython.boundscheck(False)
@cython.wraparound(False)
def shannon_entropy_mv(double[::1] p_x):
    """Return the Shannon entropy (in nats) of a contiguous float64 buffer.

    Uses typed memoryview syntax: ``double[::1]`` accepts a one-dimensional,
    C-contiguous buffer of doubles, so no ``cimport numpy`` is needed.
    Bounds checking and wraparound are disabled; the loop only visits
    ``0 <= i < p_x.shape[0]``.

    Parameters
    ----------
    p_x : 1-D C-contiguous buffer of float64
        Probabilities of the outcomes.

    Returns
    -------
    float
        ``-sum(p * log(p))`` using the natural logarithm.

    Notes
    -----
    Entries that are exactly zero are skipped (convention ``0 * log(0) = 0``);
    in C, ``0 * log(0)`` is ``0 * -inf = nan``. Negative or NaN entries still
    propagate NaN.
    """
    cdef double res = 0.0
    # Memoryview extents are Py_ssize_t; storing them in a C int would
    # truncate the length of buffers with more than 2**31 - 1 elements.
    cdef Py_ssize_t n = p_x.shape[0]
    cdef Py_ssize_t i
    cdef double p
    for i in range(n):
        p = p_x[i]
        if p != 0.0:
            res += p * log(p)
    return -res

In [26]:
%%timeit
shannon_entropy_mv(pmf)

2.74 µs ± 95.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Fused types example

In [48]:
%%cython -a

cimport cython
from libc.math cimport log

@cython.boundscheck(False)
@cython.wraparound(False)
def shannon_entropy_mv(cython.floating[::1] p_x):
    """Return the Shannon entropy (in nats) of a contiguous float buffer.

    The fused type ``cython.floating`` makes Cython generate one
    specialization per floating-point element type, so both float32 and
    float64 buffers are accepted. The sum is always accumulated in a C double.

    This definition reuses the name of the double-only memoryview function
    defined earlier in the notebook and replaces it in the namespace.

    Parameters
    ----------
    p_x : 1-D C-contiguous buffer of float32 or float64
        Probabilities of the outcomes.

    Returns
    -------
    float
        ``-sum(p * log(p))`` over the strictly positive entries, using the
        natural logarithm.
    """
    cdef double res = 0.0
    # Memoryview extents are Py_ssize_t; storing them in a C int would
    # truncate the length of buffers with more than 2**31 - 1 elements.
    cdef Py_ssize_t n = p_x.shape[0]
    cdef Py_ssize_t i
    cdef double p
    for i in range(n):
        p = p_x[i]  # element promoted to double once, as log() operates on doubles
        # Guard against underflow: tiny probabilities can round to exactly 0
        # (notably in float32), and 0 * log(0) would be nan.
        if p > 0.0:
            res += p * log(p)
    return -res

In [54]:
# Call once per dtype to exercise the fused-type argument:
# 'f8' is float64 (C double), 'f4' is float32 (C float).
print(shannon_entropy_mv(pmf.astype('f8')))
print(shannon_entropy_mv(pmf.astype('f4')))

2.5614099352749107
2.5614099208867804
