# Speeding up pandas code with Cython
Reference:
* https://mp.weixin.qq.com/s/GxnczSw6FtO8-DCE8vPnWA

In [2]:
import pandas as pd
import numpy as np

# Synthetic benchmark frame: two standard-normal float columns (the integration
# bounds), an integer step count drawn from [100, 1000), and a constant string
# column.
n_rows = 10000
df = pd.DataFrame(
    {
        "a": np.random.randn(n_rows),
        "b": np.random.randn(n_rows),
        "N": np.random.randint(100, 1000, n_rows),
        "x": "x",
    }
)
df.tail()

Unnamed: 0,a,b,N,x
9995,-1.146198,-3.090375,528,x
9996,1.620999,-0.526381,415,x
9997,2.450856,0.046508,739,x
9998,1.906391,-0.614417,372,x
9999,0.876042,2.265473,163,x


In [3]:
def f(x):
    """Integrand used by the benchmark: return ``x * (x - 1)``."""
    shifted = x - 1
    return x * shifted

def integrate_f(a, b, N):
    """Approximate the integral of ``f`` between ``a`` and ``b``.

    The interval is divided into ``N`` equal-width slices, ``f`` is sampled at
    the left edge of each slice, and the samples are summed and scaled by the
    slice width (a left Riemann sum).
    """
    width = (b - a) / N
    total = 0
    for k in range(N):
        left_edge = a + k * width
        total += f(left_edge)
    return total * width

In [4]:
%timeit df.apply(lambda x: integrate_f(x["a"], x["b"], x["N"]), axis=1)

1.1 s ± 2.47 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%prun -l 4 df.apply(lambda x: integrate_f(x["a"], x["b"], x["N"]), axis=1) 

 

## 1, The simplest Cython: compiling the plain Python code

In [7]:
%load_ext Cython

In [8]:
%%cython
def f_plain(x):
    """Integrand ``x * (x - 1)``; unmodified Python compiled by Cython."""
    shifted = x - 1
    return x * shifted

def integrate_f_plain(a, b, N):
    """Left Riemann sum of ``f_plain`` over ``N`` equal slices from ``a`` to ``b``.

    No type declarations are used here: this is the same source as the pure
    Python version, only compiled through Cython.
    """
    width = (b - a) / N
    total = 0
    for k in range(N):
        left_edge = a + k * width
        total += f_plain(left_edge)
    return total * width

In [9]:
%timeit df.apply(lambda x: integrate_f_plain(x["a"], x["b"], x["N"]), axis=1)

615 ms ± 1.16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 2, Introducing C types

In [10]:
%%cython
cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) evaluated entirely in C doubles.
    # `except? -2` declares -2 as a *possible* error return: when -2 comes back,
    # the caller additionally checks whether a Python exception is set.
    cdef double shifted = x - 1
    return x * shifted

cpdef double integrate_f_typed(double a, double b, int N) except? -1:
    """Left Riemann sum of ``f_typed`` over ``N`` equal slices from ``a`` to ``b``.

    Parameters
    ----------
    a, b : double
        Lower and upper integration bounds.
    N : int
        Number of equal-width slices.

    Returns
    -------
    double
        Sum of ``f_typed`` at the left edge of every slice, times the slice width.

    Raises
    ------
    ZeroDivisionError
        When ``N == 0`` (Cython's default checked division).

    The ``except? -1`` clause is what lets that error reach the caller: without
    an exception value, Cython releases before 3.0 print and discard exceptions
    raised inside functions that return a C type.
    """
    cdef int i
    cdef double s, dx
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_typed(a + i * dx)
    return s * dx

In [11]:
%timeit df.apply(lambda x: integrate_f_typed(x["a"], x["b"], x["N"]), axis=1)

125 ms ± 702 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 3, Using ndarray

In [12]:
%%cython
cimport numpy as np
import numpy as np

cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) on C doubles. It is repeated in this cell because a
    # cdef function is only callable from the Cython module that defines it.
    # `except? -2`: -2 is a possible error return, verified against the
    # exception state by the caller.
    cdef double shifted = x - 1
    return x * shifted

cpdef double integrate_f_typed(double a, double b, int N) except? -1:
    """Left Riemann sum of ``f_typed`` over ``N`` equal slices from ``a`` to ``b``.

    Parameters
    ----------
    a, b : double
        Lower and upper integration bounds.
    N : int
        Number of equal-width slices.

    Returns
    -------
    double
        Sum of ``f_typed`` at the left edge of every slice, times the slice width.

    Raises
    ------
    ZeroDivisionError
        When ``N == 0`` (Cython's default checked division).

    The ``except? -1`` clause is what lets that error reach the caller: without
    an exception value, Cython releases before 3.0 print and discard exceptions
    raised inside functions that return a C type.
    """
    cdef int i
    cdef double s, dx
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_typed(a + i * dx)
    return s * dx

cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b,
                                           np.ndarray col_N):
    """Apply ``integrate_f_typed`` element-wise over three equally long arrays.

    Parameters
    ----------
    col_a, col_b : np.ndarray
        Lower and upper integration bounds; dtype must be float64.
    col_N : np.ndarray
        Slice counts; dtype must be ``np.int_``.

    Returns
    -------
    np.ndarray[double]
        ``res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i])``.

    Raises
    ------
    AssertionError
        If a dtype or length check fails.
    """
    # Spell the dtype as np.float64: the np.float_ alias for it was removed in
    # NumPy 2.0, so referencing it raises AttributeError there.
    assert (col_a.dtype == np.float64
            and col_b.dtype == np.float64 and col_N.dtype == np.int_)
    cdef Py_ssize_t i, n = len(col_N)
    assert (len(col_a) == len(col_b) == n)
    cdef np.ndarray[double] res = np.empty(n)
    # All three lengths equal n at this point, so iterate over n directly.
    for i in range(n):
        res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i])
    return res

In [13]:
%timeit apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())

11.4 ms ± 82.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## 4, Advanced: typed buffers with bounds checking disabled

In [14]:
%%cython
cimport cython
cimport numpy as np
import numpy as np
cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) on C doubles. It is repeated in this cell because a
    # cdef function is only callable from the Cython module that defines it.
    # `except? -2`: -2 is a possible error return, verified against the
    # exception state by the caller.
    cdef double shifted = x - 1
    return x * shifted
cpdef double integrate_f_typed(double a, double b, int N) except? -1:
    """Left Riemann sum of ``f_typed`` over ``N`` equal slices from ``a`` to ``b``.

    Parameters
    ----------
    a, b : double
        Lower and upper integration bounds.
    N : int
        Number of equal-width slices.

    Returns
    -------
    double
        Sum of ``f_typed`` at the left edge of every slice, times the slice width.

    Raises
    ------
    ZeroDivisionError
        When ``N == 0`` (Cython's default checked division).

    The ``except? -1`` clause is what lets that error reach the caller: without
    an exception value, Cython releases before 3.0 print and discard exceptions
    raised inside functions that return a C type.
    """
    cdef long i
    cdef double s, dx
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_typed(a + i * dx)
    return s * dx

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray[double] apply_integrate_f_wrap(np.ndarray[double] col_a,
                                                np.ndarray[double] col_b,
                                                np.ndarray[long] col_N):
    """Apply ``integrate_f_typed`` element-wise over typed ndarray buffers.

    Bounds checking and negative-index wraparound are switched off for this
    function by the decorators above, so the length assertion below is the
    only guard against indexing past the end of ``col_a`` or ``col_b``.

    Returns a new double array with one integral per input row.
    """
    # NOTE(review): `long` is the C long; whether it matches the dtype of the
    # array passed as col_N presumably depends on platform and NumPy version
    # -- confirm before relying on this outside the original environment.
    cdef long row
    cdef long n_rows = len(col_N)
    assert n_rows == len(col_a) and n_rows == len(col_b)
    cdef np.ndarray[double] out = np.empty(n_rows)
    for row in range(n_rows):
        out[row] = integrate_f_typed(col_a[row], col_b[row], col_N[row])
    return out

In [15]:
%timeit apply_integrate_f_wrap(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())

8.8 ms ± 6.32 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
