Original link: https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Synthetic benchmark input: 1000 rows holding the interval endpoints
# (a, b), a slice count N, and a constant string column x.
df = pd.DataFrame(
    {
        "a": np.random.randn(1000),
        "b": np.random.randn(1000),
        # randint's upper bound is exclusive, so N lies in [100, 999].
        "N": np.random.randint(100, 1000, 1000),
        # A scalar is broadcast to every row.
        "x": "x",
    }
)

In [5]:
# Show the frame; the rendered output below elides the middle rows.
df

Unnamed: 0,a,b,N,x
0,-0.077157,0.187279,824,x
1,-0.135650,-1.048175,775,x
2,-0.246453,-1.369587,261,x
3,1.102079,-0.734347,813,x
4,0.724510,-1.678795,742,x
...,...,...,...,...
995,-1.056319,0.321975,767,x
996,2.087671,0.085007,406,x
997,0.469304,-0.772226,803,x
998,-0.989863,0.393338,752,x


In [6]:
def f(x):
    """Return ``x * (x - 1)``, the integrand evaluated by ``integrate_f``."""
    shifted = x - 1
    return x * shifted

In [7]:
def integrate_f(a, b, N):
    """Approximate the integral of ``f`` from ``a`` to ``b``.

    The interval is cut into ``N`` slices of equal width and ``f`` is
    sampled at the left edge of each slice (``a + k * width`` for
    ``k = 0 .. N-1``); the samples are summed and scaled by the width.

    Parameters
    ----------
    a, b : numbers
        Interval endpoints.
    N : int
        Number of slices. ``N == 0`` raises ``ZeroDivisionError`` with
        plain Python numbers.

    Returns
    -------
    number
        The accumulated sum multiplied by the slice width.
    """
    width = (b - a) / N
    total = 0
    # Accumulate one sample at a time, in index order.
    for k in range(N):
        total += f(a + k * width)
    return total * width

In [8]:
# Baseline timing: apply the pure-Python integrator to every row (axis=1).
%timeit df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)

167 ms ± 15.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
# Profile the same row-wise call; `-l 4` restricts the printed report to 4 lines.
%prun -l 4 df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)  # noqa E999

 

In [10]:
# Load the Cython IPython extension; it provides the %%cython cell magic used below.
%load_ext Cython

In [11]:
%%cython
def f_plain(x):
    """Return ``x * (x - 1)``; untyped, so ``x`` is handled as a Python object."""
    shifted = x - 1
    return x * shifted


def integrate_f_plain(a, b, N):
    """Approximate the integral of ``f_plain`` from ``a`` to ``b``.

    Splits the interval into ``N`` equal slices, samples ``f_plain`` at
    the left edge of each slice, sums the samples and multiplies the sum
    by the slice width. No static types are declared in this version.
    """
    width = (b - a) / N
    total = 0
    # Accumulate one sample at a time, in index order.
    for k in range(N):
        total += f_plain(a + k * width)
    return total * width

In [12]:
# Same row-wise benchmark, now calling the Cython-compiled but untyped integrator.
%timeit df.apply(lambda x: integrate_f_plain(x['a'], x['b'], x['N']), axis=1)

73.3 ms ± 2.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
%%cython
# C-level integrand x * (x - 1) on doubles.
# `except? -2` marks -2 as the return value that may signal an error, so the
# caller only checks for a pending exception when it sees that value.
cdef double f_typed(double x) except? -2:
    cdef double shifted = x - 1
    return x * shifted


cpdef double integrate_f_typed(double a, double b, int N):
    """Approximate the integral of ``f_typed`` from ``a`` to ``b``.

    Splits the interval into ``N`` equal slices, samples ``f_typed`` at
    the left edge of each slice, sums the samples and multiplies the sum
    by the slice width. The arguments, loop index and accumulators are
    all declared with C types.
    """
    cdef:
        int k
        double total = 0
        double width = (b - a) / N
    # Accumulate one sample at a time, in index order.
    for k in range(N):
        total += f_typed(a + k * width)
    return total * width

In [14]:
# Same row-wise benchmark with the statically typed Cython integrator.
%timeit df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)

28.8 ms ± 800 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
# Profile the typed version of the row-wise call; `-l 4` restricts the printed report to 4 lines.
%prun -l 4 df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)

 