# <center>Enhancing Performance</center>
Source: https://pandas.pydata.org/pandas-docs/stable/enhancingperf.html

In [6]:
import pandas as pd
import numpy as np
pd.__version__

'0.20.3'

In [9]:
# Fixed seed so the benchmark frame and the sampled preview are identical on
# every Restart & Run All. A local RandomState is used instead of seeding
# NumPy's global generator, so no other cell's random draws are affected.
SEED = 0
N_ROWS = 1000
rng = np.random.RandomState(SEED)

# Columns: 'a' and 'b' are standard-normal floats, 'N' is an integer in
# [100, 1000), and 'x' is a constant string column. The draws happen in
# dict-literal order (a, b, N), so the data is fully determined by SEED.
df = pd.DataFrame({'a': rng.randn(N_ROWS),
                   'b': rng.randn(N_ROWS),
                   'N': rng.randint(100, 1000, N_ROWS),
                   'x': 'x'})
df.sample(5, random_state=rng)

Unnamed: 0,N,a,b,x
239,510,0.285063,-0.435981,x
697,555,1.764188,-1.57596,x
350,668,-0.238376,-0.276518,x
105,659,0.822136,-0.25822,x
235,660,0.073597,-1.251005,x


In [12]:
def f(x):
  """Evaluate the integrand x * (x - 1) at ``x``."""
  shifted = x - 1
  return x * shifted

def integrate_f(a, b, N):
  """Approximate the integral of ``f`` over [a, b] with a left Riemann sum.

  The interval is cut into ``N`` slices of equal width; ``f`` is evaluated
  at the left edge of each slice (a, a + width, ..., a + (N-1) * width) and
  the accumulated total is scaled by the slice width.

  Parameters: ``a``/``b`` are the interval bounds, ``N`` is the slice count
  (it is passed to ``range``, so it must be an integer).
  Returns the sum of the ``f`` values multiplied by the slice width.
  """
  width = (b - a) / N
  total = 0
  # Explicit accumulation keeps the additions in index order.
  for k in range(N):
    left_edge = a + k * width
    total += f(left_edge)
  return total * width

In [15]:
%timeit df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis =1)

10 loops, best of 3: 164 ms per loop


Let's look at where the time is spent during this operation by profiling it with %prun. Note that the option in "-l 4" is the lowercase letter "L", not the digit one; it limits the printed report to 4 lines.

In [24]:
%prun -l 4 df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis =1)

 

### Load Cython

In [25]:
%load_ext Cython

In [26]:
%%cython
def f(x):
  # Integrand x * (x - 1); left untyped, so x is handled as a Python object.
  x_minus_one = x - 1
  return x * x_minus_one

def integrate_f(a, b, N):
  """Left-endpoint Riemann sum of ``f`` on [a, b] using N equal-width slices.

  No C types are declared here; the body is the plain-Python algorithm,
  compiled as-is by the %%cython cell magic.
  Returns the accumulated ``f`` values multiplied by the slice width.
  """
  slice_width = (b - a) / N
  acc = 0
  for idx in range(N):
    acc += f(a + idx * slice_width)
  return acc * slice_width

In [28]:
%timeit df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis =1)

10 loops, best of 3: 91.4 ms per loop


### Adding Type Information

In [36]:
%%cython
cdef double f_typed(double x) except? -2:
  # C-level integrand x * (x - 1) on doubles. "except? -2" marks -2 as the
  # return value after which Cython checks whether an exception is pending.
  cdef double shifted = x - 1
  return x * shifted

cpdef double integrate_f_typed(double a, double b, int N):
  """Left-endpoint Riemann sum of ``f_typed`` on [a, b] with N equal slices.

  All locals are declared with C types (int counter, double accumulator and
  slice width), and the integrand is called through the cdef function.
  Returns the accumulated integrand values multiplied by the slice width.
  """
  cdef int k
  cdef double width = (b - a) / N
  cdef double total = 0
  for k in range(N):
    total += f_typed(a + k * width)
  return total * width

In [37]:
%timeit df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)

10 loops, best of 3: 31.6 ms per loop


In [40]:
%prun -l 4 df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)

 

More Options Available: https://pandas.pydata.org/pandas-docs/stable/enhancingperf.html