<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Financial Data Science

Dr Yves J Hilpisch | The Python Quants GmbH

http://tpq.io | <a href="mailto:training@tpq.io">training@tpq.io</a>


<img src="http://hilpisch.com/images/py4fi_2nd.png" width="35%" align="left">

### The use of the "Python 3.10, Numpy 1.26.4" kernel is recommended.

# Performance Python

In [None]:
!git clone https://github.com/tpq-classes/financial_data_science_.git
import sys
sys.path.append('financial_data_science_')


In [None]:
import warnings
warnings.simplefilter('ignore')

## Loops

### Python

In [None]:
import random

In [None]:
def average_py(n):
    """Return the mean of ``n`` draws from ``random.random()``.

    The draws are generated and summed one at a time in a plain loop.
    """
    total = 0
    for _ in range(n):
        total += random.random()
    return total / n

In [None]:
n = 10000000

In [None]:
%time average_py(n)

In [None]:
%timeit average_py(n)

In [None]:
%time sum([random.random() for _ in range(n)]) / n

### NumPy 

In [None]:
import numpy as np

In [None]:
def average_np(n):
    """Return the mean of ``n`` uniform random numbers drawn in one NumPy call."""
    return np.random.random(n).mean()

In [None]:
%time average_np(n)

In [None]:
%timeit average_np(n)

In [None]:
s = np.random.random(n)
s.nbytes

### Numba

In [None]:
import numba

In [None]:
average_nb = numba.jit(average_py)

In [None]:
%time average_nb(n)

In [None]:
%time average_nb(n)

In [None]:
%timeit average_nb(n)

### Cython

In [None]:
%load_ext Cython

In [None]:
%%cython -a
import random
def average_cy1(int n):
    """Return the mean of ``n`` draws from Python's ``random.random()``.

    The loop counter and the accumulator are C-typed; the random numbers
    themselves still come from the Python-level ``random`` module.
    """
    cdef int i
    # double, not float: a single-precision accumulator loses low-order
    # bits once the running sum is large compared with each addend.
    cdef double s = 0
    for i in range(n):
        s += random.random()
    return s / n

In [None]:
%time average_cy1(n)

In [None]:
%timeit average_cy1(n)

In [None]:
%%cython
from libc.stdlib cimport rand, RAND_MAX
cdef int i
cdef float rn
for i in range(5):
    # rand() returns an int in [0, RAND_MAX]; RAND_MAX (not INT_MAX) is the
    # correct normaliser and the two differ on some platforms.
    # The cast makes the division floating-point regardless of language level.
    rn = <double> rand() / RAND_MAX
    print(rn)

In [None]:
%%cython -a
from libc.stdlib cimport rand, RAND_MAX
def average_cy2(int n):
    """Return the mean of ``n`` pseudo-random numbers from C's ``rand()``.

    Each draw is scaled to the unit interval by dividing by ``RAND_MAX``.
    """
    cdef int i
    # double, not float: a single-precision accumulator loses low-order
    # bits once the running sum is large compared with each addend.
    cdef double s = 0
    for i in range(n):
        # RAND_MAX is the documented upper bound of rand(); it is not
        # guaranteed to equal INT_MAX. The cast forces floating-point division.
        s += <double> rand() / RAND_MAX
    return s / n

In [None]:
%time average_cy2(n)

In [None]:
%timeit average_cy2(n)

## Prime Numbers

### Pure Python

In [None]:
def is_prime(I):
    """Return True if the integer ``I`` is a prime number, else False.

    Uses trial division by 2 and then by the odd numbers up to the
    square root of ``I``. Integers below 2 are never prime, and 2 is
    handled explicitly because it is the only even prime.
    """
    if I < 2:
        return False
    if I == 2:
        return True
    if I % 2 == 0:
        return False
    for i in range(3, int(I ** 0.5) + 1, 2):
        if I % i == 0:
            return False
    return True

In [None]:
n = int(1e8 + 3)
n

In [None]:
%time is_prime(n)

In [None]:
p1 = int(1e8 + 7)
p1

In [None]:
%time is_prime(p1)

In [None]:
p2 = 100109100129162907

In [None]:
p2.bit_length()

In [None]:
%time is_prime(p2)

### Numba

In [None]:
is_prime_nb = numba.jit(is_prime)

In [None]:
%time is_prime_nb(n)

In [None]:
%time is_prime_nb(n)

In [None]:
%time is_prime_nb(p1)

In [None]:
%time is_prime_nb(p2)

### Cython

In [None]:
%%cython
def is_prime_cy1(I):
    """Return True if the integer ``I`` is a prime number, else False.

    Untyped trial division (by 2, then by odd numbers up to the square
    root of ``I``) compiled as-is by Cython. Integers below 2 are never
    prime, and 2 is handled explicitly because it is the only even prime.
    """
    if I < 2:
        return False
    if I == 2:
        return True
    if I % 2 == 0:
        return False
    for i in range(3, int(I ** 0.5) + 1, 2):
        if I % i == 0:
            return False
    return True

In [None]:
%timeit is_prime(p1)

In [None]:
%timeit is_prime_cy1(p1)

In [None]:
%%cython
def is_prime_cy2(long I):
    """Return True if the C ``long`` integer ``I`` is prime, else False.

    Trial division by 2 and then by the odd numbers up to the square
    root of ``I``, with the argument and the loop counter typed as C
    ``long``. Integers below 2 are never prime, and 2 is handled
    explicitly because it is the only even prime.
    """
    cdef long i
    if I < 2:
        return False
    if I == 2:
        return True
    if I % 2 == 0:
        return False
    for i in range(3, int(I ** 0.5) + 1, 2):
        if I % i == 0:
            return False
    return True

In [None]:
%timeit is_prime_cy2(p1)

In [None]:
%time is_prime_nb(p2)

In [None]:
%time is_prime_cy2(p2)

### Multiprocessing

In [None]:
import multiprocessing as mp

In [None]:
pool = mp.Pool(processes=4)

In [None]:
%time pool.map(is_prime, 10 * [p1])

In [None]:
%time pool.map(is_prime_nb, 10 * [p2])

In [None]:
%time pool.map(is_prime_cy2, 10 * [p2])

## Fibonacci Numbers

### Recursive Algorithm

In [None]:
def fib_rec_py1(n):
    """Return the n-th Fibonacci number via naive double recursion.

    Arguments below 2 are returned unchanged, which gives the base
    cases fib(0) = 0 and fib(1) = 1.
    """
    if n < 2:
        return n
    return fib_rec_py1(n - 1) + fib_rec_py1(n - 2)

In [None]:
%time fib_rec_py1(35)

In [None]:
fib_rec_nb = numba.jit(fib_rec_py1)

In [None]:
%time fib_rec_nb(35)

In [None]:
%%cython
def fib_rec_cy(int n):
    """Return the n-th Fibonacci number via naive double recursion.

    The argument is typed as a C ``int``; values below 2 are returned
    unchanged as the base cases.
    """
    if n < 2:
        return n
    return fib_rec_cy(n - 1) + fib_rec_cy(n - 2)

In [None]:
%time fib_rec_cy(35)

In [None]:
from functools import lru_cache as cache

In [None]:
@cache(maxsize=None)
def fib_rec_py2(n):
    """Return the n-th Fibonacci number via memoised recursion.

    Same recursion as the naive version, but every result is kept in an
    unbounded cache so each ``n`` is computed only once.
    """
    if n < 2:
        return n
    return fib_rec_py2(n - 1) + fib_rec_py2(n - 2)

In [None]:
%time fib_rec_py2(35)

In [None]:
%time fib_rec_py2(80)

### Iterative Algorithm 

In [None]:
def fib_it_py(n):
    """Return the n-th Fibonacci number computed iteratively.

    Starts from the pair (0, 1) and advances it ``n`` times.
    """
    prev, curr = 0, 1
    for _ in range(n):
        prev, curr = curr, prev + curr
    return prev

In [None]:
%time fib_it_py(80)

In [None]:
fib_it_nb = numba.jit(fib_it_py)

In [None]:
%time fib_it_nb(80)

In [None]:
%time fib_it_nb(80)

In [None]:
%%cython
def fib_it_cy1(int n):
    """Return the n-th Fibonacci number computed iteratively in C ``long``s.

    All arithmetic is done in C ``long`` variables, so results that do
    not fit into a C ``long`` are not represented correctly.
    """
    cdef long step
    cdef long prev = 0, curr = 1
    for step in range(1, n + 1):
        prev, curr = curr, prev + curr
    return prev

In [None]:
%time fib_it_cy1(80)

In [None]:
%%time
fn = fib_rec_py2(150)
print(fn)

In [None]:
fn.bit_length()

In [None]:
%%time 
fn = fib_it_nb(150)
print(fn)

In [None]:
fn.bit_length()

In [None]:
%%time
fn = fib_it_cy1(150)
print(fn)

In [None]:
fn.bit_length()

In [None]:
%%cython
cdef extern from *:
    # Expose the compiler's 128-bit integer type under the name int128.
    ctypedef int int128 '__int128_t'
def fib_it_cy2(int n):
    """Return the n-th Fibonacci number computed iteratively in 128-bit ints.

    Same iteration as the C ``long`` version, but with the loop counter
    and both running values declared as ``int128``.
    """
    cdef int128 step
    cdef int128 prev = 0, curr = 1
    for step in range(1, n + 1):
        prev, curr = curr, prev + curr
    return prev

In [None]:
%%time
fn = fib_it_cy2(150)
print(fn)

In [None]:
fn.bit_length()

## The Number Pi

The code example is taken from [StackExchange](https://codereview.stackexchange.com/questions/69370/monte-carlo-pi-calculation).

In [None]:
import random
import numpy as np
from pylab import mpl, plt
plt.style.use('seaborn-v0_8')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline

In [None]:
rn = [(random.random() * 2 - 1, random.random() * 2 - 1)
      for _ in range(500)]

In [None]:
rn = np.array(rn)
rn[:5]

In [None]:
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(1, 1, 1)
circ = plt.Circle((0, 0), radius=1, edgecolor='g', lw=2.0,
                  facecolor='None')
box = plt.Rectangle((-1, -1), 2, 2, edgecolor='b', alpha=0.3)
ax.add_patch(circ)
ax.add_patch(box)
plt.plot(rn[:, 0], rn[:, 1], 'r.')
plt.ylim(-1.1, 1.1)
plt.xlim(-1.1, 1.1);

In [None]:
n = int(1e7)

In [None]:
%time rn = np.random.random((n, 2)) * 2 - 1

In [None]:
rn.nbytes

In [None]:
%time distance = np.sqrt((rn ** 2).sum(axis=1))
distance[:8].round(3)

In [None]:
%time frac = (distance <= 1.0).sum() / len(distance)

In [None]:
pi_mcs = frac * 4
pi_mcs

In [None]:
def mcs_pi_py(n):
    """Estimate pi from ``n`` random points in the unit square.

    Counts the points whose distance from the origin is at most 1 and
    returns four times that fraction.
    """
    hits = 0
    for _ in range(n):
        x = random.random()
        y = random.random()
        if (x ** 2 + y ** 2) ** 0.5 <= 1:
            hits += 1
    return (4 * hits) / n

In [None]:
%time mcs_pi_py(n)

In [None]:
mcs_pi_nb = numba.jit(mcs_pi_py)

In [None]:
%time mcs_pi_nb(n)

In [None]:
%time mcs_pi_nb(n)

In [None]:
%%cython -a
import random
def mcs_pi_cy1(int n):
    """Estimate pi from ``n`` random points in the unit square.

    Counters and coordinates are C-typed; the random numbers come from
    the Python-level ``random`` module. Returns four times the fraction
    of points whose distance from the origin is at most 1.
    """
    cdef int k, hits = 0
    cdef float x, y
    for k in range(n):
        x = random.random()
        y = random.random()
        if (x ** 2 + y ** 2) ** 0.5 <= 1:
            hits += 1
    return (4 * hits) / n

In [None]:
%time mcs_pi_cy1(n)

In [None]:
%%cython -a
from libc.stdlib cimport rand, RAND_MAX
def mcs_pi_cy2(int n):
    """Estimate pi from ``n`` random points generated with C's ``rand()``.

    Each coordinate is ``rand()`` scaled to the unit interval. Returns
    four times the fraction of points whose distance from the origin is
    at most 1.
    """
    cdef int i, circle = 0
    cdef double x, y
    for i in range(n):
        # RAND_MAX is the documented upper bound of rand(); it is not
        # guaranteed to equal INT_MAX. The cast forces floating-point division.
        x = <double> rand() / RAND_MAX
        y = <double> rand() / RAND_MAX
        if (x ** 2 + y ** 2) ** 0.5 <= 1:
            circle += 1
    return (4 * circle) / n

In [None]:
%time mcs_pi_cy2(n)

## Binomial Trees

### Python

In [None]:
import math

In [None]:
S0 = 36.
T = 1.0
r = 0.06
sigma = 0.2

In [None]:
def simulate_tree(M):
    """Build a binomial price tree with ``M`` time steps using nested loops.

    Reads the module-level ``T``, ``sigma`` and ``S0``. Returns an
    (M + 1) x (M + 1) array whose column ``t`` holds the node values
    after ``t`` steps; entries that are never written stay zero.
    """
    dt = T / M
    up_factor = math.exp(sigma * math.sqrt(dt))
    down_factor = 1 / up_factor
    S = np.zeros((M + 1, M + 1))
    S[0, 0] = S0
    for t in range(1, M + 1):
        # Column t is derived from the t nodes of the previous column.
        for i in range(t):
            S[i, t] = S[i, t - 1] * up_factor
            S[i + 1, t] = S[i, t - 1] * down_factor
    return S

In [None]:
np.set_printoptions(formatter={'float':
                               lambda x: '%6.2f' % x})  

In [None]:
simulate_tree(4)

In [None]:
%time simulate_tree(500)

### NumPy

In [None]:
M = 4

In [None]:
up = np.arange(M + 1)
up = np.resize(up, (M + 1, M + 1))
up

In [None]:
down = up.T * 2
down

In [None]:
up - down

In [None]:
dt = T / M

In [None]:
S0 * np.exp(sigma * math.sqrt(dt) * (up - down))

In [None]:
def simulate_tree_np(M):
    """Build a binomial price tree with ``M`` time steps, fully vectorised.

    Reads the module-level ``T``, ``sigma`` and ``S0``. Every entry of
    the (M + 1) x (M + 1) result is ``S0 * exp(step * (ups - downs))``,
    where ``ups`` repeats 0..M along each row and ``downs`` is twice its
    transpose.
    """
    step = sigma * math.sqrt(T / M)
    ups = np.resize(np.arange(M + 1), (M + 1, M + 1))
    downs = ups.transpose() * 2
    return S0 * np.exp(step * (ups - downs))

In [None]:
simulate_tree_np(4)

In [None]:
%time simulate_tree_np(500)

### Numba

In [None]:
simulate_tree_nb = numba.jit(simulate_tree)

In [None]:
simulate_tree_nb(4)

In [None]:
%time simulate_tree_nb(500)

In [None]:
%timeit simulate_tree_nb(500)

### Cython

In [None]:
%%cython -a
import numpy as np
cimport cython
from libc.math cimport exp, sqrt
cdef float S0 = 36.
cdef float T = 1.0
cdef float r = 0.06
cdef float sigma = 0.2
def simulate_tree_cy(int M):
    """Build a binomial price tree with ``M`` time steps using C-typed loops.

    Uses the module-level C floats ``T``, ``sigma`` and ``S0`` declared
    in this cell. Returns a NumPy array created from an
    (M + 1) x (M + 1) float32 buffer; entries never written stay zero.
    """
    cdef int z, t, i
    cdef float dt, u, d
    # Typed memoryview over a zero-initialised single-precision array.
    cdef float[:, :] S = np.zeros((M + 1, M + 1),
                                  dtype=np.float32)
    dt = T / M
    # Up factor per step and its reciprocal as the down factor.
    u = exp(sigma * sqrt(dt))
    d = 1 / u
    S[0, 0] = S0
    # z is the number of nodes in the previous column (starts at 1).
    z = 1
    for t in range(1, M + 1):
        for i in range(z):
            # Each node of column t-1 produces an up node in the same row
            # and a down node one row below; the down node is overwritten
            # by the next iteration's up move, except for the last one.
            S[i, t] = S[i, t-1] * u
            S[i+1, t] = S[i, t-1] * d
        z += 1
    # Convert the memoryview into a NumPy array for the caller.
    return np.array(S)

In [None]:
simulate_tree_cy(4)

In [None]:
%time simulate_tree_cy(500)

In [None]:
%timeit S = simulate_tree_cy(500)

## Monte Carlo Simulation

### Python

In [None]:
M = 100
I = 50000

In [None]:
def mcs_simulation_py(p):
    """Simulate price paths step by step with explicit Python loops.

    ``p`` is a pair ``(M, I)``: number of time steps and number of
    paths. Reads the module-level ``T``, ``S0``, ``r`` and ``sigma``.
    Returns an (M + 1) x I array whose first row equals ``S0``.
    """
    M, I = p
    dt = T / M
    drift = (r - sigma ** 2 / 2) * dt
    vol = sigma * math.sqrt(dt)
    S = np.zeros((M + 1, I))
    S[0] = S0
    rn = np.random.standard_normal(S.shape)
    for t in range(1, M + 1):
        for i in range(I):
            S[t, i] = S[t - 1, i] * math.exp(drift + vol * rn[t, i])
    return S

In [None]:
%time S = mcs_simulation_py((M, I))

In [None]:
S[-1].mean()

In [None]:
S0 * math.exp(r * T)

In [None]:
K = 40.

In [None]:
C0 = math.exp(-r * T) * np.maximum(K - S[-1], 0).mean()

In [None]:
C0  #  <8>

In [None]:
plt.figure(figsize=(10, 6))
plt.hist(S[-1], bins=35, label='frequency')
plt.axvline(S[-1].mean(), color='r', label='mean value')
plt.legend(loc=0);

### NumPy

In [None]:
def mcs_simulation_np(p):
    """Simulate price paths with one vectorised update per time step.

    ``p`` is a pair ``(M, I)``: number of time steps and number of
    paths. Reads the module-level ``T``, ``S0``, ``r`` and ``sigma``.
    Returns an (M + 1) x I array whose first row equals ``S0``.
    """
    M, I = p
    dt = T / M
    drift = (r - sigma ** 2 / 2) * dt
    vol = sigma * math.sqrt(dt)
    S = np.zeros((M + 1, I))
    S[0] = S0
    rn = np.random.standard_normal(S.shape)
    for t in range(1, M + 1):
        # All I paths advance at once for this time step.
        S[t] = S[t - 1] * np.exp(drift + vol * rn[t])
    return S

In [None]:
%time S = mcs_simulation_np((M, I))

In [None]:
S[-1].mean()

In [None]:
%timeit S = mcs_simulation_np((M, I))

### Numba 

In [None]:
mcs_simulation_nb = numba.jit(mcs_simulation_py)

In [None]:
%time S = mcs_simulation_nb((M, I))

In [None]:
%time S = mcs_simulation_nb((M, I))

In [None]:
S[-1].mean()

In [None]:
C0 = math.exp(-r * T) * np.maximum(K - S[-1], 0).mean()

In [None]:
C0

In [None]:
%timeit S = mcs_simulation_nb((M, I))

### Cython &mdash; Sequential

In [None]:
%%cython
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport exp, sqrt
cdef float S0 = 36.
cdef float T = 1.0
cdef float r = 0.06
cdef float sigma = 0.2
# Index bounds checks and negative-index wraparound are switched off for
# this function, so every index used below must be valid and non-negative.
@cython.boundscheck(False)
@cython.wraparound(False)
def mcs_simulation_cy(p):
    """Simulate price paths with C-typed nested loops.

    ``p`` is a pair ``(M, I)``: number of time steps and number of
    paths. Uses the module-level C floats ``T``, ``S0``, ``r`` and
    ``sigma`` declared in this cell. Returns a NumPy array built from
    the (M + 1) x I double-precision buffer ``S``.
    """
    cdef int M, I
    M, I = p
    cdef int t, i
    cdef float dt = T / M
    # Double-precision memoryviews for the paths and the random numbers.
    cdef double[:, :] S = np.zeros((M + 1, I))
    cdef double[:, :] rn = np.random.standard_normal((M + 1, I))
    # Every path starts at S0.
    S[0] = S0
    for t in range(1, M + 1):
        for i in range(I):
            # NOTE(review): T, r, sigma and dt are single-precision floats
            # here, so results may differ slightly from the pure Python /
            # NumPy versions -- confirm whether that is intended.
            S[t, i] = S[t-1, i] * exp((r - sigma ** 2 / 2) * dt +
                                         sigma * sqrt(dt) * rn[t, i])
    # Convert the memoryview into a NumPy array for the caller.
    return np.array(S)

In [None]:
%time S = mcs_simulation_cy((M, I))

In [None]:
S[-1].mean()

In [None]:
%timeit S = mcs_simulation_cy((M, I))

### Multiprocessing

In [None]:
import multiprocessing as mp

In [None]:
pool = mp.Pool(processes=4)

In [None]:
p = 20

In [None]:
%timeit S = np.hstack(pool.map(mcs_simulation_np, p * [(M, int(I / p))]))

In [None]:
%timeit S = np.hstack(pool.map(mcs_simulation_nb, p * [(M, int(I / p))]))

In [None]:
%timeit S = np.hstack(pool.map(mcs_simulation_cy, p * [(M, int(I / p))]))

## Recursive pandas Algorithm

### Data and Formula

In [None]:
import pandas as pd

In [None]:
sym = 'SPY'

In [None]:
# Keep only rows where the symbol has a price: the EWMA computed below is
# recursive, so a single NaN would propagate into every later value.
data = pd.DataFrame(pd.read_csv('http://hilpisch.com/tr_eikon_eod_data.csv',
                               index_col=0, parse_dates=True)[sym]).dropna()

In [None]:
alpha = 0.25

In [None]:
data['EWMA'] = data[sym]

In [None]:
%%time
for t in zip(data.index, data.index[1:]):
    data.loc[t[1], 'EWMA'] = (alpha * data.loc[t[1], sym] +
                              (1 - alpha) * data.loc[t[0], 'EWMA'])

In [None]:
data.head()

In [None]:
data[data.index > '2017-1-1'].plot(figsize=(10, 6));

### Pure Python

In [None]:
def ewma_py(x, alpha):
    """Return the exponentially weighted moving average of ``x``.

    The first output equals the first input; every later output is
    ``alpha * x[k] + (1 - alpha) * previous_output``. The result is
    allocated with ``np.zeros_like(x)``.
    """
    out = np.zeros_like(x)
    decay = 1 - alpha
    for k in range(len(x)):
        out[k] = x[k] if k == 0 else alpha * x[k] + decay * out[k - 1]
    return out

In [None]:
%time data['EWMA_PY'] = ewma_py(data[sym], alpha)

In [None]:
%time data['EWMA_PY'] = ewma_py(data[sym].values, alpha)

### Numba

In [None]:
# numba.jit?

In [None]:
ewma_nb = numba.jit(ewma_py, forceobj=True)

In [None]:
%time data['EWMA_NB'] = ewma_nb(data[sym], alpha)

In [None]:
%timeit data['EWMA_NB'] = ewma_nb(data[sym], alpha)

In [None]:
%time data['EWMA_NB'] = ewma_nb(data[sym].values, alpha)

In [None]:
%timeit data['EWMA_NB'] = ewma_nb(data[sym].values, alpha)

### Cython

In [None]:
%%cython
import numpy as np
def ewma_cy(double[:] x, double alpha):
    """Return the exponentially weighted moving average of ``x``.

    ``x`` is a one-dimensional buffer of doubles. The first output
    equals the first input; every later output is
    ``alpha * x[i] + (1 - alpha) * y[i - 1]``. The result is returned
    as a typed memoryview of doubles.

    ``alpha`` is taken as a double so the smoothing factor is not
    truncated to single precision before it is applied to double data.
    """
    cdef int i
    # Uninitialised output buffer; every element is written in the loop.
    cdef double[:] y = np.empty_like(x)
    for i in range(len(x)):
        if i == 0:
            y[i] = x[i]
        else:
            y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y

In [None]:
%time data['EWMA_CY'] = ewma_cy(data[sym].values, alpha)

In [None]:
%timeit data['EWMA_CY'] = ewma_cy(data[sym].values, alpha)

In [None]:
data.head()

In [None]:
data.tail()

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>