<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 09 &mdash; Optimal Execution**

&copy; Dr. Yves J. Hilpisch

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Model Implementation

In [None]:
!git clone https://github.com/tpq-classes/rl_4_finance.git
import sys
sys.path.append('rl_4_finance')


In [None]:
import math
import random
import numpy as np
import pandas as pd
from pylab import plt, mpl

In [None]:
# Notebook-wide plot styling and NumPy print formatting.
plt.style.use('seaborn-v0_8')
mpl.rcParams.update({
    'figure.dpi': 300,    # resolution of figures shown inline
    'savefig.dpi': 300,   # resolution of figures written to disk
    'font.family': 'serif',
})
np.set_printoptions(suppress=True)  # print small floats without scientific notation

In [None]:
class AlmgrenChriss:
    """Parameter holder for the Almgren-Chriss execution model of this notebook.

    Every constructor argument is stored unchanged on the instance under its
    own name; the only derived attribute is the step length ``dt = T / N``.

    Parameters
    ----------
    T
        End of the time grid.
    N
        Number of time steps on that grid.
    S0
        Starting price level.
    sigma
        Volatility parameter.
    X
        Size of the position to be worked off.
    eta, gamma
        Cost coefficients.
    lamb
        Weight on the risk term.
    """

    def __init__(self, T, N, S0, sigma, X, eta, gamma, lamb):
        # Time discretisation.
        self.T, self.N = T, N
        self.dt = T / N
        # Price process inputs.
        self.S0, self.sigma = S0, sigma
        # Order size plus cost and risk coefficients.
        self.X = X
        self.eta, self.gamma, self.lamb = eta, gamma, lamb

In [None]:
class AlmgrenChriss(AlmgrenChriss):
    def optimal_execution(self):
        """Return the time grid and the per-step trades of the execution schedule.

        Holdings follow ``X * sinh(kappa * (T - t)) / sinh(kappa * T)`` with
        ``kappa = sqrt(lamb * sigma ** 2 / gamma)``, so they start at ``X``
        and reach zero at ``T``.  When ``kappa * T`` is zero (for example
        ``lamb == 0`` or ``sigma == 0``) that ratio is ``0 / 0``; its limit,
        the straight line ``X * (T - t) / T``, is used instead so the
        schedule does not come back as NaN.

        Returns
        -------
        t : numpy.ndarray
            ``N + 1`` equally spaced points from ``0`` to ``T``.
        xt : numpy.ndarray
            Trade sizes aligned with ``t``: ``xt[0]`` is ``0`` and ``xt[k]``
            is the reduction in holdings between ``t[k - 1]`` and ``t[k]``.
        """
        kappa = np.sqrt(self.lamb * self.sigma ** 2 / self.gamma)
        t = np.linspace(0, self.T, self.N + 1)
        denominator = np.sinh(kappa * self.T)
        if denominator == 0:
            # kappa -> 0 limit of the sinh ratio: sell at a constant rate.
            holdings = self.X * (self.T - t) / self.T
        else:
            holdings = self.X * np.sinh(kappa * (self.T - t)) / denominator
        # Nothing is traded at t[0]; each later entry is the drop in holdings
        # over the preceding interval.
        xt = np.zeros_like(holdings)
        xt[1:] = holdings[:-1] - holdings[1:]
        return t, xt

In [None]:
# Inputs for AlmgrenChriss(T, N, S0, sigma, X, eta, gamma, lamb); the notes
# describe how the class uses each value.
T = 10  # <1> end of the time grid; dt = T / N
N = 10  # <2> number of time steps (N + 1 grid points)
S0 = 100  # <3> starting value of the simulated price paths
sigma = 0.25  # <4> volatility: scales the random price shock and the risk terms
X = 10000  # <5> holdings at t = 0 that the schedule works off by T
eta = 0.001  # <6> coefficient of the permanent impact (price adjustment and cost)
gamma = 0.01  # <7> coefficient of the temporary cost
lamb = 2e-2  # <8> weight on the execution-risk term

In [None]:
ac = AlmgrenChriss(T, N, S0, sigma, X, eta, gamma, lamb)

In [None]:
t, xt = ac.optimal_execution()

In [None]:
t

In [None]:
xt.round()  # <9>

In [None]:
# Lower lamb on the existing instance; optimal_execution() reads ac.lamb on each call.
ac.lamb = 1e-4  # <10>

In [None]:
# Schedule under the smaller lamb, stored as xt_ so the earlier xt stays
# available for comparison.
t, xt_ = ac.optimal_execution()
xt_.round()  # <11>

In [None]:
# Remaining position (lines) and per-step trades (markers) for both schedules:
# xt was computed with lamb = 2e-2, xt_ with lamb = 1e-4.
for series, fmt, style, label in (
        (ac.X - xt.cumsum(), 'r', {'lw': 1}, 'high $\\lambda$ (position)'),
        (xt, 'rs', {'markersize': 4}, 'high $\\lambda$ (trade)'),
        (ac.X - xt_.cumsum(), 'b--', {'lw': 1}, 'low $\\lambda$ (position)'),
        (xt_, 'bo', {'markersize': 4}, 'low $\\lambda$ (trade)'),
):
    plt.plot(t, series, fmt, label=label, **style)
plt.legend();

In [None]:
from numpy.random import default_rng

In [None]:
class AlmgrenChriss(AlmgrenChriss):
    def simulate_stock_price(self, xt, seed=None):
        """Simulate one price path and its impact-adjusted counterpart.

        Parameters
        ----------
        xt : numpy.ndarray
            Trade sizes per grid point, as returned by ``optimal_execution``.
        seed : optional
            Passed to ``default_rng``; the same seed reproduces the same shocks.

        Returns
        -------
        S : numpy.ndarray
            ``N + 1`` prices starting at ``S0``; each step multiplies the
            previous price by ``exp(sigma ** 2 / 2 * dt + sigma * dZ)`` with
            ``dZ`` drawn from a normal distribution with standard deviation
            ``sqrt(dt)``.
        P : numpy.ndarray
            ``S`` reduced by ``eta`` times the sum of all trades placed
            before the respective grid point.
        """
        rng = default_rng(seed=seed)
        n_points = self.N + 1
        step_std = np.sqrt(self.dt)
        # NOTE(review): the drift term enters with a plus sign. A driftless
        # geometric Brownian motion would use -sigma ** 2 / 2 * dt; confirm
        # that the positive sign is intended.
        drift = (self.sigma ** 2 / 2) * self.dt
        S = np.zeros(n_points)
        P = np.zeros(n_points)
        S[0] = P[0] = self.S0
        for k in range(1, n_points):
            shock = rng.normal(0, step_std)
            S[k] = S[k - 1] * np.exp(drift + self.sigma * shock)
            # Only trades at indices 0 .. k-1 move the price seen at index k.
            P[k] = S[k] - self.eta * xt[:k].sum()
        return S, P

In [None]:
# Re-create the instance: the previous `ac` was built from the earlier class
# object and therefore lacks simulate_stock_price. This also resets ac.lamb to
# the module-level lamb.
ac = AlmgrenChriss(T, N, S0, sigma, X, eta, gamma, lamb)

In [None]:
t, xt = ac.optimal_execution()

In [None]:
xt.round()

In [None]:
seed = 250

In [None]:
S, P = ac.simulate_stock_price(xt, seed=seed)

In [None]:
ac.lamb = 1e-4

In [None]:
t, xt_ = ac.optimal_execution()

In [None]:
xt_.round()

In [None]:
# Same seed as the earlier call, so the random shocks (and hence S) repeat;
# only the impact-adjusted path differs because xt_ differs from xt.
S, P_ = ac.simulate_stock_price(xt_, seed=seed)

In [None]:
# Simulated path next to the two impact-adjusted paths
# (P from the lamb = 2e-2 schedule, P_ from the lamb = 1e-4 schedule).
for path, fmt, label in (
        (S, 'b', 'simulated stock price path'),
        (P, 'r--', 'adjusted path (high $\\lambda$)'),
        (P_, 'g:', 'adjusted path (low $\\lambda$)'),
):
    plt.plot(t, path, fmt, lw=1, label=label)
plt.legend();

In [None]:
class AlmgrenChriss(AlmgrenChriss):
    def calculate_costs(self, xt):
        """Break the cost of a trade schedule into its three components.

        Parameters
        ----------
        xt : numpy.ndarray
            Trade sizes per grid point.

        Returns
        -------
        tuple
            ``(temporary_cost, permanent_cost, execution_risk, TEC)`` where

            * ``temporary_cost = sum(gamma * (xt / dt) ** 2 * dt)``
            * ``permanent_cost = sum(eta * cumsum(xt) * xt)``
            * ``execution_risk = lamb * sigma ** 2 * sum((rem / dt) ** 2 * dt)``
              with ``rem[k]`` the sum of ``xt[k:]``
            * ``TEC`` is the sum of the three components.
        """
        dt = self.dt
        trade_rate = xt / dt
        temporary_cost = np.sum(self.gamma * trade_rate ** 2 * dt)

        traded_so_far = np.cumsum(xt)
        permanent_cost = np.sum(self.eta * traded_so_far * xt)

        # Reverse cumulative sum: what is still to be traded from each index on.
        still_to_trade = np.cumsum(xt[::-1])[::-1]
        risk_sum = np.sum((still_to_trade / dt) ** 2 * dt)
        execution_risk = self.lamb * self.sigma ** 2 * risk_sum

        TEC = temporary_cost + permanent_cost + execution_risk
        return temporary_cost, permanent_cost, execution_risk, TEC

In [None]:
# Re-create the instance so it picks up calculate_costs from the class
# definition just above; ac.lamb is back at the module-level lamb.
ac = AlmgrenChriss(T, N, S0, sigma, X, eta, gamma, lamb)

In [None]:
t, xt = ac.optimal_execution()

In [None]:
# No seed is passed here, so each run draws a fresh random path.
# calculate_costs(xt) below does not use S or P.
S, P = ac.simulate_stock_price(xt)

In [None]:
tc, pc, er, TEC = ac.calculate_costs(xt)

In [None]:
# Cost breakdown for the schedule computed with the current ac.lamb.
print(f'lambda = {ac.lamb}')
print(f'temporary cost = {tc:7.0f}')
print(f'permanent cost = {pc:7.0f}')
print(f'execution risk = {er:7.0f}')
print(f'total ex. cost = {TEC:7.0f}')  # <1> sum of the three components above

In [None]:
ac.X * ac.S0

In [None]:
ac.lamb = 1e-4

In [None]:
t, xt_ = ac.optimal_execution()

In [None]:
# No seed is passed here, so each run draws a fresh random path.
# calculate_costs(xt_) below does not use S or P_.
S, P_ = ac.simulate_stock_price(xt_)

In [None]:
tc, pc, er, TEC = ac.calculate_costs(xt_)

In [None]:
# Cost breakdown for the schedule computed after ac.lamb was set to 1e-4.
print(f'lambda = {ac.lamb}')
print(f'temporary cost = {tc:7.0f}')
print(f'permanent cost = {pc:7.0f}')
print(f'execution risk = {er:7.0f}')
print(f'total ex. cost = {TEC:7.0f}')  # <2> sum of the three components above

<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>