<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 06 &mdash; Algorithmic Trading**

&copy; Dr. Yves J. Hilpisch

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Prediction Game Revisited

In [None]:
import math
import random
import numpy as np
import pandas as pd
from pylab import plt, mpl
import torch

In [None]:
# Plot styling: seaborn theme, 300 dpi for displayed and saved figures,
# serif font.
plt.style.use('seaborn-v0_8')
mpl.rcParams['figure.dpi'] = 300
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
# Print small floats in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)

In [None]:
from finance import *

In [None]:
# Environment from the project-local Finance class (finance.py, not shown
# here) for 'GLD'; the second argument 'r' is presumably the feature column.
# NOTE(review): min_accuracy is 47.5 here but 0.5 in the Simulation and
# Trading calls further down -- confirm which scale Finance expects.
finance = Finance('GLD', 'r', min_accuracy=47.5,
              n_features=8)

In [None]:
finance.data[finance.symbol].plot(title=finance.symbol,
                                  lw=1.0, c='b');

In [None]:
from dqlagent_pytorch import *

In [None]:
# Seed the Python, NumPy and PyTorch random number generators before the
# agent is created and trained.
random.seed(100)
np.random.seed(100)
torch.manual_seed(100)

In [None]:
# DQL agent for the Finance environment: the environment object itself is
# the fourth argument; the learning rate is set explicitly to 0.0001.
dqlagent = DQLAgent(finance.symbol, finance.feature,
                 finance.n_features, finance, lr=0.0001)

In [None]:
%time dqlagent.learn(500)

In [None]:
dqlagent.test(3)

In [None]:
from simulation import Simulation

In [None]:
random.seed(500)

In [None]:
# Environment from the project-local Simulation class (simulation.py, not
# shown here).
# NOTE(review): the positional arguments presumably are symbol, feature,
# n_features, start, end and periods -- confirm against simulation.py.
simulation = Simulation('SYMBOL', 'r', 4, '2025-1-1', '2027-1-1',
                2 * 252, min_accuracy=0.5, x0=1, kappa=1,
                theta=0.75, sigma=0.1, new=True, normalize=True)

In [None]:
# Reset the simulation five times and plot the symbol column after each
# reset.
for _ in range(5):
    simulation.reset()
    simulation.data[simulation.symbol].plot(title=simulation.symbol,
                                           lw=1.0, c='b');

In [None]:
# Re-seed the Python, NumPy and PyTorch random number generators before
# the agent for the simulated environment is created.
random.seed(100)
np.random.seed(100)
torch.manual_seed(100)


In [None]:
# DQL agent for the simulated environment; no lr is passed here, so
# whatever default DQLAgent defines applies.
agent = DQLAgent(simulation.symbol, simulation.feature,
                 simulation.n_features, simulation)

In [None]:
%time agent.learn(250)

In [None]:
agent.test(5)

In [None]:
class ActionSpace:
    """Discrete action space whose actions are the integers ``0 .. n - 1``."""

    # number of available actions
    n = 2

    def sample(self):
        """Return one action drawn uniformly at random from ``0 .. n - 1``.

        The draw uses the global ``random`` module state, so a prior
        ``random.seed(...)`` makes the sequence reproducible.
        """
        # derive the upper bound from ``n`` so that changing ``n`` is
        # enough to widen or narrow the space
        return random.randint(0, self.n - 1)

In [None]:
class Trading:
    """Simulated trading environment.

    The constructor stores the configuration, creates the action space and
    then calls ``_simulate_data`` and ``_prepare_data``; those methods (and
    the remaining environment API) are added to the class in later cells.
    """

    def __init__(self, symbol, features, window, lags,
                 start, end, periods,
                 x0=100, kappa=1, theta=100, sigma=0.2,
                 leverage=1, min_accuracy=0.5, min_performance=0.85,
                 mu=None, std=None,
                 new=True, normalize=True):
        """Store the parameters and build the first data set.

        Parameters
        ----------
        symbol:
            Column name used for the simulated price series.
        features:
            Column names that make up a state; ``n_features`` is set to
            ``len(features)``.
        window:
            Rolling-window length for the additional features; they are
            only computed when ``window > 0``.
        lags:
            Number of rows returned per state.
        start, end, periods:
            Passed to ``pd.date_range`` to build the time index.
        x0:
            First value of the simulated path.
        kappa, theta, sigma:
            Parameters used in the path simulation.
        leverage:
            Factor applied to the log return in ``step``.
        min_accuracy, min_performance:
            Thresholds below which ``step`` ends an episode early.
        mu, std:
            Normalization statistics; when either is ``None`` both are
            computed from the first prepared data set.
        new:
            If true, ``reset`` simulates and prepares new data.
        normalize:
            If true, states are taken from the standardized data.
        """
        # instrument and state definition
        self.symbol = symbol
        self.features = features
        self.n_features = len(features)
        self.window = window
        self.lags = lags
        # time grid of the simulation
        self.start = start
        self.end = end
        self.periods = periods
        # parameters of the simulated process
        self.x0 = x0
        self.kappa = kappa
        self.theta = theta
        self.sigma = sigma
        # trading and episode-termination settings
        self.leverage = leverage
        self.min_accuracy = min_accuracy
        self.min_performance = min_performance
        # normalization statistics and data-handling flags
        self.mu = mu
        self.std = std
        self.new = new
        self.normalize = normalize
        self.action_space = ActionSpace()
        # simulate first, then derive features and normalized data
        self._simulate_data()
        self._prepare_data()

In [None]:
class Trading(Trading):
    def _simulate_data(self):
        """Simulate a new price path and store it in ``self.data``.

        Starting from ``x0``, every step adds a pull towards ``theta``
        (scaled by ``kappa`` and the time step) and a Gaussian shock that
        is proportional to the current level, ``sigma`` and the square
        root of the time step. The result is a one-column DataFrame named
        after ``self.symbol`` and indexed by the generated dates.
        """
        dates = pd.date_range(start=self.start,
                              end=self.end, periods=self.periods)
        # time step in years: calendar span in days / 365 / periods
        dt = (dates[-1] - dates[0]).days / 365 / self.periods
        sqrt_dt = math.sqrt(dt)
        path = [self.x0]
        # one new value for every date after the first
        for _ in range(len(dates) - 1):
            prev = path[-1]
            drift = self.kappa * (self.theta - prev) * dt
            shock = prev * self.sigma * sqrt_dt * random.gauss(0, 1)
            path.append(prev + drift + shock)
        self.data = pd.DataFrame(path, columns=[self.symbol], index=dates)

In [None]:
class Trading(Trading):
    def _prepare_data(self):
        """Derive returns, rolling features, normalized data and labels.

        Works in place on ``self.data``:

        * ``r``: log return of the symbol column.
        * if ``window > 0``: ``SMA`` (rolling mean), ``DEL`` (price minus
          ``SMA``), ``MIN`` / ``MAX`` (rolling extremes) and ``MOM``
          (rolling mean of ``r``).
        * ``d``: 1 where ``r > 0``, otherwise 0.

        Rows with missing values are dropped after each feature stage.
        ``self.data_`` holds the data used for states: standardized with
        ``self.mu`` / ``self.std`` when ``normalize`` is true, otherwise a
        plain copy. It is built before ``d`` is added, so it does not
        contain the label column.
        """
        # use the price column explicitly so the return does not depend on
        # the frame having exactly one column at this point
        price = self.data[self.symbol]
        self.data['r'] = np.log(price / price.shift(1))
        self.data.dropna(inplace=True)
        # additional features
        if self.window > 0:
            # re-read the column: dropna above removed the first row
            price = self.data[self.symbol]
            rolling = price.rolling(self.window)
            self.data['SMA'] = rolling.mean()
            self.data['DEL'] = price - self.data['SMA']
            self.data['MIN'] = rolling.min()
            self.data['MAX'] = rolling.max()
            self.data['MOM'] = self.data['r'].rolling(
                self.window).mean()
            # add more features here
            self.data.dropna(inplace=True)
        if self.normalize:
            # statistics are computed once and then reused, so later data
            # sets are scaled with the same mu and std
            if self.mu is None or self.std is None:
                self.mu = self.data.mean()
                self.std = self.data.std()
            self.data_ = (self.data - self.mu) / self.std
        else:
            self.data_ = self.data.copy()
        # direction label, added after data_ so it is never normalized
        self.data['d'] = (self.data['r'] > 0).astype(int)

In [None]:
class Trading(Trading):
    def _get_state(self):
        """Return the ``lags`` feature rows that end just before ``bar``."""
        first = self.bar - self.lags
        return self.data_[self.features].iloc[first:self.bar]

    def seed(self, seed):
        """Seed the Python, NumPy and PyTorch random number generators."""
        for seeder in (random.seed, np.random.seed, torch.manual_seed):
            seeder(seed)

    def reset(self):
        """Start a new episode and return ``(state_values, info_dict)``.

        When ``self.new`` is true the data is simulated and prepared
        again first. All episode counters are cleared and ``bar`` is set
        to ``lags`` so that the first state is complete.
        """
        if self.new:
            self._simulate_data()
            self._prepare_data()
        # episode bookkeeping
        self.bar = self.lags
        self.performance = 1
        self.treward = 0
        self.accuracy = 0
        self.actions = []
        self.returns = []
        return self._get_state().values, {}

In [None]:
class Trading(Trading):
    def step(self, action):
        """Apply ``action`` at the current bar and move one bar forward.

        The action counts as correct when it equals the ``d`` label of the
        current bar. The reward is 1 for a correct action and 0 otherwise;
        the leveraged absolute log return is added to (correct) or
        subtracted from (wrong) the running gross performance.

        Returns ``(state_values, reward, done, False, {})``. ``done`` is
        true when the data is exhausted, or when a wrong action occurs
        more than 15 bars into the episode while accuracy or performance
        is below its minimum.
        """
        row = self.bar
        ret = self.data['r'].iloc[row] * self.leverage
        hit = action == self.data['d'].iloc[row]
        if hit:
            reward_, pl = 1, abs(ret)
        else:
            reward_, pl = 0, -abs(ret)
        reward = reward_
        # alternative options:
        # reward = pl  # only the P&L in log returns
        # reward = reward_ + 10 * pl  # the reward + the scaled P&L
        self.treward += reward
        self.bar += 1
        # share of the total reward in the number of bars played so far
        self.accuracy = self.treward / (self.bar - self.lags)
        self.performance *= math.exp(pl)
        # early stopping is only considered after the first 15 bars
        grace_over = self.bar > self.lags + 15
        if self.bar >= len(self.data):
            done = True
        elif reward_ == 1 or not grace_over:
            done = False
        else:
            done = bool(self.accuracy < self.min_accuracy or
                        self.performance < self.min_performance)
        state = self._get_state()
        return state.values, reward, done, False, {}

In [None]:
symbol = 'SYMBOL'

In [None]:
# Demo environment: three feature columns (price, 'r', 'DEL'), a 10-bar
# rolling window and 5 lags per state; normalize=False keeps the state
# data as an unscaled copy.
trading = Trading(symbol, [symbol, 'r', 'DEL'], window=10, lags=5,
            start='2024-1-1', end='2026-1-1', periods=504,
            x0=100, kappa=2, theta=300, sigma=0.1, normalize=False)

In [None]:
random.seed(750)

In [None]:
trading.reset()

In [None]:
trading.data.info()

In [None]:
# Plot the last 200 rows of the price together with its rolling mean,
# minimum and maximum.
trading.data.iloc[-200:][
    [trading.symbol, 'SMA', 'MIN', 'MAX']].plot(
        style=['b-', 'r--', 'g:', 'g:'], lw=1.0);

In [None]:
class TradingAgent(DQLAgent):
    def _create_model(self, hu, lr):
        """Build and compile the network stored in ``self.model``.

        The stack is: a dense ReLU layer with ``hu`` units taking
        ``lags * n_features`` inputs, a flatten layer, a second dense ReLU
        layer with ``hu`` units, and a linear output layer with 2 units.
        It is compiled with MSE loss and the optimizer ``opt`` at learning
        rate ``lr``.

        NOTE(review): ``Sequential``, ``Dense``, ``Flatten`` and ``opt``
        are Keras-style names, while this notebook imports
        ``dqlagent_pytorch`` -- confirm they are in scope and that the
        base class actually calls ``_create_model``.
        """
        n_inputs = self.env.lags * self.env.n_features
        layers = [
            Dense(hu, input_dim=n_inputs, activation='relu'),
            Flatten(),
            Dense(hu, activation='relu'),
            Dense(2, activation='linear'),
        ]
        self.model = Sequential()
        for layer in layers:
            self.model.add(layer)
        self.model.compile(loss='mse',
                           optimizer=opt(learning_rate=lr))

In [None]:
# Re-seed the Python, NumPy and PyTorch random number generators before
# the trading environment and its agent are created.
random.seed(100)
np.random.seed(100)
torch.manual_seed(100)


In [None]:
# Environment used for the agent: features 'r', 'DEL' and 'MOM', a 10-bar
# rolling window and 8 lags per state. new=True makes every reset()
# simulate fresh data; normalize=True standardizes the state data.
trading = Trading(symbol, ['r', 'DEL', 'MOM'], window=10, lags=8,
            start='2024-1-1', end='2026-1-1', periods=2 * 252,
            x0=100, kappa=2, theta=50, sigma=0.1,
            leverage=1, min_accuracy=0.5, min_performance=0.85,
            new=True, normalize=True)

In [None]:
# Agent for the Trading environment. Unlike the DQLAgent calls above, the
# third argument is lags * n_features (the size of a flattened state)
# rather than n_features alone.
tradingagent = TradingAgent(trading.symbol, trading.features,
                 trading.lags * trading.n_features, trading, hu=24, lr=0.0001)

In [None]:
%%time
# Baseline run: 100 test episodes before learn() has been called on this
# agent; the resulting performances are kept as random_performances below.
tradingagent.test(100, min_accuracy=0.0,
           min_performance=0.0,
           verbose=True, full=False)

In [None]:
random_performances = tradingagent.performances

In [None]:
sum(random_performances) / len(random_performances)

In [None]:
# Distribution of the gross performance over the baseline test episodes.
plt.hist(random_performances, bins=50, color='b')
plt.xlabel('gross performance')
plt.ylabel('frequency');

In [None]:
%time tradingagent.learn(500)

In [None]:
%%time
# Same evaluation after learn(500) has run, this time with 50 episodes.
tradingagent.test(50, min_accuracy=0.0,
           min_performance=0.0,
           verbose=True, full=False)

In [None]:
sum(tradingagent.performances) / len(tradingagent.performances)

In [None]:
# Overlay both distributions: baseline run in blue, post-training run in
# red.
plt.hist(random_performances, bins=30,
         color='b', label='random (left)')
plt.hist(tradingagent.performances, bins=30,
         color='r', label='trained (right)')
plt.xlabel('gross performance')
plt.ylabel('frequency')
plt.legend();

<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>