<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 04 &mdash; Simulated Data**

&copy; Dr. Yves J. Hilpisch

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Noisy Data

In [None]:
# Notebook shell escape: clone the companion repository into the current
# working directory (creates the folder ``rl_4_finance``).
!git clone https://github.com/tpq-classes/rl_4_finance.git
import sys
# Add the cloned folder (relative path) to the module search path.
sys.path.append('rl_4_finance')


In [None]:
import random  # needed by ActionSpace.sample; keeps this cell self-contained


class ActionSpace:
    """Minimal action space with two actions, 0 and 1."""

    def sample(self):
        """Return a random action.

        Returns:
            int: ``0`` or ``1``, drawn with ``random.randint`` (both ends
            inclusive).
        """
        return random.randint(0, 1)

In [None]:
import numpy as np
import pandas as pd
from numpy.random import default_rng  # <1>

In [None]:
# Seeded NumPy random generator; NoisyData._prepare_data draws its noise from it.
rng = default_rng(seed=100)  # <1>

In [None]:
class NoisyData:
    """Prediction environment on historical price data with optional noise.

    This first part of the class stores the configuration and loads the
    raw data; ``_prepare_data``, ``reset`` and ``step`` are added in the
    following cells.
    """
    # Alternative data source (kept for reference):
    # url = 'https://certificate.tpq.io/findata.csv'
    url = 'https://certificate.tpq.io/rl4finance.csv'

    def __init__(self, symbol, feature, n_features=4,
                 min_accuracy=0.485, noise=True,
                 noise_std=0.001):
        """Store the settings, then load and prepare the data.

        Args:
            symbol: Column of the raw data set to work with.
            feature: Column of the normalized data used to build states.
            n_features: Number of consecutive observations per state.
            min_accuracy: Accuracy threshold used by ``step`` to end an
                episode early.
            noise: If true, noise is added to the price series.
            noise_std: Noise standard deviation as a fraction of the
                mean price.
        """
        # what to read and how to build states
        self.symbol = symbol
        self.feature = feature
        self.n_features = n_features
        # episode termination threshold
        self.min_accuracy = min_accuracy
        # noise configuration
        self.noise = noise  # <2>
        self.noise_std = noise_std  # <3>
        self.action_space = ActionSpace()
        # load first, then derive the working data set
        self._get_data()
        self._prepare_data()

    def _get_data(self):
        """Read the CSV file at ``self.url`` into ``self.raw``.

        The first column becomes the index and is parsed as dates.
        """
        raw = pd.read_csv(self.url, index_col=0, parse_dates=True)
        self.raw = raw

In [None]:
class NoisyData(NoisyData):
    """Adds data preparation and episode reset to ``NoisyData``."""

    def _prepare_data(self):
        """Rebuild ``self.data`` and its min-max scaled copy ``self.data_``.

        Starting from the raw series of ``self.symbol``, optionally adds
        Gaussian noise, then derives the log return ``'r'`` and the
        direction flag ``'d'`` (1 for a positive return, else 0).
        """
        self.data = pd.DataFrame(self.raw[self.symbol]).dropna()
        if self.noise:
            # noise scale is proportional to the mean price level
            scale = self.data.mean() * self.noise_std  # <1>
            shocks = rng.normal(0, scale, len(self.data))  # <2>
            self.data[self.symbol] = self.data[self.symbol] + shocks
        log_returns = np.log(self.data / self.data.shift(1))
        self.data['r'] = log_returns
        self.data['d'] = np.where(self.data['r'] > 0, 1, 0)
        # the first row has no return and is dropped here
        self.data.dropna(inplace=True)
        upper = self.data.max()  # <3>
        lower = self.data.min()  # <3>
        self.data_ = (self.data - lower) / (upper - lower)  # <3>

    def reset(self):
        """Start a new episode.

        With noise enabled the data is prepared again, so every episode
        works on freshly drawn noise.

        Returns:
            tuple: the initial state (first ``n_features`` values of the
            feature column of ``self.data_``) and an empty info dict.
        """
        if self.noise:
            self._prepare_data()  # <4>
        self.treward = 0
        self.bar = self.n_features
        first = self.bar - self.n_features
        window = self.data_[self.feature].iloc[first:self.bar]
        return window.values, {}

In [None]:
class NoisyData(NoisyData):
    """Adds the step logic to ``NoisyData``."""

    def step(self, action):
        """Score ``action`` against the realized direction and advance.

        Args:
            action: Predicted direction, compared with ``data['d']`` at
                the current bar.

        Returns:
            tuple: ``(next_state, reward, done, False, {})`` where
            ``reward`` is 1 for a correct prediction and 0 otherwise.
        """
        hit = action == self.data['d'].iloc[self.bar]
        reward = 1 if hit else 0
        self.treward += reward
        self.bar += 1
        self.accuracy = self.treward / (self.bar - self.n_features)
        # the episode ends when the data is exhausted ...
        out_of_data = self.bar >= len(self.data)
        # ... or after a wrong prediction with too low an accuracy,
        # once more than 15 steps have been taken
        failed = (reward != 1 and
                  self.accuracy < self.min_accuracy and
                  self.bar > self.n_features + 15)
        done = True if (out_of_data or failed) else False
        first = self.bar - self.n_features
        next_state = self.data_[self.feature].iloc[first:self.bar].values
        return next_state, reward, done, False, {}

In [None]:
# Environment whose states are taken from the normalized (noisy) price column
# itself; the noise standard deviation is 0.5% of the mean price.
fin = NoisyData(symbol='EUR=', feature='EUR=',
                noise=True, noise_std=0.005)

In [None]:
# reset() re-prepares the data (new noise draw) and returns (state, {}).
fin.reset()  # <1>

In [None]:
# A further reset draws the noise anew from rng, so the state generally
# differs from the one returned before.
fin.reset()  # <1>

In [None]:
# Same symbol, but states are now built from the normalized log returns 'r'.
fin = NoisyData('EUR=', 'r', n_features=4,
                noise=True, noise_std=0.005)

In [None]:
# Initial state: the first four normalized log returns of a fresh noisy series.
fin.reset()  # <2>

In [None]:
# Resetting again re-draws the noise, which changes the returns-based state too.
fin.reset()  # <2>

In [None]:
# Plotting setup: seaborn style sheet, 300 dpi for display and saved figures,
# serif font family.
from pylab import plt, mpl
plt.style.use('seaborn-v0_8')
mpl.rcParams['figure.dpi'] = 300
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'

In [None]:
import warnings
# Suppress all warnings for the rest of the session.
warnings.simplefilter('ignore')

In [None]:
# Plot three noisy versions of the price series (data from 2023-7-1 onward);
# each reset() call applies a new noise draw.
for _ in range(3):
    fin.reset()
    fin.data[fin.symbol].loc['2023-7-1':].plot(lw=1)

In [None]:
import os
import warnings
# Suppress all Python warnings.
warnings.simplefilter('ignore')
# NOTE(review): presumably silences TensorFlow's C++ log output; it must be
# set before TensorFlow is imported to take effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
%run /content/rl_4_finance/dqlagent.py

In [None]:
# Build the agent from the environment's symbol, feature and window length,
# passing the environment itself as the last argument.
agent = DQLAgent(fin.symbol, fin.feature, fin.n_features, fin)

In [None]:
# Timed call of agent.learn.
# NOTE(review): 250 is presumably the number of training episodes -- confirm
# in dqlagent.py.
%time agent.learn(250)

In [None]:
# NOTE(review): presumably runs 5 test episodes -- confirm in dqlagent.py.
agent.test(5)

## Simulated Data

In [None]:
class Simulation:
    """Prediction environment based on simulated price paths.

    This first part of the class only stores the configuration and
    triggers simulation and preparation; the methods themselves are added
    in the following cells.
    """

    def __init__(self, symbol, feature, n_features,
                 start, end, periods,
                 min_accuracy=0.525, x0=100,
                 kappa=1, theta=100, sigma=0.2,
                 normalize=True, new=False):
        """Store the settings, then simulate and prepare the data.

        Args:
            symbol: Column name of the simulated series.
            feature: Column used to build states.
            n_features: Number of consecutive observations per state.
            start: Start of the date index.
            end: End of the date index.
            periods: Number of points in the date index.
            min_accuracy: Accuracy threshold used by ``step`` to end an
                episode early.
            x0: Initial value of the simulated process.
            kappa: Mean-reversion factor.
            theta: Level the process reverts to.
            sigma: Volatility factor.
            normalize: If true, states come from standardized data.
            new: If true, ``reset`` simulates a new path every time.
        """
        # state definition
        self.symbol = symbol
        self.feature = feature
        self.n_features = n_features
        # time grid
        self.start = start  # <1>
        self.end = end  # <2>
        self.periods = periods  # <3>
        # process parameters
        self.x0 = x0  # <4>
        self.kappa = kappa  # <4>
        self.theta = theta  # <4>
        self.sigma = sigma  # <4>
        # environment behavior
        self.min_accuracy = min_accuracy  # <5>
        self.normalize = normalize  # <6>
        self.new = new  # <7>
        self.action_space = ActionSpace()
        # generate the path first, then derive returns and states from it
        self._simulate_data()
        self._prepare_data()

In [None]:
import math
import random  # used for the Gaussian shocks; keeps this cell self-contained


class Simulation(Simulation):
    """Adds the path simulation to ``Simulation``."""

    def _simulate_data(self):
        """Simulate one price path and store it in ``self.data``.

        The path starts at ``x0`` and follows the discretized recursion::

            x_t = x_{t-1} + kappa * (theta - x_{t-1}) * dt
                  + x_{t-1} * sigma * sqrt(dt) * z_t

        with ``z_t`` drawn from ``random.gauss(0, 1)``. The result is a
        one-column DataFrame (column ``self.symbol``) indexed by the
        generated dates.
        """
        index = pd.date_range(start=self.start,
                    end=self.end, periods=self.periods)
        x = [self.x0]  # <1>
        # step size in years: calendar days covered / 365 / number of periods
        dt = (index[-1] - index[0]).days / 365 / self.periods  # <2>
        for t in range(1, len(index)):
            # mean-reverting drift plus a shock scaled by the current level
            x_ = (x[t - 1] + self.kappa * (self.theta - x[t - 1]) * dt +
                  x[t - 1] * self.sigma * math.sqrt(dt) * random.gauss(0, 1))  # <3>
            x.append(x_)  # <4>

        self.data = pd.DataFrame(x, columns=[self.symbol], index=index)  # <5>

In [None]:
class Simulation(Simulation):
    """Adds the data preparation to ``Simulation``."""

    def _prepare_data(self):
        """Derive returns, the state data ``self.data_`` and directions.

        ``self.data_`` is built before the direction column ``'d'`` is
        added, so it only holds the price and return columns.
        """
        log_returns = np.log(self.data / self.data.shift(1))  # <1>
        self.data['r'] = log_returns
        self.data.dropna(inplace=True)
        if not self.normalize:
            # states come from the unscaled data
            self.data_ = self.data.copy()
        else:
            # standardize with the column-wise mean and standard deviation
            self.mu = self.data.mean()  # <2>
            self.std = self.data.std()  # <2>
            self.data_ = (self.data - self.mu) / self.std  # <2>
        # direction flag: 1 for a positive log return, else 0
        direction = np.where(self.data['r'] > 0, 1, 0)  # <3>
        self.data['d'] = direction
        self.data['d'] = self.data['d'].astype(int)  # <3>

In [None]:
class Simulation(Simulation):
    """Adds state access, seeding and episode reset to ``Simulation``."""

    def _get_state(self):
        """Return the current state window as a pandas object.

        The window holds the ``n_features`` values of the feature column
        of ``self.data_`` that end just before position ``self.bar``.
        """
        return self.data_[self.feature].iloc[self.bar -
                                self.n_features:self.bar]  # <1>

    def seed(self, seed):
        """Seed Python's ``random`` module and TensorFlow with ``seed``.

        ``random`` and ``tf`` are expected to be available in the notebook
        namespace.
        """
        random.seed(seed)  # <2>
        # TensorFlow 2 names the function ``tf.random.set_seed``; only
        # older releases offer ``tf.random.set_random_seed``. Prefer the
        # current name and fall back to the old one.
        set_tf_seed = getattr(tf.random, 'set_seed', None)
        if set_tf_seed is None:
            set_tf_seed = tf.random.set_random_seed
        set_tf_seed(seed)  # <2>

    def reset(self):
        """Start a new episode.

        Resets the reward, accuracy and bar counters. If ``self.new`` is
        true, a new path is simulated and prepared first.

        Returns:
            tuple: the initial state as an array and an empty info dict.
        """
        self.treward = 0
        self.accuracy = 0
        self.bar = self.n_features
        if self.new:
            self._simulate_data()
            self._prepare_data()
        state = self._get_state()
        return state.values, {}

In [None]:
class Simulation(Simulation):
    """Adds the step logic to ``Simulation``."""

    def step(self, action):
        """Score ``action`` against the realized direction and advance.

        Args:
            action: Predicted direction, compared with ``data['d']`` at
                the current bar.

        Returns:
            tuple: ``(next_state, reward, done, False, {})`` where
            ``reward`` is 1 for a correct prediction and 0 otherwise.
        """
        hit = action == self.data['d'].iloc[self.bar]
        reward = 1 if hit else 0
        self.treward += reward
        self.bar += 1
        self.accuracy = self.treward / (self.bar - self.n_features)
        # the episode ends when the data is exhausted ...
        out_of_data = self.bar >= len(self.data)
        # ... or after a wrong prediction with too low an accuracy,
        # once the bar counter has passed 25
        failed = (reward != 1 and
                  self.accuracy < self.min_accuracy and
                  self.bar > 25)
        done = True if (out_of_data or failed) else False
        next_state = self._get_state().values
        return next_state, reward, done, False, {}

In [None]:
# Column name used for the simulated series in the examples below.
sym = 'EUR='

In [None]:
# Fix the seed of Python's random module, which supplies the simulation shocks.
random.seed(100)

In [None]:
# Baseline: sigma=0.0 switches the random term off, so the path moves
# deterministically from x0=1 toward theta=1.1.
env_base = Simulation(sym, sym, 5, start='2024-1-1', end='2025-1-1',
                 periods=252, x0=1, kappa=1, theta=1.1, sigma=0.0,
                 normalize=True)  # <1>

In [None]:
# Trend case: theta=2 lies well above x0=1, giving an upward drift with
# random shocks (sigma=0.1).
env_trend = Simulation(sym, sym, 5, start='2024-1-1', end='2025-1-1',
                 periods=252, x0=1, kappa=1, theta=2, sigma=0.1,
                 normalize=True)  # <2>

In [None]:
# Mean-reversion case: theta equals x0=1, so the drift always points back to
# the starting level while shocks (sigma=0.1) move the path away from it.
env_mrev = Simulation(sym, sym, 5, start='2024-1-1', end='2025-1-1',
                 periods=252, x0=1, kappa=1, theta=1, sigma=0.1,
                 normalize=True)  # <3>

In [None]:
# Plot the three simulated paths together, distinguished by line style.
env_base.data[sym].plot(figsize=(10, 6),label='baseline')
env_trend.data[sym].plot(label='trend', style='--')
env_mrev.data[sym].plot(label='mean-reversion', style='-.')
plt.legend();

In [None]:
# Re-seed so that the following simulations start from the same random state.
random.seed(100)

In [None]:
# Environment with log returns 'r' as feature and four values per state;
# new=True makes every reset() simulate a fresh path.
sim = Simulation(sym, 'r', 4, start='2024-1-1', end='2028-1-1',
                 periods=2 * 252, min_accuracy=0.485, x0=1,
                 kappa=2, theta=2, sigma=0.15,
                 normalize=True, new=True)

In [None]:
# Plot ten simulated paths; each reset() generates a new one because new=True.
for _ in range(10):
    sim.reset()
    sim.data[sym].plot(figsize=(10, 6));

In [None]:
# Agent for the simulated environment.
# NOTE(review): lr is presumably the learning rate -- confirm in dqlagent.py.
agent = DQLAgent(sim.symbol, sim.feature, sim.n_features, sim, lr=0.001)

In [None]:
# Timed call of agent.learn.
# NOTE(review): 500 is presumably the number of training episodes -- confirm.
%time agent.learn(500)

In [None]:
# NOTE(review): presumably runs 5 test episodes -- confirm in dqlagent.py.
agent.test(5)

In [None]:
# Re-seed Python's random module before building the next environment.
random.seed(100)

In [None]:
# Second setup: theta equals x0=1 (reversion to the starting level) with
# kappa=1.25 and a stricter accuracy threshold of 0.6.
sim = Simulation(sym, 'r', 4, start='2024-1-1', end='2028-1-1',
                 periods=2 * 252, min_accuracy=0.6, x0=1,
                 kappa=1.25, theta=1, sigma=0.15,
                 normalize=True, new=True)  # <3>

In [None]:
# New agent for the second simulated environment; no lr argument is passed here.
agent = DQLAgent(sim.symbol, sim.feature, sim.n_features, sim)

In [None]:
# Timed call of agent.learn.
# NOTE(review): 500 is presumably the number of training episodes -- confirm.
%time agent.learn(500)

In [None]:
# NOTE(review): presumably runs 5 test episodes -- confirm in dqlagent.py.
agent.test(5)

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>