<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 05 &mdash; Generated Data**

&copy; Dr. Yves J. Hilpisch

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Simple Example

In [None]:
import os
import numpy as np
import pandas as pd
from pylab import plt, mpl

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

In [None]:
# Global plotting defaults: seaborn-v0_8 style sheet, 300 dpi for both
# on-screen and saved figures, and a serif font family.
plt.style.use('seaborn-v0_8')
mpl.rcParams['figure.dpi'] = 300
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'

In [None]:
# 500 evenly spaced points on the interval [-2, 2]
x = np.linspace(-2, 2, 500)

In [None]:
def f(x):
    """Return ``x`` raised to the third power.

    Works for plain numbers and, element-wise, for array-like objects that
    implement the power operator.
    """
    cubed = pow(x, 3)
    return cubed

In [None]:
# "real" data: the cubic function evaluated on the grid x
y = f(x)

In [None]:
# scaler that is fitted on y below and later used to map generated
# samples back to the original scale
scaler = StandardScaler()

In [None]:
# fit the scaler on y (reshaped to a single column) and keep the
# transformed values as the training data
y_ = scaler.fit_transform(y.reshape(-1, 1))

In [None]:
# compare the original values with their scaled counterpart
plt.plot(x, y, 'r', lw=1.0,
         label='real data')
plt.plot(x, y_, 'b--', lw=1.0,
         label='normalized data')
plt.legend();

### Model Training

In [None]:
class Generator(nn.Module):
    """Feed-forward generator network.

    Maps a one-dimensional input to a one-dimensional output through two
    hidden layers of ``hu`` units with ReLU activations and a linear
    output layer.
    """

    def __init__(self, hu=32):
        super().__init__()
        layers = [
            nn.Linear(1, hu), nn.ReLU(),
            nn.Linear(hu, hu), nn.ReLU(),
            nn.Linear(hu, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        output = self.net(x)
        return output

In [None]:
class Discriminator(nn.Module):
    """Feed-forward discriminator network.

    Maps a one-dimensional input to a single value squashed by a sigmoid,
    using two hidden layers of ``hu`` units with ReLU activations.
    """

    def __init__(self, hu=32):
        super().__init__()
        body = [nn.Linear(1, hu), nn.ReLU(), nn.Linear(hu, hu), nn.ReLU()]
        head = [nn.Linear(hu, 1), nn.Sigmoid()]
        self.net = nn.Sequential(*body, *head)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        score = self.net(x)
        return score

In [None]:
def create_gan(generator, discriminator, lr=0.001):
    """Create the training objects for a generator/discriminator pair.

    Parameters
    ----------
    generator, discriminator:
        Modules whose parameters are to be optimized.
    lr:
        Learning rate used for both Adam optimizers.

    Returns
    -------
    tuple
        ``(gen_optimizer, disc_optimizer, criterion)`` where both
        optimizers are ``optim.Adam`` instances and ``criterion`` is a
        ``nn.BCELoss`` instance.
    """
    optimizers = tuple(
        optim.Adam(model.parameters(), lr=lr)
        for model in (generator, discriminator)
    )
    return (*optimizers, nn.BCELoss())

In [None]:
# Seed torch before the networks are built so that the weight
# initialization and the latent noise drawn during training are
# reproducible (the financial example further down seeds torch as well).
torch.manual_seed(100)
generator = Generator()
discriminator = Discriminator()
# optimizers and loss for the pair; train_models reads these module-level names
gen_optimizer, disc_optimizer, criterion = create_gan(generator, discriminator, lr=0.0001)

In [None]:
from numpy.random import default_rng

In [None]:
# seeded NumPy generator; train_models uses it to draw mini-batch indices
rng = default_rng(seed=100)

In [None]:
def train_models(y_, epochs, batch_size):
    """Run the adversarial training loop and return a final pair of batches.

    The function operates on the module-level objects ``generator``,
    ``discriminator``, ``gen_optimizer``, ``disc_optimizer``, ``criterion``
    and ``rng``; they must exist before it is called.

    Parameters
    ----------
    y_:
        Array of real observations; rows are sampled with replacement.
    epochs:
        Number of training iterations (one mini-batch each).
    batch_size:
        Number of real and generated samples per iteration.

    Returns
    -------
    tuple
        ``(real_data, synthetic_data)``: a freshly sampled batch of rows of
        ``y_`` and a batch of generator outputs as a NumPy array.
    """

    def draw_latent():
        # standard-normal input for the generator, one value per sample
        return torch.randn(batch_size, 1)

    n_obs = len(y_)
    for step in range(epochs):
        # mini-batch of real observations plus the two target label sets
        picks = rng.integers(0, n_obs, batch_size)
        real_x = torch.from_numpy(y_[picks].reshape(-1, 1)).float()
        ones = torch.ones(batch_size, 1)
        zeros = torch.zeros(batch_size, 1)

        # generated samples for the discriminator update
        fake_x = generator(draw_latent())

        # discriminator step: real samples -> 1, generated samples -> 0;
        # detach() keeps this update from reaching the generator weights
        disc_optimizer.zero_grad()
        loss_on_real = criterion(discriminator(real_x), ones)
        loss_on_fake = criterion(discriminator(fake_x.detach()), zeros)
        disc_loss = loss_on_real + loss_on_fake
        disc_loss.backward()
        disc_optimizer.step()

        # generator step: fresh noise, rewarded when the discriminator
        # labels the generated samples as real
        gen_optimizer.zero_grad()
        verdict = discriminator(generator(draw_latent()))
        gen_loss = criterion(verdict, ones)
        gen_loss.backward()
        gen_optimizer.step()

        if not step % 1000:
            print(f'Epoch: {step}')

    # after training: one more real batch and one generated batch
    picks = rng.integers(0, n_obs, batch_size)
    real_data = y_[picks]
    with torch.no_grad():
        synthetic_data = generator(draw_latent()).cpu().numpy()
    return real_data, synthetic_data

In [None]:
%%time
# train on the scaled cubic data for 15,001 iterations with batches of 32;
# keeps the final real batch and the final generated batch
real_data, synthetic_data = train_models(y_, epochs=15001, batch_size=32)

In [None]:
# plot the batches returned by train_models against their position in the batch
plt.plot(real_data, 'r', lw=1.0,
         label='real data (last batch)')
plt.plot(synthetic_data, 'b:', lw=1.0,
         label='synthetic data (last batch)')
plt.legend();

In [None]:
# collect the real (unscaled) values in a frame indexed by x;
# synthetic columns are added next to it below
data = pd.DataFrame({'real': y}, index=x)

In [None]:
# number of synthetic series to generate
N = 5
# Pure inference: no_grad avoids building an autograd graph for the samples.
with torch.no_grad():
    for i in range(N):
        # Draw the latent noise from the seeded generator ``rng`` instead of
        # the unseeded legacy ``np.random`` state so the samples follow the
        # notebook's seed.
        noise = rng.standard_normal((len(y), 1))
        noise_t = torch.from_numpy(noise).float()
        synthetic_data = generator(noise_t).numpy()
        # map back to the original scale; ravel() hands pandas a 1D column
        data[f'synth_{i:02d}'] = scaler.inverse_transform(synthetic_data).ravel()

In [None]:
# summary statistics of the real and synthetic columns, rounded to 3 decimals
data.describe().round(3)

In [None]:
# Sort every column once and reuse the result (the original sorted the
# whole frame twice). The mean squared difference between the sorted real
# and sorted synthetic values compares the two samples rank by rank.
sorted_data = data.apply(np.sort)
((sorted_data['real'] - sorted_data['synth_00']) ** 2).mean()

In [None]:
# sorted values of every column: real in red, the N synthetic ones dashed blue
data.apply(np.sort).plot(style=['r'] + N * ['b--'], lw=1, legend=False);

## Financial Example

In [None]:
# load the data set from the remote CSV file; the first column becomes a
# date-parsed index and rows with missing values are dropped
raw = pd.read_csv('https://certificate.tpq.io/rl4finance.csv',
                 index_col=0, parse_dates=True).dropna()

In [None]:
# log returns of the GLD column over its last 2 * 252 rows, as a NumPy array
rets = raw['GLD'].iloc[-2 * 252:]
rets = np.log(rets.div(rets.shift(1)).dropna()).to_numpy()

In [None]:
# fresh scaler for the return data (replaces the one fitted on the cubic data)
scaler = StandardScaler()

In [None]:
# fit the scaler on the returns (reshaped to a single column) and keep the
# transformed values as the training data
rets_ = scaler.fit_transform(rets.reshape(-1, 1))

In [None]:
# reset both random sources: the NumPy generator used for batch sampling
# and torch's global seed
rng = default_rng(seed=100)
torch.manual_seed(100)

In [None]:
# new networks with 24 hidden units per layer; rebinding the module-level
# names makes train_models operate on these objects
generator = Generator(hu=24)
discriminator = Discriminator(hu=24)
gen_optimizer, disc_optimizer, criterion = create_gan(generator, discriminator, lr=0.0001)

In [None]:
%time rd, sd = train_models(y_=rets_, epochs=15001, batch_size=32)

In [None]:
# frame holding the real (unscaled) log returns; synthetic columns follow
data = pd.DataFrame({'real': rets})

In [None]:
# number of synthetic return series to generate
N = 25

In [None]:
# Pure inference: no_grad avoids building an autograd graph for the samples.
with torch.no_grad():
    for i in range(N):
        # Draw the latent noise from the seeded generator ``rng`` instead of
        # the unseeded legacy ``np.random`` state so the synthetic series are
        # reproducible together with the seeded training run.
        noise = rng.standard_normal((len(rets_), 1))
        noise_t = torch.from_numpy(noise).float()
        synthetic_data = generator(noise_t).numpy()
        # map back to the return scale; ravel() hands pandas a 1D column
        data[f'synth_{i:02d}'] = scaler.inverse_transform(synthetic_data).ravel()

In [None]:
# summary statistics for all columns, rounded to 4 decimals;
# only the first five columns are displayed
res = data.describe().round(4)
res.iloc[:, :5]

In [None]:
# plot the real series together with the first synthetic series
# NOTE(review): only two columns are selected but three styles are given;
# the third entry looks unused — confirm whether `:3` was intended.
data.iloc[:, :2].plot(style=['r', 'b--', 'b--'], lw=1, alpha=0.7);

In [None]:
# overlay 50-bin histograms of the real and the first synthetic series
data['real'].plot(kind='hist', bins=50, label='real',
                  color='r', alpha=0.7)
data['synth_00'].plot(kind='hist', bins=50, alpha=0.7,
                  label='synthetic', color='b', sharex=True)
plt.legend();

In [None]:
# sorted values of the real and the first synthetic series, plotted by rank
plt.plot(np.sort(data['real']), 'r', lw=1.0, label='real')
plt.plot(np.sort(data['synth_00']), 'b--', lw=1.0, label='synthetic')
plt.legend();

In [None]:
# number of synthetic columns to include (columns 1 .. sn of the frame)
sn = N
# exp of the cumulative sum of log returns gives the gross performance path;
# one dashed blue line per synthetic series
data.iloc[:, 1:sn + 1].cumsum().apply(np.exp).plot(
    style='b--', lw=0.7, legend=False)
# green: path built from the row-wise mean of the synthetic returns
data.iloc[:, 1:sn + 1].mean(axis=1).cumsum().apply(
    np.exp).plot(style='g', lw=2)
# red: path of the real returns
data['real'].cumsum().apply(np.exp).plot(style='r', lw=2);

### Kolmogorov-Smirnov (KS) Test

In [None]:
from scipy import stats

In [None]:
# KS test p-value of every synthetic series against the real series
pvs = np.array([
    stats.kstest(data[f'synth_{k:02d}'], data['real']).pvalue
    for k in range(N)
])

In [None]:
# sorted 0/1 flags: 1 where the p-value exceeds 0.05
np.sort((pvs > 0.05).astype(int))

In [None]:
# Share of synthetic series whose KS p-value exceeds 0.05. The mean of the
# boolean mask equals count / N (pvs holds one entry per synthetic series)
# without the needless sort and the Python-level sum.
(pvs > 0.05).mean()

In [None]:
# histogram of the p-values with a red marker at the 0.05 threshold
plt.hist(pvs, bins=100)
plt.axvline(0.05, color='r');

<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>