<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 05 &mdash; Generated Data**

&copy; Dr. Yves J. Hilpisch

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

### Please use the "Python 3.10, Tensorflow 2.10" kernel.

## Simple Example

In [None]:
# Clone the rl_4_finance repository into the current working directory ...
!git clone https://github.com/tpq-classes/rl_4_finance.git
import sys
# ... and put the cloned folder on the module search path.
sys.path.append('rl_4_finance')


In [None]:
import os
import warnings
# Suppress Python warnings to keep the notebook output clean.
warnings.simplefilter('ignore')
# Reduce TensorFlow's native log output; set here, before TensorFlow is
# imported in a later cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:

import numpy as np
import pandas as pd
from pylab import plt, mpl

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

In [None]:
# Plot appearance: seaborn style sheet, 300 dpi for display and saved
# figures, serif fonts.
plt.style.use('seaborn-v0_8')
mpl.rcParams['figure.dpi'] = 300
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'

In [None]:
# 500 equally spaced grid points covering the interval [-2, 2].
x = np.linspace(start=-2, stop=2, num=500)  # <1>

In [None]:
def f(x):
    """Return the cube of ``x`` (element-wise for array input)."""
    return pow(x, 3)  # <2>

In [None]:
# Evaluate the cubic function on the grid.
y = f(x)  # <3>

In [None]:
# Scaler used to standardize the function values (and to undo that later).
scaler = StandardScaler()  # <4>

In [None]:
# Fit the scaler on y, reshaped to a single column, and keep the
# standardized values.
y_ = scaler.fit_transform(y.reshape(-1, 1))  # <4>

In [None]:
# Original function values versus their standardized counterpart.
for values, fmt, tag in ((y, 'r', 'real data'),
                         (y_, 'b--', 'normalized data')):
    plt.plot(x, values, fmt, label=tag)
plt.legend();

### Model Training

In [None]:
def create_generator(hu=32):
    """Build the (uncompiled) generator network.

    The model takes one input value, passes it through two ReLU hidden
    layers with ``hu`` units each and emits one linear output value.
    """
    layers = [
        Dense(hu, activation='relu', input_dim=1),
        Dense(hu, activation='relu'),
        Dense(1, activation='linear'),
    ]
    return Sequential(layers)

In [None]:
def create_discriminator(hu=32):
    """Build and compile the discriminator network.

    The model takes one input value, passes it through two ReLU hidden
    layers with ``hu`` units each and emits one sigmoid output. It is
    compiled with binary cross-entropy loss, a default ``Adam`` optimizer
    and the accuracy metric.
    """
    classifier = Sequential([
        Dense(hu, activation='relu', input_dim=1),
        Dense(hu, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(),
        metrics=['accuracy'],
    )
    return classifier

In [None]:
def create_gan(generator, discriminator, lr=0.001):
    """Stack generator and discriminator into one compiled model.

    Note that the passed ``discriminator`` is modified: its ``trainable``
    flag is set to ``False`` before the stacked model is compiled, the
    intent being that training the stack adjusts the generator only.
    The stack is compiled with binary cross-entropy loss and an ``Adam``
    optimizer using learning rate ``lr``.
    """
    discriminator.trainable = False  # <1>
    stacked = Sequential([generator, discriminator])  # <2> <3>
    stacked.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
    )
    return stacked

# Fix TensorFlow's global seed before the networks are created so that
# their initial weights are the same on every run.
tf.random.set_seed(100)

generator = create_generator()  # <4>
discriminator = create_discriminator()  # <4>
gan = create_gan(generator, discriminator, 0.0001)  # <4>

In [None]:
from numpy.random import default_rng

In [None]:
# Seeded NumPy random generator; used below for noise and batch sampling.
rng = default_rng(seed=100)

In [None]:
def train_models(y_, epochs, batch_size):
    """Alternately train the discriminator and the generator.

    Relies on the notebook-level objects ``generator``, ``discriminator``,
    ``gan`` and ``rng`` as they are defined at call time.

    Parameters
    ----------
    y_ : array
        Real samples; ``batch_size`` rows are drawn at random (with
        replacement) in every iteration.
    epochs : int
        Number of training iterations; must be at least 1.
    batch_size : int
        Number of real and of synthetic samples used per iteration.

    Returns
    -------
    tuple
        ``(real_data, synthetic_data)`` from the final iteration.

    Raises
    ------
    ValueError
        If ``epochs`` is smaller than 1, since no batch would exist to
        return.
    """
    if epochs < 1:
        raise ValueError('epochs must be at least 1')
    # The target labels are the same in every iteration; build them once.
    real_labels = np.ones(batch_size)  # label 1 = real
    fake_labels = np.zeros(batch_size)  # label 0 = synthetic
    for epoch in range(epochs):
        # Generate synthetic data
        noise = rng.normal(0, 1, (batch_size, 1))  # <1>
        synthetic_data = generator.predict(noise, verbose=False)  # <2>

        # Train discriminator
        real_data = y_[rng.integers(0, len(y_), batch_size)]  # <3>
        discriminator.train_on_batch(real_data, real_labels)  # <4>
        discriminator.train_on_batch(synthetic_data, fake_labels)  # <5>

        # Train generator: fresh noise, labelled as real
        noise = rng.normal(0, 1, (batch_size, 1))  # <6>
        gan.train_on_batch(noise, real_labels)  # <7>

        # Print progress
        if epoch % 1000 == 0:
            print(f'Epoch: {epoch}')
    return real_data, synthetic_data

In [None]:
%%time
# Train on the standardized function values; keep the real and synthetic
# batches of the final iteration for plotting.
real_data, synthetic_data = train_models(y_, epochs=5001, batch_size=64)

In [None]:
# Last real batch next to the last generated batch.
for batch, tag in ((real_data, 'real (last batch)'),
                   (synthetic_data, 'synthetic (last batch)')):
    plt.plot(batch, label=tag)
plt.legend();

In [None]:
# Collect results in a frame indexed by the grid points, starting with
# the real (unscaled) function values.
data = pd.DataFrame({'real': y}, index=x)

In [None]:
N = 5  # <1>
noise_shape = (len(y), 1)  # one noise value per real observation
for i in range(N):
    column = f'synth_{i:02d}'
    noise = rng.normal(loc=0, scale=1, size=noise_shape)
    synthetic_data = generator.predict(noise, verbose=False)
    # Map the generator output back to the original scale of y.
    data[column] = scaler.inverse_transform(synthetic_data)

In [None]:
# Summary statistics of the real and the synthetic columns.
data.describe().round(3)

In [None]:
# Mean squared difference between the sorted real values and the sorted
# values of the first synthetic set.
sorted_data = data.apply(np.sort)
((sorted_data['real'] - sorted_data['synth_00']) ** 2).mean()  # <1>

In [None]:
# Sorted values: real data in red, the N synthetic sets in dashed blue.
line_styles = ['r'] + ['b--'] * N
data.apply(np.sort).plot(style=line_styles, lw=1, legend=False);

## Financial Example

In [None]:
# Location of the CSV data set used for the financial example.
url = 'https://certificate.tpq.io/rl4finance.csv'

In [None]:
# Read the CSV with its first column as a date-parsed index and drop
# rows that contain missing values.
raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna()  # <1>

In [None]:
# Log returns of the last 1,000 GLD observations as a plain NumPy array.
gld = raw['GLD'].iloc[-1000:]  # <2>
log_rets = np.log((gld / gld.shift(1)).dropna())  # <3>
rets = log_rets.values  # <4>

In [None]:
# New scaler for the returns; replaces the one fitted in the simple example.
scaler = StandardScaler()  # <5>

In [None]:
# Standardize the log returns (reshaped to a single column).
rets_ = scaler.fit_transform(rets.reshape(-1, 1))  # <5>

In [None]:
# Re-seed NumPy's generator and TensorFlow's global seed before the new
# models are built and trained.
rng = default_rng(100)
tf.random.set_seed(100)

In [None]:
# Fresh networks for the returns data, with 24 units per hidden layer.
hidden_units = 24
generator = create_generator(hu=hidden_units)
discriminator = create_discriminator(hu=hidden_units)
gan = create_gan(generator, discriminator, lr=0.0001)

In [None]:
# Train on the standardized returns; rd and sd receive the real and
# synthetic batches of the final iteration.
%time rd, sd = train_models(y_=rets_, epochs=5001, batch_size=32)

In [None]:
# New results frame holding the real log returns (default integer index).
data = pd.DataFrame({'real': rets})

In [None]:
# Number of synthetic return series to generate.
N = 25

In [None]:
# Generate N synthetic return series. Noise is drawn from the seeded
# generator `rng` so that the series are reproducible across runs.
for i in range(N):
    noise = rng.normal(0, 1, (len(rets_), 1))  # <1>
    synthetic_data = generator.predict(noise, verbose=False)  # <1>
    # Map the generator output back to the scale of the real returns.
    data[f'synth_{i:02d}'] = scaler.inverse_transform(synthetic_data)  # <2>

In [None]:
# Summary statistics for all columns; display only the first five
# (the real returns and four synthetic series).
res = data.describe().round(4)  # <3>
res.iloc[:, :5]  # <3>

In [None]:
# Real returns (red) against the first synthetic series (dashed blue);
# one style entry per plotted column.
data.iloc[:, :2].plot(style=['r', 'b--'], lw=1, alpha=0.7);

In [None]:
# Overlaid histograms of the real and the first synthetic return series.
hist_kw = dict(kind='hist', bins=50, alpha=0.7)
data['real'].plot(label='real', color='r', **hist_kw)
data['synth_00'].plot(label='synthetic', color='b', sharex=True, **hist_kw)
plt.legend();

In [None]:
# Sorted real returns against the sorted first synthetic series.
for column, fmt, tag in (('real', 'r', 'real'),
                         ('synth_00', 'b--', 'synthetic')):
    plt.plot(np.sort(data[column]), fmt, label=tag)
plt.legend();

In [None]:
sn = N
synth_rets = data.iloc[:, 1:sn + 1]
# Cumulative (exponentiated) return paths of the synthetic series ...
synth_rets.cumsum().pipe(np.exp).plot(style='b--', lw=0.7, legend=False)
# ... of their row-wise mean ...
synth_rets.mean(axis=1).cumsum().pipe(np.exp).plot(style='g', lw=2)
# ... and of the real returns.
data['real'].cumsum().pipe(np.exp).plot(style='r', lw=2);

### Kolmogorov-Smirnov (KS) Test

From ChatGPT:

> The Kolmogorov-Smirnov (KS) Test is a statistical method used to determine if two datasets differ significantly or if a dataset differs significantly from a reference probability distribution. It does so by comparing the cumulative distribution functions (CDFs) of the datasets. The KS Test calculates the maximum distance (D statistic) between the CDFs of the two samples or between the sample CDF and the reference CDF. A small D value suggests the datasets are similar, while a larger D value indicates they are different. The test also provides a p-value, which helps to decide whether the observed differences are statistically significant.

In [None]:
from scipy import stats

In [None]:
# KS test p-value of every synthetic series against the real returns.
pvs = np.array([
    stats.kstest(data[f'synth_{i:02d}'], data['real']).pvalue
    for i in range(N)
])

In [None]:
# Flags per synthetic series (1 if its p-value exceeds 0.05, else 0),
# sorted so that the zeros come first.
np.sort((pvs > 0.05).astype(int))

In [None]:
# Share of synthetic series whose KS p-value exceeds 0.05.
(pvs > 0.05).mean()

In [None]:
# Distribution of the p-values with the 0.05 threshold marked in red.
threshold = 0.05
plt.hist(pvs, bins=100)
plt.axvline(x=threshold, color='r');

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>