<img src="https://certificate.tpq.io/taim_logo.png" width="350px" align="right">

# Artificial Intelligence in Finance

## Recurrent Neural Networks

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## First Example

In [None]:
# Fetch the companion repository and put its directory on the import path.
!git clone https://github.com/tpq-classes/ai_in_finance_book.git
import sys
sys.path.append('ai_in_finance_book')


In [None]:
import os
# Set before TensorFlow is imported in the next cell.
# NOTE(review): presumably reduces TensorFlow's native log output -- confirm
# the meaning of level '3'.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from pprint import pprint
from pylab import plt, mpl
# Plot styling and numeric display settings used throughout the notebook.
plt.style.use('seaborn-v0_8')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
pd.set_option('display.precision', 4)
np.set_printoptions(suppress=True, precision=4)
# NOTE(review): this is set after the interpreter has started -- confirm it
# still influences hash randomization for the running kernel.
os.environ['PYTHONHASHSEED'] = '0'

In [None]:
def set_seeds(seed=100):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Parameters
    ----------
    seed: int
        value passed unchanged to each of the three seeding functions
    """
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
set_seeds()

In [None]:
# Toy series: the integers 0, 1, ..., 99.
a = np.arange(100)
a

In [None]:
# Turn the 1-D array into a 2-D array with a single column.
a = a.reshape((len(a), -1))

In [None]:
a.shape

In [None]:
a[:5]

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# Window length handed to the generator and used as the model's first
# input dimension.
lags = 3

In [None]:
# `a` serves as both the input data and the targets; batches of size 5.
g = TimeseriesGenerator(a, a, length=lags, batch_size=5)

In [None]:
#pprint(list(g)[0])

In [None]:
from keras.models import Sequential
from keras.layers import SimpleRNN, LSTM, Dense

In [None]:
# One SimpleRNN layer with 100 units feeding a single linear output unit.
model = Sequential()
model.add(SimpleRNN(100, activation='relu',
                    input_shape=(lags, 1)))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adagrad', loss='mse',
              metrics=['mae'])

In [None]:
model.summary()

In [None]:
%%time
# Train on the generator without progress output.
model.fit(g, epochs=1000, steps_per_epoch=5,
            verbose=False)

In [None]:
# History recorded by the fit call above, as a DataFrame.
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
# Plot the history, leaving out its first 10 rows.
res.iloc[10:].plot(figsize=(10, 6), style=['--', '--']);

In [None]:
# Predict from a single window of three consecutive integers and round
# the result to an int.
x = np.array([21, 22, 23]).reshape((1, lags, 1))
y = model.predict(x, verbose=False)
int(round(y[0, 0]))

In [None]:
x = np.array([87, 88, 89]).reshape((1, lags, 1))
y = model.predict(x, verbose=False)
int(round(y[0, 0]))

In [None]:
# The same check with inputs above the 0..99 range of the training data.
x = np.array([187, 188, 189]).reshape((1, lags, 1))
y = model.predict(x, verbose=False)
int(round(y[0, 0]))

In [None]:
x = np.array([1187, 1188, 1189]).reshape((1, lags, 1))
y = model.predict(x, verbose=False)
int(round(y[0, 0]))

## Second Example

In [None]:
def transform(x):
    """Evaluate a noisy quadratic-plus-sine function element-wise.

    Parameters
    ----------
    x: np.ndarray
        input values; any shape is accepted

    Returns
    -------
    np.ndarray
        ``0.05 * x ** 2 + 0.2 * x + sin(x) + 5`` plus Gaussian noise
        scaled by 0.2, in the shape of ``x``
    """
    y = 0.05 * x ** 2 + 0.2 * x + np.sin(x) + 5
    # Size the noise by the full shape of x rather than len(x): for 1-D
    # input the draws are the same, and multi-dimensional input gets one
    # independent draw per element instead of a broadcast (or an error).
    noise = np.random.standard_normal(np.shape(x)) * 0.2
    return y + noise

In [None]:
# Training data: 500 points on [-2*pi, 2*pi] passed through transform().
x = np.linspace(-2 * np.pi, 2 * np.pi, 500)
a = transform(x)

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, a);

In [None]:
# Turn the 1-D array into a 2-D array with a single column.
a = a.reshape((len(a), -1))

In [None]:
a[:5]

In [None]:
lags = 5

In [None]:
# `a` serves as both the input data and the targets; batches of size 5.
g = TimeseriesGenerator(a, a, length=lags, batch_size=5)

In [None]:
# One SimpleRNN layer with 500 units feeding a single linear output unit.
model = Sequential()
model.add(SimpleRNN(500, activation='relu', input_shape=(lags, 1)))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

In [None]:
model.summary()

In [None]:
%%time
model.fit(g, epochs=500,
          steps_per_epoch=10,
          verbose=False)

In [None]:
# Evaluation data on the wider interval [-6*pi, 6*pi], which contains the
# training interval.
x = np.linspace(-6 * np.pi, 6 * np.pi, 1000)
d = transform(x)

In [None]:
# NOTE(review): `d` is passed as a 1-D array here, whereas `a` above was
# reshaped to a single column first -- confirm the model accepts the
# resulting windows.
g_ = TimeseriesGenerator(d, d, length=lags, batch_size=len(d))

In [None]:
#f = list(g_)[0][0].reshape((len(d) - lags, lags, 1))

In [None]:
y = model.predict(g_, verbose=False)

In [None]:
# Data versus predictions from index `lags` onward; the dashed vertical
# lines mark the ends of the training interval [-2*pi, 2*pi].
plt.figure(figsize=(10, 6))
plt.plot(x[lags:], d[lags:], label='data', alpha=0.75)
plt.plot(x[lags:], y, 'r.', label='pred', ms=3)
plt.axvline(-2 * np.pi, c='g', ls='--')
plt.axvline(2 * np.pi, c='g', ls='--')
plt.text(-15, 22, 'out-of-sample')
plt.text(-2, 22, 'in-sample')
plt.text(10, 22, 'out-of-sample')
plt.legend();

## Financial Price Series

In [None]:
# Location of the CSV file read into `raw` below.
url = 'http://hilpisch.com/aiif_eikon_id_eur_usd.csv'

In [None]:
# Column name used for the price series in the frames built later.
symbol = 'EUR_USD'

In [None]:
# First CSV column becomes the index and is parsed as dates.
raw = pd.read_csv(url, index_col=0, parse_dates=True)

In [None]:
def generate_data():
    """Build the 30-minute price frame from the module-level ``raw`` data.

    Returns
    -------
    pd.DataFrame
        the ``CLOSE`` column of ``raw``, stored under the column name
        held in ``symbol``, resampled to right-labelled 30-minute bins
        (last value per bin) with missing values forward-filled
    """
    prices = raw['CLOSE'].to_frame(name=symbol)
    bars = prices.resample('30min', label='right').last()
    return bars.ffill()

In [None]:
data = generate_data()

In [None]:
# Standardize: subtract the mean and divide by the standard deviation.
data = (data - data.mean()) / data.std()

In [None]:
p = data[symbol].values

In [None]:
# Turn the 1-D array into a 2-D array with a single column.
p = p.reshape((len(p), -1))

In [None]:
lags = 5

In [None]:
# `p` serves as both the input data and the targets; batches of size 5.
g = TimeseriesGenerator(p, p, length=lags, batch_size=5)

In [None]:
def create_rnn_model(hu=100, lags=lags, layer='SimpleRNN',
                           features=1, algorithm='estimation'):
    """Create and compile a Keras model with one recurrent layer.

    Parameters
    ----------
    hu: int
        number of units in the recurrent layer
    lags: int
        first dimension of the input shape; defaults to the value the
        module-level ``lags`` has when this function is defined
    layer: str
        'SimpleRNN' selects a SimpleRNN layer, any other value an LSTM
    features: int
        second dimension of the input shape
    algorithm: str
        'estimation' gives a linear output trained on MSE (metric MAE);
        any other value gives a sigmoid output trained on binary
        cross-entropy (metric accuracy)

    Returns
    -------
    the compiled Sequential model
    """
    rnn_cls = SimpleRNN if layer == 'SimpleRNN' else LSTM
    if algorithm == 'estimation':
        out_activation, loss, metrics = 'linear', 'mse', ['mae']
    else:
        out_activation = 'sigmoid'
        loss, metrics = 'binary_crossentropy', ['accuracy']
    model = Sequential()
    model.add(rnn_cls(hu, activation='relu',
                      input_shape=(lags, features)))
    model.add(Dense(1, activation=out_activation))
    model.compile(optimizer='adam', loss=loss, metrics=metrics)
    return model

In [None]:
# Default configuration: SimpleRNN layer, estimation setup.
model = create_rnn_model()

In [None]:
%%time
model.fit(g, epochs=500, steps_per_epoch=10,
          verbose=False)

In [None]:
# Predictions over the same generator the model was trained on.
y = model.predict(g, verbose=False)

In [None]:
# Store the predictions from row `lags` onward; the rows before that keep
# NaN. The write goes through `data.iloc` directly because the chained
# form `data['pred'].iloc[...] = ...` assigns into an intermediate Series,
# which pandas warns about and which leaves `data` unchanged under
# Copy-on-Write.
data['pred'] = np.nan
data.iloc[lags:, data.columns.get_loc('pred')] = y.flatten()

In [None]:
# Price series and predictions over the full index.
data[[symbol, 'pred']].plot(
            figsize=(10, 6), style=['b', 'r-.'],
            alpha=0.75);

In [None]:
# The same plot restricted to rows 50..99.
data[[symbol, 'pred']].iloc[50:100].plot(
            figsize=(10, 6), style=['b', 'r-.'],
            alpha=0.75);

## Financial Return Series

In [None]:
data = generate_data()

In [None]:
# Log returns: log of each price divided by the previous price.
data['r'] = np.log(data / data.shift(1))

In [None]:
data.dropna(inplace=True)

In [None]:
# Standardize every column: subtract the mean, divide by the standard
# deviation.
data = (data - data.mean()) / data.std()

In [None]:
r = data['r'].values

In [None]:
# Turn the 1-D array into a 2-D array with a single column.
r = r.reshape((len(r), -1))

In [None]:
# `r` serves as both the input data and the targets; `lags` keeps the
# value set earlier in the notebook.
g = TimeseriesGenerator(r, r, length=lags, batch_size=5)

In [None]:
# list(g)[0]

In [None]:
model = create_rnn_model()

In [None]:
%%time
model.fit(g, epochs=500, steps_per_epoch=10,
          verbose=False)

In [None]:
# Predictions over the same generator the model was trained on.
y = model.predict(g, verbose=False)

In [None]:
# Store the predictions from row `lags` onward, then drop the leading
# rows that have none. The write goes through `data.iloc` directly because
# the chained form `data['pred'].iloc[...] = ...` assigns into an
# intermediate Series, which pandas warns about and which leaves `data`
# unchanged under Copy-on-Write.
data['pred'] = np.nan
data.iloc[lags:, data.columns.get_loc('pred')] = y.flatten()
data.dropna(inplace=True)

In [None]:
# Returns and predictions for rows 50..99, with a horizontal line at zero.
data[['r', 'pred']].iloc[50:100].plot(
            figsize=(10, 6), style=['b', 'r-.'],
            alpha=0.75);
plt.axhline(0, c='grey', ls='--');

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Compares the sign of `r` with the sign of the prediction; this is
# in-sample, since the model was fit on the same series.
accuracy_score(np.sign(data['r']), np.sign(data['pred']))

In [None]:
# First 80% of the rows for training, the remainder for testing.
split = int(len(r) * 0.8)

In [None]:
train = r[:split]

In [None]:
test = r[split:]

In [None]:
g = TimeseriesGenerator(train, train, length=lags, batch_size=5)

In [None]:
set_seeds()
model = create_rnn_model(hu=100)

In [None]:
%%time
model.fit(g, epochs=100, steps_per_epoch=10, verbose=False)

In [None]:
g_ = TimeseriesGenerator(test, test, length=lags, batch_size=5)

In [None]:
y = model.predict(g_)

In [None]:
# Sign comparison on the test part, from row `lags` onward.
accuracy_score(np.sign(test[lags:]), np.sign(y))

## Financial Features

In [None]:
data = generate_data()

In [None]:
# Log returns: log of each price divided by the previous price.
data['r'] = np.log(data / data.shift(1))

In [None]:
# Rolling mean ('mom') and rolling standard deviation ('vol') of the
# returns over `window` rows.
window = 20
data['mom'] = data['r'].rolling(window).mean()
data['vol'] = data['r'].rolling(window).std()

In [None]:
data.dropna(inplace=True)

### Estimation

In [None]:
# First 80% of the rows for training, the remainder for testing.
split = int(len(data) * 0.8)

In [None]:
train = data.iloc[:split].copy()

In [None]:
# Mean and standard deviation come from the training rows only.
mu, std = train.mean(), train.std()

In [None]:
train = (train - mu) / std

In [None]:
test = data.iloc[split:].copy()

In [None]:
# The test rows are standardized with the training statistics.
test = (test - mu) / std

In [None]:
# Inputs are all columns of `train`; the target is its 'r' column.
g = TimeseriesGenerator(train.values, train['r'].values,
                        length=lags, batch_size=5)

In [None]:
set_seeds()
model = create_rnn_model(hu=100, features=len(data.columns),
                         layer='SimpleRNN')

In [None]:
%%time
model.fit(g, epochs=100, steps_per_epoch=10,
                verbose=False)

In [None]:
g_ = TimeseriesGenerator(test.values, test['r'].values,
                         length=lags, batch_size=5)

In [None]:
y = model.predict(g_).flatten()

In [None]:
# Sign comparison on the test part, from row `lags` onward.
accuracy_score(np.sign(test['r'].iloc[lags:]), np.sign(y))

### Classification

In [None]:
set_seeds()
# LSTM layer with the classification setup of create_rnn_model().
model = create_rnn_model(hu=50,
            features=len(data.columns),
            layer='LSTM',
            algorithm='classification')

In [None]:
# Binary labels: 1 where the standardized training return is above 0,
# otherwise 0.
train_y = np.where(train['r'] > 0, 1, 0)

In [None]:
# Number of 0 and 1 labels.
np.bincount(train_y)

In [None]:
def cw(a):
    """Compute class weights for an array of 0/1 labels.

    Parameters
    ----------
    a: np.ndarray
        non-negative integer labels whose bincount has exactly two
        entries (labels 0 and 1)

    Returns
    -------
    dict
        maps 0 and 1 to ``(1 / count) * len(a) / 2``, where ``count``
        is the number of occurrences of that label in ``a``
    """
    n_zero, n_one = np.bincount(a)
    total = len(a)
    return {label: (1 / count) * total / 2
            for label, count in enumerate((n_zero, n_one))}

In [None]:
# Inputs are all columns of `train`; the targets are the binary labels.
g = TimeseriesGenerator(train.values, train_y,
                        length=lags, batch_size=5)

In [None]:
%%time
# Class weights are computed from the training labels by cw().
model.fit(g, epochs=5, steps_per_epoch=10,
          verbose=False, class_weight=cw(train_y))

In [None]:
# Test labels, built the same way as the training labels.
test_y = np.where(test['r'] > 0, 1, 0)

In [None]:
g_ = TimeseriesGenerator(test.values, test_y,
                         length=lags, batch_size=5)

In [None]:
# Model outputs above 0.5 become label 1, all others label 0.
y = np.where(model.predict(g_, batch_size=None) > 0.5,
             1, 0).flatten()

In [None]:
# Number of predicted 0 and 1 labels.
np.bincount(y)

In [None]:
# Compared against the test labels from row `lags` onward.
accuracy_score(test_y[lags:], y)

### Deep RNNs

In [None]:
from keras.layers import Dropout

In [None]:
def create_deep_rnn_model(hl=2, hu=100, layer='SimpleRNN',
                          optimizer='rmsprop', features=1,
                          dropout=False, rate=0.3, seed=100):
    """Create and compile a stacked recurrent Keras classifier.

    Parameters
    ----------
    hl: int
        number of recurrent layers; values of 2 or less give 2 layers
    hu: int
        number of units in every recurrent layer
    layer: str
        'SimpleRNN' selects SimpleRNN layers, any other value LSTM
    optimizer: str
        optimizer passed to ``compile``
    features: int
        second dimension of the input shape; the first dimension is
        the module-level ``lags``, read when the function is called
    dropout: bool
        if True, a Dropout layer follows every recurrent layer except
        the last one
    rate: float
        rate passed to each Dropout layer
    seed: int
        seed passed to each Dropout layer

    Returns
    -------
    the Sequential model with a single sigmoid output unit, compiled
    with binary cross-entropy loss and the accuracy metric
    """
    depth = 2 if hl <= 2 else hl
    rnn_cls = SimpleRNN if layer == 'SimpleRNN' else LSTM
    model = Sequential()
    # First recurrent layer: the only one that declares the input shape.
    model.add(rnn_cls(hu, input_shape=(lags, features),
                      return_sequences=True))
    if dropout:
        model.add(Dropout(rate, seed=seed))
    # Intermediate layers; none are added when depth is 2.
    for _ in range(depth - 2):
        model.add(rnn_cls(hu, return_sequences=True))
        if dropout:
            model.add(Dropout(rate, seed=seed))
    # Last recurrent layer is created without return_sequences and feeds
    # the output unit.
    model.add(rnn_cls(hu))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
set_seeds()
# Two SimpleRNN layers of 50 units with dropout enabled.
model = create_deep_rnn_model(
            hl=2, hu=50, layer='SimpleRNN',
            features=len(data.columns),
            dropout=True, rate=0.3)

In [None]:
model.summary()

In [None]:
%%time
# Reuses the generator `g` and the labels `train_y` defined in earlier
# cells.
model.fit(g, epochs=200, steps_per_epoch=10,
          verbose=False, class_weight=cw(train_y))

In [None]:
# Model outputs above 0.5 become label 1, all others label 0.
y = np.where(model.predict(g_, batch_size=None) > 0.5,
             1, 0).flatten()

In [None]:
# Number of predicted 0 and 1 labels.
np.bincount(y)

In [None]:
# Compared against the test labels from row `lags` onward.
accuracy_score(test_y[lags:], y)

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>