<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# AI-First Finance

**Recurrent Neural Networks**

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Imports

In [None]:
!git clone https://github.com/tpq-classes/ai_in_finance.git
import sys
sys.path.append('ai_in_finance')


In [None]:
import math
import numpy as np
import pandas as pd
from pylab import plt
plt.style.use('seaborn-v0_8')

In [None]:
import cufflinks as cf
cf.set_config_file(offline=True)

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
np.random.seed(100)
import tensorflow as tf
tf.random.set_seed(100)

## First Example

In [None]:
a = np.arange(100)
a

In [None]:
a = a.reshape((len(a), -1))

In [None]:
a.shape

In [None]:
a[:5]

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
lags = 3

In [None]:
g = TimeseriesGenerator(a, a, length=lags, batch_size=5)

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import SimpleRNN, LSTM, Dense

In [None]:
# One recurrent layer feeding a single linear output unit; every sample is a
# window of `lags` time steps with one feature per step.
model = Sequential([
    SimpleRNN(100, activation='relu', input_shape=(lags, 1)),
    Dense(1, activation='linear'),
])
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])

In [None]:
model.summary()

In [None]:
%%time
model.fit(g, epochs=150, steps_per_epoch=10, verbose=False);

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
ax = res.plot(figsize=(10, 6), style=['--', '--'])

In [None]:
# make a one step prediction in-sample
x = np.array([21, 22, 23]).reshape((1, lags, 1))
y = model.predict(x, verbose=True)
y

In [None]:
y.round()

In [None]:
int(round(y[0, 0]))

In [None]:
# make a one step prediction in-sample
x = np.array([87, 88, 89]).reshape((1, lags, 1))
y = model.predict(x, verbose=True)
y

In [None]:
y.round()

In [None]:
# make a one step prediction out of sample
x = np.array([187, 188, 189]).reshape((1, lags, 1))
y = model.predict(x, verbose=True)
y

In [None]:
y.round()

In [None]:
# make a one step prediction out of sample
x = np.array([1187, 1188, 1189]).reshape((1, lags, 1))
y = model.predict(x, verbose=True)
y

In [None]:
y.round()

## Second Example

In [None]:
def transform(x):
    """Return a noisy quadratic-plus-sine transformation of `x`.

    Computes ``0.05 * x ** 2 + 0.2 * x + sin(x) + 5`` element-wise and adds
    independent Gaussian noise (standard deviation 0.2) to every element.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s); any shape is accepted.

    Returns
    -------
    Values of the same shape as `x`. The noise is drawn from NumPy's global
    random state, so seed it first if reproducible output is needed.
    """
    y = 0.05 * x ** 2 + 0.2 * x + np.sin(x) + 5
    # Size the noise by shape instead of len(): len() fails for scalars and
    # draws too few values for multi-dimensional input.
    y = y + np.random.standard_normal(np.shape(x)) * 0.2
    return y

In [None]:
x = np.linspace(-2 * np.pi, 2 * np.pi, 500)
a = transform(x)

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, a);

In [None]:
a = a.reshape((len(a), -1))

In [None]:
a[:5]

In [None]:
lags = 5

In [None]:
g = TimeseriesGenerator(a, a, length=lags, batch_size=5)

In [None]:
# for _ in g:
#     print(_)

In [None]:
# Wider single-layer RNN (500 units) for the noisy curve; input windows hold
# `lags` time steps with one feature each, output is one linear unit.
model = Sequential([
    SimpleRNN(500, activation='relu', input_shape=(lags, 1)),
    Dense(1, activation='linear'),
])
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])

In [None]:
model.summary()

In [None]:
%%time
model.fit(g, epochs=500, steps_per_epoch=10, verbose=False);

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
ax = res.plot(figsize=(10, 6), style=['--', '--'])

In [None]:
x = np.linspace(-6 * np.pi, 6 * np.pi, 1000)
d = transform(x)

In [None]:
len(d)

In [None]:
lags=5

In [None]:
gd = TimeseriesGenerator(d, d, length=lags, batch_size=len(d))

In [None]:
#list(gd)[0][0].shape

In [None]:
#f = list(gd)[0][0].reshape((len(d) - lags, lags, 1))

In [None]:
y = model.predict(gd, verbose=True)

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x[lags:], d[lags:], label='data')
plt.plot(x[lags:], y, 'r.', label='pred')
plt.axvline(-2 * np.pi, c='m')
plt.axvline(2 * np.pi, c='m')
plt.legend();

In [None]:
df = pd.DataFrame({'data': d[lags:], 'pred': y.reshape(1, -1)[0]}, index=x[lags:])

In [None]:
df.iplot()

## Financial Data

### The Data

In [None]:
raw = pd.read_csv('http://hilpisch.com/tr_eikon_eod_data.csv',
                  index_col=0, parse_dates=True).dropna()

In [None]:
symbol = 'EUR='
data = pd.DataFrame(raw[symbol])
data.info()

In [None]:
data = data.loc[:'2014-09-30']

In [None]:
data.info()

In [None]:
data.iplot()

In [None]:
split = int(len(data) * 0.7)
val_size = int(len(data) * 0.15)

In [None]:
train = data.iloc[:split]
val = train[-val_size:]
train = train[:-val_size]
test = data.iloc[split:].copy()

In [None]:
lags = 5
gtr = TimeseriesGenerator(train.values, train.values, length=lags, batch_size=10)
gva = TimeseriesGenerator(val.values, val.values, length=lags, batch_size=10)

In [None]:
# list(gtr)[0]

### Model Fitting

In [None]:
# Two stacked LSTM layers followed by a single linear output unit.
model = Sequential()
# Only the first layer declares the model input: windows of `lags` time
# steps with one feature each. It returns the full sequence so that the
# second LSTM layer receives one vector per time step.
model.add(LSTM(250, activation='relu', input_shape=(lags, 1),
               return_sequences=True))
# No input_shape here: this layer consumes the first layer's sequence
# output, not the raw (lags, 1) windows.
model.add(LSTM(250, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

In [None]:
model.summary()

In [None]:
%%time
model.fit(gtr, epochs=500, steps_per_epoch=10, verbose=False,
                    validation_data=gva);

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
to_plot = ['mae', 'val_mae']
ax = res[to_plot].iloc[2:].plot(figsize=(10, 6), style=['--', '--', '-', '-'])
x = np.arange(len(res));
reg = np.polyfit(x, res['val_mae'], deg=2)
plt.plot(x, np.polyval(reg, x), 'r');

### Model Testing

In [None]:
gte = TimeseriesGenerator(test.values, test.values, length=lags, batch_size=len(test))

In [None]:
#f = list(gte)[0][0].reshape((len(test) - lags, lags, 1))

In [None]:
y = model.predict(gte, verbose=True)

In [None]:
data[symbol].iplot()

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(test.index[lags:], test[lags:], label='data')
plt.plot(test.index[lags:], y, 'r.', label='pred')
plt.legend();

In [None]:
df = pd.DataFrame({'data': test.values[lags:].reshape(1, -1)[0],
                   'pred': y.reshape(1, -1)[0]}, index=test.index[lags:])

In [None]:
df.iplot()

## Patterns

### The Data

In [None]:
symbol = 'EUR='
data = pd.DataFrame(raw[symbol])
data.info()

In [None]:
nbins = 50

In [None]:
split = 1900
val_size = 400

In [None]:
train = data.iloc[1000:split]
val = train[-val_size:].copy()
train = train[:-val_size].copy()
test = data.iloc[split:].copy()

In [None]:
# Build `nbins` equally spaced bin edges over the training range of the
# price column, widened by `adj` on both sides.
adj = 0.002
# Reduce over the price column only, so that re-running this cell after the
# bin-index column 'o' has been added to `train` still yields the same edges.
mi = train[symbol].min()
ma = train[symbol].max()
bins = np.linspace(mi - adj, ma + adj, nbins)

In [None]:
bins

In [None]:
train['o'] = np.digitize(train[symbol], bins)
val['o'] = np.digitize(val[symbol], bins)
test['o'] = np.digitize(test[symbol], bins)

In [None]:
train.head(5)

In [None]:
train.iloc[:150].iplot(secondary_y='o')

In [None]:
lags = 15
gt = TimeseriesGenerator(train['o'].values, train['o'].values,
                         length=lags, batch_size=len(train))

In [None]:
gtr = TimeseriesGenerator(train['o'].values.reshape(-1, 1),
                          train['o'].values.reshape(-1, 1),
                          length=lags, batch_size=10)
gva = TimeseriesGenerator(val['o'].values.reshape(-1, 1),
                          val['o'].values.reshape(-1, 1),
                          length=lags, batch_size=10)

In [None]:
# list(gtr)[0]

### Model Fitting

In [None]:
# Two stacked SimpleRNN layers followed by a single linear output unit.
model = Sequential()

# Only the first layer declares the model input: windows of `lags` time
# steps with one feature each. It returns the full sequence so that the
# second recurrent layer receives one vector per time step.
model.add(SimpleRNN(100, activation='relu', input_shape=(lags, 1),
                    return_sequences=True))
# No input_shape here: this layer consumes the first layer's sequence
# output, not the raw (lags, 1) windows.
model.add(SimpleRNN(100, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

In [None]:
model.summary()

In [None]:
%%time
model.fit(gtr, epochs=150, steps_per_epoch=10, verbose=False,
                   validation_data=gva);

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
to_plot = ['mae', 'val_mae']
ax = res[to_plot].iloc[2:].plot(figsize=(10, 6), style=['--', '--', '-', '-'])
x = np.arange(len(res));
reg = np.polyfit(x, res['val_mae'], deg=2)
plt.plot(x, np.polyval(reg, x), 'r');

In [None]:
#f = list(gt)[0][0].reshape((len(train) - lags, lags, 1))

In [None]:
y = model.predict(gt, verbose=True)

In [None]:
df = pd.DataFrame({'level': train['o'].values[lags:].reshape(1, -1)[0],
                   'pred': y.reshape(1, -1)[0].round()}, index=train['o'].index[lags:])

In [None]:
df.iplot()

### Model Testing

In [None]:
gte = TimeseriesGenerator(test['o'].values.reshape(-1, 1),
                          test['o'].values.reshape(-1, 1),
                          length=lags, batch_size=len(test))

In [None]:
#f = list(gte)[0][0].reshape((len(test) - lags, lags, 1))

In [None]:
y = model.predict(gte, verbose=True)

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(test['o'].index[lags:], test['o'][lags:], label='level')
plt.plot(test['o'].index[lags:], y, 'r.', label='pred')
plt.legend();

In [None]:
df = pd.DataFrame({'level': test['o'].values[lags:].reshape(1, -1)[0],
                   'pred': y.reshape(1, -1)[0].round()}, index=test['o'].index[lags:])

In [None]:
df.iplot()

## Returns

### The Data

In [None]:
# Rebuild the working frame from the raw data for the return-based example.
symbol = 'EUR='
data = pd.DataFrame(raw[symbol])
# r:  log return between consecutive rows
# rs: the same log return scaled by 500
# d:  1 where the log return is positive, 0 otherwise
data['r'] = np.log(data[symbol].div(data[symbol].shift(1)))
data['rs'] = data['r'].mul(500)
data['d'] = (data['r'] > 0).astype(int)

In [None]:
# Add one column per lag holding the log return shifted by that many rows;
# `cols` lists the column names from lag_1 up to lag_<lags>.
lags = 10
cols = [f'lag_{lag}' for lag in range(1, lags + 1)]
for lag, col in enumerate(cols, start=1):
    data[col] = data['r'].shift(lag)
# Shifting leaves NaN values in the leading rows; drop every incomplete row.
data.dropna(inplace=True)

In [None]:
cols[::-1]

In [None]:
data.head(5)

In [None]:
len(data)

In [None]:
split = 1600
val_size = 400

In [None]:
train = data.iloc[:split]
val = train[-val_size:]
train = train[:-val_size]
test = data.iloc[split:].copy()

### Model Fitting

In [None]:
from keras.layers import Dropout
from keras.callbacks import ModelCheckpoint

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
# Three stacked SimpleRNN layers, with dropout after the first two, feeding
# a single linear output unit.
model = Sequential([
    # hidden layers
    SimpleRNN(128, input_shape=(lags, 1), return_sequences=True),
    Dropout(0.5, seed=100),
    SimpleRNN(128, return_sequences=True),
    Dropout(0.5, seed=100),
    SimpleRNN(128),
    # output layer
    Dense(1, activation='linear'),
])

# model compilation
model.compile(loss='mse', optimizer='rmsprop', metrics=['mse'])

In [None]:
model.summary()

In [None]:
# Checkpoint callback handed to model.fit(): it monitors the validation MSE
# and keeps only the best result (save_best_only=True), writing just the
# weights rather than the full model (save_weights_only=True) to `filepath`.
callbacks = [ModelCheckpoint(filepath='../../../data/.weights.h5',
                             monitor='val_mse',
                             verbose=0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='auto')]

In [None]:
%%time
model.fit(train[cols[::-1]].values.reshape(len(train),lags, -1),
          train['r'].values.reshape(len(train), -1),
          epochs=25, batch_size=32, verbose=False,
          validation_data=(
              val[cols[::-1]].values.reshape(len(val),lags,-1),
              val['r'].values.reshape(len(val), -1)
          ), callbacks=callbacks);

In [None]:
res = pd.DataFrame(model.history.history)
res.tail(3)

In [None]:
ax = res.plot(figsize=(10, 6), style=['--', '--', '-', '-'])
x = np.arange(len(res));
reg = np.polyfit(x, res['val_mse'], deg=2)
plt.plot(x, np.polyval(reg, x), 'r');

### Backtesting

In [None]:
model.load_weights('../../../data/.weights.h5')

In [None]:
model.evaluate(test[cols].values.reshape(len(test), lags, -1),
               test['r'])

In [None]:
test['p'] = model.predict(test[cols].values.reshape(len(test), lags, -1))

In [None]:
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).iplot();

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">