<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# AI-First Finance

**Estimation for Market Prediction**

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Imports

For the `tpqoa` package see http://github.com/yhilpisch/tpqoa.

In [None]:
!git clone https://github.com/tpq-classes/ai_in_finance.git
import sys
sys.path.append('ai_in_finance')


In [None]:
!pip install git+https://github.com/yhilpisch/tpqoa

In [None]:
import math
import tpqoa
import numpy as np
import pandas as pd
from pylab import plt
plt.style.use('seaborn-v0_8')

In [None]:
import warnings
warnings.simplefilter('ignore')

## Data

In [None]:
# Parameters identifying the historical data set to work with.
symbol, granularity, price = 'EUR_USD', 'M10', 'A'
# First and last date of the sample.
start, end = '2019-01-01', '2019-03-27'
# Path of the CSV file; its name encodes all of the parameters above.
fn = f'/content/ai_in_finance/oanda_{symbol}_{start}_{end}_{granularity}_{price}.csv'
fn

In [None]:
%%time
# Read the data from the local CSV file if it is there; otherwise retrieve it
# via the tpqoa API wrapper and store it under `fn` for the next run.
try:
    raw = pd.read_csv(fn, index_col=0, parse_dates=True)
except FileNotFoundError:
    # Only a missing file should trigger the download. Any other problem
    # (unreadable CSV, keyboard interrupt, ...) is raised instead of being
    # silently answered with an API call.
    api = tpqoa.tpqoa('../oanda.cfg')    #adapt path
    raw = api.get_history(symbol, start, end, granularity, price)
    raw.to_csv(fn)

In [None]:
raw.info()

In [None]:
# Single-column frame holding raw['c'], labelled with the symbol.
data = raw['c'].to_frame(name=symbol)
# Log return of each row relative to the preceding row.
data['r'] = np.log(data[symbol].div(data[symbol].shift(1)))
# Binary direction label: 1 for a strictly positive return, 0 otherwise.
data['d'] = np.where(data['r'].gt(0), 1, 0)
# Remove rows with missing values (the first row has no preceding price).
data = data.dropna()

In [None]:
ld = len(data)
ld

In [None]:
split = int(ld * 0.6)
val_size = int(ld * 0.1)

In [None]:
data.dropna(inplace=True)

In [None]:
# Sequential split: the first `split` rows are used for fitting, the remaining
# rows for testing; the last `val_size` rows of the fitting part are set aside
# as the validation set.
# Every subset is an explicit copy because `create_lags` adds columns to, and
# drops rows from, its argument in place. Doing that on a bare slice of `data`
# raises pandas' SettingWithCopyWarning and makes it unclear whether `data`
# itself is modified. `test` was already copied; `train` and `val` now are too.
train = data.iloc[:split].copy()
val = train.iloc[-val_size:].copy()
train = train.iloc[:-val_size].copy()
test = data.iloc[split:].copy()

In [None]:
lags = 5

In [None]:
def create_lags(df, mu, std, n_lags=None):
    """Add a standardized return column and its lags to ``df`` in place.

    The column ``'rs'`` is computed as ``(df['r'] - mu) / std``. For every
    ``lag`` from 1 to ``n_lags`` a column ``'lag_<lag>'`` holding ``'rs'``
    shifted by ``lag`` rows is added. Finally all rows containing missing
    values are dropped (at least the first ``n_lags`` rows, whose lags are
    undefined).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a column ``'r'``; it is modified in place.
    mu, std : float
        Shift and scale used for the standardization.
    n_lags : int, optional
        Number of lag columns to create. When omitted, the module-level
        variable ``lags`` is used, which is the original behavior.

    Returns
    -------
    None
        The names of the lag columns are stored in the global list ``cols``,
        which the rest of the notebook uses to select the feature columns.
    """
    global cols
    if n_lags is None:
        # Fall back to the notebook-wide setting for existing call sites.
        n_lags = lags
    df['rs'] = (df['r'] - mu) / std
    cols = [f'lag_{lag}' for lag in range(1, n_lags + 1)]
    for lag, col in enumerate(cols, start=1):
        df[col] = df['rs'].shift(lag)
    df.dropna(inplace=True)

In [None]:
# Mean and standard deviation of the training returns; the same two values
# are passed to create_lags for the validation and test sets further below.
mu, std = train['r'].mean(), train['r'].std()
create_lags(train, mu, std)

In [None]:
train.head(5)

In [None]:
create_lags(val, mu, std)

In [None]:
val.head()

In [None]:
create_lags(test, mu, std)

In [None]:
test.head()

## MLP Regressor

In [None]:
from sklearn.neural_network import MLPRegressor

In [None]:
# scikit-learn multi-layer perceptron regressor with a fixed random seed.
model = MLPRegressor(
    # network layout
    hidden_layer_sizes=(192, 192),
    activation='relu',
    # optimization settings
    learning_rate_init=0.0005,
    max_iter=500,
    shuffle=False,
    # early-stopping settings
    early_stopping=True,
    validation_fraction=0.1,
    # seed and logging
    random_state=100,
    verbose=False,
)

In [None]:
%time model.fit(train[cols], train['rs'])

In [None]:
model.predict(test[cols])

In [None]:
# Map the model output to its sign: +1 for a positive prediction, -1 otherwise.
test['p'] = np.where(model.predict(test[cols]) > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## No Regularization

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
# RMSprop with an explicit learning rate. The former `decay=0.0` argument is
# left out: a zero decay has no effect, and the keyword is no longer accepted
# by newer tf.keras optimizers (error or warning, depending on the release).
opt = tf.keras.optimizers.RMSprop(learning_rate=0.005, rho=0.9)

In [None]:
# Plain dense network without regularization: two ReLU layers of 2 ** 8 units
# fed with the `lags` input features, followed by a single linear output.
model = Sequential([
    Dense(2 ** 8, activation='relu', input_shape=(lags,)),
    Dense(2 ** 8, activation='relu'),
    Dense(1, activation='linear'),
])

# Mean absolute error is used both as loss and as reported metric.
model.compile(loss='mae', optimizer=opt, metrics=['mae'])

In [None]:
scale = 1

In [None]:
val.info()

In [None]:
%%time
history = model.fit(train[cols], train['r'] * scale,
                    epochs=250, batch_size=32, verbose=False,
                    validation_data=(val[cols], val['r'] * scale));

In [None]:
res = pd.DataFrame(history.history)

In [None]:
res.tail(3)

In [None]:
# Training and validation MAE per epoch.
to_plot = ['mae', 'val_mae']
ax = res.loc[:, to_plot].plot(style=['--', '--', '-', '-'], figsize=(10, 6))
# Quadratic fit through the validation MAE, drawn in red on the same figure.
x = np.arange(res.shape[0])
reg = np.polyfit(x, res['val_mae'], deg=2)
plt.plot(x, np.polyval(reg, x), 'r');

In [None]:
model.evaluate(test[cols], test['r'] * scale)

In [None]:
model(tf.convert_to_tensor(test[cols], dtype=tf.float32), training=False).numpy()

In [None]:
test['p'] = model(tf.convert_to_tensor(test[cols], dtype=tf.float32), training=False).numpy()
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## With Kernel Regularizer

In [None]:
from tensorflow.keras.regularizers import l1, l2

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(2 ** 8, activation='relu',
                kernel_regularizer=l2(0.001),
                input_shape=(lags,)))
model.add(Dense(2 ** 8, activation='relu',
                kernel_regularizer=l2(0.001)))
model.add(Dense(1, activation='linear'))

model.compile(optimizer='rmsprop', loss='mae', metrics=['mae'])

In [None]:
%%time
model.fit(train[cols], train['rs'],
          epochs=250, batch_size=32, verbose=False,
          validation_data=(val[cols], val['rs']));

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
# Training and validation MAE per epoch for the regularized model.
to_plot = ['mae', 'val_mae']
ax = res[to_plot].plot(figsize=(10, 6), style=['--', '--', '-', '-'])
x = np.arange(len(res))
# Quadratic fit through the validation MAE to show its overall trend.
reg = np.polyfit(x, res['val_mae'], deg=2)
# 'r' draws a red line, as in the other loss plots of this notebook. The
# previous format string 'rs' means "red square markers" and plotted one
# square per epoch instead of the trend line.
plt.plot(x, np.polyval(reg, x), 'r');

In [None]:
model.evaluate(test[cols], test['rs'])

In [None]:
test['p'] = model(tf.convert_to_tensor(test[cols], dtype=tf.float32), training=False).numpy()
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## With Dropout

In [None]:
from tensorflow.keras.layers import Dropout

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(2 ** 8, activation='relu',
                input_shape=(lags,)))
model.add(Dropout(0.3, seed=100))
model.add(Dense(2 ** 8, activation='relu'))
model.add(Dropout(0.3, seed=100))
model.add(Dense(1, activation='linear'))

model.compile(optimizer='rmsprop', loss='mse', metrics=['mse'])

In [None]:
%%time
model.fit(train[cols], train['rs'],
          epochs=125, batch_size=32, verbose=False,
          validation_data=(val[cols], val['rs']));

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
to_plot = ['mse', 'val_mse']
ax = res[to_plot].plot(figsize=(10, 6), style=['--', '--', '-', '-'])
x = np.arange(len(res));
reg = np.polyfit(x, res['val_mse'], deg=2)
plt.plot(x, np.polyval(reg, x), 'r');

In [None]:
model.evaluate(test[cols], test['rs'])

In [None]:
test['p'] = model(tf.convert_to_tensor(test[cols], dtype=tf.float32), training=False).numpy()
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## Early Stopping

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(2 ** 8, activation='relu',
                # kernel_regularizer=l2(0.001),
                input_shape=(lags,)))
model.add(Dropout(0.3, seed=100))
model.add(Dense(2 ** 8, activation='relu',
                # kernel_regularizer=l2(0.001)
               ))
model.add(Dropout(0.3, seed=100))
model.add(Dense(1, activation='linear'))

model.compile(optimizer='rmsprop', loss='mse', metrics=['mse'])

In [None]:
callbacks = [EarlyStopping(monitor='val_mse', patience=25)]

In [None]:
%%time
model.fit(train[cols], train['rs'],
          epochs=125, batch_size=32, verbose=False,
          validation_data=(val[cols], val['rs']),
          callbacks=callbacks);

In [None]:
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
to_plot = ['mse', 'val_mse']
ax = res[to_plot].plot(figsize=(10, 6), style=['--', '--', '-', '-'])
x = np.arange(len(res));
reg = np.polyfit(x, res['val_mse'], deg=2)
plt.plot(x, np.polyval(reg, x), 'r');

In [None]:
model.evaluate(test[cols], test['rs'])

In [None]:
test['p'] = model(tf.convert_to_tensor(test[cols], dtype=tf.float32), training=False).numpy()
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">