<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Machine Learning for Finance

## Dense Neural Networks

Dr Yves J Hilpisch | The Python Quants GmbH

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>

In [None]:
import os
import numpy as np
import pandas as pd
from pylab import plt, mpl
plt.style.use('seaborn')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
pd.set_option('format.precision', 4)
np.set_printoptions(suppress=True, precision=4)
os.environ['PYTHONHASHSEED'] = '0'
%config InlineBackend.figure_format = 'svg'

## The Data

In [None]:
# Remote CSV file that is read into `raw` below.
url = 'https://hilpisch.com/aiif_eikon_id_eur_usd.csv'

In [None]:
# Column name used for the price series in `data`.
symbol = 'EUR_USD'

In [None]:
# The first CSV column becomes the index and is parsed as dates.
raw = pd.read_csv(url, index_col=0, parse_dates=True)

In [None]:
raw.head()

In [None]:
raw.info()

In [None]:
# Keep only the 'CLOSE' column and rename it to `symbol`.
data = pd.DataFrame(raw['CLOSE'].loc[:])
data.columns = [symbol]

In [None]:
# Resample to one-hour bins labelled by their right edge, keep the last
# value of each bin and forward-fill bins without observations.
data = data.resample('1h', label='right').last().ffill()

In [None]:
data.info()

In [None]:
data.plot(figsize=(10, 6));

## Baseline Prediction

In [None]:
# Number of lagged copies created per feature (passed to add_lags below).
lags = 5

In [None]:
def add_lags(data, symbol, lags, window=20):
    """Derive features from a price column and add lagged copies of them.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the price column ``symbol``. It is not modified;
        all work happens on a copy.
    symbol : str
        Name of the price column.
    lags : int
        Number of lags (1 .. ``lags``) created for every feature.
    window : int, default 20
        Rolling window length for the SMA, min, max, momentum and
        volatility features.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The enriched frame with all rows containing NaN removed, and the
        names of the lagged feature columns in creation order.
    """
    df = data.copy()
    df.dropna(inplace=True)
    price = df[symbol]
    # Compute log returns from the price column only. Dividing the whole
    # frame (as before) works solely when `data` has exactly one column.
    df['r'] = np.log(price / price.shift())
    price_window = price.rolling(window)
    return_window = df['r'].rolling(window)
    df['sma'] = price_window.mean()
    df['min'] = price_window.min()
    df['max'] = price_window.max()
    df['mom'] = return_window.mean()
    df['vol'] = return_window.std()
    df.dropna(inplace=True)
    # Direction label: 1 for a positive log return, 0 otherwise.
    df['d'] = np.where(df['r'] > 0, 1, 0)
    features = [symbol, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
    cols = []
    for f in features:
        for lag in range(1, lags + 1):
            col = f'{f}_lag_{lag}'
            # Only values from earlier rows (shift >= 1) enter the features.
            df[col] = df[f].shift(lag)
            cols.append(col)
    df.dropna(inplace=True)
    return df, cols

In [None]:
# Replace `data` with the feature-enriched frame; `cols` holds the names of
# the lagged feature columns used as model inputs.
data, cols = add_lags(data, symbol, lags)

In [None]:
len(data)

In [None]:
# data.iloc[:10, :14].round(4)

In [None]:
# Frequency of the two direction labels (0 and 1).
c = data['d'].value_counts()
c

In [None]:
def cw(df):
    """Return balanced class weights for the binary label column ``'d'``.

    Each class weight is ``len(df) / (2 * count)``, so both classes
    contribute the same total weight. A class that does not occur in
    ``df`` gets weight 0.0 (it is never applied to any sample) instead of
    triggering an unpacking error or a division by zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an integer column ``'d'`` containing only 0 and 1.

    Returns
    -------
    dict
        ``{0: weight_0, 1: weight_1}``.

    Raises
    ------
    ValueError
        If ``'d'`` contains labels other than 0 and 1.
    """
    # minlength=2 guarantees a count for both labels even if one is absent.
    counts = np.bincount(df['d'], minlength=2)
    if len(counts) != 2:
        raise ValueError("column 'd' must contain only the labels 0 and 1")
    total = len(df)
    return {label: ((1 / n) * total / 2 if n else 0.0)
            for label, n in enumerate(counts)}

In [None]:
# Class weights for the full data set.
class_weight = cw(data)

In [None]:
class_weight

In [None]:
# Weighted count of class 0; cw() scales it to len(data) / 2.
class_weight[0] * c[0]

In [None]:
# Weighted count of class 1; equals the weighted count of class 0.
class_weight[1] * c[1]

In [None]:
# Set before TensorFlow is imported in the next cell -- presumably to
# silence TensorFlow's info/warning log output (TODO confirm level meaning).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [None]:
import random
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense
from keras.models import Sequential
from sklearn.metrics import accuracy_score

In [None]:
def set_seeds(seed=100):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Parameters
    ----------
    seed : int, default 100
        Value handed to each of the three seeding functions.
    """
    seeders = (random.seed, np.random.seed, tf.random.set_seed)
    for seeder in seeders:
        seeder(seed)

In [None]:
# Adam optimizer instance; bound as the default `optimizer` argument of the
# create_model definitions that follow.
optimizer = keras.optimizers.Adam(learning_rate=0.001)

In [None]:
def create_model(hl=1, hu=128, optimizer=optimizer):
    """Build and compile a dense network for binary classification.

    The network has a first dense layer of ``hu`` ReLU units taking
    ``len(cols)`` inputs, ``hl`` further dense ReLU layers of ``hu`` units
    and a single sigmoid output unit. It is compiled with binary
    cross-entropy loss and the accuracy metric.

    Parameters
    ----------
    hl : int, default 1
        Number of dense layers added after the first one.
    hu : int, default 128
        Units per dense layer.
    optimizer : str or optimizer object
        Optimizer identifier or instance. An instance is used as a
        template only: a fresh optimizer with the same configuration is
        created for every model.

    Returns
    -------
    The compiled Sequential model.
    """
    # The default is one optimizer object shared by every call. An optimizer
    # keeps per-model state (step counter, slot variables) and recent Keras
    # versions refuse to update variables it was not built with, so give
    # each model its own copy.
    if hasattr(optimizer, 'get_config') and hasattr(optimizer, 'from_config'):
        optimizer = type(optimizer).from_config(optimizer.get_config())
    model = Sequential()
    model.add(Dense(hu, input_dim=len(cols),
                    activation='relu'))
    for _ in range(hl):
        model.add(Dense(hu, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

In [None]:
# Baseline: train on the complete data set and evaluate on the same data.
set_seeds()
model = create_model(hl=1, hu=128)

In [None]:
%%time
model.fit(data[cols], data['d'], epochs=50,
          verbose=False, class_weight=cw(data))

In [None]:
model.evaluate(data[cols], data['d'])

In [None]:
# Threshold the sigmoid output at 0.5 to obtain 0/1 predictions.
data['p'] = np.where(model.predict(data[cols]) > 0.5, 1, 0)

In [None]:
data['p'].value_counts()

In [None]:
# Position-based 80/20 split: the first 80% of the rows form the training
# set, the remaining rows the test set.
split = int(len(data) * 0.8)

In [None]:
train = data.iloc[:split].copy()

In [None]:
test = data.iloc[split:].copy()

In [None]:
# Fresh model, trained on the training set only.
set_seeds()
model = create_model(hl=1, hu=128)

In [None]:
%%time 
# Class weights are derived from the training set only.
hist = model.fit(train[cols], train['d'],
          epochs=50, verbose=False,
          validation_split=0.2, shuffle=False,
          class_weight=cw(train))

In [None]:
model.evaluate(train[cols], train['d'])

In [None]:
model.evaluate(test[cols], test['d'])

In [None]:
test['p'] = np.where(model.predict(test[cols]) > 0.5, 1, 0)

In [None]:
test['p'].value_counts()

In [None]:
# Per-epoch training history as a DataFrame.
res = pd.DataFrame(hist.history)

In [None]:
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style='--');

## Normalization

In [None]:
# Column-wise mean and standard deviation of the training set.
mu, std = train.mean(), train.std()

In [None]:
# Standardize the training data with its own statistics.
train_ = (train - mu) / std

In [None]:
# train_.std().round(3)

In [None]:
set_seeds()
model = create_model(hl=2, hu=128)

In [None]:
%%time 
# Features come from the standardized frame, labels from the original one
# (the 'd' column of train_ is standardized as well).
hist = model.fit(train_[cols], train['d'],
          epochs=50, verbose=False,
          validation_split=0.2, shuffle=False,
          class_weight=cw(train))

In [None]:
model.evaluate(train_[cols], train['d'])

In [None]:
# The test data is standardized with the training-set statistics.
test_ = (test - mu) / std

In [None]:
model.evaluate(test_[cols], test['d'])

In [None]:
test['p'] = np.where(model.predict(test_[cols]) > 0.5, 1, 0)

In [None]:
test['p'].value_counts()

In [None]:
res = pd.DataFrame(hist.history)

In [None]:
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style='--');

## Dropout 

In [None]:
from keras.layers import Dropout

In [None]:
def create_model(hl=1, hu=128, dropout=True, rate=0.3,
                 optimizer=optimizer):
    """Build and compile a dense binary classifier with optional dropout.

    The network has a first dense layer of ``hu`` ReLU units taking
    ``len(cols)`` inputs, ``hl`` further dense ReLU layers of ``hu`` units
    and a single sigmoid output unit. With ``dropout=True`` every dense
    ReLU layer is followed by a Dropout layer.

    Parameters
    ----------
    hl : int, default 1
        Number of dense layers added after the first one.
    hu : int, default 128
        Units per dense layer.
    dropout : bool, default True
        Whether to add Dropout layers.
    rate : float, default 0.3
        Dropout rate.
    optimizer : str or optimizer object
        Optimizer identifier or instance. An instance is used as a
        template only: a fresh optimizer with the same configuration is
        created for every model.

    Returns
    -------
    The compiled Sequential model.
    """
    # The default is one optimizer object shared by every call. An optimizer
    # keeps per-model state and recent Keras versions refuse to update
    # variables it was not built with, so give each model its own copy.
    if hasattr(optimizer, 'get_config') and hasattr(optimizer, 'from_config'):
        optimizer = type(optimizer).from_config(optimizer.get_config())
    model = Sequential()
    model.add(Dense(hu, input_dim=len(cols),
                    activation='relu'))
    if dropout:
        model.add(Dropout(rate, seed=100))
    for _ in range(hl):
        model.add(Dense(hu, activation='relu'))
        if dropout:
            model.add(Dropout(rate, seed=100))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model

In [None]:
# Model with dropout (the default of this create_model version).
set_seeds()
model = create_model(hl=1, hu=128, rate=0.3)

In [None]:
model.summary()

In [None]:
%%time 
# Note: this run uses validation_split=0.15, the other runs use 0.2.
hist = model.fit(train_[cols], train['d'],
          epochs=50, verbose=False,
          validation_split=0.15, shuffle=False,
          class_weight=cw(train))

In [None]:
model.evaluate(train_[cols], train['d'])

In [None]:
model.evaluate(test_[cols], test['d'])

In [None]:
res = pd.DataFrame(hist.history)

In [None]:
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style='--');

## Regularization 

In [None]:
from keras.regularizers import l1, l2

In [None]:
def create_model(hl=1, hu=128, dropout=False, rate=0.3,
                 regularize=False, reg=l1(0.0005),
                 optimizer=optimizer, input_dim=len(cols)):
    """Build and compile a dense binary classifier.

    The network has a first dense layer of ``hu`` ReLU units taking
    ``input_dim`` inputs, ``hl`` further dense ReLU layers of ``hu`` units
    and a single sigmoid output unit. Dropout layers and an activity
    regularizer on the dense ReLU layers are optional.

    Parameters
    ----------
    hl : int, default 1
        Number of dense layers added after the first one.
    hu : int, default 128
        Units per dense layer.
    dropout : bool, default False
        Whether every dense ReLU layer is followed by a Dropout layer.
    rate : float, default 0.3
        Dropout rate.
    regularize : bool, default False
        Whether ``reg`` is applied; if False, ``reg`` is ignored.
    reg : regularizer, default l1(0.0005)
        Activity regularizer for the dense ReLU layers.
    optimizer : str or optimizer object
        Optimizer identifier or instance. An instance is used as a
        template only: a fresh optimizer with the same configuration is
        created for every model.
    input_dim : int
        Number of input features. The default is ``len(cols)`` as
        evaluated when this function is defined.

    Returns
    -------
    The compiled Sequential model.
    """
    if not regularize:
        reg = None
    # The default is one optimizer object shared by every call (including
    # every estimator of an ensemble). An optimizer keeps per-model state and
    # recent Keras versions refuse to update variables it was not built
    # with, so give each model its own copy.
    if hasattr(optimizer, 'get_config') and hasattr(optimizer, 'from_config'):
        optimizer = type(optimizer).from_config(optimizer.get_config())
    model = Sequential()
    model.add(Dense(hu, input_dim=input_dim,
                    activity_regularizer=reg,
                    activation='relu'))
    if dropout:
        model.add(Dropout(rate, seed=100))
    for _ in range(hl):
        model.add(Dense(hu, activation='relu',
                        activity_regularizer=reg))
        if dropout:
            model.add(Dropout(rate, seed=100))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model

In [None]:
# Model with the default activity regularizer, no dropout.
set_seeds()
model = create_model(hl=1, hu=128, regularize=True)

In [None]:
%%time 
hist = model.fit(train_[cols], train['d'],
          epochs=50, verbose=False,
          validation_split=0.2, shuffle=False,
          class_weight=cw(train))

In [None]:
model.evaluate(train_[cols], train['d'])

In [None]:
model.evaluate(test_[cols], test['d'])

In [None]:
res = pd.DataFrame(hist.history)

In [None]:
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style='--');

In [None]:
# Deeper model combining dropout with an l2 activity regularizer.
set_seeds()
model = create_model(hl=2, hu=128,
                     dropout=True, rate=0.3,
                     regularize=True, reg=l2(0.001),
                    )

In [None]:
%%time 
hist = model.fit(train_[cols], train['d'],
          epochs=50, verbose=False,
          validation_split=0.2, shuffle=False,
          class_weight=cw(train))

In [None]:
model.evaluate(train_[cols], train['d'])

In [None]:
model.evaluate(test_[cols], test['d'])

In [None]:
res = pd.DataFrame(hist.history)

In [None]:
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style='--');

In [None]:
# Gap between the mean training and mean validation accuracy over all epochs.
res.mean()['accuracy'] - res.mean()['val_accuracy']

## Bagging

In [None]:
# !pip install scikeras

In [None]:
from sklearn.ensemble import BaggingClassifier
from scikeras.wrappers import KerasClassifier

In [None]:
len(cols)

In [None]:
# Fraction of the feature columns handed to each bagged estimator; also
# passed to BaggingClassifier as max_features.
max_features = 0.75

In [None]:
set_seeds()
# input_dim is sized to the feature subset, not to the full len(cols),
# so that it matches what each estimator receives.
base_estimator = KerasClassifier(model=create_model,
                        verbose=False, epochs=20, hl=1, hu=128,
                        dropout=True, regularize=False,
                        input_dim=int(len(cols) * max_features))

In [None]:
# The estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, but it is the first positional parameter under both names.
model_bag = BaggingClassifier(base_estimator,
                          n_estimators=15,
                          max_samples=0.75,
                          max_features=max_features,
                          bootstrap=True,
                          bootstrap_features=True,
                          n_jobs=1,
                          random_state=100,
                         )

In [None]:
# Fit the ensemble on the standardized training features.
%time model_bag.fit(train_[cols], train['d'])

In [None]:
model_bag.score(train_[cols], train['d'])

In [None]:
model_bag.score(test_[cols], test['d'])

In [None]:
# Ensemble predictions for the test set.
test['p'] = model_bag.predict(test_[cols])

In [None]:
test['p'].value_counts()

## Optimizers

In [None]:
import time

In [None]:
optimizers = ['sgd', 'rmsprop', 'adagrad', 'adadelta',
              'adam', 'adamax', 'nadam']

In [None]:
%%time
for optimizer in optimizers:
    set_seeds()
    model = create_model(hl=1, hu=128,
                     dropout=True, rate=0.3,
                     regularize=False, reg=l2(0.001),
                     optimizer=optimizer
                    )
    t0 = time.time()
    model.fit(train_[cols], train['d'],
              epochs=50, verbose=False,
              validation_split=0.2, shuffle=False,
              class_weight=cw(train))
    t1 = time.time()
    t = t1 - t0
    acc_tr = model.evaluate(train_[cols], train['d'], verbose=False)[1]
    acc_te = model.evaluate(test_[cols], test['d'], verbose=False)[1]
    out = f'{optimizer:10s} | time[s]: {t:7.4f} | in-sample={acc_tr:.4f}'
    out += f' | out-of-sample={acc_te:.4f}'
    print(out)

In [None]:
test['p'] = np.where(model.predict(test_[cols]) > 0.5, 1, 0)

In [None]:
test['p'].value_counts()

In [None]:
accuracy_score(test['p'], test['d'])

<img src='http://hilpisch.com/tpq_logo.png' width="35%" align="right">

<br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>