<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# AI-First Finance

**Classification for Market Prediction**

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

### The use of the "Python 3.10, Numpy 1.26.4" kernel is recommended.

## Imports

For the `tpqoa` package see http://github.com/yhilpisch/tpqoa.

In [None]:
# !pip install git+https://github.com/yhilpisch/tpqoa

In [None]:
import math
import tpqoa
import numpy as np
import pandas as pd
from pylab import plt
plt.style.use('seaborn-v0_8')

## Data

In [None]:
# Parameters of the data request; the same values are embedded in the
# file name of the local CSV cache below.
symbol, granularity, price = 'EUR_USD', 'M10', 'A'
start, end = '2019-01-01', '2019-03-27'
# adjust path
fn = f'../../../data/oanda_{symbol}_{start}_{end}_{granularity}_{price}.csv'
fn

In [None]:
%%time
try:
    # Prefer the local CSV cache so the notebook runs without API access.
    raw = pd.read_csv(fn, index_col=0, parse_dates=True)
except (OSError, ValueError):
    # Cache file missing/unreadable (OSError) or empty/corrupt (pandas'
    # EmptyDataError and ParserError are ValueError subclasses): download
    # the data and (re)write the cache. A bare ``except:`` would also have
    # swallowed KeyboardInterrupt/SystemExit and unrelated programming errors.
    # adjust path
    api = tpqoa.tpqoa('../../../oanda.cfg')
    raw = api.get_history(symbol, start, end, granularity, price)
    raw.to_csv(fn)

In [None]:
# Print a summary of the loaded frame as a quick sanity check.
raw.info()

In [None]:
# Work on the 'c' column only, stored under the instrument symbol.
data = pd.DataFrame({symbol: raw['c']})
# Log return of every row relative to the previous row (NaN for the first).
data['r'] = np.log(data[symbol].div(data[symbol].shift(1)))
# Binary label: 1 for a strictly positive log return, 0 otherwise.
data['d'] = (data['r'] > 0).astype(int)
# Remove the leading row that has no return.
data = data.dropna()

In [None]:
# Number of observations left after the NaN row was dropped.
ld = len(data)
ld

In [None]:
# Row position separating the first 60% (training + validation) from the
# remaining 40% (test).
split = int(ld * 0.6)
split

In [None]:
# Number of rows (10% of all observations) carved off the end of the first
# 60% for validation.
val_size = int(ld * 0.1)
val_size

In [None]:
# Sequential split by row position: rows before `split` are in-sample,
# rows from `split` onwards form the test set.
in_sample = data.iloc[:split].copy().sort_index()
test = data.iloc[split:].copy().sort_index()
# The last `val_size` in-sample rows are held out for validation; the rows
# before them are used for training.
val = in_sample.iloc[-val_size:].copy().sort_index()
train = in_sample.iloc[:-val_size].copy().sort_index()

In [None]:
# Number of lagged feature columns per row; also the input width of the
# Keras models defined further below (input_shape=(lags,)).
lags = 10

In [None]:
def create_lags(df, mu, std, n_lags=None):
    """Add bucketed returns and their lags to ``df`` in place.

    The log returns in ``df['r']`` are mapped to integer buckets 0..5 using
    five thresholds placed symmetrically around ``mu`` at -1, -1/2, 0, +1/2
    and +1 ``std``. The bucket is stored in ``df['o']`` and shifted copies
    are stored in ``lag_1`` .. ``lag_n``. Rows containing NaN (at least the
    first ``n`` rows) are dropped from ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a column ``'r'``; it is modified in place.
    mu, std : float
        Location and scale used to place the bucket thresholds.
    n_lags : int, optional
        Number of lag columns to create. Defaults to the module-level
        ``lags`` setting.

    Side effects
    ------------
    Rebinds the module-level list ``cols`` to the names of the lag columns.
    """
    global cols
    if n_lags is None:
        n_lags = lags  # notebook-level default
    # The fourth edge must be mu + std / 2: repeating mu + std there leaves
    # bucket 4 permanently empty and breaks the symmetry of the buckets.
    bins = [mu - std, mu - std / 2, mu, mu + std / 2, mu + std]
    df['o'] = np.digitize(df['r'], bins)
    cols = []
    for lag in range(1, n_lags + 1):
        col = f'lag_{lag}'
        df[col] = df['o'].shift(lag)
        cols.append(col)
    # shift() introduces NaN (and thereby float dtype) in the leading rows;
    # drop those rows and restore integer bucket codes.
    df.dropna(inplace=True)
    df[cols] = df[cols].astype(int)

In [None]:
# Mean and standard deviation are taken from the training returns only and
# then reused unchanged for the validation and test sets below.
mu = train['r'].mean()
std = train['r'].std()
create_lags(train, mu, std)

In [None]:
train.head(5)

In [None]:
# Same thresholds (training mu/std) applied to the validation set.
create_lags(val, mu, std)

In [None]:
val.head()

In [None]:
# Same thresholds (training mu/std) applied to the test set.
create_lags(test, mu, std)

In [None]:
test.head()

## MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Settings of the scikit-learn network, collected in one mapping.
# NOTE(review): per the scikit-learn docs, validation_fraction should only
# take effect when early_stopping=True, which is off here - confirm.
mlp_params = {
    'hidden_layer_sizes': (128, 128, 128),
    'activation': 'relu',
    'learning_rate_init': 0.001,
    'random_state': 100,
    'max_iter': 500,
    'validation_fraction': 0.1,
    'shuffle': False,
    'early_stopping': False,
    'verbose': False,
}
model = MLPClassifier(**mlp_params)

In [None]:
# Fit on the training rows: lagged bucket columns as features, direction
# label 'd' as target.
%time model.fit(train[cols], train['d'])

In [None]:
model.predict(test[cols])

In [None]:
# Predicted labels, then recoded so that every non-positive label becomes a
# -1 position and every positive label a +1 position.
test['p'] = model.predict(test[cols])
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
# Strategy log return = position times the log return of the same row.
test['s'] = test['p'] * test['r']

In [None]:
# Summed log returns converted to gross performance ('r' vs. 's').
test[['r', 's']].sum().apply(np.exp)

In [None]:
# Number of rows whose position differs from the previous row. The first
# row's diff() is NaN, which compares unequal to 0 and is therefore counted.
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
# Cumulative gross performance over the test set.
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## No Regularization

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(128, activation='sigmoid', input_shape=(lags,)))
model.add(Dense(128, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
%%time
# Train for 150 epochs; the validation set is only evaluated per epoch and
# recorded in the history.
model.fit(train[cols], train['d'],
          epochs=150, batch_size=32, verbose=False,
          validation_data=(val[cols], val['d']));

In [None]:
# Per-epoch training/validation metrics as a DataFrame.
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
res.plot(figsize=(10, 6), style=['--', '--', '-', '-']);

In [None]:
model.evaluate(test[cols], test['d'])

In [None]:
model.predict(test[cols])

In [None]:
# Threshold the sigmoid output at 0.5 to get a 0/1 label, then recode the
# label as a -1/+1 position.
test['p'] = np.where(model.predict(test[cols]) > 0.5, 1, 0)
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
# Strategy log return = position times the log return of the same row.
test['s'] = test['p'] * test['r']

In [None]:
# Summed log returns converted to gross performance ('r' vs. 's').
test[['r', 's']].sum().apply(np.exp)

In [None]:
# Number of rows whose position differs from the previous row. The first
# row's diff() is NaN, which compares unequal to 0 and is therefore counted.
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
# Cumulative gross performance over the test set.
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## With Kernel Regularizer

In [None]:
from keras.regularizers import l1, l2

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(128, activation='relu',
                kernel_regularizer=l2(0.001),
                input_shape=(lags,)))
model.add(Dense(128, activation='relu',
               kernel_regularizer=l2(0.001)))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
%%time
# Train for 125 epochs; the validation set is only evaluated per epoch and
# recorded in the history.
model.fit(train[cols], train['d'],
          epochs=125, batch_size=32, verbose=False,
          validation_data=(val[cols], val['d']));

In [None]:
# Per-epoch training/validation metrics as a DataFrame.
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
res.plot(figsize=(10, 6), style=['--', '--', '-', '-']);

In [None]:
model.evaluate(test[cols], test['d'])

In [None]:
# Threshold the sigmoid output at 0.5 to get a 0/1 label, then recode the
# label as a -1/+1 position.
test['p'] = np.where(model.predict(test[cols]) > 0.5, 1, 0)
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
# Strategy log return = position times the log return of the same row.
test['s'] = test['p'] * test['r']

In [None]:
# Summed log returns converted to gross performance ('r' vs. 's').
test[['r', 's']].sum().apply(np.exp)

In [None]:
# Number of rows whose position differs from the previous row. The first
# row's diff() is NaN, which compares unequal to 0 and is therefore counted.
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
# Cumulative gross performance over the test set.
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## With Dropout

In [None]:
from keras.layers import Dropout

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(128, activation='relu',
                input_shape=(lags,)))
model.add(Dropout(0.3, seed=100))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3, seed=100))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
%%time
# Train for 125 epochs; the validation set is only evaluated per epoch and
# recorded in the history.
model.fit(train[cols], train['d'],
          epochs=125, batch_size=32, verbose=False,
          validation_data=(val[cols], val['d']));

In [None]:
# Per-epoch training/validation metrics as a DataFrame.
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
res.plot(figsize=(10, 6), style=['--', '--', '-', '-']);

In [None]:
model.evaluate(test[cols], test['d'])

In [None]:
# Threshold the sigmoid output at 0.5 to get a 0/1 label, then recode the
# label as a -1/+1 position.
test['p'] = np.where(model.predict(test[cols]) > 0.5, 1, 0)
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
# Strategy log return = position times the log return of the same row.
test['s'] = test['p'] * test['r']

In [None]:
# Summed log returns converted to gross performance ('r' vs. 's').
test[['r', 's']].sum().apply(np.exp)

In [None]:
# Number of rows whose position differs from the previous row. The first
# row's diff() is NaN, which compares unequal to 0 and is therefore counted.
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
# Cumulative gross performance over the test set.
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## Early Stopping

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = Sequential()

model.add(Dense(128, activation='relu',
                input_shape=(lags,)))
model.add(Dropout(0.3, seed=100))
model.add(Dense(128, activation='relu',
               ))
model.add(Dropout(0.3, seed=100))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Stop training once 'val_accuracy' has not improved for 20 epochs.
# NOTE(review): restore_best_weights is not set, so the model presumably
# keeps the weights of the last epoch rather than the best one - confirm
# that this is intended.
callbacks = [EarlyStopping(monitor='val_accuracy', patience=20)]

In [None]:
%%time
# Up to 250 epochs; the callback may end training earlier.
model.fit(train[cols], train['d'],
          epochs=250,
          batch_size=32,
          verbose=False,
          validation_data=(val[cols], val['d']),
          callbacks=callbacks);

In [None]:
# Per-epoch training/validation metrics as a DataFrame.
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
# Best validation accuracy reached during training.
res.val_accuracy.max()

In [None]:
# Training history; the trailing semicolon suppresses the Axes repr in the
# cell output, consistent with the other plotting cells in this notebook.
res.plot(figsize=(10, 6), style=['--', '--', '-', '-']);

In [None]:
model.evaluate(test[cols], test['d'])

In [None]:
# Threshold the sigmoid output at 0.5 to get a 0/1 label, then recode the
# label as a -1/+1 position.
test['p'] = np.where(model.predict(test[cols]) > 0.5, 1, 0)
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
# Strategy log return = position times the log return of the same row.
test['s'] = test['p'] * test['r']

In [None]:
# Summed log returns converted to gross performance ('r' vs. 's').
test[['r', 's']].sum().apply(np.exp)

In [None]:
# Number of rows whose position differs from the previous row. The first
# row's diff() is NaN, which compares unequal to 0 and is therefore counted.
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
# Cumulative gross performance over the test set.
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## Best Weights (Validation)

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
# NOTE(review): no new Sequential model is created in this section; compile()
# is called on the model object from the "Early Stopping" section, so the
# training below presumably continues from that model's current weights
# rather than from a fresh initialization - confirm that this is intended.
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Keep the weights of the epoch with the best validation accuracy on disk.
# save_freq must be 'epoch': an integer is interpreted as a number of
# *batches*, and at batch end 'val_accuracy' is not available, so
# save_best_only could not select the best checkpoint.
# NOTE(review): newer Keras releases may require the weights file name to
# end in '.weights.h5' - verify against the installed version.
# adjust path
callbacks = [ModelCheckpoint(filepath='../../../data/weights.hdf5',
                             monitor='val_accuracy',
                             verbose=0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='auto',
                             save_freq='epoch')]

In [None]:
%%time
# Train for 100 epochs with the checkpoint callback attached.
model.fit(train[cols], train['d'],
          epochs=100,
          batch_size=32,
          verbose=False,
          validation_data=(val[cols], val['d']),
          callbacks=callbacks);

In [None]:
# Replace the final-epoch weights with the ones stored by the checkpoint.
# adjust path
model.load_weights('../../../data/weights.hdf5')

In [None]:
model.evaluate(test[cols], test['d'])

In [None]:
# Threshold the sigmoid output at 0.5 to get a 0/1 label, then recode the
# label as a -1/+1 position.
test['p'] = np.where(model.predict(test[cols]) > 0.5, 1, 0)
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
# Strategy log return = position times the log return of the same row.
test['s'] = test['p'] * test['r']

In [None]:
# Summed log returns converted to gross performance ('r' vs. 's').
test[['r', 's']].sum().apply(np.exp)

In [None]:
# Number of rows whose position differs from the previous row. The first
# row's diff() is NaN, which compares unequal to 0 and is therefore counted.
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
# Cumulative gross performance over the test set.
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">