<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Finance Basics

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

## `TensorFlow/Keras` package

In [None]:
# Clone the course repository into ./pff_basics and put that directory on the
# import path so that modules inside it can be imported from this notebook.
!git clone https://github.com/tpq-classes/pff_basics.git
import sys
sys.path.append('pff_basics')


In [None]:
import numpy as np
import pandas as pd
from pylab import plt
# Print floating point numbers in fixed-point instead of scientific notation.
np.set_printoptions(suppress=True)
# Use the seaborn style sheet for all matplotlib figures.
plt.style.use('seaborn-v0_8')
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'

## Supervised Learning

**Deep Neural Networks**

## Real Financial Data

### Preprocessing the Data

In [None]:
# Data-set switch: True selects the first CSV below, False the second one.
# NOTE(review): the name suggests "end-of-day" vs. intraday data -- confirm
# against the files themselves.
eod = False

In [None]:
# Pick the CSV source that matches the `eod` switch.
if eod:
    url = 'https://certificate.tpq.io/mlfin.csv'
else:
    url = 'https://hilpisch.com/aiif_eikon_id_eur_usd.csv'

In [None]:
# The first CSV column becomes the index and is parsed as dates.
raw = pd.read_csv(url, index_col=0, parse_dates=True)

In [None]:
raw.info()

In [None]:
# Column name used for the price series in the rest of the notebook.
symbol = 'EUR='

In [None]:
if eod:
    # Keep only the `symbol` column and drop rows without a value.
    data = pd.DataFrame(raw[symbol]).dropna()
else:
    # Take the CLOSE column and rename it to `symbol` so that both branches
    # produce a one-column frame with the same column name.
    data = pd.DataFrame(raw['CLOSE'])
    data.columns = [symbol]
    # Resample to hourly bars: last value per hour, labelled with the right
    # bin edge; hours without any observation are dropped.
    data = data.resample('1h', label='right').last().dropna()

In [None]:
data[symbol].plot();

In [None]:
# Log return between consecutive rows; the first row has no predecessor and
# therefore becomes NaN.
data['r'] = np.log(data[symbol] / data[symbol].shift(1))

In [None]:
data['r'].plot();

In [None]:
# Direction label: 1 for a strictly positive log return, 0 otherwise
# (a NaN return also compares as not > 0 and is labelled 0).
data['d'] = np.where(data['r'] > 0, 1, 0)

In [None]:
data.head()

In [None]:
# Number of rows in every rolling window below.
window = 21

In [None]:
# Rolling mean of the price.
data['SMA'] = data[symbol].rolling(window).mean()

In [None]:
# Rolling minimum of the price.
data['MIN'] = data[symbol].rolling(window).min()

In [None]:
# Rolling maximum of the price.
data['MAX'] = data[symbol].rolling(window).max()

In [None]:
# Rolling mean of the log returns.
data['MOM'] = data['r'].rolling(window).mean()

In [None]:
# Rolling standard deviation of the log returns.
data['VOL'] = data['r'].rolling(window).std()

In [None]:
# Base series from which the lagged model inputs are derived.
features = [symbol, 'r', 'SMA', 'MIN', 'MAX', 'MOM', 'VOL']

In [None]:
# Drop the leading rows in which the return or a rolling value is still NaN.
data.dropna(inplace=True)

In [None]:
# Number of lagged copies created per feature.
lags = 3

In [None]:
# Add the lagged copies of every feature to `data` and collect their column
# names in `cols`: feature by feature and, within a feature, from lag 1 up
# to `lags`.
cols = []
for name in features:
    shifted = {f'{name}_lag_{k}': data[name].shift(k)
               for k in range(1, lags + 1)}
    for label, series in shifted.items():
        data[label] = series
    cols.extend(shifted)

In [None]:
# data.head()

In [None]:
# Shifting leaves NaN in the first rows of the lagged columns; remove them.
data.dropna(inplace=True)

In [None]:
# Number of lagged feature columns.
len(cols)

In [None]:
cols

In [None]:
# data.head()

### Benchmark Prediction

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [None]:
# Gaussian naive Bayes classifier used as the benchmark.
model = GaussianNB()

In [None]:
# Fit on all rows: lagged features as inputs, direction label as target.
model.fit(data[cols], data['d'])

In [None]:
# Predict for the same rows the model was fitted on.
data['p'] = model.predict(data[cols])

In [None]:
# Frequency of each predicted class.
data['p'].value_counts()

In [None]:
# Frequency of each actual class, for comparison.
data['d'].value_counts()

In [None]:
# Accuracy measured on the rows used for fitting.
accuracy_score(data['d'], data['p'])

## Dense Neural Network

In [None]:
from tensorflow import keras
from keras.layers import Dense
from keras.models import Sequential

### Base Case

In [None]:
def create_model(hl=1, hu=128, input_dim=None):
    """Build and compile a dense network for binary classification.

    Parameters
    ----------
    hl : int
        Number of hidden layers. The first hidden layer is always added,
        so values below 1 still yield one hidden layer.
    hu : int
        Number of units in every hidden layer.
    input_dim : int, optional
        Number of input features. When omitted, ``len(cols)`` is used,
        where ``cols`` is the notebook-level list of feature columns
        (looked up at call time).

    Returns
    -------
    A ``Sequential`` model with ReLU hidden layers and one sigmoid output
    unit, compiled with binary cross-entropy loss, the RMSprop optimizer
    and accuracy as metric.
    """
    if input_dim is None:
        # Backward-compatible default: size the input after the global
        # feature list, as the function did before the parameter existed.
        input_dim = len(cols)
    model = Sequential()
    model.add(Dense(hu, activation='relu', input_dim=input_dim))
    # The first hidden layer is in place already; add the remaining hl - 1.
    for _ in range(1, hl):
        model.add(Dense(hu, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

In [None]:
# Network built with the function's default arguments.
model = create_model()

In [None]:
model.summary()

In [None]:
# Train on all rows with the raw (unscaled) feature values and time the call.
%time model.fit(data[cols], data['d'], epochs=100, verbose=False)

In [None]:
# Raw outputs of the sigmoid unit.
model.predict(data[cols])

In [None]:
# Turn the sigmoid outputs into class labels with a 0.5 threshold.
data['p'] = np.where(model.predict(data[cols]) > 0.5, 1, 0)

In [None]:
data['p'].value_counts()

In [None]:
# Loss and accuracy on the rows used for training.
model.evaluate(data[cols], data['d'])

### Normalized Data

In [None]:
# Standardize every column with the mean and standard deviation of the full
# data set.
data_ = (data - data.mean()) / data.std()

In [None]:
model = create_model(hl=2)

In [None]:
model.summary()

In [None]:
%%time
# Inputs come from the standardized frame, labels from the original frame so
# that they stay 0/1; 15% of the rows are held out for validation.
res = model.fit(data_[cols], data['d'],
              epochs=100, verbose=False,
              validation_split=0.15)

In [None]:
model.evaluate(data_[cols], data['d'])

In [None]:
# Per-epoch metrics recorded during training.
h = pd.DataFrame(res.history)

In [None]:
h[['accuracy', 'val_accuracy']].plot();

### Train-Test Split

In [None]:
# Row position separating the first 80% of the rows from the rest.
split = int(len(data) * 0.8)
split

In [None]:
# Training set: the rows before the split position.
train = data.iloc[:split].copy()

In [None]:
# Normalization statistics are taken from the training rows only.
mu, std = train.mean(), train.std()

In [None]:
train_ = (train - mu) / std

In [None]:
# Test set: the rows from the split position onwards.
test = data.iloc[split:].copy()

In [None]:
# The test rows are scaled with the training statistics, not with their own.
test_ = (test - mu) / std

In [None]:
model = create_model(hl=2, hu=128)

In [None]:
%%time
# Standardized inputs, unscaled 0/1 labels; 20% of the training rows are held
# out for validation and shuffling of the training rows is switched off.
res = model.fit(train_[cols], train['d'],
                epochs=125, verbose=False,
                validation_split=0.2,
                shuffle=False)

In [None]:
# Loss and accuracy over the complete training set. `steps` is deliberately
# not passed: it limits the number of batches drawn, so `steps=1` would score
# only the first batch instead of all rows.
model.evaluate(train_[cols], train['d'])

In [None]:
# Loss and accuracy over the complete test set. `steps` is deliberately not
# passed: it limits the number of batches drawn, so `steps=1` would score
# only the first batch instead of all rows.
model.evaluate(test_[cols], test['d'])

In [None]:
# Per-epoch metrics recorded during training.
h = pd.DataFrame(res.history)

In [None]:
h[['accuracy', 'val_accuracy']].plot();

In [None]:
# Gap between training and validation accuracy, each averaged over all epochs.
h['accuracy'].mean() - h['val_accuracy'].mean()

## Adding Dropout

In [None]:
from keras.layers import Dropout

In [None]:
# Dropout?

In [None]:
def create_model(hl=1, hu=128, dropout=True, rate=0.3, input_dim=None):
    """Build and compile a dense binary classifier with optional dropout.

    Parameters
    ----------
    hl : int
        Number of hidden layers. The first hidden layer is always added,
        so values below 1 still yield one hidden layer.
    hu : int
        Number of units in every hidden layer.
    dropout : bool
        If true, a ``Dropout`` layer follows every hidden layer.
    rate : float
        Dropout rate passed to each ``Dropout`` layer.
    input_dim : int, optional
        Number of input features. When omitted, ``len(cols)`` is used,
        where ``cols`` is the notebook-level list of feature columns
        (looked up at call time).

    Returns
    -------
    A ``Sequential`` model with ReLU hidden layers and one sigmoid output
    unit, compiled with binary cross-entropy loss, the RMSprop optimizer
    and accuracy as metric.
    """
    if input_dim is None:
        # Backward-compatible default: size the input after the global
        # feature list, as the function did before the parameter existed.
        input_dim = len(cols)
    model = Sequential()
    model.add(Dense(hu, activation='relu', input_dim=input_dim))
    if dropout:
        model.add(Dropout(rate))
    # The first hidden layer is in place already; add the remaining hl - 1.
    for _ in range(1, hl):
        model.add(Dense(hu, activation='relu'))
        if dropout:
            model.add(Dropout(rate))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

In [None]:
# Two hidden layers of 128 units, each followed by dropout with rate 0.3.
model = create_model(hl=2, hu=128, dropout=True, rate=0.3)

In [None]:
model.summary()

In [None]:
%%time
# Standardized inputs, unscaled 0/1 labels; 20% of the training rows are held
# out for validation and shuffling of the training rows is switched off.
res = model.fit(train_[cols], train['d'],
                epochs=125, verbose=False,
                validation_split=0.2,
                shuffle=False)

In [None]:
# Loss and accuracy over the complete training set. `steps` is deliberately
# not passed: it limits the number of batches drawn, so `steps=1` would score
# only the first batch instead of all rows.
model.evaluate(train_[cols], train['d'])

In [None]:
# Loss and accuracy over the complete test set. `steps` is deliberately not
# passed: it limits the number of batches drawn, so `steps=1` would score
# only the first batch instead of all rows.
model.evaluate(test_[cols], test['d'])

In [None]:
# Per-epoch metrics recorded during training.
h = pd.DataFrame(res.history)

In [None]:
h[['accuracy', 'val_accuracy']].plot();

In [None]:
# Gap between training and validation accuracy, each averaged over all epochs.
h['accuracy'].mean() - h['val_accuracy'].mean()

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="mailto:training@tpq.io">training@tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> 