<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Finance Basics

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

## `scikit-learn` package

In [None]:
# Clone the course repository into the working directory (shell command).
!git clone https://github.com/tpq-classes/pff_basics.git
import sys
# Add the cloned folder to the module search path so its contents can be
# imported from this notebook.
sys.path.append('pff_basics')


In [None]:
import numpy as np
import pandas as pd
from pylab import plt
# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
# Apply the 'seaborn-v0_8' matplotlib style sheet to all plots.
plt.style.use('seaborn-v0_8')
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'

## Supervised Learning

**Deep Neural Networks**

## Market Prediction

In [None]:
url = 'https://certificate.tpq.io/mlfin.csv'

In [None]:
raw = pd.read_csv(url, index_col=0, parse_dates=True)

In [None]:
del raw['BTC=']

In [None]:
sym = 'EUR='

In [None]:
data = pd.DataFrame(raw[sym]).dropna()

In [None]:
data.plot();

## Classification

### Binary Features, Binary Labels

In [None]:
data['r'] = np.log(data[sym] / data[sym].shift(1))

In [None]:
data['r'].plot();

In [None]:
data['d'] = np.sign(data['r'])

In [None]:
data.head()

In [None]:
lags = 10

In [None]:
cols = list()
for lag in range(1, lags + 1):
    col = f'lag_{lag}'
    data[col] = data['d'].shift(lag)
    cols.append(col)

In [None]:
data.head(8)

In [None]:
data.dropna(inplace=True)

In [None]:
2 ** lags

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Neural network classifier with two hidden layers of 128 units each and
# at most 2000 training iterations. To try a Gaussian Naive Bayes model
# instead, assign ``GaussianNB()`` to ``model``.
model = MLPClassifier(hidden_layer_sizes=[128, 128],
                      max_iter=2000)

In [None]:
model.fit(data[cols], data['d'])

In [None]:
data['p'] = model.predict(data[cols])

In [None]:
accuracy_score(data['d'], data['p'])

### Float Features, Binary Labels

In [None]:
data = pd.DataFrame(raw[sym]).dropna()

In [None]:
data['r'] = np.log(data[sym] / data[sym].shift(1))

In [None]:
data['d'] = np.sign(data['r'])

In [None]:
lags = 5

In [None]:
cols = list()
for lag in range(1, lags + 1):
    col = f'lag_{lag}'
    data[col] = data['r'].shift(lag)
    cols.append(col)

In [None]:
# data.head(8)

In [None]:
data.dropna(inplace=True)

In [None]:
# Neural network classifier with two hidden layers of 128 units each, at
# most 5000 training iterations and no shuffling of the samples. To try a
# Gaussian Naive Bayes model instead, assign ``GaussianNB()`` to ``model``.
model = MLPClassifier(hidden_layer_sizes=[128, 128],
                      max_iter=5000, shuffle=False)

In [None]:
model.fit(data[cols], data['d'])

In [None]:
data['p'] = model.predict(data[cols])

In [None]:
accuracy_score(data['d'], data['p'])  # with original data

In [None]:
data['p'].value_counts()

In [None]:
data_ = (data - data.mean()) / data.std()

In [None]:
# data_.mean().round(8)

In [None]:
# data_.std()

In [None]:
%time model.fit(data_[cols], data['d'])

In [None]:
data['p'] = model.predict(data_[cols])

In [None]:
accuracy_score(data['d'], data['p'])  # with normalized features

In [None]:
data['p'].value_counts()

## Train-Test Split

In [None]:
split = int(len(data) * 0.8)
split

In [None]:
train = data.iloc[:split].copy()

In [None]:
mu, std = train.mean(), train.std()

In [None]:
train_ = (train - mu) / std

In [None]:
test = data.iloc[split:].copy()

In [None]:
test_ = (test - mu) / std

In [None]:
%time model.fit(train_[cols], train['d'])  # training step

In [None]:
train['p'] = model.predict(train_[cols])

In [None]:
accuracy_score(train['d'], train['p'])

In [None]:
test['p'] = model.predict(test_[cols])  # testing step

In [None]:
accuracy_score(test['d'], test['p'])

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="mailto:training@tpq.io">training@tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> 