<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Asset Management

### Machine Learning

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

## Machine Learning

Topics of interest include:

* deep learning
* return predictions
* mixed features
* multiple labels
* vectorized backtesting
  * single instruments
  * multiple instruments
* train-test splits
* adding financial features
* adding typical risk factors
* rolling train-test approach

# Imports & Configurations

In [None]:
# Fetch the course repository into the working directory (IPython shell escape).
!git clone https://github.com/tpq-classes/python_for_asset_management.git
import sys
# Put the cloned directory on the module search path so that Python files
# inside it can be imported.
sys.path.append('python_for_asset_management')


In [None]:
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

In [None]:
# Reproducibility: fix the seed of NumPy's global random number generator.
np.random.seed(1000)

# Output settings: four decimals for pandas and NumPy, no scientific
# notation for NumPy arrays, and the seaborn style sheet for plots.
pd.set_option('display.precision', 4)
np.set_printoptions(precision=4, suppress=True)
plt.style.use('seaborn-v0_8')
%config InlineBackend.figure_format = 'svg'

## The Data

In [None]:
# Download the data set: the first CSV column becomes the index (parsed as
# dates) and every row containing a missing value is discarded.
raw = pd.read_csv(
    'http://hilpisch.com/aiif_eikon_eod_data.csv',
    parse_dates=True,
    index_col=0,
)
raw = raw.dropna()

In [None]:
# Column labels of the raw data set.
raw.columns

In [None]:
# Summary of the raw data set (index, columns, dtypes, non-null counts).
raw.info()

In [None]:
# Work with the first three columns of the data set only.
symbols = raw.columns[:3]
symbols

In [None]:
# Sub-frame restricted to the selected columns.
data = pd.DataFrame(raw[symbols])

In [None]:
data.info()

In [None]:
# Optional: uncomment the next line to resample the data to weekly values
# (last observation per week) before computing returns.
# data = data.resample('1w').last()

In [None]:
# Log returns: natural log of each value divided by the previous row's value.
rets = np.log(data / data.shift(1))

In [None]:
# The first row has no predecessor and is therefore NaN; remove it in place.
rets.dropna(inplace=True)

In [None]:
rets.head()

In [None]:
# Number of lagged values per column that create_lags() turns into features.
lags = 3

In [None]:
# returns features only
# returns + risk factors features
def create_lags(rets, n_lags=None):
    """Add lagged copies of every column of ``rets`` as feature columns.

    For each column ``sym`` present in ``rets`` when the function is called
    and for each ``lag`` in ``1..n_lags``, a column ``f'{sym}_lag_{lag}'``
    holding ``rets[sym].shift(lag)`` is added to ``rets`` in place.

    Parameters
    ----------
    rets : pandas.DataFrame
        Frame to extend; it is modified in place.
    n_lags : int, optional
        Number of lags per column. When omitted, the module-level
        variable ``lags`` is used.

    Returns
    -------
    list of str
        Names of the added columns in creation order. The same list is
        also stored in the module-level variable ``cols``.

    Notes
    -----
    Call this once per frame: a second call would also create lags of the
    lag columns added by the first call.
    """
    global cols
    if n_lags is None:
        n_lags = lags  # fall back to the notebook-wide setting
    cols = []
    # Snapshot the labels so the loop does not depend on columns added below.
    for sym in list(rets.columns):
        for lag in range(1, n_lags + 1):
            col = f'{sym}_lag_{lag}'
            rets[col] = rets[sym].shift(lag)
            cols.append(col)
    return cols

In [None]:
# Adds the lagged feature columns to rets and fills the global list cols.
create_lags(rets)

In [None]:
cols

In [None]:
# rets.head()

In [None]:
# Shifting leaves NaN values in the leading rows of the lag columns;
# drop those rows in place.
rets.dropna(inplace=True)

In [None]:
# rets.head()

In [None]:
# Standardize the feature columns: subtract the column mean and divide by
# the column standard deviation.
# NOTE(review): mean and standard deviation are computed over all rows,
# including rows that later sections use as test data -- consider
# estimating them on the training rows only.
rets[cols] = (rets[cols] - rets[cols].mean()) / rets[cols].std()

In [None]:
# rets.head()

## Predicting Returns

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, accuracy_score

In [None]:
# Neural network regressor; the list passed as hidden_layer_sizes has one
# entry per hidden layer (here a single entry of 128).
model = MLPRegressor(
    hidden_layer_sizes=1 * [128],
    shuffle=False,
    random_state=100,
)

### In-Sample Training-Testing

In [None]:
model.fit(rets[cols], rets[symbols])

In [None]:
pred = model.predict(rets[cols])
pred[:5]

In [None]:
rets_ = rets[symbols].copy()
rets_.iloc[:] = pred
rets_.head()

In [None]:
mean_squared_error(rets[symbols], rets_)

In [None]:
for sym in symbols:
    mse = mean_squared_error(rets[sym], rets_[sym])
    print(f'{sym}: MSE={mse}')

In [None]:
np.sign(rets_).head()

In [None]:
for sym in symbols:
    acc = accuracy_score(np.sign(rets[sym]), np.sign(rets_[sym]))
    print(f'{sym}: ACC={acc}')

In [None]:
strat = np.sign(rets_) * rets[symbols]

In [None]:
strat.head()

In [None]:
strat.sum().apply(np.exp)

In [None]:
strat.cumsum().apply(np.exp).plot();  # in-sample performance

In [None]:
weights = len(symbols) * [1 / len(symbols)]

In [None]:
strat.cumsum().apply(np.exp).plot()
(strat * weights).cumsum().sum(axis=1).apply(np.exp).plot(label='Portfolio')
plt.legend();  # in-sample performance

## Train-Test Split

In [None]:
split = int(len(rets) * 0.7)

In [None]:
train = rets.iloc[:split].copy()

In [None]:
test = rets.iloc[split:].copy()

In [None]:
model.fit(train[cols], train[symbols])

In [None]:
pred = model.predict(train[cols])  # in-sample
rets_ = train[symbols].copy()
rets_[:] = pred

In [None]:
for sym in symbols:    
    acc = accuracy_score(np.sign(train[sym]), np.sign(rets_[sym]))
    print(f'{sym}: ACC={acc:.3f}')

In [None]:
pred = model.predict(test[cols])  # out-of-sample
rets_ = test[symbols].copy()
rets_[:] = pred

In [None]:
for sym in symbols:    
    acc = accuracy_score(np.sign(test[sym]), np.sign(rets_[sym]))
    print(f'{sym}: ACC={acc:.3f}')  # out-of-sample

In [None]:
strat = np.sign(rets_) * test[symbols]

In [None]:
strat.head()

In [None]:
strat.sum().apply(np.exp)  # out-of-sample

In [None]:
strat.cumsum().apply(np.exp).plot();  # out-of-sample

In [None]:
(strat / len(symbols)).cumsum().sum(axis=1).apply(np.exp).plot();  # out-of-sample

## Rolling Train-Test Approach

In [None]:
# Index of the prepared data; its first and last entries bound the weekly
# grid built in the next section.
rets.index

### Weekly Testing

In [None]:
weeks = pd.date_range(rets.index[0], rets.index[-1], freq='w')

In [None]:
start = 52

In [None]:
pred

In [None]:
%%time
# Walk-forward evaluation on a weekly grid (intended for daily base data):
# for each anchor w, refit on all rows before w and predict the rows of the
# week starting at w. The per-week frames are collected in a list and
# concatenated once, which avoids re-copying the growing result in every
# iteration.
frames = []
step = weeks.freq
for w in weeks[start:start + 250]:
    # Half-open windows: a row dated exactly on an anchor is used for
    # testing only and never lands in two consecutive test windows.
    train = rets.loc[rets.index < w]
    test = rets.loc[(rets.index >= w) & (rets.index < w + step)]
    if test.empty:
        continue  # no rows in this week, nothing to predict
    model.fit(train[cols], train[symbols])
    pred = model.predict(test[cols])
    frames.append(pd.DataFrame(pred, columns=symbols, index=test.index))
rets_ = pd.concat(frames) if frames else pd.DataFrame(columns=symbols)

In [None]:
train.index

In [None]:
test.index

In [None]:
rets_.head()  # out-of-sample

In [None]:
strat = np.sign(rets_) * rets.loc[
        rets_.index[0]:rets_.index[-1]][symbols]

In [None]:
strat.head()

In [None]:
strat.sum().apply(np.exp)  # out-of-sample

In [None]:
strat.cumsum().apply(np.exp).plot();  # out-of-sample

In [None]:
(strat / len(symbols)).cumsum().sum(axis=1).apply(np.exp).plot();  # out-of-sample

### Daily Testing

In [None]:
start = 252

In [None]:
%%time
rets_ = pd.DataFrame()
for d in range(start, start + 750):
    train = rets.iloc[:d]
    model.fit(train[cols], train[symbols])
    test = rets.iloc[d:d + 1]
    pred = model.predict(test[cols])  
    df = pd.DataFrame(pred, columns=symbols, index=test.index)
    rets_ = pd.concat([rets_, df])

In [None]:
train.index

In [None]:
test.index

In [None]:
strat = np.sign(rets_) * rets.loc[
        rets_.index[0]:rets_.index[-1]][symbols]

In [None]:
strat.head()

In [None]:
strat.sum().apply(np.exp)  # out-of-sample

In [None]:
strat.cumsum().apply(np.exp).plot();  # out-of-sample

In [None]:
(strat / len(symbols)).cumsum().sum(axis=1).apply(np.exp).plot();  # out-of-sample

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>