<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Asset Management

### Machine Learning

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

## Machine Learning

Topics of interest include:

* deep learning
* return predictions
* mixed features
* multiple labels
* vectorized backtesting
    * single instruments
    * multiple instruments

# Imports & Configurations

In [None]:
!git clone https://github.com/tpq-classes/python_for_asset_management.git
import sys
sys.path.append('python_for_asset_management')


In [None]:
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt
import warnings

In [None]:
# Print NumPy floats in fixed-point notation with four decimal places.
np.set_printoptions(suppress=True, precision=4)
# Apply the 'seaborn-v0_8' matplotlib style sheet to all subsequent plots.
plt.style.use('seaborn-v0_8')
# Seed NumPy's global random number generator.
np.random.seed(1000)
%config InlineBackend.figure_format = 'svg'

## The Data

In [None]:
# Load the data set from the remote CSV file: the first column becomes the
# (date-parsed) index, and every row with a missing value is discarded.
raw = pd.read_csv(
    'http://hilpisch.com/aiif_eikon_eod_data.csv',
    parse_dates=True,
    index_col=0,
)
raw = raw.dropna()

In [None]:
raw.columns

In [None]:
raw.info()

In [None]:
# Restrict the analysis to the first three columns of the raw data set.
symbols = raw.columns[:3]
symbols

In [None]:
# Working DataFrame that holds only the selected columns of `raw`.
data = pd.DataFrame(raw[symbols])

In [None]:
# Downsample to weekly frequency, keeping the last observation of each
# weekly bin. 'W' is the canonical pandas alias for weekly frequency; the
# lowercase spelling is not a documented alias and is deprecated in newer
# pandas releases.
data = data.resample('1W').last()

In [None]:
# Log returns per period: ln(P_t / P_{t-1}). The first row is NaN because
# shift(1) has no preceding value to supply.
rets = np.log(data / data.shift(1))

In [None]:
rets.dropna(inplace=True)

In [None]:
rets.head()

In [None]:
lags = 3

In [None]:
def create_lags(rets, n_lags=None):
    """Add lagged copies of every base column of ``rets`` in place.

    For each base column ``sym`` and each ``lag`` in ``1..n_lags``, a column
    named ``f'{sym}_lag_{lag}'`` holding ``rets[sym].shift(lag)`` is written
    to ``rets``. Columns that are themselves lag columns produced by an
    earlier call are not lagged again, so re-running the function does not
    create ``..._lag_1_lag_1`` columns.

    Parameters
    ----------
    rets : pandas.DataFrame
        Data to extend; it is mutated in place.
    n_lags : int, optional
        Number of lags per column. When omitted, the notebook-level
        variable ``lags`` is used.

    Returns
    -------
    list of str
        Names of the lag columns in creation order. The same list is also
        stored in the global ``cols``, which later cells rely on.
    """
    global cols
    if n_lags is None:
        n_lags = lags  # fall back to the notebook-level setting

    # Snapshot the column names up front: the loop below adds columns.
    known = {str(name) for name in rets.columns}

    def _is_lag_column(name):
        # True for 'X_lag_<k>' when 'X' itself is a column of the frame.
        base, sep, num = str(name).rpartition('_lag_')
        return bool(sep) and num.isdigit() and base in known

    base_columns = [name for name in rets.columns if not _is_lag_column(name)]

    cols = []
    for sym in base_columns:
        for lag in range(1, n_lags + 1):
            col = f'{sym}_lag_{lag}'
            rets[col] = rets[sym].shift(lag)
            cols.append(col)
    return cols

In [None]:
create_lags(rets)

In [None]:
cols

In [None]:
rets.head()

In [None]:
rets.dropna(inplace=True)

In [None]:
rets.head()

In [None]:
# Standardize every lag feature to zero mean and unit standard deviation.
# Mean and standard deviation are computed over all rows currently in `rets`.
rets[cols] = (rets[cols] - rets[cols].mean()) / rets[cols].std()

In [None]:
rets.head()

## Predicting Returns

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, accuracy_score

In [None]:
# Feed-forward regressor with a single hidden layer of 128 units. The seed is
# fixed and sample shuffling is switched off.
model = MLPRegressor(
    hidden_layer_sizes=[128],
    shuffle=False,
    random_state=100,
)

In [None]:
# Train a single network on the lagged features to predict the returns of
# all selected symbols at once.
model.fit(rets[cols], rets[symbols])

In [None]:
pred = model.predict(rets[cols])
pred

In [None]:
# Put the predictions into a DataFrame that shares index and columns with
# the realized returns, so both can be compared element by element.
rets_ = rets[symbols].copy()
rets_.iloc[:] = pred
rets_.head()

In [None]:
mean_squared_error(rets[symbols], rets_)

In [None]:
# Report the mean squared error separately for each symbol. The predictions
# were made on the same rows the model was fitted on (in-sample).
for sym in symbols:
    mse = mean_squared_error(rets[sym], rets_[sym])
    print(f'{sym}: MSE={mse}')

In [None]:
np.sign(rets_).head()

In [None]:
# Directional hit ratio per symbol: share of rows where the sign of the
# predicted return equals the sign of the realized return. np.sign yields
# -1, 0 or 1, so an exact zero counts as its own class.
for sym in symbols:
    acc = accuracy_score(np.sign(rets[sym]), np.sign(rets_[sym]))
    print(f'{sym}: ACC={acc}')

In [None]:
# Strategy log return per row: the position is the sign of the predicted
# return (+1 long, -1 short, 0 flat) times the realized log return.
strat = np.sign(rets_) * rets[symbols]

In [None]:
strat.head()

In [None]:
strat.sum().apply(np.exp)

In [None]:
strat.cumsum().apply(np.exp).plot();  # in-sample performance

In [None]:
# Equal weighting: every symbol receives the share 1 / (number of symbols).
weights = [1 / len(symbols)] * len(symbols)

In [None]:
# Gross performance of each single-instrument strategy: exp of the
# cumulative log returns.
strat.cumsum().apply(np.exp).plot()
# NOTE(review): the portfolio line weights the *log* returns and then
# exponentiates their sum, which equals the weighted geometric mean of the
# individual gross performances. A buy-and-hold portfolio would earn the
# weighted arithmetic mean instead, so treat this line as an approximation.
(strat * weights).cumsum().sum(axis=1).apply(np.exp).plot(label='Portfolio')
plt.legend();  # in-sample performance

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>