In [None]:
import os
import math
import numpy as np
import pandas as pd
from pylab import plt, mpl
import datetime as dt
# The bundled seaborn styles were renamed to 'seaborn-v0_8*' in Matplotlib 3.6
# and the old names were dropped in later releases; use whichever name this
# installation provides so the cell works on both old and new versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
mpl.rcParams['savefig.dpi'] = 300          # resolution of saved figures
mpl.rcParams['font.family'] = 'serif'
# Silence pandas' chained-assignment warning and shorten printed floats.
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', '{:.4f}'.format)
np.set_printoptions(suppress=True, precision=4)
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes -- confirm this is intended.
os.environ['PYTHONHASHSEED'] = '0'
from sklearn.preprocessing import StandardScaler
from pandas_datareader import DataReader
from datetime import datetime

In [None]:
import random
import tensorflow as tf
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.regularizers import l1
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score
import warnings

In [None]:
%config InlineBackend.figure_format = 'svg'
#To ignore all warnings in model output
warnings.filterwarnings("ignore")
random.seed(50) #for reproducibility

In [None]:
# Fetch the BABA quote history from the 'yahoo' source and keep only the
# adjusted close, as a one-column frame named after the ticker.
baba = DataReader(
    'BABA', 'yahoo', datetime(2014, 9, 14), datetime(2020, 10, 16)
).loc[:, ['Adj Close']]
baba.columns = ['BABA']
# `data` is the working name used by the cells that follow.
data = baba

The previous section lays out the blueprint for vectorized backtesting on the basis of a simple, easy-to-visualize trading strategy. We are going to apply vectorized backtesting to DNN-based trading strategies. The following trains a Keras DNN model. We are going to use the following technical indicators as features in our model:
returns, the simple moving average, the minimum price over a window, the maximum price over a window, the momentum indicator and the volatility.

In [None]:
def add_lags(dataframe, symbol, lags=5, features=None, window=20):
    """Add rolling indicators for ``symbol`` and lagged copies of chosen features.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Price data containing a ``symbol`` column. It is copied, never
        modified in place.
    symbol : str
        Name of the price column.
    lags : int, default 5
        Number of lags (1 .. ``lags``) created for every feature.
    features : list of str, optional
        Columns to lag. Any of ``symbol``, ``'r'`` (log return), ``'d'``
        (1 if the return is positive, else 0), ``'sma'``, ``'min'``,
        ``'max'``, ``'mom'`` and ``'vol'``. Defaults to all of them.
    window : int, default 20
        Length of the rolling window used for the indicators.

    Returns
    -------
    tuple
        ``(df, cols)`` where ``df`` holds the prices, the indicators and
        the lagged columns with incomplete rows removed, and ``cols`` lists
        the lagged column names (``'<feature>_lag_<k>'``) in creation order.
    """
    if features is None:
        features = [symbol, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
    cols = []
    df = dataframe.copy()
    df.dropna(inplace=True)
    # Use the price column explicitly so the function also works when the
    # input frame carries more columns than just `symbol`.
    df['r'] = np.log(df[symbol] / df[symbol].shift(1))
    df['sma'] = df[symbol].rolling(window).mean()
    df['min'] = df[symbol].rolling(window).min()
    df['max'] = df[symbol].rolling(window).max()
    df['mom'] = df['r'].rolling(window).mean()
    df['vol'] = df['r'].rolling(window).std()
    # Drop the warm-up rows for which the rolling indicators are undefined.
    df.dropna(inplace=True)
    df['d'] = np.where(df['r'] > 0, 1, 0)
    for f in features:
        for lag in range(1, lags + 1):
            col = f'{f}_lag_{lag}'
            df[col] = df[f].shift(lag)
            cols.append(col)
    # Shifting leaves NaNs in the first `lags` rows; remove them as well.
    df.dropna(inplace=True)
    return df, cols

Let's choose `lags=5` and `features = [symbol, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']` to generate the data for our model

lags = 5
features = [symbol, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
data, cols = add_lags(data, symbol, lags,features, window=20)

In [None]:
# Number of lagged copies per feature; assigned here so that the cell runs on
# a freshly started kernel.
lags = 5
features = ['BABA', 'r', 'sma', 'min', 'max', 'mom', 'vol']
# Start from the raw price frame `baba` so that re-running this cell never
# feeds the already-lagged `data` back into `add_lags`.
data, cols = add_lags(baba, 'BABA', lags, features, window=20)

In [None]:
# Adam optimizer with a learning rate of 1e-4; used as the default optimizer
# of `create_model`.
optimizer = Adam(learning_rate=1e-4)

In [None]:
def create_model(hl=2, hu=128, dropout=False, rate=0.3,
                 regularize=False, reg=l1(0.0005),
                 optimizer=optimizer, input_dim=len(cols)):
    """Assemble and compile a dense feed-forward binary classifier.

    The network consists of one input-facing ``Dense`` layer followed by
    ``hl`` further ``Dense`` layers, each with ``hu`` ReLU units and each
    optionally followed by ``Dropout``, and a single sigmoid output unit.

    Parameters
    ----------
    hl : int
        Number of hidden layers added after the first one.
    hu : int
        Units per hidden layer.
    dropout : bool
        Whether a ``Dropout`` layer follows every hidden layer.
    rate : float
        Dropout rate.
    regularize : bool
        Whether ``reg`` is applied as activity regularizer of the hidden
        layers; when False no regularizer is used.
    reg
        Regularizer instance used when ``regularize`` is True.
    optimizer
        Optimizer passed to ``compile``.
    input_dim : int
        Number of input features. The default is evaluated once, when the
        function is defined.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy
    metric).
    """
    activity_reg = reg if regularize else None
    model = Sequential()

    def _append_hidden(**dense_kwargs):
        # One hidden block: ReLU Dense layer plus the optional Dropout layer.
        model.add(Dense(hu, activation='relu',
                        activity_regularizer=activity_reg,
                        **dense_kwargs))
        if dropout:
            model.add(Dropout(rate, seed=100))

    # The first block is the only one that declares the input dimension.
    _append_hidden(input_dim=input_dim)
    for _ in range(hl):
        _append_hidden()

    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

Let's split the historical data into training and test sets in a sequential manner, and train the DNN model on the normalized feature data:

from sklearn.model_selection import TimeSeriesSplit #better way of splitting to be used later after successfully running first version of model

### Creation of training and testing samples and model creation

We are going to use 10% of the samples for testing

In [None]:
# Hold out the most recent 10% of the rows for out-of-sample testing.
sample_numbers = data.shape[0]
test_sample = int(0.1 * sample_numbers)
split_idx = sample_numbers - test_sample
split_point = data.index[split_idx]  # index label of the first test row
# Split by position: label slicing with .loc includes both endpoints, which
# would place the row at `split_point` in the training AND the test set.
train = data.iloc[:split_idx].copy()
test = data.iloc[split_idx:].copy()
# Standardize both sets with statistics from the training data only, so that
# no information from the test period enters the model inputs.
mu, std = train.mean(), train.std()
train_ = (train - mu) / std
test_ = (test - mu) / std
model = create_model(hl=2, hu=64, dropout=True, regularize=True)
# shuffle=False keeps the chronological order of the samples; the last 20% of
# the training rows serve as validation data.
model.fit(train_[cols], train['d'],
          epochs=20, verbose=False,
          validation_split=0.2, shuffle=False)

In [None]:
# In-sample evaluation on the normalized training features, then the names of
# the metrics the model reports.
model.evaluate(x=train_[cols], y=train['d'])
model.metrics_names

Vectorized backtesting can now be applied to judge the economic performance of the DNN-based trading strategy in-sample based on the model’s predictions.
In this context, an upward prediction is naturally interpreted as a long position and a downward prediction as a short position:

In [None]:
# Predicted probability above 0.5 -> long (+1), otherwise short (-1).
train_prob = model.predict(train_[cols])
train['p'] = np.where(train_prob > 0.5, 1, -1)

In [None]:
# First rows of the position and log-return columns.
train[['p', 'r']].head()

In [None]:
# Strategy return: the log return earned with the predicted position.
train['s'] = train['r'] * train['p']

In [None]:
# First rows of position, log return and strategy return.
train[['p', 'r', 's']].head()

In [None]:
# 'r' and 's' are log returns, which compound by summation: exp() of the
# running sum is the gross performance, and subtracting 1 gives the cumulative
# return. Compounding them as (1 + r) products would treat them as simple
# returns and misstate the result.
cumulative_returns = np.exp(train[['r', 's']].cumsum()) - 1
cumulative_returns.plot(figsize=(10, 6));

### Out-of-sample performance

In [None]:
# Out-of-sample evaluation on the normalized test features.
model.evaluate(x=test_[cols], y=test['d'])

In [None]:
# Predicted probability above 0.5 -> long (+1), otherwise short (-1).
test['p'] = np.where(model.predict(test_[cols]) > 0.5, 1, -1)
test['s'] = test['p'] * test['r']
# 'r' and 's' are log returns: sum them and exponentiate to compound, rather
# than multiplying (1 + r) factors as if they were simple returns.
cumulative_returns_test = np.exp(test[['r', 's']].cumsum()) - 1
cumulative_returns_test.plot(figsize=(10, 6));

In [None]:
# How many long (+1) and short (-1) positions the model takes out-of-sample.
test.loc[:, 'p'].value_counts()

## Ideas to improve the model
- Add additional indicators such as Bollinger Bands, etc. (check out the packages for algorithmic trading)
- suggest adding sentiment analysis, etc

### Model evaluation
Calculate the Sharpe ratio, the maximum drawdown, etc.

#### Add references
- Artificial Intelligence in Finance, Yves Hilpisch
- [python for trading](https://www.datacamp.com/community/tutorials/finance-python-trading)