<img src="http://certificate.tpq.io/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# EPAT Session 1

**Executive Program in Algorithmic Trading**

**_Vectorized Backtesting_**

Prof. Dr. Yves J. Hilpisch | The Python Quants GmbH | http://tpq.io

<a href="https://home.tpq.io/certificates/pyalgo" target="_blank"><img src="https://hilpisch.com/pyalgo_cover_shadow.png" width="300px" align="left"></a>

## Basic Imports

In [None]:
import cufflinks
import numpy as np
import pandas as pd
from pylab import plt
# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in Matplotlib 3.6
# and the old name was later removed; prefer the new name when it exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
pd.set_option('mode.chained_assignment', None)
cufflinks.set_config_file(offline=True)
%config InlineBackend.figure_format = 'svg'

## Reading Financial Data

In [None]:
url = 'http://hilpisch.com/pyalgo_eikon_eod_data.csv'

In [None]:
raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna()

In [None]:
raw.head()

In [None]:
# Simulated random-walk price path: the first row is 100 and every later row
# adds the running sum of standard-normal shocks.
# The column is built in a single assignment rather than through the chained
# ``raw['RANDOM'].iloc[1:] += ...``, which may update a temporary copy (and
# has no effect on ``raw`` under pandas Copy-on-Write).
raw['RANDOM'] = 100 + np.concatenate(
    ([0.0], np.random.standard_normal(len(raw) - 1).cumsum()))

In [None]:
raw['AAPL.O'].plot();

In [None]:
raw['RANDOM'].plot();

## Efficient Markets

In [None]:
# Candidate instruments; each assignment overwrites the previous one, so only
# the last line takes effect. Reorder the lines to analyze another symbol.
symbol = 'RANDOM'
symbol = 'AAPL.O'
symbol = '.SPX'
symbol = 'EUR='

In [None]:
data = pd.DataFrame(raw[symbol])

In [None]:
data.head()

In [None]:
lags = 5

In [None]:
# One column per lag: lag_k holds the price series shifted down by k rows.
cols = [f'lag_{lag}' for lag in range(1, lags + 1)]
for lag, col in enumerate(cols, start=1):
    data[col] = data[symbol].shift(lag)

In [None]:
data.head(7)

In [None]:
data.dropna(inplace=True)

In [None]:
data.head()

In [None]:
# Least-squares fit of the current price on its lagged values (only the lag
# columns are used, so there is no intercept term). lstsq returns a tuple;
# element [0] is the coefficient vector. Per the NumPy docs, rcond=-1 selects
# the legacy machine-precision cutoff for small singular values.
reg = np.linalg.lstsq(data[cols], data[symbol], rcond=-1)[0]

In [None]:
reg

In [None]:
data[cols].corr()

## Simple Trading Strategy

In [None]:
# Candidate instruments; each assignment overwrites the previous one, so only
# the last line takes effect. Reorder the lines to backtest another symbol.
symbol = 'RANDOM'
symbol = 'AAPL.O'
symbol = '.SPX'
symbol = 'EUR='

In [None]:
data = pd.DataFrame(raw[symbol])

In [None]:
data['SMA1'] = data[symbol].rolling(42).mean()

In [None]:
data['SMA2'] = data[symbol].rolling(252).mean()

In [None]:
data.plot();

In [None]:
data.dropna(inplace=True)

In [None]:
# data['p'] = np.where(data['SMA1'] > data['SMA2'], 'long', 'short')

In [None]:
# Position signal: +1 (long) while SMA1 is above SMA2, otherwise -1 (short).
data['p'] = np.where(data['SMA1'] > data['SMA2'], 1, -1)

In [None]:
data.head()

In [None]:
data.tail()

In [None]:
data.plot(secondary_y='p');

## Vectorized Backtesting

In [None]:
data['r'] = np.log(data[symbol] / data[symbol].shift(1))  # log returns

In [None]:
data.head()

In [None]:
# Strategy log returns: the position from the previous row times the current
# row's market log return. The shift means a signal computed on one day is
# only applied to the following day's return.
data['s'] = data['p'].shift(1) * data['r']

In [None]:
data[['r', 's']].sum()  # sum of log returns

In [None]:
data[['r', 's']].sum().apply(np.exp)  # gross performance

In [None]:
data[['r', 's']].cumsum().apply(np.exp).plot();  # gross performance over time

## Excursion: Discounting

In [None]:
S0 = 100
S1 = 110

In [None]:
r = S1 / S0 - 1

In [None]:
r

In [None]:
r = (S1 - S0) / S0  # simple return

In [None]:
r

In [None]:
import math

In [None]:
lr = math.log(S1 / S0)  # log return

In [None]:
lr

In [None]:
(1 + r) * S0  # simple return

In [None]:
math.exp(lr)

In [None]:
math.exp(lr) * S0  # log return

## Optimizing the Parameters (OOP Way)

### `FinancialData` Class

In [None]:
class FinancialData:
    """Price data for a single symbol together with its log returns.

    Instantiation reads the CSV file at ``url`` and builds ``data``, a
    DataFrame holding the symbol's column plus ``'r'``, the log return
    relative to the previous row.
    """

    url = 'http://hilpisch.com/pyalgo_eikon_eod_data.csv'

    def __init__(self, symbol):
        """Store ``symbol``, then retrieve and prepare its data."""
        self.symbol = symbol
        self.retrieve_data()
        self.prepare_data()

    def retrieve_data(self):
        """Read the CSV at ``url`` into ``raw``, dropping rows with NaNs."""
        frame = pd.read_csv(self.url, index_col=0, parse_dates=True)
        self.raw = frame.dropna()

    def prepare_data(self):
        """Build ``data`` from the symbol's column and add log returns."""
        prices = pd.DataFrame(self.raw[self.symbol])
        # Single-column frame divided by its one-row lag -> log returns.
        prices['r'] = np.log(prices / prices.shift(1))
        self.data = prices

    def plot_data(self, cols=None):
        """Plot the given columns of ``data`` (default: the price column)."""
        selected = [self.symbol] if cols is None else cols
        self.data[selected].plot()

In [None]:
fd = FinancialData('EUR=')  # instantiation

In [None]:
fd.url  # class attribute

In [None]:
fd.symbol  # instance attribute

In [None]:
# fd.retrieve_data()  # method call

In [None]:
fd.data.head()

In [None]:
fd.plot_data()

### Vectorized Backtesting Class

In [None]:
class SMABacktester(FinancialData):
    """Vectorized backtest of a two-SMA crossover strategy.

    The strategy is long (+1) while the ``SMA1`` rolling mean is above the
    ``SMA2`` rolling mean and short (-1) otherwise.
    """

    def prepare_statistics(self):
        """Add the rolling means ``'SMA1'`` and ``'SMA2'`` to ``data``."""
        price = self.data[self.symbol]
        self.data['SMA1'] = price.rolling(self.SMA1).mean()
        self.data['SMA2'] = price.rolling(self.SMA2).mean()

    def backtest_strategy(self, SMA1, SMA2, start=None):
        """Backtest the strategy and return its gross performance.

        Parameters
        ----------
        SMA1, SMA2 : int
            Window lengths (in rows) of the two rolling means.
        start : int, optional
            Positional index of the first row included in the evaluation.
            Defaults to ``SMA2``.

        Returns
        -------
        pandas.Series
            ``exp`` of the summed log returns from ``start`` onwards, for
            the benchmark (``'r'``) and the strategy (``'s'``).
        """
        self.SMA1 = SMA1
        self.SMA2 = SMA2
        self.prepare_statistics()
        if start is None:
            start = self.SMA2
        # Remember the evaluation window so plot_results() shows the same
        # rows the returned performance figures are based on.
        self.start = start
        self.data['p'] = np.where(self.data['SMA1'] > self.data['SMA2'], 1, -1)
        # Previous row's position earns the current row's log return.
        self.data['s'] = self.data['p'].shift(1) * self.data['r']
        perf = self.data[['r', 's']].iloc[start:].sum().apply(np.exp)
        return perf

    def plot_results(self):
        """Plot cumulative gross performance of benchmark and strategy.

        Uses the evaluation window of the most recent ``backtest_strategy``
        call, which must have been run first.
        """
        self.data[['r', 's']].iloc[self.start:].cumsum().apply(np.exp).plot()

In [None]:
sma = SMABacktester('EUR=')
# sma = SMABacktester('GLD')
# sma = SMABacktester('GS.N')

In [None]:
sma.data.info()

In [None]:
sma.backtest_strategy(42, 252)

In [None]:
sma.data.info()

In [None]:
sma.plot_data([sma.symbol, 'SMA1', 'SMA2'])

In [None]:
sma.plot_results()

### Optimization

In [None]:
from itertools import product

In [None]:
sma1 = [30, 40, 50]

In [None]:
sma2 = [200, 240]

In [None]:
product(sma1, sma2)

In [None]:
list(product(sma1, sma2))

In [None]:
for SMA1, SMA2 in product(sma1, sma2):
    perf = sma.backtest_strategy(SMA1, SMA2)
    print(SMA1, SMA2, '\n', perf)

In [None]:
sma.plot_results()

## Optimization within Class

Here, "optimization" means brute-force optimization: evaluating a large number of parameter combinations and picking the best one.

In [None]:
class SMABacktester(SMABacktester):
    """Extends the previously defined ``SMABacktester`` with a parameter scan."""

    def optimize_parameters(self, sma1, sma2):
        """Backtest every (SMA1, SMA2) combination and store the results.

        All combinations are evaluated from the same starting row,
        ``max(sma2)``, so their performance figures cover the same window.

        Parameters
        ----------
        sma1, sma2 : iterable of int
            Candidate window lengths for the two rolling means.

        The outcome is stored in ``self.res``, a DataFrame with one row per
        combination and the columns ``SMA1``, ``SMA2``, ``BENCH`` (benchmark
        gross performance), ``STRAT`` (strategy gross performance) and
        ``OUTPERF`` (``STRAT - BENCH``).
        """
        # Materialize once: sma2 is consumed by max() and again by product().
        sma1, sma2 = list(sma1), list(sma2)
        start = max(sma2, default=None)  # unused when there is nothing to scan
        rows = []
        for SMA1, SMA2 in product(sma1, sma2):
            # Backtest on this instance, not on a global object.
            perf = self.backtest_strategy(SMA1, SMA2, start)
            rows.append({'SMA1': SMA1, 'SMA2': SMA2,
                         'BENCH': perf['r'], 'STRAT': perf['s'],
                         'OUTPERF': perf['s'] - perf['r']})
        # Build the frame once; DataFrame.append was removed in pandas 2.0
        # and appending row by row is quadratic.
        self.res = pd.DataFrame(
            rows, columns=['SMA1', 'SMA2', 'BENCH', 'STRAT', 'OUTPERF'])

In [None]:
sma = SMABacktester('EUR=')

In [None]:
sma.optimize_parameters(sma1, sma2)

In [None]:
sma.res

In [None]:
sma.res.sort_values('OUTPERF', ascending=False)

In [None]:
sma.res.groupby('SMA1').mean()[['STRAT', 'OUTPERF']]

In [None]:
sma.res.groupby('SMA2').mean()[['STRAT', 'OUTPERF']]

## Another Example

In [None]:
sma1 = range(30, 61, 1)

In [None]:
sma2 = range(150, 301, 2)

In [None]:
%time sma.optimize_parameters(sma1, sma2)

In [None]:
len(sma.res)

In [None]:
sma.res.head()

In [None]:
sma.res.sort_values('OUTPERF', ascending=False).head()

In [None]:
sma.res.groupby('SMA1').mean()[['STRAT', 'OUTPERF']].sort_values(
    'OUTPERF', ascending=False).head()

In [None]:
sma.res.groupby('SMA2').mean()[['STRAT', 'OUTPERF']].sort_values(
    'OUTPERF', ascending=False).head()

<img src="http://certificate.tpq.io/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>