## Backtesting trading strategies

In [23]:
import pandas as pd 
import yfinance as yf
import sys
sys.path.append('../src')
from data import load_preprocessed_data, load_close_data
from strategies import MACD
from hydra import initialize, compose
import talib
import numpy as np

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# load preprocessed data
with initialize(version_base=None, config_path="../conf"):
    datapipeline_cfg = compose(config_name='datapipeline')
stock_data = load_close_data(datapipeline_cfg)
stock_data.head()

Unnamed: 0,Date,A_Close,AA_Close,AAIC_Close,AAP_Close,AAT_Close,AB_Close,ABB_Close,ABC_Close,ABEV_Close,...,X_Close,XOM_Close,XOXO_Close,XPO_Close,XRM_Close,XRX_Close,XYL_Close,YUM_Close,ZBH_Close,ZTR_Close
0,2002-01-02,20.922747,85.739037,108.0,15.926667,21.25,48.349998,9.89,15.7125,0.6,...,17.5,39.599998,0.47,1.72927,240.0,27.457182,24.25,8.62509,29.349516,28.4
1,2002-01-03,22.246065,86.844421,116.0,15.1,21.309999,48.889999,10.61,15.425,0.6,...,18.129999,39.66,0.46,1.72927,233.0,27.40448,25.35,8.806614,29.165049,28.559999
2,2002-01-04,23.447783,89.631897,119.599998,14.166667,21.370001,50.049999,11.11,15.0375,0.6,...,18.450001,40.0,0.59,1.72927,233.0,26.482212,25.16,9.148095,28.68932,28.799999
3,2002-01-07,23.354794,91.698479,117.0,13.933333,21.25,50.299999,10.7,14.5125,0.6,...,18.4,39.650002,0.54,1.79844,233.199997,26.034256,25.24,9.318835,28.68932,28.84
4,2002-01-08,23.426323,89.72802,123.0,13.7,21.209999,50.130001,10.83,14.605,0.6,...,18.41,39.700001,0.59,1.839943,235.0,26.21871,24.75,9.498562,29.61165,28.84


In [3]:
df = stock_data.copy()

### Size of data 

In [61]:
num_companies = len(df.filter(regex='Close').columns.tolist())
print(f'20 years of daily data from {num_companies} companies')

20 years of daily data from 1425 companies


### Missing values

In [55]:
# Top 10 tickers with missing values 
missing_values = df.isnull().any().sum()
print(f'Number of tickers with missing values: {missing_values}\
      \nTop 10 tickers with missing values:')
df.isnull().sum().sort_values(ascending=False)[:10]

Number of tickers with missing values: 468      
Top 10 tickers with missing values:


CPRI_Close    2508
CIVI_Close    2508
VTLE_Close    2508
BUI_Close     2493
APTV_Close    2489
VAC_Close     2482
TTP_Close     2474
GNE_Close     2473
UI_Close      2465
XYL_Close     2464
dtype: int64

### MACD

In [41]:
df_new = df.iloc[:,1:].apply(lambda x: MACD(x))
df_new['Date'] = df['Date']
df_new

Unnamed: 0,A_Close,AA_Close,AAIC_Close,AAP_Close,AAT_Close,AB_Close,ABB_Close,ABC_Close,ABEV_Close,ABG_Close,...,X_Close,XOM_Close,XOXO_Close,XPO_Close,XRM_Close,XRX_Close,XYL_Close,YUM_Close,ZBH_Close,ZTR_Close
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5282,,,,,,,,,,,...,,,,,,,,,,
5283,,,,,,,,,,,...,,long,,,,,,,,
5284,,,,,,,,,,,...,,,,,,,,,,
5285,,,,,,,,,,,...,,,,,,,,,,


In [46]:
macd_df = pd.DataFrame()
test_df = yf.download('A', start="2002-01-01",
                   end="2003-01-01", interval = "1d",
                   )
macd = talib.MACD(test_df['Close'], fastperiod=12, slowperiod=26, signalperiod=9) 
macd_df['histogram'] = macd[2] # macd line - signal line
macd_df['macd'] = macd[0]

#entry where 1 = enter and 0 = do not enter trade
macd_df['long_signal'] = np.where((macd_df['histogram']>0) &(macd_df['histogram'].shift(1)<=0),1,0)
macd_df['short_signal'] = np.where((macd_df['histogram']<0) &(macd_df['histogram'].shift(1)>=0),1,0)
# trading positions where you enter the trade 1 day after the signal (assuming that the closing price is used to calculate the EMA)
macd_df['long_position'] = np.where(macd_df['long_signal'].shift(1)==1,1,0) 
macd_df['short_position'] = np.where(macd_df['short_signal'].shift(1)==1,1,0)
macd_df['position'] = np.where(macd_df['long_position'] == 1, 'long', np.where(macd_df['short_position'] == 1, 'short', np.nan))
macd_df


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,histogram,macd,long_signal,short_signal,long_position,short_position,position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-01-02,,,0,0,0,0,
2002-01-03,,,0,0,0,0,
2002-01-04,,,0,0,0,0,
2002-01-07,,,0,0,0,0,
2002-01-08,,,0,0,0,0,
...,...,...,...,...,...,...,...
2002-12-24,-0.034280,0.384308,0,0,0,0,
2002-12-26,-0.018510,0.395451,0,0,0,0,
2002-12-27,-0.015994,0.393969,0,0,0,0,
2002-12-30,-0.025989,0.377476,0,0,0,0,


In [62]:
macd_df = pd.DataFrame()
test_df = yf.download('AMZN', start="2017-01-01",
                   end="2017-04-30", interval = "1d",
                   )
macd = talib.MACD(test_df['Close'], fastperiod=12, slowperiod=26, signalperiod=9) 
macd_df['histogram'] = macd[2] # macd line - signal line
macd_df['macd'] = macd[0]

#entry where 1 = enter and 0 = do not enter trade
macd_df['long_signal'] = np.where((macd_df['histogram']>0) &(macd_df['histogram'].shift(1)<=0),1,0)
macd_df['short_signal'] = np.where((macd_df['histogram']<0) &(macd_df['histogram'].shift(1)>=0),1,0)
# trading positions where you enter the trade 1 day after the signal (assuming that the closing price is used to calculate the EMA)
macd_df['long_position'] = np.where(macd_df['long_signal'].shift(1)==1,1,0) 
macd_df['short_position'] = np.where(macd_df['short_signal'].shift(1)==1,1,0)
macd_df['position'] = np.where(macd_df['long_position'] == 1, 'long', np.where(macd_df['short_position'] == 1, 'short', np.nan))
# drop unnecessary columns to free space 
macd_df = macd_df.drop(['long_position', 'short_position','long_signal','short_signal'], axis=1)
# macd_df['position'] = np.where((macd_df['long_position']==1)|(macd_df['short_position']==1),1,0)

#exit (2-day RSI of single day is greater than 65 for long positions, and less than 35 for short positions)
macd_df['Close'] = test_df['Close']
macd_df['Date'] = macd_df.index
macd_df.reset_index(inplace=True,drop=True)
macd_df['rsi'] = talib.RSI(macd_df['Close'],timeperiod=2)
macd_df['entry_date'] = macd_df.apply(lambda _: '', axis=1)
for i in macd_df.loc[macd_df['position']==1].index:
    macd_df['entry_date'].iloc[i] = macd_df['Date'].iloc[i]
macd_df['exit_long'] = np.where(macd_df['rsi'].shift(1)>65,1,0) #signal to exit is generated when 2-period RSI above 65, but exit trade is only executed on following day.
macd_df['exit_short'] = np.where(macd_df['rsi'].shift(1)<35,1,0)
macd_df['exit_date'] = ''
# exit long positions
for i in macd_df.loc[macd_df['long_position']==1].index:
    new_macd_df = macd_df.loc[i+2:]
    if len(new_macd_df) == 0: #index out of range, so use last date as exit
        macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]
    for j in range(len(new_macd_df)):
        if new_macd_df['exit_long'].iloc[j]==1:
            macd_df['exit_date'].iloc[i] = new_macd_df['Date'].iloc[j]
            break
        else:
            macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]
# exit short positions
for i in macd_df.loc[macd_df['short_position']==1].index:
    new_macd_df = macd_df.loc[i+2:]
    if len(new_macd_df) == 0: #index out of range, so use last date as exit
        macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]
    for j in range(len(new_macd_df)):
        if new_macd_df['exit_short'].iloc[j]==1:
            macd_df['exit_date'].iloc[i] = new_macd_df['Date'].iloc[j]
            break
        else:
            macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]

### Profit
macd_df['profit'] = ''
macd_df['winlose'] = ''

#long profit
for i in macd_df[macd_df['long_position']==1].index:
    start_price = float(macd_df[macd_df['Date']==macd_df['entry_date'].iloc[i]]['Close'])
    end_price = float(macd_df[macd_df['Date']==macd_df['exit_date'].iloc[i]]['Close'])    
    profit = 1+(end_price-start_price)/start_price
    if profit > 1:
        winlose = 1 
    else:
        winlose = 0
    macd_df['profit'].iloc[i] = profit
    macd_df['winlose'].iloc[i] = winlose

#short profit
for i in macd_df[macd_df['short_position']==1].index:
    start_price = float(macd_df[macd_df['Date']==macd_df['exit_date'].iloc[i]]['Close'])
    end_price = float(macd_df[macd_df['Date']==macd_df['entry_date'].iloc[i]]['Close'])  
    profit = 1+(end_price-start_price)/start_price
    if profit > 1:
        winlose = 1 
    else:
        winlose = 0
    macd_df['profit'].iloc[i] = profit
    macd_df['winlose'].iloc[i] = winlose

return macd_df[macd_df['position']==1]


[*********************100%***********************]  1 of 1 completed


KeyError: 'long_position'

In [35]:
index = macd_df[macd_df['Date'] == '2017-03-30'].index[0]
macd_df.loc[index:]

Unnamed: 0,histogram,macd,long_signal,short_signal,long_position,short_position,position,Close,Date,rsi,entry_date,exit_long,exit_short,exit_date
60,0.065857,0.329368,0,0,1,0,1,43.817001,2017-03-30,98.265638,2017-03-30 00:00:00,1,0,2017-04-03 00:00:00
61,0.126896,0.422131,0,0,0,0,0,44.327,2017-03-31,99.295241,,1,0,
62,0.171669,0.509822,0,0,0,0,0,44.5755,2017-04-03,99.553532,,1,0,
63,0.236534,0.63382,0,0,0,0,0,45.341499,2017-04-04,99.863023,,1,0,
64,0.268987,0.73352,0,0,0,0,0,45.464001,2017-04-05,99.887881,,1,0,
65,0.235892,0.759398,0,0,0,0,0,44.914001,2017-04-06,37.985993,,1,0,
66,0.187161,0.757457,0,0,0,0,0,44.743999,2017-04-07,27.464372,,0,0,
67,0.180408,0.795806,0,0,0,0,0,45.352001,2017-04-10,75.669417,,0,1,
68,0.146174,0.798116,0,0,0,0,0,45.118,2017-04-11,50.060957,,1,0,
69,0.091551,0.76638,0,0,0,0,0,44.811501,2017-04-12,26.535672,,0,0,


In [25]:
macd_df[macd_df['long_position']==1]

Unnamed: 0,histogram,macd,long_signal,short_signal,long_position,short_position,position,Close,Date,rsi,entry_date,exit_long,exit_short,exit_date
60,0.065857,0.329368,0,0,1,0,1,43.817001,2017-03-30,98.265638,2017-03-30 00:00:00,1,0,2017-04-03 00:00:00


In [26]:
macd_df.loc['2017-03-27':]

Unnamed: 0_level_0,histogram,macd,long_signal,short_signal,long_position,short_position,position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-03-27,-0.105279,0.156655,0,0,0,0,0
2017-03-28,-0.070606,0.173677,0,0,0,0,0
2017-03-29,0.011058,0.258105,1,0,0,0,0
2017-03-30,0.065857,0.329368,0,0,1,0,1
2017-03-31,0.126896,0.422131,0,0,0,0,0
2017-04-03,0.171669,0.509822,0,0,0,0,0
2017-04-04,0.236534,0.63382,0,0,0,0,0
2017-04-05,0.268987,0.73352,0,0,0,0,0
2017-04-06,0.235892,0.759398,0,0,0,0,0
2017-04-07,0.187161,0.757457,0,0,0,0,0


In [5]:
aapl = yf.Ticker("aapl")
aapl.dividends

Date
1987-05-11 00:00:00-04:00    0.000536
1987-08-10 00:00:00-04:00    0.000536
1987-11-17 00:00:00-05:00    0.000714
1988-02-12 00:00:00-05:00    0.000714
1988-05-16 00:00:00-04:00    0.000714
                               ...   
2022-02-04 00:00:00-05:00    0.220000
2022-05-06 00:00:00-04:00    0.230000
2022-08-05 00:00:00-04:00    0.230000
2022-11-04 00:00:00-04:00    0.230000
2023-02-10 00:00:00-05:00    0.230000
Name: Dividends, Length: 78, dtype: float64

In [7]:
data = pd.DataFrame()
for i in ['AMZN']:
    stock = yf.download(i, start="2017-01-01",
                   end="2017-04-30", interval = "1d",
                   )
    stock = stock.dropna().reset_index() 
    #stock = pd.MultiIndex.from_frame(stock,names=[])
stock

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-01-03,37.896000,37.938000,37.384998,37.683498,37.683498,70422000
1,2017-01-04,37.919498,37.984001,37.709999,37.859001,37.859001,50210000
2,2017-01-05,38.077499,39.119999,38.013000,39.022499,39.022499,116602000
3,2017-01-06,39.118000,39.972000,38.924000,39.799500,39.799500,119724000
4,2017-01-09,39.900002,40.088501,39.588501,39.846001,39.846001,68922000
...,...,...,...,...,...,...,...
76,2017-04-24,45.433998,45.499500,45.191002,45.370499,45.370499,62458000
77,2017-04-25,45.352001,45.473999,45.150002,45.381001,45.381001,67612000
78,2017-04-26,45.514999,45.787498,45.377998,45.464500,45.464500,52178000
79,2017-04-27,45.719501,46.092999,45.605499,45.918999,45.918999,106110000
