## Backtesting trading strategies

In [50]:
import pandas as pd 
import yfinance as yf
import sys
sys.path.append('../src')
from data import load_preprocessed_data, load_close_data
from strategies import MACD
from hydra import initialize, compose
import talib
import numpy as np
import multiprocessing
from golden_death_cross import GoldenDeathCross

sys.path.append("../")
import conf

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# load preprocessed data
with initialize(version_base=None, config_path="../conf"):
    datapipeline_cfg = compose(config_name='datapipeline')
stock_data = load_close_data(datapipeline_cfg)
stock_data.head()

Unnamed: 0,Date,A_Close,AA_Close,AAIC_Close,AAP_Close,AAT_Close,AB_Close,ABB_Close,ABC_Close,ABEV_Close,...,X_Close,XOM_Close,XOXO_Close,XPO_Close,XRM_Close,XRX_Close,XYL_Close,YUM_Close,ZBH_Close,ZTR_Close
0,2002-01-02,20.922747,85.739037,108.0,15.926667,21.25,48.349998,9.89,15.7125,0.6,...,17.5,39.599998,0.47,1.72927,240.0,27.457182,24.25,8.62509,29.349516,28.4
1,2002-01-03,22.246065,86.844421,116.0,15.1,21.309999,48.889999,10.61,15.425,0.6,...,18.129999,39.66,0.46,1.72927,233.0,27.40448,25.35,8.806614,29.165049,28.559999
2,2002-01-04,23.447783,89.631897,119.599998,14.166667,21.370001,50.049999,11.11,15.0375,0.6,...,18.450001,40.0,0.59,1.72927,233.0,26.482212,25.16,9.148095,28.68932,28.799999
3,2002-01-07,23.354794,91.698479,117.0,13.933333,21.25,50.299999,10.7,14.5125,0.6,...,18.4,39.650002,0.54,1.79844,233.199997,26.034256,25.24,9.318835,28.68932,28.84
4,2002-01-08,23.426323,89.72802,123.0,13.7,21.209999,50.130001,10.83,14.605,0.6,...,18.41,39.700001,0.59,1.839943,235.0,26.21871,24.75,9.498562,29.61165,28.84


In [4]:
df = stock_data.copy()

### Size of data 

In [12]:
num_companies = len(df.filter(regex='Close').columns.tolist())
print(f'20 years of daily data from {num_companies} companies')

20 years of daily data from 1429 companies


### Missing values

In [13]:
# Top 10 tickers with missing values 
missing_values = df.isnull().any().sum()
print(f'Number of tickers with missing values: {missing_values}\
      \nTop 10 tickers with missing values:')
df.isnull().sum().sort_values(ascending=False)[:10]

Number of tickers with missing values: 477      
Top 10 tickers with missing values:


CIVI_Close    2508
VTLE_Close    2508
CPRI_Close    2508
BUI_Close     2493
APTV_Close    2489
VAC_Close     2482
TTP_Close     2474
GNE_Close     2473
UI_Close      2465
XYL_Close     2464
dtype: int64

### MACD

In [53]:
macd_positions = MACD(df) 
positions = macd_positions.get_entry_exit_signal()
positions

Unnamed: 0_level_0,AAIC_Entry,AAIC_Exit,AAP_Entry,AAP_Exit,AAT_Entry,AAT_Exit,AA_Entry,AA_Exit,ABB_Entry,ABB_Exit,...,XYL_Entry,XYL_Exit,X_Entry,X_Exit,YUM_Entry,YUM_Exit,ZBH_Entry,ZBH_Exit,ZTR_Entry,ZTR_Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-01-02,,,,,,,,,,,...,,,,,,,,,,
2002-01-03,,,,,,,,,,,...,,,,,,,,,,
2002-01-04,,,,,,,,,,,...,,,,,,,,,,
2002-01-07,,exit_long,,exit_short,,exit_long,,exit_long,,exit_long,...,,exit_long,,exit_long,,exit_long,,exit_short,,exit_long
2002-01-08,,exit_long,,exit_short,,exit_short,,exit_long,,,...,,exit_long,,exit_long,,exit_long,,exit_short,,exit_long
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,,exit_short,,exit_long,,,,,,exit_short,...,,,,,,,,exit_long,,exit_short
2022-12-27,,,,exit_long,,,,,,,...,,,,,,,,,,
2022-12-28,,exit_short,,exit_long,,,,exit_long,,exit_long,...,,,,,,exit_long,,exit_long,,
2022-12-29,,exit_short,,exit_long,,,,exit_short,,,...,,,,exit_short,,,,exit_short,,exit_short


In [5]:
macd_positions = pd.read_csv('../data/intermediate/macd_positions.csv')
macd_positions.drop(['Unnamed: 0'], axis=1,inplace=True)
macd_positions.head()

  macd_positions = pd.read_csv('../data/intermediate/macd_positions.csv')


Unnamed: 0,A_Position,A_Entry,A_Exit,AA_Position,AA_Entry,AA_Exit,AAIC_Position,AAIC_Entry,AAIC_Exit,AAP_Position,...,XYL_Exit,YUM_Position,YUM_Entry,YUM_Exit,ZBH_Position,ZBH_Entry,ZBH_Exit,ZTR_Position,ZTR_Entry,ZTR_Exit
0,long,2002-03-11,2002-03-19,long,2002-03-19,2002-03-20,long,2002-03-08,2002-03-11,long,...,2002-04-01,long,2002-02-26,2002-02-27,long,2002-04-29,2002-05-01,long,2002-03-06,2002-03-07
1,long,2002-03-21,2002-03-22,long,2002-06-20,2002-06-21,long,2002-04-12,2002-04-15,long,...,2002-05-09,long,2002-04-10,2002-04-11,long,2002-06-20,2002-06-28,long,2002-03-21,2002-03-26
2,long,2002-04-29,2002-05-01,long,2002-07-29,2002-07-30,long,2002-04-30,2002-05-01,long,...,2002-08-02,long,2002-04-23,2002-04-24,long,2002-07-26,2002-07-29,long,2002-03-26,2002-03-28
3,long,2002-06-13,2002-06-14,long,2002-09-12,2002-09-17,long,2002-05-20,2002-06-04,long,...,2002-08-21,long,2002-05-16,2002-05-20,long,2002-09-09,2002-09-19,long,2002-04-15,2002-04-17
4,long,2002-08-01,2002-08-07,long,2002-09-30,2002-10-03,long,2002-06-11,2002-06-12,long,...,2002-08-21,long,2002-06-11,2002-06-19,long,2002-09-23,2002-09-24,long,2002-07-08,2002-07-18


### Golden death cross

In [31]:
golden_death_cross = GoldenDeathCross(df)
golden_death_dates = golden_death_cross.get_cross_dates()
golden_death_dates

ValueError: If using all scalar values, you must pass an index

In [36]:
golden_death_cross = GoldenDeathCross(df)
output = golden_death_cross.get_cross(df['A_Close'])
output

Unnamed: 0,Date,cross,long_term_ma
0,2002-01-02,,
1,2002-01-03,,
2,2002-01-04,,
3,2002-01-07,,
4,2002-01-08,,
...,...,...,...
5282,2022-12-23,,131.31755
5283,2022-12-27,,131.41375
5284,2022-12-28,,131.51630
5285,2022-12-29,,131.62035


### Optimising MACD strategy

In [15]:
# try and optimise 

def get_entry_exit_position(enter: pd.Series, exit: pd.Series):
    position_df = pd.DataFrame(columns=["Position", "Entry", "Exit"])

    # Process long positions
    entry_long_idx = pd.Series(enter[enter == "enter_long"].index)
    exit_long_idx = pd.Series(exit[exit == "exit_long"].index)

    long_positions = pd.concat([entry_long_idx, exit_long_idx])
    long_positions = long_positions.sort_values()

    # Get exit positions for long positions
    exit_long_positions = long_positions[long_positions.isin(exit_long_idx)]
    # exit_long_positions = exit_long_positions[1:]  # Exclude the first entry position

    return exit_long_positions

get_entry_exit_position(positions[column_pairs[0][0]],positions[column_pairs[0][1]])

0       2002-01-07
1       2002-01-08
2       2002-01-09
3       2002-01-18
4       2002-01-23
           ...    
1975    2022-11-28
1976    2022-12-02
1977    2022-12-05
1978    2022-12-20
1979    2022-12-30
Name: Date, Length: 2154, dtype: object

In [6]:
# timeit
import timeit
mysetup = """
import pandas as pd
import sys
sys.path.append('../src')
from data import load_preprocessed_data, load_close_data
from strategies import MACD
from hydra import initialize, compose
import numpy as np
with initialize(version_base=None, config_path="../conf"):
    datapipeline_cfg = compose(config_name='datapipeline')
stock_data = load_close_data(datapipeline_cfg)
df = stock_data.copy()
macd_positions = MACD(df) 
positions = macd_positions.apply()
"""
mycode = '''
columns = positions.columns
for i in range(0, len(columns), 2):
    col1 = columns[i]
    col2 = columns[i + 1]
    col_name = f'{col1.split("_")[0]}_Exit_Position'
    exit_position = macd_positions.get_exit_position(positions[col1],positions[col2]).rename(col_name)
    positions = pd.concat([positions, exit_position], axis=1)
'''
print(timeit.timeit(setup=mysetup,
                    stmt=mycode,
                    number=1))

1050.2912589


Dask parallel processing saved about 4min 

In [54]:
import dask
def lazy_function(pair):
    col1,col2 = pair[0],pair[1]
    exit_position = macd_positions.get_entry_exit_position(positions[col1],positions[col2])
    return exit_position
lazy_list = []
column_pairs = [(positions.columns[i], positions.columns[i+1]) for i in range(0, len(positions.columns), 2)]
for pair in column_pairs:
    exit_position = dask.delayed(lazy_function)(pair)
    lazy_list.append(exit_position)
exit_position = dask.compute(*lazy_list)

In [31]:
# merge into 1 dataframe and save it
tickers = [col.split('_')[0] for col in df.iloc[:, 1:].columns]
for i in range(len(exit_position)):
    exit_position[i].rename(columns=lambda x: tickers[i] + '_' + x, inplace=True)
merged_df = pd.concat(exit_position, axis=1)

['A',
 'AA',
 'AAIC',
 'AAP',
 'AAT',
 'AB',
 'ABB',
 'ABC',
 'ABEV',
 'ABG',
 'ABM',
 'ABR',
 'ABT',
 'ACCO',
 'ACM',
 'ACN',
 'ACR',
 'ADC',
 'ADM',
 'ADX',
 'AEE',
 'AEG',
 'AEL',
 'AEM',
 'AEO',
 'AEP',
 'AER',
 'AES',
 'AET',
 'AFB',
 'AFG',
 'AFL',
 'AFT',
 'AG',
 'AGCO',
 'AGI',
 'AGM',
 'AGO',
 'AGRO',
 'AGX',
 'AHT',
 'AIG',
 'AIN',
 'AIR',
 'AIT',
 'AIV',
 'AIZ',
 'AJG',
 'AJRD',
 'AKR',
 'AL',
 'ALB',
 'ALE',
 'ALG',
 'ALK',
 'ALL',
 'ALR',
 'ALV',
 'ALX',
 'AME',
 'AMG',
 'AMN',
 'AMP',
 'AMRC',
 'AMT',
 'AN',
 'ANDV',
 'ANF',
 'AON',
 'AORT',
 'AOS',
 'AP',
 'APA',
 'APD',
 'APH',
 'APO',
 'APTV',
 'AQN',
 'ARC',
 'ARCO',
 'ARE',
 'ARGO',
 'ARI',
 'ARL',
 'AROC',
 'ARR',
 'ARW',
 'ASA',
 'ASB',
 'ASG',
 'ASGN',
 'ASH',
 'ASR',
 'ASXC',
 'ATCO',
 'ATGE',
 'ATI',
 'ATO',
 'ATOR',
 'ATR',
 'AUB',
 'AUD',
 'AUY',
 'AVA',
 'AVB',
 'AVD',
 'AVNT',
 'AVY',
 'AWF',
 'AWI',
 'AWK',
 'AWR',
 'AX',
 'AXL',
 'AXP',
 'AXR',
 'AXS',
 'AYI',
 'AZO',
 'AZZ',
 'B',
 'BA',
 'BAC',
 'BAH',
 

In [51]:
from dask.dataframe import from_pandas
# ddf = from_pandas(positions, npartitions=1425)

def lazy_function(partition):
    col1,col2 = partition.columns[0],partition.columns[1]
    exit_position = macd_positions.get_exit_position(partition[col1],partition[col2])
    return exit_position

# Apply split_dataframe_by_columns to each partition
partitions = [positions.iloc[:, i:i+2] for i in range(0, positions.shape[1], 2)]
ddf_partitions = [from_pandas(partition, npartitions=1) for partition in partitions]
result = [partition.map_partitions(lazy_function).compute() for partition in ddf_partitions]
# result = new_df.compute()
print(result)


KeyboardInterrupt: 

In [50]:
import multiprocessing as mp
pool = mp.Pool()

def lazy_function(pair):
    col1,col2 = pair[0],pair[1]
    exit_position = macd_positions.get_entry_exit_position(positions[col1],positions[col2])
    return exit_position
lazy_list = []
column_pairs = [(positions.columns[i], positions.columns[i+1]) for i in range(0, len(positions.columns), 2)]
for pair in column_pairs:
    result = pool.apply_async(lazy_function, args=(pair,))
    lazy_list.append(result)
pool.close()
pool.join()

[     AAIC_Entry   AAIC_Exit
 0           nan         nan
 1           nan         nan
 2           nan         nan
 3           nan   exit_long
 4           nan   exit_long
 ...         ...         ...
 5282        nan  exit_short
 5283        nan         nan
 5284        nan  exit_short
 5285        nan  exit_short
 5286        nan   exit_long
 
 [5287 rows x 2 columns],
      AAP_Entry    AAP_Exit
 0          nan         nan
 1          nan         nan
 2          nan         nan
 3          nan  exit_short
 4          nan  exit_short
 ...        ...         ...
 5282       nan   exit_long
 5283       nan   exit_long
 5284       nan   exit_long
 5285       nan   exit_long
 5286       nan   exit_long
 
 [5287 rows x 2 columns],
      AAT_Entry    AAT_Exit
 0          nan         nan
 1          nan         nan
 2          nan         nan
 3          nan   exit_long
 4          nan  exit_short
 ...        ...         ...
 5282       nan         nan
 5283       nan         nan
 5284   

In [14]:
import concurrent.futures
def lazy_function(pair):
    col1,col2 = pair[0],pair[1]
    exit_position = macd_positions.get_entry_exit_position(positions[col1],positions[col2])
    return exit_position
column_pairs = [(positions.columns[i], positions.columns[i+1]) for i in range(0, len(positions.columns), 2)]
with concurrent.futures.ProcessPoolExecutor() as executor:
    # can replace ProcessPoolExecutor with ThreadPoolExecutor
    result = list(executor.map(lazy_function, [pair for pair in column_pairs]))

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

In [31]:
# convert to dask df
from dask.dataframe import from_pandas
ddf = from_pandas(positions, npartitions=2)
column_pairs = [(ddf.columns[i], ddf.columns[i+1]) for i in range(0, len(ddf.columns), 2)]
for pair in column_pairs:
pd.concat([positions, exit_position], axis=1)


In [30]:
macd_df = pd.DataFrame()
test_df = yf.download('A', start="2002-01-01",
                   end="2003-01-01", interval = "1d",
                   )
macd_df["rsi"] = talib.RSI(test_df['Close'], timeperiod=2)
macd_df.iloc[2:10]

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,rsi
Date,Unnamed: 1_level_1
2002-01-04,100.0
2002-01-07,93.139912
2002-01-08,93.794802
2002-01-09,37.679751
2002-01-10,27.234886
2002-01-11,10.859347
2002-01-14,5.179905
2002-01-15,9.7479


In [46]:
macd_df = pd.DataFrame()
test_df = yf.download('A', start="2002-01-01",
                   end="2003-01-01", interval = "1d",
                   )
macd = talib.MACD(test_df['Close'], fastperiod=12, slowperiod=26, signalperiod=9) 
macd_df['histogram'] = macd[2] # macd line - signal line
macd_df['macd'] = macd[0]

#entry where 1 = enter and 0 = do not enter trade
macd_df['long_signal'] = np.where((macd_df['histogram']>0) &(macd_df['histogram'].shift(1)<=0),1,0)
macd_df['short_signal'] = np.where((macd_df['histogram']<0) &(macd_df['histogram'].shift(1)>=0),1,0)
# trading positions where you enter the trade 1 day after the signal (assuming that the closing price is used to calculate the EMA)
macd_df['long_position'] = np.where(macd_df['long_signal'].shift(1)==1,1,0) 
macd_df['short_position'] = np.where(macd_df['short_signal'].shift(1)==1,1,0)
macd_df['position'] = np.where(macd_df['long_position'] == 1, 'long', np.where(macd_df['short_position'] == 1, 'short', np.nan))
macd_df


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,histogram,macd,long_signal,short_signal,long_position,short_position,position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-01-02,,,0,0,0,0,
2002-01-03,,,0,0,0,0,
2002-01-04,,,0,0,0,0,
2002-01-07,,,0,0,0,0,
2002-01-08,,,0,0,0,0,
...,...,...,...,...,...,...,...
2002-12-24,-0.034280,0.384308,0,0,0,0,
2002-12-26,-0.018510,0.395451,0,0,0,0,
2002-12-27,-0.015994,0.393969,0,0,0,0,
2002-12-30,-0.025989,0.377476,0,0,0,0,


In [62]:
macd_df = pd.DataFrame()
test_df = yf.download('AMZN', start="2017-01-01",
                   end="2017-04-30", interval = "1d",
                   )
macd = talib.MACD(test_df['Close'], fastperiod=12, slowperiod=26, signalperiod=9) 
macd_df['histogram'] = macd[2] # macd line - signal line
macd_df['macd'] = macd[0]

#entry where 1 = enter and 0 = do not enter trade
macd_df['long_signal'] = np.where((macd_df['histogram']>0) &(macd_df['histogram'].shift(1)<=0),1,0)
macd_df['short_signal'] = np.where((macd_df['histogram']<0) &(macd_df['histogram'].shift(1)>=0),1,0)
# trading positions where you enter the trade 1 day after the signal (assuming that the closing price is used to calculate the EMA)
macd_df['long_position'] = np.where(macd_df['long_signal'].shift(1)==1,1,0) 
macd_df['short_position'] = np.where(macd_df['short_signal'].shift(1)==1,1,0)
macd_df['position'] = np.where(macd_df['long_position'] == 1, 'long', np.where(macd_df['short_position'] == 1, 'short', np.nan))
# drop unnecessary columns to free space 
macd_df = macd_df.drop(['long_position', 'short_position','long_signal','short_signal'], axis=1)
# macd_df['position'] = np.where((macd_df['long_position']==1)|(macd_df['short_position']==1),1,0)

#exit (2-day RSI of single day is greater than 65 for long positions, and less than 35 for short positions)
macd_df['Close'] = test_df['Close']
macd_df['Date'] = macd_df.index
macd_df.reset_index(inplace=True,drop=True)
macd_df['rsi'] = talib.RSI(macd_df['Close'],timeperiod=2)
macd_df['entry_date'] = macd_df.apply(lambda _: '', axis=1)
for i in macd_df.loc[macd_df['position']==1].index:
    macd_df['entry_date'].iloc[i] = macd_df['Date'].iloc[i]
macd_df['exit_long'] = np.where(macd_df['rsi'].shift(1)>65,1,0) #signal to exit is generated when 2-period RSI above 65, but exit trade is only executed on following day.
macd_df['exit_short'] = np.where(macd_df['rsi'].shift(1)<35,1,0)
macd_df['exit_date'] = ''
# exit long positions
for i in macd_df.loc[macd_df['long_position']==1].index:
    new_macd_df = macd_df.loc[i+2:]
    if len(new_macd_df) == 0: #index out of range, so use last date as exit
        macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]
    for j in range(len(new_macd_df)):
        if new_macd_df['exit_long'].iloc[j]==1:
            macd_df['exit_date'].iloc[i] = new_macd_df['Date'].iloc[j]
            break
        else:
            macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]
# exit short positions
for i in macd_df.loc[macd_df['short_position']==1].index:
    new_macd_df = macd_df.loc[i+2:]
    if len(new_macd_df) == 0: #index out of range, so use last date as exit
        macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]
    for j in range(len(new_macd_df)):
        if new_macd_df['exit_short'].iloc[j]==1:
            macd_df['exit_date'].iloc[i] = new_macd_df['Date'].iloc[j]
            break
        else:
            macd_df['exit_date'].iloc[i] = macd_df['Date'].iloc[-1]

### Profit
macd_df['profit'] = ''
macd_df['winlose'] = ''

#long profit
for i in macd_df[macd_df['long_position']==1].index:
    start_price = float(macd_df[macd_df['Date']==macd_df['entry_date'].iloc[i]]['Close'])
    end_price = float(macd_df[macd_df['Date']==macd_df['exit_date'].iloc[i]]['Close'])    
    profit = 1+(end_price-start_price)/start_price
    if profit > 1:
        winlose = 1 
    else:
        winlose = 0
    macd_df['profit'].iloc[i] = profit
    macd_df['winlose'].iloc[i] = winlose

#short profit
for i in macd_df[macd_df['short_position']==1].index:
    start_price = float(macd_df[macd_df['Date']==macd_df['exit_date'].iloc[i]]['Close'])
    end_price = float(macd_df[macd_df['Date']==macd_df['entry_date'].iloc[i]]['Close'])  
    profit = 1+(end_price-start_price)/start_price
    if profit > 1:
        winlose = 1 
    else:
        winlose = 0
    macd_df['profit'].iloc[i] = profit
    macd_df['winlose'].iloc[i] = winlose

return macd_df[macd_df['position']==1]


[*********************100%***********************]  1 of 1 completed


KeyError: 'long_position'

In [35]:
index = macd_df[macd_df['Date'] == '2017-03-30'].index[0]
macd_df.loc[index:]

Unnamed: 0,histogram,macd,long_signal,short_signal,long_position,short_position,position,Close,Date,rsi,entry_date,exit_long,exit_short,exit_date
60,0.065857,0.329368,0,0,1,0,1,43.817001,2017-03-30,98.265638,2017-03-30 00:00:00,1,0,2017-04-03 00:00:00
61,0.126896,0.422131,0,0,0,0,0,44.327,2017-03-31,99.295241,,1,0,
62,0.171669,0.509822,0,0,0,0,0,44.5755,2017-04-03,99.553532,,1,0,
63,0.236534,0.63382,0,0,0,0,0,45.341499,2017-04-04,99.863023,,1,0,
64,0.268987,0.73352,0,0,0,0,0,45.464001,2017-04-05,99.887881,,1,0,
65,0.235892,0.759398,0,0,0,0,0,44.914001,2017-04-06,37.985993,,1,0,
66,0.187161,0.757457,0,0,0,0,0,44.743999,2017-04-07,27.464372,,0,0,
67,0.180408,0.795806,0,0,0,0,0,45.352001,2017-04-10,75.669417,,0,1,
68,0.146174,0.798116,0,0,0,0,0,45.118,2017-04-11,50.060957,,1,0,
69,0.091551,0.76638,0,0,0,0,0,44.811501,2017-04-12,26.535672,,0,0,


In [25]:
macd_df[macd_df['long_position']==1]

Unnamed: 0,histogram,macd,long_signal,short_signal,long_position,short_position,position,Close,Date,rsi,entry_date,exit_long,exit_short,exit_date
60,0.065857,0.329368,0,0,1,0,1,43.817001,2017-03-30,98.265638,2017-03-30 00:00:00,1,0,2017-04-03 00:00:00


In [26]:
macd_df.loc['2017-03-27':]

Unnamed: 0_level_0,histogram,macd,long_signal,short_signal,long_position,short_position,position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-03-27,-0.105279,0.156655,0,0,0,0,0
2017-03-28,-0.070606,0.173677,0,0,0,0,0
2017-03-29,0.011058,0.258105,1,0,0,0,0
2017-03-30,0.065857,0.329368,0,0,1,0,1
2017-03-31,0.126896,0.422131,0,0,0,0,0
2017-04-03,0.171669,0.509822,0,0,0,0,0
2017-04-04,0.236534,0.63382,0,0,0,0,0
2017-04-05,0.268987,0.73352,0,0,0,0,0
2017-04-06,0.235892,0.759398,0,0,0,0,0
2017-04-07,0.187161,0.757457,0,0,0,0,0


In [5]:
aapl = yf.Ticker("aapl")
aapl.dividends

Date
1987-05-11 00:00:00-04:00    0.000536
1987-08-10 00:00:00-04:00    0.000536
1987-11-17 00:00:00-05:00    0.000714
1988-02-12 00:00:00-05:00    0.000714
1988-05-16 00:00:00-04:00    0.000714
                               ...   
2022-02-04 00:00:00-05:00    0.220000
2022-05-06 00:00:00-04:00    0.230000
2022-08-05 00:00:00-04:00    0.230000
2022-11-04 00:00:00-04:00    0.230000
2023-02-10 00:00:00-05:00    0.230000
Name: Dividends, Length: 78, dtype: float64

In [7]:
data = pd.DataFrame()
for i in ['AMZN']:
    stock = yf.download(i, start="2017-01-01",
                   end="2017-04-30", interval = "1d",
                   )
    stock = stock.dropna().reset_index() 
    #stock = pd.MultiIndex.from_frame(stock,names=[])
stock

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-01-03,37.896000,37.938000,37.384998,37.683498,37.683498,70422000
1,2017-01-04,37.919498,37.984001,37.709999,37.859001,37.859001,50210000
2,2017-01-05,38.077499,39.119999,38.013000,39.022499,39.022499,116602000
3,2017-01-06,39.118000,39.972000,38.924000,39.799500,39.799500,119724000
4,2017-01-09,39.900002,40.088501,39.588501,39.846001,39.846001,68922000
...,...,...,...,...,...,...,...
76,2017-04-24,45.433998,45.499500,45.191002,45.370499,45.370499,62458000
77,2017-04-25,45.352001,45.473999,45.150002,45.381001,45.381001,67612000
78,2017-04-26,45.514999,45.787498,45.377998,45.464500,45.464500,52178000
79,2017-04-27,45.719501,46.092999,45.605499,45.918999,45.918999,106110000
