# Computing Optimal Labels
Backtest Always-Buy and Always-Sell strategies to compute the optimal strategy for each day-ticker combination using MILP.

In [1]:
# !pip install import_ipynb --quiet
# !pip install anvil-uplink --quiet
# !pip install yfinance --quiet
# !pip install pandas_ta --quiet
# !pip install ipynb --quiet
# !pip install rpyc --quiet
# !pip install pulp

In [2]:
# # pull files from Github
# !git clone https://github.com/gmshroff/algostrats.git

In [3]:
# %cd algostrats

In [1]:
# change to True if on colab
colab=False

In [2]:
import pandas as pd
import numpy as np
import import_ipynb
from backtest import Backtest
from feeds import BackFeed,DataFeed
from validation import Validate
import pickle
from itertools import product
from tqdm.notebook import tqdm

importing Jupyter notebook from backtest.ipynb
importing Jupyter notebook from feeds.ipynb
importing Jupyter notebook from synfeed.ipynb
importing Jupyter notebook from india_calendar.ipynb
importing Jupyter notebook from featfuncs.ipynb
importing Jupyter notebook from feed_env.ipynb
importing Jupyter notebook from validation.ipynb


In [3]:
from rulestrats import RuleStrat,do_nothing,always_buy,always_sell

importing Jupyter notebook from rulestrats.ipynb


In [4]:
from pulp_jobs import Jobs
from featfuncs import feat_aug,add_addl_features_feed,add_ta_features_feed,add_sym_feature_feed
from featfuncs import add_global_indices_feed

importing Jupyter notebook from pulp_jobs.ipynb


In [5]:
import warnings
warnings.simplefilter("ignore")

In [6]:
loadfeed=True
savefeed=False
synthetic=False

Deterministic strategy: always take the same fixed action, buy (1) or sell (-1).

In [7]:
class DetStrat(RuleStrat):
    """Rule-based strategy that applies one fixed rule on every call.

    `action` selects the rule: -1 delegates to `always_sell`, 1 to
    `always_buy`, and any other value to `do_nothing`.
    """
    def __init__(self,action=0):
        # Attributes are set before the base-class initialiser runs.
        self.action=action
        self.data_cols=['row_num','Close']
        self.model_type='rule-based'
        super().__init__()
    def check_entry_batch(self,dfD):
        """Return the result of the configured rule function applied to `dfD`."""
        if self.action==-1:
            rule=always_sell
        elif self.action==1:
            rule=always_buy
        else:
            rule=do_nothing
        return rule(dfD)

In [8]:
data=pd.read_csv('./capvol100.csv')

In [9]:
tickers=list(data.iloc[0:50]['ticker'].values)

In [10]:
def stringify(x):
    """Format the 'Datetime' entry of `x` as a 'DD-Mon-YYYY' string."""
    timestamp=pd.to_datetime(x['Datetime'])
    return timestamp.strftime('%d-%b-%Y')

In [11]:
loadfeed_path = '../algodata/btfeed_hp_11_test.pkl'

In [12]:
# Either restore a previously pickled feed or build a fresh one from `tickers`.
if loadfeed:
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(loadfeed_path,'rb') as f:
        feed=pickle.load(f)
else:
    feed=BackFeed(tickers=tickers,nd=3,nw=1,synthetic=synthetic)
    if savefeed:
        with open('../../temp_data/feed.pickle','wb') as f:
            pickle.dump(feed,f)

Gather Data for Computing Optimal Strategies for a given Feed

In [13]:
def assemble_backtests(feed,topk=3,actions=[-1,1],
                       stops=[.01,.005,.02],
                       targets=[.01,.005,.02]):
    """Backtest every (action, stop, target) combination and collect candidate trades.

    Parameters
    ----------
    feed : feed object
        Only used for the error message if the first reload fails.
        NOTE(review): a fresh feed is unpickled from the notebook-level
        `loadfeed_path` on every iteration and replaces this argument,
        presumably because the backtest modifies the feed it is given --
        confirm before passing a feed that was not loaded from that path.
    topk : int
        Forwarded to `Backtest(topk=...)`; the default matches the value
        that was previously hard-coded.
    actions, stops, targets : iterables
        Grid values; the Cartesian product of the three is backtested.
        The default lists are never mutated here.

    Returns
    -------
    dict
        posD[ticker][date] is a list of tuples
        (p[0], p[2], p[1], (p[0], action, stop, target, p[2], p[1]))
        built from each kept entry p of bt.results[ticker][date]['rew'].
        p[0], p[1], p[2] appear to be entry row, reward and exit row --
        TODO confirm against Backtest.
    """
    posD={}
    print('Assembling backtests')
    combos=list(product(actions,stops,targets))
    # Passing total lets the progress bar show completion instead of a bare counter.
    for idx,(action,stop,target) in tqdm(enumerate(combos),total=len(combos)):
        detStrat=DetStrat(action)
        try:
            with open(loadfeed_path,'rb') as f: feed=pickle.load(f)
            bt=Backtest(feed,tickers=feed.tickers,add_features=True,target=target,stop=stop,txcost=0.001,
                        loc_exit=True,scan=True,topk=topk,deploy=False,save_dfs=False)
        except Exception as err:
            # Skip this combination but report what failed; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            print(f'Error on idx: {idx, action, stop, target}, datesQ: {getattr(feed,"datesQ",None)}, error: {err!r}')
            continue
        bt.run_all(tickers=feed.tickers,model=detStrat,verbose=False)
        for t in bt.results:
            per_ticker=posD.setdefault(t,{})
            for d in bt.results[t]:
                kept=per_ticker.setdefault(d,[])
                # Running maxima over the entries kept so far for this run.
                max_r=0
                max_e=0
                for p in bt.results[t][d]['rew']:
                    # Keep an entry with positive p[1] if it beats the best p[1]
                    # kept so far, or if its p[0] is not before the largest kept p[2].
                    if p[1]>0 and (p[1]>max_r or p[0]>=max_e):
                        kept.append((p[0],p[2],p[1],(p[0],action,stop,target,p[2],p[1])))
                        max_r,max_e=max(max_r,p[1]),max(max_e,p[2])
    return posD

In [14]:
def compute_optimal(posD,min_row=0):
    """Select an optimal set of candidate trades for every ticker/date.

    For each posD[ticker][date] list, candidates whose first element is at
    least `min_row` are added to a `Jobs` instance, which is set up and
    solved; the entries of `jobs.dL` with a positive solution value are kept.

    Returns
    -------
    (optD, optR)
        optD[ticker][date] is the kept list sorted by element 0 (empty when
        no candidate qualified); optR maps 'ticker date' to the sum of
        element 5 over that list (0 when it is empty).
    """
    optD={}
    for ticker,per_date in posD.items():
        chosen_by_date=optD.setdefault(ticker,{})
        for date,candidates in per_date.items():
            jobs=Jobs()
            for cand in candidates:
                if cand[0]>=min_row:
                    jobs.add_job(cand[0],cand[1],cand[2],cand[3])
            if jobs.k>0:
                jobs.setup()
                jobs.solve()
                x=jobs.get_soln()
                chosen=[job for pos,job in enumerate(jobs.dL) if x[pos]>0]
            else:
                chosen=[]
            chosen_by_date[date]=chosen
    # Order each selection by its first element.
    for chosen_by_date in optD.values():
        for chosen in chosen_by_date.values():
            chosen.sort(key=lambda job: job[0])
    optR={}
    for ticker,chosen_by_date in optD.items():
        for date,chosen in chosen_by_date.items():
            optR[ticker+' '+date]=sum([job[5] for job in chosen]) if len(chosen)>0 else 0
    return optD,optR

In [15]:
loadposD=False

In [16]:
# Choose where the assembled candidate positions are cached.
if loadfeed:
    # str.rstrip('.pkl') removes any trailing run of the characters '.', 'p',
    # 'k', 'l' rather than the literal suffix (e.g. 'feed_all.pkl' -> 'feed_a'),
    # so the extension is removed explicitly instead.
    if loadfeed_path.endswith('.pkl'):
        posD_path = loadfeed_path[:-len('.pkl')] + "_optimal_posD.pkl"
    else:
        posD_path = loadfeed_path + "_optimal_posD.pkl"
else:
    posD_path = 'posD.pkl'

In [17]:
# Rebuild the candidate positions and cache them, unless a cached copy was requested.
if not loadposD:
    posD=assemble_backtests(feed)
    with open(posD_path,'wb') as f:
        pickle.dump(posD,f)
else:
    with open(posD_path,'rb') as f:
        posD=pickle.load(f)

Assembling backtests


0it [00:00, ?it/s]

In [18]:
optD,optR=compute_optimal(posD,min_row=5)

In [19]:
optD,optR

({'SYN': {'28-Oct-2022': [(6, -1, 0.01, 0.02, 9, 2.1315897935066546),
    (9, -1, 0.02, 0.01, 12, 1.6945039221041598),
    (12, -1, 0.01, 0.02, 18, 2.010422661392934),
    (18, 1, 0.005, 0.02, 25, 2.1834505795318018),
    (25, 1, 0.005, 0.02, 29, 2.8002418672683467),
    (29, 1, 0.005, 0.02, 32, 2.299520904254923),
    (32, 1, 0.02, 0.02, 35, 2.5032985824716545),
    (35, 1, 0.02, 0.02, 38, 2.5251277661610563),
    (38, 1, 0.02, 0.02, 41, 2.383097173867968),
    (41, 1, 0.005, 0.02, 44, 2.1033189706197137),
    (44, 1, 0.01, 0.02, 48, 2.520782103322164),
    (48, 1, 0.01, 0.02, 53, 1.9092749951849857),
    (53, -1, 0.005, 0.02, 64, 2.2083288135864665),
    (64, -1, 0.02, 0.005, 66, 0.6946574108080288),
    (66, -1, 0.02, 0.02, 70, 2.8775601848259504),
    (70, -1, 0.005, 0.02, 73, 2.260165438302639),
    (73, -1, 0.01, 0.01, 74, 1.1680213943303372)],
   '20-Dec-2022': [(6, 1, 0.005, 0.005, 9, 0.5434442015381525),
    (10, -1, 0.02, 0.005, 13, 0.8723386864601892),
    (13, -1, 0.02, 0.0

Compute optimal strategies starting from row position `t`, for each `t` in `[0, t_max]` in increments of `step`.

In [28]:
t_max,step=70,5

In [29]:
# Solve the optimal selection once per starting offset; both result dicts are keyed by t.
optDT,optRT={},{}
for t in range(0,t_max+1,step):
    # min_row=t restricts the candidates to those whose first element is >= t.
    optDT[t],optRT[t]=compute_optimal(posD,min_row=t)
    print(f'Done for t={t}')

Done for t=0
Done for t=5
Done for t=10
Done for t=15
Done for t=20
Done for t=25
Done for t=30
Done for t=35
Done for t=40
Done for t=45
Done for t=50
Done for t=55
Done for t=60
Done for t=65
Done for t=70


In [20]:
# NOTE(review): `feed` is rebound from the feed object to its `.data` attribute,
# so re-running this cell would call `.data` on the rebound object, and later
# cells that use `feed` see the per-ticker mapping rather than the feed object.
feed = feed.data
for ticker in feed:
    # Values of 75 or more are shifted down by 75; smaller values are unchanged.
    # Presumably this maps row numbers back into a single day's range -- TODO confirm.
    feed[ticker]['row_num'] = feed[ticker]['row_num'].apply(lambda x: x if x<75 else x-75)

In [21]:
# Write the optimal actions from optD into an 'Action' column of each ticker frame.
for ticker in feed:
    # -5 is the placeholder for rows that carry no optimal action.
    feed[ticker]['Action'] = -5
    # Collected per ticker but not read again in this cell.
    actions = []

    for t in optD[ticker]:
        for acts in optD[ticker][t]:
            # Each optimal-trade tuple starts with (row number, action).
            rn, a = acts[0], acts[1]
            actions.append(a)
            # if a == -1: a = 2
            # Label the row matching this date key and row number.
            feed[ticker].loc[(feed[ticker]['Date']==t) & (feed[ticker]['row_num']==rn), 'Action'] = a

In [22]:
# Keep the per-ticker mapping under feed_old before `feed` is rebound to one frame.
# NOTE(review): if `feed` is a plain dict, .copy() is shallow and feed_old
# shares the per-ticker frames with it -- confirm.
feed_old = feed.copy()
# Stack all per-ticker frames row-wise; `feed` is a single DataFrame from here on.
feed = pd.concat([feed[t] for t in feed], axis=0)
feed

Unnamed: 0,datetime,Date,ticker,Open,High,Low,Close,Volume,row_num,Open_n,...,BBP_5_2.0,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,VWAP_D,MOM_30,CMO_14,error,sym,Action
2022-09-29 09:15:00,2022-09-29 09:15:00,29-Sep-2022,SYN,1.000000,0.999915,1.000000,0.999915,1.0,0,1.000085,...,1.000000,1.000000,1.000000,1.000000,1.000028,1.000000,1.000000,-1,0,-5
2022-09-29 09:20:00,2022-09-29 09:20:00,29-Sep-2022,SYN,0.999894,0.999809,0.999894,0.999809,1.0,1,0.999979,...,1.000000,1.000000,1.000000,1.000000,0.999975,1.000000,1.000000,-1,0,-5
2022-09-29 09:25:00,2022-09-29 09:25:00,29-Sep-2022,SYN,0.999788,0.999704,0.999788,0.999704,1.0,2,0.999873,...,1.000000,1.000000,1.000000,1.000000,0.999922,1.000000,1.000000,-1,0,-5
2022-09-29 09:30:00,2022-09-29 09:30:00,29-Sep-2022,SYN,0.999682,0.999598,0.999682,0.999598,1.0,3,0.999767,...,1.000000,1.000000,1.000000,1.000000,0.999869,1.000000,1.000000,-1,0,-5
2022-09-29 09:35:00,2022-09-29 09:35:00,29-Sep-2022,SYN,0.999577,0.999492,0.999577,0.999492,1.0,4,0.999661,...,0.146535,1.000000,1.000000,1.000000,0.999817,1.000000,1.000000,-1,0,-5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-23 15:05:00,2023-02-23 15:05:00,23-Feb-2023,SYN,1.020304,1.017092,1.020304,1.017092,1.0,70,1.016977,...,0.144548,-0.015117,-0.003544,-0.011573,1.061744,-0.079374,-90.198684,0,0,-5
2023-02-23 15:10:00,2023-02-23 15:10:00,23-Feb-2023,SYN,1.016286,1.013051,1.016286,1.013051,1.0,71,1.012972,...,0.144875,-0.015901,-0.003462,-0.012439,1.061037,-0.082756,-91.107097,0,0,-5
2023-02-23 15:15:00,2023-02-23 15:15:00,23-Feb-2023,SYN,1.012240,1.008987,1.012240,1.008987,1.0,72,1.008939,...,0.145195,-0.016658,-0.003375,-0.013283,1.060294,-0.085993,-91.918318,0,0,-5
2023-02-23 15:20:00,2023-02-23 15:20:00,23-Feb-2023,SYN,1.008172,1.004907,1.008172,1.004907,1.0,73,1.004884,...,0.145512,-0.017385,-0.003282,-0.014103,1.059515,-0.089079,-92.643783,0,0,-5


In [23]:
feed['Action'].value_counts()

-5    5493
 1     332
-1     325
Name: Action, dtype: int64

In [31]:
# Re-label each ticker frame using the optimal trades from every starting offset in optDT.
for ticker in feed_old:
    # Reset to the -5 placeholder before applying labels.
    feed_old[ticker]['Action'] = -5
    # Collected per ticker but not read again in this cell.
    actions = []

    for offset in optDT:
        for t in optDT[offset][ticker]:
            for acts in optDT[offset][ticker][t]:
                # Each optimal-trade tuple starts with (row number, action).
                rn, a = acts[0], acts[1]
                actions.append(a)
                # if a == -1: a = 2
                # A row labelled by more than one offset keeps the last value written.
                feed_old[ticker].loc[(feed_old[ticker]['Date']==t) & (feed_old[ticker]['row_num']==rn), 'Action'] = a

In [33]:
feed_all = pd.concat([feed_old[t] for t in feed_old], axis=0)
feed_all['Action'].value_counts()

-5    5086
-1     538
 1     526
Name: Action, dtype: int64

In [34]:
from dataclasses import dataclass

@dataclass
class OptimalData:
    """Bundle of optimal-strategy results and labelled feeds that this notebook pickles."""
    # Optimal trades per ticker and date, as returned by compute_optimal.
    optD: dict
    # Summed reward per 'ticker date' key, as returned by compute_optimal.
    optR: dict
    # optD-style results keyed by starting offset.
    optDT: dict
    # optR-style results keyed by starting offset.
    optRT: dict
    # Concatenated feed frame with the 'Action' column filled from optD.
    feed_original: pd.DataFrame
    # Concatenated feed frame with the 'Action' column filled from every offset in optDT.
    feed_all: pd.DataFrame

In [26]:
optimal_synthetic_hp_11 = OptimalData(optD=optD, optR=optR, feed_original=feed, optDT=optDT, optRT=optRT, feed_all=feed_all)

In [27]:
import os
with open(os.path.join('additional_utils', 'optimal_synthetic_hp11.pkl'), 'wb') as f:
    pickle.dump(optimal_synthetic_hp_11, f)

In [14]:
# from dataclasses import dataclass

# @dataclass
# class OptimalData:
#     optD: dict
#     optR: dict
#     optDT: dict
#     optRT: dict
#     feed_optimal: pd.DataFrame
#     feed_optimal_t: pd.DataFrame

In [9]:
import os
with open(os.path.join('additional_utils', 'optimal_alldata.pkl'), 'rb') as f:
    optimal_all_data = pickle.load(f)

In [16]:
optimal_all_data_2 = OptimalData(optimal_all_data.optD, optimal_all_data.optR, optimal_all_data.optDT, optimal_all_data.optRT, optimal_all_data.feed, None)

In [None]:
# Label a copy of the stored feed with the optimal actions from every starting
# offset, reporting rows that cannot be found or that receive conflicting actions.
# OptimalData as defined in this notebook names the field `feed_original`;
# it has no `feed_optimal` attribute.
curr_feed = optimal_all_data_2.feed_original.copy()

for offset in optimal_all_data_2.optDT:
    for ticker in optimal_all_data_2.optDT[offset]:
        for t in optimal_all_data_2.optDT[offset][ticker]:
            for acts in optimal_all_data_2.optDT[offset][ticker][t]:
                # Each optimal-trade tuple starts with (row number, action).
                rn, a = acts[0], acts[1]
                # One mask serves both the lookup and the assignment. The assignment
                # previously omitted the ticker filter, so it overwrote the same
                # date/row of every ticker and produced spurious conflicts.
                row_mask = (curr_feed['Date']==t) & (curr_feed['row_num']==rn) & (curr_feed['ticker']==ticker)
                if not row_mask.any():
                    print(f'Not found! {t},{ticker},{rn}')
                    continue
                prev_action = curr_feed.loc[row_mask, 'Action'].values[0]
                curr_feed.loc[row_mask, 'Action'] = a
                # -5 is the "no action" placeholder, so only real relabels count as conflicts.
                if prev_action!=-5 and prev_action != a:
                    print(f'Conflict: Prev:{prev_action} Curr:{a} where Date: {t} and row: {rn}')

In [32]:
optDT = optimal_all_data.optDT

In [None]:
# Walk every optimal trade for every starting offset.
# NOTE(review): data_store is never filled here; this cell looks unfinished.
data_store = {}

for offset in optDT:
    for ticker in optDT[offset]:
        for time in optDT[offset][ticker]:
            for acts in optDT[offset][ticker][time]:
                # Trade tuples start with (row number, action); the names were
                # previously unpacked in the opposite order.
                rn, a = acts[0], acts[1]

In [26]:
# NOTE(review): OptimalData as defined in this notebook has the fields
# feed_original and feed_all and no `feed` field, so this call presumably
# relied on an earlier version of the class -- confirm before re-running.
optimal_all_data = OptimalData(optD=optD, optR=optR, optDT=optDT, optRT=optRT, feed=feed)

import os
# Persist the bundle under additional_utils/.
with open(os.path.join('additional_utils', 'optimal_alldata.pkl'), 'wb') as f:
    pickle.dump(optimal_all_data, f)

## Trading Plots of Optimal Strategy

In [30]:
import plotly.graph_objects as go

In [31]:
import plotly.express as px

In [32]:
def annotate_action(rew,act,df):
    """Build the plotly annotation dict that marks a trade entry.

    Parameters
    ----------
    rew : sequence
        rew[0] is the positional row of the entry in `df`; the sign of
        rew[1] selects the colour (green when >= 0, red otherwise).
    act : sequence
        act[0] is the action: 1 is labelled 'Buy', -1 is labelled 'Sell'.
    df : pandas.DataFrame
        Frame with a 'Close' column; its index supplies the x position.

    Returns
    -------
    dict or None
        The annotation, or None when act[0] is neither 1 nor -1. Previously
        that case failed on an unassigned `text`; callers in this notebook
        already skip None annotations.
    """
    if act[0]==1:
        text='Buy'
    elif act[0]==-1:
        text='Sell'
    else:
        return None
    color='Green' if rew[1]>=0 else 'Red'
    row=rew[0]
    return dict(font=dict(color=color,size=15),x=df.index[row],y=df.iloc[row]['Close'],
                showarrow=True,text=text)

In [33]:
def annotate_exit(rew,act,anns,df):
    """Mark a trade exit in `anns`, merging with an annotation already on that bar.

    Parameters
    ----------
    rew : sequence
        rew[2] is the positional row of the exit in `df`; the sign of rew[1]
        selects the colour of a newly added annotation.
    act : sequence
        Unused; kept for signature compatibility.
    anns : list
        Annotation dicts (None entries are tolerated). Modified in place.
    df : pandas.DataFrame
        Frame with a 'Close' column; its index supplies the x position.

    Returns
    -------
    None
        If an annotation already sits at the exit's x position, its text is
        prefixed with 'Ex&'; otherwise a new 'Exit' annotation is appended.
    """
    exit_x=df.index[rew[2]]
    # Search `anns` itself. The position found in a None-filtered copy was
    # previously used to index the unfiltered list, which could hit the wrong
    # annotation or a None entry.
    for ann in anns:
        if ann is not None and ann['x']==exit_x:
            ann['text']='Ex&'+ann['text']
            return
    color='Green' if rew[1]>=0 else 'Red'
    anns.append(dict(font=dict(color=color,size=15),x=exit_x,y=df.iloc[rew[2]]['Close'],
                     showarrow=True,text='Exit'))

In [34]:
def plot_ticker_date(optD,ticker,date):
    """Plot one ticker/date as a candlestick chart annotated with its optimal trades.

    Parameters
    ----------
    optD : dict
        optD[ticker][date] is a list of 6-tuples unpacked as
        (entry row, action, _, _, exit row, reward).
    ticker, date
        Keys into `optD` and into the notebook-level `feed.ndata`.

    Returns
    -------
    The plotly figure, which is also shown and stored in the global `fig`.

    NOTE(review): this reads the notebook-level `feed` and needs it to expose
    `.ndata[ticker][date]`; other cells rebind `feed`, so confirm it is the
    feed object when this runs.
    """
    global fig  # kept so the most recent figure stays available at notebook level
    df=feed.ndata[ticker][date]
    df=df.loc[df['Date']==date]
    fig = go.Figure(data=
        [go.Candlestick(x = df.index,
                        open  = df["Open"],
                        high  = df["High"],
                        low   = df["Low"],
                        close = df["Close"])]
    )
    trades=optD[ticker][date]
    reward=sum([o[5] for o in trades]) if len(trades)>0 else 0
    fig.update_layout(
        title=f'{ticker} on {date} return {reward}',
        yaxis_title="Price"
    )
    anns=[]
    for s,a,_,_,e,r in trades:
        entry_ann=annotate_action((s,r,e),(a,s),df)
        if entry_ann is not None: anns.append(entry_ann)
    for s,a,_,_,e,r in trades:
        # annotate_exit edits `anns` in place and returns None. That None was
        # previously appended too, which shifted the positions annotate_exit
        # uses to find an annotation on the same bar.
        annotate_exit((s,r,e),(a,s),anns,df)
    for ann in anns:
        if ann is not None: fig.add_annotation(ann)
    fig.show()
    return fig

In [35]:
def combine_plotly_figs_to_html(plotly_figs, html_fname, include_plotlyjs='cdn',
                                separator=None, auto_open=False):
    """Write several plotly figures into one HTML file.

    The first figure is written with `to_html(include_plotlyjs=...)`; each
    later figure is written with `full_html=False, include_plotlyjs=False`,
    preceded by `separator` when one is given. With `auto_open` the
    resulting file is opened in the default web browser.
    """
    with open(html_fname, 'w') as out:
        out.write(plotly_figs[0].to_html(include_plotlyjs=include_plotlyjs))
        for position in range(1, len(plotly_figs)):
            if separator:
                out.write(separator)
            fragment = plotly_figs[position].to_html(full_html=False, include_plotlyjs=False)
            out.write(fragment)

    if not auto_open:
        return
    import pathlib, webbrowser
    target = pathlib.Path(html_fname).absolute()
    webbrowser.open(target.as_uri())

In [None]:
# One figure per ticker/date present in optD, in iteration order.
figs=[]
for t in optD:
    for d in optD[t]:
        figs.append(plot_ticker_date(optD,t,d))

In [34]:
# # upload kaggle.json
# from google.colab import files
# uploaded=files.upload()

In [None]:
# !mkdir /root/.kaggle
# !mv ./kaggle.json /root/.kaggle/.
# !chmod 600 /root/.kaggle/kaggle.json

In [None]:
# %mkdir data
# %cd data
# !kaggle datasets download -d gmshroff/marketdatafivemin
# !unzip marketdatafivemin.zip
# %cd ..

In [None]:
# DATAFILE='augdata_16-Dec-2022_5m.csv'
# if not colab: DATAPATH='~/DataLocal/algo_fin_new/five_min_data/'
# else: DATAPATH='./data/'
# df=pd.read_csv(DATAPATH+DATAFILE)

In [None]:
# dataFeed=DataFeed(tickers=list(df.ticker.unique()[0:50]),dfgiven=True,df=df)

In [None]:
# bt=Backtest(dataFeed,tickers=dataFeed.tickers,target=.05,stop=.01,txcost=0.001,
#             loc_exit=True,scan=True,topk=3,deploy=True)

In [None]:
# bt.run_all(tickers=dataFeed.tickers,model=momStrat)

In [None]:
# bt.results

In [None]:
# bt.returns

Experiments