# Computing Optimal Labels
Backtest always-buy and always-sell strategies to compute the optimal strategy for each ticker-day combination using MILP.

In [1]:
# !pip install import_ipynb --quiet
# !pip install anvil-uplink --quiet
# !pip install yfinance --quiet
# !pip install pandas_ta --quiet
# !pip install ipynb --quiet
# !pip install rpyc --quiet
# !pip install pulp

In [2]:
# # pull files from Github
# !git clone https://github.com/gmshroff/algostrats.git

In [3]:
# %cd algostrats

In [1]:
# Set to True when running on Google Colab. Within this notebook the flag is only
# referenced by commented-out data-path code further down.
colab=False

In [2]:
import pandas as pd
import numpy as np
import import_ipynb
from backtest import Backtest
from feeds import BackFeed,DataFeed
from validation import Validate
import pickle
from itertools import product
from tqdm.notebook import tqdm

importing Jupyter notebook from backtest.ipynb
importing Jupyter notebook from feeds.ipynb
importing Jupyter notebook from synfeed.ipynb
importing Jupyter notebook from india_calendar.ipynb
importing Jupyter notebook from featfuncs.ipynb
importing Jupyter notebook from feed_env.ipynb
importing Jupyter notebook from validation.ipynb


In [3]:
from rulestrats import RuleStrat,do_nothing,always_buy,always_sell

importing Jupyter notebook from rulestrats.ipynb


In [4]:
from pulp_jobs import Jobs
from featfuncs import feat_aug,add_addl_features_feed,add_ta_features_feed,add_sym_feature_feed
from featfuncs import add_global_indices_feed

importing Jupyter notebook from pulp_jobs.ipynb


In [5]:
import warnings
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.simplefilter("ignore")

In [6]:
# loadfeed:  unpickle an existing feed from loadfeed_path instead of building one with BackFeed
# savefeed:  when a new feed is built, also pickle it to disk
# synthetic: forwarded to BackFeed(synthetic=...) when a new feed is built
loadfeed=True
savefeed=False
synthetic=False

Deterministic strategy: always buy (action = 1) or always sell (action = -1)

In [7]:
class DetStrat(RuleStrat):
    """Rule-based strategy that applies one fixed rule on every call.

    ``action`` selects the rule: ``-1`` delegates to ``always_sell``, ``1`` to
    ``always_buy`` and any other value to ``do_nothing``.
    """

    def __init__(self,action=0):
        self.action=action
        self.data_cols=['row_num','Close']
        self.model_type='rule-based'
        # The base-class initialiser runs only after the attributes above are set.
        super().__init__()

    def check_entry_batch(self,dfD):
        """Return the result of the rule chosen by ``self.action`` applied to ``dfD``."""
        chosen=self.action
        if chosen==-1:
            return always_sell(dfD)
        if chosen==1:
            return always_buy(dfD)
        return do_nothing(dfD)

In [8]:
# Table with (at least) a 'ticker' column; it is used below to pick the ticker universe.
data=pd.read_csv('./capvol100.csv')

In [9]:
# First 50 tickers listed in the CSV; passed to BackFeed when a new feed is built.
tickers=list(data.iloc[0:50]['ticker'].values)

In [10]:
def stringify(x):
    """Format ``x['Datetime']`` as a ``DD-Mon-YYYY`` string, e.g. ``03-Feb-2022``."""
    timestamp=pd.to_datetime(x['Datetime'])
    return timestamp.strftime('%d-%b-%Y')

In [11]:
# Pickled feed that is read when loadfeed is True; the posD output filename is derived from it too.
loadfeed_path = '../algodata/realdata/datafeed_augdata_03-Feb-2022_5m_True_False.pkl'

In [12]:
# Either unpickle a previously saved feed or build a fresh one (optionally saving it).
# NOTE: pickle.load executes code embedded in the file -- only load feeds you trust.
if loadfeed:
    with open(loadfeed_path,'rb') as f:
        feed=pickle.load(f)
else:
    feed=BackFeed(tickers=tickers,nd=3,nw=1,synthetic=synthetic)
    if savefeed:
        with open('../../temp_data/feed.pickle','wb') as f:
            pickle.dump(feed,f)

Gather Data for Computing Optimal Strategies for a given Feed

In [13]:
def assemble_backtests(feed,topk=3,actions=[-1,1],
                       stops=[.01,.005,.02],
                       targets=[.01,.005,.02]):
    """Backtest every (action, stop, target) combination and collect candidate trades.

    For each combination a ``DetStrat(action)`` is run through a ``Backtest`` built on
    a fresh copy of ``feed``, and the per-trade entries in ``bt.results[t][d]['rew']``
    are filtered and gathered per ticker and date.

    Args:
        feed: feed object to backtest on. It must be picklable: every combination
            works on its own pickle round-trip copy so that one run cannot leak
            state into the next and the caller's object is left untouched.
        topk: forwarded to ``Backtest(topk=...)``.
        actions: actions to try (``-1`` sell, ``1`` buy as understood by ``DetStrat``).
        stops: stop values forwarded to ``Backtest(stop=...)``.
        targets: target values forwarded to ``Backtest(target=...)``.

    Returns:
        dict ``posD[ticker][date]`` -> list of ``(p[0], p[2], p[1], data)`` tuples with
        ``data = (p[0], action, stop, target, p[2], p[1])``, where ``p`` is one entry
        of ``bt.results[ticker][date]['rew']``. Elsewhere in this notebook ``p[0]`` and
        ``p[2]`` are used as entry and exit rows and ``p[1]`` as the trade's reward.
    """
    posD={}
    print('Assembling backtests')
    # Serialise the caller's feed once; each combination below gets its own copy.
    feed_blob=pickle.dumps(feed)
    # Materialise the grid so the progress bar knows its total.
    combos=list(product(actions,stops,targets))
    for idx,(action,stop,target) in enumerate(tqdm(combos)):
        detStrat=DetStrat(action)
        run_feed=pickle.loads(feed_blob)
        try:
            bt=Backtest(run_feed,tickers=run_feed.tickers,add_features=True,target=target,stop=stop,txcost=0.001,
                        loc_exit=True,scan=True,topk=topk,deploy=False,save_dfs=False)
        except Exception as err:
            # Best effort: report the failing combination and move on to the next one.
            # Catching Exception (not a bare except) lets Ctrl-C still interrupt the run.
            print(f'Error on idx: {idx, action, stop, target}, datesQ: {run_feed.datesQ}, error: {err!r}')
            continue
        bt.run_all(tickers=run_feed.tickers,model=detStrat,verbose=False)
        for t in bt.results:
            per_ticker=posD.setdefault(t,{})
            for d in bt.results[t]:
                candidates=per_ticker.setdefault(d,[])
                # Running maxima over the trades kept so far for this (ticker, date, combo).
                max_r=0
                max_e=0
                for p in bt.results[t][d]['rew']:
                    # Keep a profitable trade if it beats the best reward seen so far
                    # or starts at/after the latest exit among the trades already kept.
                    if p[1]>0 and (p[1]>max_r or p[0]>=max_e):
                        candidates.append((p[0],p[2],p[1],(p[0],action,stop,target,p[2],p[1])))
                        max_r,max_e=max(max_r,p[1]),max(max_e,p[2])
    return posD

In [14]:
def compute_optimal(posD,min_row=0):
    """Pick the optimal set of candidate trades for every ticker/date in ``posD``.

    For each ``posD[t][d]`` list, candidates whose first field is at least
    ``min_row`` are handed to a ``Jobs`` instance; the entries of ``jobs.dL`` whose
    solution value is positive are kept.

    Returns:
        ``(optD, optR)`` where ``optD[t][d]`` is the list of selected entries sorted
        by their first field, and ``optR['<t> <d>']`` is the sum of field 5 of those
        entries (``0`` when nothing was selected).
    """
    optD={}
    for t,by_date in posD.items():
        selected_by_date=optD.setdefault(t,{})
        for d,candidates in by_date.items():
            print(f'Computing optimal strategy for {t} on {d} starting from {min_row}')
            jobs=Jobs()
            for cand in candidates:
                if cand[0]>=min_row:
                    jobs.add_job(cand[0],cand[1],cand[2],cand[3])
            if jobs.k>0:
                jobs.setup()
                jobs.solve()
                soln=jobs.get_soln()
                selected_by_date[d]=[job for pos,job in enumerate(jobs.dL) if soln[pos]>0]
            else:
                selected_by_date[d]=[]
    # Order every selection by its first field (in place).
    for selected_by_date in optD.values():
        for picks in selected_by_date.values():
            picks.sort(key=lambda pick: pick[0])
    # Total of field 5 per "ticker date" key; an empty selection sums to 0.
    optR={}
    for t,selected_by_date in optD.items():
        for d,picks in selected_by_date.items():
            optR[t+' '+d]=sum([pick[5] for pick in picks])
    return optD,optR

In [15]:
# True: load a previously pickled posD from disk; False: recompute it with
# assemble_backtests and pickle the result.
loadposD=False

In [16]:
# Load cached candidate trades, or compute them and cache the result next to the feed file.
if loadposD:
    # NOTE(review): this load path is not the path the else-branch saves to -- confirm
    # which location is intended before relying on loadposD=True.
    with open('../../temp_data/posD.pickle','rb') as f: posD=pickle.load(f)
else:
    posD=assemble_backtests(feed)
    if loadfeed:
        # Remove only a trailing '.pkl' extension. str.rstrip('.pkl') strips any run of
        # the characters '.', 'p', 'k', 'l' and would mangle names such as 'bulk.pkl'.
        feed_stem = loadfeed_path[:-len('.pkl')] if loadfeed_path.endswith('.pkl') else loadfeed_path
        posD_path = feed_stem + "_optimal_posD.pkl"
    else:
        posD_path = 'posD.pkl'
    with open(posD_path,'wb') as f: pickle.dump(posD,f)

Assembling backtests


0it [00:00, ?it/s]

In [19]:
# Optimal selection per ticker/date, considering only candidates whose first field
# (used as the entry row by the plotting code below) is >= 5.
optD,optR=compute_optimal(posD,min_row=5)

Computing optimal strategy for MUTHOOTFIN.NS on 17-Jan-2022 starting from 5
Computing optimal strategy for WONDERLA.NS on 01-Feb-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 01-Feb-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 24-Jan-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 02-Feb-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 14-Jan-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 13-Jan-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 19-Jan-2022 starting from 5
Computing optimal strategy for VIPIND.NS on 12-Jan-2022 starting from 5
Computing optimal strategy for MTNL.NS on 25-Jan-2022 starting from 5
Computing optimal strategy for MTNL.NS on 11-Jan-2022 starting from 5
Computing optimal strategy for MTNL.NS on 10-Jan-2022 starting from 5
Computing optimal strategy for MTNL.NS on 20-Jan-2022 starting from 5
Computing optimal strategy for MTNL.NS on 13-Jan-2022 starting fro

In [20]:
# Display the full nested result dicts (the output can be long).
optD,optR

({'MUTHOOTFIN.NS': {'17-Jan-2022': [(16,
     1,
     0.005,
     0.02,
     74,
     1.0868756122982919)]},
  'WONDERLA.NS': {'01-Feb-2022': [(15, -1, 0.01, 0.01, 27, 1.078176055899261),
    (28, -1, 0.01, 0.005, 39, 0.4246781779419758),
    (47, 1, 0.005, 0.005, 57, 0.6258883376768647),
    (58, -1, 0.005, 0.005, 62, 0.8100000000000145)]},
  'VIPIND.NS': {'01-Feb-2022': [(16, -1, 0.005, 0.01, 29, 1.075095816202822),
    (31, 1, 0.02, 0.01, 41, 1.6398093213615588),
    (44, -1, 0.005, 0.005, 48, 0.9916646765370359),
    (54, 1, 0.01, 0.02, 74, 1.2555731656427775)],
   '24-Jan-2022': [(6, -1, 0.005, 0.02, 13, 3.490952762560374),
    (13, -1, 0.01, 0.005, 15, 0.41922587939444433),
    (15, 1, 0.02, 0.02, 22, 2.50847457627119),
    (28, -1, 0.01, 0.02, 66, 1.9410404181976262),
    (71, -1, 0.005, 0.02, 74, 0.06931579772124495)],
   '02-Feb-2022': [(5, -1, 0.01, 0.02, 18, 2.4586149393807095),
    (26, 1, 0.005, 0.005, 30, 0.670063868825095),
    (30, 1, 0.01, 0.005, 35, 0.4763422452486487

Compute optimal strategies starting from row `t`, for each `t` in `[0, t_max]` in increments of `step`.

In [None]:
# t_max: largest min_row to evaluate; step: spacing between successive min_row values.
t_max,step=75,5

In [None]:
# Re-solve for every starting row t = 0, step, 2*step, ..., t_max.
# optDT[t] / optRT[t] hold the two values returned by compute_optimal for min_row=t.
optDT,optRT={},{}
for t in range(0,t_max+1,step):
    optDT[t],optRT[t]=compute_optimal(posD,min_row=t)

Computing optimal strategy for ICICIPRULI.NS on 17-Mar-2023 starting from 0
Computing optimal strategy for ADANITRANS.NS on 17-Mar-2023 starting from 0
Computing optimal strategy for HBSL.NS on 20-Mar-2023 starting from 0
Computing optimal strategy for IMPEXFERRO.NS on 20-Mar-2023 starting from 0
Computing optimal strategy for IMPEXFERRO.NS on 17-Mar-2023 starting from 0
Computing optimal strategy for ZENITHSTL.NS on 20-Mar-2023 starting from 0
Computing optimal strategy for ICICIPRULI.NS on 17-Mar-2023 starting from 5
Computing optimal strategy for ADANITRANS.NS on 17-Mar-2023 starting from 5
Computing optimal strategy for HBSL.NS on 20-Mar-2023 starting from 5
Computing optimal strategy for IMPEXFERRO.NS on 20-Mar-2023 starting from 5
Computing optimal strategy for IMPEXFERRO.NS on 17-Mar-2023 starting from 5
Computing optimal strategy for ZENITHSTL.NS on 20-Mar-2023 starting from 5
Computing optimal strategy for ICICIPRULI.NS on 17-Mar-2023 starting from 10
Computing optimal strateg

## Trading Plots of Optimal Strategy

In [21]:
import plotly.graph_objects as go

In [22]:
import plotly.express as px

In [23]:
def annotate_action(rew,act,df):
    """Build the annotation dict that marks a trade entry on the price chart.

    Args:
        rew: sequence whose item 0 is the positional row of the entry in ``df`` and
            whose item 1 is the trade's reward (its sign selects the colour).
        act: sequence whose item 0 is the action: ``1`` (buy) or ``-1`` (sell).
        df: DataFrame with a ``Close`` column; its index supplies the x coordinate.

    Returns:
        dict with ``font``, ``x``, ``y``, ``showarrow`` and ``text`` keys, labelled
        ``'Buy'`` or ``'Sell'`` and coloured green for a non-negative reward, red
        otherwise.

    Raises:
        ValueError: if ``act[0]`` is neither ``1`` nor ``-1``. Previously this case
            failed with an UnboundLocalError because no label was ever assigned.
    """
    color='Green' if rew[1]>=0 else 'Red'
    if act[0]==1:
        text='Buy'
    elif act[0]==-1:
        text='Sell'
    else:
        raise ValueError(f'Unsupported action {act[0]!r}: expected 1 (buy) or -1 (sell)')
    entry_row=rew[0]
    return dict(font=dict(color=color,size=15),x=df.index[entry_row],y=df.iloc[entry_row]['Close'],
                showarrow=True,text=text)

In [24]:
def annotate_exit(rew,act,anns,df):
    """Mark a trade exit in ``anns``, the list of annotation dicts, in place.

    If an annotation already sits at the exit's x position, its text is prefixed
    with ``'Ex&'``; otherwise a new ``'Exit'`` annotation is appended.

    Args:
        rew: sequence whose item 1 is the trade's reward (its sign selects the colour
            of a newly added annotation) and whose item 2 is the positional row of
            the exit in ``df``.
        act: unused; kept so the signature mirrors ``annotate_action``.
        anns: list of annotation dicts, possibly containing ``None`` entries, which
            are skipped. Mutated in place.
        df: DataFrame with a ``Close`` column; its index supplies the x coordinate.

    Returns:
        None.
    """
    color='Green' if rew[1]>=0 else 'Red'
    exit_row=rew[2]
    exit_x=df.index[exit_row]
    # Search the real list rather than a None-filtered copy: an index computed on a
    # filtered copy does not line up with ``anns`` once it contains None entries.
    for ann in anns:
        if ann is not None and ann['x']==exit_x:
            ann['text']='Ex&'+ann['text']
            return
    anns.append(dict(font=dict(color=color,size=15),x=exit_x,y=df.iloc[exit_row]['Close'],
                     showarrow=True,text='Exit'))

In [25]:
def plot_ticker_date(optD,ticker,date):
    """Show a candlestick chart of one ticker/day with the selected trades annotated.

    Args:
        optD: mapping ``optD[ticker][date]`` -> list of 6-tuples
            ``(start, action, _, _, end, reward)`` as unpacked below.
        ticker: key into ``optD`` and into the notebook-level ``feed.ndata``.
        date: key into ``optD[ticker]`` / ``feed.ndata[ticker]``; also used to keep
            only the rows of the frame whose ``Date`` column equals it.

    Returns:
        The plotly figure. It is also displayed via ``fig.show()`` and stored in the
        notebook-level global ``fig``.
    """
    # Kept from the original: the latest figure is published as a notebook-level global.
    global fig
    df=feed.ndata[ticker][date]
    df=df.loc[df['Date']==date]
    fig = go.Figure(data=
        [go.Candlestick(x = df.index,
                        open  = df["Open"],
                        high  = df["High"],
                        low   = df["Low"],
                        close = df["Close"])]
    )
    trades=optD[ticker][date]
    # Sum of the trades' rewards; an empty list sums to 0.
    reward=sum([trade[5] for trade in trades])
    fig.update_layout(
        title=f'{ticker} on {date} return {reward}',
        yaxis_title="Price"
    )
    anns=[]
    for s,a,_,_,e,r in trades:
        anns.append(annotate_action((s,r,e),(a,s),df))
    for s,a,_,_,e,r in trades:
        # annotate_exit edits anns in place and returns None. Its return value must
        # not be appended: None entries in anns shift the positions it relies on.
        annotate_exit((s,r,e),(a,s),anns,df)
    for ann in anns:
        if ann is not None: fig.add_annotation(ann)
    fig.show()
    return fig

In [26]:
def combine_plotly_figs_to_html(plotly_figs, html_fname, include_plotlyjs='cdn', 
                                separator=None, auto_open=False):
    """Write several plotly figures into a single HTML file.

    The first figure is written as a full HTML document (which is where plotly.js
    gets included, controlled by ``include_plotlyjs``); every further figure is
    appended as an HTML fragment without its own copy of plotly.js.

    Args:
        plotly_figs: non-empty sequence of objects with a ``to_html`` method.
        html_fname: path of the HTML file to create or overwrite.
        include_plotlyjs: forwarded to ``to_html`` for the first figure only.
        separator: optional string written between consecutive figures.
        auto_open: when true, open the written file in the default web browser.

    Raises:
        ValueError: if ``plotly_figs`` is empty. This is checked before the file is
            opened, so no truncated file is left behind.
    """
    if len(plotly_figs)==0:
        raise ValueError('plotly_figs must contain at least one figure')
    # Explicit UTF-8: figure titles and labels may contain non-ASCII text, and the
    # platform default encoding is not guaranteed to be able to represent it.
    with open(html_fname, 'w', encoding='utf-8') as f:
        f.write(plotly_figs[0].to_html(include_plotlyjs=include_plotlyjs))
        for fig in plotly_figs[1:]:
            if separator:
                f.write(separator)
            f.write(fig.to_html(full_html=False, include_plotlyjs=False))

    if auto_open:
        import pathlib, webbrowser
        uri = pathlib.Path(html_fname).absolute().as_uri()
        webbrowser.open(uri)

In [27]:
# Render one chart per (ticker, date) in optD and keep the returned figures in figs.
figs=[]
for t in optD:
    for d in optD[t]:
        figs+=[plot_ticker_date(optD,t,d)]

In [34]:
# # upload kaggle.json
# from google.colab import files
# uploaded=files.upload()

In [None]:
# !mkdir /root/.kaggle
# !mv ./kaggle.json /root/.kaggle/.
# !chmod 600 /root/.kaggle/kaggle.json

In [None]:
# %mkdir data
# %cd data
# !kaggle datasets download -d gmshroff/marketdatafivemin
# !unzip marketdatafivemin.zip
# %cd ..

In [None]:
# DATAFILE='augdata_16-Dec-2022_5m.csv'
# if not colab: DATAPATH='~/DataLocal/algo_fin_new/five_min_data/'
# else: DATAPATH='./data/'
# df=pd.read_csv(DATAPATH+DATAFILE)

In [None]:
# dataFeed=DataFeed(tickers=list(df.ticker.unique()[0:50]),dfgiven=True,df=df)

In [None]:
# bt=Backtest(dataFeed,tickers=dataFeed.tickers,target=.05,stop=.01,txcost=0.001,
#             loc_exit=True,scan=True,topk=3,deploy=True)

In [None]:
# bt.run_all(tickers=dataFeed.tickers,model=momStrat)

In [None]:
# bt.results

In [None]:
# bt.returns

Experiments