# Computing Optimal Labels
Backtesting the Always Buy / Always Sell strategies to compute the optimal strategy for each day-ticker combination using MILP

In [1]:
# !pip install import_ipynb --quiet
# !pip install anvil-uplink --quiet
# !pip install yfinance --quiet
# !pip install pandas_ta --quiet
# !pip install ipynb --quiet
# !pip install rpyc --quiet
# !pip install pulp

In [2]:
# # pull files from Github
# !git clone https://github.com/gmshroff/algostrats.git

In [3]:
# %cd algostrats

In [1]:
# Flip to True when the notebook is executed on Colab.
colab = False

In [2]:
import pandas as pd
import numpy as np
import import_ipynb
from backtest import Backtest
from feeds import BackFeed,DataFeed
from validation import Validate
import pickle
from itertools import product
from tqdm.notebook import tqdm

importing Jupyter notebook from backtest.ipynb
importing Jupyter notebook from feeds.ipynb
importing Jupyter notebook from synfeed.ipynb
importing Jupyter notebook from india_calendar.ipynb
importing Jupyter notebook from featfuncs.ipynb
importing Jupyter notebook from feed_env.ipynb
importing Jupyter notebook from validation.ipynb


In [3]:
from rulestrats import RuleStrat,do_nothing,always_buy,always_sell

importing Jupyter notebook from rulestrats.ipynb


In [4]:
from pulp_jobs import Jobs
from featfuncs import feat_aug,add_addl_features_feed,add_ta_features_feed,add_sym_feature_feed
from featfuncs import add_global_indices_feed

importing Jupyter notebook from pulp_jobs.ipynb


In [5]:
from dataclasses import dataclass

@dataclass
class OptimalData:
    """Bundle of precomputed optimal-strategy results together with a feed.

    NOTE(review): pickle resolves classes by name, so a class called
    ``OptimalData`` presumably has to be defined before
    ``optimal_alldata.pkl`` is unpickled in the next cell -- confirm against
    the notebook that produced that pickle.
    """
    # Indexed later in this notebook as optD[ticker][date] -> iterable of action records.
    optD: dict
    # Only passed through in this notebook; presumably the rewards matching optD -- TODO confirm.
    optR: dict
    # Indexed later in this notebook as optDT[offset][ticker][time] -> iterable of action records.
    optDT: dict
    # Only passed through in this notebook; presumably the rewards matching optDT -- TODO confirm.
    optRT: dict
    # Not read anywhere in this notebook.
    feed: pd.DataFrame

In [6]:
import os
# Read the previously pickled results; the path is relative to the
# notebook's working directory.
optimal_pickle_path = os.path.join('additional_utils', 'optimal_alldata.pkl')
with open(optimal_pickle_path, 'rb') as pickle_file:
    optimal_all_data = pickle.load(pickle_file)

In [10]:
# Location of the pickled feed object, relative to the working directory.
loadfeed_path = '../algodata/realdata/datafeed_alldata_True_False.pkl'

with open(loadfeed_path, mode='rb') as feed_file:
    feed_original = pickle.load(feed_file)

In [14]:
# Replace the loaded feed object by its per-ticker mapping of DataFrames.
# The guard keeps this cell re-runnable: after the first run `feed_original`
# is already the mapping and no longer has a `.data` attribute, so an
# unguarded second run would raise AttributeError.
if hasattr(feed_original, 'data'):
    feed_original = feed_original.data

# Row numbers of ROW_NUM_WRAP and above are shifted down by ROW_NUM_WRAP;
# smaller values are left untouched.
# NOTE(review): 75 looks like the number of bars in one session -- confirm.
ROW_NUM_WRAP = 75
for ticker in feed_original:
    row_num = feed_original[ticker]['row_num']
    # Vectorized equivalent of: x if x < 75 else x - 75
    feed_original[ticker]['row_num'] = row_num.where(row_num < ROW_NUM_WRAP,
                                                     row_num - ROW_NUM_WRAP)

In [19]:
# Consistency check across offsets: flatten optDT into a single lookup keyed
# by "<ticker>_<time>_<row number>" and report every key whose action differs
# from the action that was recorded for it first.
optDT = optimal_all_data.optDT
data_store = {}

for offset in optDT:
    per_ticker = optDT[offset]
    for ticker in per_ticker:
        per_time = per_ticker[ticker]
        for time in per_time:
            for acts in per_time[time]:
                rn, a = acts[0], acts[1]
                serialized = f'{ticker}_{time}_{rn}'
                if serialized not in data_store:
                    # First time this key is seen: remember its action.
                    data_store[serialized] = a
                    continue
                prev_action = data_store[serialized]
                if prev_action != a:
                    print(f'Serialized: {serialized}, prev:{prev_action}, curr:{a}')

In [22]:
from copy import deepcopy

# Two independent deep copies of the per-ticker frames, so that each can
# receive its own 'Action' column without touching `feed_original`.
feed_original_1, feed_original_all = (deepcopy(feed_original) for _ in range(2))

In [23]:
# Shorthand for the optimal actions without the offset level: later cells
# index this as optD[ticker][date], whereas optDT is indexed as
# optDT[offset][ticker][time].
optD = optimal_all_data.optD

In [24]:
# Add an 'Action' column to every ticker frame: -5 is the placeholder for
# rows without an entry in optD; rows matched on (Date, row_num) receive the
# action stored in optD.
for ticker in feed_original_1:
    frame = feed_original_1[ticker]
    frame['Action'] = -5
    actions = []

    for t in optD[ticker]:
        for acts in optD[ticker][t]:
            rn, a = acts[0], acts[1]
            actions.append(a)
            # (disabled variant: remap a == -1 to 2 before writing)
            is_target_row = (frame['Date'] == t) & (frame['row_num'] == rn)
            frame.loc[is_target_row, 'Action'] = a

In [26]:
# Stack the per-ticker frames into one DataFrame (this rebinds the name from
# a mapping to a DataFrame), then show how often each label occurs.
labelled_frames = [feed_original_1[name] for name in feed_original_1]
feed_original_1 = pd.concat(labelled_frames, axis=0)
feed_original_1['Action'].value_counts()

-5    93172
-1     1060
 1      857
Name: Action, dtype: int64

In [28]:
# Same labelling as for optD, but using every offset in optDT. Offsets are
# applied in iteration order, so a later offset overwrites an earlier one
# when both target the same (Date, row_num) row.
for ticker in feed_original_all:
    frame_all = feed_original_all[ticker]
    frame_all['Action'] = -5
    actions = []

    for offset in optDT:
        offset_days = optDT[offset][ticker]
        for t in offset_days:
            for acts in offset_days[t]:
                rn, a = acts[0], acts[1]
                actions.append(a)
                # (disabled variant: remap a == -1 to 2 before writing)
                is_target_row = (frame_all['Date'] == t) & (frame_all['row_num'] == rn)
                frame_all.loc[is_target_row, 'Action'] = a

In [29]:
# Stack the per-ticker frames into one DataFrame (this rebinds the name from
# a mapping to a DataFrame), then show how often each label occurs.
labelled_frames_all = [feed_original_all[name] for name in feed_original_all]
feed_original_all = pd.concat(labelled_frames_all, axis=0)
feed_original_all['Action'].value_counts()

-5    91672
-1     1882
 1     1535
Name: Action, dtype: int64

In [36]:
@dataclass
class OptimalData:
    """Extended results bundle that also carries the two labelled feeds.

    This redefinition replaces the ``OptimalData`` class declared earlier in
    the notebook; it drops the ``feed`` field and adds ``feed_original`` and
    ``feed_all``.
    """
    # Indexed in this notebook as optD[ticker][date] -> iterable of action records.
    optD: dict
    # Only passed through in this notebook; presumably the rewards matching optD -- TODO confirm.
    optR: dict
    # Indexed in this notebook as optDT[offset][ticker][time] -> iterable of action records.
    optDT: dict
    # Only passed through in this notebook; presumably the rewards matching optDT -- TODO confirm.
    optRT: dict
    # Receives the concatenated frame whose 'Action' column was filled from optD.
    feed_original: pd.DataFrame
    # Receives the concatenated frame whose 'Action' column was filled from optDT.
    feed_all: pd.DataFrame

In [37]:
# Re-pack the loaded results together with the two labelled feeds.
optimal_all_data_new = OptimalData(
    optD=optD,
    optR=optimal_all_data.optR,
    optDT=optDT,
    optRT=optimal_all_data.optRT,
    feed_original=feed_original_1,
    feed_all=feed_original_all,
)

In [26]:
import os
# Persist the extended bundle next to the pickle it was derived from.
new_pickle_path = os.path.join('additional_utils', 'optimal_alldata_new.pkl')
with open(new_pickle_path, 'wb') as pickle_file:
    pickle.dump(optimal_all_data_new, pickle_file)

## Trading Plots of Optimal Strategy

In [30]:
import plotly.graph_objects as go

In [31]:
import plotly.express as px

In [32]:
def annotate_action(rew,act,df):
    """Build the plotly annotation dict that marks a trade entry.

    Parameters
    ----------
    rew : sequence
        ``rew[0]`` is the positional row in ``df`` where the action happens;
        ``rew[1]`` is the value whose sign selects the colour (green when
        ``>= 0``, red otherwise).
    act : sequence
        ``act[0]`` is the action code: ``1`` is labelled 'Buy', ``-1`` 'Sell'.
    df : pandas.DataFrame
        Frame being plotted; must contain a ``Close`` column.

    Returns
    -------
    dict or None
        Annotation with ``font``, ``x``, ``y``, ``showarrow`` and ``text``
        keys, or ``None`` when ``act[0]`` is neither ``1`` nor ``-1``.
        Previously that case raised ``UnboundLocalError`` because ``text``
        was never assigned; the callers in this notebook already skip
        ``None`` annotations.
    """
    if act[0] == 1:
        text = 'Buy'
    elif act[0] == -1:
        text = 'Sell'
    else:
        # Unknown action code: nothing to annotate.
        return None

    color = 'Green' if rew[1] >= 0 else 'Red'
    row = rew[0]
    return dict(font=dict(color=color, size=15),
                x=df.index[row],
                y=df.iloc[row]['Close'],
                showarrow=True,
                text=text)

In [33]:
def annotate_exit(rew,act,anns,df):
    """Mark a trade exit on the chart by updating ``anns`` in place.

    If an existing annotation already sits at the exit bar's x position, its
    text is prefixed with ``'Ex&'``; otherwise a new 'Exit' annotation is
    appended to ``anns``.

    Parameters
    ----------
    rew : sequence
        ``rew[1]`` selects the colour of a newly added annotation (green when
        ``>= 0``, red otherwise); ``rew[2]`` is the positional row in ``df``
        of the exit bar.
    act : sequence
        Unused; kept so the signature parallels ``annotate_action``.
    anns : list
        Annotation dicts collected so far. ``None`` entries are tolerated and
        skipped.
    df : pandas.DataFrame
        Frame being plotted; must contain a ``Close`` column.

    Returns
    -------
    None
    """
    exit_x = df.index[rew[2]]

    # Search `anns` itself rather than a None-filtered copy: the position
    # found in a filtered list does not line up with `anns` once it contains
    # None entries, which made the old code edit the wrong annotation or fail
    # with "'NoneType' object is not subscriptable".
    for ann in anns:
        if ann is not None and ann['x'] == exit_x:
            ann['text'] = 'Ex&' + ann['text']
            return

    color = 'Green' if rew[1] >= 0 else 'Red'
    anns.append(dict(font=dict(color=color, size=15),
                     x=exit_x,
                     y=df.iloc[rew[2]]['Close'],
                     showarrow=True,
                     text='Exit'))

In [34]:
def plot_ticker_date(optD,ticker,date):
    """Show a candlestick chart for one ticker/date with the trades annotated.

    Relies on a module-level ``feed`` object exposing
    ``feed.ndata[ticker][date]``; ``feed`` is not created anywhere in the
    visible part of this notebook, so it has to exist before calling.

    Parameters
    ----------
    optD : mapping
        ``optD[ticker][date]`` is an iterable of 6-item records unpacked here
        as ``(start_row, action, _, _, exit_row, reward)``.
    ticker, date
        Keys into ``optD`` and ``feed.ndata``; ``date`` is also matched
        against the frame's ``Date`` column.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure that was shown. It is also stored in the module-level
        name ``fig``, as the original implementation did.
    """
    global fig
    df=feed.ndata[ticker][date]
    df=df.loc[df['Date']==date]
    fig = go.Figure(data=
        [go.Candlestick(x = df.index,
                        open  = df["Open"],
                        high  = df["High"],
                        low   = df["Low"],
                        close = df["Close"])]
    )

    trades = optD[ticker][date]
    # sum() of an empty iterable is 0, so no special case is needed.
    reward = sum(o[5] for o in trades)
    fig.update_layout(
        title=f'{ticker} on {date} return {reward}',
        yaxis_title="Price"
    )

    # First pass: one entry annotation per trade.
    anns = [annotate_action((s,r,e),(a,s),df) for s,a,_,_,e,r in trades]
    # Second pass: annotate_exit edits `anns` in place and returns None, so
    # it is called for its side effect only. Appending its return value (as
    # the old code did) filled `anns` with None placeholders.
    for s,a,_,_,e,r in trades:
        annotate_exit((s,r,e),(a,s),anns,df)

    # (An earlier variant took the annotations from
    #  bt.results[ticker][date]['rew'] / ['acts'] instead of optD.)
    for a in anns:
        if a is not None: fig.add_annotation(a)
    fig.show()
    return fig

In [35]:
def combine_plotly_figs_to_html(plotly_figs, html_fname, include_plotlyjs='cdn',
                                separator=None, auto_open=False):
    """Write several plotly figures into a single HTML page.

    Parameters
    ----------
    plotly_figs : sequence
        Objects with a plotly-style ``to_html(full_html=..., include_plotlyjs=...)``
        method. An empty sequence yields a page with an empty body.
    html_fname : str or path-like
        Output file; overwritten if it exists.
    include_plotlyjs : bool or str
        Forwarded to the first figure's ``to_html`` only; later figures reuse
        that copy of plotly.js and are rendered with ``include_plotlyjs=False``.
    separator : str, optional
        Raw HTML inserted between consecutive figures.
    auto_open : bool
        When true, open the written file in the default web browser.

    Notes
    -----
    Every figure is rendered as a fragment and the fragments are wrapped in
    one document. The previous version wrote the first figure as a complete
    document and appended the rest after its closing ``</html>`` tag, and it
    raised ``IndexError`` on an empty list after truncating the output file.
    """
    # Render everything before touching the file, so a failing figure does
    # not leave a truncated page behind.
    fragments = []
    for position, figure in enumerate(plotly_figs):
        if position and separator:
            fragments.append(separator)
        fragments.append(figure.to_html(
            full_html=False,
            include_plotlyjs=include_plotlyjs if position == 0 else False))

    page = ('<html>\n<head><meta charset="utf-8" /></head>\n<body>\n'
            + '\n'.join(fragments)
            + '\n</body>\n</html>')

    # The page declares utf-8, so write it as utf-8 regardless of the
    # platform's default encoding.
    with open(html_fname, 'w', encoding='utf-8') as f:
        f.write(page)

    if auto_open:
        import pathlib, webbrowser
        uri = pathlib.Path(html_fname).absolute().as_uri()
        webbrowser.open(uri)

In [None]:
# One chart per (ticker, date) pair present in optD, in iteration order.
figs = []
for t in optD:
    dates_for_ticker = optD[t]
    for d in dates_for_ticker:
        figs.append(plot_ticker_date(optD, t, d))

In [34]:
# # upload kaggle.json
# from google.colab import files
# uploaded=files.upload()

In [None]:
# !mkdir /root/.kaggle
# !mv ./kaggle.json /root/.kaggle/.
# !chmod 600 /root/.kaggle/kaggle.json

In [None]:
# %mkdir data
# %cd data
# !kaggle datasets download -d gmshroff/marketdatafivemin
# !unzip marketdatafivemin.zip
# %cd ..

In [None]:
# DATAFILE='augdata_16-Dec-2022_5m.csv'
# if not colab: DATAPATH='~/DataLocal/algo_fin_new/five_min_data/'
# else: DATAPATH='./data/'
# df=pd.read_csv(DATAPATH+DATAFILE)

In [None]:
# dataFeed=DataFeed(tickers=list(df.ticker.unique()[0:50]),dfgiven=True,df=df)

In [None]:
# bt=Backtest(dataFeed,tickers=dataFeed.tickers,target=.05,stop=.01,txcost=0.001,
#             loc_exit=True,scan=True,topk=3,deploy=True)

In [None]:
# bt.run_all(tickers=dataFeed.tickers,model=momStrat)

In [None]:
# bt.results

In [None]:
# bt.returns

Experiments