In [1]:
from stable_baselines3 import PPO,A2C,DQN
from stable_baselines3.common.vec_env import StackedObservations
from stable_baselines3.common.monitor import Monitor as Mon

import warnings
warnings.simplefilter("ignore")

import import_ipynb
import pandas as pd
import numpy as np
import torch
from datetime import datetime
from datetime import datetime as dt
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
import pickle
from india_calendar import IBDay
from threading import Thread
import threading
from IPython import display
from backtest import Backtest
from rlagents import RLStratAgentDyn, COLS, RLStratAgentDynFeatures
import time,getopt,sys,os

from feeds import BackFeed,DataFeed
from featfuncs import feat_aug,add_addl_features_feed,add_ta_features_feed,add_sym_feature_feed
from featfuncs import add_global_indices_feed

from feed_env import Episode
import aspectlib
import yaml
import pickle
import plotly.express as px
import plotly
import plotly.graph_objects as go

import plotly.express as px

importing Jupyter notebook from india_calendar.ipynb
importing Jupyter notebook from backtest.ipynb
importing Jupyter notebook from feeds.ipynb
importing Jupyter notebook from synfeed.ipynb
importing Jupyter notebook from featfuncs.ipynb
importing Jupyter notebook from feed_env.ipynb
importing Jupyter notebook from rlagents.ipynb
importing Jupyter notebook from aiagentbase.ipynb


In [2]:
import pandas as pd


def check_numeric(df, col):
    return df[col].dtype in ['float64', 'int64']

def difference_cols(df, a, b):
    df[f'{a}-{b}'] = df[a] - df[b]
    return df, f'{a}-{b}'

def get_ma_base_string(s):
    idx = s.find('_ma_')
    if idx == -1:
        return None
    return s[:idx]

def moving_avg(df, col, window_size=3, center=False):
    col_name = f'{col}_ma_{window_size}'
    df[col_name] = df[col].rolling(window_size, min_periods=1, center=center).mean()
    return df, col_name

def slope(df, col, window):
    col_name = f'{col}_slope_{window}'
    df[col_name] = df[col].diff(periods=window).fillna(df[col])/window
    return df, col_name

def max_change_helper(seq):
    ans = []
    tracker = {i:0 for i in range(seq[-1]+1)}
    for i in seq:
        tracker[i] += 1
        ans.append(tracker[i])
    return ans

def max_change(df, col):
    inc_tracker = df[col].diff().lt(0).cumsum().values
    dec_tracker = df[col].diff().gt(0).cumsum().values
    
    inc_values = max_change_helper(inc_tracker)
    dec_values = max_change_helper(dec_tracker)
    
    combined = [inc_values[i]-1 if inc_values[i] >= dec_values[i] \
                else -dec_values[i]+1 for i in range(len(inc_values))]
    
    col_name = f'{col}_changelen'
    df[col_name] = combined
    return df, col_name

def discretize(df, col, thresholds):
    stats = df[col].describe()
    low_thresh, high_thresh = stats['25%'], stats['75%']
    thresholds[col] = (low_thresh, high_thresh)
    df[f'{col}_val'] = df[col].apply(lambda x: 0 if x<=low_thresh else 2 if x>=high_thresh else 1)
    df[f'{col}_polarity'] = df[col].apply(lambda x: 1 if x>0 else -1)
    # df[f'{col}_discrete'] = df[f'{col}_val'] + df[f'{col}_polarity']
    return df, [f'{col}_val', f'{col}_polarity'] #, f'{col}_discrete'

def add_features(feed, columns_to_use=None, do_discretization=True):
    thresholds = {}
    if columns_to_use is None:
        columns_to_use = ['Open', 'High', 'Low', 'Close', 'Volume', 'row_num', 'Open_n', 
                    'High_n', 'Low_n', 'Close_n', 'Volume_n', 'SMA_10',
        'SMA_20', 'VOL_SMA_20', 'RSI_14', 'BBL_5_2.0', 'BBM_5_2.0', 'BBU_5_2.0',
        'BBB_5_2.0', 'BBP_5_2.0', 'MACD_12_26_9', 'MACDh_12_26_9',
        'MACDs_12_26_9', 'VWAP_D', 'MOM_30', 'CMO_14']
        
    subtract_col_names = [('High', 'Low'), ('Open', 'Close'), ('SMA_20', 'SMA_10'), ('Open_n', 'Close_n'), ('High_n', 'Low_n'), ('Open', 'High')]
    subtract_cols = []

    for cols in subtract_col_names:
        if cols[0] not in columns_to_use or cols[1] not in columns_to_use:
            continue
        feed, added_col = difference_cols(feed, cols[0], cols[1])
        subtract_cols.append(added_col)
        
    window_sizes = [1,5,10,20,50]
    pre_avg_cols = columns_to_use + subtract_cols
    avg_cols = []

    for window in window_sizes:
        for col in pre_avg_cols:
            feed, added_col = moving_avg(feed, col, window_size=window)
            avg_cols.append(added_col)
                
    pre_slope_cols = columns_to_use + subtract_cols + avg_cols
    window_sizes = [1,3,5,10,15]
    slope_cols = []

    for window in window_sizes:
        for col in pre_slope_cols:
            feed, added_col = slope(feed, col, window=window)
            slope_cols.append(added_col)
            
    intra_ma_diff_cols = []

    for i in range(len(avg_cols)-1):
        for j in range(i+1, len(avg_cols)):
            colA, colB = avg_cols[i], avg_cols[j]
            baseA, baseB = get_ma_base_string(colA), get_ma_base_string(colB)
            if baseA != baseB: continue
            
            feed, added_col = difference_cols(feed, colA, colB)
            intra_ma_diff_cols.append(added_col)
            
    pre_change_cols = columns_to_use + subtract_cols + avg_cols + slope_cols + intra_ma_diff_cols
    change_cols = []

    for col in pre_change_cols:
        feed, added_col = max_change(feed, col)
        change_cols.append(added_col)
        
    pre_discrete_cols = pre_change_cols + change_cols
    discrete_cols = []

    if do_discretization:
        for col in pre_discrete_cols:
            feed, added_cols = discretize(feed, col, thresholds)
            for added_col in added_cols: 
                discrete_cols.append(added_col)
    else:
        pass
        # Use the stored thresholds
        
    return feed, pre_discrete_cols, discrete_cols, thresholds

In [3]:
def stringify(x):
    return pd.to_datetime(x['Datetime']).strftime('%d-%b-%Y')

In [4]:
complete_thresh = {}

In [5]:
with open('additional_utils/cols.pkl', 'rb') as f:
        d = pickle.load(f)
imp_cols = d['imp_cols']
cols_to_use = d['cols_to_use']
prediscrete_imp_cols = d['prediscrete_imp_cols']

In [7]:
imp_cols_n = list(set(['Open_n-Close_n_ma_1-Open_n-Close_n_ma_10_val',
 'Close_n_slope_1_polarity',
 'Volume_n_ma_10_val',
 'Open_n-Close_n_slope_1_val',
 'MACDs_12_26_9_ma_20_changelen_val',
 'Open_n-Close_n_ma_1-Open_n-Close_n_ma_50_val',
 'BBP_5_2.0_slope_1_changelen_val',
 'BBM_5_2.0_ma_1-BBM_5_2.0_ma_20_changelen_val',
 'Open_n-High_n_ma_5-Open_n-High_n_ma_20_changelen_val',
 'BBL_5_2.0_ma_5_slope_1_changelen_polarity',
 'High_n-Low_n_ma_50_changelen_val',
 'High_n-Low_n_ma_20_slope_15_changelen_val',
 'BBL_5_2.0_ma_10_slope_15_val',
 'Open_n-Close_n_ma_10_val',
 'Volume_n_ma_5_slope_5_val',
 'Close_n_slope_1_polarity',
 'Low_n_ma_5_slope_3_polarity',
 'Open_n_ma_10_slope_5_changelen_polarity',
 'RSI_14_ma_1_val',
 'Open_n-Close_n_polarity',
 'Open_n-High_n_ma_1-Open_n-High_n_ma_50_val',
 'Open_n-Close_n_ma_1-Open_n-Close_n_ma_20_val',
 'Close_n_ma_1-Close_n_ma_50_changelen_polarity',
 'High_n_ma_1-High_n_ma_20_changelen_polarity',
 'Open_n-High_n_ma_1-Open_n-High_n_ma_50_polarity']))

In [None]:
import pickle
d['imp_cols_n'] = imp_cols_n

with open('additional_utils/cols.pkl', 'wb') as f:
        pickle.dump(d, f)

In [8]:
DATAFILE='../algodata/realdata/alldata.csv'
print(f'Reading datafile: {DATAFILE}')
df=pd.read_csv(DATAFILE)

if 'Date' not in df.columns: 
    print('Adding Date')
    df['Date']=df.apply(stringify,axis=1)

print('Creating feed')
data=pd.read_csv('./capvolfiltered.csv')
tickers=[t for t in list(df['ticker'].unique()) if t in list(data['ticker'].values)]
print(tickers)
feed=DataFeed(tickers=tickers,dfgiven=True,df=df)

print('Processing feed')
add_addl_features_feed(feed,tickers=feed.tickers)
add_sym_feature_feed(feed,tickers=feed.tickers)


print('Adding features to feed!')
for ticker in feed.data:
    print(f'[INFO] ON ticker={ticker}')
    df = feed.data[ticker]
    df, pre_discrete_cols, discrete_cols, thresholds = add_features(df, columns_to_use=cols_to_use)
    complete_thresh[ticker] = thresholds

Reading datafile: ../algodata/realdata/alldata.csv
['NELCAST.NS', 'AARTIDRUGS.NS', 'CYIENT.NS', 'HINDZINC.NS', 'TRENT.NS', 'TATAELXSI.NS', 'MINDTREE.NS', 'TI.NS', 'OBEROIRLTY.NS', 'SOMANYCERA.NS']
Creating feed
['NTPC.NS', 'DCM.NS', 'ACC.NS', 'TECHM.NS', 'MRF.NS', 'SUNPHARMA.NS', 'BPCL.NS', 'RELIANCE.NS', 'MUTHOOTFIN.NS', 'HINDUNILVR.NS', 'HDFC.NS', 'POWERGRID.NS', 'WIPRO.NS', 'PFC.NS', 'PETRONET.NS', 'DRREDDY.NS', 'ABBOTINDIA.NS', 'TCS.NS', 'BOSCHLTD.NS', 'KOTAKBANK.NS', 'SBIN.NS', 'MARUTI.NS']
Processing feed
Adding features to feed!
[INFO] ON ticker=NTPC.NS
[INFO] ON ticker=DCM.NS
[INFO] ON ticker=ACC.NS
[INFO] ON ticker=TECHM.NS
[INFO] ON ticker=MRF.NS
[INFO] ON ticker=SUNPHARMA.NS
[INFO] ON ticker=BPCL.NS
[INFO] ON ticker=RELIANCE.NS
[INFO] ON ticker=MUTHOOTFIN.NS
[INFO] ON ticker=HINDUNILVR.NS
[INFO] ON ticker=HDFC.NS
[INFO] ON ticker=POWERGRID.NS
[INFO] ON ticker=WIPRO.NS
[INFO] ON ticker=PFC.NS
[INFO] ON ticker=PETRONET.NS
[INFO] ON ticker=DRREDDY.NS
[INFO] ON ticker=ABBOTINDIA

In [10]:
final_thresh = {}
columns = complete_thresh['AARTIDRUGS.NS'].keys()

for col in columns:
    ls, hs = [], []
    for ticker in complete_thresh:
        l, h = complete_thresh[ticker][col]
        ls.append(l); hs.append(h)
    final_thresh[col] = (np.mean(ls), np.mean(hs))

In [18]:
# complete_thresh['WIPRO.NS']

In [25]:
import json
extra_tickers = [t for t in list(data['ticker'].values) if t not in tickers]
# extra_tickers = list(np.random.choice(extra_tickers, size=20, replace=False))
etra_tickers = {'tickers': extra_tickers}
with open('extra_ticker.json', 'w') as f:
    json.dump(extra_tickers, f)

In [28]:
print(tickers)

['NTPC.NS', 'DCM.NS', 'ACC.NS', 'TECHM.NS', 'MRF.NS', 'SUNPHARMA.NS', 'BPCL.NS', 'RELIANCE.NS', 'MUTHOOTFIN.NS', 'HINDUNILVR.NS', 'HDFC.NS', 'POWERGRID.NS', 'WIPRO.NS', 'PFC.NS', 'PETRONET.NS', 'DRREDDY.NS', 'ABBOTINDIA.NS', 'TCS.NS', 'BOSCHLTD.NS', 'KOTAKBANK.NS', 'SBIN.NS', 'MARUTI.NS']


In [27]:
print(extra_tickers)

['DMART.NS', 'NESTLEIND.NS', 'ULTRACEMCO.NS', 'SBILIFE.NS', 'ICICIBANK.NS', 'BAJAJ-AUTO.NS', 'HDFCLIFE.NS', 'COALINDIA.NS', 'BAJAJFINSV.NS', 'HDFCBANK.NS', 'BAJFINANCE.NS', 'ICICIPRULI.NS', 'IOC.NS', 'SBICARD.NS', 'SHREECEM.NS', 'LT.NS', 'ICICIGI.NS', 'ONGC.NS', 'PGHH.NS', 'HAL.NS', 'BAJAJHLDNG.NS', 'PAGEIND.NS', 'GILLETTE.NS', 'ITC.NS', 'INDIGO.NS', 'AXISBANK.NS', 'INDUSINDBK.NS', 'ASIANPAINT.NS', 'OBCL.NS', 'HONAUT.NS', 'SANOFI.NS', 'NDTV.NS', 'COLPAL.NS']


In [29]:
[t for t in list(df['ticker'].values) if t not in data['ticker'].values]

[]

In [31]:
DATAFILE='../algodata/realdata/alldata.csv'
print(f'Reading datafile: {DATAFILE}')
df=pd.read_csv(DATAFILE)

Reading datafile: ../algodata/realdata/alldata.csv


In [33]:
df['ticker'].nunique()

316

In [37]:
print([t for t in list(df['ticker'].unique()) if t not in data['ticker'].values])

['NELCAST.NS', 'AARTIDRUGS.NS', 'CYIENT.NS', 'HINDZINC.NS', 'TRENT.NS', 'TATAELXSI.NS', 'MINDTREE.NS', 'TI.NS', 'OBEROIRLTY.NS', 'SOMANYCERA.NS', 'CINEVISTA.NS', 'BAYERCROP.NS', 'VLSFINANCE.NS', 'GAIL.NS', 'WOCKPHARMA.NS', 'LOKESHMACH.NS', 'DBREALTY.NS', 'JKIL.NS', 'UTTAMSTL.NS', 'QUICKHEAL.NS', 'KOTHARIPRO.NS', 'RAMANEWS.NS', 'JBFIND.NS', 'SANWARIA.NS', 'SHYAMCENT.NS', 'WENDT.NS', 'DAAWAT.NS', 'PRAJIND.NS', 'JAGRAN.NS', 'UMESLTD.NS', 'TREEHOUSE.NS', 'ORIENTCEM.NS', 'OMAXE.NS', 'EKC.NS', 'INDOCO.NS', 'MTNL.NS', 'TRIGYN.NS', 'INDRAMEDCO.NS', 'VIPIND.NS', 'KKCL.NS', 'CYBERMEDIA.NS', 'CENTUM.NS', 'TATAMTRDVR.NS', 'NECCLTD.NS', 'CUBEXTUB.NS', 'KPRMILL.NS', 'SCHNEIDER.NS', 'SATIN.NS', 'ACCELYA.NS', 'ZUARIGLOB.NS', 'VHL.NS', 'ADSL.NS', 'NMDC.NS', 'NIITLTD.NS', 'MAGNUM.NS', 'OIL.NS', 'KSCL.NS', 'IRB.NS', 'DLF.NS', 'SUDARSCHEM.NS', 'PFS.NS', 'CTE.NS', 'GANESHHOUC.NS', 'MBECL.NS', 'MANAKSTEEL.NS', 'LICHSGFIN.NS', 'WELENT.NS', 'CENTENKA.NS', 'PRECOT.NS', 'HMVL.NS', 'MARICO.NS', 'PUNJLLOYD.NS', '

In [42]:
df['ticker'].value_counts()[:20]

TV18BRDCST.NS    12078
TRENT.NS         12064
WOCKPHARMA.NS    12024
SIEMENS.NS       12015
BHARTIARTL.NS    12005
AARTIIND.NS      12005
TATACHEM.NS      12004
WIPRO.NS         12004
VOLTAS.NS        12002
MUTHOOTFIN.NS    11934
PFC.NS           11933
PEL.NS           11933
JUNIORBEES.NS    11931
SBIN.NS          11930
TATAMTRDVR.NS    11929
DABUR.NS         11929
BANKINDIA.NS     11904
LICHSGFIN.NS     11863
TATAMOTORS.NS    11856
ZEEL.NS          11856
Name: ticker, dtype: int64

In [39]:
len([t for t in list(data['ticker'].values) if t not in df['ticker'].unique()])

33