In [13]:
import os
from datetime import timedelta, datetime

import dotenv
import pandas as pd
import talib
from sqlalchemy import create_engine, text

In [14]:
# Let python-dotenv populate the process environment, then pick up the
# database connection string from it (None when the variable is not set).
dotenv.load_dotenv()
SQLALCHEMY_DATABASE_URI = os.environ.get('SQLALCHEMY_DATABASE_URI')

In [15]:
# Single SQLAlchemy engine shared by every query in this notebook.
# pool_size and max_overflow are passed straight through to the engine's
# connection pool configuration.
db_engine = create_engine(SQLALCHEMY_DATABASE_URI, pool_size=10, max_overflow=20)

In [16]:
# Which symbol to analyse and the date window (YYYY-MM-DD strings) to analyse it over.
request_dict = dict(
    symbol='HDFCBANK',
    dates=dict(start='2018-01-01', end='2019-01-01'),
)

In [17]:
# Load the daily rows for the requested symbol, indexed by timestamp.
#
# The values are sent as bound parameters (:symbol, :window_start, :window_end)
# instead of being spliced into the SQL text with str.format, so a symbol or
# date containing a quote can neither break nor alter the statement.
#
# The window starts 500 days before the requested start date - presumably so
# the long-period indicators computed later (e.g. the 200-period EMA) have
# warm-up history; confirm before changing it. At most 1000 rows are returned.
df = pd.read_sql(
    text('''
    SELECT *
    FROM stock_daily_data
    WHERE symbol = :symbol
    AND   TIMESTAMP BETWEEN :window_start AND :window_end
    ORDER BY TIMESTAMP
    LIMIT 1000;
    '''),
    db_engine,
    params={
        'symbol': request_dict['symbol'],
        'window_start': (
            datetime.strptime(request_dict['dates']['start'], '%Y-%m-%d')
            - timedelta(days=500)
        ),
        'window_end': request_dict['dates']['end'],
    },
    index_col=['timestamp'],
)

In [18]:
'''
# Generate CSV
df.to_csv('CSV/{}_{}_{}.csv'.format(
    request_dict['symbol'],
    request_dict['dates']['start'],
    request_dict['dates']['end'],
))
'''
# The string literal above is a disabled export of the raw query result; as a
# bare expression it has no effect.
#
# Take an independent (deep) copy of the query result so that columns added to
# df_extended never show up on df.
df_extended = df.copy()
# Last expression of the cell: render the copy as a preview.
df_extended

Unnamed: 0_level_0,Open,High,Low,Close,Volume,symbol,updated_at
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-08-19,625.0000,625.000,620.725,623.5500,2055318,HDFCBANK,2020-04-17 03:06:45
2016-08-22,622.5250,627.225,620.500,625.1500,2750646,HDFCBANK,2020-04-17 03:06:45
2016-08-23,625.5000,627.000,623.025,625.7250,2657262,HDFCBANK,2020-04-17 03:06:45
2016-08-24,626.0250,632.100,623.525,630.9000,2785468,HDFCBANK,2020-04-17 03:06:45
2016-08-25,632.0000,633.400,627.150,629.2750,4364944,HDFCBANK,2020-04-17 03:06:45
...,...,...,...,...,...,...,...
2018-12-26,1038.5000,1063.500,1034.500,1061.1801,3800958,HDFCBANK,2020-04-17 03:06:35
2018-12-27,1066.5000,1069.100,1049.220,1052.5300,5819934,HDFCBANK,2020-04-17 03:06:35
2018-12-28,1058.6801,1068.880,1057.750,1061.4500,4707806,HDFCBANK,2020-04-17 03:06:35
2018-12-31,1068.5000,1068.500,1058.350,1060.8500,2933264,HDFCBANK,2020-04-17 03:06:35


In [19]:
# Technical indicators, each written to df_extended as one or more new columns.

# Exponential moving averages of the close (50 and 200 periods).
df_extended['EMA50'] = talib.EMA(df_extended['Close'], timeperiod=50)
df_extended['EMA200'] = talib.EMA(df_extended['Close'], timeperiod=200)

# 14-period RSI of the close.
# NOTE(review): the column is spelled 'rsa14' although it holds talib.RSI output;
# the name is left unchanged because it ends up in the exported CSV.
df_extended['rsa14'] = talib.RSI(df_extended['Close'], timeperiod=14)

# Parabolic SAR from the high/low series.
df_extended['sar'] = talib.SAR(
    df_extended['High'], df_extended['Low'],
    acceleration=0.02, maximum=0.2,
)

# MACD (12/26/9) with an explicit moving-average type for each leg.
# NOTE(review): matype 0 is presumably TA-Lib's simple moving average - confirm.
(
    df_extended['macd'],
    df_extended['macdsignal'],
    df_extended['macdhist'],
) = talib.MACDEXT(
    df_extended['Close'],
    fastperiod=12, fastmatype=0,
    slowperiod=26, slowmatype=0,
    signalperiod=9, signalmatype=0,
)

# Stochastic oscillator (14, 3, 3).
# NOTE(review): the second STOCH output is stored as 'slowkd'; presumably this is
# the slow %D line - confirm before renaming.
(
    df_extended['slowk'],
    df_extended['slowkd'],
) = talib.STOCH(
    df_extended['High'], df_extended['Low'], df_extended['Close'],
    fastk_period=14,
    slowk_period=3, slowk_matype=0,
    slowd_period=3, slowd_matype=0,
)

# Bollinger bands over 5 periods, 2 deviations either side.
(
    df_extended['upperband'],
    df_extended['middleband'],
    df_extended['lowerband'],
) = talib.BBANDS(
    df_extended['Close'],
    timeperiod=5, nbdevup=2, nbdevdn=2, matype=0,
)

In [20]:
# Preview df_extended again, now that the indicator columns have been added to it.
# Early rows show NaN for indicators whose look-back window is not yet filled.
df_extended

Unnamed: 0_level_0,Open,High,Low,Close,Volume,symbol,updated_at,EMA50,EMA200,rsa14,sar,macd,macdsignal,macdhist,slowk,slowkd,upperband,middleband,lowerband
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2016-08-19,625.0000,625.000,620.725,623.5500,2055318,HDFCBANK,2020-04-17 03:06:45,,,,,,,,,,,,
2016-08-22,622.5250,627.225,620.500,625.1500,2750646,HDFCBANK,2020-04-17 03:06:45,,,,627.22500,,,,,,,,
2016-08-23,625.5000,627.000,623.025,625.7250,2657262,HDFCBANK,2020-04-17 03:06:45,,,,627.22500,,,,,,,,
2016-08-24,626.0250,632.100,623.525,630.9000,2785468,HDFCBANK,2020-04-17 03:06:45,,,,620.50000,,,,,,,,
2016-08-25,632.0000,633.400,627.150,629.2750,4364944,HDFCBANK,2020-04-17 03:06:45,,,,620.73200,,,,,,632.382911,626.92000,621.457089
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-26,1038.5000,1063.500,1034.500,1061.1801,3800958,HDFCBANK,2020-04-17 03:06:35,1027.956701,999.731937,57.965459,1032.39111,8.986534,18.938082,-9.951548,57.074778,63.437921,1076.604253,1057.50198,1038.399707
2018-12-27,1066.5000,1069.100,1049.220,1052.5300,5819934,HDFCBANK,2020-04-17 03:06:35,1028.920360,1000.257290,54.385594,1034.50000,7.912624,16.433053,-8.520429,55.484040,57.859707,1074.556491,1055.66398,1036.771469
2018-12-28,1058.6801,1068.880,1057.750,1061.4500,4707806,HDFCBANK,2020-04-17 03:06:35,1030.196032,1000.866173,57.313243,1034.50000,8.427560,14.274028,-5.846469,66.541130,59.699983,1069.665891,1054.20998,1038.754069
2018-12-31,1068.5000,1068.500,1058.350,1060.8500,2933264,HDFCBANK,2020-04-17 03:06:35,1031.398148,1001.463027,57.048009,1037.21200,7.964611,12.435602,-4.470991,66.368394,62.797855,1071.643328,1055.26600,1038.888672


In [21]:
# Write the indicator-augmented frame to CSV/<symbol>_<start>_<end>_indicators.csv.
# to_csv does not create missing parent directories, so make sure CSV/ exists
# first; otherwise the export fails on a fresh checkout.
os.makedirs('CSV', exist_ok=True)
df_extended.to_csv('CSV/{}_{}_{}_indicators.csv'.format(
    request_dict['symbol'],
    request_dict['dates']['start'],
    request_dict['dates']['end'],
))

In [25]:
# Parameters for labeling each row with a "true" signal.
# n_days_lst: look-ahead horizons, counted in rows of the daily series.
n_days_lst = [5, 10, 15]
# Fractional price change thresholds: at or above p_target is a BUY,
# at or below p_stoploss is a SELL.
p_target, p_stoploss = 0.10, -0.10

In [23]:
def get_true_signal(change_n, target=None, stoploss=None):
    """Classify a fractional forward price change as a trading signal.

    Args:
        change_n: Fractional change between the current close and the close
            n rows ahead, e.g. 0.05 for +5%. May be NaN/None when the future
            close is not available.
        target: Threshold at or above which the change is a 'BUY'.
            Defaults to the notebook-level ``p_target``.
        stoploss: Threshold at or below which the change is a 'SELL'.
            Defaults to the notebook-level ``p_stoploss``.

    Returns:
        'BUY', 'SELL' or 'NEUTRAL'; None when ``change_n`` is missing, so rows
        whose outcome is unknown are not silently labeled 'NEUTRAL'.
    """
    # NaN is the only value that compares unequal to itself; every comparison
    # against it is False, which previously fell through to 'NEUTRAL'.
    if change_n is None or change_n != change_n:
        return None

    # Resolve the defaults at call time so later edits to the globals are honored.
    if target is None:
        target = p_target
    if stoploss is None:
        stoploss = p_stoploss

    if change_n >= target:
        return 'BUY'
    if change_n <= stoploss:
        return 'SELL'
    return 'NEUTRAL'
    

In [26]:
# Build the labeling frame from the raw query result, without the
# 'updated_at' bookkeeping column, as an independent copy of df.
df_close_plus = df.drop(columns=['updated_at']).copy()

# For every look-ahead horizon add three columns, in this order:
#   Close_<n>  - the close n rows later (NaN for the last n rows)
#   Change_<n> - fractional change from today's close to that future close
#   Signal_<n> - get_true_signal applied to the change
for n_days in n_days_lst:
    close_col = 'Close_{}'.format(n_days)
    change_col = 'Change_{}'.format(n_days)
    signal_col = 'Signal_{}'.format(n_days)

    base_close = df_close_plus['Close']
    future_close = base_close.shift(-n_days)

    df_close_plus[close_col] = future_close
    df_close_plus[change_col] = (future_close - base_close) / base_close
    df_close_plus[signal_col] = df_close_plus[change_col].apply(get_true_signal)

# Render the complete frame: row and column truncation is lifted only inside
# this context, leaving the global pandas display options alone.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_close_plus)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,symbol,Close_5,Change_5,Signal_5,Close_10,Change_10,Signal_10,Close_15,Change_15,Signal_15
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016-08-19,625.0,625.0,620.725,623.55,2055318,HDFCBANK,628.55,0.008019,NEUTRAL,642.55,0.030471,NEUTRAL,639.95,0.026301,NEUTRAL
2016-08-22,622.525,627.225,620.5,625.15,2750646,HDFCBANK,619.05,-0.009758,NEUTRAL,650.2,0.04007,NEUTRAL,634.725,0.015316,NEUTRAL
2016-08-23,625.5,627.0,623.025,625.725,2657262,HDFCBANK,634.325,0.013744,NEUTRAL,643.375,0.028207,NEUTRAL,639.8,0.022494,NEUTRAL
2016-08-24,626.025,632.1,623.525,630.9,2785468,HDFCBANK,645.6,0.0233,NEUTRAL,644.7,0.021874,NEUTRAL,648.925,0.02857,NEUTRAL
2016-08-25,632.0,633.4,627.15,629.275,4364944,HDFCBANK,641.85,0.019983,NEUTRAL,645.2,0.025307,NEUTRAL,645.075,0.025108,NEUTRAL
2016-08-26,630.0,630.95,626.95,628.55,1260568,HDFCBANK,642.55,0.022273,NEUTRAL,639.95,0.018137,NEUTRAL,644.225,0.024938,NEUTRAL
2016-08-29,627.75,627.75,618.1,619.05,1472092,HDFCBANK,650.2,0.050319,NEUTRAL,634.725,0.025321,NEUTRAL,646.5,0.044342,NEUTRAL
2016-08-30,622.55,635.25,621.975,634.325,4933804,HDFCBANK,643.375,0.014267,NEUTRAL,639.8,0.008631,NEUTRAL,653.45,0.03015,NEUTRAL
2016-08-31,634.5,647.5,632.75,645.6,5580682,HDFCBANK,644.7,-0.001394,NEUTRAL,648.925,0.00515,NEUTRAL,656.35,0.016651,NEUTRAL
2016-09-01,644.0,645.0,637.7,641.85,2612338,HDFCBANK,645.2,0.005219,NEUTRAL,645.075,0.005025,NEUTRAL,648.075,0.009699,NEUTRAL


In [29]:
# TODO:
# - Target and stoploss are to be provided by the little one.
# - For those target/stoploss values, and over a range of n_days values, generate
#   the True signal values and check how close they are to the Nudge values.
#   (this will tell us nothing)

# - Calculate OBV for the Nudge values
# - Run backtest
# - Deploy on prod

# - IRR
# - Stock selection