# Importing Libraries

In [23]:
import yfinance as yf
import pandas_ta as ta
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Making the Dataset

In [24]:
folder_path = 'data/new_label_dataset/'

In [25]:
# Bank Nifty Stock Symbols
bank_nifty_stock_symbols = [
    "HDFCBANK.NS",
    "AXISBANK.NS",
    "ICICIBANK.NS",
    "HDFCBANK.NS",
    "KOTAKBANK.NS",
    "SBIN.NS",
    "INDUSINDBK.NS",
    "BANDHANBNK.NS",
    "FEDERALBNK.NS",
    "PNB.NS",
    "IDFCFIRSTB.NS",
    "BANKBARODA.NS",
    "AUBANK.NS"
]

In [26]:
start_date = "2017-01-01"
end_date = "2022-12-31"

In [27]:
def make_dataset(stock_symbol, start_date, end_date):

    stock_df = yf.download(stock_symbol, start=start_date, end=end_date)

    # Simple Moving Average (SMA)
    stock_df["SMA_9"] = ta.sma(stock_df["Close"], length = 9)
    stock_df["SMA_21"] = ta.sma(stock_df["Close"], length = 21)

    # Exponential Moving Average (EMA)
    stock_df["EMA_9"] = ta.ema(stock_df["Close"], length = 9)
    stock_df["EMA_21"] = ta.ema(stock_df["Close"], length = 21)

    # Double Exponential Moving Average (DEMA)
    stock_df["DEMA_9"] = ta.dema(stock_df["Close"], length = 9)
    stock_df["DEMA_21"] = ta.dema(stock_df["Close"], length = 21)

    # Moving Average Convergence Divergence (MACD)
    stock_df["MACD_Line"] = ta.macd(stock_df["Close"])["MACD_12_26_9"]
    stock_df["MACD_Signal_Line"] = ta.macd(stock_df["Close"])["MACDs_12_26_9"]

    # Relative Strength Index (RSI)
    stock_df["RSI_14"] = ta.rsi(stock_df["Close"], length = 14) 

    # Stochastic Oscillator
    stoch = ta.stoch(stock_df['High'], stock_df['Low'], stock_df['Close'], k=14, d=3)
    stock_df["Stoch_Oscillator_K"] = stoch["STOCHk_14_3_3"]
    stock_df["Stoch_Oscillator_D"] = stoch["STOCHd_14_3_3"]

    # Bollinger Bands / Standard Deviation
    bollinger_bands = ta.bbands(stock_df["Close"], length = 20, std = 2)
    stock_df['BB_middle'] = bollinger_bands['BBM_20_2.0']
    stock_df['BB_upper'] = bollinger_bands['BBU_20_2.0']
    stock_df['BB_lower'] = bollinger_bands['BBL_20_2.0']

    # Average Directional Index (ADX)
    adx = ta.adx(stock_df['High'], stock_df['Low'], stock_df['Close'], length = 14)
    stock_df['ADX'] = adx['ADX_14']
    stock_df['ADX+DI'] = adx['DMP_14'] 
    stock_df['ADX-DI'] = adx['DMN_14']

    # Chaikin Money Flow (CMF)
    stock_df['CMF'] = ta.cmf(stock_df['High'], stock_df['Low'], stock_df['Close'], stock_df['Volume'], length = 21)

    # On Balance Volume (OBV)
    stock_df['OBV'] = ta.obv(stock_df['Close'], stock_df['Volume'])

    # Commodity Channel Index (CCI)
    stock_df['CCI'] = ta.cci(stock_df['High'], stock_df['Low'], stock_df['Close'], length = 20)

    # Williams %R
    stock_df['Williams_%R'] = ta.willr(stock_df['High'], stock_df['Low'], stock_df['Close'], length = 14)

    # Average True Range (ATR)
    stock_df['ATR'] = ta.atr(stock_df['High'], stock_df['Low'], stock_df['Close'], length = 14)

    return stock_df

In [28]:
def fill_moving_averages(stock_df):
    moving_avg_columns = ['SMA_9', 'SMA_21', 'EMA_9', 'EMA_21', 'DEMA_9', 'DEMA_21']
    
    for col in moving_avg_columns:
        stock_df[col] = stock_df[col].fillna(stock_df[col].expanding().mean())
    
    return stock_df

In [29]:
for stock_symbol in bank_nifty_stock_symbols:
    stock_df = make_dataset(stock_symbol, start_date, end_date)
    print("Shape of the dataset: ", stock_df.shape)
    stock_df.to_csv(f"{folder_path}{stock_symbol}.csv")
    print(f"Data for {stock_symbol} stored in {folder_path}{stock_symbol}.csv")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of the dataset:  (1484, 28)
Data for HDFCBANK.NS stored in data/new_label_dataset/HDFCBANK.NS.csv
Shape of the dataset:  (1484, 28)
Data for AXISBANK.NS stored in data/new_label_dataset/AXISBANK.NS.csv


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of the dataset:  (1484, 28)
Data for ICICIBANK.NS stored in data/new_label_dataset/ICICIBANK.NS.csv
Shape of the dataset:  (1484, 28)
Data for HDFCBANK.NS stored in data/new_label_dataset/HDFCBANK.NS.csv


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of the dataset:  (1484, 28)
Data for KOTAKBANK.NS stored in data/new_label_dataset/KOTAKBANK.NS.csv
Shape of the dataset:  (1484, 28)
Data for SBIN.NS stored in data/new_label_dataset/SBIN.NS.csv



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of the dataset:  (1484, 28)
Data for INDUSINDBK.NS stored in data/new_label_dataset/INDUSINDBK.NS.csv
Shape of the dataset:  (1178, 28)
Data for BANDHANBNK.NS stored in data/new_label_dataset/BANDHANBNK.NS.csv



[*********************100%***********************]  1 of 1 completed


Shape of the dataset:  (1484, 28)
Data for FEDERALBNK.NS stored in data/new_label_dataset/FEDERALBNK.NS.csv
Shape of the dataset:  (1484, 28)


[*********************100%***********************]  1 of 1 completed


Data for PNB.NS stored in data/new_label_dataset/PNB.NS.csv
Shape of the dataset:  (1484, 28)
Data for IDFCFIRSTB.NS stored in data/new_label_dataset/IDFCFIRSTB.NS.csv


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of the dataset:  (1484, 28)
Data for BANKBARODA.NS stored in data/new_label_dataset/BANKBARODA.NS.csv
Shape of the dataset:  (1355, 28)
Data for AUBANK.NS stored in data/new_label_dataset/AUBANK.NS.csv





In [30]:
aubank_df = pd.read_csv(f"{folder_path}AUBANK.NS.csv")

In [31]:
aubank_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA_9,SMA_21,EMA_9,...,BB_upper,BB_lower,ADX,ADX+DI,ADX-DI,CMF,OBV,CCI,Williams_%R,ATR
0,2017-07-11,268.5,287.325012,266.200012,283.625,282.023376,28572624,,,,...,,,,,,,28572624.0,,,
1,2017-07-12,288.225006,321.350006,287.5,314.774994,312.997437,35549156,,,,...,,,,,,,64121780.0,,,
2,2017-07-13,326.0,349.475006,315.049988,345.575012,343.623566,43750394,,,,...,,,,,,,107872174.0,,,
3,2017-07-14,348.899994,362.75,280.549988,299.399994,297.70929,67296768,,,,...,,,,,,,40575406.0,,,
4,2017-07-17,295.0,306.850006,281.174988,298.049988,296.366913,23021580,,,,...,,,,,,,17553826.0,,,


In [32]:
aubank_df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA_9,SMA_21,EMA_9,EMA_21,...,BB_upper,BB_lower,ADX,ADX+DI,ADX-DI,CMF,OBV,CCI,Williams_%R,ATR
count,1355.0,1355.0,1355.0,1355.0,1355.0,1355.0,1347.0,1335.0,1347.0,1335.0,...,1336.0,1336.0,1328.0,1341.0,1341.0,1335.0,1355.0,1336.0,1342.0,1341.0
mean,430.413322,438.241274,421.833469,430.017878,428.271011,1769028.0,429.705172,429.238452,429.71238,429.320999,...,466.002984,392.547716,27.979974,21.226573,20.354704,0.019021,62594010.0,21.357652,-44.833658,17.033593
std,140.180256,142.510278,137.538472,139.875776,139.556089,3218745.0,138.760488,136.829869,138.237417,135.665593,...,150.297221,129.355492,10.737608,7.660815,8.011514,0.167709,72112040.0,113.426342,27.009421,6.89752
min,190.0,196.5,183.0,190.074997,189.362411,24610.0,198.42778,206.785715,200.185417,214.670555,...,228.058797,119.469471,10.782116,3.937697,2.616118,-0.487426,-19895540.0,-351.556824,-100.0,7.803421
25%,316.649994,322.450012,309.512512,317.125,315.682251,495465.0,317.986113,317.529763,317.250364,317.277248,...,341.31281,287.566688,19.829498,15.702769,14.500794,-0.104512,1101430.0,-65.991065,-67.139695,10.908416
50%,360.0,366.5,352.0,359.524994,357.853394,1113566.0,356.719449,352.058331,356.543037,351.045779,...,392.159109,334.549353,26.014522,20.602979,19.290568,0.02057,32518020.0,30.696951,-41.422026,15.598055
75%,580.987488,591.237488,570.012512,580.774994,578.796936,2074530.0,578.556942,579.760122,580.21576,582.249277,...,631.904671,531.197595,34.809767,26.146586,25.22502,0.121374,129203700.0,104.572593,-21.760324,22.120444
max,717.0,732.974976,711.5,717.525024,714.835083,67296770.0,700.98055,689.08333,696.143318,679.502438,...,745.690111,658.819359,62.939752,54.300496,49.483798,0.532465,218647500.0,403.456521,0.0,44.276451


In [33]:
aubank_df.isnull().sum()

Date                   0
Open                   0
High                   0
Low                    0
Close                  0
Adj Close              0
Volume                 0
SMA_9                  8
SMA_21                20
EMA_9                  8
EMA_21                20
DEMA_9                 8
DEMA_21               20
MACD_Line             25
MACD_Signal_Line      33
RSI_14                14
Stoch_Oscillator_K    15
Stoch_Oscillator_D    17
BB_middle             19
BB_upper              19
BB_lower              19
ADX                   27
ADX+DI                14
ADX-DI                14
CMF                   20
OBV                    0
CCI                   19
Williams_%R           13
ATR                   14
dtype: int64

In [34]:
def find_missing_indices(stock_df):
    
    missing_indices = {}

    for column in stock_df.columns:
        missing = stock_df[stock_df[column].isnull()].index.tolist()
        if missing:  
            missing_indices[column] = missing

    return missing_indices

In [35]:
missing_values = find_missing_indices(aubank_df)

for indicator, indices in missing_values.items():
    print(f"Missing values for {indicator}: {indices}")

Missing values for SMA_9: [0, 1, 2, 3, 4, 5, 6, 7]
Missing values for SMA_21: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Missing values for EMA_9: [0, 1, 2, 3, 4, 5, 6, 7]
Missing values for EMA_21: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Missing values for DEMA_9: [0, 1, 2, 3, 4, 5, 6, 7]
Missing values for DEMA_21: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Missing values for MACD_Line: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Missing values for MACD_Signal_Line: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]
Missing values for RSI_14: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Missing values for Stoch_Oscillator_K: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Missing values for Stoch_Oscillator_D: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
Missing 

In [36]:
def clean_datasets(folder_path):

    for stock_symbol in bank_nifty_stock_symbols:
        stock_df = pd.read_csv(f"{folder_path}{stock_symbol}.csv")
        if 'PSAR' in stock_df.columns:
            stock_df.drop('PSAR', axis=1, inplace=True)

        stock_df_clean = stock_df.dropna()

        print("Number of rows dropped: ", stock_df.shape[0] - stock_df_clean.shape[0])

        stock_df_clean.to_csv(f"{folder_path}{stock_symbol}.csv", index=False)
        print(f"Data for {stock_symbol} cleaned and stored in {folder_path}{stock_symbol}.csv")

In [37]:
clean_datasets(folder_path)

Number of rows dropped:  33
Data for HDFCBANK.NS cleaned and stored in data/new_label_dataset/HDFCBANK.NS.csv
Number of rows dropped:  33
Data for AXISBANK.NS cleaned and stored in data/new_label_dataset/AXISBANK.NS.csv
Number of rows dropped:  33
Data for ICICIBANK.NS cleaned and stored in data/new_label_dataset/ICICIBANK.NS.csv
Number of rows dropped:  0
Data for HDFCBANK.NS cleaned and stored in data/new_label_dataset/HDFCBANK.NS.csv
Number of rows dropped:  33
Data for KOTAKBANK.NS cleaned and stored in data/new_label_dataset/KOTAKBANK.NS.csv
Number of rows dropped:  33
Data for SBIN.NS cleaned and stored in data/new_label_dataset/SBIN.NS.csv
Number of rows dropped:  33
Data for INDUSINDBK.NS cleaned and stored in data/new_label_dataset/INDUSINDBK.NS.csv
Number of rows dropped:  33
Data for BANDHANBNK.NS cleaned and stored in data/new_label_dataset/BANDHANBNK.NS.csv
Number of rows dropped:  33
Data for FEDERALBNK.NS cleaned and stored in data/new_label_dataset/FEDERALBNK.NS.csv
Num

In [38]:
aubank_df_clean = pd.read_csv(f"{folder_path}AUBANK.NS.csv")
aubank_df_clean.isnull().sum()

Date                  0
Open                  0
High                  0
Low                   0
Close                 0
Adj Close             0
Volume                0
SMA_9                 0
SMA_21                0
EMA_9                 0
EMA_21                0
DEMA_9                0
DEMA_21               0
MACD_Line             0
MACD_Signal_Line      0
RSI_14                0
Stoch_Oscillator_K    0
Stoch_Oscillator_D    0
BB_middle             0
BB_upper              0
BB_lower              0
ADX                   0
ADX+DI                0
ADX-DI                0
CMF                   0
OBV                   0
CCI                   0
Williams_%R           0
ATR                   0
dtype: int64

In [39]:
print(aubank_df_clean.shape)

(1322, 29)


In [40]:
def create_target_label(folder_path):

    stock_maximums = {}

    for stock_symbol in bank_nifty_stock_symbols:
        stock_df = pd.read_csv(f"{folder_path}{stock_symbol}.csv")
        maximum_rise_in_price_in_consective_days = 0
        maximum_fall_in_price_in_consective_days = 0

        stock_df['Target'] = 0

        for i in range(1, stock_df.shape[0]):
            if stock_df['Close'][i] > stock_df['Close'][i-1]:
                maximum_rise_in_price_in_consective_days = max(maximum_rise_in_price_in_consective_days, stock_df['Close'][i] - stock_df['Close'][i-1])
            else:
                maximum_fall_in_price_in_consective_days = max(maximum_fall_in_price_in_consective_days, stock_df['Close'][i-1] - stock_df['Close'][i])

        stock_maximums[stock_symbol] = (maximum_rise_in_price_in_consective_days, maximum_fall_in_price_in_consective_days)
        
        for i in range(1, stock_df.shape[0]):
            change_in_price = stock_df['Close'][i] - stock_df['Close'][i-1]

            if (change_in_price > 0):
                percentage_change = (change_in_price / (maximum_rise_in_price_in_consective_days*(1.00)))
                percentage_change = percentage_change / 2
                percentage_change = round(percentage_change, 2)
                stock_df.loc[i, 'Target'] = 0.5 + percentage_change

            else:
                change_in_price = abs(change_in_price)
                percentage_change = (change_in_price / (maximum_fall_in_price_in_consective_days*(1.00)))
                percentage_change = percentage_change / 2
                percentage_change = round(percentage_change, 2)
                stock_df.loc[i, 'Target'] = 0.5 - percentage_change

        stock_df.to_csv(f"{folder_path}{stock_symbol}.csv", index=False)

    return stock_maximums

In [41]:
stock_maximums = create_target_label(folder_path)

In [42]:
aubank_df_target = pd.read_csv(f"{folder_path}AUBANK.NS.csv")

In [44]:
aubank_df_target.head(20)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA_9,SMA_21,EMA_9,...,BB_lower,ADX,ADX+DI,ADX-DI,CMF,OBV,CCI,Williams_%R,ATR,Target
0,2017-08-29,265.5,279.399994,265.5,272.399994,270.861786,1649802,268.661109,275.953571,269.74717,...,257.109623,46.315878,15.57986,27.511465,0.182936,-7916212.0,-22.486667,-43.905643,11.294367,0.0
1,2017-08-30,275.0,278.875,274.225006,275.424988,273.869659,712294,269.036109,275.057142,270.882733,...,258.701281,44.629755,14.862239,26.244265,0.162639,-7203918.0,24.239067,-22.460356,10.919995,0.53
2,2017-08-31,274.899994,278.225006,273.0,274.725006,273.173615,580208,269.366665,274.077381,271.651188,...,260.456179,43.268357,14.290231,26.136532,0.141817,-7784126.0,30.89228,-16.461228,10.480352,0.5
3,2017-09-01,275.5,285.0,273.774994,276.149994,274.590576,1765988,270.174998,273.27738,272.550949,...,262.568087,40.724434,18.056729,23.99953,0.124248,-6018138.0,96.957914,-36.875025,10.537509,0.51
4,2017-09-04,277.875,282.149994,271.024994,278.5,276.927307,1182770,271.108331,272.682142,273.740759,...,262.630387,38.798966,16.607432,24.057285,0.144057,-4835368.0,88.35322,-27.083333,10.582363,0.52
5,2017-09-05,280.049988,288.25,276.575012,286.575012,284.956726,1989224,273.533332,273.030952,276.30761,...,261.445038,36.017884,19.571585,22.056518,0.186246,-2846144.0,177.568397,-6.146744,10.665375,0.57
6,2017-09-06,284.0,289.0,281.799988,283.924988,282.321686,1157354,275.899997,273.353571,277.831085,...,260.914892,33.384987,19.092384,20.902004,0.166896,-4003498.0,160.379632,-18.125044,10.403286,0.48
7,2017-09-07,285.625,288.0,277.5,279.350006,277.772491,815452,277.33611,273.52738,278.13487,...,261.370567,31.613485,17.642099,22.42505,0.090621,-4818950.0,117.262176,-34.464264,10.41057,0.47
8,2017-09-08,280.5,282.75,275.0,277.174988,275.609802,831916,278.24722,273.194047,277.942893,...,261.367084,30.339722,16.637531,22.984957,0.036877,-5650866.0,67.87004,-42.232186,10.210966,0.49
9,2017-09-11,279.375,282.450012,275.5,279.299988,277.722839,697004,279.013885,273.401188,278.214312,...,261.489833,29.18188,15.770295,21.78686,0.068276,-4953862.0,69.59069,-34.642901,9.967196,0.52


In [45]:
print(stock_maximums)

{'HDFCBANK.NS': (150.800048828125, 111.29998779296875), 'AXISBANK.NS': (74.550048828125, 119.5), 'ICICIBANK.NS': (82.4000244140625, 61.70001220703125), 'KOTAKBANK.NS': (170.75, 164.0999755859375), 'SBIN.NS': (70.44999694824219, 32.5), 'INDUSINDBK.NS': (137.3499755859375, 143.3500366210937), 'BANDHANBNK.NS': (76.85000610351562, 113.0499877929687), 'FEDERALBNK.NS': (14.150001525878906, 12.80000305175782), 'PNB.NS': (63.850006103515625, 17.600006103515625), 'IDFCFIRSTB.NS': (5.69999694824218, 6.200000762939453), 'BANKBARODA.NS': (45.00000000000003, 22.150001525878906), 'AUBANK.NS': (60.20001220703125, 82.07501220703125)}
