<a href="https://colab.research.google.com/github/soham7707/DeepSectorAI/blob/main/GRU_LSTM_BiLstm_HybridwithGARCH_Beta_Bullish.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install arch

Collecting arch
  Downloading arch-7.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading arch-7.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (978 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m978.3/978.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-7.2.0


In [2]:
pip install yfinance pandas numpy matplotlib



In [3]:
sectors = {
    'IT': ['TCS.NS', 'INFY.NS', 'HCLTECH.NS', 'WIPRO.NS'],
    'Banking': ['HDFCBANK.NS', 'ICICIBANK.NS', 'KOTAKBANK.NS', 'AXISBANK.NS'],
    'Pharma': ['SUNPHARMA.NS', 'CIPLA.NS', 'DRREDDY.NS'],
    'Auto': ['TATAMOTORS.NS', 'M&M.NS', 'MARUTI.NS'],
    'FMCG': ['HINDUNILVR.NS', 'ITC.NS', 'DABUR.NS']
}

In [4]:
import yfinance as yf
import pandas as pd
from datetime import datetime

#Bearish Period

start_date = "2020-04-01"    #Bullish period
end_date = "2021-10-01"

def fetch_sector_data(sectors):
    all_data = {}
    for sector, stocks in sectors.items():
        print(f"Fetching data for {sector} sector...")
        data = yf.download(stocks, start=start_date, end=end_date)['Close']
        all_data[sector] = data
    return all_data

sector_data = fetch_sector_data(sectors)


Fetching data for IT sector...


  data = yf.download(stocks, start=start_date, end=end_date)['Close']
[*********************100%***********************]  4 of 4 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Banking sector...


[*********************100%***********************]  4 of 4 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Pharma sector...


[*********************100%***********************]  3 of 3 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Auto sector...


[*********************100%***********************]  3 of 3 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for FMCG sector...


[*********************100%***********************]  3 of 3 completed


In [5]:

for sector in sector_data:
    df = sector_data[sector]
    df = df.ffill().bfill()
    sector_data[sector] = df


In [6]:

import numpy as np

returns_data = {}

for sector, df in sector_data.items():
    returns = np.log(df / df.shift(1)).dropna()
    returns_data[sector] = returns


In [7]:
for sector, df in returns_data.items():
    df.to_csv(f"{sector}_returns.csv")

In [8]:
sector_data['Banking']

Ticker,AXISBANK.NS,HDFCBANK.NS,ICICIBANK.NS,KOTAKBANK.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-04-01,357.158722,787.779785,300.940399,1176.734009
2020-04-03,324.096832,772.777100,277.244293,1136.103638
2020-04-07,387.233154,850.876282,315.399841,1193.065552
2020-04-08,389.722748,844.039612,308.484467,1182.708984
2020-04-09,418.402985,878.365295,331.455200,1267.604248
...,...,...,...,...
2021-09-24,794.731689,1527.305786,701.275635,2020.665283
2021-09-27,793.636292,1549.764038,708.114258,2025.895996
2021-09-28,785.918518,1540.179932,695.649536,2060.618164
2021-09-29,778.897766,1519.962646,688.228821,2023.006592


In [9]:
returns_data['Banking']

Ticker,AXISBANK.NS,HDFCBANK.NS,ICICIBANK.NS,KOTAKBANK.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-04-03,-0.097138,-0.019228,-0.082013,-0.035138
2020-04-07,0.177985,0.096276,0.128942,0.048922
2020-04-08,0.006409,-0.008067,-0.022170,-0.008719
2020-04-09,0.071009,0.039863,0.071821,0.069321
2020-04-13,-0.002860,-0.032633,-0.035795,-0.017434
...,...,...,...,...
2021-09-24,-0.017514,0.019896,0.008264,0.002024
2021-09-27,-0.001379,0.014597,0.009704,0.002585
2021-09-28,-0.009772,-0.006203,-0.017759,0.016994
2021-09-29,-0.008973,-0.013213,-0.010725,-0.018421


In [10]:
from arch import arch_model
import pandas as pd
import numpy as np

In [11]:
def estimate_garch_volatility(returns_series):
    # Fit GARCH(1,1) model to the returns series
    model = arch_model(returns_series, vol='GARCH', p=1, q=1)
    res = model.fit(disp='off')
    # Return the estimated conditional volatility (σ_t)
    return res.conditional_volatility

In [12]:
garch_volatility_data = {}

for sector, returns_df in returns_data.items():
    print(f"Estimating GARCH volatility for {sector} sector...")
    sector_vols = pd.DataFrame(index=returns_df.index)

    for stock in returns_df.columns:
        try:
            vol = estimate_garch_volatility(returns_df[stock])
            sector_vols[stock] = vol
        except Exception as e:
            print(f"Error processing {stock}: {e}")

    garch_volatility_data[sector] = sector_vols

Estimating GARCH volatility for IT sector...


estimating the model parameters. The scale of y is 0.0003928. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002982. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002726. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0004274. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.



Estimating GARCH volatility for Banking sector...
Estimating GARCH volatility for Pharma sector...
Estimating GARCH volatility for Auto sector...


estimating the model parameters. The scale of y is 0.0008205. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0003756. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006786. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0004424. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0003964. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Estimating GARCH volatility for FMCG sector...


estimating the model parameters. The scale of y is 0.0009982. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002118. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002307. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002866. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.



In [13]:
print(garch_volatility_data['FMCG'].tail())

            DABUR.NS  HINDUNILVR.NS    ITC.NS
Date                                         
2021-09-24  0.012002       0.013704  0.017865
2021-09-27  0.011823       0.013846  0.017852
2021-09-28  0.013390       0.014031  0.017394
2021-09-29  0.013340       0.013516  0.016991
2021-09-30  0.013098       0.013838  0.016573


In [14]:
pip install numpy pandas scikit-learn tensorflow



In [15]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def prepare_lstm_data(series, lookback=20):
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(series.values.reshape(-1, 1))

    X, y = [], []
    for i in range(lookback, len(scaled_data)):
        X.append(scaled_data[i-lookback:i])
        y.append(scaled_data[i])

    X = np.array(X)
    y = np.array(y)
    return X, y, scaler

In [16]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=False, input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

In [17]:
def predict_lstm_volatility(returns_series, lookback=20):
    X, y, scaler = prepare_lstm_data(returns_series, lookback)
    model = build_lstm_model((X.shape[1], X.shape[2]))
    model.fit(X, y, epochs=10, batch_size=16, verbose=0)

    y_pred_scaled = model.predict(X)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()

    # Estimate volatility as rolling std of predicted returns
    predicted_volatility = pd.Series(y_pred).rolling(window=lookback).std()
    predicted_volatility.index = returns_series.index[lookback:]
    return predicted_volatility

In [18]:
lstm_volatility_data = {}

for sector, returns_df in returns_data.items():
    print(f"Predicting LSTM volatility for {sector} sector...")
    sector_vols = pd.DataFrame()

    for stock in returns_df.columns:
        try:
            vol = predict_lstm_volatility(returns_df[stock])
            sector_vols[stock] = vol
        except Exception as e:
            print(f"Error with {stock}: {e}")

    lstm_volatility_data[sector] = sector_vols

Predicting LSTM volatility for IT sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  
Predicting LSTM volatility for Banking sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  
Predicting LSTM volatility for Pharma sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  
Predicting LSTM volatility for Auto sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  
Predicting LSTM volatility for FMCG sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


In [19]:
print(lstm_volatility_data['IT'].tail())

            HCLTECH.NS   INFY.NS    TCS.NS  WIPRO.NS
Date                                                
2021-09-24    0.000935  0.000651  0.001585  0.000860
2021-09-27    0.001023  0.000700  0.001607  0.000889
2021-09-28    0.001063  0.000750  0.001622  0.000965
2021-09-29    0.001063  0.000754  0.001699  0.001142
2021-09-30    0.001094  0.000755  0.001745  0.001358


In [20]:
from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import LSTM, Dense, Bidirectional


def build_bilstm_model(input_shape):
    model = Sequential()
    model.add(Bidirectional(LSTM(units=50, return_sequences=False), input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model


In [21]:
def predict_bilstm_volatility(returns_series, lookback=20):
    X, y, scaler = prepare_lstm_data(returns_series, lookback)
    model = build_bilstm_model((X.shape[1], X.shape[2]))
    model.fit(X, y, epochs=10, batch_size=16, verbose=0)

    y_pred_scaled = model.predict(X)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()

    predicted_volatility = pd.Series(y_pred).rolling(window=lookback).std()
    predicted_volatility.index = returns_series.index[lookback:]
    return predicted_volatility

In [22]:
bilstm_volatility_data = {}

for sector, returns_df in returns_data.items():
    print(f"Predicting BiLSTM volatility for {sector} sector...")
    sector_vols = pd.DataFrame()

    for stock in returns_df.columns:
        try:
            vol = predict_bilstm_volatility(returns_df[stock])
            sector_vols[stock] = vol
        except Exception as e:
            print(f"Error with {stock}: {e}")

    bilstm_volatility_data[sector] = sector_vols

Predicting BiLSTM volatility for IT sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  
Predicting BiLSTM volatility for Banking sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  
Predicting BiLSTM volatility for Pharma sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  
Predicting BiLSTM volatility for Auto sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  
Predicting BiLSTM volatility for FMCG sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


In [23]:
print(bilstm_volatility_data['IT'].tail())

            HCLTECH.NS   INFY.NS    TCS.NS  WIPRO.NS
Date                                                
2021-09-24    0.000685  0.000843  0.001052  0.001168
2021-09-27    0.000614  0.000772  0.001135  0.001146
2021-09-28    0.000703  0.000742  0.001232  0.001239
2021-09-29    0.000738  0.000743  0.001317  0.001382
2021-09-30    0.000747  0.000745  0.001451  0.001494


In [24]:
from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from tensorflow.keras.layers import GRU, Dense



def build_gru_model(input_shape):
    model = Sequential()
    model.add(GRU(units=50, return_sequences=False, input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model



In [25]:
def predict_gru_volatility(returns_series, lookback=20):
    X, y, scaler = prepare_lstm_data(returns_series, lookback)  # same prepare function works
    model = build_gru_model((X.shape[1], X.shape[2]))
    model.fit(X, y, epochs=10, batch_size=16, verbose=0)

    y_pred_scaled = model.predict(X)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()

    predicted_volatility = pd.Series(y_pred).rolling(window=lookback).std()
    predicted_volatility.index = returns_series.index[lookback:]
    return predicted_volatility

In [26]:
gru_volatility_data = {}

for sector, returns_df in returns_data.items():
    print(f"Predicting GRU volatility for {sector} sector...")
    sector_vols = pd.DataFrame()

    for stock in returns_df.columns:
        try:
            vol = predict_gru_volatility(returns_df[stock])
            sector_vols[stock] = vol
        except Exception as e:
            print(f"Error with {stock}: {e}")

    gru_volatility_data[sector] = sector_vols

Predicting GRU volatility for IT sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  
Predicting GRU volatility for Banking sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step  
Predicting GRU volatility for Pharma sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  
Predicting GRU volatility for Auto sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  
Predicting GRU volatility for FMCG sector...


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  


In [27]:
print(gru_volatility_data['IT'].tail())


            HCLTECH.NS   INFY.NS    TCS.NS  WIPRO.NS
Date                                                
2021-09-24    0.001054  0.000991  0.001492  0.001672
2021-09-27    0.001148  0.001209  0.001482  0.001662
2021-09-28    0.001150  0.001235  0.001472  0.001861
2021-09-29    0.001201  0.001223  0.001502  0.002245
2021-09-30    0.001338  0.001259  0.001590  0.002457


In [28]:
gru_volatility_data['IT'].dropna

In [29]:
gru_volatility_data['IT'].tail()


Unnamed: 0_level_0,HCLTECH.NS,INFY.NS,TCS.NS,WIPRO.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-09-24,0.001054,0.000991,0.001492,0.001672
2021-09-27,0.001148,0.001209,0.001482,0.001662
2021-09-28,0.00115,0.001235,0.001472,0.001861
2021-09-29,0.001201,0.001223,0.001502,0.002245
2021-09-30,0.001338,0.001259,0.00159,0.002457


In [30]:
import yfinance as yf
import numpy as np
import pandas as pd

# Download NIFTY-50 index (symbol: ^NSEI)
nifty = yf.download("^NSEI", start="2015-01-01")['Close']
nifty_returns = np.log(nifty / nifty.shift(1)).dropna()
nifty_returns_df = pd.DataFrame(nifty_returns)
nifty_returns_df.columns = ['Market']

  nifty = yf.download("^NSEI", start="2015-01-01")['Close']
[*********************100%***********************]  1 of 1 completed


In [31]:
import statsmodels.api as sm

def compute_beta(stock_returns, market_returns_df):
    aligned = pd.concat([stock_returns, market_returns_df], axis=1).dropna()
    aligned.columns = ['Stock', 'Market']

    X = sm.add_constant(aligned['Market'])
    y = aligned['Stock']
    model = sm.OLS(y, X).fit()
    return model.params['Market']  # This is the beta




beta_values = {}

for sector, returns_df in returns_data.items():
    print(f"Computing Beta for {sector} sector...")
    sector_betas = {}

    for stock in returns_df.columns:
        try:
            beta = compute_beta(returns_df[stock], nifty_returns_df)
            sector_betas[stock] = beta
        except Exception as e:
            print(f"Error with {stock}: {e}")

    beta_values[sector] = sector_betas



beta_df = pd.DataFrame(beta_values).T  # Transpose to have stocks as columns
beta_df = beta_df.fillna(0)
print(beta_df)


Computing Beta for IT sector...
Computing Beta for Banking sector...
Computing Beta for Pharma sector...
Computing Beta for Auto sector...
Computing Beta for FMCG sector...
         HCLTECH.NS   INFY.NS    TCS.NS  WIPRO.NS  AXISBANK.NS  HDFCBANK.NS  \
IT         0.774227  0.745205  0.695667  0.697242     0.000000     0.000000   
Banking    0.000000  0.000000  0.000000  0.000000     1.731114     1.252224   
Pharma     0.000000  0.000000  0.000000  0.000000     0.000000     0.000000   
Auto       0.000000  0.000000  0.000000  0.000000     0.000000     0.000000   
FMCG       0.000000  0.000000  0.000000  0.000000     0.000000     0.000000   

         ICICIBANK.NS  KOTAKBANK.NS  CIPLA.NS  DRREDDY.NS  SUNPHARMA.NS  \
IT           0.000000      0.000000  0.000000    0.000000      0.000000   
Banking      1.746477      1.136998  0.000000    0.000000      0.000000   
Pharma       0.000000      0.000000  0.357097    0.369719      0.562617   
Auto         0.000000      0.000000  0.000000    0.0

In [32]:
sharpe_ratios = {}

for sector, df in returns_data.items():
    sector_sharpes = {}
    for stock in df.columns:
        r = df[stock].dropna()
        mean_return = r.mean()
        std_return = r.std()
        sharpe = mean_return / std_return if std_return != 0 else 0
        sector_sharpes[stock] = sharpe
    sharpe_ratios[sector] = sector_sharpes



features = []

for sector in returns_data:
    for stock in returns_data[sector].columns:
        try:
            beta = beta_values[sector][stock]
            garch_mean = garch_volatility_data[sector][stock].mean()
            lstm_mean = lstm_volatility_data[sector][stock].mean()
            bilstm_mean = bilstm_volatility_data[sector][stock].mean()
            gru_mean = gru_volatility_data[sector][stock].mean()
            sharpe = sharpe_ratios[sector][stock]

            features.append([sector, stock, beta, garch_mean, lstm_mean, bilstm_mean, gru_mean, sharpe])
        except Exception as e:
            print(f"Skipping {stock} due to missing data: {e}")

features_df = pd.DataFrame(
    features,
    columns=['Sector', 'Stock', 'Beta', 'GARCH_Vol', 'LSTM_Vol', 'BiLSTM_Vol', 'GRU_Vol', 'Sharpe']
)


In [33]:
features_df = pd.DataFrame(features, columns=['Sector', 'Stock', 'Beta', 'GARCH_Vol','LSTM_Vol', 'BiLSTM_Vol', 'GRU_Vol', 'Sharpe'])

In [34]:
from sklearn.linear_model import LinearRegression

X = features_df[['Beta', 'GARCH_Vol', 'GRU_Vol','LSTM_Vol']]
y = features_df['Sharpe']

model = LinearRegression().fit(X, y)
weights = model.coef_
intercept = model.intercept_

print("Learned Weights:", weights)
print("Intercept:", intercept)

Learned Weights: [-1.53294524e-02  4.32548799e+00  2.51814622e+01 -6.76057290e+01]
Intercept: 0.09256411591793089


In [35]:
X = features_df[['Beta', 'GARCH_Vol', 'GRU_Vol','LSTM_Vol']]
y = features_df['Sharpe']

# Drop rows with NaNs
df_clean = pd.concat([X, y], axis=1).dropna()

X = df_clean[['Beta', 'GARCH_Vol', 'GRU_Vol','LSTM_Vol']]
y = df_clean['Sharpe']

model = LinearRegression().fit(X, y)
print("Learned Weights:", model.coef_)
print("Intercept:", model.intercept_)

Learned Weights: [-1.53294524e-02  4.32548799e+00  2.51814622e+01 -6.76057290e+01]
Intercept: 0.09256411591793089


In [36]:
features_df['Risk_Score'] = X @ weights

In [37]:
for sector in features_df['Sector'].unique():
    print(f"\nTop 3 safest stocks in {sector}:")
    print(features_df[features_df['Sector'] == sector].sort_values('Risk_Score').head(3)[['Stock', 'Risk_Score']])


Top 3 safest stocks in IT:
        Stock  Risk_Score
2      TCS.NS   -0.002207
1     INFY.NS    0.006633
0  HCLTECH.NS    0.007567

Top 3 safest stocks in Banking:
          Stock  Risk_Score
6  ICICIBANK.NS   -0.012910
7  KOTAKBANK.NS    0.004665
5   HDFCBANK.NS    0.005522

Top 3 safest stocks in Pharma:
           Stock  Risk_Score
10  SUNPHARMA.NS    0.004609
8       CIPLA.NS    0.015088
9     DRREDDY.NS    0.028312

Top 3 safest stocks in Auto:
            Stock  Risk_Score
12      MARUTI.NS   -0.014431
11         M&M.NS    0.025998
13  TATAMOTORS.NS    0.032243

Top 3 safest stocks in FMCG:
            Stock  Risk_Score
15  HINDUNILVR.NS   -0.005326
16         ITC.NS    0.004694
14       DABUR.NS    0.008709


In [38]:
from sklearn.linear_model import LinearRegression

X = features_df[['Beta', 'GARCH_Vol', 'GRU_Vol','BiLSTM_Vol']]
y = features_df['Sharpe']

model = LinearRegression().fit(X, y)
weights = model.coef_
intercept = model.intercept_

print("Learned Weights:", weights)
print("Intercept:", intercept)


Learned Weights: [ -0.03407943   3.71052086  11.44585229 -18.19007997]
Intercept: 0.059856120013924666


In [39]:
features_df['Risk_Score_GRU_Bi_Beta_GARCH'] = X @ weights

In [40]:
for sector in features_df['Sector'].unique():
    print(f"\nSafest stocks in {sector} (beta + GARCH + GRU+ bilstm):")
    print(features_df[features_df['Sector'] == sector]
          .sort_values('Risk_Score_GRU_Bi_Beta_GARCH')
          .head(3)[['Stock', 'Risk_Score_GRU_Bi_Beta_GARCH']])



Safest stocks in IT (beta + GARCH + GRU+ bilstm):
        Stock  Risk_Score_GRU_Bi_Beta_GARCH
2      TCS.NS                      0.035369
1     INFY.NS                      0.036739
0  HCLTECH.NS                      0.037033

Safest stocks in Banking (beta + GARCH + GRU+ bilstm):
          Stock  Risk_Score_GRU_Bi_Beta_GARCH
5   HDFCBANK.NS                      0.028968
6  ICICIBANK.NS                      0.033188
7  KOTAKBANK.NS                      0.035454

Safest stocks in Pharma (beta + GARCH + GRU+ bilstm):
           Stock  Risk_Score_GRU_Bi_Beta_GARCH
10  SUNPHARMA.NS                      0.049197
9     DRREDDY.NS                      0.053836
8       CIPLA.NS                      0.062272

Safest stocks in Auto (beta + GARCH + GRU+ bilstm):
            Stock  Risk_Score_GRU_Bi_Beta_GARCH
12      MARUTI.NS                      0.031178
11         M&M.NS                      0.045345
13  TATAMOTORS.NS                      0.069720

Safest stocks in FMCG (beta + GARCH + GRU+ b

In [41]:
from sklearn.linear_model import LinearRegression

X = features_df[['Beta', 'GARCH_Vol', 'LSTM_Vol','BiLSTM_Vol']]
y = features_df['Sharpe']

model = LinearRegression().fit(X, y)
weights = model.coef_
intercept = model.intercept_

print("Learned Weights:", weights)
print("Intercept:", intercept)

Learned Weights: [-2.72881363e-02  4.85835742e+00 -2.95665702e+01  1.72836975e+01]
Intercept: 0.05323920116662573


In [42]:
features_df['Risk_Score_lstm_bi_garch_Beta'] = X @ weights

In [43]:
for sector in features_df['Sector'].unique():
    print(f"\nSafest stocks in {sector} (LSTM+ BiLSTM + GARCH + Beta ):")
    print(features_df[features_df['Sector'] == sector]
          .sort_values('Risk_Score_lstm_bi_garch_Beta')
          .head(3)[['Stock', 'Risk_Score_lstm_bi_garch_Beta']])



Safest stocks in IT (LSTM+ BiLSTM + GARCH + Beta ):
        Stock  Risk_Score_lstm_bi_garch_Beta
2      TCS.NS                       0.034513
1     INFY.NS                       0.040970
0  HCLTECH.NS                       0.051184

Safest stocks in Banking (LSTM+ BiLSTM + GARCH + Beta ):
          Stock  Risk_Score_lstm_bi_garch_Beta
5   HDFCBANK.NS                       0.031231
6  ICICIBANK.NS                       0.033353
7  KOTAKBANK.NS                       0.045546

Safest stocks in Pharma (LSTM+ BiLSTM + GARCH + Beta ):
           Stock  Risk_Score_lstm_bi_garch_Beta
10  SUNPHARMA.NS                       0.050650
8       CIPLA.NS                       0.056647
9     DRREDDY.NS                       0.063492

Safest stocks in Auto (LSTM+ BiLSTM + GARCH + Beta ):
            Stock  Risk_Score_lstm_bi_garch_Beta
12      MARUTI.NS                       0.036914
13  TATAMOTORS.NS                       0.066550
11         M&M.NS                       0.066602

Safest stocks in FMC

In [44]:
from sklearn.linear_model import LinearRegression

X = features_df[['Beta', 'GARCH_Vol', 'GRU_Vol','LSTM_Vol']]
y = features_df['Sharpe']

model = LinearRegression().fit(X, y)
weights = model.coef_
intercept = model.intercept_

print("Learned Weights:", weights)
print("Intercept:", intercept)

Learned Weights: [-1.53294524e-02  4.32548799e+00  2.51814622e+01 -6.76057290e+01]
Intercept: 0.09256411591793089


In [45]:
features_df['Risk_Score_GRU_LSTM_GARCH_Beta'] = X @ weights

In [46]:
for sector in features_df['Sector'].unique():
    print(f"\nSafest stocks in {sector} (GARCH + Beta + GRU + LSTM):")
    print(features_df[features_df['Sector'] == sector]
          .sort_values('Risk_Score_GRU_LSTM_GARCH_Beta')
          .head(3)[['Stock', 'Risk_Score_GRU_LSTM_GARCH_Beta']])


Safest stocks in IT (GARCH + Beta + GRU + LSTM):
        Stock  Risk_Score_GRU_LSTM_GARCH_Beta
2      TCS.NS                       -0.002207
1     INFY.NS                        0.006633
0  HCLTECH.NS                        0.007567

Safest stocks in Banking (GARCH + Beta + GRU + LSTM):
          Stock  Risk_Score_GRU_LSTM_GARCH_Beta
6  ICICIBANK.NS                       -0.012910
7  KOTAKBANK.NS                        0.004665
5   HDFCBANK.NS                        0.005522

Safest stocks in Pharma (GARCH + Beta + GRU + LSTM):
           Stock  Risk_Score_GRU_LSTM_GARCH_Beta
10  SUNPHARMA.NS                        0.004609
8       CIPLA.NS                        0.015088
9     DRREDDY.NS                        0.028312

Safest stocks in Auto (GARCH + Beta + GRU + LSTM):
            Stock  Risk_Score_GRU_LSTM_GARCH_Beta
12      MARUTI.NS                       -0.014431
11         M&M.NS                        0.025998
13  TATAMOTORS.NS                        0.032243

Safest stocks in

In [47]:
features_df[['Stock', 'Risk_Score', 'Risk_Score_lstm_bi_garch_Beta', 'Risk_Score_GRU_Bi_Beta_GARCH', 'Risk_Score_GRU_LSTM_GARCH_Beta']].head()

Unnamed: 0,Stock,Risk_Score,Risk_Score_lstm_bi_garch_Beta,Risk_Score_GRU_Bi_Beta_GARCH,Risk_Score_GRU_LSTM_GARCH_Beta
0,HCLTECH.NS,0.007567,0.051184,0.037033,0.007567
1,INFY.NS,0.006633,0.04097,0.036739,0.006633
2,TCS.NS,-0.002207,0.034513,0.035369,-0.002207
3,WIPRO.NS,0.036749,0.067136,0.047276,0.036749
4,AXISBANK.NS,0.014094,0.052702,0.042008,0.014094


In [48]:

def compare_sector_models(sector_name, top_n=5):
    df = features_df[features_df['Sector'] == sector_name].copy()

    models = ['Risk_Score', 'Risk_Score_lstm_bi_garch_Beta', 'Risk_Score_GRU_Bi_Beta_GARCH', 'Risk_Score_GRU_LSTM_GARCH_Beta']

    print(f"\n📊 Sector: {sector_name}")
    for model in models:
        print(f"\n🔹 Top {top_n} safest stocks using {model}:")
        display(df.sort_values(model).head(top_n)[['Stock', model]])


In [49]:
compare_sector_models("IT", top_n=3)
compare_sector_models("Banking", top_n=3)
compare_sector_models("Pharma", top_n=3)



📊 Sector: IT

🔹 Top 3 safest stocks using Risk_Score:


Unnamed: 0,Stock,Risk_Score
2,TCS.NS,-0.002207
1,INFY.NS,0.006633
0,HCLTECH.NS,0.007567



🔹 Top 3 safest stocks using Risk_Score_lstm_bi_garch_Beta:


Unnamed: 0,Stock,Risk_Score_lstm_bi_garch_Beta
2,TCS.NS,0.034513
1,INFY.NS,0.04097
0,HCLTECH.NS,0.051184



🔹 Top 3 safest stocks using Risk_Score_GRU_Bi_Beta_GARCH:


Unnamed: 0,Stock,Risk_Score_GRU_Bi_Beta_GARCH
2,TCS.NS,0.035369
1,INFY.NS,0.036739
0,HCLTECH.NS,0.037033



🔹 Top 3 safest stocks using Risk_Score_GRU_LSTM_GARCH_Beta:


Unnamed: 0,Stock,Risk_Score_GRU_LSTM_GARCH_Beta
2,TCS.NS,-0.002207
1,INFY.NS,0.006633
0,HCLTECH.NS,0.007567



📊 Sector: Banking

🔹 Top 3 safest stocks using Risk_Score:


Unnamed: 0,Stock,Risk_Score
6,ICICIBANK.NS,-0.01291
7,KOTAKBANK.NS,0.004665
5,HDFCBANK.NS,0.005522



🔹 Top 3 safest stocks using Risk_Score_lstm_bi_garch_Beta:


Unnamed: 0,Stock,Risk_Score_lstm_bi_garch_Beta
5,HDFCBANK.NS,0.031231
6,ICICIBANK.NS,0.033353
7,KOTAKBANK.NS,0.045546



🔹 Top 3 safest stocks using Risk_Score_GRU_Bi_Beta_GARCH:


Unnamed: 0,Stock,Risk_Score_GRU_Bi_Beta_GARCH
5,HDFCBANK.NS,0.028968
6,ICICIBANK.NS,0.033188
7,KOTAKBANK.NS,0.035454



🔹 Top 3 safest stocks using Risk_Score_GRU_LSTM_GARCH_Beta:


Unnamed: 0,Stock,Risk_Score_GRU_LSTM_GARCH_Beta
6,ICICIBANK.NS,-0.01291
7,KOTAKBANK.NS,0.004665
5,HDFCBANK.NS,0.005522



📊 Sector: Pharma

🔹 Top 3 safest stocks using Risk_Score:


Unnamed: 0,Stock,Risk_Score
10,SUNPHARMA.NS,0.004609
8,CIPLA.NS,0.015088
9,DRREDDY.NS,0.028312



🔹 Top 3 safest stocks using Risk_Score_lstm_bi_garch_Beta:


Unnamed: 0,Stock,Risk_Score_lstm_bi_garch_Beta
10,SUNPHARMA.NS,0.05065
8,CIPLA.NS,0.056647
9,DRREDDY.NS,0.063492



🔹 Top 3 safest stocks using Risk_Score_GRU_Bi_Beta_GARCH:


Unnamed: 0,Stock,Risk_Score_GRU_Bi_Beta_GARCH
10,SUNPHARMA.NS,0.049197
9,DRREDDY.NS,0.053836
8,CIPLA.NS,0.062272



🔹 Top 3 safest stocks using Risk_Score_GRU_LSTM_GARCH_Beta:


Unnamed: 0,Stock,Risk_Score_GRU_LSTM_GARCH_Beta
10,SUNPHARMA.NS,0.004609
8,CIPLA.NS,0.015088
9,DRREDDY.NS,0.028312


In [50]:
def sector_ranking_table(sector_name):
    df = features_df[features_df['Sector'] == sector_name].copy()
    ranking_df = df[['Stock']].copy()

    for col in ['Risk_Score', 'Risk_Score_lstm_bi_garch_Beta', 'Risk_Score_GRU_Bi_Beta_GARCH', 'Risk_Score_GRU_LSTM_GARCH_Beta']:
        ranking_df[col + '_Rank'] = df[col].rank(method='min')

    return ranking_df.sort_values('Risk_Score_Rank')

In [51]:
sector_ranking_table("FMCG")

Unnamed: 0,Stock,Risk_Score_Rank,Risk_Score_lstm_bi_garch_Beta_Rank,Risk_Score_GRU_Bi_Beta_GARCH_Rank,Risk_Score_GRU_LSTM_GARCH_Beta_Rank
15,HINDUNILVR.NS,1.0,1.0,2.0,1.0
16,ITC.NS,2.0,2.0,3.0,2.0
14,DABUR.NS,3.0,3.0,1.0,3.0
