<a href="https://colab.research.google.com/github/soham7707/DeepSectorAI/blob/main/GRU_GARCH_BETA_Bearish.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install arch


Collecting arch
  Downloading arch-7.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading arch-7.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (978 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m978.3/978.3 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-7.2.0


In [2]:
pip install yfinance pandas numpy matplotlib




In [3]:
# Define your sectors and representative stocks
sectors = {
    'IT': ['TCS.NS', 'INFY.NS', 'HCLTECH.NS', 'WIPRO.NS'],
    'Banking': ['HDFCBANK.NS', 'ICICIBANK.NS', 'KOTAKBANK.NS', 'AXISBANK.NS'],
    'Pharma': ['SUNPHARMA.NS', 'CIPLA.NS', 'DRREDDY.NS'],
    'Auto': ['TATAMOTORS.NS', 'M&M.NS', 'MARUTI.NS'],
    'FMCG': ['HINDUNILVR.NS', 'ITC.NS', 'DABUR.NS']
}


In [4]:
import yfinance as yf
import pandas as pd
from datetime import datetime

start_date = "2022-01-01"
end_date = "2022-06-30"

def fetch_sector_data(sectors):
    all_data = {}
    for sector, stocks in sectors.items():
        print(f"Fetching data for {sector} sector...")
        data = yf.download(stocks, start=start_date, end=end_date)['Close']
        all_data[sector] = data
    return all_data

sector_data = fetch_sector_data(sectors)


Fetching data for IT sector...


  data = yf.download(stocks, start=start_date, end=end_date)['Close']
[*********************100%***********************]  4 of 4 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Banking sector...


[*********************100%***********************]  4 of 4 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Pharma sector...


[*********************100%***********************]  3 of 3 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Auto sector...


[*********************100%***********************]  3 of 3 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for FMCG sector...


[*********************100%***********************]  3 of 3 completed


handling missing data


In [5]:
for sector in sector_data:
    df = sector_data[sector]
    df = df.ffill().bfill()  # Forward and backward fill
    sector_data[sector] = df


Calculating Log Returns

In [6]:
import numpy as np

returns_data = {}

for sector, df in sector_data.items():
    returns = np.log(df / df.shift(1)).dropna()
    returns_data[sector] = returns


In [7]:
for sector, df in returns_data.items():
    df.to_csv(f"{sector}_returns.csv")


In [8]:
sector_data['Banking']

Ticker,AXISBANK.NS,HDFCBANK.NS,ICICIBANK.NS,KOTAKBANK.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-03,693.454529,1449.202637,741.773987,1817.761719
2022-01-04,706.201416,1457.689941,749.679565,1845.808472
2022-01-05,723.877563,1492.307129,764.423828,1915.103638
2022-01-06,727.263428,1468.370850,761.513855,1884.964600
2022-01-07,727.562134,1478.670166,769.467957,1897.269409
...,...,...,...,...
2022-06-23,626.733093,1288.059692,678.189209,1677.926392
2022-06-24,631.363892,1306.052002,692.060486,1692.173950
2022-06-27,634.450867,1308.415527,695.843506,1685.349121
2022-06-28,639.679077,1298.430542,689.004883,1662.682373


In [9]:
returns_data['Banking']

Ticker,AXISBANK.NS,HDFCBANK.NS,ICICIBANK.NS,KOTAKBANK.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-04,0.018215,0.005839,0.010601,0.015311
2022-01-05,0.024722,0.023470,0.019477,0.036854
2022-01-06,0.004666,-0.016170,-0.003814,-0.015863
2022-01-07,0.000411,0.006990,0.010391,0.006507
2022-01-10,0.016561,0.005531,0.021821,0.022689
...,...,...,...,...
2022-06-23,0.004060,0.003677,0.018114,0.008587
2022-06-24,0.007362,0.013872,0.020247,0.008455
2022-06-27,0.004877,0.001808,0.005451,-0.004041
2022-06-28,0.008207,-0.007661,-0.009876,-0.013541


In [10]:
pip install arch




In [11]:
from arch import arch_model
import pandas as pd
import numpy as np


In [12]:
def estimate_garch_volatility(returns_series):
    # Fit GARCH(1,1) model to the returns series
    model = arch_model(returns_series, vol='GARCH', p=1, q=1)
    res = model.fit(disp='off')
    # Return the estimated conditional volatility (σ_t)
    return res.conditional_volatility


In [13]:
garch_volatility_data = {}

for sector, returns_df in returns_data.items():
    print(f"Estimating GARCH volatility for {sector} sector...")
    sector_vols = pd.DataFrame(index=returns_df.index)

    for stock in returns_df.columns:
        try:
            vol = estimate_garch_volatility(returns_df[stock])
            sector_vols[stock] = vol
        except Exception as e:
            print(f"Error processing {stock}: {e}")

    garch_volatility_data[sector] = sector_vols


Estimating GARCH volatility for IT sector...
Estimating GARCH volatility for Banking sector...


estimating the model parameters. The scale of y is 0.0003374. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0003659. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002585. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0004054. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0004009. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Estimating GARCH volatility for Pharma sector...
Estimating GARCH volatility for Auto sector...
Estimating GARCH volatility for FMCG sector...


estimating the model parameters. The scale of y is 0.0002738. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0003046. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0002077. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.



In [14]:
print(garch_volatility_data['FMCG'].tail())


            DABUR.NS  HINDUNILVR.NS    ITC.NS
Date                                         
2022-06-23  0.017310       0.017990  0.014771
2022-06-24  0.016514       0.018030  0.014771
2022-06-27  0.015483       0.018068  0.014771
2022-06-28  0.015254       0.018030  0.014771
2022-06-29  0.014115       0.017986  0.014771


LSTM

In [15]:
pip install numpy pandas scikit-learn tensorflow




In [16]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def prepare_lstm_data(series, lookback=20):
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(series.values.reshape(-1, 1))

    X, y = [], []
    for i in range(lookback, len(scaled_data)):
        X.append(scaled_data[i-lookback:i])
        y.append(scaled_data[i])

    X = np.array(X)
    y = np.array(y)
    return X, y, scaler


In [17]:
from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from tensorflow.keras.layers import GRU, Dense



def build_gru_model(input_shape):
    model = Sequential()
    model.add(GRU(units=50, return_sequences=False, input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model





In [18]:
def predict_gru_volatility(returns_series, lookback=20):
    X, y, scaler = prepare_lstm_data(returns_series, lookback)  # same prepare function works
    model = build_gru_model((X.shape[1], X.shape[2]))
    model.fit(X, y, epochs=10, batch_size=16, verbose=0)

    y_pred_scaled = model.predict(X)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()

    predicted_volatility = pd.Series(y_pred).rolling(window=lookback).std()
    predicted_volatility.index = returns_series.index[lookback:]
    return predicted_volatility


In [None]:
gru_volatility_data = {}

for sector, returns_df in returns_data.items():
    print(f"Predicting GRU volatility for {sector} sector...")
    sector_vols = pd.DataFrame()

    for stock in returns_df.columns:
        try:
            vol = predict_gru_volatility(returns_df[stock])
            sector_vols[stock] = vol
        except Exception as e:
            print(f"Error with {stock}: {e}")

    gru_volatility_data[sector] = sector_vols


Predicting GRU volatility for IT sector...


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 141ms/step


  super().__init__(**kwargs)


In [None]:
print(gru_volatility_data['IT'].tail())


In [None]:
gru_volatility_data['IT'].dropna

In [None]:
gru_volatility_data['IT'].head()

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd

# Download NIFTY-50 index (symbol: ^NSEI)
nifty = yf.download("^NSEI", start="2015-01-01")['Close']
nifty_returns = np.log(nifty / nifty.shift(1)).dropna()
nifty_returns_df = pd.DataFrame(nifty_returns)
nifty_returns_df.columns = ['Market']


In [None]:
import statsmodels.api as sm

def compute_beta(stock_returns, market_returns_df):
    aligned = pd.concat([stock_returns, market_returns_df], axis=1).dropna()
    aligned.columns = ['Stock', 'Market']

    X = sm.add_constant(aligned['Market'])
    y = aligned['Stock']
    model = sm.OLS(y, X).fit()
    return model.params['Market']  # This is the beta



In [None]:
beta_values = {}

for sector, returns_df in returns_data.items():
    print(f"Computing Beta for {sector} sector...")
    sector_betas = {}

    for stock in returns_df.columns:
        try:
            beta = compute_beta(returns_df[stock], nifty_returns_df)
            sector_betas[stock] = beta
        except Exception as e:
            print(f"Error with {stock}: {e}")

    beta_values[sector] = sector_betas


In [None]:
beta_df = pd.DataFrame(beta_values).T  # Transpose to have stocks as columns
beta_df = beta_df.fillna(0)
print(beta_df)


Computing Sharpe Ratio

In [None]:
sharpe_ratios = {}

for sector, df in returns_data.items():
    sector_sharpes = {}
    for stock in df.columns:
        r = df[stock].dropna()
        mean_return = r.mean()
        std_return = r.std()
        sharpe = mean_return / std_return if std_return != 0 else 0
        sector_sharpes[stock] = sharpe
    sharpe_ratios[sector] = sector_sharpes


In [None]:
features = []

for sector in returns_data:
    for stock in returns_data[sector].columns:
        try:
            beta = beta_values[sector][stock]
            garch_mean = garch_volatility_data[sector][stock].mean()
            gru_mean = gru_volatility_data[sector][stock].mean()
            sharpe = sharpe_ratios[sector][stock]
            features.append([sector, stock, beta, garch_mean, gru_mean, sharpe])
        except Exception as e:
            print(f"Skipping {stock} due to missing data: {e}")

features_df = pd.DataFrame(
    features,
    columns=['Sector', 'Stock', 'Beta', 'GARCH_Vol', 'GRU_Vol', 'Sharpe']
)


In [None]:
features_df = pd.DataFrame(features, columns=['Sector', 'Stock', 'Beta', 'GARCH_Vol', 'GRU_Vol', 'Sharpe'])


In [None]:
from sklearn.linear_model import LinearRegression

X = features_df[['Beta', 'GARCH_Vol', 'GRU_Vol']]
y = features_df['Sharpe']

model = LinearRegression().fit(X, y)
weights = model.coef_
intercept = model.intercept_

print("Learned Weights:", weights)
print("Intercept:", intercept)


In [None]:
features_df['Risk_Score'] = X @ weights


In [None]:
for sector in features_df['Sector'].unique():
    print(f"\nTop 3 safest stocks in {sector}:")
    print(features_df[features_df['Sector'] == sector].sort_values('Risk_Score').head(3)[['Stock', 'Risk_Score']])


GRU AND GARCH ONLY

In [None]:
from sklearn.linear_model import LinearRegression

X_new = features_df[['GARCH_Vol', 'GRU_Vol']]
y_new = features_df['Sharpe']

model_new = LinearRegression().fit(X_new, y_new)
weights_new = model_new.coef_
intercept_new = model_new.intercept_

print("New Weights (GARCH, GRU):", weights_new)


In [None]:
features_df['Risk_Score_GRU_GARCH'] = X_new @ weights_new


In [None]:
for sector in features_df['Sector'].unique():
    print(f"\nSafest stocks in {sector} (GARCH + GRU only):")
    print(features_df[features_df['Sector'] == sector]
          .sort_values('Risk_Score_GRU_GARCH')
          .head(3)[['Stock', 'Risk_Score_GRU_GARCH']])


GRU AND BETA ONLY

In [None]:
from sklearn.linear_model import LinearRegression

X_lb = features_df[['Beta', 'GRU_Vol']]
y_lb = features_df['Sharpe']

model_lb = LinearRegression().fit(X_lb, y_lb)
weights_lb = model_lb.coef_
intercept_lb = model_lb.intercept_

print("Weights: Beta =", weights_lb[0], ", GRU =", weights_lb[1])
print("Intercept:", intercept_lb)


In [None]:
features_df['Risk_Score_GRU_Beta'] = X_lb @ weights_lb


In [None]:
for sector in features_df['Sector'].unique():
    print(f"\nSafest stocks in {sector} (BiLSTM + Beta only):")
    print(features_df[features_df['Sector'] == sector]
          .sort_values('Risk_Score_GRU_Beta')
          .head(3)[['Stock', 'Risk_Score_GRU_Beta']])


GARCH AND BETA ONLY

In [None]:
from sklearn.linear_model import LinearRegression

X_gb = features_df[['Beta', 'GARCH_Vol']]
y_gb = features_df['Sharpe']

model_gb = LinearRegression().fit(X_gb, y_gb)
weights_gb = model_gb.coef_
intercept_gb = model_gb.intercept_

print("Weights: Beta =", weights_gb[0], ", GARCH =", weights_gb[1])
print("Intercept:", intercept_gb)


In [None]:
features_df['Risk_Score_GARCH_Beta'] = X_gb @ weights_gb


In [None]:
for sector in features_df['Sector'].unique():
    print(f"\nSafest stocks in {sector} (GARCH + Beta only):")
    print(features_df[features_df['Sector'] == sector]
          .sort_values('Risk_Score_GARCH_Beta')
          .head(3)[['Stock', 'Risk_Score_GARCH_Beta']])


In [None]:
features_df[['Stock', 'Risk_Score', 'Risk_Score_GRU_GARCH', 'Risk_Score_GRU_Beta', 'Risk_Score_GARCH_Beta']].head()


Comparison per sector

In [None]:
def compare_sector_models(sector_name, top_n=5):
    df = features_df[features_df['Sector'] == sector_name].copy()

    models = ['Risk_Score', 'Risk_Score_GRU_GARCH', 'Risk_Score_GRU_Beta', 'Risk_Score_GARCH_Beta']

    print(f"\n📊 Sector: {sector_name}")
    for model in models:
        print(f"\n🔹 Top {top_n} safest stocks using {model}:")
        display(df.sort_values(model).head(top_n)[['Stock', model]])


In [None]:
compare_sector_models("IT", top_n=3)
compare_sector_models("Banking", top_n=3)
compare_sector_models("Pharma", top_n=3)


Comparison table

In [None]:
def sector_ranking_table(sector_name):
    df = features_df[features_df['Sector'] == sector_name].copy()
    ranking_df = df[['Stock']].copy()

    for col in ['Risk_Score', 'Risk_Score_GRU_GARCH', 'Risk_Score_GRU_Beta', 'Risk_Score_GARCH_Beta']:
        ranking_df[col + '_Rank'] = df[col].rank(method='min')

    return ranking_df.sort_values('Risk_Score_Rank')


In [None]:
sector_ranking_table("FMCG")
