In [1]:
!pip install yfinance arch tensorflow scikit-learn statsmodels --quiet


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/978.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━[0m [32m798.7/978.3 kB[0m [31m19.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/978.3 kB[0m [31m16.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m978.3/978.3 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Bidirectional

def build_lstm(input_shape):
    model = Sequential()
    model.add(LSTM(50, return_sequences=False, input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

def build_gru(input_shape):
    model = Sequential()
    model.add(GRU(50, return_sequences=False, input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

def build_bilstm(input_shape):
    model = Sequential()
    model.add(Bidirectional(LSTM(50, return_sequences=False), input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model


In [12]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def prepare_lstm_data(series, lookback=20):
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(series.values.reshape(-1, 1))

    X, y = [], []
    for i in range(lookback, len(scaled_data)):
        X.append(scaled_data[i-lookback:i])
        y.append(scaled_data[i])

    X = np.array(X)
    y = np.array(y)
    return X, y, scaler


In [4]:
def predict_volatility_with_model(returns_series, build_fn, lookback=20, epochs=10):
    X, y, scaler = prepare_lstm_data(returns_series, lookback)
    model = build_fn((X.shape[1], X.shape[2]))
    model.fit(X, y, epochs=epochs, batch_size=16, verbose=0)

    y_pred_scaled = model.predict(X)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()

    # rolling std = volatility
    predicted_vol = pd.Series(y_pred).rolling(window=lookback).std()
    predicted_vol.index = returns_series.index[lookback:]
    return predicted_vol


In [5]:
from sklearn.linear_model import LinearRegression

def hybrid_volatility(returns_series, lookback=20):
    # Predict using each model
    vol_lstm  = predict_volatility_with_model(returns_series, build_lstm, lookback)
    vol_gru   = predict_volatility_with_model(returns_series, build_gru, lookback)
    vol_bilstm= predict_volatility_with_model(returns_series, build_bilstm, lookback)

    # Align indexes
    df = pd.concat([vol_lstm, vol_gru, vol_bilstm], axis=1).dropna()
    df.columns = ["LSTM", "GRU", "BiLSTM"]

    # Target volatility = realized rolling std from true returns
    realized_vol = returns_series.rolling(window=lookback).std().dropna()
    realized_vol = realized_vol.loc[df.index]

    # Fit linear regression (learn weights)
    reg = LinearRegression().fit(df, realized_vol)

    # Hybrid volatility prediction
    hybrid_pred = reg.predict(df)
    hybrid_vol = pd.Series(hybrid_pred, index=df.index)
    return hybrid_vol, reg.coef_


In [6]:
def compute_risk_scores(returns_data, lookback=20):
    risk_scores = {}

    for sector, df in returns_data.items():
        print(f"Computing hybrid risk scores for {sector}...")
        sector_scores = {}

        for stock in df.columns:
            r = df[stock].dropna()
            if len(r) < lookback*2:
                continue

            try:
                hybrid_vol, weights = hybrid_volatility(r, lookback)

                # Sharpe Ratio
                mean_return = r.mean()
                std_return = r.std()
                sharpe = mean_return / std_return if std_return != 0 else 0

                # Risk Score = Volatility / Sharpe
                last_vol = hybrid_vol.iloc[-1] if not hybrid_vol.empty else np.nan
                risk_score = last_vol / sharpe if sharpe != 0 else np.inf

                sector_scores[stock] = {
                    "Sharpe": sharpe,
                    "Volatility": last_vol,
                    "RiskScore": risk_score,
                    "Weights": weights
                }
            except Exception as e:
                print(f"Error {stock}: {e}")

        # Sort by ascending volatility (safer → riskier)
        sorted_scores = dict(sorted(sector_scores.items(), key=lambda x: x[1]["Volatility"]))
        risk_scores[sector] = sorted_scores

    return risk_scores


In [7]:
# Define your sectors and representative stocks
sectors = {
    'IT': ['TCS.NS', 'INFY.NS', 'HCLTECH.NS', 'WIPRO.NS'],
    'Banking': ['HDFCBANK.NS', 'ICICIBANK.NS', 'KOTAKBANK.NS', 'AXISBANK.NS'],
    'Pharma': ['SUNPHARMA.NS', 'CIPLA.NS', 'DRREDDY.NS'],
    'Auto': ['TATAMOTORS.NS', 'M&M.NS', 'MARUTI.NS'],
    'FMCG': ['HINDUNILVR.NS', 'ITC.NS', 'DABUR.NS']
}


In [8]:
import yfinance as yf
import pandas as pd
from datetime import datetime

start_date = "2015-01-01"
end_date = datetime.today().strftime('%Y-%m-%d')

def fetch_sector_data(sectors):
    all_data = {}
    for sector, stocks in sectors.items():
        print(f"Fetching data for {sector} sector...")
        data = yf.download(stocks, start=start_date, end=end_date)['Close']
        all_data[sector] = data
    return all_data

sector_data = fetch_sector_data(sectors)


Fetching data for IT sector...


  data = yf.download(stocks, start=start_date, end=end_date)['Close']
[*********************100%***********************]  4 of 4 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Banking sector...


[*********************100%***********************]  4 of 4 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Pharma sector...


[*********************100%***********************]  3 of 3 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for Auto sector...


[*********************100%***********************]  3 of 3 completed
  data = yf.download(stocks, start=start_date, end=end_date)['Close']


Fetching data for FMCG sector...


[*********************100%***********************]  3 of 3 completed


In [9]:
for sector in sector_data:
    df = sector_data[sector]
    df = df.ffill().bfill()  # Forward and backward fill
    sector_data[sector] = df


In [10]:
import numpy as np

returns_data = {}

for sector, df in sector_data.items():
    returns = np.log(df / df.shift(1)).dropna()
    returns_data[sector] = returns


In [13]:
risk_scores = compute_risk_scores(returns_data, lookback=20)

# Example: Check IT sector rankings
#pd.DataFrame(risk_scores['IT']).T


Computing hybrid risk scores for IT...


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
Computing hybrid risk scores for Banking...


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step
Computing hybrid risk scores for Pharma...


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step
Computing hybrid risk scores for Auto...


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
Computing hybrid risk scores for FMCG...


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


In [15]:
pd.DataFrame(risk_scores['IT']).head()

Unnamed: 0,TCS.NS,WIPRO.NS,INFY.NS,HCLTECH.NS
Sharpe,0.027437,0.023255,0.031129,0.035872
Volatility,0.009214,0.011952,0.012484,0.013585
RiskScore,0.335814,0.513936,0.401031,0.37871
Weights,"[32.36101120147554, -45.58452290620839, 6.0295...","[11.02299983995142, -7.379659165188148, 4.4794...","[-14.049053649147261, 26.228147032695183, 1.94...","[-25.441896003089116, 36.27910576326558, 1.209..."


NameError: name 'features_df' is not defined