In [1]:
import pandas as pd

url = 'https://en.wikipedia.org/wiki/NIFTY_500'

# Position of the constituents table among the tables pd.read_html finds on
# the page. This is tied to the page layout and will shift if tables are
# added or removed above it.
constituents_table_index = 4
expected_columns = ['Sl.No', 'Company Name', 'Industry', 'Symbol', 'Series', 'ISIN Code']

page_tables = pd.read_html(url)
if len(page_tables) <= constituents_table_index:
    raise ValueError(
        f"Expected at least {constituents_table_index + 1} tables at {url}, "
        f"found {len(page_tables)}; the page layout has probably changed."
    )
nifty500 = page_tables[constituents_table_index]

# If columns are not named, assign them manually -- but only when the table
# has the expected width, so a wrong table fails loudly instead of being
# silently mislabelled.
if list(nifty500.columns) != expected_columns:
    if nifty500.shape[1] != len(expected_columns):
        raise ValueError(
            f"Table {constituents_table_index} has {nifty500.shape[1]} columns, "
            f"expected {len(expected_columns)}: {expected_columns}"
        )
    nifty500.columns = expected_columns

# Rows without a symbol cannot be turned into a ticker.
nifty500 = nifty500.dropna(subset=['Symbol'])
symbol_text = nifty500['Symbol'].astype(str).str.strip()

# When pandas does not recognise the table header, the header shows up as an
# ordinary data row. Drop only rows whose Symbol cell is the header label,
# rather than unconditionally discarding row 0 (which would lose a real
# constituent whenever the header *is* parsed correctly).
# NOTE(review): assumes the header cell text is "Symbol" -- confirm against the page.
is_header_row = symbol_text.str.lower() == 'symbol'
nifty500 = nifty500.loc[~is_header_row].reset_index(drop=True)

# Now extract the 'Symbol' column and add '.NS'
nifty500['Symbol'] = nifty500['Symbol'].astype(str).str.strip() + '.NS'

# All constituent symbols from the table, in table order
symbols_list = nifty500['Symbol'].tolist()

print(symbols_list)
print(len(symbols_list))

['360ONE.NS', '3MINDIA.NS', 'ABB.NS', 'ACC.NS', 'AIAENG.NS', 'APLAPOLLO.NS', 'AUBANK.NS', 'AARTIIND.NS', 'AAVAS.NS', 'ABBOTINDIA.NS', 'ACE.NS', 'ADANIENSOL.NS', 'ADANIENT.NS', 'ADANIGREEN.NS', 'ADANIPORTS.NS', 'ADANIPOWER.NS', 'ATGL.NS', 'AWL.NS', 'ABCAPITAL.NS', 'ABFRL.NS', 'AEGISLOG.NS', 'AETHER.NS', 'AFFLE.NS', 'AJANTPHARM.NS', 'APLLTD.NS', 'ALKEM.NS', 'ALKYLAMINE.NS', 'ALLCARGO.NS', 'ALOKINDS.NS', 'ARE&M.NS', 'AMBER.NS', 'AMBUJACEM.NS', 'ANANDRATHI.NS', 'ANGELONE.NS', 'ANURAS.NS', 'APARINDS.NS', 'APOLLOHOSP.NS', 'APOLLOTYRE.NS', 'APTUS.NS', 'ACI.NS', 'ASAHIINDIA.NS', 'ASHOKLEY.NS', 'ASIANPAINT.NS', 'ASTERDM.NS', 'ASTRAZEN.NS', 'ASTRAL.NS', 'ATUL.NS', 'AUROPHARMA.NS', 'AVANTIFEED.NS', 'DMART.NS', 'AXISBANK.NS', 'BEML.NS', 'BLS.NS', 'BSE.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BAJAJHLDNG.NS', 'BALAMINES.NS', 'BALKRISIND.NS', 'BALRAMCHIN.NS', 'BANDHANBNK.NS', 'BANKBARODA.NS', 'BANKINDIA.NS', 'MAHABANK.NS', 'BATAINDIA.NS', 'BAYERCROP.NS', 'BERGEPAINT.NS', 'BDL.NS', 'BE

In [2]:
import yfinance as yf
import numpy as np

# Sample window for the price history (the original notes say this matches
# the research paper's 2017-2025 data range).
start_date = '2017-01-01'
end_date = '2025-05-31'

# Daily bars for every symbol in one request. group_by='ticker' makes the
# ticker the outer column level, so data[<ticker>] is that ticker's price frame.
data = yf.download(
    tickers=symbols_list,
    start=start_date,
    end=end_date,
    interval='1d',
    group_by='ticker',
    auto_adjust=False,
    threads=True,
)

# Collect the Close series of every requested ticker present in the download,
# keeping the order of symbols_list.
# NOTE(review): auto_adjust=False together with 'Close' means these are not the
# 'Adj Close' prices -- confirm this is what the downstream return calculation wants.
available_tickers = [ticker for ticker in symbols_list if ticker in data]
close_raw = pd.DataFrame({ticker: data[ticker]['Close'] for ticker in available_tickers})

# Forward-fill gaps, then keep only tickers with no missing value left.
# ffill cannot fill NaNs at the start of a column, so a ticker whose data
# begins after the first row of the frame is dropped here, as is any ticker
# with no data at all.
close_df = close_raw.ffill().dropna(axis=1, how='any')



[*********************100%***********************]  500 of 500 completed

3 Failed downloads:
['GLS.NS', 'HBLPOWER.NS', 'GMRINFRA.NS']: YFTzMissingError('possibly delisted; no timezone found')


In [3]:
# Factor used to annualise the daily mean and covariance.
TRADING_DAYS_PER_YEAR = 252

# Calculate daily returns (percentage change).
# fill_method=None: no implicit padding inside pct_change, instead of relying
# on the deprecated default. The first row (no previous price) is all-NaN and
# is removed by dropna().
returns_df = close_df.pct_change(fill_method=None).dropna()

# Fail loudly instead of writing empty / NaN arrays to disk: the covariance
# needs at least two return observations and at least one stock.
if returns_df.shape[1] == 0 or len(returns_df) < 2:
    raise ValueError(
        f"Not enough data to estimate moments: returns frame has shape {returns_df.shape}"
    )

# Compute mean returns and covariance matrix, both annualised
mu = returns_df.mean().values * TRADING_DAYS_PER_YEAR
sigma = returns_df.cov().values * TRADING_DAYS_PER_YEAR

# A zero price produces an infinite return, which dropna() does not remove.
if not (np.isfinite(mu).all() and np.isfinite(sigma).all()):
    raise ValueError("Non-finite values in mu/sigma; check close_df for zero or missing prices")

# Save processed data for SNN input.
# mu/sigma follow the column order of close_df; the tickers themselves are
# only recorded in the CSV headers below.
np.save('mu.npy', mu)  # Expected returns vector
np.save('sigma.npy', sigma)  # Covariance matrix
close_df.to_csv('processed_prices.csv')
returns_df.to_csv('daily_returns.csv')

print(f"Data shape: {close_df.shape} (Days × Stocks)")
print(f"Mean returns vector shape: {mu.shape}")
print(f"Covariance matrix shape: {sigma.shape}")


Data shape: (2077, 356) (Days × Stocks)
Mean returns vector shape: (356,)
Covariance matrix shape: (356, 356)
