In [1]:
import pandas as pd

# Scrape the NIFTY 500 constituents table from Wikipedia and build the list of
# ticker symbols (with the '.NS' suffix) that the download cell consumes.
url = 'https://en.wikipedia.org/wiki/NIFTY_500'
# NOTE(review): table position 4 is tied to the page's current layout — re-check
# this index if the page structure changes.
nifty500 = pd.read_html(url)[4]

# If columns are not named, assign them manually
expected_columns = ['Sl.No', 'Company Name', 'Industry', 'Symbol', 'Series', 'ISIN Code']
# Integer column labels mean no header row was recognised, so the header text
# is sitting in data row 0 and must not be treated as a ticker.
header_in_first_row = pd.api.types.is_integer_dtype(nifty500.columns.dtype)
if list(nifty500.columns) != expected_columns:
    nifty500.columns = expected_columns

# Now extract the 'Symbol' column and add '.NS'
nifty500['Symbol'] = nifty500['Symbol'].str.strip() + '.NS'

# Keep every constituent symbol. The first row is skipped only when it is the
# table header parsed as data; slicing it off unconditionally would silently
# drop a real company whenever the header was parsed correctly.
symbols = nifty500['Symbol']
first_is_header = len(symbols) > 0 and (
    header_in_first_row or str(symbols.iloc[0]).upper() == 'SYMBOL.NS'
)
if first_is_header:
    symbols = symbols.iloc[1:]
# Missing symbols would otherwise reach the downloader as NaN entries.
symbols_list = symbols.dropna().tolist()

print(symbols_list)
print(len(symbols_list))

['360ONE.NS', '3MINDIA.NS', 'ABB.NS', 'ACC.NS', 'AIAENG.NS', 'APLAPOLLO.NS', 'AUBANK.NS', 'AARTIIND.NS', 'AAVAS.NS', 'ABBOTINDIA.NS', 'ACE.NS', 'ADANIENSOL.NS', 'ADANIENT.NS', 'ADANIGREEN.NS', 'ADANIPORTS.NS', 'ADANIPOWER.NS', 'ATGL.NS', 'AWL.NS', 'ABCAPITAL.NS', 'ABFRL.NS', 'AEGISLOG.NS', 'AETHER.NS', 'AFFLE.NS', 'AJANTPHARM.NS', 'APLLTD.NS', 'ALKEM.NS', 'ALKYLAMINE.NS', 'ALLCARGO.NS', 'ALOKINDS.NS', 'ARE&M.NS', 'AMBER.NS', 'AMBUJACEM.NS', 'ANANDRATHI.NS', 'ANGELONE.NS', 'ANURAS.NS', 'APARINDS.NS', 'APOLLOHOSP.NS', 'APOLLOTYRE.NS', 'APTUS.NS', 'ACI.NS', 'ASAHIINDIA.NS', 'ASHOKLEY.NS', 'ASIANPAINT.NS', 'ASTERDM.NS', 'ASTRAZEN.NS', 'ASTRAL.NS', 'ATUL.NS', 'AUROPHARMA.NS', 'AVANTIFEED.NS', 'DMART.NS', 'AXISBANK.NS', 'BEML.NS', 'BLS.NS', 'BSE.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BAJAJHLDNG.NS', 'BALAMINES.NS', 'BALKRISIND.NS', 'BALRAMCHIN.NS', 'BANDHANBNK.NS', 'BANKBARODA.NS', 'BANKINDIA.NS', 'MAHABANK.NS', 'BATAINDIA.NS', 'BAYERCROP.NS', 'BERGEPAINT.NS', 'BDL.NS', 'BE

In [2]:
import yfinance as yf
import numpy as np

# Date window intended to match the research paper's 2017-2025 (8-year) range
start_date = '2017-01-01'
end_date = '2025-05-31'

# Fetch daily bars for every symbol; group_by='ticker' keys the result by
# ticker first, so each symbol's fields are reachable as data[ticker][field].
data = yf.download(
    tickers=symbols_list,
    start=start_date,
    end=end_date,
    interval='1d',
    group_by='ticker',
    auto_adjust=False,
    threads=True,
)

# One 'Close' column per symbol that is present in the downloaded frame
close_df = pd.DataFrame(
    {ticker: data[ticker]['Close'] for ticker in symbols_list if ticker in data}
)
# Forward-fill gaps, then discard any symbol that still has a missing value
# (i.e. no price available at the start of the window).
close_df = close_df.ffill()
close_df = close_df.dropna(axis=1, how='any')



[*********************100%***********************]  500 of 500 completed

3 Failed downloads:
['GLS.NS', 'HBLPOWER.NS', 'GMRINFRA.NS']: YFTzMissingError('possibly delisted; no timezone found')


In [None]:
# Daily simple returns; rows containing NaN (such as the first row, which has
# no previous price) are removed.
daily_pct = close_df.pct_change()
returns_df = daily_pct.dropna()

# Annualize the daily mean and covariance by scaling with 252
mu = 252 * returns_df.mean().values  # expected returns vector
sigma = 252 * returns_df.cov().values  # covariance matrix

# Persist the statistics and the underlying tables for the SNN input stage
np.save('mu.npy', mu)
np.save('sigma.npy', sigma)
close_df.to_csv('processed_prices.csv')
returns_df.to_csv('daily_returns.csv')

# Shape summary of everything that was written
print(
    f"Data shape: {close_df.shape} (Days × Stocks)",
    f"Mean returns vector shape: {mu.shape}",
    f"Covariance matrix shape: {sigma.shape}",
    sep="\n",
)


Data shape: (2077, 356) (Days × Stocks)
Mean returns vector shape: (356,)
Covariance matrix shape: (356, 356)


In [2]:
import pandas as pd

# Reload the saved price table from disk for a quick sanity check.
# Read with default options, the 'Date' index written by to_csv comes back as
# an ordinary column, so the frame has one more column than the saved prices.
df = pd.read_csv('processed_prices.csv')

print(df.shape)
df.head(n=5)

(2077, 357)


Unnamed: 0,Date,3MINDIA.NS,ABB.NS,ACC.NS,AIAENG.NS,APLAPOLLO.NS,AARTIIND.NS,ABBOTINDIA.NS,ACE.NS,ADANIENT.NS,...,WELCORP.NS,WELSPUNLIV.NS,WHIRLPOOL.NS,WIPRO.NS,YESBANK.NS,ZFCVINDIA.NS,ZEEL.NS,ZENSARTECH.NS,ZYDUSLIFE.NS,ECLERX.NS
0,2017-01-02,11056.450195,941.358704,1351.900024,1319.25,91.970001,168.941742,4716.0,46.0,41.930149,...,75.949997,66.699997,894.049988,88.415649,228.929993,5241.850098,451.600006,188.600006,354.5,944.466675
1,2017-01-03,10917.200195,949.03009,1343.550049,1292.400024,92.514999,170.92215,4657.649902,46.75,41.712189,...,76.150002,69.449997,931.849976,87.562523,232.580002,5247.950195,458.299988,189.190002,354.799988,931.700012
2,2017-01-04,10808.450195,942.947449,1319.400024,1299.949951,93.949997,169.60997,4619.25,47.25,42.393314,...,76.900002,71.150002,949.549988,89.175018,232.630005,5260.899902,458.600006,187.639999,362.049988,944.099976
3,2017-01-05,11097.400391,940.314636,1332.849976,1293.849976,94.190002,170.424011,4609.049805,47.599998,43.128933,...,79.099998,71.199997,934.299988,90.07502,242.039993,5263.549805,470.700012,189.800003,367.100006,951.0
4,2017-01-06,11305.0,948.122253,1333.900024,1271.650024,95.309998,170.035217,4604.450195,49.400002,42.85648,...,79.199997,70.150002,920.400024,88.115646,249.350006,5206.100098,463.100006,187.839996,379.450012,943.466675
