In [3]:
import pandas as pd
from pathlib import Path
import yfinance as yf
import os


Getting Nifty 50 constituents data

In [None]:
# Define the Nifty50 folder path
nifty50_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/Nifty50")

# Find all nifty50_mcwb.csv files
csv_files = list(nifty50_dir.glob("*/nifty50_mcwb.csv"))
csv_files.sort()  # Sort for consistent ordering

In [4]:
def single_file(i):
    df = pd.read_csv(csv_files[i], header=2)
    df = df.iloc[:, 1:]  # Use .iloc to slice columns starting from the second column
    file_date = os.path.basename(os.path.dirname(csv_files[i]))[-5:]
    df.insert(0, 'Date', file_date)
    df['Date'] = pd.to_datetime(df['Date'] , format='%b%y') + pd.offsets.MonthEnd(0)
    df.dropna(inplace=True)
    return df



In [5]:
nifty50_cons = pd.concat([single_file(i) for i in range(len(csv_files))], ignore_index=True).sort_values(by='Date')
nifty50_cons

Unnamed: 0,Date,Security Symbol,Security Name,Industry,Equity Capital (In Rs.),Free Float Market Capitalisation (Rs. Crores),Weightage (%),Beta,R2,Volatility (%),Monthly Return,Avg. Impact Cost (%),Index Market Capitalisation (Rs. Crores)
0,2021-03-31,ADANIPORTS,Adani Ports and Special Economic Zone Ltd.,PORT,4.063504e+09,51375.69,0.81,0.81,0.25,2.43,3.92,0.03,
27,2021-03-31,JSWSTEEL,JSW Steel Ltd.,STEEL,2.417220e+09,45293.88,0.71,1.24,0.47,2.09,18.46,0.03,
28,2021-03-31,KOTAKBANK,Kotak Mahindra Bank Ltd.,PRIVATE SECTOR BANK,9.908109e+09,257059.94,4.05,1.16,0.45,1.61,-1.54,0.02,
29,2021-03-31,LT,Larsen & Toubro Ltd.,ENGINEERING-DESIGNING-CONSTRUCTION,2.808870e+09,171376.76,2.70,0.87,0.35,1.55,-1.64,0.03,
30,2021-03-31,M&M,Mahindra & Mahindra Ltd.,PASSENGER/UTILITY VEHICLES,6.215963e+09,76125.96,1.20,1.27,0.40,1.70,-1.38,0.02,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,2025-09-30,HINDALCO,Hindalco Industries Ltd.,ALUMINIUM,2.247227e+09,,0.98,1.26,0.34,1.02,8.24,0.02,110049.75
470,2025-09-30,HINDUNILVR,Hindustan Unilever Ltd.,DIVERSIFIED,2.349591e+09,,1.98,0.45,0.09,0.81,-5.47,0.01,222528.38
471,2025-09-30,ICICIBANK,ICICI Bank Ltd.,PRIVATE SECTOR BANK,1.428635e+10,,8.52,0.89,0.45,0.63,-3.56,0.02,959257.48
461,2025-09-30,COALINDIA,Coal India Ltd.,INDUSTRIAL MINERALS,6.162728e+10,,0.78,0.91,0.31,0.84,4.04,0.03,88309.73


In [12]:
# Ensure the directory exists
output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")
# output_dir.mkdir(parents=True, exist_ok=True)

# Save the file
nifty50_cons.to_csv(output_dir / "nifty50_cons.csv", index=False)

In [6]:
nifty50_cons[['Date', 'Weightage (%)']].groupby('Date').sum()

Unnamed: 0_level_0,Weightage (%)
Date,Unnamed: 1_level_1
2021-03-31,100.01
2021-09-30,100.01
2022-03-31,100.01
2022-09-30,99.97
2023-03-31,100.0
2023-09-30,100.04
2024-03-31,99.98
2024-09-30,100.0
2025-03-31,100.0
2025-09-30,99.98


Getting Daily Nifty 50 index values from 2021-2025

In [31]:
output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")

ticker = "^NSEI"  # NIFTY 50 Index
start_date = "2020-01-01"
end_date = "2025-12-31"

print("Downloading NIFTY 50 index data from Yahoo Finance...")
df = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    interval="1d",
    auto_adjust=False,
    progress=True,
)

if df.empty:
    raise RuntimeError("No data downloaded. Check ticker or date range.")

# Clean column names
df.reset_index(inplace=True)

# Remove MultiIndex from DataFrame columns
df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]

output_file = "nifty50_index_prices_2021_2025.csv"

df.to_csv(output_dir / output_file, index=False)

print(f"Saved daily NIFTY 50 index prices to: {output_file}")
print(f"Rows: {len(df)}")

df




Downloading NIFTY 50 index data from Yahoo Finance...


[*********************100%***********************]  1 of 1 completed

Saved daily NIFTY 50 index prices to: nifty50_index_prices_2021_2025.csv
Rows: 1485





Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2020-01-01,12182.500000,12182.500000,12222.200195,12165.299805,12202.150391,304100
1,2020-01-02,12282.200195,12282.200195,12289.900391,12195.250000,12198.549805,407700
2,2020-01-03,12226.650391,12226.650391,12265.599609,12191.349609,12261.099609,428800
3,2020-01-06,11993.049805,11993.049805,12179.099609,11974.200195,12170.599609,396500
4,2020-01-07,12052.950195,12052.950195,12152.150391,12005.349609,12079.099609,447800
...,...,...,...,...,...,...,...
1480,2025-12-23,26177.150391,26177.150391,26233.550781,26119.050781,26205.199219,216600
1481,2025-12-24,26142.099609,26142.099609,26236.400391,26123.000000,26170.650391,188800
1482,2025-12-26,26042.300781,26042.300781,26144.199219,26008.599609,26121.250000,142200
1483,2025-12-29,25942.099609,25942.099609,26106.800781,25920.300781,26063.349609,234300


Get price for all unique securities to calcualte beta and vol with 1 year history from respective time period. In this case, we take from 2020 - 2025

In [32]:
base_dir = '/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/'
index_values_file = 'nifty50_index_prices_2021_2025.csv'
index_cons_file = 'nifty50_cons.csv'

In [33]:
index_cons = pd.read_csv(base_dir + index_cons_file, parse_dates=['Date'], usecols=['Date', 'Security Symbol'])
index_cons.insert(1, 'Index_name', 'Nifty 50')
index_cons

Unnamed: 0,Date,Index_name,Security Symbol
0,2021-03-31,Nifty 50,ADANIPORTS
1,2021-03-31,Nifty 50,JSWSTEEL
2,2021-03-31,Nifty 50,KOTAKBANK
3,2021-03-31,Nifty 50,LT
4,2021-03-31,Nifty 50,M&M
...,...,...,...
495,2025-09-30,Nifty 50,HINDALCO
496,2025-09-30,Nifty 50,HINDUNILVR
497,2025-09-30,Nifty 50,ICICIBANK
498,2025-09-30,Nifty 50,COALINDIA


In [34]:
index_values = pd.read_csv(base_dir + index_values_file, usecols=['Date', 'Adj Close'], parse_dates=['Date'])
index_values

Unnamed: 0,Date,Adj Close
0,2020-01-01,12182.500000
1,2020-01-02,12282.200195
2,2020-01-03,12226.650391
3,2020-01-06,11993.049805
4,2020-01-07,12052.950195
...,...,...
1480,2025-12-23,26177.150391
1481,2025-12-24,26142.099609
1482,2025-12-26,26042.300781
1483,2025-12-29,25942.099609


In [35]:
unique_symbols = index_cons['Security Symbol'].unique()
unique_symbols_list = unique_symbols.tolist()
len(unique_symbols_list)

61

Get prices for all unique securites for beta and vol calculation. We will use 1 year history for each beta and vol calculation

In [21]:
start_date = "2020-01-01"
end_date = "2025-12-31"

In [22]:
failed_downloads = []
for symbol in unique_symbols_list:
    print(f"Downloading data for {symbol}...")
    try:
        stock_data = yf.download(symbol + '.NS', start=start_date, end=end_date)
        stock_data.reset_index(inplace=True)
        stock_data.columns = [col[0] if isinstance(col, tuple) else col for col in stock_data.columns]

        stock_data.to_csv(os.path.join(base_dir, 'price_history', f'{symbol}.csv'), index=False)
    except Exception as e:
        print(f"Error downloading data for {symbol}: {e}")
        failed_downloads.append(symbol)
        continue
print("Data download complete.")
if failed_downloads:
    print("Failed downloads for the following symbols:")
    for symbol in failed_downloads:
        print(symbol)

Downloading data for ADANIPORTS...


[*********************100%***********************]  1 of 1 completed


Downloading data for JSWSTEEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for KOTAKBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for LT...


[*********************100%***********************]  1 of 1 completed


Downloading data for M&M...


[*********************100%***********************]  1 of 1 completed


Downloading data for MARUTI...


[*********************100%***********************]  1 of 1 completed


Downloading data for NTPC...


[*********************100%***********************]  1 of 1 completed


Downloading data for NESTLEIND...


[*********************100%***********************]  1 of 1 completed


Downloading data for ONGC...


[*********************100%***********************]  1 of 1 completed


Downloading data for POWERGRID...


[*********************100%***********************]  1 of 1 completed


Downloading data for RELIANCE...


[*********************100%***********************]  1 of 1 completed


Downloading data for INFY...


[*********************100%***********************]  1 of 1 completed


Downloading data for SBILIFE...


[*********************100%***********************]  1 of 1 completed


Downloading data for SBIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for SUNPHARMA...


[*********************100%***********************]  1 of 1 completed


Downloading data for TCS...


[*********************100%***********************]  1 of 1 completed


Downloading data for TATACONSUM...


[*********************100%***********************]  1 of 1 completed


Downloading data for TATAMOTORS...


$TATAMOTORS.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['TATAMOTORS.NS']: possibly delisted; no timezone found


Downloading data for TATASTEEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for TECHM...


[*********************100%***********************]  1 of 1 completed


Downloading data for TITAN...


[*********************100%***********************]  1 of 1 completed


Downloading data for UPL...


[*********************100%***********************]  1 of 1 completed


Downloading data for ULTRACEMCO...


[*********************100%***********************]  1 of 1 completed


Downloading data for SHREECEM...


[*********************100%***********************]  1 of 1 completed


Downloading data for INDUSINDBK...


[*********************100%***********************]  1 of 1 completed


Downloading data for WIPRO...


[*********************100%***********************]  1 of 1 completed


Downloading data for ITC...


[*********************100%***********************]  1 of 1 completed


Downloading data for IOC...


[*********************100%***********************]  1 of 1 completed


Downloading data for ASIANPAINT...


[*********************100%***********************]  1 of 1 completed


Downloading data for AXISBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for BAJAJ-AUTO...


[*********************100%***********************]  1 of 1 completed


Downloading data for BAJFINANCE...


[*********************100%***********************]  1 of 1 completed


Downloading data for BAJAJFINSV...


[*********************100%***********************]  1 of 1 completed


Downloading data for BPCL...


[*********************100%***********************]  1 of 1 completed


Downloading data for BRITANNIA...


[*********************100%***********************]  1 of 1 completed


Downloading data for CIPLA...


[*********************100%***********************]  1 of 1 completed


Downloading data for COALINDIA...


[*********************100%***********************]  1 of 1 completed


Downloading data for DIVISLAB...


[*********************100%***********************]  1 of 1 completed


Downloading data for BHARTIARTL...


[*********************100%***********************]  1 of 1 completed


Downloading data for EICHERMOT...


[*********************100%***********************]  1 of 1 completed


Downloading data for GRASIM...


[*********************100%***********************]  1 of 1 completed


Downloading data for HCLTECH...


[*********************100%***********************]  1 of 1 completed


Downloading data for ICICIBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFCBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFCLIFE...


[*********************100%***********************]  1 of 1 completed


Downloading data for HEROMOTOCO...


[*********************100%***********************]  1 of 1 completed


Downloading data for HINDALCO...


[*********************100%***********************]  1 of 1 completed


Downloading data for HINDUNILVR...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFC...


$HDFC.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['HDFC.NS']: possibly delisted; no timezone found


Downloading data for DRREDDY...


[*********************100%***********************]  1 of 1 completed


Downloading data for APOLLOHOSP...


[*********************100%***********************]  1 of 1 completed


Downloading data for ADANIENT...


[*********************100%***********************]  1 of 1 completed


Downloading data for LTIM...


[*********************100%***********************]  1 of 1 completed


Downloading data for SHRIRAMFIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for BEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for TRENT...


[*********************100%***********************]  1 of 1 completed


Downloading data for ZOMATO...


$ZOMATO.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['ZOMATO.NS']: possibly delisted; no timezone found


Downloading data for JIOFIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for MAXHEALTH...


[*********************100%***********************]  1 of 1 completed


Downloading data for INDIGO...


[*********************100%***********************]  1 of 1 completed


Downloading data for ETERNAL...


[*********************100%***********************]  1 of 1 completed

Data download complete.





In [23]:
print(failed_downloads)

[]


In [24]:
no_price_data_files = []
price_dir = Path(os.path.join(base_dir, 'price_history'))
for files in list(price_dir.glob("*csv")):
    df = pd.read_csv(os.path.join(base_dir, 'price_history', files))
    if len(df) == 0:
        print(f"File {files} is empty.")
        no_price_data_files.append(files)
no_price_data_files

File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HDFC.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/ZOMATO.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/TATAMOTORS.csv is empty.


[PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HDFC.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/ZOMATO.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/TATAMOTORS.csv')]

Deal with error files (manual intervention)

HDFC is HDFCBANK, and ZOMATO is now ETERNAL, which we have

TATAMOTORS is renamed as TMCV

In [25]:
symbol = 'TMCV'
single_stock = yf.download(symbol + '.NS', start=start_date, end=end_date)
single_stock.reset_index(inplace=True)
single_stock.columns = [col[0] if isinstance(col, tuple) else col for col in single_stock.columns]
single_stock
single_stock.to_csv(os.path.join(base_dir, 'price_history', f'{symbol}.csv'), index=False)

[*********************100%***********************]  1 of 1 completed


Single file for all price

In [30]:
price_data = pd.DataFrame()
price_dir = Path(os.path.join(base_dir, 'price_history'))
for files in list(price_dir.glob("*csv")):
    df = pd.read_csv(os.path.join(base_dir, 'price_history', files), usecols=['Date', 'Close', 'Volume'], parse_dates=['Date'])
    
    if len(df) == 0:
        print(f"File {files} is empty.")
    else:    
        df['Symbol'] = os.path.splitext(os.path.basename(files))[0]
        df.insert(0, 'Symbol', df.pop('Symbol'))
        price_data = pd.concat([price_data, df])

price_data.to_csv(os.path.join(base_dir, 'all_price_data.csv'), index=False)
price_data

File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HDFC.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/ZOMATO.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/TATAMOTORS.csv is empty.


Unnamed: 0,Symbol,Date,Close,Volume
0,BHARTIARTL,2020-01-01,433.316467,5154996
1,BHARTIARTL,2020-01-02,435.132721,4933053
2,BHARTIARTL,2020-01-03,435.037109,5154587
3,BHARTIARTL,2020-01-06,429.827362,7538915
4,BHARTIARTL,2020-01-07,425.477997,4353883
...,...,...,...,...
1483,M&M,2025-12-24,3636.699951,842897
1484,M&M,2025-12-25,3636.699951,0
1485,M&M,2025-12-26,3623.100098,829910
1486,M&M,2025-12-29,3592.100098,1034692
