In [1]:
import pandas as pd
from pathlib import Path
import yfinance as yf
import os


Getting Nifty 50 constituents data

In [2]:
# Define the Nifty50 folder path
nifty50_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/Nifty50")

# Find all nifty50_mcwb.csv files
csv_files = list(nifty50_dir.glob("*/nifty50_mcwb.csv"))
csv_files.sort()  # Sort for consistent ordering

In [3]:
def single_file(i):
    df = pd.read_csv(csv_files[i], header=2)
    df = df.iloc[:, 1:]  # Use .iloc to slice columns starting from the second column
    file_date = os.path.basename(os.path.dirname(csv_files[i]))[-5:]
    df.insert(0, 'Date', file_date)
    df['Date'] = pd.to_datetime(df['Date'] , format='%b%y') + pd.offsets.MonthEnd(0)
    df.dropna(inplace=True)
    return df



In [4]:
nifty50_cons = pd.concat([single_file(i) for i in range(len(csv_files))], ignore_index=True).sort_values(by='Date')
nifty50_cons

Unnamed: 0,Date,Security Symbol,Security Name,Industry,Equity Capital (In Rs.),Free Float Market Capitalisation (Rs. Crores),Weightage (%),Beta,R2,Volatility (%),Monthly Return,Avg. Impact Cost (%),Index Market Capitalisation (Rs. Crores)
0,2016-03-31,ACC,ACC Ltd.,CEMENT AND CEMENT PRODUCTS,1.881916e+09,13003.10,0.47,0.82,0.36,1.58,15.75,0.03,
27,2016-03-31,INFY,Infosys Ltd.,COMPUTERS - SOFTWARE,1.148472e+10,243457.99,8.78,0.82,0.26,1.47,12.38,0.03,
28,2016-03-31,KOTAKBANK,Kotak Mahindra Bank Ltd.,BANKS,9.166275e+09,69877.08,2.52,1.09,0.47,1.27,7.97,0.03,
29,2016-03-31,LT,Larsen & Toubro Ltd.,ENGINEERING,1.862596e+09,99713.71,3.60,1.21,0.52,2.09,13.08,0.03,
30,2016-03-31,LUPIN,Lupin Ltd.,PHARMACEUTICALS,9.010671e+08,35321.94,1.27,0.74,0.14,3.23,-15.69,0.03,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
971,2025-09-30,HINDALCO,Hindalco Industries Ltd.,ALUMINIUM,2.247227e+09,,0.98,1.26,0.34,1.02,8.24,0.02,110049.75
972,2025-09-30,HINDUNILVR,Hindustan Unilever Ltd.,DIVERSIFIED,2.349591e+09,,1.98,0.45,0.09,0.81,-5.47,0.01,222528.38
973,2025-09-30,ICICIBANK,ICICI Bank Ltd.,PRIVATE SECTOR BANK,1.428635e+10,,8.52,0.89,0.45,0.63,-3.56,0.02,959257.48
963,2025-09-30,COALINDIA,Coal India Ltd.,INDUSTRIAL MINERALS,6.162728e+10,,0.78,0.91,0.31,0.84,4.04,0.03,88309.73


In [5]:
# Ensure the directory exists
output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")
# output_dir.mkdir(parents=True, exist_ok=True)

# Save the file
nifty50_cons.to_csv(output_dir / "nifty50_cons.csv", index=False)

In [6]:
nifty50_cons[['Date', 'Weightage (%)']].groupby('Date').sum()

Unnamed: 0_level_0,Weightage (%)
Date,Unnamed: 1_level_1
2016-03-31,99.96
2016-09-30,100.02
2017-03-31,100.0
2017-09-30,99.97
2018-03-31,100.03
2018-09-30,99.97
2019-03-31,100.01
2019-09-30,99.99
2020-03-31,100.02
2020-09-30,99.96


Getting Daily Nifty 50 index values from 2021-2025

In [13]:
output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")

ticker = "^NSEI"  # NIFTY 50 Index
start_date = "2016-01-01"
end_date = "2025-12-31"

print("Downloading NIFTY 50 index data from Yahoo Finance...")
df = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    interval="1d",
    auto_adjust=False,
    progress=True,
)

if df.empty:
    raise RuntimeError("No data downloaded. Check ticker or date range.")

# Clean column names
df.reset_index(inplace=True)

# Remove MultiIndex from DataFrame columns
df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]

output_file = "nifty50_index_prices_2016_2025.csv"

df.to_csv(output_dir / output_file, index=False)

print(f"Saved daily NIFTY 50 index prices to: {output_file}")
print(f"Rows: {len(df)}")

df




Failed to get ticker '^NSEI' reason: Expecting value: line 1 column 1 (char 0)
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['^NSEI']: Exception('%ticker%: No timezone found, symbol may be delisted')


Downloading NIFTY 50 index data from Yahoo Finance...


RuntimeError: No data downloaded. Check ticker or date range.

Get price for all unique securities to calcualte beta and vol with 1 year history from respective time period. In this case, we take from 2020 - 2025

In [6]:
base_dir = '/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/'
index_values_file = 'nifty50_index_prices_2016_2025.csv'
index_cons_file = 'nifty50_cons.csv'

In [4]:
index_cons = pd.read_csv(base_dir + index_cons_file, parse_dates=['Date'], usecols=['Date', 'Security Symbol'])
index_cons.insert(1, 'Index_name', 'Nifty 50')
index_cons

Unnamed: 0,Date,Index_name,Security Symbol
0,2016-03-31,Nifty 50,ACC
1,2016-03-31,Nifty 50,INFY
2,2016-03-31,Nifty 50,KOTAKBANK
3,2016-03-31,Nifty 50,LT
4,2016-03-31,Nifty 50,LUPIN
...,...,...,...
997,2025-09-30,Nifty 50,HINDALCO
998,2025-09-30,Nifty 50,HINDUNILVR
999,2025-09-30,Nifty 50,ICICIBANK
1000,2025-09-30,Nifty 50,COALINDIA


In [8]:
index_values = pd.read_csv(base_dir + index_values_file, usecols=['Date', 'Adj Close'], parse_dates=['Date'])
index_values

Unnamed: 0,Date,Adj Close
0,2016-04-01,7791.299805
1,2016-05-01,7784.649902
2,2016-06-01,7741.000000
3,2016-07-01,7568.299805
4,2016-08-01,7601.350098
...,...,...
2707,2025-12-23,26177.150390
2708,2025-12-24,26142.099610
2709,2025-12-26,26042.300780
2710,2025-12-29,25942.099610


In [9]:
unique_symbols = index_cons['Security Symbol'].unique()
unique_symbols_list = unique_symbols.tolist()
len(unique_symbols_list)

80

Get prices for all unique securites for beta and vol calculation. We will use 1 year history for each beta and vol calculation

In [10]:
start_date = "2016-01-01"
end_date = "2025-12-31"

In [11]:
failed_downloads = []
for symbol in unique_symbols_list:
    print(f"Downloading data for {symbol}...")
    try:
        stock_data = yf.download(symbol + '.NS', start=start_date, end=end_date)
        stock_data.reset_index(inplace=True)
        stock_data.columns = [col[0] if isinstance(col, tuple) else col for col in stock_data.columns]

        stock_data.to_csv(os.path.join(base_dir, 'price_history', f'{symbol}.csv'), index=False)
    except Exception as e:
        print(f"Error downloading data for {symbol}: {e}")
        failed_downloads.append(symbol)
        continue
print("Data download complete.")
if failed_downloads:
    print("Failed downloads for the following symbols:")
    for symbol in failed_downloads:
        print(symbol)

Downloading data for ACC...


[*********************100%***********************]  1 of 1 completed


Downloading data for INFY...


[*********************100%***********************]  1 of 1 completed


Downloading data for KOTAKBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for LT...


[*********************100%***********************]  1 of 1 completed


Downloading data for LUPIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for M&M...


[*********************100%***********************]  1 of 1 completed


Downloading data for MARUTI...


[*********************100%***********************]  1 of 1 completed


Downloading data for NTPC...


[*********************100%***********************]  1 of 1 completed


Downloading data for ONGC...


[*********************100%***********************]  1 of 1 completed


Downloading data for POWERGRID...


[*********************100%***********************]  1 of 1 completed


Downloading data for RELIANCE...


[*********************100%***********************]  1 of 1 completed


Downloading data for INDUSINDBK...


[*********************100%***********************]  1 of 1 completed


Downloading data for SBIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for TCS...


[*********************100%***********************]  1 of 1 completed


Downloading data for TATAMOTORS...


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: TATAMOTORS.NS"}}}
$TATAMOTORS.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['TATAMOTORS.NS']: possibly delisted; no timezone found


Downloading data for TATAPOWER...


[*********************100%***********************]  1 of 1 completed


Downloading data for TATASTEEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for TECHM...


[*********************100%***********************]  1 of 1 completed


Downloading data for ULTRACEMCO...


[*********************100%***********************]  1 of 1 completed


Downloading data for VEDL...


[*********************100%***********************]  1 of 1 completed


Downloading data for WIPRO...


[*********************100%***********************]  1 of 1 completed


Downloading data for YESBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for ZEEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for SUNPHARMA...


[*********************100%***********************]  1 of 1 completed


Downloading data for IDEA...


[*********************100%***********************]  1 of 1 completed


Downloading data for PNB...


[*********************100%***********************]  1 of 1 completed


Downloading data for ITC...


[*********************100%***********************]  1 of 1 completed


Downloading data for ICICIBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for AMBUJACEM...


[*********************100%***********************]  1 of 1 completed


Downloading data for ASIANPAINT...


[*********************100%***********************]  1 of 1 completed


Downloading data for AXISBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for BAJAJ-AUTO...


[*********************100%***********************]  1 of 1 completed


Downloading data for BANKBARODA...


[*********************100%***********************]  1 of 1 completed


Downloading data for BHEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for BPCL...


[*********************100%***********************]  1 of 1 completed


Downloading data for BHARTIARTL...


[*********************100%***********************]  1 of 1 completed


Downloading data for BOSCHLTD...


[*********************100%***********************]  1 of 1 completed


Downloading data for CAIRN...


$CAIRN.NS: possibly delisted; no price data found  (1d 2016-01-01 -> 2025-12-31)
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['CAIRN.NS']: possibly delisted; no price data found  (1d 2016-01-01 -> 2025-12-31)


Downloading data for ADANIPORTS...


[*********************100%***********************]  1 of 1 completed


Downloading data for COALINDIA...


[*********************100%***********************]  1 of 1 completed


Downloading data for CIPLA...


[*********************100%***********************]  1 of 1 completed


Downloading data for HINDUNILVR...


[*********************100%***********************]  1 of 1 completed


Downloading data for HINDALCO...


[*********************100%***********************]  1 of 1 completed


Downloading data for HEROMOTOCO...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFCBANK...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFC...


$HDFC.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['HDFC.NS']: possibly delisted; no timezone found


Downloading data for GRASIM...


[*********************100%***********************]  1 of 1 completed


Downloading data for GAIL...


[*********************100%***********************]  1 of 1 completed


Downloading data for DRREDDY...


[*********************100%***********************]  1 of 1 completed


Downloading data for HCLTECH...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['HCLTECH.NS']: TypeError("'NoneType' object is not subscriptable")


Downloading data for TATAMTRDVR...


$TATAMTRDVR.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['TATAMTRDVR.NS']: possibly delisted; no timezone found


Downloading data for AUROPHARMA...


[*********************100%***********************]  1 of 1 completed


Downloading data for INFRATEL...


$INFRATEL.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['INFRATEL.NS']: possibly delisted; no timezone found


Downloading data for EICHERMOT...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['EICHERMOT.NS']: TypeError("'NoneType' object is not subscriptable")


Downloading data for IBULHSGFIN...


$IBULHSGFIN.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['IBULHSGFIN.NS']: possibly delisted; no timezone found


Downloading data for IOC...


[*********************100%***********************]  1 of 1 completed


Downloading data for BAJFINANCE...


[*********************100%***********************]  1 of 1 completed


Downloading data for HINDPETRO...


[*********************100%***********************]  1 of 1 completed


Downloading data for UPL...


[*********************100%***********************]  1 of 1 completed


Downloading data for BAJAJFINSV...


[*********************100%***********************]  1 of 1 completed


Downloading data for TITAN...


[*********************100%***********************]  1 of 1 completed


Downloading data for JSWSTEEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for BRITANNIA...


[*********************100%***********************]  1 of 1 completed


Downloading data for NESTLEIND...


[*********************100%***********************]  1 of 1 completed


Downloading data for SHREECEM...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFCLIFE...


[*********************100%***********************]  1 of 1 completed


Downloading data for DIVISLAB...


[*********************100%***********************]  1 of 1 completed


Downloading data for SBILIFE...


[*********************100%***********************]  1 of 1 completed


Downloading data for TATACONSUM...


[*********************100%***********************]  1 of 1 completed


Downloading data for APOLLOHOSP...


[*********************100%***********************]  1 of 1 completed


Downloading data for ADANIENT...


[*********************100%***********************]  1 of 1 completed


Downloading data for LTIM...


[*********************100%***********************]  1 of 1 completed


Downloading data for SHRIRAMFIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for BEL...


[*********************100%***********************]  1 of 1 completed


Downloading data for TRENT...


[*********************100%***********************]  1 of 1 completed


Downloading data for ZOMATO...


$ZOMATO.NS: possibly delisted; no timezone found
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['ZOMATO.NS']: possibly delisted; no timezone found


Downloading data for JIOFIN...


[*********************100%***********************]  1 of 1 completed


Downloading data for MAXHEALTH...


[*********************100%***********************]  1 of 1 completed


Downloading data for INDIGO...


[*********************100%***********************]  1 of 1 completed


Downloading data for ETERNAL...


[*********************100%***********************]  1 of 1 completed

Data download complete.





In [12]:
print(failed_downloads)

[]


In [13]:
no_price_data_files = []
price_dir = Path(os.path.join(base_dir, 'price_history'))
for files in list(price_dir.glob("*csv")):
    df = pd.read_csv(os.path.join(base_dir, 'price_history', files))
    if len(df) == 0:
        print(f"File {files} is empty.")
        no_price_data_files.append(files)
no_price_data_files

File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/IBULHSGFIN.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/TATAMTRDVR.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HCLTECH.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HDFC.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/ZOMATO.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/CAIRN.csv is empty.
File /Users/sidd

[PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/IBULHSGFIN.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/TATAMTRDVR.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HCLTECH.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HDFC.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/ZOMATO.csv'),
 PosixPath('/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/CAIRN.csv'),
 PosixPath('/Use

Deal with error files (manual intervention)

HDFC is HDFCBANK, and ZOMATO is now ETERNAL, which we have

TATAMOTORS is renamed as TMCV

In [14]:
symbol = 'TMCV'
single_stock = yf.download(symbol + '.NS', start=start_date, end=end_date)
single_stock.reset_index(inplace=True)
single_stock.columns = [col[0] if isinstance(col, tuple) else col for col in single_stock.columns]
single_stock
single_stock.to_csv(os.path.join(base_dir, 'price_history', f'{symbol}.csv'), index=False)

[*********************100%***********************]  1 of 1 completed


Single file for all price

In [15]:
price_data = pd.DataFrame()
price_dir = Path(os.path.join(base_dir, 'price_history'))
for files in list(price_dir.glob("*csv")):
    df = pd.read_csv(os.path.join(base_dir, 'price_history', files), usecols=['Date', 'Close', 'Volume'], parse_dates=['Date'])
    
    if len(df) == 0:
        print(f"File {files} is empty.")
    else:    
        df['Symbol'] = os.path.splitext(os.path.basename(files))[0]
        df.insert(0, 'Symbol', df.pop('Symbol'))
        price_data = pd.concat([price_data, df])

price_data.to_csv(os.path.join(base_dir, 'all_price_data.csv'), index=False)
price_data

File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/IBULHSGFIN.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/TATAMTRDVR.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HCLTECH.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/HDFC.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/ZOMATO.csv is empty.
File /Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/price_history/CAIRN.csv is empty.
File /Users/sidd

Unnamed: 0,Symbol,Date,Close,Volume
0,BHARTIARTL,2016-01-01,290.676270,1387248
1,BHARTIARTL,2016-01-04,278.980865,3498169
2,BHARTIARTL,2016-01-05,276.121124,2351113
3,BHARTIARTL,2016-01-06,275.054047,2678635
4,BHARTIARTL,2016-01-07,275.182037,3135368
...,...,...,...,...
2464,M&M,2025-12-23,3625.100098,1413628
2465,M&M,2025-12-24,3636.699951,842897
2466,M&M,2025-12-26,3623.100098,829910
2467,M&M,2025-12-29,3592.100098,1034692
