In [1]:
import pandas as pd
from pathlib import Path
import yfinance as yf
import os


Getting Nifty 50 constituents data

In [None]:
# Define the Nifty50 folder path
nifty50_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project/Nifty50")

# Find all nifty50_mcwb.csv files
csv_files = list(nifty50_dir.glob("*/nifty50_mcwb.csv"))
csv_files.sort()  # Sort for consistent ordering

In [4]:
def single_file(i):
    df = pd.read_csv(csv_files[i], header=2)
    df = df.iloc[:, 1:]  # Use .iloc to slice columns starting from the second column
    file_date = os.path.basename(os.path.dirname(csv_files[i]))[-5:]
    df.insert(0, 'Date', file_date)
    df['Date'] = pd.to_datetime(df['Date'] , format='%b%y') + pd.offsets.MonthEnd(0)
    df.dropna(inplace=True)
    return df



In [5]:
nifty50_cons = pd.concat([single_file(i) for i in range(len(csv_files))], ignore_index=True).sort_values(by='Date')
nifty50_cons

Unnamed: 0,Date,Security Symbol,Security Name,Industry,Equity Capital (In Rs.),Free Float Market Capitalisation (Rs. Crores),Weightage (%),Beta,R2,Volatility (%),Monthly Return,Avg. Impact Cost (%),Index Market Capitalisation (Rs. Crores)
0,2021-03-31,ADANIPORTS,Adani Ports and Special Economic Zone Ltd.,PORT,4.063504e+09,51375.69,0.81,0.81,0.25,2.43,3.92,0.03,
27,2021-03-31,JSWSTEEL,JSW Steel Ltd.,STEEL,2.417220e+09,45293.88,0.71,1.24,0.47,2.09,18.46,0.03,
28,2021-03-31,KOTAKBANK,Kotak Mahindra Bank Ltd.,PRIVATE SECTOR BANK,9.908109e+09,257059.94,4.05,1.16,0.45,1.61,-1.54,0.02,
29,2021-03-31,LT,Larsen & Toubro Ltd.,ENGINEERING-DESIGNING-CONSTRUCTION,2.808870e+09,171376.76,2.70,0.87,0.35,1.55,-1.64,0.03,
30,2021-03-31,M&M,Mahindra & Mahindra Ltd.,PASSENGER/UTILITY VEHICLES,6.215963e+09,76125.96,1.20,1.27,0.40,1.70,-1.38,0.02,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,2025-09-30,HINDALCO,Hindalco Industries Ltd.,ALUMINIUM,2.247227e+09,,0.98,1.26,0.34,1.02,8.24,0.02,110049.75
470,2025-09-30,HINDUNILVR,Hindustan Unilever Ltd.,DIVERSIFIED,2.349591e+09,,1.98,0.45,0.09,0.81,-5.47,0.01,222528.38
471,2025-09-30,ICICIBANK,ICICI Bank Ltd.,PRIVATE SECTOR BANK,1.428635e+10,,8.52,0.89,0.45,0.63,-3.56,0.02,959257.48
461,2025-09-30,COALINDIA,Coal India Ltd.,INDUSTRIAL MINERALS,6.162728e+10,,0.78,0.91,0.31,0.84,4.04,0.03,88309.73


In [12]:
# Ensure the directory exists
output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")
# output_dir.mkdir(parents=True, exist_ok=True)

# Save the file
nifty50_cons.to_csv(output_dir / "nifty50_cons.csv", index=False)

In [6]:
nifty50_cons[['Date', 'Weightage (%)']].groupby('Date').sum()

Unnamed: 0_level_0,Weightage (%)
Date,Unnamed: 1_level_1
2021-03-31,100.01
2021-09-30,100.01
2022-03-31,100.01
2022-09-30,99.97
2023-03-31,100.0
2023-09-30,100.04
2024-03-31,99.98
2024-09-30,100.0
2025-03-31,100.0
2025-09-30,99.98


Getting Daily Nifty 50 index values from 2021-2025

In [18]:
output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")

ticker = "^NSEI"  # NIFTY 50 Index
start_date = "2021-01-01"
end_date = "2025-12-31"

print("Downloading NIFTY 50 index data from Yahoo Finance...")
df = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    interval="1d",
    auto_adjust=False,
    progress=True,
)

if df.empty:
    raise RuntimeError("No data downloaded. Check ticker or date range.")

# Clean column names
df.reset_index(inplace=True)

# Remove MultiIndex from DataFrame columns
df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]

output_file = "nifty50_index_prices_2021_2025.csv"

df.to_csv(output_dir / output_file, index=False)

print(f"Saved daily NIFTY 50 index prices to: {output_file}")
print(f"Rows: {len(df)}")

df




[*********************100%***********************]  1 of 1 completed

Downloading NIFTY 50 index data from Yahoo Finance...
Saved daily NIFTY 50 index prices to: nifty50_index_prices_2021_2025.csv
Rows: 1235





Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2021-01-01,14018.500000,14018.500000,14049.849609,13991.349609,13996.099609,358100
1,2021-01-04,14132.900391,14132.900391,14147.950195,13953.750000,14104.349609,495000
2,2021-01-05,14199.500000,14199.500000,14215.599609,14048.150391,14075.150391,492500
3,2021-01-06,14146.250000,14146.250000,14244.150391,14039.900391,14240.950195,632300
4,2021-01-07,14137.349609,14137.349609,14256.250000,14123.099609,14253.750000,559200
...,...,...,...,...,...,...,...
1230,2025-12-23,26177.150391,26177.150391,26233.550781,26119.050781,26205.199219,216600
1231,2025-12-24,26142.099609,26142.099609,26236.400391,26123.000000,26170.650391,188800
1232,2025-12-26,26042.300781,26042.300781,26144.199219,26008.599609,26121.250000,142200
1233,2025-12-29,25942.099609,25942.099609,26106.800781,25920.300781,26063.349609,234300
