In [1]:
# import necessary libraries
import pandas as pd
import yfinance as yf
import os
from datetime import datetime , timedelta

In [None]:
# Define the date range for data collection
# Last 2 years from today , always dynamic.
end_date = datetime.today()
start_date = end_date - timedelta(days=365*2)  # last 2 years
print(f" Start Date: {start_date}\n End Date: {end_date}")


 Start Date: 2024-01-06 14:17:36.860066
 End Date: 2026-01-05 14:17:36.860066


In [None]:
# Define stock symbols and their corresponding Yahoo Finance tickers (indcies and stocks)
symbols = {
    "NIFTY_50": ("^NSEI", "Index"),
    "BANK_NIFTY": ("^NSEBANK", "Index"),
    "RELIANCE": ("RELIANCE.NS", "Stock"),
    "TCS": ("TCS.NS", "Stock"),
    "HDFCBANK": ("HDFCBANK.NS", "Stock")
}



In [None]:
def download_data(symbol, asset_type):
    df = yf.download(symbol, start=start_date, end=end_date)

    df.columns = df.columns.get_level_values(0)
    df.reset_index(inplace=True)

    df["Symbol"] = symbol
    df["Asset_Type"] = asset_type

    df = df[["Date", "Open", "High", "Low", "Close", "Volume", "Symbol", "Asset_Type"]]

    return df


In [None]:
stock_data = []

for name, (ticker, asset_type) in symbols.items():
    df = download_data(ticker, asset_type)

    df["Name"] = name
    stock_data.append(df)

    df.to_csv(f"Day_2_Stock_Data/{name}.csv", index=False)


In [18]:
# Create an empty dictionary to store dataframes
dataframe = {}
# Download historical data for each symbol and store in the dictionary
for name , symbol in symbols.items():
    df = yf.download(symbol, start=start_date, end=end_date)
    df.columns = df.columns.get_level_values(0) # Flatten multi-level columns if any
    df.reset_index(inplace=True) # Reset index to make 'Date' a column
    df['Ticker'] = name # Add a column for the ticker name
    dataframe[name] = df 

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [19]:
# Display stock name and corresponding dataframe shape ==>>(rows, columns)
for name, df in dataframe.items():
    print(f"{name}: {df.shape}")

NIFTY_50: (492, 7)
BANK_NIFTY: (488, 7)
RELIANCE: (494, 7)
TCS: (494, 7)
HDFCBANK: (494, 7)


In [None]:
# Create a summary DataFrame showing the number of rows and missing values for each stock
summary = []

for name, df in dataframe.items():
    summary.append({
        "Ticker": name,
        "Rows": df.shape[0],
        "Missing_Values": df.isna().sum().sum()
    })

summary_df = pd.DataFrame(summary)
summary_df


Unnamed: 0,Ticker,Rows,Missing_Values
0,NIFTY_50,492,0
1,BANK_NIFTY,488,0
2,RELIANCE,494,0
3,TCS,494,0
4,HDFCBANK,494,0


In [21]:
os.makedirs("Day_2_Stock_Data", exist_ok=True)

for name, df in dataframe.items():
    df.to_csv(f"Day_2_Stock_Data/{name}.csv", index=False)


In [2]:
nifty = pd.read_csv("Day_2_Stock_Data/NIFTY_50.csv")
banknifty = pd.read_csv("Day_2_Stock_Data/BANK_NIFTY.csv")
reliance = pd.read_csv("Day_2_Stock_Data/RELIANCE.csv")
tcs = pd.read_csv("Day_2_Stock_Data/TCS.csv")
hdfcbank = pd.read_csv("Day_2_Stock_Data/HDFCBANK.csv")
all_data = pd.concat([nifty, banknifty, reliance, tcs, hdfcbank], ignore_index=True)
all_data.to_csv("Day_2_Stock_Data/All_Stocks_Data.csv", index=False)
