In [3]:
import yfinance as yf
import pandas as pd
from pathlib import Path
import time

# --- Configuration ---
# Tickers to download. The ".NS" suffix is dropped when output file names
# are built from these symbols.
SYMBOLS = [
    "RELIANCE.NS",
    "INFY.NS",
    "TCS.NS",
    "HDFCBANK.NS",
    "SBIN.NS",
]  # Add more

# Look-back window and bar size handed to yfinance; these values request
# daily bars.
PERIOD = "5y"
INTERVAL = "1d"
# Intraday alternative (example: 5-minute bars for the last 60 days):
# PERIOD = "60d"
# INTERVAL = "5m"
# NOTE(review): yfinance limits how far back intraday history reaches
# (earlier notes here cited 60 days in general and 7 days for 1-minute
# bars) -- confirm against the yfinance docs before relying on it. This
# is just for structure; for serious intraday work use a proper
# broker/vendor API.

# Folder that receives the parquet files; created at import time if it
# does not exist yet.
DATA_DIR = Path("historical_data_NSE")
DATA_DIR.mkdir(parents=True, exist_ok=True)

def fetch_and_store_symbol_data(symbol):
    """Download price history for one ticker and save it as a parquet file.

    The history is requested from yfinance using the module-level ``PERIOD``
    and ``INTERVAL`` settings and written to
    ``DATA_DIR/<symbol without '.NS'>_<INTERVAL>.parquet``.

    Nothing is written when the download is empty or when the ``Close``
    column is missing or entirely null. Any exception raised while fetching
    or storing is caught and reported on stdout, so one bad symbol does not
    abort a batch run.

    Args:
        symbol: Ticker symbol as understood by yfinance, e.g. "RELIANCE.NS".

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    filepath = DATA_DIR / f"{symbol.replace('.NS', '')}_{INTERVAL}.parquet"
    print(f"Fetching data for {symbol}...")
    try:
        ticker = yf.Ticker(symbol)
        # auto_adjust=True asks yfinance for split/dividend-adjusted prices.
        hist = ticker.history(period=PERIOD, interval=INTERVAL, auto_adjust=True)

        if hist.empty:
            print(f"No data found for {symbol} for the given period/interval.")
            return

        # Basic sanity check: without usable Close prices the file is useless.
        if 'Close' not in hist.columns or hist['Close'].isnull().all():
            print(f"Close data is missing or all nulls for {symbol}")
            return

        # Drop timezone info so the stored index is timezone-naive.
        if hist.index.tz is not None:
            hist.index = hist.index.tz_localize(None)

        hist.to_parquet(filepath)
        print(f"Stored data for {symbol} at {filepath}")

    except Exception as e:
        print(f"Error fetching/storing {symbol}: {e}")

    finally:
        # Pause after every request, including the early-return paths above:
        # an empty result is when backing off from the API matters most.
        time.sleep(1)

# --- Main Ingestion ---
if __name__ == "__main__":
    # Download every configured symbol, one after another.
    for sym in SYMBOLS:
        fetch_and_store_symbol_data(sym)
    print("Historical data download process finished.")

    # Read one stored file back as a quick check. The name is derived from
    # INTERVAL so it matches what fetch_and_store_symbol_data wrote.
    try:
        reliance_df = pd.read_parquet(DATA_DIR / f"RELIANCE_{INTERVAL}.parquet")
    except FileNotFoundError:
        print("Reliance parquet file not found. Run the script first.")
    else:
        print("\nReliance Data Sample:")
        print(reliance_df.head())
        print(reliance_df.tail())
        # DataFrame.info() prints its own report and returns None, so it is
        # called directly instead of being wrapped in print().
        reliance_df.info()

Fetching data for RELIANCE.NS...
Stored data for RELIANCE.NS at historical_data_NSE\RELIANCE_1d.parquet
Fetching data for INFY.NS...
Stored data for INFY.NS at historical_data_NSE\INFY_1d.parquet
Fetching data for TCS.NS...
Stored data for TCS.NS at historical_data_NSE\TCS_1d.parquet
Fetching data for HDFCBANK.NS...
Stored data for HDFCBANK.NS at historical_data_NSE\HDFCBANK_1d.parquet
Fetching data for SBIN.NS...
Stored data for SBIN.NS at historical_data_NSE\SBIN_1d.parquet
Historical data download process finished.

Reliance Data Sample:
                  Open        High         Low       Close    Volume  \
Date                                                                   
2020-05-14  670.630833  683.276516  652.849315  655.542786  49266771   
2020-05-15  659.217832  669.580911  646.024349  666.248291  62152260   
2020-05-18  671.087369  676.565599  651.913443  657.734070  62796089   
2020-05-19  665.152595  667.298267  640.614550  643.193848  42315854   
2020-05-20  643.69603