In [55]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
import os

In [56]:
# Define index tickers
tickers = {
    "Nifty50": "^NSEI",
    "Nifty100": "^CNX100",
    "Nifty200": "^CNX200",
    "Nifty500": "^CRSLDX"  # or "^CNX500"
}

# Output folder
output_folder = r"C:\Users\Stevi\OneDrive\Documents\Projects\Crude-Oil\Data ingestion"
os.makedirs(output_folder, exist_ok=True)

# Target columns
standard_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

# Function to fix and rename columns
def fix_columns(df):
    if isinstance(df.columns, pd.MultiIndex):
        # Check if second level has expected names like 'Open', else try first level
        level0 = df.columns.get_level_values(0)
        level1 = df.columns.get_level_values(1)
        if any(l in standard_columns for l in level1):
            df.columns = level1
        elif any(l in standard_columns for l in level0):
            df.columns = level0
        else:
            # fallback to numbered columns
            df.columns = standard_columns[:len(df.columns)]

    df = df.reset_index()
    return df

# Fetch and clean data
def fetch_and_clean(ticker_symbol, name):
    print(f"Fetching: {name} ({ticker_symbol})")
    df = yf.download(ticker_symbol, start="2013-01-01", auto_adjust=False)

    if df.empty:
        print(f"⚠️ Warning: No data for {name}")
        return pd.DataFrame()

    df = fix_columns(df)
    df['Ticker'] = name

    required = ['Date'] + standard_columns
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(f"❌ Missing expected columns in {name}: {missing}")

    df = df[required + ['Ticker']]
    return df

# Main loop
for name, symbol in tickers.items():
    try:
        df = fetch_and_clean(symbol, name)
        if not df.empty:
            file_path = os.path.join(output_folder, f"{name}.csv")
            df.to_csv(file_path, index=False)
            print(f"✅ Saved: {file_path}")
    except Exception as e:
        print(str(e))

Fetching: Nifty50 (^NSEI)


[*********************100%***********************]  1 of 1 completed


✅ Saved: C:\Users\Stevi\OneDrive\Documents\Projects\Crude-Oil\Data ingestion\Nifty50.csv
Fetching: Nifty100 (^CNX100)


[*********************100%***********************]  1 of 1 completed


✅ Saved: C:\Users\Stevi\OneDrive\Documents\Projects\Crude-Oil\Data ingestion\Nifty100.csv
Fetching: Nifty200 (^CNX200)


[*********************100%***********************]  1 of 1 completed


✅ Saved: C:\Users\Stevi\OneDrive\Documents\Projects\Crude-Oil\Data ingestion\Nifty200.csv
Fetching: Nifty500 (^CRSLDX)


[*********************100%***********************]  1 of 1 completed

✅ Saved: C:\Users\Stevi\OneDrive\Documents\Projects\Crude-Oil\Data ingestion\Nifty500.csv





In [57]:
# Date range
start_date = "2024-01-01"
end_date = "2025-07-15"

# Output folder
output_dir = r"C:\Users\Stevi\OneDrive\Documents\Projects\Crude-Oil\Data ingestion"
os.makedirs(output_dir, exist_ok=True)

# Download data
wti = yf.download("CL=F", start=start_date, end=end_date)
brent = yf.download("BZ=F", start=start_date, end=end_date)
usd_inr = yf.download("INR=X", start=start_date, end=end_date)

# Prepare USDINR Close column
usd_inr = usd_inr[["Close"]].rename(columns={"Close": "USDINR"})

# Robust conversion function
def convert_to_inr(df_usd, fx_df):
    df = df_usd.copy()
    
    # Merge on date index
    df_merged = df.merge(fx_df, how="inner", left_index=True, right_index=True)
    
    usd_to_inr = df_merged["USDINR"].values  # get values to avoid index alignment issues
    
    # Multiply only numeric columns
    numeric_cols = df_merged.select_dtypes(include='number').columns.drop("USDINR")
    for col in numeric_cols:
        df_merged[col] = (df_merged[col].values * usd_to_inr).round(2)
    
    return df_merged.drop(columns=["USDINR"])

# After conversion
wti_inr = convert_to_inr(wti, usd_inr)
brent_inr = convert_to_inr(brent, usd_inr)

# Flatten column headers if needed
for df in [wti_inr, brent_inr]:
    if isinstance(df.columns[0], tuple):  # MultiIndex check
        df.columns = [col[0] for col in df.columns]

# Save to CSV
wti_inr.to_csv(os.path.join(output_dir, "wti_inr.csv"), index=True)
brent_inr.to_csv(os.path.join(output_dir, "brent_inr.csv"), index=True)

print("✅ Data saved with clean headers.")

  wti = yf.download("CL=F", start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  brent = yf.download("BZ=F", start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  usd_inr = yf.download("INR=X", start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed

✅ Data saved with clean headers.



  numeric_cols = df_merged.select_dtypes(include='number').columns.drop("USDINR")
  return df_merged.drop(columns=["USDINR"])
  numeric_cols = df_merged.select_dtypes(include='number').columns.drop("USDINR")
  return df_merged.drop(columns=["USDINR"])
