In [1]:
import yfinance as yf
import pandas as pd

# ------------------------------
# Step 1: User Inputs
# ------------------------------
symbol = input("Enter stock symbol (e.g., SBIN.NS, RELIANCE.NS, AAPL): ").strip()
start_date = input("Enter start date (YYYY-MM-DD): ").strip()
end_date = input("Enter end date (YYYY-MM-DD): ").strip()
timeframe = input("Enter timeframe (e.g., 5min, 15min, 1h, 1d): ").strip()

# ------------------------------
# Step 2: Validate inputs
# ------------------------------
if not symbol:
    print("‚ùå Invalid stock symbol.")
    exit()

try:
    start_dt = pd.to_datetime(start_date)
    end_dt = pd.to_datetime(end_date)
except:
    print("‚ùå Invalid date format. Use YYYY-MM-DD.")
    exit()

if start_dt >= end_dt:
    print("‚ùå Start date must be before end date.")
    exit()

# ------------------------------
# Step 3: Map timeframe to yfinance interval and adjust download interval
# ------------------------------
# Map user timeframe to yfinance interval
timeframe_map = {
    '5min': '5m', '15min': '15m', '30min': '30m',
    '1h': '1h', '1d': '1d', '1wk': '1wk', '1mo': '1mo'
}

if timeframe not in timeframe_map:
    print(f"‚ùå Unsupported timeframe: {timeframe}. Using '1d' as default.")
    download_interval = '1d'
    timeframe = '1d'
else:
    download_interval = timeframe_map[timeframe]

# Check for intraday data limitations
minute_intervals = ['5m', '15m', '30m', '1h']
delta_days = (end_dt - start_dt).days

if download_interval in minute_intervals and delta_days > 60:
    print(f"‚ö†Ô∏è Intraday data limited to 60 days. Switching to '1d'.")
    download_interval = '1d'
elif download_interval in minute_intervals and delta_days > 7:
    print(f"‚ö†Ô∏è Note: For {timeframe} data, yfinance may return limited historical data.")

# ------------------------------
# Step 4: Download data
# ------------------------------
print(f"\nüì• Downloading {symbol} data from {start_date} to {end_date} at interval '{download_interval}' ...")

try:
    data = yf.download(
        tickers=symbol,
        start=start_date,
        end=end_date,
        interval=download_interval,
        progress=True  # Show progress bar
    )
except Exception as e:
    print(f"‚ùå Error downloading data: {e}")
    exit()

if data.empty:
    print("‚ùå No data found. Check symbol, date range, or market hours.")
    exit()

print(f"‚úÖ Downloaded {len(data)} records")

# ------------------------------
# Step 5: Flatten MultiIndex columns if necessary
# ------------------------------
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)  # Fix: get first level only

# ------------------------------
# Step 6: Keep only OHLCV columns
# ------------------------------
expected_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

# Check which columns are available
available_cols = [col for col in expected_cols if col in data.columns]
if not available_cols:
    print(f"‚ùå No OHLCV columns found. Available columns: {list(data.columns)}")
    exit()

data = data[available_cols].dropna()

# Ensure datetime index
if not isinstance(data.index, pd.DatetimeIndex):
    data.index = pd.to_datetime(data.index)

# ------------------------------
# Step 7: Resample data if needed (when download interval doesn't match target timeframe)
# ------------------------------
if download_interval != timeframe_map.get(timeframe, timeframe):
    try:
        print(f"üîÑ Resampling from {download_interval} to {timeframe}...")
        
        # Map timeframe to pandas resample rule
        resample_rule = timeframe
        if timeframe == '1h':
            resample_rule = '1H'
        
        resampled = data.resample(resample_rule).agg({
            'Open': 'first',
            'High': 'max',
            'Low': 'min',
            'Close': 'last',
            'Volume': 'sum'
        }).dropna()
        print(f"‚úÖ Resampled to {len(resampled)} records")
    except Exception as e:
        print(f"‚ùå Error in resampling: {e}")
        resampled = data.copy()
else:
    resampled = data.copy()

# ------------------------------
# Step 8: Format date and time
# ------------------------------
resampled = resampled.reset_index()

# Handle different column names for datetime
datetime_col = None
for col in ['Datetime', 'Date', 'index']:
    if col in resampled.columns:
        datetime_col = col
        break

if datetime_col is None:
    print("‚ùå Could not find datetime column in data")
    exit()

# Convert to proper datetime and extract date/time
resampled[datetime_col] = pd.to_datetime(resampled[datetime_col])
resampled['date'] = resampled[datetime_col].dt.date.astype(str)
resampled['time'] = resampled[datetime_col].dt.time.astype(str)

# Select and rename columns
final_columns = ['date', 'time', 'Open', 'High', 'Low', 'Close', 'Volume']
resampled = resampled[final_columns]
resampled.columns = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']

# Remove any remaining NaN values
resampled = resampled.dropna()

# ------------------------------
# Step 9: Save CSV
# ------------------------------
filename = f"{symbol.replace('.', '_')}_{timeframe}.csv"
resampled.to_csv(filename, index=False)

print(f"\n‚úÖ CSV file saved successfully as: {filename}")
print(f"üìä Final data shape: {resampled.shape}")
print(f"üìÖ Date range: {resampled['date'].min()} to {resampled['date'].max()}")
print(f"üïí Time range: {resampled['time'].min()} to {resampled['time'].max()}")

# Display first few rows
print(f"\nüìã First 3 rows of data:")
print(resampled.head(3).to_string(index=False))

Enter stock symbol (e.g., SBIN.NS, RELIANCE.NS, AAPL):  RELIANCE.NS
Enter start date (YYYY-MM-DD):   2025-10-10
Enter end date (YYYY-MM-DD):  2025-10-20
Enter timeframe (e.g., 5min, 15min, 1h, 1d):  5MIN


‚ùå Unsupported timeframe: 5MIN. Using '1d' as default.

üì• Downloading RELIANCE.NS data from 2025-10-10 to 2025-10-20 at interval '1d' ...


  data = yf.download(
[*********************100%***********************]  1 of 1 completed

‚úÖ Downloaded 6 records

‚úÖ CSV file saved successfully as: RELIANCE_NS_1d.csv
üìä Final data shape: (6, 7)
üìÖ Date range: 2025-10-10 to 2025-10-17
üïí Time range: 00:00:00 to 00:00:00

üìã First 3 rows of data:
      date     time        open        high         low       close  volume
2025-10-10 00:00:00 1377.800049 1388.000000 1375.099976 1381.699951 6373084
2025-10-13 00:00:00 1376.900024 1377.699951 1367.800049 1375.000000 7600682
2025-10-14 00:00:00 1380.000000 1388.000000 1370.099976 1375.900024 9768174



