In [1]:
# data_collection.py
import pandas as pd
import yfinance as yf
import time

def download_and_save_data(tickers, start_date="2005-01-01", end_date="2025-05-14", filename="price_data.csv"):
    """
    Download price data with retry logic and save to CSV
    """
    max_retries = 3
    retry_delay = 5  # seconds
    
    for attempt in range(max_retries):
        try:
            print(f"Downloading data for {tickers}... (Attempt {attempt+1}/{max_retries})")
            data = yf.download(tickers, start=start_date, end=end_date, threads=False)['Close'] # can change threads back to True
            
            # Save to CSV
            data.to_csv(filename)
            print(f"Data successfully saved to {filename}")
            return True
            
        except Exception as e:
            print(f"Error downloading data: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
            else:
                print("Max retries reached. Could not download data.")
                return False

if __name__ == "__main__":
    # Define tickers (using ETFs: SPY for stocks, TLT for long-term bonds, GLD for gold)
    tickers = ["SPY", "TLT", "GLD"]
    
    # Download and save data
    # download_and_save_data(tickers, start_date="2006-01-01", end_date="2025-05-15", filename="price_data.csv")
    download_and_save_data(tickers, start_date="2025-05-15", end_date="2025-10-13", filename="price_data2.csv")

Downloading data for ['SPY', 'TLT', 'GLD']... (Attempt 1/3)
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  3 of 3 completed

3 Failed downloads:
['TLT', 'GLD', 'SPY']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Data successfully saved to price_data2.csv
