In [5]:
from typing import *
import yfinance as yf
import pandas as pd
import datetime
import time

In [2]:
# Symbols whose price history is fetched in this cell
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN"]

# Query window: the 365 days leading up to today, formatted as date strings,
# sampled at the interval below
date_format = "%Y-%m-%d"
lookback = datetime.timedelta(days=365)
end_date = datetime.date.today().strftime(date_format)
start_date = (datetime.date.today() - lookback).strftime(date_format)
interval = "1h"  # hourly bars

# Downloaded frames, keyed by ticker symbol
data = {}

for ticker in tickers:
    print(f"Downloading {ticker}...")
    stock = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        interval=interval,
        progress=False,
    )
    data.update({ticker: stock})

# Example: print the AAPL frame with its last two rows left off
print(data["AAPL"].iloc[:-2])


Downloading AAPL...
YF.download() has changed argument auto_adjust default to True
Downloading MSFT...
Downloading GOOGL...
Downloading AMZN...
Price                           Close        High         Low        Open  \
Ticker                           AAPL        AAPL        AAPL        AAPL   
Datetime                                                                    
2024-04-08 13:30:00+00:00  168.820007  169.190002  168.240005  169.059998   
2024-04-08 14:30:00+00:00  168.815002  169.130005  168.649994  168.824997   
2024-04-08 15:30:00+00:00  168.412399  168.815002  168.264999  168.815002   
2024-04-08 16:30:00+00:00  168.645004  168.835007  168.404999  168.414993   
2024-04-08 17:30:00+00:00  168.790100  168.899994  168.470001  168.645004   
...                               ...         ...         ...         ...   
2025-04-04 13:30:00+00:00  195.279999  199.880005  192.800003  193.925003   
2025-04-04 14:30:00+00:00  194.490005  197.039993  191.059998  195.294998   
2025-04-0

In [3]:
def load_predefined_tickers_list(
    url: str = 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/nasdaq/nasdaq_tickers.txt',
) -> List[str]:
    """Load a list of ticker symbols from a one-symbol-per-line text file.

    Args:
        url: Location of the symbol file. Anything ``pd.read_csv`` accepts
            works (URL or local path). Defaults to the NASDAQ symbol list
            in the rreichel3/US-Stock-Symbols GitHub repository.

    Returns:
        The symbols from the first column, in file order, with surrounding
        whitespace stripped and empty entries dropped.

    Side effects:
        Prints the first 10 symbols as a sample.
    """
    # dtype=str + keep_default_na=False: keep every symbol as text. With the
    # pandas defaults, a symbol spelled like an NA marker (e.g. "NA", "NAN",
    # "NULL") would be parsed as a float NaN instead of a ticker string.
    raw = pd.read_csv(url, header=None, dtype=str, keep_default_na=False)

    # Select the first column explicitly. ``DataFrame.squeeze()`` collapses a
    # one-row file to a bare scalar, which has no ``tolist()``.
    nasdaq_symbols = [symbol for symbol in raw.iloc[:, 0].str.strip() if symbol]

    print(nasdaq_symbols[:10])  # Display first 10 symbols as a sample
    return nasdaq_symbols

In [4]:
# Fetch the NASDAQ symbol list (the loader also prints its first 10 entries)
# and report how many symbols were returned.
nasdaq_symbols = load_predefined_tickers_list()
print(len(nasdaq_symbols))

['AACBU', 'AACG', 'AAL', 'AAME', 'AAOI', 'AAON', 'AAPG', 'AAPL', 'AARD', 'ABAT']
3865


In [10]:
def _download_with_backoff(
    downloader: Callable[..., pd.DataFrame],
    ticker: str,
    start_date: str,
    end_date: str,
    interval: str,
    max_attempts: int,
) -> Optional[pd.DataFrame]:
    """Download one ticker, retrying with exponential backoff on rate limits.

    Returns the downloader's result, or None when every attempt was rate limited.
    """
    for attempt in range(max_attempts):
        try:
            return downloader(ticker, start=start_date, end=end_date, interval=interval, progress=False)
        except yf.exceptions.YFRateLimitError:
            if attempt + 1 >= max_attempts:
                break  # no retry left, so sleeping again would only waste time
            wait_time = 2 ** (attempt + 1)
            print(f"Attempt {attempt + 1} rate limited for ticker: {ticker}, pause for {wait_time}")
            time.sleep(wait_time)  # exponential backoff
    return None


def _save_snapshot(frames: Dict[str, pd.DataFrame], cnt: int) -> bool:
    """Write all collected frames to ``main_base_{cnt}.parquet``.

    Returns False (and writes nothing) when there is no frame to save.
    """
    if not frames:
        return False
    snapshot: pd.DataFrame = pd.concat(list(frames.values()), axis=0)
    snapshot.to_parquet(f"main_base_{cnt}.parquet")
    return True


def batch_load_from_yfinance(
    tickers: List[str],
    start_date: str,
    end_date: str,
    interval: str = "1h",
    max_attempts: int = 6,
    checkpoint_every: int = 100,
    downloader: Optional[Callable[..., pd.DataFrame]] = None,
) -> None:
    """Download price history for many tickers and save it to parquet files.

    Each ticker is downloaded separately, its columns are flattened to the
    first level, and a ``ticker`` column is added. All frames collected so far
    are concatenated and written to ``main_base_{cnt}.parquet``, where ``cnt``
    is the number of tickers processed, every ``checkpoint_every`` tickers and
    once more at the end.

    Args:
        tickers: Symbols to download.
        start_date: Start of the window, passed to the downloader as ``start``.
        end_date: End of the window, passed to the downloader as ``end``.
        interval: Bar interval passed to the downloader.
        max_attempts: Download attempts per ticker when rate limited.
        checkpoint_every: Write an intermediate snapshot after this many
            processed tickers; a value <= 0 disables intermediate snapshots.
        downloader: Callable invoked as
            ``downloader(ticker, start=..., end=..., interval=..., progress=False)``.
            Defaults to ``yf.download``.

    Tickers that stay rate limited after ``max_attempts`` tries, or whose
    download comes back empty, are skipped and reported instead of raising.
    Nothing is written when no ticker produced data.
    """
    if downloader is None:
        downloader = yf.download

    # Dictionary to store data
    data: Dict[str, pd.DataFrame] = {}
    skipped: List[str] = []
    cnt: int = 0
    last_saved: int = 0

    for ticker in tickers:
        cnt += 1
        stock = _download_with_backoff(downloader, ticker, start_date, end_date, interval, max_attempts)

        if stock is None or stock.empty:
            # Exhausted retries, or the download produced no rows.
            skipped.append(ticker)
        else:
            frame = stock.copy()
            # Only flatten real MultiIndex columns: indexing a plain string
            # label with [0] would cut it down to its first character.
            if isinstance(frame.columns, pd.MultiIndex):
                frame.columns = list(frame.columns.get_level_values(0))
            frame["ticker"] = ticker
            data[ticker] = frame

        if checkpoint_every > 0 and cnt % checkpoint_every == 0:  # Periodic snapshot.
            print(f"Downloaded {cnt} stocks data.")
            if _save_snapshot(data, cnt):
                last_saved = cnt

    # Final snapshot, unless the last checkpoint already wrote the same file.
    if last_saved != cnt:
        _save_snapshot(data, cnt)

    if skipped:
        print(f"No data for {len(skipped)} ticker(s), first few: {skipped[:10]}")


In [None]:
# Download price history for every NASDAQ symbol over the start_date..end_date
# window defined earlier, using the function's default interval.
batch_load_from_yfinance(nasdaq_symbols, start_date, end_date)


1 Failed download:
['ABLLW']: YFInvalidPeriodError("ABLLW: Period 'max' is invalid, must be of the format 1d, 5d, etc.")

1 Failed download:
['ABLVW']: YFInvalidPeriodError("ABLVW: Period 'max' is invalid, must be of the format 1d, 5d, etc.")

1 Failed download:
['ABPWW']: YFInvalidPeriodError("ABPWW: Period 'max' is invalid, must be of the format 1d, 5d, etc.")
