In [17]:
import yfinance as yf
import pandas as pd
import os
import time
from datetime import datetime, timedelta

In [18]:
def download_and_cache_ticker(ticker, start="2000-01-01", end="2025-01-10", save_dir="data/etfs"):
    """
    Download price history for one ticker and cache its Close column as CSV.

    The data is written to ``<save_dir>/<ticker>.csv``. If that file already
    exists the download is skipped, so an existing cache file is NOT refreshed
    when ``start``/``end`` differ from the range it was created with; delete
    the file to force a new download.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Start of the requested range, 'YYYY-MM-DD'.
        end: End of the requested range, 'YYYY-MM-DD'.
        save_dir: Directory holding the cached CSV files; created if missing.

    Returns:
        None. Progress is reported with ``print``.
    """
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, f"{ticker}.csv")

    if os.path.exists(path):
        print(f"Cached: {ticker}")
        return

    print(f"Downloading: {ticker}")
    df = yf.download(ticker, start=start, end=end, auto_adjust=True)

    # Never cache an empty result: an empty CSV would be reported as "Cached"
    # on every later run and the ticker could not be retried without manually
    # deleting the file.
    if df is None or df.empty:
        print(f"No data returned for {ticker}; nothing cached")
        return

    # Keep only the first column level (the price-field names).
    df.columns = df.columns.get_level_values(0)
    df = df.drop(columns=['High', 'Low', 'Open', 'Volume'])
    df.to_csv(path)
    print(f"Saved: {path}")

In [19]:
# ETF tickers to download into the local CSV cache.
# For a quick single-ticker run, use: tickers = ['SMH']
tickers = [
    'SMH', 'SOXX', 'PSI', 'XSD', 'IYW',
    'XLK', 'VGT', 'QQQ', 'IGM', 'IXN',
]
for ticker in tickers:
    download_and_cache_ticker(ticker=ticker)

Cached: SMH
Downloading: SOXX


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/SOXX.csv
Downloading: PSI


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/PSI.csv
Downloading: XSD


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/XSD.csv
Downloading: IYW


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/IYW.csv
Downloading: XLK


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/XLK.csv
Downloading: VGT


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/VGT.csv
Downloading: QQQ


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/QQQ.csv
Downloading: IGM


[*********************100%***********************]  1 of 1 completed


Saved: data/etfs/IGM.csv
Downloading: IXN


[*********************100%***********************]  1 of 1 completed

Saved: data/etfs/IXN.csv





In [5]:
def load_etf_data(ticker, start_date, end_date, data_dir="data/etfs"):
    """
    Read ``<data_dir>/<ticker>.csv`` and return the rows dated within
    ``[start_date, end_date]`` (both bounds inclusive).

    The first CSV column becomes the index and is parsed as dates. The
    returned frame is a copy, so callers may modify it freely.
    """
    csv_file = os.path.join(data_dir, f"{ticker}.csv")
    prices = pd.read_csv(csv_file, index_col=0, parse_dates=True)

    on_or_after_start = prices.index >= start_date
    on_or_before_end = prices.index <= end_date
    return prices[on_or_after_start & on_or_before_end].copy()


In [21]:
# Load the cached SMH rows dated 2000-01-01 through 2025-01-10.
etf_data = load_etf_data(
    ticker='SMH',
    start_date="2000-01-01",
    end_date="2025-01-10",
)


In [22]:
# Preview the first and last three rows to confirm the loaded date span.
print(etf_data.head(3), etf_data.tail(3), sep="\n")

                Close
Date                 
2000-06-05  42.031502
2000-06-06  40.301258
2000-06-07  40.966736
                 Close
Date                  
2025-01-06  260.230011
2025-01-07  254.039993
2025-01-08  252.229996


In [6]:
# Reload SMH with the end bound moved back to 2024-01-01.
etf_data = load_etf_data(
    ticker='SMH',
    start_date="2000-01-01",
    end_date="2024-01-01",
)


In [7]:
# Preview the first and last three rows to confirm the loaded date span.
print(etf_data.head(3), etf_data.tail(3), sep="\n")

                Close
Date                 
2000-06-05  42.031502
2000-06-06  40.301258
2000-06-07  40.966736
                 Close
Date                  
2023-12-27  175.083984
2023-12-28  175.044174
2023-12-29  174.098358


In [8]:
# Reload SMH with the end bound moved back to 2022-01-01.
etf_data = load_etf_data(
    ticker='SMH',
    start_date="2000-01-01",
    end_date="2022-01-01",
)

In [9]:
# Preview the first and last three rows to confirm the loaded date span.
print(etf_data.head(3), etf_data.tail(3), sep="\n")

                Close
Date                 
2000-06-05  42.031502
2000-06-06  40.301258
2000-06-07  40.966736
                 Close
Date                  
2021-12-29  153.325623
2021-12-30  151.583893
2021-12-31  151.075073


In [10]:
def generate_week_ranges(start_date, end_date):
    """
    Split [start_date, end_date] into consecutive 7-day date ranges.

    Blocks are counted from ``start_date`` itself, so they run Monday to
    Sunday only when ``start_date`` is a Monday. The last block is truncated
    to ``end_date``, and ``end_date`` is always covered, even when it is the
    only day left over for a final block.

    Args:
        start_date: First day to cover, 'YYYY-MM-DD'.
        end_date: Last day to cover (inclusive), 'YYYY-MM-DD'.

    Returns:
        List of ``(range_start, range_end)`` tuples of 'YYYY-MM-DD' strings,
        both ends inclusive. Empty when ``start_date`` is after ``end_date``.
    """
    current = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    week_ranges = []

    # `<=` rather than `<`: when the remaining span is the single day
    # `last_day`, it still needs its own (one-day) range.
    while current <= last_day:
        # start + 6 days gives a 7-day inclusive block; clip to the overall end.
        week_end = min(current + timedelta(days=6), last_day)
        week_ranges.append((current.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))

        # The next block starts the day after this one ends.
        current = week_end + timedelta(days=1)

    return week_ranges

def gather_etf_data_weekly(tickers, week_ranges):
    """
    Collect cached ETF data for each ticker, grouped by weekly date range.

    Each ticker's CSV is read once via ``load_etf_data`` for the overall span
    covered by ``week_ranges`` and then sliced per range, instead of being
    re-read from disk for every range.

    Args:
        tickers: Iterable of ticker symbols with a cached CSV.
        week_ranges: Iterable of ``(start_date, end_date)`` pairs, both ends
            inclusive (e.g. the output of ``generate_week_ranges``).

    Returns:
        Dict keyed by ``'<start_date> to <end_date>'``. Each value is a dict
        mapping ticker -> DataFrame of that ticker's rows inside the range.
        Tickers with no rows in a range are reported with ``print`` and
        omitted from that range's dict.
    """
    tickers = list(tickers)
    week_ranges = list(week_ranges)
    etf_histories = {}
    if not week_ranges:
        return etf_histories

    # Load every ticker once for the whole requested span.
    span_start = min(pd.to_datetime(start) for start, _ in week_ranges)
    span_end = max(pd.to_datetime(end) for _, end in week_ranges)
    full_history = {
        ticker: load_etf_data(ticker, span_start, span_end) for ticker in tickers
    }

    for start_date, end_date in week_ranges:
        week = f"{start_date} to {end_date}"
        etf_histories[week] = {}
        for ticker in tickers:
            history = full_history[ticker]
            in_range = (history.index >= start_date) & (history.index <= end_date)
            etf_data = history[in_range].copy()

            if etf_data.empty:
                print(f"No data found for {ticker} in {week}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[week][ticker] = etf_data
    return etf_histories

In [11]:
# Consecutive 7-day ranges covering calendar year 2024.
week_ranges = generate_week_ranges(
    start_date="2024-01-01",
    end_date="2024-12-31",
)

In [12]:
# Group the cached SMH data by the weekly ranges built above.
etf_weekly = gather_etf_data_weekly(tickers=['SMH'], week_ranges=week_ranges)

In [13]:
# Display the nested mapping: '<start> to <end>' label -> ticker -> DataFrame.
etf_weekly

{'2024-01-01 to 2024-01-07': {'SMH':                  Close
  Date                  
  2024-01-02  168.224396
  2024-01-03  165.237640
  2024-01-04  164.261963
  2024-01-05  165.347153},
 '2024-01-08 to 2024-01-14': {'SMH':                  Close
  Date                  
  2024-01-08  171.201202
  2024-01-09  171.659164
  2024-01-10  171.818466
  2024-01-11  172.794144
  2024-01-12  172.186829},
 '2024-01-15 to 2024-01-21': {'SMH':                  Close
  Date                  
  2024-01-16  174.924698
  2024-01-17  173.869370
  2024-01-18  179.514359
  2024-01-19  186.354034},
 '2024-01-22 to 2024-01-28': {'SMH':                  Close
  Date                  
  2024-01-22  187.220200
  2024-01-23  188.175964
  2024-01-24  191.521133
  2024-01-25  191.521133
  2024-01-26  187.439240},
 '2024-01-29 to 2024-02-04': {'SMH':                  Close
  Date                  
  2024-01-29  189.639481
  2024-01-30  187.757812
  2024-01-31  185.049820
  2024-02-01  186.742325
  2024-02-02  190

In [14]:
def generate_month_ranges(start_date, end_date):
    """
    Split [start_date, end_date] into consecutive calendar-month date ranges.

    The first range runs from ``start_date`` to the end of its month, later
    ranges cover whole months, and the last range is truncated to
    ``end_date``. No range ever spans two calendar months, whatever day of
    the month ``start_date`` falls on.

    Args:
        start_date: First day to cover, 'YYYY-MM-DD'.
        end_date: Last day to cover (inclusive), 'YYYY-MM-DD'.

    Returns:
        List of ``(range_start, range_end)`` tuples of 'YYYY-MM-DD' strings,
        both ends inclusive. Empty when ``start_date`` is after ``end_date``.
    """
    current = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    month_ranges = []

    # `<=` rather than `<`: an end_date that falls on the first of a month
    # still needs its own one-day range.
    while current <= last_day:
        # Day 28 exists in every month and 28 + 4 days always lands in the
        # following month, so this finds the first of next month regardless
        # of which day `current` is.
        first_of_next = (current.replace(day=28) + timedelta(days=4)).replace(day=1)
        month_end = min(first_of_next - timedelta(days=1), last_day)
        month_ranges.append((current.strftime('%Y-%m-%d'), month_end.strftime('%Y-%m-%d')))
        current = month_end + timedelta(days=1)

    return month_ranges

def gather_etf_data(tickers, month_ranges):
    """
    Collect cached ETF data for each ticker, grouped by monthly date range.

    Each ticker's CSV is read once via ``load_etf_data`` for the overall span
    covered by ``month_ranges`` and then sliced per range, instead of being
    re-read from disk for every range.

    Args:
        tickers: Iterable of ticker symbols with a cached CSV.
        month_ranges: Iterable of ``(start_date, end_date)`` pairs, both ends
            inclusive (e.g. the output of ``generate_month_ranges``).

    Returns:
        Dict keyed by ``'<start_date> to <end_date>'``. Each value is a dict
        mapping ticker -> DataFrame of that ticker's rows inside the range.
        Tickers with no rows in a range are reported with ``print`` and
        omitted from that range's dict.
    """
    tickers = list(tickers)
    month_ranges = list(month_ranges)
    etf_histories = {}
    if not month_ranges:
        return etf_histories

    # Load every ticker once for the whole requested span.
    span_start = min(pd.to_datetime(start) for start, _ in month_ranges)
    span_end = max(pd.to_datetime(end) for _, end in month_ranges)
    full_history = {
        ticker: load_etf_data(ticker, span_start, span_end) for ticker in tickers
    }

    for start_date, end_date in month_ranges:
        month = f"{start_date} to {end_date}"
        etf_histories[month] = {}
        for ticker in tickers:
            history = full_history[ticker]
            in_range = (history.index >= start_date) & (history.index <= end_date)
            etf_data = history[in_range].copy()
            if etf_data.empty:
                print(f"No data found for {ticker} in {month}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[month][ticker] = etf_data
    return etf_histories

In [15]:
# Calendar-month ranges for 2024, then the cached SMH data grouped by month.
month_ranges = generate_month_ranges(
    start_date="2024-01-01",
    end_date="2024-12-31",
)
etf_monthly = gather_etf_data(tickers=['SMH'], month_ranges=month_ranges)

In [16]:
# Display the nested mapping: '<start> to <end>' label -> ticker -> DataFrame.
etf_monthly

{'2024-01-01 to 2024-01-31': {'SMH':                  Close
  Date                  
  2024-01-02  168.224396
  2024-01-03  165.237640
  2024-01-04  164.261963
  2024-01-05  165.347153
  2024-01-08  171.201202
  2024-01-09  171.659164
  2024-01-10  171.818466
  2024-01-11  172.794144
  2024-01-12  172.186829
  2024-01-16  174.924698
  2024-01-17  173.869370
  2024-01-18  179.514359
  2024-01-19  186.354034
  2024-01-22  187.220200
  2024-01-23  188.175964
  2024-01-24  191.521133
  2024-01-25  191.521133
  2024-01-26  187.439240
  2024-01-29  189.639481
  2024-01-30  187.757812
  2024-01-31  185.049820},
 '2024-02-01 to 2024-02-29': {'SMH':                  Close
  Date                  
  2024-02-01  186.742325
  2024-02-02  190.376221
  2024-02-05  194.089752
  2024-02-06  191.889511
  2024-02-07  195.762344
  2024-02-08  198.549973
  2024-02-09  202.950485
  2024-02-12  202.163971
  2024-02-13  198.420563
  2024-02-14  202.402908
  2024-02-15  201.725906
  2024-02-16  200.859741
  2