In [0]:
!pip install yfinance

In [0]:
# Restart the Python process so that the yfinance package installed in the
# previous cell is importable by the cells below.
dbutils.library.restartPython()

In [0]:
import yfinance as yf
import pandas as pd
from datetime import datetime
import re
import time
from pyspark.sql.functions import regexp_replace

In [0]:
# Source table holding the ticker symbols to download (read via spark.table).
TICKER_TABLE = "finance_catalog.db_landing.src_raw_index_keys"
# Destination Delta table that downloaded price rows are appended to.
TABLE_NAME = "finance_catalog.db_landing.src_raw_stock_prices"

In [0]:
# Build the list of ticker symbols from TICKER_TABLE, replacing every "." in
# the `ticker` column with "-".
# NOTE(review): presumably this maps symbols to the Yahoo Finance format
# (e.g. "BRK.B" -> "BRK-B") — confirm.
ticker_keys_sdf = spark.table(TICKER_TABLE)
normalized_ticker_col = regexp_replace("ticker", "\\.", "-").alias("ticker")
ticker_keys_pdf = ticker_keys_sdf.select(normalized_ticker_col).toPandas()
tickers = ticker_keys_pdf["ticker"].tolist()

In [0]:
# Bounds of the slice of `tickers` handled by this run: tickers[batch_ini:batch_end].
# NOTE(review): with both bounds at 0 the slice is empty, so nothing is
# downloaded or written — presumably these are edited by hand for each
# batch; confirm before scheduling this notebook.
batch_ini = 0
batch_end = 0


def _download_ticker_prices(ticker):
    """Download the daily, auto-adjusted price history of a single ticker.

    Args:
        ticker: Symbol passed to ``yf.download``.

    Returns:
        A pandas DataFrame whose columns are lower-cased snake_case versions
        of the yfinance columns, plus ``ticker`` and ``id`` (ticker followed
        by the date as YYYYMMDD), with ``id`` as the first column.
        ``None`` when yfinance returned no rows for the ticker.
    """
    # `period` and `start`/`end` are alternative ways of selecting the date
    # range in yfinance, so only `start`/`end` are passed; the bar size is
    # stated explicitly through `interval`.
    raw_prices = yf.download(
        ticker,
        start=datetime(1900, 1, 1, 0, 0, 0),
        end=datetime.now(),
        interval="1d",
        auto_adjust=True,
    )
    # Skip tickers for which nothing came back instead of failing on the
    # column handling below.
    if raw_prices is None or raw_prices.empty:
        return None

    prices = raw_prices.reset_index()
    # yfinance may return two-level columns (field, ticker); only drop the
    # second level when it is actually present.
    if isinstance(prices.columns, pd.MultiIndex):
        prices.columns = prices.columns.droplevel(1)
    prices.columns = [re.sub(r"\W+", "_", col).strip("_").lower() for col in prices.columns]

    prices['ticker'] = ticker
    prices['id'] = ticker + prices['date'].dt.strftime('%Y%m%d')

    # Move `id` to the front while keeping the other columns in their order.
    ordered_columns = ['id'] + [col for col in prices.columns if col != 'id']
    return prices[ordered_columns]


ticker_data = []
for ticker in tickers[batch_ini:batch_end]:
    ticker_prices = _download_ticker_prices(ticker)
    if ticker_prices is not None:
        ticker_data.append(ticker_prices)

if ticker_data:
    df = pd.concat(ticker_data, ignore_index=True)
    df['volume'] = df['volume'].astype(float)

    # The write lives inside the guard so that an empty batch neither raises
    # a NameError on a fresh kernel nor re-appends a stale `df` left over
    # from a previous run of this cell.
    (
        spark
            .createDataFrame(df)
            .orderBy(["ticker", "date"], ascending=[True, False])
            .write
            .format("delta")
            .mode("append")
            .saveAsTable(TABLE_NAME)
    )
else:
    print(f"No price data downloaded for tickers[{batch_ini}:{batch_end}]; nothing written to {TABLE_NAME}.")

In [0]:
# spark.table(TABLE_NAME).display()
# spark.sql(f"DROP TABLE IF EXISTS {TABLE_NAME}")