### Import Required Libraries

In [0]:
import requests
import pandas as pd
import re
import os
import io
from datetime import datetime
from pyspark.sql.types import *

### Read Execution Date from Databricks Widget

In [0]:
# Register the ODATE text widget (blank default) and read the run date from it.
# The value must be in YYYYMMDD form; strptime raises ValueError otherwise.
dbutils.widgets.text("ODATE", "")
now_str = dbutils.widgets.get("ODATE")
now = datetime.strptime(now_str, "%Y%m%d")

# String pieces of the run date: year/month/day build the landing folder path,
# and `today` stamps the file name and the load_date column.
year, month, day = (now.strftime(part) for part in ("%Y", "%m", "%d"))
today = now.strftime("%Y%m%d")

### Build Finviz Request URL and Download Data

In [0]:
# The export endpoint authenticates with a token passed in the query string.
# Fail fast when it is missing instead of sending "auth=None" to the server.
token = os.getenv("FINVIZ_TOKEN")
if not token:
    raise RuntimeError("FINVIZ_TOKEN environment variable is not set")

url = f"https://elite.finviz.com/news_export.ashx?v=2&auth={token}"

# Bound the request so a stalled connection cannot hang the job indefinitely.
response = requests.get(url, timeout=60)
if not response.ok:
    # Report only the status code: the request URL embeds the auth token, so
    # it must not end up in notebook output or job logs.
    raise RuntimeError(f"Finviz news export failed with HTTP {response.status_code}")

# Parse the CSV payload and stamp every row with the run date (YYYYMMDD string).
df = pd.read_csv(io.BytesIO(response.content))
df['load_date'] = today

### Save Raw File to Parquet

In [0]:
# Landing directory for the raw file, laid out as <year>/<month>/<day>.
folder_path = f"/Volumes/raw/finviz/news_data/{year}/{month}/{day}"
os.makedirs(folder_path, exist_ok=True)

# One Parquet file per run date; writing to the same path again replaces it.
parquet_path = f"{folder_path}/finviz_news_data_{today}.parquet"
df.to_parquet(parquet_path, engine="pyarrow")

### Define Schema, Normalize Column Names, and Convert to Spark

In [0]:
# Target schema: every column is a nullable string.
string_fields = ("title", "source", "date", "url", "category", "load_date")
schema = StructType([StructField(field_name, StringType(), True) for field_name in string_fields])

# Normalize the pandas headers: "%" becomes "pct", each run of non-word
# characters becomes "_", edge underscores are trimmed, and the result is lowercased.
columns = []
for raw_name in df.columns:
    cleaned = re.sub(r"\W+", "_", raw_name.replace("%", "pct"))
    columns.append(cleaned.strip("_").lower())
df.columns = columns

# NOTE(review): the schema looks to be applied to the pandas columns by
# position, not by name - confirm the export's column order matches it.
spark_df = spark.createDataFrame(df, schema=schema)

### Write Data to Delta Table

In [0]:
# Append this load to the Delta table, partitioned by load_date.
# NOTE(review): append mode means rerunning the same ODATE adds the rows
# again - confirm that reruns are deduplicated downstream.
table_name = "raw.finviz.news_data"
delta_writer = spark_df.write.format("delta").mode("append").partitionBy("load_date")
delta_writer.saveAsTable(table_name)