In [0]:
%run ./aws_secret_manager

In [0]:
%pip install -r ../requirements.txt

In [0]:
# Standard library imports
import csv
import datetime as dt
import os
from ast import literal_eval

# Third-party library imports
import pyarrow as pa
import requests as r
from dotenv import load_dotenv
from pyspark.sql.functions import cast, col, current_timestamp
from pyspark.sql.types import (
    DateType,
    DecimalType,
    DoubleType,
    IntegerType,
    StringType,
    StructField,
    StructType,
    TimestampType
)

# Merge variables from a .env file (when one is found) into the process
# environment before the settings below are read.
load_dotenv()

# Environment-driven settings; each is None when the variable is not set.
raw_zone_path = os.environ.get('RAW_ZONE_PATH')
catalog_name = os.environ.get('DATABRICKS_CATALOG_NAME')
schema_name = os.environ.get('DATABRICKS_SCHEMA_NAME')

# Trading date whose prices this run loads.
input_load_date = dt.datetime(year=2025, month=2, day=21)

In [0]:
# Schema for the raw stock-price rows: the ticker and the four price fields are
# declared as nullable strings, followed by a nullable trading date.
_string_field_names = (
    "ticker_symbol",
    "open_price",
    "close_price",
    "highest_price",
    "lowest_price",
)

stock_price_schema = StructType(
    [StructField(field_name, StringType(), True) for field_name in _string_field_names]
    + [StructField("trading_date", DateType(), True)]
)

In [0]:
# Read only the ticker column from the bronze S&P 500 companies table.
sp500_companies_table = f'{catalog_name}.{schema_name}.kdayno_bronze_SP500_companies'
SP500_tickers = spark.read.table(sp500_companies_table).select('ticker_symbol')

# Bring the tickers to the driver as a plain Python list for the API loop.
ticker_rows = SP500_tickers.collect()
SP500_tickers_list = [ticker_row['ticker_symbol'] for ticker_row in ticker_rows]

## Load Stock Price Data for S&P500 Companies

In [0]:
# The secret payload is a Python-literal dict; the Polygon key is read from its
# 'AWS_SECRET_ACCESS_KEY' entry. get_secret is presumably provided by the
# %run'd aws_secret_manager notebook - TODO confirm.
polygon_api_key = literal_eval(get_secret("POLYGON_CREDENTIALS"))['AWS_SECRET_ACCESS_KEY']

next_day = input_load_date + dt.timedelta(days=1)

current_date = input_load_date.strftime('%Y-%m-%d')
next_day = next_day.strftime('%Y-%m-%d')

print(f'Loading data for date: {current_date}')

# Column-oriented accumulator; key order matches the field order of stock_price_schema.
stock_data = {'ticker_symbol':[], 'open_price':[], 'close_price':[], 'highest_price':[], 'lowest_price':[], 'trading_date':[]}

# Tickers for which the API returned no bars (kept for inspection after the loop).
skipped_tickers = []

for stock in SP500_tickers_list:
    polygon_url = f"https://api.polygon.io/v2/aggs/ticker/{stock}/range/1/day/{current_date}/{next_day}?adjusted=true&sort=asc&apiKey="

    print(f'Getting data for ticker: {stock} ...')

    # A timeout keeps one stalled request from hanging the whole load.
    response = r.get(f'{polygon_url}{polygon_api_key}', timeout=30)

    # Fail loudly on HTTP errors (e.g. rate limiting) instead of hitting a
    # KeyError below. The message deliberately omits the URL, which embeds the API key.
    if not response.ok:
        raise RuntimeError(f'Polygon request for ticker {stock} failed with HTTP {response.status_code}')

    data = response.json()

    # The payload may have no 'results' (or an empty list) when there are no
    # bars for the requested range; skip that ticker rather than abort the load.
    results = data.get('results') or []
    if not results:
        skipped_tickers.append(stock)
        print(f'No price data returned for ticker: {stock} - skipping')
        continue

    # NOTE(review): the range spans current_date..next_day sorted ascending, so
    # the first bar may belong to next_day if current_date has none - confirm.
    first_bar = results[0]

    stock_data['ticker_symbol'].append(data['ticker'])
    stock_data['open_price'].append(first_bar['o'])
    stock_data['close_price'].append(first_bar['c'])
    stock_data['highest_price'].append(first_bar['h'])
    stock_data['lowest_price'].append(first_bar['l'])
    # 't' is divided by 1000 before conversion (treated as epoch milliseconds);
    # fromtimestamp interprets it in the interpreter's local timezone.
    stock_data['trading_date'].append(dt.datetime.fromtimestamp(first_bar['t'] / 1000))

if skipped_tickers:
    print(f'Skipped {len(skipped_tickers)} ticker(s) with no data: {skipped_tickers}')

# Transpose the column lists into row tuples for DataFrame creation.
stock_df = spark.createDataFrame(list(zip(*stock_data.values())), stock_price_schema)


In [0]:
# Cast each raw price column to decimal(10,2) and stamp the rows with the load time.
# Every price column is cast from itself: lowest_price was previously derived
# from open_price, which overwrote the real daily low.
stock_transformed_df = (stock_df.withColumn('open_price', col('open_price').cast('decimal(10,2)'))
                                .withColumn('close_price', col('close_price').cast('decimal(10,2)'))
                                .withColumn('highest_price', col('highest_price').cast('decimal(10,2)'))
                                .withColumn('lowest_price', col('lowest_price').cast('decimal(10,2)'))
                                .withColumn('load_date_ts', current_timestamp())
                     )

In [0]:
# Remove any rows already stored for this trading date, then append the freshly
# loaded rows, so re-running the notebook for the same date does not duplicate them.
target_table = f'{catalog_name}.{schema_name}.kdayno_bronze_SP500_stock_prices'
load_date_str = input_load_date.strftime('%Y-%m-%d')

delete_existing_sql = f"""
          DELETE FROM {target_table}
          WHERE trading_date = '{load_date_str}'
          """
spark.sql(delete_existing_sql)

(
    stock_transformed_df.write
    .format("delta")
    .mode("append")
    .saveAsTable(target_table)
)