## Pipeline Parameters

In [0]:
# Run-date widget: name, placeholder default, and the label shown in the UI.
dbutils.widgets.text("input_load_date", "YYYY-MM-DD", "Input Load Date")

# Audit Parameters
# Each audit widget is a text widget with an empty default; the values are
# read back later and forwarded to audit_logger.
for _audit_widget in (
    "job_id",
    "job_name",
    "job_start_date",
    "job_start_datetime",
    "task_run_id",
    "task_name",
):
    dbutils.widgets.text(_audit_widget, "")

In [0]:
%run ../utils/audit_logger

In [0]:
# Standard library imports
import os
import datetime as dt

# Third-party library imports
from dotenv import load_dotenv

In [0]:
# Load dotenv-provided settings first so the os.getenv lookups below can see them.
load_dotenv()

# Run date exactly as typed into the widget (a string, not yet parsed).
input_load_date = dbutils.widgets.get("input_load_date")

# Catalog and schema used to qualify every table name in the SQL cells.
# os.getenv returns None when a variable is not set.
catalog_name = os.getenv("DATABRICKS_CATALOG_NAME")
schema_name = os.getenv("DATABRICKS_SCHEMA_NAME")

# Index 0 is the stock-price source, index 1 the company reference source;
# the transformation query addresses them by position.
source_table_name = [
    "kdayno_bronze_SP500_stock_prices",
    "kdayno_bronze_sp500_companies",
]
target_table_name = "kdayno_silver_SP500_stock_prices"

# Audit Variables
# Read the audit widgets in declaration order and bind each to its own name.
(
    job_id,
    job_name,
    job_start_date,
    job_start_datetime,
    task_run_id,
    task_name,
) = (
    dbutils.widgets.get(audit_key)
    for audit_key in (
        "job_id",
        "job_name",
        "job_start_date",
        "job_start_datetime",
        "task_run_id",
        "task_name",
    )
)

## Pipeline Audit

In [0]:
# Record this run through audit_logger (brought in by the %run cell above).
# Arguments are positional, so their order must stay as listed.
audit_logger(
    job_id,
    job_name,
    input_load_date,
    job_start_date,
    job_start_datetime,
    task_run_id,
    task_name,
    source_table_name,
    target_table_name,
)

In [0]:
# Parse the widget value; strptime raises ValueError if it is not YYYY-MM-DD.
parsed_load_date = dt.datetime.strptime(input_load_date, '%Y-%m-%d')

# weekday() numbers Monday as 0 through Sunday as 6, so 5 and 6 are the weekend.
falls_on_weekend = parsed_load_date.weekday() >= 5

if falls_on_weekend:
    # Stop the notebook here and hand the warning back as its exit value.
    dbutils.notebook.exit(f"WARNING: Input date: {input_load_date} lands on a weekend. No data available for the specified date.")

## ETL

In [0]:
print(f'Transforming data for date: {input_load_date}')

# Build the query text first, then hand it to Spark.
# The CTE inner-joins price rows (source_table_name[0]) to company attributes
# (source_table_name[1]) on ticker_symbol; the outer SELECT keeps only the
# rows whose trading_date equals the requested load date.
transform_query = f"""
    WITH silver_SP500_stock_prices AS (
    SELECT sp.ticker_symbol
            , c.company_name
            , c.gics_sector
            , c.gics_sub_industry
            , sp.open_price
            , sp.close_price
            , sp.highest_price
            , sp.lowest_price
            , sp.trading_date
    FROM {catalog_name}.{schema_name}.{source_table_name[0]} sp
    INNER JOIN  {catalog_name}.{schema_name}.{source_table_name[1]} c ON sp.ticker_symbol = c.ticker_symbol)
    
    SELECT * 
    FROM silver_SP500_stock_prices 
    WHERE trading_date = '{input_load_date}'                        
    """

# Result of the query; it is written to the target table in the following cell.
silver_SP500_stock_prices_df = spark.sql(transform_query)

In [0]:
# Fully qualified name of the table being loaded.
target_table = f'{catalog_name}.{schema_name}.{target_table_name}'

# Replace the rows for this trading date in ONE Delta transaction.
#
# The previous approach issued a DELETE and then a separate append. Those are
# two independent commits: if the append failed, the rows for the date had
# already been removed and the table was left without that day's data. The
# DELETE also errors when the target table has not been created yet.
#
# Delta's `replaceWhere` option atomically replaces every record matching the
# predicate with the contents of the DataFrame, so a re-run for the same date
# stays idempotent and a failed write leaves the existing rows untouched.
# The DataFrame is already filtered to this same trading_date, so every
# written row satisfies the predicate; an empty DataFrame simply clears the
# date, which matches the old delete-then-append outcome.
(silver_SP500_stock_prices_df.write.format("delta")
                           .mode("overwrite")
                           .option("replaceWhere", f"trading_date = '{input_load_date}'")
                           .partitionBy('trading_date')
                           .saveAsTable(target_table))

print(f'Run successful for: {input_load_date}')