## Pipeline Parameters

In [0]:
# Expose the destination table as a notebook widget so a job run can
# override it; the second argument is the value used when none is supplied.
_target_widget = "target_table_name"
_target_default = 'kdayno_gold_SP500_stock_financial_ratios_agg'

dbutils.widgets.text(_target_widget, _target_default)

# Read back whatever the widget currently holds (default or override).
target_table_name = dbutils.widgets.get(_target_widget)

In [0]:
# Standard library imports
import os

# Third-party library imports
from dotenv import load_dotenv

In [0]:
# Pull key/value pairs from a .env file (when one is found) into the
# process environment before the lookups below.
load_dotenv()

# Catalog and schema used to qualify every table this notebook reads or writes.
catalog_name = os.getenv('DATABRICKS_CATALOG_NAME')
schema_name = os.getenv('DATABRICKS_SCHEMA_NAME')

# Fail fast: os.getenv returns None for an unset variable, and that None
# would otherwise be interpolated into the table identifiers further down
# as the literal text "None" (e.g. None.None.<table>). Empty values are
# rejected too, since they also produce an invalid identifier.
_missing_env_vars = [
    env_var
    for env_var, env_value in (
        ('DATABRICKS_CATALOG_NAME', catalog_name),
        ('DATABRICKS_SCHEMA_NAME', schema_name),
    )
    if not env_value
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

In [0]:
print(f"Running aggregate process... ")

# One output row per input row of the silver financials table:
#   * the four per-company metrics have NULL replaced by 0 via COALESCE;
#   * each metric also gets a sector-level average, computed with AVG as a
#     window function partitioned by gics_sector. The averages are taken over
#     the raw columns, not the COALESCEd ones.
sector_ratio_query = f"""
    SELECT 
        ticker_symbol
        , company_name
        , gics_sector
        , COALESCE(price_to_earnings_ratio, 0) AS price_to_earnings_ratio
        , COALESCE(return_on_equity_ratio, 0) AS return_on_equity_ratio
        , COALESCE(debt_to_equity_ratio, 0) AS debt_to_equity_ratio
        , COALESCE(market_capitalization, 0) AS market_capitalization
        , AVG(price_to_earnings_ratio) OVER(PARTITION BY gics_sector) AS sector_average_price_to_earnings_ratio
        , AVG(return_on_equity_ratio) OVER(PARTITION BY gics_sector) AS sector_average_return_on_equity_ratio
        , AVG(debt_to_equity_ratio) OVER(PARTITION BY gics_sector) AS sector_average_debt_to_equity_ratio
        , AVG(market_capitalization) OVER(PARTITION BY gics_sector) AS sector_average_market_capitalization
    FROM {catalog_name}.{schema_name}.kdayno_silver_SP500_stock_financials
    """

# Hand the finished query text to Spark; the resulting DataFrame is what the
# write step persists.
SP500_stock_financial_ratios_agg_df = spark.sql(sector_ratio_query)

In [0]:
print(f"Loading data to target table: {catalog_name}.{schema_name}.{target_table_name}")

# Fully qualified destination: <catalog>.<schema>.<table>.
_qualified_target = f'{catalog_name}.{schema_name}.{target_table_name}'

# Configure a Delta writer in overwrite mode, then save the aggregated
# DataFrame under the qualified table name.
_delta_writer = SP500_stock_financial_ratios_agg_df.write.format('delta')
_delta_writer = _delta_writer.mode('overwrite')
_delta_writer.saveAsTable(_qualified_target)