In [0]:
# Standard library imports
import os

# Third-party library imports
from dotenv import load_dotenv

In [0]:
# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# Catalog and schema names that are interpolated into every table reference
# used by the SQL statements in this notebook.
catalog_name = os.getenv('DATABRICKS_CATALOG_NAME')
schema_name = os.getenv('DATABRICKS_SCHEMA_NAME')

# Fail fast: os.getenv returns None for an unset variable, which would
# otherwise be rendered as the literal text "None" inside the SQL f-strings
# and surface later as a confusing table-not-found error.
_missing_env_vars = [
    env_var
    for env_var, env_value in (
        ('DATABRICKS_CATALOG_NAME', catalog_name),
        ('DATABRICKS_SCHEMA_NAME', schema_name),
    )
    if not env_value
]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
    )

In [0]:
# Build the combined silver "all posts" DataFrame:
#   1. all_posts_with_sentiment             - UNION ALL of the top-posts and
#      hot-posts sentiment tables (SELECT * on both sides, so the two tables
#      must share the same column order).
#   2. all_posts_with_sentiment_transformed - renames columns and derives the
#      downvote count and downvote ratio from score and upvote_ratio.
#   3. all_posts_with_company_info          - adds the total vote count and the
#      company attributes from the S&P 500 bronze table.
#
# Fixes relative to the previous version:
#   * The join compared p.ticker_symbol with c.company_name (a ticker against
#     a company name), which matches essentially no rows. It now joins
#     ticker to ticker.
#     NOTE(review): assumes the bronze table exposes a `ticker_symbol`
#     column - confirm against its schema.
#   * The divisor is wrapped in NULLIF(upvote_ratio, 0) so a zero ratio gives
#     a NULL downvote count instead of a divide-by-zero error under ANSI mode
#     (non-ANSI mode already returned NULL, so results there are unchanged).
silver_reddit_all_posts_df = spark.sql(
    f""" 
    WITH all_posts_with_sentiment AS (
        SELECT * FROM {catalog_name}.{schema_name}.kdayno_silver_reddit_top_posts_sentiment
        UNION ALL
        SELECT * FROM {catalog_name}.{schema_name}.kdayno_silver_reddit_hot_posts_sentiment
    ),

    all_posts_with_sentiment_transformed AS (
        SELECT 
            ticker_symbol
            , post_id
            , created_utc AS created_date_utc
            , score AS number_of_upvotes
            , ROUND((score * (1 - upvote_ratio)) / NULLIF(upvote_ratio, 0)) AS number_of_downvotes
            , upvote_ratio
            , (1 - upvote_ratio) as downvote_ratio
            , num_comments AS number_of_comments
            , sentiment AS sentiment_category
            , sentiment_score
        FROM all_posts_with_sentiment
    ),

    all_posts_with_company_info AS (
        SELECT
            p.*
            , number_of_upvotes + number_of_downvotes AS total_number_of_votes
            , c.company_name
            , c.gics_sector
            , c.gics_sub_industry
        FROM all_posts_with_sentiment_transformed p
        INNER JOIN {catalog_name}.{schema_name}.kdayno_bronze_sp500_companies c ON p.ticker_symbol = c.ticker_symbol
    )

    SELECT * FROM all_posts_with_company_info;
    """)

In [0]:
# Replace the full contents of the silver "all posts" table with the freshly
# built DataFrame.
#
# A single Delta "overwrite" is used instead of DELETE FROM followed by an
# append: the replacement happens in one commit, so a failed write can no
# longer leave the table emptied by the preceding DELETE, and the cell also
# works on a first run when the table does not exist yet.
# NOTE(review): the table is partitioned by created_date_utc; if that column
# holds full timestamps rather than dates this creates one partition per
# distinct timestamp - confirm the column's type.
(silver_reddit_all_posts_df.write.format("delta")
                           .mode("overwrite")
                           .partitionBy('created_date_utc')
                           .saveAsTable(f'{catalog_name}.{schema_name}.kdayno_silver_reddit_all_posts'))