In [None]:
##data_updates
##harmonized tables

from snowflake.snowpark import Session

from snowflake.snowpark.functions import col, to_date, avg

from snowflake.snowpark.window import Window
 
# Function to transform CO2 data

def transform_co2_data(session):
    """Build the harmonized CO2 table from the raw daily measurements.

    Reads ``CO2_DB.RAW_CO2.DAILY_MEASUREMENTS``, keeps ``DATE`` (converted with
    ``to_date``) and ``CO2_PPM`` (cast to float), drops rows that contain
    nulls, adds a ``ROLLING_7DAY_AVG`` column and overwrites
    ``CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED`` with the result.

    Args:
        session: Snowpark session used to read the source table and to write
            the target table.

    Raises:
        Exception: Any error raised while building or writing the data is
            printed and then re-raised unchanged.
    """
    try:
        # Typed projection of the raw table, with incomplete rows removed
        # before the rolling average is computed.
        cleaned = (
            session.table("CO2_DB.RAW_CO2.DAILY_MEASUREMENTS")
            .select(
                to_date(col("DATE")).alias("DATE"),
                col("CO2_PPM").cast("float").alias("CO2_PPM"),
            )
            .na.drop()
        )

        # The frame is row-based: the current row plus the six rows before it
        # in DATE order. It only spans seven calendar days when the data has
        # exactly one row per day with no gaps.
        trailing_rows = Window.orderBy(col("DATE")).rowsBetween(-6, 0)
        rolling_avg = avg(col("CO2_PPM")).over(trailing_rows)

        result = cleaned.with_column("ROLLING_7DAY_AVG", rolling_avg)

        # "overwrite" replaces whatever the target table held before.
        result.write.mode("overwrite").save_as_table(
            "CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED"
        )
        print("Data transformation completed successfully.")
    except Exception as e:
        # Report the failure, then let it propagate to the caller.
        print(f"Transformation failed: {e}")
        raise
 
# Main block to execute the script

if __name__ == "__main__":
    try:
        # Reuse the active Snowpark session or create one. No connection
        # parameters are passed here, so they presumably come from the
        # environment -- confirm for the target deployment.
        session = (
            Session.builder
            .appName("CO2_Data_Transformation")
            .getOrCreate()
        )
        print("Snowflake session established.")

        # Build and persist the harmonized table.
        transform_co2_data(session)
        print("Transformation completed.")
    except Exception as e:
        # Covers both session creation and the transformation itself; the
        # error is reported and then propagated.
        print(f"Error during session creation or transformation: {e}")
        raise
    finally:
        # Close the session whenever a `session` name is bound. Code that runs
        # after this block must obtain a session again rather than reuse it.
        if "session" in locals():
            session.close()
            print("Snowflake session closed.")
 


In [None]:
## Percentage change in CO2_PPM versus the previous row (Snowpark DataFrame function, not a registered UDF)

from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, lag, when
from snowflake.snowpark.window import Window
 
def calculate_co2_percentage_change(session: Session):
    """Add the row-over-row percentage change in CO2_PPM and persist it.

    Reads ``CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED``, adds
    ``PREVIOUS_CO2`` (the ``CO2_PPM`` of the preceding row in ``DATE`` order)
    and ``PERCENTAGE_CHANGE``, shows the result, and overwrites
    ``CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED_WITH_PERCENTAGE_CHANGE``.

    ``PERCENTAGE_CHANGE`` is NULL when there is no usable previous value: on
    the first row, and when the previous value is NULL or 0 (a zero divisor
    would otherwise make the whole query fail).

    "Previous" means the previous row, which is the previous day only if the
    table holds exactly one row per consecutive day.

    Args:
        session: Snowpark session used to read the source table and write the
            result table.

    Returns:
        The Snowpark DataFrame with the two added columns.
    """
    harmonized_df = session.table("CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED")

    # lag() needs an ordering; the whole table is treated as one series.
    ordered_by_date = Window.orderBy(col("DATE"))
    harmonized_df = harmonized_df.with_column(
        "PREVIOUS_CO2", lag(col("CO2_PPM")).over(ordered_by_date)
    )

    previous = col("PREVIOUS_CO2")
    # Guard against both a missing and a zero divisor.
    has_usable_previous = previous.isNotNull() & (previous != 0)
    harmonized_df = harmonized_df.with_column(
        "PERCENTAGE_CHANGE",
        when(
            has_usable_previous,
            ((col("CO2_PPM") - previous) / previous) * 100,
        ).otherwise(None),
    )

    # Display the result in the notebook output.
    harmonized_df.show()

    # Persist the result; this always runs and replaces the target table.
    harmonized_df.write.mode("overwrite").save_as_table("CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED_WITH_PERCENTAGE_CHANGE")

    return harmonized_df
 
# Call the function.
# This cell does not create `session` itself, so fetch the active Snowpark
# session (or create one if none is active) instead of depending on a session
# object left over from, and possibly already closed by, an earlier cell.
session = Session.builder.getOrCreate()
harmonized_df_with_percentage_change = calculate_co2_percentage_change(session)

In [None]:
## SQL table function (UDTF): average CO2_PPM per calendar month and its deviation from the overall mean
from snowflake.snowpark import Session

# Obtain the session here rather than relying on a `session` variable left
# over from an earlier cell, which may be undefined or already closed.
# getOrCreate() returns the active session when there is one and creates a
# new one otherwise.
session = Session.builder.getOrCreate()

# SQL that (re)creates a table function returning one row per calendar month:
# the month number, the average CO2_PPM for that month across all rows of the
# harmonized table, and that average minus the mean over the whole table.
sql_create_function = """
CREATE OR REPLACE FUNCTION CO2_DB.HARMONIZED_CO2.CALCULATE_SEASONAL_VARIATION()
RETURNS TABLE (
    MONTH INT,
    AVG_CO2_PPM FLOAT,
    DEVIATION_FROM_ANNUAL_MEAN FLOAT
)
AS
$$
    WITH annual_mean AS (
        SELECT AVG(CO2_PPM) AS mean
        FROM CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED
    ),
    monthly_avg AS (
        SELECT 
            EXTRACT(MONTH FROM DATE)::INT AS MONTH,
            AVG(CO2_PPM) AS AVG_CO2_PPM
        FROM CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED
        GROUP BY EXTRACT(MONTH FROM DATE)
    )
    SELECT 
        m.MONTH,
        m.AVG_CO2_PPM,
        m.AVG_CO2_PPM - a.mean AS DEVIATION_FROM_ANNUAL_MEAN
    FROM monthly_avg m, annual_mean a
    ORDER BY m.MONTH
$$;
"""

# Run the DDL; collect() forces execution and raises if the statement fails.
session.sql(sql_create_function).collect()

# Reached only when the statement above did not raise. This is a confirmation
# message, not an independent check that the function exists.
print("Function 'CALCULATE_SEASONAL_VARIATION' created successfully.")


In [None]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, date_trunc, avg, sum
 
def create_daily_metrics_table(session):
    """Aggregate harmonized CO2 readings per day into an analytics table.

    Groups ``CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED`` by ``DATE``
    truncated to the day and stores the average and the sum of ``CO2_PPM``
    for each group in ``CO2_DB.ANALYTICS_CO2.daily_co2_metrics``, replacing
    any existing contents.

    Args:
        session: Snowpark session used for reading and writing.
    """
    source_name = "CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED"
    readings = session.table(source_name)

    # Grouping key and the two aggregates; `sum` here is the Snowpark
    # function imported by this cell, not the Python builtin.
    day_key = date_trunc('day', col("DATE")).alias("DAILY_DATE")
    mean_ppm = avg("CO2_PPM").alias("AVG_CO2_PPM")
    total_ppm = sum("CO2_PPM").alias("SUM_CO2_PPM")
    per_day = readings.groupBy(day_key).agg(mean_ppm, total_ppm)

    target_name = "CO2_DB.ANALYTICS_CO2.daily_co2_metrics"
    per_day.write.mode("overwrite").save_as_table(target_name)

    print(f"Daily CO2 metrics table created: {target_name}")
 
# Main block to execute the script
if __name__ == "__main__":
    try:
        # Reuse the active Snowpark session or create one.
        session = Session.builder.appName("CO2_Daily_Metrics").getOrCreate()

        # Build and persist the daily CO2 metrics table.
        create_daily_metrics_table(session)

    except Exception as e:
        # Report the failure, then re-raise so it is visible to the notebook
        # or caller instead of being reduced to a printed line.
        print(f"Error: {e}")
        raise

    finally:
        # Close the session whenever a `session` name is bound.
        if 'session' in locals():
            session.close()
            print("Snowflake session closed.")

In [None]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, date_trunc, avg, sum
 
def create_weekly_metrics_table(session: Session):
    """Aggregate harmonized CO2 readings per week into an analytics table.

    Groups ``CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED`` by ``DATE``
    truncated to the week and stores the average and the sum of ``CO2_PPM``
    for each group in ``CO2_DB.ANALYTICS_CO2.weekly_co2_metrics``, replacing
    any existing contents.

    Args:
        session: Snowpark session used for reading and writing.
    """
    source_name = "CO2_DB.HARMONIZED_CO2.CO2_EMISSIONS_HARMONIZED"
    readings = session.table(source_name)

    # Grouping key and the two aggregates; `sum` here is the Snowpark
    # function imported by this cell, not the Python builtin.
    week_key = date_trunc('week', col("DATE")).alias("WEEKLY_DATE")
    mean_ppm = avg("CO2_PPM").alias("AVG_CO2_PPM")
    total_ppm = sum("CO2_PPM").alias("SUM_CO2_PPM")
    per_week = readings.groupBy(week_key).agg(mean_ppm, total_ppm)

    target_name = "CO2_DB.ANALYTICS_CO2.weekly_co2_metrics"
    per_week.write.mode("overwrite").save_as_table(target_name)

    print(f"Weekly metrics table created: {target_name}")
 
# Main block to execute the script
if __name__ == "__main__":
    try:
        # Reuse the active Snowpark session or create one.
        session = Session.builder.appName("CO2_Weekly_Metrics").getOrCreate()

        # Build and persist the weekly CO2 metrics table.
        create_weekly_metrics_table(session)

    except Exception as e:
        # Report the failure, then re-raise so it is visible to the notebook
        # or caller instead of being reduced to a printed line.
        print(f"Error: {e}")
        raise

    finally:
        # Close the session whenever a `session` name is bound.
        if 'session' in locals():
            session.close()
            print("Snowflake session closed.")