In [None]:
from pyspark.sql.functions import col
from pyspark.sql.types import *
from delta.tables import DeltaTable

class FactRecordWrangler:
    """Static helpers that build and maintain the gold fact table.

    The fact table has the columns ReportedDate, Suburb, Postcode, DescID
    and Count.  The first four identify a row; Count is the measure.
    """

    @staticmethod
    def extract_silver_df(silver_df, dim_desc_table):
        """Swap the three description levels of the silver data for a DescID.

        Args:
            silver_df: DataFrame with ReportedDate, Suburb, Postcode, Count
                and the LevelOneDesc / LevelTwoDesc / LevelThreeDesc columns.
            dim_desc_table: DataFrame with the same three description
                columns plus DescID.

        Returns:
            A DataFrame with ReportedDate, Suburb, Postcode, DescID and
            Count, ordered by ReportedDate then DescID.  The join is a left
            join, so silver rows without a matching description are kept
            and carry a NULL DescID.
        """
        facts = silver_df.alias('df1')
        descriptions = dim_desc_table.alias('df2')

        # Resolve the join keys through the aliases rather than through the
        # original DataFrame objects: if both inputs share lineage, the
        # un-aliased references can be ambiguous.
        same_description = (
            (col('df1.LevelOneDesc') == col('df2.LevelOneDesc'))
            & (col('df1.LevelTwoDesc') == col('df2.LevelTwoDesc'))
            & (col('df1.LevelThreeDesc') == col('df2.LevelThreeDesc'))
        )

        return (
            facts.join(descriptions, same_description, 'left')
            .select(
                col('df1.ReportedDate'),
                col('df1.Suburb'),
                col('df1.Postcode'),
                col('df2.DescID'),
                col('df1.Count'),
            )
            .orderBy(col('df1.ReportedDate'), col('df2.DescID'))
        )

    @staticmethod
    def create_delta_table(spark_session, table_name):
        """Create the fact Delta table if it does not exist yet.

        Args:
            spark_session: Spark session handed to the Delta table builder.
            table_name: Name of the table to create.
        """
        (
            DeltaTable.createIfNotExists(spark_session)
            .tableName(table_name)
            .addColumn('ReportedDate', DateType())
            .addColumn('Suburb', StringType())
            .addColumn('Postcode', ShortType())
            .addColumn('DescID', IntegerType())
            .addColumn('Count', IntegerType())
            .execute()
        )

    @staticmethod
    def upsert_delta_table(delta_table, df):
        """Merge ``df`` into ``delta_table`` (aliased 'updates' and 'gold').

        Rows are matched on ReportedDate, Suburb, Postcode and DescID.  A
        matched row has its Count overwritten when it differs; an unmatched
        row is inserted.

        Keys are compared with the null-safe operator ``<=>``.  With plain
        ``=`` a NULL key (for example the NULL DescID that
        ``extract_silver_df`` yields for an unmatched description) never
        matches, so the same row would be inserted again on every run.

        Args:
            delta_table: Target DeltaTable.
            df: DataFrame holding the new or changed fact rows.
        """
        key_columns = ('ReportedDate', 'Suburb', 'Postcode', 'DescID')
        same_key = ' and '.join(
            f'gold.{name} <=> updates.{name}' for name in key_columns
        )
        inserted_values = {
            name: f'updates.{name}' for name in key_columns + ('Count',)
        }

        (
            delta_table.alias('gold')
            .merge(df.alias('updates'), same_key)
            .whenMatchedUpdate(
                # Null-safe "differs": `!=` evaluates to NULL when either
                # side is NULL, which would silently skip the update.
                condition='not (gold.Count <=> updates.Count)',
                set={'gold.Count': 'updates.Count'},
            )
            .whenNotMatchedInsert(values=inserted_values)
            .execute()
        )