In [None]:
from pyspark.sql.functions import monotonically_increasing_id, row_number, col, when, coalesce, max, lit
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

class DimDescWrangler:
    """Helpers that build and maintain a description dimension Delta table.

    A dimension row is identified by the natural key
    (LevelOneDesc, LevelTwoDesc, LevelThreeDesc) and carries an integer
    surrogate key, DescID.
    """

    # Natural-key columns, kept in one place so the dedup, the anti-join, the
    # table schema and the merge condition cannot drift apart.
    _KEY_COLUMNS = ('LevelOneDesc', 'LevelTwoDesc', 'LevelThreeDesc')

    @staticmethod
    def extract_silver_df(silver_df):
        """Return the distinct natural-key combinations present in ``silver_df``.

        Args:
            silver_df: DataFrame containing at least the three key columns.

        Returns:
            A DataFrame with only the three key columns, one row per distinct
            combination.
        """
        keys = list(DimDescWrangler._KEY_COLUMNS)
        return silver_df.dropDuplicates(keys).select(*keys)

    @staticmethod
    def create_delta_table(spark_session, table_name):
        """Create the dimension Delta table if it does not already exist.

        Args:
            spark_session: Session used to create the table.
            table_name: Name of the table to create.
        """
        builder = DeltaTable.createIfNotExists(spark_session).tableName(table_name)
        for name in DimDescWrangler._KEY_COLUMNS:
            builder = builder.addColumn(name, StringType())
        builder.addColumn('DescID', IntegerType()).execute()

    @staticmethod
    def setup_id(spark_session, table_name, upsert_df):
        """Keep only keys that are new to the table and assign them DescIDs.

        Rows of ``upsert_df`` whose natural key already exists in
        ``table_name`` are dropped. The remaining rows are numbered
        consecutively, starting right after the largest DescID currently in
        the table (or at 1 when the table is empty).

        Args:
            spark_session: Session used to read the existing table.
            table_name: Name of the dimension table.
            upsert_df: Candidate rows; must contain the three key columns.

        Returns:
            ``upsert_df`` restricted to unseen keys, with a ``DescID`` column.
        """
        keys = DimDescWrangler._KEY_COLUMNS

        # Read through the session that was passed in rather than relying on
        # a notebook-global ``spark`` variable.
        table_df = spark_session.read.table(table_name)

        # NOTE: ``max`` here is pyspark.sql.functions.max (imported at file
        # level), not the Python builtin. coalesce() covers the empty table.
        max_desc_id = table_df.select(
            coalesce(max(col('DescID')), lit(0)).alias('MAXDescID')
        ).first()[0]

        # Null-safe equality: with plain ``==`` a key containing NULL never
        # matches its stored copy, so it would be treated as new on every run.
        same_key = None
        for name in keys:
            same_column = upsert_df[name].eqNullSafe(table_df[name])
            same_key = same_column if same_key is None else same_key & same_column

        new_rows = upsert_df.join(table_df, same_key, 'left_anti')

        # Number rows by the natural key so the assignment is reproducible if
        # the DataFrame is evaluated more than once (a Delta merge may scan
        # its source several times). The window has no partitioning, so Spark
        # gathers the new rows into one partition; this assumes the batch of
        # new keys is small.
        window = Window.orderBy(*keys)
        return new_rows.withColumn('DescID', row_number().over(window) + max_desc_id)

    @staticmethod
    def upsert_delta_table(delta_table, df):
        """Insert the rows of ``df`` whose natural key is not yet in the table.

        Rows whose key already exists are left untouched: the previous empty
        ``whenMatchedUpdate`` clause assigned no columns, so it was dropped.

        Args:
            delta_table: Target ``DeltaTable``.
            df: Source rows with the three key columns and ``DescID``.
        """
        keys = DimDescWrangler._KEY_COLUMNS

        # ``<=>`` is Spark SQL's null-safe equality, matching the comparison
        # used when the new keys were selected.
        match_condition = ' and '.join(
            f'existing.{name} <=> updates.{name}' for name in keys
        )
        insert_values = {name: f'updates.{name}' for name in (*keys, 'DescID')}

        delta_table.alias('existing') \
            .merge(df.alias('updates'), match_condition) \
            .whenNotMatchedInsert(values=insert_values) \
            .execute()