In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

Use a storage credential to access data in the data lake.

# Create flag parameter

In [0]:
# Register a text widget named 'Incremental_flag' whose default value is '0'.
dbutils.widgets.text("Incremental_flag", "0")

In [0]:
# Read the current value of the 'Incremental_flag' widget; it is compared
# against the string '0' further down to tell initial from incremental runs.
Incremental_flag = dbutils.widgets.get("Incremental_flag")


## Creating dimensions

### Fetching relevant columns

In [0]:
# Distinct (Model_ID, Model_category) rows from the silver-layer carsales parquet data.
# NOTE: DISTINCT applies to the whole select list, not only to Model_ID, so a
# Model_ID that appears with several categories yields several rows.
src_query = '''select distinct(Model_ID) as Model_ID,Model_category from parquet.`abfss://silver@carsathvikdatalake.dfs.core.windows.net/carsales`'''
df_src = spark.sql(src_query)

### dim_model sink initial and incremental load

In [0]:
# Load the current contents of the dimension (the "sink") so that source rows
# can be compared against it.
if spark.catalog.tableExists('hive_metastore.gold.dim_model'):
    # The dimension table already exists: read its current rows.
    sink_query = '''
                    select dim_model_key,Model_ID,Model_category 
                    from hive_metastore.gold.dim_model
                    '''
else:
    # No table yet: build a frame with the same three columns but no rows
    # (the `where 1=0` predicate filters everything out).
    sink_query = '''
                    select 1 as dim_model_key,Model_ID,Model_category 
                    from parquet.`abfss://silver@carsathvikdatalake.dfs.core.windows.net/carsales` 
                    where 1=0'''

df_sink = spark.sql(sink_query)
     


In [0]:
%python
# Left-join the source models to the sink on Model_ID. Every source row is
# kept; dim_model_key is null for models that have no match in the sink.
join_condition = df_src['Model_ID'] == df_sink['Model_ID']
df_joined = df_src.join(df_sink, on=join_condition, how='left')
df_filter = df_joined.select(
    df_src['Model_ID'],
    df_src['Model_category'],
    df_sink['dim_model_key'],
)


In [0]:
# Rows whose Model_ID already has a surrogate key in the sink.
has_key = df_filter['dim_model_key'].isNotNull()
df_filter_old = df_filter.filter(has_key)

In [0]:
# Rows with no surrogate key yet. The (all-null) key column is left out here;
# a generated key is attached in a later cell.
missing_key = df_filter['dim_model_key'].isNull()
df_filter_new = df_filter.filter(missing_key).select('Model_ID', 'Model_category')

## Fetching the max surrogate key

In [0]:
# Work out the first surrogate key to hand out to new rows (max_value is used
# as the offset added to monotonically_increasing_id()).
#
# monotonically_increasing_id() starts at 0, so the offset must be one past the
# current maximum key. Reusing the maximum itself would give the first new row
# an already-used key, and the merge on dim_model_key would then overwrite that
# existing dimension row instead of inserting a new one.
if Incremental_flag == '0':
    # Initial load: keys start at 1.
    max_value = 1
else:
    max_value_df = spark.sql("select max(dim_model_key) from hive_metastore.gold.dim_model")
    current_max = max_value_df.collect()[0][0]
    # max() returns NULL for an empty table; fall back to the initial-load
    # start instead of failing later on `None + Column`.
    max_value = 1 if current_max is None else current_max + 1

# Creating surrogate key

In [0]:
# Attach a surrogate key to each new row: Spark's generated id shifted by
# max_value. The generated ids are unique but not guaranteed to be consecutive.
surrogate_key = max_value + monotonically_increasing_id()
df_filter_new = df_filter_new.withColumn('dim_model_key', surrogate_key)

In [0]:
# Combine already-keyed rows with the newly keyed ones. Columns are matched by
# name rather than by position, so a change in the column order of either
# input cannot silently put values into the wrong column.
df_final = df_filter_old.unionByName(df_filter_new)

# SCD type 1

In [0]:
from delta.tables import DeltaTable

In [0]:

# Write df_final to the gold dim_model table.
if not spark.catalog.tableExists('hive_metastore.gold.dim_model'):
    # First load: create the table as Delta files at the gold path and
    # register it under hive_metastore.gold.dim_model.
    writer = df_final.write.format("delta")
    writer = writer.option('path', "abfss://gold@carsathvikdatalake.dfs.core.windows.net/dim_model")
    writer.mode('overwrite').saveAsTable('hive_metastore.gold.dim_model')
else:
    # Table exists: upsert keyed on dim_model_key. Matching rows are updated
    # in place (SCD type 1, no history kept); unmatched rows are inserted.
    delta_tbl = DeltaTable.forPath(spark, 'abfss://gold@carsathvikdatalake.dfs.core.windows.net/dim_model')
    merge_builder = delta_tbl.alias('trg').merge(
        df_final.alias('src'),
        'trg.dim_model_key = src.dim_model_key',
    )
    (
        merge_builder
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
            