In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

# Medallion layers; each one maps to an ADLS container of the same name.
layers = ['bronze', 'silver', 'gold']

# Notebook parameters, all supplied through Databricks widgets.
storage_account = dbutils.widgets.get('storage_account')
dim_branch_table = dbutils.widgets.get('dim_branch_table')
dim_date_table = dbutils.widgets.get('dim_date_table')
dim_dealer_table = dbutils.widgets.get('dim_dealer_table')
dim_model_table = dbutils.widgets.get('dim_model_table')
fact_table = dbutils.widgets.get('fact_table')
date_id = dbutils.widgets.get('date_id')

# Root abfss:// URL of every layer's container, keyed by layer name.
adls = dict(
    (name, f'abfss://{name}@{storage_account}.dfs.core.windows.net')
    for name in layers
)

# Convenience aliases for the individual layer roots.
adls_bronze, adls_silver, adls_gold = adls['bronze'], adls['silver'], adls['gold']

# Create fact table

### Reading silver data

In [0]:
# Load the silver-layer parquet data stored under car_sales/<date_id>.
# The DataFrame reader is used instead of an f-string SQL statement so the
# widget-supplied date_id is only ever treated as part of a path, never as
# SQL text.
df_silver = spark.read.parquet(f'{adls_silver}/car_sales/{date_id}')

In [0]:
# Preview the first ten silver rows before building the fact table.
silver_preview = df_silver.limit(10)
silver_preview.display()

Branch_ID,Dealer_ID,Model_ID,Revenue,Units_Sold,Date_ID,Day,Month,Year,BranchName,DealerName,Product_Name,model_category,RevPerUnit
BR0001,DLR0001,BMW-M1,13363978,2,DT00001,1,1,2017,AC Cars Motors,AC Cars Motors,BMW,BMW,6681989.0
BR0003,DLR0228,Hon-M218,17376468,3,DT00001,10,5,2017,AC Cars Motors,Deccan Motors,Honda,Hon,5792156.0
BR0004,DLR0208,Tat-M188,9664767,3,DT00002,12,1,2017,AC Cars Motors,Wiesmann Motors,Tata,Tat,3221589.0
BR0005,DLR0188,Hyu-M158,5525304,3,DT00002,16,9,2017,AC Cars Motors,Subaru Motors,Hyundai,Hyu,1841768.0
BR0006,DLR0168,Ren-M128,12971088,3,DT00003,20,5,2017,AC Cars Motors,Saab Motors,Renault,Ren,4323696.0
BR0008,DLR0128,Hon-M68,7321228,1,DT00004,28,4,2017,AC Cars Motors,Messerschmitt Motors,Honda,Hon,7321228.0
BR0009,DLR0108,Cad-M38,11379294,2,DT00004,31,12,2017,AC Cars Motors,Lexus Motors,Cadillac,Cad,5689647.0
BR0010,DLR0088,Mer-M8,11611234,2,DT00005,4,9,2017,AC Cars Motors,"IFA (including Trabant, Wartburg, Barkas) Motors",Mercedes-Benz,Mer,5805617.0
BR0011,DLR0002,BMW-M2,19979446,2,DT00005,2,1,2017,Acura Motors,Acura Motors,BMW,BMW,9989723.0
BR0011,DLR0069,Vol-M256,14181510,3,DT00006,9,5,2017,Acura Motors,Geo Motors,Volkswagen,Vol,4727170.0


### Reading all dims

In [0]:
# Load the four gold-layer dimension tables named by the notebook widgets.
gold_schema = 'cars_catalog.gold'
df_dealer, df_branch, df_date, df_model = [
    spark.sql(f'select * from {gold_schema}.{dim_table}')
    for dim_table in (dim_dealer_table, dim_branch_table, dim_date_table, dim_model_table)
]

### Bring keys to fact table

In [0]:
# Join predicates: each dimension is matched to silver on its business ID.
branch_match = df_silver['Branch_ID'] == df_branch['Branch_ID']
dealer_match = df_silver['Dealer_ID'] == df_dealer['Dealer_ID']
date_match = df_silver['Date_ID'] == df_date['Date_ID']
model_match = df_silver['Model_ID'] == df_model['Model_ID']

# Left joins keep every silver row; only the surrogate keys and the measures
# are carried into the fact DataFrame.
df_fact = (
    df_silver
    .join(df_branch, branch_match, 'left')
    .join(df_dealer, dealer_match, 'left')
    .join(df_date, date_match, 'left')
    .join(df_model, model_match, 'left')
    .select(
        df_branch['dim_branch_key'],
        df_dealer['dim_dealer_key'],
        df_date['dim_date_key'],
        df_model['dim_model_key'],
        df_silver['RevPerUnit'],
        df_silver['Revenue'],
        df_silver['Units_Sold'],
    )
)

In [0]:
# Preview the first ten fact rows to sanity-check the key lookups.
fact_preview = df_fact.limit(10)
fact_preview.display()

dim_branch_key,dim_dealer_key,dim_date_key,dim_model_key,RevPerUnit,Revenue,Units_Sold
418,6,825,155,6681989.0,13363978,2
1557,197,825,252,5792156.0,17376468,3
1058,104,752,199,3221589.0,9664767,3
789,95,752,183,1841768.0,5525304,3
497,231,882,106,4323696.0,12971088,3
1804,41,988,41,7321228.0,7321228,1
734,177,988,107,5689647.0,11379294,2
1211,182,1043,110,5805617.0,11611234,2
116,204,1043,185,9989723.0,19979446,2
116,160,826,238,4727170.0,14181510,3


### Writing fact table

In [0]:
# Upsert the fact rows into the gold-layer Delta table, creating the table on
# the first run.
fact_table_name = f'cars_catalog.gold.{fact_table}'

# A fact row is matched on the combination of its four dimension keys.
fact_keys = ['dim_branch_key', 'dim_dealer_key', 'dim_date_key', 'dim_model_key']
merge_condition = ' AND '.join(f'target.{key} = source.{key}' for key in fact_keys)

if spark.catalog.tableExists(fact_table_name):
    # NOTE(review): the keys come from left joins, so a missed dimension lookup
    # yields a NULL key; '=' never matches NULL, so such rows would presumably
    # be re-inserted on every run -- confirm whether NULL keys can occur.
    delta_table = DeltaTable.forName(spark, fact_table_name)
    (
        delta_table.alias('target')
        .merge(df_fact.alias('source'), merge_condition)
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    # The storage path is derived from the fact_table widget (not a fixed
    # folder name) so that creating a table under a different name can never
    # overwrite the data files of another fact table.
    (
        df_fact.write.format('delta')
        .mode('overwrite')
        .option('path', f'{adls_gold}/{fact_table}')
        .saveAsTable(fact_table_name)
    )

In [0]:
%sql
-- Spot-check the first ten rows of the persisted fact table.
SELECT *
FROM cars_catalog.gold.fact_sales
LIMIT 10

dim_branch_key,dim_dealer_key,dim_date_key,dim_model_key,RevPerUnit,Revenue,Units_Sold
418,6,825,155,6681989.0,13363978,2
1557,197,825,252,5792156.0,17376468,3
1058,104,752,199,3221589.0,9664767,3
789,95,752,183,1841768.0,5525304,3
497,231,882,106,4323696.0,12971088,3
1804,41,988,41,7321228.0,7321228,1
734,177,988,107,5689647.0,11379294,2
1211,182,1043,110,5805617.0,11611234,2
116,204,1043,185,9989723.0,19979446,2
116,160,826,238,4727170.0,14181510,3
