### **Import packages**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import *


### **Define paths**

In [0]:
# Storage locations used by this notebook (abfss:// URIs on the
# "stfinancedev" storage account):
#   silver_path - Delta data read as the source of transactions.
#   gold_path   - Delta data of the fact table that this notebook appends to.
silver_path = "abfss://silver@stfinancedev.dfs.core.windows.net/s_transactions"
gold_path = "abfss://gold@stfinancedev.dfs.core.windows.net/fact_transactions"

### **Create the fact_transactions Delta Lake table schema**

In [0]:
%sql
-- Declares the gold fact table (no-op when it already exists) as a Delta table
-- stored at the same abfss location that the Python cells use as gold_path.
CREATE TABLE IF NOT EXISTS finance_cata.gold.fact_transactions(
  -- Identity column: values come from the identity sequence (start 1, step 1).
  Transactionkey BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1),
  TransactionID STRING,
  -- Account/Customer/Branch keys are populated from the gold dimension tables
  -- by the load query later in this notebook.
  Accountkey BIGINT,
  CustomerKey BIGINT,
  Branchkey BIGINT,
  -- Filled with the yyyyMMdd-formatted transaction date cast to BIGINT.
  TransactionDatekey BIGINT,
  TransactionType STRING,
  Amount DECIMAL(18,2),
  Description STRING,
  MCC STRING,
  FraudScore DECIMAL(5,2),
  FraudFlag STRING,
  -- Set to current_timestamp() by the loader just before the append.
  CreatedAt TIMESTAMP
)USING DELTA 
LOCATION "abfss://gold@stfinancedev.dfs.core.windows.net/fact_transactions"

In [0]:
# Show the column names and types of the silver source before building the fact.
silver_schema_df = spark.read.format("delta").load(silver_path)
silver_schema_df.printSchema()

### **Incremental load**

In [0]:
# Incremental extract from silver.
#
# The watermark is the highest TransactionDatekey already stored in gold. It is a
# day-granular yyyyMMdd key, so silver rows for the watermark day can still arrive
# after a previous run. A strict ">" filter would skip those rows forever.
# Instead we re-read the watermark day (">=") and drop every transaction whose
# TransactionID is already in gold for that day, which keeps the load both
# complete and safe to re-run.
#
# Produces:
#   inc_df       - silver rows still to be loaded; "TransactionDateTime" is replaced
#                  by its yyyyMMdd BIGINT date key (later cells rely on that name).
#   max_datekey  - current watermark, or None on the first load / empty fact table.
#   fact_df      - current gold fact data (only defined when gold already exists).

# Read silver once and derive the date key; both load modes share this frame.
silver_keyed_df = (
    spark.read.format("delta").load(silver_path)
    .withColumn(
        "TransactionDateTime",
        date_format(col("TransactionDateTime"), "yyyyMMdd").cast("bigint"),
    )
)

# Default: full load (gold does not exist yet, or exists but holds no rows).
inc_df = silver_keyed_df
max_datekey = None

if DeltaTable.isDeltaTable(spark, gold_path):
    fact_df = DeltaTable.forPath(spark, gold_path).toDF()
    max_datekey = (
        fact_df.agg(max(col("TransactionDatekey")).alias("max_datekey"))
        .first()["max_datekey"]
    )

    # Explicit None check: the aggregate returns None for an empty table, and a
    # truthiness test would also (wrongly) treat a key of 0 as "no watermark".
    if max_datekey is not None:
        # IDs already loaded for the watermark day; renamed so the join condition
        # is unambiguous.
        loaded_ids_df = (
            fact_df.filter(col("TransactionDatekey") == lit(max_datekey))
            .select(col("TransactionID").alias("LoadedTransactionID"))
            .distinct()
        )
        # left_anti keeps only silver columns; eqNullSafe makes NULL ids compare
        # equal so they are not re-appended on every run.
        inc_df = (
            silver_keyed_df
            .filter(col("TransactionDateTime") >= lit(max_datekey))
            .join(
                loaded_ids_df,
                col("TransactionID").eqNullSafe(col("LoadedTransactionID")),
                "left_anti",
            )
        )

In [0]:
# Expose the incremental rows to SQL as the session-scoped temp view "fact_trn";
# the SQL cells below read from it. Re-running replaces any previous definition.
inc_df.createOrReplaceTempView("fact_trn")

In [0]:
%sql
-- Preview the incremental rows that are about to be joined to the dimensions.
SELECT * FROM fact_trn;

### **Build the fact_transactions rows**

In [0]:
# Resolve dimension keys for the incremental transactions in the "fact_trn" view.
#   - dim_accounts  : INNER JOIN on AccountID, so transactions with no matching
#                     account row are excluded from the result.
#   - dim_customers : INNER JOIN on the account's CustomerID, so accounts with no
#                     matching customer row are excluded as well.
#   - dim_branches  : LEFT JOIN on the account's BranchID, so Branchkey is NULL
#                     when the branch is not found and the row is still kept.
# ft.TransactionDateTime already holds the yyyyMMdd BIGINT date key produced by
# the incremental-load cell; it is renamed to TransactionDatekey further down.
df = spark.sql("""
               SELECT 
                ft.TransactionID,
                da.Accountkey,
                dc.CustomerKey,
                db.Branchkey ,
                ft.TransactionDateTime ,
                ft.TransactionType ,
                ft.Amount ,
                ft.Description ,
                ft.MCC ,
                ft.FraudScore ,
                ft.FraudFlag 
               FROM fact_trn AS ft
               INNER JOIN finance_cata.gold.dim_accounts AS da
               ON ft.AccountID = da.AccountID
               INNER JOIN finance_cata.gold.dim_customers AS dc
               ON da.CustomerID = dc.CustomerID
               LEFT JOIN finance_cata.gold.dim_branches AS db
               ON da.BranchID = db.BranchID
               """)

In [0]:
# Render the joined result in the notebook for a visual check before writing.
df.display()


### **Rename the date key and add the CreatedAt column**

In [0]:
# Shape the joined rows to the fact table layout:
#   - the date key column takes the name used by the target table, and
#   - CreatedAt records when this load ran.
fact_ready = (
    df
    .withColumnRenamed("TransactionDateTime", "TransactionDatekey")
    .withColumn('CreatedAt', current_timestamp())
)




### **Write data in gold layer**

In [0]:
# Append the prepared rows to the Delta data at gold_path.
(
    fact_ready.write
    .format("delta")
    .mode("append")
    .save(gold_path)
)


In [0]:
%sql
-- Inspect the gold fact table after the append.
SELECT * FROM finance_cata.gold.fact_transactions