### **Import packages**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import *

### **Define paths**

In [0]:
# Storage locations (ADLS Gen2, abfss scheme) used by the rest of the notebook.
# silver_path: Delta data read as the source of the incremental load.
silver_path = "abfss://silver@stfinancedev.dfs.core.windows.net/s_loanpayments"
# gold_path: Delta location the fact rows are appended to. It is the same string as the
# LOCATION clause of the CREATE TABLE statement below, so the two must be kept in sync.
gold_path = "abfss://gold@stfinancedev.dfs.core.windows.net/fact_loanpayments"

### **Create the fact_loanpayments Delta Lake table (schema)**

In [0]:
%sql
-- Gold-layer fact table for loan payments. IF NOT EXISTS makes this a no-op once the
-- table has been created.
CREATE TABLE IF NOT EXISTS finance_cata.gold.fact_loanpayments (
  -- Identity column generated by the table itself (start 1, step 1).
  LoanPaymentkey  BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1),
  PaymentID       STRING,
  LoanKey         BIGINT,     -- selected from finance_cata.gold.dim_loans by the load query
  CustomerKey     BIGINT,     -- selected from finance_cata.gold.dim_customers by the load query
  PaymentDatekey  BIGINT,     -- payment date formatted as yyyyMMdd and cast to BIGINT by the load
  PaymentAmount   STRING,     -- NOTE(review): a STRING measure; confirm whether DECIMAL is intended
  Status          STRING,
  CreatedAt       TIMESTAMP   -- set to current_timestamp() when the rows are prepared
)
USING DELTA
-- Same storage path as gold_path in the Python cell above.
LOCATION 'abfss://gold@stfinancedev.dfs.core.windows.net/fact_loanpayments'

### **Incremental load**

In [0]:
# Incremental load: pick up the silver payments that are not yet in the gold fact.
#
# Watermark = the highest PaymentDatekey (yyyyMMdd as BIGINT) already in the fact.
# The key has day granularity, so filtering with a strict ">" permanently skips payments
# for the watermark day that arrive in silver after that day was first loaded. Instead,
# the watermark day is re-read (">=") and rows whose (PaymentID, date key) pair is
# already in the fact are removed with an anti-join, so re-runs still do not re-append
# rows that were loaded before.
# NOTE(review): this assumes PaymentID identifies a payment within a day - confirm.
if DeltaTable.isDeltaTable(spark, gold_path):
    fact_df = DeltaTable.forPath(spark, gold_path).toDF()
    # `max` is the Spark aggregate from the star import, not the Python builtin.
    # On an empty fact table the aggregate is NULL, so max_datekey becomes None.
    max_datekey = (
        fact_df.agg(max(col("PaymentDatekey")).alias("max_datekey"))
        .first()["max_datekey"]
    )
else:
    max_datekey = None

# Read silver once. PaymentDateTime is overwritten with the integer date key here;
# the later cells rely on that column name and rename it to PaymentDatekey.
silver_df = (
    spark.read.format("delta").load(silver_path)
    .withColumn("PaymentDateTime", date_format(col("PaymentDateTime"), "yyyyMMdd").cast("bigint"))
)

# Explicit None check: the question is "is there a watermark", not "is it truthy".
if max_datekey is not None:
    # (PaymentID, date key) pairs already loaded for the watermark day; the date column
    # is aliased so both sides of the anti-join share the same column names.
    loaded_keys = (
        fact_df.filter(col("PaymentDatekey") >= lit(max_datekey))
        .select(col("PaymentID"), col("PaymentDatekey").alias("PaymentDateTime"))
        .distinct()
    )
    inc_df = (
        silver_df.filter(col("PaymentDateTime") >= lit(max_datekey))
        .join(loaded_keys, on=["PaymentID", "PaymentDateTime"], how="left_anti")
    )
else:
    # No watermark (fact missing or empty): load everything from silver.
    inc_df = silver_df

In [0]:
# Register the incremental batch as a temp view so the Spark SQL query below can select
# from it. The view shares its short name with the gold table, but the table is always
# referenced by its fully qualified name (finance_cata.gold.fact_loanpayments).
inc_df.createOrReplaceTempView("fact_loanpayments")

### **Build the fact_loanpayments rows (look up LoanKey and CustomerKey)**

In [0]:
# Resolve dimension keys for the incremental batch.
# - fact_loanpayments here is the temp view registered from inc_df, not the gold table.
# - LoanKey comes from dim_loans (matched on LoanID); CustomerKey comes from
#   dim_customers, matched through the loan's CustomerID.
# - fp.PaymentDateTime already holds the yyyyMMdd BIGINT date key at this point.
# - Both joins are INNER, so a payment with no matching loan or customer row is left
#   out of the result.
# NOTE(review): rows dropped here are not reported anywhere in this notebook - confirm
# that losing unmatched payments is acceptable.
df = spark.sql("""
               SELECT 
                fp.PaymentID ,
                dl.LoanKey ,
                dc.CustomerKey ,
                fp.PaymentDateTime ,
                fp.PaymentAmount ,
                fp.Status 
               FROM fact_loanpayments AS fp
               INNER JOIN finance_cata.gold.dim_loans AS dl
               ON fp.LoanID = dl.LoanID
               INNER JOIN finance_cata.gold.dim_customers AS dc
               ON dl.CustomerID = dc.CustomerID
           """)

In [0]:
# Preview the joined rows before the column rename and CreatedAt are applied.
display(df)

### **Rename the date key and add the CreatedAt audit column**

In [0]:
# Shape the batch to the fact table's column names: the date key column takes its
# final name, and every row is stamped with the time of this load.
df_ready = (
    df
    .withColumnRenamed("PaymentDateTime", "PaymentDatekey")
    .withColumn("CreatedAt", current_timestamp())
)

In [0]:
# Preview the rows exactly as they will be appended to the gold table.
display(df_ready)


### **Write in gold layer**

In [0]:
# Append the batch to the Delta data at gold_path. df_ready has no LoanPaymentkey
# column; the table DDL declares it GENERATED ALWAYS AS IDENTITY, so it is presumably
# filled in by Delta on write - verify on the target runtime.
gold_writer = df_ready.write.format("delta").mode("append")
gold_writer.save(gold_path)

In [0]:
%sql
-- Inspect the gold fact table after the append.
SELECT *
FROM finance_cata.gold.fact_loanpayments