### **Import packages**

In [0]:
import sys

# Directory holding the shared helper modules (schema_utils).
# Python's import system only understands local filesystem paths; the `dbfs:/`
# URI scheme is resolved by Spark/dbutils, not by `import`. On Databricks, DBFS
# is reachable from local file APIs through the `/dbfs` mount, so that form is
# what must be placed on sys.path.
# NOTE(review): the `/dbfs` mount depends on the cluster access mode — confirm
# it is available on the cluster this notebook runs on.
_helper_module_dir = '/dbfs/FileStore/tables/'
if _helper_module_dir not in sys.path:  # re-running the cell must not stack duplicates
    sys.path.append(_helper_module_dir)

from schema_utils import validate_schema
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

### **Define paths**

In [0]:
# Storage account that hosts the bronze and silver containers. Pointing the
# notebook at another environment is a change to this single value.
STORAGE_ACCOUNT = "stfinancedev"

# abfss URI layout: abfss://<container>@<account>.dfs.core.windows.net/<path>
_ABFSS_URI = "abfss://{container}@{account}.dfs.core.windows.net/{path}"

# Source location read by this notebook (bronze container).
bronze_path = _ABFSS_URI.format(container="bronze", account=STORAGE_ACCOUNT, path="bnz_loans")
# Target location written by this notebook (silver container).
silver_path = _ABFSS_URI.format(container="silver", account=STORAGE_ACCOUNT, path="s_loans")

### **Schema Validation**

In [0]:
# Expected layout of the loans dataset: (column name, Spark data type) in
# column order. Every column is declared nullable.
_loan_columns = [
    ('LoanID', StringType()),
    ('CustomerID', StringType()),
    ('AccountID', StringType()),
    ('LoanType', StringType()),
    ('PrincipalAmount', DecimalType(18, 2)),
    ('InterestRate', DecimalType(5, 2)),
    ('LoanStartDateTime', TimestampType()),
    ('LoanEndDateTime', TimestampType()),
    ('LoanStatus', StringType()),
    ('TenureMonths', IntegerType()),
]

expected_schema = StructType(
    [StructField(column_name, data_type, True) for column_name, data_type in _loan_columns]
)

In [0]:
# Read the Parquet data stored at bronze_path into a DataFrame.
incoming_df = spark.read.parquet(bronze_path)

In [0]:
# Run the schema_utils check of incoming_df against expected_schema, passing
# "loans" as the third argument (presumably a dataset label — TODO confirm).
# NOTE(review): the return value is discarded, so this cell only acts as a gate
# if validate_schema raises on a mismatch — confirm against schema_utils.
validate_schema(incoming_df, expected_schema,"loans")

In [0]:
# Render the incoming DataFrame in the cell output for a visual check.
display(incoming_df)

### **Incremental load**

In [0]:
# Watermark = latest LoanStartDateTime already present in the silver table.
# It stays None when no Delta table exists at silver_path yet (first run) or
# when the existing table holds no non-null LoanStartDateTime.
max_ts = None
if DeltaTable.isDeltaTable(spark, silver_path):
    silver_df = DeltaTable.forPath(spark, silver_path).toDF()
    watermark_row = silver_df.select(
        max(col("LoanStartDateTime")).alias("max_ts")
    ).first()
    max_ts = watermark_row["max_ts"]

# Without a watermark everything is loaded; otherwise only rows strictly newer
# than the watermark are kept.
# NOTE(review): rows that arrive later with a LoanStartDateTime at or before
# the watermark are filtered out here — confirm that is the intended behaviour.
inc_df = (
    incoming_df
    if max_ts is None
    else incoming_df.where(col("LoanStartDateTime") > lit(max_ts))
)

### **Data validations**

In [0]:
# Columns that must all be populated; a row with a null in any of them is dropped.
required_columns = [
    'LoanID',
    'CustomerID',
    'AccountID',
    'LoanType',
    'PrincipalAmount',
    'InterestRate',
    'LoanStartDateTime',
    'LoanEndDateTime',
    'LoanStatus',
    'TenureMonths',
]

# Null handling first, then duplicate handling: keep one row per LoanID.
# NOTE(review): which of several rows sharing a LoanID survives is not
# controlled here — confirm that an arbitrary pick is acceptable.
df_clean = (
    inc_df
    .na.drop(how='any', subset=required_columns)
    .drop_duplicates(['LoanID'])
)


### **Data count**

In [0]:
# Row counts before and after cleaning; the difference is the number of rows
# removed by the null and duplicate handling. Each count() runs a Spark job.
incoming_rows, loaded_rows = inc_df.count(), df_clean.count()
rejected_rows = incoming_rows - loaded_rows

# One line per metric, in the order: incoming, loaded, rejected.
print(
    f"Incoming rows from bronze layer {incoming_rows}",
    f"Loaded rows to silver layer {loaded_rows}",
    f"Rejected rows {rejected_rows}",
    sep="\n",
)

### **Write in silver layer**

In [0]:
# Append the cleaned rows to the Delta table at silver_path; rows already
# stored there are left untouched.
(
    df_clean.write
    .format("delta")
    .mode("append")
    .save(silver_path)
)