### **Import packages**

In [0]:
import sys

# Make helper modules uploaded to DBFS importable. Python's import machinery
# reads sys.path entries through the local filesystem, where DBFS is exposed
# under the /dbfs mount; the "dbfs:/" URI form is only understood by Spark and
# dbutils, so a sys.path entry written that way is never searched.
sys.path.append('/dbfs/FileStore/tables/')

from schema_utils import validate_schema
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

### **Define paths**

In [0]:
# Source location: bronze-layer transactions in ADLS Gen2 (abfss URI).
bronze_path = (
    "abfss://bronze@stfinancedev.dfs.core.windows.net/bnz_transactions"
)

# Target location: silver-layer transactions in ADLS Gen2 (abfss URI).
silver_path = (
    "abfss://silver@stfinancedev.dfs.core.windows.net/s_transactions"
)

### **Schema Validation**

In [0]:
# Expected layout of the incoming transactions data: column name -> Spark type,
# in order. Every column is declared nullable.
# NOTE(review): `rn` is presumably a row-number helper column produced
# upstream — confirm it is really expected in the bronze data.
expected_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ('TransactionID', StringType()),
        ('AccountID', StringType()),
        ('CardID', StringType()),
        ('TransactionDateTime', TimestampType()),
        ('TransactionType', StringType()),
        ('Amount', DecimalType(18,2)),
        ('Description', StringType()),
        ('MCC', StringType()),
        ('FraudScore', DecimalType(5,2)),
        ('FraudFlag', StringType()),
        ('rn', IntegerType()),
    )
])

In [0]:
# Read the Parquet files stored under bronze_path into a DataFrame.
incoming_df = (
    spark.read
    .format("parquet")
    .load(bronze_path)
)

In [0]:
#schema validation
# Check incoming_df against expected_schema using the project helper imported
# from schema_utils; "Transactions" is passed as its third argument.
# NOTE(review): what validate_schema does on a mismatch (raise vs. log) is not
# visible from this notebook — confirm before relying on it as a hard gate.
validate_schema(incoming_df, expected_schema,"Transactions")

In [0]:
# Render the data read from the bronze layer in the notebook output.
incoming_df.display()

### **Incremental load**

In [0]:
# Determine the high-water mark already present in silver: the latest
# TransactionDateTime loaded so far. If no Delta table exists at silver_path
# yet (first run), there is no watermark.
# Note: `max` here is the Spark aggregate brought in by the
# pyspark.sql.functions star import, not the Python builtin.
if DeltaTable.isDeltaTable(spark, silver_path):
    silver_df = DeltaTable.forPath(spark, silver_path).toDF()
    max_ts = (
        silver_df
        .agg(max(col("TransactionDateTime")).alias("max_ts"))
        .first()["max_ts"]
    )
else:
    max_ts = None

# Keep only bronze rows strictly newer than the watermark. The filter has to
# use the same column the watermark was computed from (TransactionDateTime);
# the transactions schema has no LoanStartDateTime column, so filtering on it
# would fail on every run after the first. An empty silver table yields a
# null aggregate, which is treated the same as "no watermark".
if max_ts is not None:
    inc_df = incoming_df.filter(col("TransactionDateTime") > lit(max_ts))
else:
    inc_df = incoming_df

### **Data validations**

In [0]:
# Null handling followed by de-duplication on TransactionID, as one pipeline.
# NOTE(review): fillna is given the string "0.0"; per the PySpark docs a
# string replacement is only applied to string-typed columns, so nulls in
# decimal columns such as Amount / FraudScore would be left untouched while
# null string values (including IDs) become "0.0" — confirm this is the
# intended rule.
df_clean = (
    inc_df
    .fillna("0.0")
    .dropDuplicates(['TransactionID'])
)

In [0]:
# Render the cleaned, de-duplicated batch in the notebook output.
df_clean.display()

### **Data count**

In [0]:
# Row counts before and after cleaning; each count() runs a Spark action.
# "Rejected" is simply the difference between the two counts.
incoming_rows, loaded_rows = inc_df.count(), df_clean.count()
rejected_rows = incoming_rows - loaded_rows

# One report line per figure.
print(
    f"Incoming rows from bronze layer {incoming_rows}",
    f"Loaded rows to silver layer {loaded_rows}",
    f"Rejected rows {rejected_rows}",
    sep="\n",
)

### **Write in silver layer**

In [0]:
# Append the cleaned batch to the Delta table at silver_path.
(
    df_clean.write
    .format("delta")
    .mode("append")
    .save(silver_path)
)