In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from datetime import datetime
import os
import time

In [0]:
%run ../logs/logs_notebook

In [0]:
%run ../utilities/Futuredate

In [0]:
%run ../utilities/Pastdate

In [0]:
# Storage locations: the bronze folder is read as input and the silver folder
# is written as output by the cells below. Both keep their trailing slash
# because later cells append the table name directly.
_LAKE_ROOT = "/mnt/mock_prajwal/Healthcare_practice"
bronze_path = f"{_LAKE_ROOT}/bronze/"
silver_path = f"{_LAKE_ROOT}/silver/"

In [0]:
# Process the bronze "Billing" parquet data into the silver Delta table and
# record PROCESSING / COMPLETED / FAILED entries through log_message.

# Defaults for every value the FAILED log entry needs. They are assigned before
# the try block so the except handler cannot raise a NameError (which would hide
# the real error) when something fails early, e.g. while listing bronze_path.
start_time = time.time()
file_path = bronze_path + "Billing"
file_type = "parquet"  # Define file_type
Layer = "silver"
file_size_kb = 0.0
file_mod_time = None
processed_by = None

# try block to handle exceptions
try:
    # List the bronze path once; dbutils.fs.ls raises if the path does not exist,
    # and an empty listing simply means there is nothing to process.
    files = [entry.name for entry in dbutils.fs.ls(bronze_path)]

    # check if the Billing folder exists in the bronze path
    if 'Billing/' in files:
        # set file details used by the log entries
        file_name = os.path.basename(file_path).split(".")[0]
        # Size and modification time are taken from the first entry inside the
        # Billing folder, not from the folder as a whole.
        file_info = dbutils.fs.ls(file_path)[0]
        file_size_kb = file_info.size / 1024
        # modificationTime is divided by 1000 before conversion (milliseconds -> seconds)
        file_mod_time = datetime.fromtimestamp(file_info.modificationTime / 1000)
        processed_by = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags().apply('user')

        # start time of the processing, used for the duration in the log entries
        start_time = time.time()
        log_message(file_path, file_type, file_size_kb, file_mod_time, None, "PROCESSING", 0, processed_by, f"Reading Parquet File {file_name}", Layer)

        # Load the parquet data into a DataFrame
        df = spark.read.format("parquet").load(file_path)

        # Normalise all column names to lower case
        df = df.toDF(*[c.lower() for c in df.columns])

        # Cast the money columns to double and round to 2 decimals.
        # `round` here is the Spark column function brought in by the wildcard import.
        df = df.withColumn("amountpaid", round(col("amountpaid").cast("double"), 2))
        df = df.withColumn("totalamount", round(col("totalamount").cast("double"), 2))

        # Replace missing totalamount values with the 11111 placeholder
        df = df.fillna({"totalamount": 11111})

        # Standardise the spelling of the payment modes; unknown values pass through
        df = df.withColumn("paymentmode",
               when(col("paymentmode") == "NetBanking", "Net Banking")
               .when(col("paymentmode") == "Credit Card CC", "Credit Card")
               .when(col("paymentmode") == "DebitCard", "Debit Card")
               .otherwise(col("paymentmode")))

        # Write the silver table exactly once (the previous version repeated this
        # identical overwrite after logging COMPLETED).
        df.write.mode("overwrite").format("delta").partitionBy("ingestion_time").option("overwriteSchema", "true").save(silver_path + "Billing")

        # Record count and processing time
        record_count = df.count()
        processing_time_sec = int(time.time() - start_time)

        # Final status
        log_message(file_path, file_type, file_size_kb, file_mod_time, record_count, "COMPLETED", processing_time_sec, processed_by, f"Successfully processed {file_path}", Layer)
    else:
        # Make the skip visible instead of finishing silently
        print(f"No 'Billing/' folder found under {bronze_path}; nothing to process.")
# handle exceptions
except Exception as e:
    processing_time_sec = int(time.time() - start_time)
    try:
        log_message(file_path, file_type, file_size_kb, file_mod_time, 0, "FAILED", processing_time_sec, processed_by, f"Error processing file {file_path}: {str(e)}", Layer)
    except Exception as log_error:
        # A logging problem must never replace the original failure.
        print(f"Could not write FAILED log entry: {log_error}")
    # bare raise re-raises the original exception with its traceback intact
    raise



In [0]:
# Read the Billing table back from the silver layer to inspect the result
df_silver_billing = spark.read.format("delta").load(silver_path + "Billing")
# Legacy name kept as an alias: the frame holds Billing data, not admissions.
df_silver_admissions = df_silver_billing
display(df_silver_billing)

In [0]:
# Show the log entries whose processed_time falls on today's date
log_path = "/mnt/mock_prajwal/Healthcare_practice/logs"
df_logs = spark.read.format("delta").load(log_path)
# Compare against current_date() instead of a hard-coded date literal so that
# "today" stays correct on every run.
# NOTE(review): current_date() is evaluated in the Spark session time zone;
# confirm this matches how log_message populates processed_time.
df_logs_today = df_logs.filter(df_logs['processed_time'].cast("date") == current_date())
display(df_logs_today)