In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from datetime import datetime
import os
import time

In [0]:
%run ../logs/logs_notebook

In [0]:
%run ../utilities/Futuredate

In [0]:
%run ../utilities/Pastdate

In [0]:
# Root folders for the two storage layers this notebook touches.
# Both values end with "/" so a dataset folder name can be appended directly.
silver_path = "/mnt/mock_prajwal/example/silver/"
bronze_path = "/mnt/mock_prajwal/example/bronze/"

In [0]:
# Promote the TowerInfo dataset from the bronze layer (parquet) to the silver
# layer (Delta). Progress is reported through log_message (brought in by the
# %run'd logs notebook) as PROCESSING, then COMPLETED or FAILED.

# Seed every value the failure handler reports *before* entering the try
# block. Without this, an early error (for example dbutils.fs.ls raising
# because the bronze path is missing) would trigger a NameError inside the
# handler and hide the real cause, or silently reuse stale values left in the
# notebook session by an earlier run.
file_path = bronze_path + "TowerInfo"
file_type = "parquet"
Layer = "silver"
file_size_kb = None      # unknown until the dataset folder has been listed
file_mod_time = None     # unknown until the dataset folder has been listed
processed_by = None      # unknown until the notebook context has been read
start_time = time.time()  # fallback so the handler can always compute a duration

try:
    # List the bronze folder once and reuse the result. An empty listing
    # yields an empty name list, which simply falls through to the "not found"
    # branch below.
    bronze_entries = dbutils.fs.ls(bronze_path)
    files = [file.name for file in bronze_entries]

    # Directory entries are expected to carry a trailing "/" in their name.
    if 'TowerInfo/' in files:
        # Label used in the log text. file_path has no extension, so this is
        # just the folder name.
        file_name = os.path.basename(file_path).split(".")[0]

        # NOTE(review): size and modification time are taken from the FIRST
        # entry inside the dataset folder only, not from the dataset as a
        # whole - confirm this is what the audit log should contain.
        file_info = dbutils.fs.ls(file_path)[0]
        file_size_kb = file_info.size / 1024
        # modificationTime is divided by 1000 before conversion, i.e. it is
        # treated as epoch milliseconds.
        file_mod_time = datetime.fromtimestamp(file_info.modificationTime / 1000)
        processed_by = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags().apply('user')

        # Start timing the actual processing work.
        start_time = time.time()
        log_message(file_path, file_type, file_size_kb, file_mod_time, None, "PROCESSING", 0, processed_by, f"Reading Parquet File {file_name}", Layer)

        # Load the parquet dataset into a DataFrame.
        df = spark.read.format("parquet").load(file_path)

        # Normalise column names: strip spaces, then lower-case. This yields
        # exactly the names the previous two-pass rename produced (its second
        # pass could never insert "_" because no spaces were left by then).
        # NOTE(review): if "Installed Date" -> "installed_date" was intended,
        # the rename should replace spaces with "_" instead - confirm against
        # the bronze schema before changing it.
        df = df.toDF(*[c.replace(" ", "").lower() for c in df.columns])

        # Wrap the date helpers from the %run'd utility notebooks as UDFs that
        # return DateType.
        pastdate = udf(date_format_udf, DateType())
        # NOTE(review): futuredate is not used in this cell; kept in case it is
        # used interactively elsewhere in the notebook session.
        futuredate = udf(date_format_udf_Policy_future, DateType())

        df = df.withColumn("installed_date", pastdate(col("installed_date")))

        # Record count for the audit log.
        record_count = df.count()

        # Write to silver BEFORE reporting success, so a failed write can never
        # leave a COMPLETED entry followed by a FAILED one.
        # NOTE(review): assumes an "ingestion_time" column exists after the
        # rename above - confirm against the bronze schema.
        df.write.mode("overwrite").format("delta").partitionBy("ingestion_time").option("overwriteSchema", "true").save(silver_path + "TowerInfo")

        # Duration now covers read, transform, count and write.
        processing_time_sec = int(time.time() - start_time)
        log_message(file_path, file_type, file_size_kb, file_mod_time, record_count, "COMPLETED", processing_time_sec, processed_by, f"Successfully processed {file_path}", Layer)
    else:
        # Nothing to process; say so instead of finishing silently.
        print(f"'TowerInfo/' not found under {bronze_path}; nothing was written to the silver layer.")
except Exception as e:
    processing_time_sec = int(time.time() - start_time)
    try:
        log_message(file_path, file_type, file_size_kb, file_mod_time, 0, "FAILED", processing_time_sec, processed_by, f"Error processing file {file_path}: {str(e)}", Layer)
    except Exception as log_error:
        # A broken audit log must not replace the error that actually stopped
        # the load; report it and carry on to the re-raise.
        print(f"Could not record FAILED status: {log_error}")
    # Bare raise re-raises the exception being handled with its traceback intact.
    raise



In [0]:
# Load the TowerInfo Delta table back from the silver layer and render it,
# so the result of the write can be inspected.
silver_tower_path = silver_path + "TowerInfo"
df_silver = spark.read.format("delta").load(silver_tower_path)
display(df_silver)

In [0]:
# Reconciliation view: row counts of the bronze parquet source and the silver
# Delta table (df_silver comes from the previous cell), shown side by side.
df_bronze = spark.read.format("parquet").load(bronze_path + "TowerInfo")

# Bronze is counted first, then silver.
bronze_count, silver_count = df_bronze.count(), df_silver.count()

# One-row DataFrame so the two numbers render as a table.
count_columns = ["bronze_count", "silver_count"]
count_rows = [(bronze_count, silver_count)]
counts_df = spark.createDataFrame(count_rows, count_columns)
display(counts_df)