In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.functions import monotonically_increasing_id, col, count, row_number, lit, current_timestamp, coalesce, max
from pyspark.sql.window import Window
from delta.tables import DeltaTable

In [0]:
# Base folders for the silver (input) and gold (output) layers.
# Both values end with "/" because later cells build table locations by plain
# string concatenation, e.g. silver_path + "TowerInfo".
silver_path = "/mnt/mock_prajwal/example/silver/"
gold_path = "/mnt/mock_prajwal/example/gold/"

In [0]:

# Read the silver-layer TowerInfo Delta data and render it for a quick look.
silver_tower_location = silver_path + "TowerInfo"
df = spark.read.load(silver_tower_location, format="delta")
display(df)


In [0]:
# Print the column names and types of the silver TowerInfo DataFrame loaded above.
df.printSchema()

In [0]:
from pyspark.sql.functions import col
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number

# Columns carried from the silver TowerInfo data into the tower dimension.
tower_columns = [
    "city",
    "country",
    "installed_date",
    "network_type",
    "signal_strength",
    "state",
    "tower_id",
    "tower_location",
]

# Keep a single row per tower_id.
towers_unique = df.select(*tower_columns).dropDuplicates(["tower_id"])

# No partitionBy: the window orders every row by tower_id, so row_number()
# numbers the deduplicated rows 1..N in tower_id order.
window_spec = Window.orderBy("tower_id")
tower_key = row_number().over(window_spec)

df_selected = towers_unique.withColumn("tower_key", tower_key)

display(df_selected)

In [0]:
# IntegerType is used below, so import it explicitly instead of relying on the
# wildcard import in the first cell.
from pyspark.sql.types import (
    StructType,
    StructField,
    StringType,
    DateType,
    LongType,
    IntegerType,
)

# PySpark-side description of the Dim_Tower columns; mirrors the DDL below.
# tower_key is non-nullable here to agree with the `INT NOT NULL` column in the DDL.
schema = StructType([
    StructField("city", StringType(), True),
    StructField("country", StringType(), True),
    StructField("installed_date", DateType(), True),
    StructField("network_type", StringType(), True),
    StructField("signal_strength", StringType(), True),
    StructField("state", StringType(), True),
    StructField("tower_id", LongType(), True),
    StructField("tower_location", StringType(), True),
    StructField("tower_key", IntegerType(), False)
])

# Create the schema if it does not exist
spark.sql("CREATE SCHEMA IF NOT EXISTS Prajwal_Telecom")

# Register the Dim_Tower Delta table (no-op if it already exists).
# The location is derived from gold_path so it stays consistent with the other
# gold-layer paths used in this notebook.
# NOTE(review): the write cell in this notebook saves to gold_path + "TowerInfo",
# not to this table's location (gold_path + "Dim_Tower") — confirm whether
# Dim_Tower is populated elsewhere or whether the two paths should match.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS Prajwal_Telecom.Dim_Tower (
        city STRING,
        country STRING,
        installed_date DATE,
        network_type STRING,
        signal_strength STRING,
        state STRING,
        tower_id LONG,
        tower_location STRING,
        tower_key  INT NOT NULL
    )
    USING DELTA
    LOCATION "{gold_path}Dim_Tower"
""")

In [0]:
# Persist the tower dimension rows to the gold layer as Delta, replacing
# whatever is currently stored at that path.
(
    df_selected.write
    .format("delta")
    .mode("overwrite")
    .save(gold_path + "TowerInfo")
)

In [0]:
# Load silver layer table
silver_df = spark.read.format("delta").load(silver_path + "TowerInfo")

# Load gold layer table
gold_df = spark.read.format("delta").load("/mnt/mock_prajwal/example/gold/TowerInfo")

# Record count for silver layer
silver_count = silver_df.count()

# Record count for gold layer
gold_count = gold_df.count()

# Display counts
display(spark.createDataFrame([(silver_count, gold_count)], ["Silver Layer Count", "Gold Layer Count"]))

In [0]:
# Read from gold layer table
gold_df = spark.read.format("delta").load(gold_path + "TowerInfo")

# Display the gold layer DataFrame
display(gold_df)