In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.functions import monotonically_increasing_id, col, count, row_number, lit, current_timestamp, coalesce, max
from pyspark.sql.window import Window
from delta.tables import DeltaTable

In [0]:
# Root folders of the silver (input) and gold (output) layers used by the cells below.
silver_path, gold_path = (
    "/mnt/mock_prajwal/Mock2/silver/",
    "/mnt/mock_prajwal/Mock2/gold/",
)

In [0]:
# Load the Day-0 customer details Delta dataset from the silver layer
# and print its schema for inspection.
df = spark.read.load(silver_path + "FF_Customer_Details_Day0", format="delta")
df.printSchema()

In [0]:
# Build the product dimension: project the product attributes, keep one row
# per product_name, then number the rows by product_name to get product_key.
# NOTE(review): dropDuplicates does not control which row is kept when the
# other attributes differ for the same product_name - confirm this is acceptable.
product_dim = (
    df.select(
        "product_name",
        "product_sub_category",
        "product_category",
        "product_container",
        "product_base_margin",
    )
    .dropDuplicates(["product_name"])
    .withColumn("product_key", F.row_number().over(Window.orderBy("product_name")))
)
display(product_dim)

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType

# Schema of the product dimension. This is the single source of truth for the
# Dim_Product table definition below (product_key is the only NOT NULL column).
schema = StructType([
    StructField("product_name", StringType(), True),
    StructField("product_sub_category", StringType(), True),
    StructField("product_category", StringType(), True),
    StructField("product_container", StringType(), True),
    StructField("product_base_margin", DoubleType(), True),
    StructField("product_key", IntegerType(), False)
])

# Create the schema (database) if it does not exist
spark.sql("CREATE SCHEMA IF NOT EXISTS Prajwal_Mock")

# Create the Delta table if it does not exist. The columns come from `schema`
# and the location from `gold_path`, so the table definition cannot drift from
# the schema above or from the path the dimension is written to.
(
    DeltaTable.createIfNotExists(spark)
    .tableName("Prajwal_Mock.Dim_Product")
    .addColumns(schema)
    .location(gold_path + "Dim_Product")
    .execute()
)

In [0]:
# Persist the product dimension to the gold layer as Delta, replacing any existing data.
product_dim.write.save(gold_path + "Dim_Product", format="delta", mode="overwrite")

In [0]:
# Read the dimension back from the gold Delta path. The path is built from
# `gold_path` exactly as in the write cell, rather than re-declaring the root
# and hard-coding the full path a second time.
product_dim_df = spark.read.format("delta").load(gold_path + "Dim_Product")

# Read the same data through the registered table
product_dim_table_df = spark.table("Prajwal_Mock.Dim_Product")

# Display both views so they can be compared
display(product_dim_df)
display(product_dim_table_df)

In [0]:
# Count records in the gold layer (one row per distinct product_name)
gold_count = product_dim_df.count()

# Re-read the silver source and count its rows
silver_df = spark.read.format("delta").load(silver_path + "FF_Customer_Details_Day0")
silver_count = silver_df.count()

# The raw silver row count is not comparable to the dimension, which is
# deduplicated on product_name; the distinct product count is the figure
# the gold count has to match.
silver_product_count = silver_df.select("product_name").distinct().count()

# Display counts
display(spark.createDataFrame(
    [(silver_count, silver_product_count, gold_count)],
    ["Silver Layer Count", "Silver Distinct Product Count", "Gold Layer Count"],
))

# Fail loudly if the dimension lost or gained products relative to silver
assert gold_count == silver_product_count, (
    f"Dim_Product has {gold_count} rows but silver has "
    f"{silver_product_count} distinct product_name values"
)

In [0]:
# # Drop the Dim_Product table (disabled; uncomment to reset the table registration)
# spark.sql("DROP TABLE IF EXISTS Prajwal_Mock.Dim_Product")

In [0]:
# List every table registered in the Prajwal_Mock schema
show_tables_sql = "SHOW TABLES IN Prajwal_Mock"
tables_df = spark.sql(show_tables_sql)
display(tables_df)