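# GOLD FACT

# Builds the gold-layer call fact table: reads the silver Delta tables
# (CustMaster, DeviceInfo, TowerInfo, CallRecords), joins call records with
# device and tower data, and loads the result into the Delta table
# Prajwal_Telecom.Fact stored under the gold path.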

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.functions import monotonically_increasing_id, col, count, row_number, lit, current_timestamp, coalesce, max
from pyspark.sql.window import Window
from delta.tables import DeltaTable

In [0]:
# Storage roots for the medallion layers used by this notebook.
# Both values end with "/" because table names are appended by plain
# string concatenation further down.
gold_path = "/mnt/mock_prajwal/example/gold/"      # destination layer (fact table)
silver_path = "/mnt/mock_prajwal/example/silver/"  # source layer (cleaned tables)

In [0]:
def _read_silver(table_name):
    """Return the Delta table stored at ``silver_path + table_name`` as a DataFrame."""
    return spark.read.format("delta").load(silver_path + table_name)


# Source tables from the silver layer, loaded in the same order as before.
df_Cus = _read_silver("CustMaster")
df_Dev = _read_silver("DeviceInfo")
df_tow = _read_silver("TowerInfo")
df_Rec = _read_silver("CallRecords")

In [0]:
# Cast the call records' customer_id to int before it is used as a join key.
# NOTE(review): values that cannot be parsed as int become NULL after the
# cast - confirm that is acceptable for the silver CallRecords data.
customer_id_as_int = F.col("customer_id").cast("int")
df_calls = df_Rec.withColumn("customer_id", customer_id_as_int)

In [0]:
# Join call records to device info (by customer_id) and tower info (by
# tower_id). Left joins keep every call even when no device/tower row matches.
# NOTE(review): if DeviceInfo holds several rows per customer_id, or TowerInfo
# several rows per tower_id, these joins multiply call rows - confirm both
# lookups are unique on their key.
calls_enriched = (
    df_calls
    .join(df_Dev, "customer_id", "left")
    .join(df_tow, "tower_id", "left")
)

# df_calls['*'] already expands to every call-record column. Selecting
# df_tow['tower_id'] (or df_Dev['device_id']) on top of a column of the same
# name yields a DataFrame with duplicate column names, which Delta refuses to
# write. Only pull a lookup column when the call records do not provide it.
call_column_names = {name.lower() for name in df_calls.columns}
lookup_columns = [
    lookup_df[name]
    for lookup_df, name in ((df_Dev, "device_id"), (df_tow, "tower_id"))
    if name not in call_column_names
]

df_join = calls_enriched.select(df_calls["*"], *lookup_columns)

In [0]:
# Preview the joined rows before they are loaded into the gold fact table.
display(df_join)

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType, TimestampType, DoubleType

# Column contract of the gold fact table. This StructType is the single source
# of truth: the CREATE TABLE column list below is generated from it, so the
# Python schema and the SQL DDL cannot drift apart.
schema = StructType([
    StructField("call_id", StringType(), True),
    StructField("customer_id", IntegerType(), True),
    StructField("tower_id", StringType(), True),
    StructField("date_id", DateType(), True),
    StructField("call_start_time", TimestampType(), True),
    StructField("call_end_time", TimestampType(), True),
    StructField("call_duration", DoubleType(), True),
    StructField("call_type", StringType(), True),
    StructField("roaming_flag", StringType(), True),
    StructField("charge_amount", IntegerType(), True),
    StructField("device_id", IntegerType(), True)
])

# Create the schema if it does not exist
spark.sql("CREATE SCHEMA IF NOT EXISTS Prajwal_Telecom")

# Render "name TYPE" pairs from the StructType, e.g. "call_id STRING".
# simpleString() returns the Spark SQL type name ("string", "int", ...).
fact_columns_ddl = ",\n        ".join(
    f"{field.name} {field.dataType.simpleString().upper()}"
    for field in schema.fields
)

# Create the table with the specified schema. The location is derived from
# gold_path so it always matches the path the load step writes to
# (gold_path + "Fact") instead of repeating the literal here.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS Prajwal_Telecom.Fact (
        {fact_columns_ddl}
    )
    USING DELTA 
    LOCATION '{gold_path}Fact'
""")

In [0]:
# Show the current contents of the registered gold fact table.
fact_query = "SELECT * FROM Prajwal_Telecom.Fact"
fact_df = spark.sql(fact_query)
display(fact_df)

In [0]:
# Load the joined rows into the Delta table at gold_path + "Fact" (created by
# the CREATE TABLE cell above, which must have run first).
#
# A blind append is not idempotent: every re-run of the notebook would load
# the same calls again and duplicate fact rows. An insert-only MERGE adds only
# the calls that are not in the table yet, so Restart & Run All is safe.
# It also drops the "mergeSchema" option, so unexpected extra columns no
# longer silently widen the declared fact schema.
# NOTE(review): assumes call_id identifies a call in the fact table - confirm
# against the CallRecords key before relying on this.
fact_table = DeltaTable.forPath(spark, gold_path + "Fact")

(
    fact_table.alias("tgt")
    .merge(df_join.alias("src"), "tgt.call_id = src.call_id")
    .whenNotMatchedInsertAll()
    .execute()
)