In [0]:
# Notebook: 04_silver_student_parent_class
# Layer: Silver
# Purpose: Build student dimension by joining student, parent, and class

In [0]:
# Load the three bronze-layer tables that feed the student dimension.
student_df, parent_df, class_df = (
    spark.table("student_project.bronze_student"),
    spark.table("student_project.bronze_parent"),
    spark.table("student_project.bronze_class"),
)

In [0]:
from pyspark.sql.functions import col, concat_ws, regexp_replace, to_date, trim, upper

In [0]:
# Curate the student columns carried into the silver dimension: keys are
# passed through, the name parts are merged into one normalized column, and
# the two date columns are converted with to_date.
student_clean_df = student_df.select(
    col("student_id"),
    col("parent_id"),
    col("class_id"),
    col("section_id"),
    col("academic_yr"),

    # Build the full student name. concat_ws skips NULL parts but keeps
    # empty-string parts, and the outer trim only strips the two ends, so a
    # blank mid_name or padded name parts would leave doubled spaces inside
    # the value. Collapse every whitespace run to a single space first.
    upper(
        trim(
            regexp_replace(
                concat_ws(
                    " ",
                    col("first_name"),
                    col("mid_name"),
                    col("last_name")
                ),
                r"\s+",
                " "
            )
        )
    ).alias("student_name"),

    # No explicit format is passed to to_date, so Spark's default
    # string-to-date conversion applies.
    # NOTE(review): assumes dob / admission_date arrive in a format that
    # conversion accepts (e.g. yyyy-MM-dd) -- confirm against the bronze data.
    to_date(col("dob")).alias("dob"),
    col("gender"),
    to_date(col("admission_date")).alias("admission_date"),
    col("isActive")
)

In [0]:
# Sanity check: fetch the first row of the cleaned student frame.
student_clean_df.head(1)

In [0]:
# Row count of the cleaned student frame; count() is a Spark action, so this
# evaluates the select defined above.
student_clean_df.count()

In [0]:
# Keep only the parent attributes used downstream; father_name is trimmed
# and upper-cased, the other columns pass through unchanged.
father_name_col = upper(trim(col("father_name"))).alias("father_name")
parent_clean_df = parent_df.select("parent_id", father_name_col, "f_mobile", "f_email")

In [0]:
# Left join on parent_id: every student row is kept, with parent columns left
# NULL when no parent row matches.
student_parent_df = student_clean_df.join(parent_clean_df, "parent_id", "left")

In [0]:
# Preview five rows of the student + parent join result.
student_parent_df.show(5)

In [0]:
# Class lookup: the key plus the class label. The label is exposed as
# class_name because a bare `name` column is ambiguous once it sits next to
# the student and parent columns, and class_name is the column the later
# distinct-pairs check in this notebook selects.
class_clean_df = class_df.select(
    col("class_id"),
    col("name").alias("class_name")
)

In [0]:
# Attach the class lookup columns to each student/parent row. The left join
# keeps rows whose class_id has no match in the lookup.
student_parent_class_df = student_parent_df.join(class_clean_df, "class_id", "left")

In [0]:
# Inspect the joined schema, then preview five rows.
student_parent_class_df.printSchema()
student_parent_class_df.show(5)

In [0]:
from pyspark.sql.functions import col

In [0]:
# Class lookup with the label column renamed from name to class_name.
class_name_col = col("name").alias("class_name")
class_clean_df = class_df.select("class_id", class_name_col)

In [0]:
# Rebuild the three-way frame from the current class lookup; the left join
# keeps every student/parent row even when class_id has no match.
student_parent_class_df = student_parent_df.join(class_clean_df, on="class_id", how="left")

In [0]:
# Inspect the schema of the rebuilt frame, then preview five rows.
student_parent_class_df.printSchema()
student_parent_class_df.show(5)

In [0]:
# List the distinct class_id / class_name pairs present after the join.
class_pairs_df = student_parent_class_df.select("class_id", "class_name").distinct()
class_pairs_df.show()

In [0]:
# Persist the dimension as a Delta table; overwrite mode replaces whatever
# the table currently holds.
(
    student_parent_class_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("student_project.silver_student_dimension")
)

In [0]:
# Read the saved table back and preview five rows.
spark.table("student_project.silver_student_dimension").show(5)