`Note: ` Above 4 exercise are just for practice purpose only 

`Rule 1:` For just data movement donot use notebooks(spark)

`In Microsoft Fabric, for data ingestion, you should strictly use the Fabric Data Pipeline’s lakehouse or Files layer ingestion capabilities, typically via Copy Data or Data Flow activities. This ensures that raw data from sources like CSV, JSON, Parquet, or databases lands in the Bronze layer of the lakehouse in a reliable, schema-compliant, and versioned manner. Using these native Fabric ingestion methods guarantees scalability, lineage tracking, and compatibility`

# 1️⃣ Read Bronze data

In [13]:
from pyspark.sql.functions import (
    col, to_date, trim, coalesce, lit, when
)

# Read Bronze data from Files
emp_bronze_df = (
    spark.read
         .option("header", "true")
         .option("inferSchema", "true")
         .csv("Files/bronze/emp.csv")
)

# Preview data (Git-friendly)
emp_bronze_df.printSchema()
emp_bronze_df.show(5)


StatementMeta(, ca95096c-3642-46f2-8b3b-689d541a980f, 15, Finished, Available, Finished)

root
 |-- empno: integer (nullable = true)
 |-- ename: string (nullable = true)
 |-- job: string (nullable = true)
 |-- mgr: integer (nullable = true)
 |-- hiredate: string (nullable = true)
 |-- sal: integer (nullable = true)
 |-- comm: integer (nullable = true)
 |-- deptno: integer (nullable = true)

+-----+------+--------+----+---------+----+----+------+
|empno| ename|     job| mgr| hiredate| sal|comm|deptno|
+-----+------+--------+----+---------+----+----+------+
| 7369| SMITH|   CLERK|7902|17-Dec-80| 800|NULL|    20|
| 7900| JAMES|   CLERK|7698| 3-Dec-81| 950|NULL|    30|
| 7876| ADAMS|   CLERK|7788|23-May-87|1000|NULL|    20|
| 7521|  WARD|SALESMAN|7698|22-Feb-81|1250| 500|    30|
| 7654|MARTIN|SALESMAN|7698|28-Sep-81|1250|1400|    30|
+-----+------+--------+----+---------+----+----+------+
only showing top 5 rows



# 2️⃣ Apply transformations

In [12]:
spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")
emp_silver_df = (
    emp_bronze_df
        # Convert hiredate string to DATE
        .withColumn(
            "hiredate",
            to_date(trim(col("hiredate")), "dd-MMM-yy")
        )
        # Replace NULL commission with 0
        .withColumn(
            "comm",
            coalesce(col("comm"), lit(0))
        )
        # Calculate total salary
        .withColumn(
            "totalsal",
            col("sal") + col("comm")
        )
        # Derive salary grade
        .withColumn(
            "salgrade",
            when(col("totalsal") < 2000, "LOW")
            .when(col("totalsal").between(2000, 3400), "MEDIUM")
            .otherwise("HIGH")
        )
)

# Preview transformed data
emp_silver_df.printSchema()
emp_silver_df.show(5)


StatementMeta(, ca95096c-3642-46f2-8b3b-689d541a980f, 14, Finished, Available, Finished)

root
 |-- empno: integer (nullable = true)
 |-- ename: string (nullable = true)
 |-- job: string (nullable = true)
 |-- mgr: integer (nullable = true)
 |-- hiredate: date (nullable = true)
 |-- sal: integer (nullable = true)
 |-- comm: integer (nullable = false)
 |-- deptno: integer (nullable = true)
 |-- totalsal: integer (nullable = true)
 |-- salgrade: string (nullable = false)

+-----+------+--------+----+----------+----+----+------+--------+--------+
|empno| ename|     job| mgr|  hiredate| sal|comm|deptno|totalsal|salgrade|
+-----+------+--------+----+----------+----+----+------+--------+--------+
| 7369| SMITH|   CLERK|7902|1980-12-17| 800|   0|    20|     800|     LOW|
| 7900| JAMES|   CLERK|7698|1981-12-03| 950|   0|    30|     950|     LOW|
| 7876| ADAMS|   CLERK|7788|1987-05-23|1000|   0|    20|    1000|     LOW|
| 7521|  WARD|SALESMAN|7698|1981-02-22|1250| 500|    30|    1750|     LOW|
| 7654|MARTIN|SALESMAN|7698|1981-09-28|1250|1400|    30|    2650|  MEDIUM|
+-----+------+-

# 3. Write to Silver layer (Delta)

In [None]:
emp_silver_df.write \
    .format("delta") \
    .mode("overwrite") \
    .saveAsTable("silver.silver_emp")


StatementMeta(, ca95096c-3642-46f2-8b3b-689d541a980f, 17, Finished, Available, Finished)