# S1 J5 — Delta Lake (Write + Read)

This notebook writes a Delta table and reads it back.

If you are running locally and Delta is missing, install it first:
- `pip install delta-spark`


In [None]:
from pyspark.sql import SparkSession

# Build the session step by step: name the app, register the Delta SQL
# extension, point the default catalog at Delta's catalog implementation,
# then obtain the session.
session_builder = SparkSession.builder.appName("delta-lake-demo")
session_builder = session_builder.config(
    "spark.sql.extensions",
    "io.delta.sql.DeltaSparkSessionExtension",
)
session_builder = session_builder.config(
    "spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog",
)
spark = session_builder.getOrCreate()


In [None]:
from pyspark.sql import functions as F

# Input CSV and output Delta locations (relative paths).
data_path = "../../data/example.csv"
delta_path = "../../data/delta/users"

# Load the CSV, treating the first row as a header and letting Spark infer
# the column types.
raw = spark.read.options(header=True, inferSchema=True).csv(data_path)

# Normalise the two columns whose types matter downstream:
# signup_date becomes a date, spend becomes a double.
signup_as_date = F.to_date(F.col("signup_date"))
spend_as_double = F.col("spend").cast("double")

silver = raw.withColumn("signup_date", signup_as_date)
silver = silver.withColumn("spend", spend_as_double)


In [None]:
# Persist the cleaned DataFrame in Delta format at delta_path, using
# overwrite mode for the save.
silver.write.save(delta_path, format="delta", mode="overwrite")


In [None]:
# Load the Delta table back from delta_path and print its rows without
# truncating column values.
delta_df = spark.read.load(delta_path, format="delta")
delta_df.show(truncate=False)


In [None]:
# Register the Delta directory as a table for SQL queries (optional).
# Dropping first keeps this cell re-runnable without a "table already exists"
# error.
spark.sql("DROP TABLE IF EXISTS users_delta")

# NOTE(review): delta_path is a relative path. Confirm that Spark resolves
# this LOCATION against the same base directory that the earlier
# .save(delta_path) call used; if not, pass an absolute path here.
spark.sql(f"CREATE TABLE users_delta USING DELTA LOCATION '{delta_path}'")

# Per-plan summary: number of users and total spend rounded to 2 decimals.
# The literal is split only for readability; the SQL text is unchanged.
spark.sql(
    "SELECT plan, COUNT(*) AS users, ROUND(SUM(spend), 2) AS total_spend "
    "FROM users_delta GROUP BY plan"
).show()
