# S1 J5 — Delta Lake (Write + Read)

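This notebook loads `../../data/example.csv`, casts `signup_date` to a date and `spend` to a double, writes the result as a Delta table to `../../data/delta/users`, and reads it back.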

If you are running locally and Delta is missing, install it first:
- `pip install delta-spark`


In [16]:
pip install -U delta-spark

Note: you may need to restart the kernel to use updated packages.


In [17]:
import os
# Point JAVA_HOME at the Java 17 JDK (presumably so the Spark session created
# later in this notebook launches on that JVM). The location is machine-specific;
# edit JAVA_HOME_DIR if the JDK is installed elsewhere.
JAVA_HOME_DIR = "/usr/lib/jvm/java-17-openjdk-amd64"
os.environ["JAVA_HOME"] = JAVA_HOME_DIR

# Put that JDK's bin directory first on PATH. Any existing copy of the entry is
# dropped first, so re-running this cell does not keep growing PATH, and a
# missing PATH variable no longer raises KeyError.
java_bin_dir = os.path.join(JAVA_HOME_DIR, "bin")
current_path = os.environ.get("PATH", "")
other_entries = (
    [entry for entry in current_path.split(os.pathsep) if entry != java_bin_dir]
    if current_path
    else []
)
os.environ["PATH"] = os.pathsep.join([java_bin_dir, *other_entries])


In [19]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip

# Tear down any session left over from an earlier run of this cell. On a fresh
# kernel `spark` is not defined yet, so any error raised here is deliberately
# ignored (best-effort cleanup).
try:
    spark.stop()
except Exception:
    pass

# Spark settings that register Delta's SQL extension and catalog implementation.
delta_settings = {
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

builder = SparkSession.builder.appName("delta-lake-demo")
for setting_name, setting_value in delta_settings.items():
    builder = builder.config(setting_name, setting_value)

# Let the delta package finish configuring the builder, then start the session
# (or reuse one if it already exists).
spark = configure_spark_with_delta_pip(builder).getOrCreate()



In [20]:
from pyspark.sql import functions as F

# Input CSV and output Delta location; both are relative to the notebook's
# working directory.
data_path = "../../data/example.csv"
delta_path = "../../data/delta/users"

# Raw layer: read the CSV using its header row and let Spark infer column types.
raw = spark.read.csv(data_path, header=True, inferSchema=True)

# Silver layer: same columns, with explicit types for the two fields below.
signup_as_date = F.to_date("signup_date")
spend_as_double = F.col("spend").cast("double")
silver = raw.withColumn("signup_date", signup_as_date).withColumn("spend", spend_as_double)


In [14]:
import os
from pathlib import Path
# Show where the relative paths actually point, given the kernel's current
# working directory.
for label, location in (
    ("cwd:", os.getcwd()),
    ("data_path resolved:", Path(data_path).resolve()),
    ("delta_path resolved:", Path(delta_path).resolve()),
):
    print(label, location)


cwd: /home/lainislain/spark-databricks-learning/notebooks/gold_aggregation
data_path resolved: /home/lainislain/spark-databricks-learning/data/example.csv
delta_path resolved: /home/lainislain/spark-databricks-learning/data/delta/users


In [21]:
# Persist the silver frame as a Delta table at delta_path, replacing whatever
# an earlier run wrote there.
silver.write.save(delta_path, format="delta", mode="overwrite")


                                                                                

In [24]:
# Load the Delta table back from disk and print its rows without truncating
# long cell values.
delta_df = spark.read.load(delta_path, format="delta")
delta_df.show(truncate=False)


+------+---+---------+-----------+----------+---------+------+
|name  |age|city     |signup_date|plan      |is_active|spend |
+------+---+---------+-----------+----------+---------+------+
|Amina |29 |Seattle  |2024-01-15 |Pro       |true     |129.5 |
|Ben   |34 |Austin   |2023-11-02 |Basic     |false    |0.0   |
|Chen  |41 |San Jose |2022-06-30 |Pro       |true     |560.0 |
|Daria |23 |Chicago  |2024-09-10 |Free      |true     |12.75 |
|Eli   |38 |New York |2021-03-22 |Enterprise|true     |1240.0|
|Fatima|31 |Denver   |2024-05-05 |Basic     |true     |89.99 |
|Gabe  |27 |Boston   |2023-12-12 |Free      |false    |0.0   |
|Hiro  |45 |San Diego|2020-08-19 |Enterprise|true     |2300.1|
|Ines  |36 |Portland |2022-02-14 |Pro       |true     |410.25|
|Jamal |30 |Atlanta  |2024-07-01 |Basic     |true     |59.0  |
+------+---+---------+-----------+----------+---------+------+

