# 🏞️ Local Data Lake with Delta Lake & PySpark

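This notebook shows how to run Delta Lake with PySpark on a local Windows machine, treating a folder on your filesystem as the data lake. It walks through creating a Spark session, writing a small Delta table, reading it back, and reading an older version with time travel.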

In [None]:
# 📦 Install necessary packages (run once)
!pip install pyspark delta-spark

In [None]:
# ⚙️ Build a local Spark session with the Delta Lake extensions enabled
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip

# The two `.config(...)` entries register Delta's SQL extension and make the
# Delta catalog the session catalog; `local[*]` runs Spark inside this process.
builder = (
    SparkSession.builder
    .appName("Local Delta Lake Example")
    .master("local[*]")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# The builder is passed through the delta-spark helper before the session is
# created (or an already-running one is reused).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [None]:
# 📁 Define local data lake path (adjust to your system)
# Set the DELTA_LAKE_PATH environment variable to point the notebook at a
# different location without editing this cell; when the variable is unset or
# empty, the original Windows default below is used.
import os

lake_path = os.environ.get("DELTA_LAKE_PATH") or "file:///C:/data-lake/users"

In [None]:
# 📝 Build a three-row sample DataFrame and save it as a Delta table
data = [
    (1, "Alice"),
    (2, "Bob"),
    (3, "Cathy"),
]
df = spark.createDataFrame(data, schema=["id", "name"])

# "overwrite" replaces whatever is currently stored at `lake_path`.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .save(lake_path)
)

In [None]:
# 📖 Load the Delta table back from the local lake and display its rows
df_read = spark.read.load(lake_path, format="delta")
df_read.show()

In [None]:
# 🧪 Time travel: read the table as it was at an earlier version
# Version 0 is the table's first commit, so it exists as soon as the write cell
# above has run once. Change the number to inspect a later version.
spark.read.format("delta").option("versionAsOf", 0).load(lake_path).show()

In [None]:
# 🧹 Stop the Spark session created earlier in this notebook.
# Run this cell last: cells that use `spark` will need a new session
# (re-run the configuration cell) after it has been stopped.
spark.stop()