In [4]:
from pyspark.sql import SparkSession

# Reuse the active SparkSession if one exists; otherwise start one
# registered under the application name "LazyEvalDemo".
spark = (
    SparkSession.builder
    .appName("LazyEvalDemo")
    .getOrCreate()
)


In [5]:
# Build a small three-row DataFrame from local Python tuples
data = [(1, "Alice"), (2, "Bob"), (3, "Charlie")]
df = spark.createDataFrame(data, schema=["id", "name"])

# Transformations are lazy: each call below only extends the query plan
df2 = df.where(df["id"] > 1)  # keep rows whose id is greater than 1
df3 = df2.withColumnRenamed("name", "person_name")

# Up to this point Spark has only recorded what to do — nothing has executed
print("Transformations done, but no computation yet!")

# show() is an action, so this is where the plan is actually run
df3.show()

Transformations done, but no computation yet!
+---+-----------+
| id|person_name|
+---+-----------+
|  2|        Bob|
|  3|    Charlie|
+---+-----------+



In [6]:
# spark.range(5) produces a single "id" column holding 0, 1, 2, 3, 4
numbers = spark.range(5)

# Still lazy: adding a column and filtering only grow the plan
doubled = numbers.withColumn("double", numbers.id * 2)
filtered = doubled.where(doubled["double"] > 5)

# explain() prints the physical plan Spark would run; it does not execute it
filtered.explain()

== Physical Plan ==
*(1) Project [id#37L, (id#37L * 2) AS double#39L]
+- *(1) Filter ((id#37L * 2) > 5)
   +- *(1) Range (0, 5, step=1, splits=22)




In [7]:
# show() is an action: it makes Spark execute the plan built for `filtered`
# and print the result (defaults spelled out: at most 20 rows, long cells truncated)
filtered.show(n=20, truncate=True)

+---+------+
| id|double|
+---+------+
|  3|     6|
|  4|     8|
+---+------+

