In PySpark, createOrReplaceTempView() registers a DataFrame as a temporary SQL view so you can query it using Spark SQL syntax. createTempView() does the same, but raises an error if a view with that name already exists instead of replacing it.

In [0]:
# Column names, listed in the same order as the fields of each row tuple.
columns = ["id", "name", "age", "department"]

# Sample employee rows: (id, name, age, department).
data = [
    (1, "Alice", 25, "HR"),
    (2, "Bob", 30, "IT"),
    (3, "Cathy", 28, "Finance"),
    (4, "David", 35, "IT"),
    (5, "Eva", 40, "HR"),
]

# Build the DataFrame from the in-memory rows, then render it in the notebook.
df = spark.createDataFrame(data, schema=columns)
df.display()

id,name,age,department
1,Alice,25,HR
2,Bob,30,IT
3,Cathy,28,Finance
4,David,35,IT
5,Eva,40,HR


In [0]:
# Register df as a temporary view named "employees" so that spark.sql()
# queries can refer to it by that name. If a view called "employees"
# already exists, it is replaced rather than raising an error.
df.createOrReplaceTempView("employees")


In [0]:
# Projection: keep only the name and age columns of the "employees" view.
name_age_query = "SELECT name, age FROM employees"
result1 = spark.sql(name_age_query)
result1.display()

# Row filter: every column, restricted to the IT department.
it_staff_query = "SELECT * FROM employees WHERE department = 'IT'"
result2 = spark.sql(it_staff_query)
result2.display()

# Aggregation: average age per department.
avg_age_query = """
    SELECT department, AVG(age) as avg_age
    FROM employees
    GROUP BY department
"""
result3 = spark.sql(avg_age_query)
result3.display()


name,age
Alice,25
Bob,30
Cathy,28
David,35
Eva,40


id,name,age,department
2,Bob,30,IT
4,David,35,IT


department,avg_age
HR,32.5
IT,32.5
Finance,28.0


✅ A view created with createOrReplaceTempView() is scoped to the current SparkSession: it is not visible from other sessions and disappears when that session ends.

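✅ If you want a view shared across all sessions of the same Spark application, use createOrReplaceGlobalTempView(). Global temporary views live in the global_temp database, so query them with the qualified name, e.g. SELECT * FROM global_temp.employees.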