# Spark Setup Verification

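Run the cells in this notebook from top to bottom to verify that your Spark installation is working correctly. The CSV and JSON checks read the sample files `../datasets/csv/employees.csv` and `../datasets/json/orders.json`, so those relative paths must resolve from the directory the notebook is run in.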

In [None]:
from pyspark.sql import SparkSession

# Obtain the session used by every later cell: application name "Setup Test",
# with spark.sql.adaptive.enabled set to "true".
spark = (
    SparkSession.builder
    .appName("Setup Test")
    .config("spark.sql.adaptive.enabled", "true")
    .getOrCreate()
)

# Report the versions the session is actually running with, so a mismatched
# install is visible immediately.
print(f"Spark Version: {spark.version}")
print(f"Python Version: {spark.sparkContext.pythonVer}")

In [None]:
# Smoke test: build a small DataFrame from in-memory (name, age) tuples and
# display it. No external files are involved in this step.
data = [
    ("Alice", 30),
    ("Bob", 25),
    ("Charlie", 35),
]
df = spark.createDataFrame(data, schema=["name", "age"])
df.show()

In [None]:
# Smoke test: load the employees CSV, treating the first row as column names
# and letting Spark infer the column types, then preview rows and schema.
employees = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("../datasets/csv/employees.csv")
)
employees.show(n=5)
employees.printSchema()

In [None]:
# Smoke test: load the orders JSON file, then preview three untruncated rows
# and the schema.
# NOTE(review): spark.read.json defaults to one JSON object per line; if
# orders.json is a single multi-line document it would need multiLine=True —
# confirm against the dataset.
orders = spark.read.json(
    path="../datasets/json/orders.json",
)
orders.show(n=3, truncate=False)
orders.printSchema()

In [None]:
# Smoke test for Spark SQL: expose the employees DataFrame (loaded in the CSV
# cell) as a temp view, then aggregate head count and average salary per
# department.
employees.createOrReplaceTempView("employees")
spark.sql(
    "SELECT department, COUNT(*) as count, AVG(salary) as avg_salary "
    "FROM employees "
    "GROUP BY department"
).show()

In [None]:
# Final step: shut the session down, then confirm that every check above ran.
# Cells that use `spark` cannot be re-run after this without recreating it.
spark.stop()
print(
    "Setup verification complete!"
)