In [9]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
import time

# Function to create SparkSession with specific configuration
def create_spark_session(executor_instances=None, app_name="SparkTest"):
    builder = SparkSession.builder.appName(app_name)
    
    if executor_instances:
        builder = builder.config("spark.executor.instances", executor_instances)
    
    return builder.getOrCreate()

# Test function using DataFrame operations
def run_test():
    # Create a large DataFrame and perform operations
    df = spark.range(0, 1000000)
    
    # Perform some CPU-intensive operations
    result = df.select(
        col("id"),
        (col("id") * col("id")).alias("squared"),
        (col("id") % 2 == 0).alias("is_even")
    ).filter(col("is_even") == True) \
     .count()
    
    return result

# Test with default configuration
print("Testing with default configuration:")
spark = create_spark_session(app_name="DefaultConfig")
start_time = time.time()
result1 = run_test()
time1 = time.time() - start_time
print(f"Time taken: {time1:.2f} seconds")
print(f"Result: {result1}")

# Stop the current session
spark.stop()

# Test with 2 executors
print("\nTesting with 2 executors:")
spark = create_spark_session(executor_instances=2, app_name="TwoExecutors")
start_time = time.time()
result2 = run_test()
time2 = time.time() - start_time
print(f"Time taken: {time2:.2f} seconds")
print(f"Result: {result2}")

# Print comparison
print("\nComparison:")
print(f"Default configuration: {time1:.2f} seconds")
print(f"With 2 executors: {time2:.2f} seconds")
print(f"Difference: {abs(time2 - time1):.2f} seconds")
if time1 > time2:
    print(f"Speedup with executors: {time1/time2:.2f}x")
else:
    print(f"Slowdown with executors: {time2/time1:.2f}x")

Testing with default configuration:


25/01/07 23:57:04 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
                                                                                

Time taken: 0.34 seconds
Result: 500000

Testing with 2 executors:
Time taken: 0.19 seconds
Result: 500000

Comparison:
Default configuration: 0.34 seconds
With 2 executors: 0.19 seconds
Difference: 0.14 seconds
Speedup with executors: 1.75x
