In [1]:
from pyspark import SparkContext
from pyspark.sql import SparkSession

# Demo cell: square a small list of integers in a Spark RDD and sum the squares.
# The cell-level names (sc, spark, data, rdd, squared_rdd, sum_of_squares) are
# bound at notebook scope exactly as before.

# Reuse the active SparkContext if there is one, otherwise start one with the
# default configuration.
try:
    sc = SparkContext.getOrCreate()
except ValueError as e:
    # Best-effort fallback: report the error and start an explicit local
    # context instead. The original author noted that an "insecure Py4j
    # gateway" message is the error expected on this path.
    print(e)
    sc = SparkContext("local", "SimpleRDDExample")

try:
    # Obtain the session through the builder (the documented entry point);
    # it attaches to the SparkContext that is already active.
    spark = SparkSession.builder.getOrCreate()

    # Distribute a small list of numbers as an RDD.
    data = [1, 2, 3, 4, 5]
    rdd = sc.parallelize(data)

    # Transformation (lazy): square each element.
    squared_rdd = rdd.map(lambda x: x * x)

    # Action: take(10) returns *at most* 10 elements; this RDD only has 5,
    # so all of them are shown.
    print("Squared data before reduce:", squared_rdd.take(10))

    # Action: fold the squared values into a single sum.
    sum_of_squares = squared_rdd.reduce(lambda x, y: x + y)

    # Report the input and the final result.
    print("Original data:", data)
    print("Sum of squared values:", sum_of_squares)
finally:
    # Always release the SparkContext, even if one of the jobs above raised,
    # so a failed run does not leave a live context behind in the kernel.
    sc.stop()


Squared data before reduce: [1, 4, 9, 16, 25]
Original data: [1, 2, 3, 4, 5]
Sum of squared values: 55


In [5]:
# Shell escape: print the version of the `java` executable found on PATH.
# NOTE(review): presumably run to check which JVM PySpark will launch;
# confirm that this Java version is supported by the installed Spark release.
!java -version

java version "21.0.1" 2023-10-17 LTS
Java(TM) SE Runtime Environment (build 21.0.1+12-LTS-29)
Java HotSpot(TM) 64-Bit Server VM (build 21.0.1+12-LTS-29, mixed mode, sharing)
