In [1]:
import os
import sys

from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

from platform import python_version

# Describe the Spark application for the Kubernetes cluster manager in a single
# fluent expression: master URL and app name first, then every other option as
# a (key, value) pair.
sparkConf = (
    SparkConf()
    .setMaster("k8s://https://kubernetes.docker.internal:6443")
    .setAppName("spark")
    .setAll([
        # Container image and namespace used for the pods.
        # NOTE(review): the tag says v3.4.1 -- confirm it matches the PySpark
        # version installed next to this notebook.
        ("spark.kubernetes.container.image", "mzelazkiewicz/spark-py:v3.4.1"),
        ("spark.kubernetes.namespace", "spark"),
        # Executor count and per-process sizing.
        ("spark.executor.instances", "2"),
        ("spark.executor.cores", "1"),
        ("spark.driver.memory", "512m"),
        ("spark.executor.memory", "512m"),
        # NOTE(review): this option looks to be deprecated / a no-op on
        # Spark 3.x -- confirm against the Spark docs before relying on it.
        ("spark.kubernetes.pyspark.pythonVersion", "3"),
        # Service account names handed to Spark for Kubernetes authentication.
        ("spark.kubernetes.authenticate.driver.serviceAccountName", "spark"),
        ("spark.kubernetes.authenticate.serviceAccountName", "spark"),
        # Fixed port and host name advertised for the driver.
        ("spark.driver.port", "29413"),
        ("spark.driver.host", "spark-driver.spark.svc.cluster.local"),
    ])
)

# Building (or reusing) the session is the step that actually brings the
# cluster-side workers up; `sc` is the shortcut used by the RDD cells.
spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
sc = spark.sparkContext

# Report the interpreter and Spark versions this session is running with.
print(f'Python version: {python_version()}')
print(f'The PySpark {spark.version} version is running...')

Python version: 3.10.11
The PySpark 3.4.0 version is running...


In [2]:
from random import random
from operator import add
# Number of RDD partitions to split the simulation across.
partitions = 7
# Total number of random trials: ten million per partition.
n = partitions * 10_000_000
def f(_):
    """Run one Monte Carlo trial for estimating pi.

    Draws a point uniformly from the square [-1, 1) x [-1, 1) and checks
    whether it falls inside (or on) the unit circle.

    Args:
        _: Ignored; present only so the function can be used with ``map``.

    Returns:
        1 if the sampled point satisfies x^2 + y^2 <= 1, otherwise 0.
    """
    # Each coordinate is a fresh draw rescaled from [0, 1) to [-1, 1).
    px = 2 * random() - 1
    py = 2 * random() - 1

    inside_circle = px ** 2 + py ** 2 <= 1
    return int(inside_circle)
# Spread the n trial indices over the partitions, score every trial with f,
# and add the 0/1 results together to get the number of hits.
count = (
    sc.parallelize(range(1, n + 1), partitions)
    .map(f)
    .reduce(add)
)
# The hit ratio approximates (circle area) / (square area) = pi / 4.
print("Pi is roughly %f" % (4.0 * count / n))

Pi is roughly 3.141080


In [3]:
# Step 1 -- turn a small local Python list into an RDD.
data = [1, 2, 3, 5, 8, 13, 21, 34, 55]
rdd = sc.parallelize(data)

# Step 2 -- transformation: keep only the values strictly below 10.
filtered_rdd = rdd.filter(lambda value: value < 10)

# Step 3 -- action: count the surviving elements and bring the number back.
count = filtered_rdd.count()

print(f"Count of numbers less than 10: {count}")

Count of numbers less than 10: 5
