#### We must first initialize some variables for the current environment

In [None]:
import os
import socket
# Resolve this pod's hostname and IP address; the IP is later handed to Spark
# as `spark.driver.host`.
hostname = socket.gethostname()
IPAddr = socket.gethostbyname(hostname)

# Read the namespace from the service-account file mounted into the pod.
# Strip surrounding whitespace so a trailing newline can never end up in the
# namespace name passed to Spark.
with open('/var/run/secrets/kubernetes.io/serviceaccount/namespace', 'r') as f:
    current_namespace = f.readline().strip()

#### We can then create a Spark session (and its executors) directly from the notebook

In [None]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

# Build the Spark configuration, using the in-cluster Kubernetes API server
# as the cluster manager.
api_server_host = os.environ["KUBERNETES_SERVICE_HOST"]
sparkConf = SparkConf()
sparkConf.setMaster("k8s://https://" + api_server_host + ":443")

# Remaining settings, applied in order: client deploy mode (the driver is
# this notebook process), executor image and namespace, the address the
# driver is reachable at, and executor sizing.
for _key, _value in (
    ("spark.submit.deployMode", "client"),
    ("spark.kubernetes.container.image", "quay.io/opendatahub-contrib/pyspark:s3.3.1-h3.3.4_v0.1.1"),
    ("spark.kubernetes.namespace", current_namespace),
    ("spark.driver.host", IPAddr),
    ("spark.executor.instances", "3"),
    ("spark.executor.memory", "512m"),
    ("spark.executor.cores", "1"),
    ("spark.kubernetes.pyspark.pythonVersion", "3"),
):
    sparkConf.set(_key, _value)

# Creating (or reusing) the session is the step that initializes the Spark
# cluster and generates the worker nodes.
spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
sc = spark.sparkContext

#### You can now launch jobs directly from your notebook

In [None]:
from random import random
from operator import add

# Number of partitions to split the work into, and the total sample count
# (ten million samples per partition).
partitions = 7
n = 10_000_000 * partitions


def f(_):
    """Draw one random point in the square [-1, 1) x [-1, 1).

    The argument is ignored; it only exists so the function can be mapped
    over a sequence. Returns 1 when the point lies inside (or on) the unit
    circle, otherwise 0.
    """
    x = 2 * random() - 1
    y = 2 * random() - 1
    inside_unit_circle = x ** 2 + y ** 2 <= 1
    return int(inside_unit_circle)
# Distribute the sample indices across the partitions, evaluate each sample
# with `f`, and sum the hits.
sample_ids = sc.parallelize(range(1, n + 1), partitions)
count = sample_ids.map(f).reduce(add)

# The fraction of hits approximates pi / 4.
print("Pi is roughly %f" % (4.0 * count / n))

#### Don't forget to shut down your cluster!

In [None]:
# Stop the SparkContext held in `sc`.
# NOTE(review): this presumably also releases the executor pods — confirm.
sc.stop()