From ad8aeb700f1a4ddef347d49a8f066d4d03a7b8a1 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 29 Sep 2025 11:47:27 +0200 Subject: [PATCH 1/3] chore(spark): bump version to 3.5.7 --- .../create-spark-ingestion-job.yaml | 4 ++-- .../create-spark-report.yaml | 2 +- .../create-spark-anomaly-detection-job.yaml | 22 +++++++++---------- stacks/airflow/airflow.yaml | 3 ++- .../jupyterhub-pyspark-hdfs/jupyterlab.yaml | 11 +++++++--- stacks/jupyterhub-pyspark-hdfs/notebook.ipynb | 2 +- .../spark_connect.yaml | 4 ++-- 7 files changed, 27 insertions(+), 21 deletions(-) diff --git a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml index c35a1ad8..ab948286 100644 --- a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml +++ b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml @@ -144,13 +144,13 @@ data: stackable.tech/vendor: Stackable spec: sparkImage: - productVersion: 3.5.6 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/spark-ingest-into-lakehouse.py deps: packages: - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 - - org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.6 + - org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.7 s3connection: reference: minio sparkConf: diff --git a/demos/end-to-end-security/create-spark-report.yaml b/demos/end-to-end-security/create-spark-report.yaml index 79b32e9b..25621a76 100644 --- a/demos/end-to-end-security/create-spark-report.yaml +++ b/demos/end-to-end-security/create-spark-report.yaml @@ -55,7 +55,7 @@ data: name: spark-report spec: sparkImage: - productVersion: 3.5.6 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/spark-report.py deps: diff --git a/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml 
b/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml index 928037f2..6ee70ea0 100644 --- a/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml +++ b/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml @@ -10,22 +10,22 @@ spec: - name: wait-for-testdata image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev command: - - bash - - -euo - - pipefail - - -c - - | + - bash + - -euo + - pipefail + - -c + - | echo 'Waiting for job load-ny-taxi-data to finish' kubectl wait --for=condition=complete --timeout=30m job/load-ny-taxi-data containers: - name: create-spark-anomaly-detection-job image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev command: - - bash - - -euo - - pipefail - - -c - - | + - bash + - -euo + - pipefail + - -c + - | echo 'Submitting Spark job' kubectl apply -f /tmp/manifest/spark-ad-job.yaml volumeMounts: @@ -51,7 +51,7 @@ data: name: spark-ad spec: sparkImage: - productVersion: 3.5.6 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///spark-scripts/spark-ad.py deps: diff --git a/stacks/airflow/airflow.yaml b/stacks/airflow/airflow.yaml index b86a6c05..d4a9ea6d 100644 --- a/stacks/airflow/airflow.yaml +++ b/stacks/airflow/airflow.yaml @@ -303,7 +303,7 @@ data: spec: version: "1.0" sparkImage: - productVersion: 3.5.6 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py job: @@ -331,6 +331,7 @@ data: memory: limit: 1024Mi replicas: 3 + # {% endraw %} --- apiVersion: v1 diff --git a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml index 1d0b637f..8234a9a4 100644 --- a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml @@ -21,7 +21,7 @@ spec: serviceAccountName: default containers: - name: jupyterlab - image: 
oci.stackable.tech/stackable/spark-connect-client:3.5.6-stackable0.0.0-dev + image: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev imagePullPolicy: IfNotPresent command: - bash @@ -39,8 +39,13 @@ spec: name: notebook initContainers: - name: download-notebook - image: oci.stackable.tech/stackable/spark-connect-client:3.5.6-stackable0.0.0-dev - command: ['sh', '-c', 'curl https://raw.githubusercontent.com/stackabletech/demos/main/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb -o /notebook/notebook.ipynb'] + image: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev + command: + [ + "sh", + "-c", + "curl https://raw.githubusercontent.com/stackabletech/demos/main/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb -o /notebook/notebook.ipynb", + ] volumeMounts: - mountPath: /notebook name: notebook diff --git a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb index 3e81c879..ecfcc560 100644 --- a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb +++ b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb @@ -53,7 +53,7 @@ "#\n", "# See: https://issues.apache.org/jira/browse/SPARK-46032\n", "#\n", - "spark.addArtifacts(\"/stackable/spark/connect/spark-connect_2.12-3.5.6.jar\")" + "spark.addArtifacts(\"/stackable/spark/connect/spark-connect_2.12-3.5.7.jar\")" ] }, { diff --git a/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml b/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml index 5fe372b2..10a3044d 100644 --- a/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml @@ -30,8 +30,8 @@ spec: image: # Using an image that includes scikit-learn (among other things) # because this package needs to be available on the executors. 
- custom: oci.stackable.tech/stackable/spark-connect-client:3.5.6-stackable0.0.0-dev - productVersion: 3.5.6 + custom: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev + productVersion: 3.5.7 pullPolicy: IfNotPresent args: server: From de0dee3b2b9d296b1e5ee4825c7f6f1559b12c46 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 29 Sep 2025 14:41:49 +0200 Subject: [PATCH 2/3] format command args consistently --- stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml index 8234a9a4..603b3aba 100644 --- a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml @@ -41,11 +41,10 @@ spec: - name: download-notebook image: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev command: - [ - "sh", - "-c", - "curl https://raw.githubusercontent.com/stackabletech/demos/main/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb -o /notebook/notebook.ipynb", - ] + - bash + args: + - -c + - curl https://raw.githubusercontent.com/stackabletech/demos/main/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb -o /notebook/notebook.ipynb volumeMounts: - mountPath: /notebook name: notebook From f8dad1e50ea5a650370fe7aac8c98e42b83fe6fe Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:51:13 +0200 Subject: [PATCH 3/3] update to spark 4.0.1 --- .../create-spark-ingestion-job.yaml | 6 +++--- demos/end-to-end-security/create-spark-report.yaml | 4 ++-- .../create-spark-anomaly-detection-job.yaml | 4 ++-- stacks/airflow/airflow.yaml | 3 ++- stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml | 4 ++-- stacks/jupyterhub-pyspark-hdfs/notebook.ipynb | 2 +- stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml | 4 ++-- 7 files changed, 14 insertions(+), 13 
deletions(-) diff --git a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml index ab948286..2990b0bd 100644 --- a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml +++ b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml @@ -144,13 +144,13 @@ data: stackable.tech/vendor: Stackable spec: sparkImage: - productVersion: 3.5.7 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/spark-ingest-into-lakehouse.py deps: packages: - - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 - - org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.7 + - org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0 + - org.apache.spark:spark-sql-kafka-0-10_2.13:4.0.1 s3connection: reference: minio sparkConf: diff --git a/demos/end-to-end-security/create-spark-report.yaml b/demos/end-to-end-security/create-spark-report.yaml index 25621a76..4efd848d 100644 --- a/demos/end-to-end-security/create-spark-report.yaml +++ b/demos/end-to-end-security/create-spark-report.yaml @@ -55,12 +55,12 @@ data: name: spark-report spec: sparkImage: - productVersion: 3.5.7 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/spark-report.py deps: packages: - - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 + - org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0 sparkConf: spark.driver.extraClassPath: /stackable/config/hdfs spark.executor.extraClassPath: /stackable/config/hdfs diff --git a/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml b/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml index 6ee70ea0..46f1cd79 100644 --- a/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml +++ b/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml @@ -51,12 +51,12 @@ 
data: name: spark-ad spec: sparkImage: - productVersion: 3.5.7 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///spark-scripts/spark-ad.py deps: packages: - - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 + - org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0 requirements: - scikit-learn==1.4.0 s3connection: diff --git a/stacks/airflow/airflow.yaml b/stacks/airflow/airflow.yaml index d4a9ea6d..01094310 100644 --- a/stacks/airflow/airflow.yaml +++ b/stacks/airflow/airflow.yaml @@ -303,7 +303,7 @@ data: spec: version: "1.0" sparkImage: - productVersion: 3.5.7 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py job: @@ -332,6 +332,7 @@ data: limit: 1024Mi replicas: 3 + # {% endraw %} --- apiVersion: v1 diff --git a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml index 603b3aba..c6639b53 100644 --- a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml @@ -21,7 +21,7 @@ spec: serviceAccountName: default containers: - name: jupyterlab - image: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev + image: oci.stackable.tech/stackable/spark-connect-client:4.0.1-stackable0.0.0-dev imagePullPolicy: IfNotPresent command: - bash @@ -39,7 +39,7 @@ spec: name: notebook initContainers: - name: download-notebook - image: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev + image: oci.stackable.tech/stackable/spark-connect-client:4.0.1-stackable0.0.0-dev command: - bash args: diff --git a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb index ecfcc560..92a9eb7b 100644 --- a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb +++ b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb @@ -53,7 +53,7 @@ "#\n", "# See: https://issues.apache.org/jira/browse/SPARK-46032\n", "#\n", - 
"spark.addArtifacts(\"/stackable/spark/connect/spark-connect_2.12-3.5.7.jar\")" + "spark.addArtifacts(\"/stackable/spark/connect/spark-connect-4.0.1.jar\")" ] }, { diff --git a/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml b/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml index 10a3044d..3bdefa71 100644 --- a/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml @@ -30,8 +30,8 @@ spec: image: # Using an image that includes scikit-learn (among other things) # because this package needs to be available on the executors. - custom: oci.stackable.tech/stackable/spark-connect-client:3.5.7-stackable0.0.0-dev - productVersion: 3.5.7 + custom: oci.stackable.tech/stackable/spark-connect-client:4.0.1-stackable0.0.0-dev + productVersion: 4.0.1 pullPolicy: IfNotPresent args: server: