diff --git a/CHANGELOG.md b/CHANGELOG.md index e7a63404..13f30106 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,14 +8,20 @@ All notable changes to this project will be documented in this file. - Add experimental support for Spark 4 ([#589]) - Helm: Allow Pod `priorityClassName` to be configured ([#608]). +- Support for Spark 3.5.7 ([#610]). ### Fixed - SparkConnectServer: The `imagePullSecret` is now correctly passed to Spark executor pods ([#603]). +### Removed + +- Support for Spark version 3.5.5 has been dropped ([#610]). + [#589]: https://github.com/stackabletech/spark-k8s-operator/pull/589 [#603]: https://github.com/stackabletech/spark-k8s-operator/pull/603 [#608]: https://github.com/stackabletech/spark-k8s-operator/pull/608 +[#610]: https://github.com/stackabletech/spark-k8s-operator/pull/610 ## [25.7.0] - 2025-07-23 diff --git a/docs/modules/spark-k8s/examples/example-history-app.yaml b/docs/modules/spark-k8s/examples/example-history-app.yaml index 30cd0e96..e963dada 100644 --- a/docs/modules/spark-k8s/examples/example-history-app.yaml +++ b/docs/modules/spark-k8s/examples/example-history-app.yaml @@ -5,7 +5,7 @@ metadata: name: spark-pi-s3-1 spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 pullPolicy: IfNotPresent mode: cluster mainClass: org.apache.spark.examples.SparkPi diff --git a/docs/modules/spark-k8s/examples/example-history-server.yaml b/docs/modules/spark-k8s/examples/example-history-server.yaml index 75f8df27..b303f4fd 100644 --- a/docs/modules/spark-k8s/examples/example-history-server.yaml +++ b/docs/modules/spark-k8s/examples/example-history-server.yaml @@ -5,11 +5,11 @@ metadata: name: spark-history spec: image: - productVersion: 4.0.0 - logFileDirectory: # <1> + productVersion: 3.5.7 + logFileDirectory: # <1> s3: - prefix: eventlogs/ # <2> - bucket: # <3> + prefix: eventlogs/ # <2> + bucket: # <3> inline: bucketName: spark-logs connection: @@ -19,10 +19,10 @@ spec: accessStyle: Path credentials: secretClass: 
history-credentials-class - sparkConf: # <4> + sparkConf: # <4> nodes: roleGroups: default: - replicas: 1 # <5> + replicas: 1 # <5> config: cleaner: true # <6> diff --git a/docs/modules/spark-k8s/examples/example-spark-connect.yaml b/docs/modules/spark-k8s/examples/example-spark-connect.yaml index 3b6d82af..db18b1b8 100644 --- a/docs/modules/spark-k8s/examples/example-spark-connect.yaml +++ b/docs/modules/spark-k8s/examples/example-spark-connect.yaml @@ -5,7 +5,7 @@ metadata: name: spark-connect # <1> spec: image: - productVersion: "3.5.6" # <2> + productVersion: "3.5.7" # <2> pullPolicy: IfNotPresent args: - "--package org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1" # <3> @@ -22,7 +22,7 @@ spec: - -Dmy.custom.jvm.arg=customValue # <5> config: logging: - enableVectorAgent: False + enableVectorAgent: false containers: spark: custom: @@ -37,7 +37,7 @@ spec: spark.executor.instances: "3" config: logging: - enableVectorAgent: False + enableVectorAgent: false containers: spark: custom: diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml index 824ab97b..92415a02 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml @@ -6,7 +6,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny-tlc-report-1.1.0.jar # <3> mainClass: tech.stackable.demo.spark.NYTLCReport @@ -22,7 +22,7 @@ spec: config: volumeMounts: - name: cm-job-arguments # <6> - mountPath: /arguments # <7> + mountPath: /arguments # <7> executor: replicas: 3 config: diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml index d24b2055..8ce2cb8c 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml +++ 
b/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml @@ -7,7 +7,7 @@ metadata: spec: image: oci.stackable.tech/stackable/ny-tlc-report:0.2.0 # <1> sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/ny_tlc_report.py # <2> args: diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml index edd66888..bf80d0bf 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml @@ -6,7 +6,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: s3a://my-bucket/app.jar # <1> mainClass: org.example.App # <2> diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml index de8c510a..6e05fbf4 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml @@ -5,7 +5,7 @@ metadata: name: example-sparkapp-s3-private spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: s3a://my-bucket/spark-examples.jar # <1> mainClass: org.apache.spark.examples.SparkPi # <2> diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml index 83a230eb..3302b8f8 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml @@ -6,7 +6,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/streaming/hdfs_wordcount.py args: diff --git 
a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh index 36639bc3..53d92ae7 100755 --- a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh +++ b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh @@ -56,7 +56,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py driver: diff --git a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 index 71c0255d..52643457 100755 --- a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 +++ b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 @@ -56,7 +56,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 4.0.0 + productVersion: 3.5.7 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py driver: diff --git a/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc b/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc index 916d8e0b..9b7bc4f9 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc @@ -29,7 +29,7 @@ Below is an example of a custom image that includes a JDBC driver: [source, Dockerfile] ---- -FROM oci.stackable.tech/sdp/spark-k8s:4.0.0-stackable0.0.0-dev # <1> +FROM oci.stackable.tech/sdp/spark-k8s:3.5.7-stackable0.0.0-dev # <1> RUN curl --fail -o /stackable/spark/jars/postgresql-42.6.0.jar "https://jdbc.postgresql.org/download/postgresql-42.6.0.jar" # <2> ---- @@ -41,8 +41,8 @@ Build your custom image and push it to your container registry. [source, bash] ---- -docker build -t my-registry/spark-k8s:4.0.0-psql . 
-docker push my-registry/spark-k8s:4.0.0-psql +docker build -t my-registry/spark-k8s:3.5.7-psql . +docker push my-registry/spark-k8s:3.5.7-psql ---- And the following snippet showcases an application that uses the custom image: @@ -55,8 +55,8 @@ metadata: name: spark-jdbc spec: sparkImage: - custom: "my-registry/spark-k8s:4.0.0-psql" # <1> - productVersion: "4.0.0" # <2> + custom: "my-registry/spark-k8s:3.5.7-psql" # <1> + productVersion: "3.5.7" # <2> ... ---- diff --git a/docs/modules/spark-k8s/partials/supported-versions.adoc b/docs/modules/spark-k8s/partials/supported-versions.adoc index bf080b97..90c410c5 100644 --- a/docs/modules/spark-k8s/partials/supported-versions.adoc +++ b/docs/modules/spark-k8s/partials/supported-versions.adoc @@ -3,12 +3,11 @@ // Stackable Platform documentation. // Please sort the versions in descending order (newest first) -- 4.0.0 (Hadoop 3.4.1, Scala 2.13, Python 3.11, Java 17) (Experimental) -- 3.5.6 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (LTS) -- 3.5.5 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (Deprecated) +- 4.0.1 (Hadoop 3.4.1, Scala 2.13, Python 3.11, Java 17) (Experimental) +- 3.5.7 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (LTS) +- 3.5.6 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (Deprecated) -Some reasons why Spark 4 is considered experimental (as of August 2025): +Some reasons why Spark 4 is considered experimental (as of September 2025): - Missing HBase compatibility (See: https://github.com/apache/hbase-connectors/pull/130) -- Missing Iceberg compatibility (See: https://github.com/apache/iceberg/issues/13358) - Executors fail to load logging libs (maybe related: https://issues.apache.org/jira/browse/SPARK-52585) diff --git a/examples/README-examples.md b/examples/README-examples.md index 33756eb4..4b9c931a 100644 --- a/examples/README-examples.md +++ b/examples/README-examples.md @@ -8,30 +8,30 @@ This note outlines a few things that are needed to run these examples on a local Create a new 
local cluster (e.g. with [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/) and the [stackablectl tool](https://github.com/stackabletech/stackablectl)). This creates a cluster named `stackable-data-platform`. -````text +```text kind create cluster --name stackable-data-platform stackablectl operator install spark-k8s commons secret -```` +``` Load the `ny-tlc-report` image to the cluster: -````text +```text kind load docker-image oci.stackable.tech/stackable/ny-tlc-report:0.2.0 --name stackable-data-platform -```` +``` ## Set up the `PersistentVolumeClaim` The PVC should contain a few dependencies that Spark needs to access S3: -````text +```text kubectl apply -f kind/kind-pvc.yaml -```` +``` ## Set up the `minio` object store Use a local object store to avoid external dependencies: -````text +```text helm install test-minio \ --version 4.0.15 \ --set mode=standalone \ @@ -40,26 +40,26 @@ helm install test-minio \ --set 'buckets[0].name=my-bucket,buckets[0].policy=public' \ --set resources.requests.memory=1Gi \ --repo https://charts.min.io/ minio -```` +``` -````text +```text kubectl apply -f kind/minio.yaml -```` +``` Several resources are needed in this store. 
These can be loaded like this: -````text +```text kubectl exec minio-mc-0 -- sh -c 'mc alias set test-minio http://test-minio:9000/' -kubectl cp examples/ny-tlc-report-1.1.0-3.5.6.jar minio-mc-0:/tmp +kubectl cp examples/ny-tlc-report-1.1.0-3.5.7.jar minio-mc-0:/tmp kubectl cp apps/ny_tlc_report.py minio-mc-0:/tmp kubectl cp examples/yellow_tripdata_2021-07.csv minio-mc-0:/tmp -kubectl exec minio-mc-0 -- mc cp /tmp/ny-tlc-report-1.1.0-3.5.6.jar test-minio/my-bucket +kubectl exec minio-mc-0 -- mc cp /tmp/ny-tlc-report-1.1.0-3.5.7.jar test-minio/my-bucket kubectl exec minio-mc-0 -- mc cp /tmp/ny_tlc_report.py test-minio/my-bucket kubectl exec minio-mc-0 -- mc cp /tmp/yellow_tripdata_2021-07.csv test-minio/my-bucket -```` +``` We now have a local S3-implementation with the bucket populated with the resources we need for the examples, which can be run like this: -````text +```text kubectl apply -f examples/ny-tlc-report.yaml -```` +``` diff --git a/examples/ny-tlc-report-external-dependencies.yaml b/examples/ny-tlc-report-external-dependencies.yaml index 209df660..cc45bed2 100644 --- a/examples/ny-tlc-report-external-dependencies.yaml +++ b/examples/ny-tlc-report-external-dependencies.yaml @@ -6,7 +6,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 3.5.6 + productVersion: 3.5.7 pullPolicy: IfNotPresent mode: cluster mainApplicationFile: s3a://my-bucket/ny_tlc_report.py diff --git a/examples/ny-tlc-report-image.yaml b/examples/ny-tlc-report-image.yaml index 93f49fc0..73daa848 100644 --- a/examples/ny-tlc-report-image.yaml +++ b/examples/ny-tlc-report-image.yaml @@ -7,7 +7,7 @@ metadata: spec: # everything under /jobs will be copied to /stackable/spark/jobs image: oci.stackable.tech/stackable/ny-tlc-report:0.2.0 - sparkImage: oci.stackable.tech/sdp/spark-k8s:3.5.6-stackable0.0.0-dev + sparkImage: oci.stackable.tech/sdp/spark-k8s:3.5.7-stackable0.0.0-dev sparkImagePullPolicy: IfNotPresent mode: cluster mainApplicationFile: 
local:///stackable/spark/jobs/ny_tlc_report.py diff --git a/examples/ny-tlc-report.yaml b/examples/ny-tlc-report.yaml index 02afea5a..c20ed4a3 100644 --- a/examples/ny-tlc-report.yaml +++ b/examples/ny-tlc-report.yaml @@ -13,9 +13,9 @@ metadata: name: spark-ny-cm spec: sparkImage: - productVersion: 3.5.6 + productVersion: 3.5.7 mode: cluster - mainApplicationFile: s3a://my-bucket/ny-tlc-report-1.1.0-3.5.6.jar + mainApplicationFile: s3a://my-bucket/ny-tlc-report-1.1.0-3.5.7.jar mainClass: tech.stackable.demo.spark.NYTLCReport volumes: - name: cm-job-arguments diff --git a/rust/operator-binary/src/crd/affinity.rs b/rust/operator-binary/src/crd/affinity.rs index ef719dbf..44faf89e 100644 --- a/rust/operator-binary/src/crd/affinity.rs +++ b/rust/operator-binary/src/crd/affinity.rs @@ -47,7 +47,7 @@ mod test { name: spark-history spec: image: - productVersion: 3.5.6 + productVersion: 3.5.7 logFileDirectory: s3: prefix: eventlogs/ diff --git a/rust/operator-binary/src/crd/history.rs b/rust/operator-binary/src/crd/history.rs index da317eb3..08c5d9de 100644 --- a/rust/operator-binary/src/crd/history.rs +++ b/rust/operator-binary/src/crd/history.rs @@ -447,7 +447,7 @@ mod test { name: spark-history spec: image: - productVersion: 3.5.6 + productVersion: 3.5.7 logFileDirectory: s3: prefix: eventlogs/ diff --git a/rust/operator-binary/src/history/config/jvm.rs b/rust/operator-binary/src/history/config/jvm.rs index 8ffb0037..ed3fd7ba 100644 --- a/rust/operator-binary/src/history/config/jvm.rs +++ b/rust/operator-binary/src/history/config/jvm.rs @@ -65,7 +65,7 @@ mod tests { name: spark-history spec: image: - productVersion: 3.5.6 + productVersion: 3.5.7 logFileDirectory: s3: prefix: eventlogs/ @@ -98,7 +98,7 @@ mod tests { name: spark-history spec: image: - productVersion: 3.5.6 + productVersion: 3.5.7 logFileDirectory: s3: prefix: eventlogs/ diff --git a/tests/templates/kuttl/hbase-connector/04-install-hbase.yaml.j2 
b/tests/templates/kuttl/hbase-connector/04-install-hbase.yaml.j2 index dfc0af00..49a3fa91 100644 --- a/tests/templates/kuttl/hbase-connector/04-install-hbase.yaml.j2 +++ b/tests/templates/kuttl/hbase-connector/04-install-hbase.yaml.j2 @@ -17,6 +17,7 @@ spec: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + gracefulShutdownTimeout: 30s # speed up tests roleGroups: default: replicas: 2 @@ -24,6 +25,7 @@ spec: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + gracefulShutdownTimeout: 30s # speed up tests roleGroups: default: replicas: 1 diff --git a/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 index 17099750..76245c96 100644 --- a/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 @@ -14,8 +14,7 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['spark-hbase-connector'] }}" {% endif %} - # pullPolicy: IfNotPresent - pullPolicy: Always + pullPolicy: IfNotPresent mode: cluster mainApplicationFile: local:///stackable/spark/jobs/test-hbase.py sparkConf: diff --git a/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-3.5.5.jar b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-3.5.5.jar deleted file mode 100644 index e5e16993..00000000 Binary files a/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-3.5.5.jar and /dev/null differ diff --git a/examples/ny-tlc-report-1.1.0-3.5.5.jar b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-3.5.7.jar similarity index 90% rename from examples/ny-tlc-report-1.1.0-3.5.5.jar rename to tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-3.5.7.jar index e5e16993..f969e32c 100644 Binary files a/examples/ny-tlc-report-1.1.0-3.5.5.jar and b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-3.5.7.jar differ 
diff --git a/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.0.jar b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.1.jar similarity index 91% rename from tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.0.jar rename to tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.1.jar index 965cedd9..92968e1d 100644 Binary files a/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.0.jar and b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.1.jar differ diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 37693cf6..b03c550e 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -5,31 +5,29 @@ dimensions: - "false" - name: spark values: - - 3.5.5 - 3.5.6 - - 4.0.0 + - 3.5.7 + - 4.0.1 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev - name: spark-logging values: - - 3.5.5 - - 3.5.6 + - 3.5.7 - name: spark-hbase-connector values: - - 3.5.5 - - 3.5.6 + - 3.5.7 - name: spark-delta-lake values: - - 3.5.6 + - 3.5.7 # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev - name: spark-iceberg values: - - 3.5.6 + - 3.5.7 - name: spark-connect values: - - 3.5.6 - - 4.0.0 + - 3.5.7 + - 4.0.1 # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev - name: hbase values: