6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -8,14 +8,20 @@ All notable changes to this project will be documented in this file.

- Add experimental support for Spark 4 ([#589])
- Helm: Allow Pod `priorityClassName` to be configured ([#608]).
+- Support for Spark 3.5.7 ([#610]).

### Fixed

- SparkConnectServer: The `imagePullSecret` is now correctly passed to Spark executor pods ([#603]).

+### Removed
+
+- Support for Spark version 3.5.5 has been dropped ([#610]).
+
[#589]: https://github.com/stackabletech/spark-k8s-operator/pull/589
[#603]: https://github.com/stackabletech/spark-k8s-operator/pull/603
[#608]: https://github.com/stackabletech/spark-k8s-operator/pull/608
+[#610]: https://github.com/stackabletech/spark-k8s-operator/pull/610

## [25.7.0] - 2025-07-23

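The Helm `priorityClassName` entry above ([#608]) is set through the operator's chart values. A minimal sketch of an override, assuming the chart exposes a top-level `priorityClassName` key (the exact key name is an assumption; check the chart's `values.yaml`):

```yaml
# values-override.yaml (hypothetical file name)
# Assumed key; consult the chart's values.yaml for the real one.
priorityClassName: high-priority  # must name an existing PriorityClass in the cluster
```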
2 changes: 1 addition & 1 deletion docs/modules/spark-k8s/examples/example-history-app.yaml
@@ -5,7 +5,7 @@ metadata:
name: spark-pi-s3-1
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
pullPolicy: IfNotPresent
mode: cluster
mainClass: org.apache.spark.examples.SparkPi
12 changes: 6 additions & 6 deletions docs/modules/spark-k8s/examples/example-history-server.yaml
@@ -5,11 +5,11 @@ metadata:
name: spark-history
spec:
image:
-productVersion: 4.0.0
-logFileDirectory: # <1>
+productVersion: 3.5.7
+logFileDirectory: # <1>
s3:
-prefix: eventlogs/ # <2>
-bucket: # <3>
+prefix: eventlogs/ # <2>
+bucket: # <3>
inline:
bucketName: spark-logs
connection:
@@ -19,10 +19,10 @@ spec:
accessStyle: Path
credentials:
secretClass: history-credentials-class
-sparkConf: # <4>
+sparkConf: # <4>
nodes:
roleGroups:
default:
-replicas: 1 # <5>
+replicas: 1 # <5>
config:
cleaner: true # <6>
6 changes: 3 additions & 3 deletions docs/modules/spark-k8s/examples/example-spark-connect.yaml
@@ -5,7 +5,7 @@ metadata:
name: spark-connect # <1>
spec:
image:
-productVersion: "3.5.6" # <2>
+productVersion: "3.5.7" # <2>
pullPolicy: IfNotPresent
args:
- "--package org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1" # <3>
@@ -22,7 +22,7 @@ spec:
- -Dmy.custom.jvm.arg=customValue # <5>
config:
logging:
-enableVectorAgent: False
+enableVectorAgent: false
containers:
spark:
custom:
@@ -37,7 +37,7 @@ spec:
spark.executor.instances: "3"
config:
logging:
-enableVectorAgent: False
+enableVectorAgent: false
containers:
spark:
custom:
@@ -6,7 +6,7 @@ metadata:
namespace: default
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny-tlc-report-1.1.0.jar # <3>
mainClass: tech.stackable.demo.spark.NYTLCReport
@@ -22,7 +22,7 @@ spec:
config:
volumeMounts:
- name: cm-job-arguments # <6>
-mountPath: /arguments # <7>
+mountPath: /arguments # <7>
executor:
replicas: 3
config:
@@ -7,7 +7,7 @@ metadata:
spec:
image: oci.stackable.tech/stackable/ny-tlc-report:0.2.0 # <1>
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: local:///stackable/spark/jobs/ny_tlc_report.py # <2>
args:
2 changes: 1 addition & 1 deletion docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml
@@ -6,7 +6,7 @@ metadata:
namespace: default
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: s3a://my-bucket/app.jar # <1>
mainClass: org.example.App # <2>
@@ -5,7 +5,7 @@ metadata:
name: example-sparkapp-s3-private
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: s3a://my-bucket/spark-examples.jar # <1>
mainClass: org.apache.spark.examples.SparkPi # <2>
@@ -6,7 +6,7 @@ metadata:
namespace: default
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: local:///stackable/spark/examples/src/main/python/streaming/hdfs_wordcount.py
args:
@@ -56,7 +56,7 @@ metadata:
namespace: default
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py
driver:
@@ -56,7 +56,7 @@ metadata:
namespace: default
spec:
sparkImage:
-productVersion: 4.0.0
+productVersion: 3.5.7
mode: cluster
mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py
driver:
10 changes: 5 additions & 5 deletions docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc
@@ -29,7 +29,7 @@ Below is an example of a custom image that includes a JDBC driver:

[source, Dockerfile]
----
-FROM oci.stackable.tech/sdp/spark-k8s:4.0.0-stackable0.0.0-dev # <1>
+FROM oci.stackable.tech/sdp/spark-k8s:3.5.7-stackable0.0.0-dev # <1>

RUN curl --fail -o /stackable/spark/jars/postgresql-42.6.0.jar "https://jdbc.postgresql.org/download/postgresql-42.6.0.jar" # <2>
----
@@ -41,8 +41,8 @@ Build your custom image and push it to your container registry.

[source, bash]
----
-docker build -t my-registry/spark-k8s:4.0.0-psql .
-docker push my-registry/spark-k8s:4.0.0-psql
+docker build -t my-registry/spark-k8s:3.5.7-psql .
+docker push my-registry/spark-k8s:3.5.7-psql
----

And the following snippet showcases an application that uses the custom image:
@@ -55,8 +55,8 @@ metadata:
name: spark-jdbc
spec:
sparkImage:
-custom: "my-registry/spark-k8s:4.0.0-psql" # <1>
-productVersion: "4.0.0" # <2>
+custom: "my-registry/spark-k8s:3.5.7-psql" # <1>
+productVersion: "3.5.7" # <2>
...
----

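Where building and hosting a custom image is not practical, the same driver can in principle be resolved at submit time instead. A minimal sketch using the SparkApplication `deps.packages` field, assuming that field is available in your operator version (the application name and job artifact below are hypothetical):

```yaml
---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
  name: spark-jdbc-packages  # hypothetical name
spec:
  sparkImage:
    productVersion: "3.5.7"
  mode: cluster
  mainApplicationFile: s3a://my-bucket/jdbc-job.jar  # hypothetical artifact
  deps:
    packages:
      - org.postgresql:postgresql:42.6.0  # same driver the custom image bakes in
```

The trade-off is a network fetch on every submission instead of a one-off image build.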
9 changes: 4 additions & 5 deletions docs/modules/spark-k8s/partials/supported-versions.adoc
@@ -3,12 +3,11 @@
// Stackable Platform documentation.
// Please sort the versions in descending order (newest first)

-- 4.0.0 (Hadoop 3.4.1, Scala 2.13, Python 3.11, Java 17) (Experimental)
-- 3.5.6 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (LTS)
-- 3.5.5 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (Deprecated)
+- 4.0.1 (Hadoop 3.4.1, Scala 2.13, Python 3.11, Java 17) (Experimental)
+- 3.5.7 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (LTS)
+- 3.5.6 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (Deprecated)

-Some reasons why Spark 4 is considered experimental (as of August 2025):
+Some reasons why Spark 4 is considered experimental (as of September 2025):

- Missing HBase compatibility (See: https://github.com/apache/hbase-connectors/pull/130)
- Missing Iceberg compatibility (See: https://github.com/apache/iceberg/issues/13358)
- Executors fail to load logging libs (maybe related: https://issues.apache.org/jira/browse/SPARK-52585)
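Opting into the experimental line is the same one-field change made throughout this changeset; a minimal sketch (the application name is illustrative, the example file ships in the Spark image):

```yaml
---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
  name: spark-pi-experimental  # hypothetical name
spec:
  sparkImage:
    productVersion: "4.0.1"  # experimental; see the caveats listed above
  mode: cluster
  mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py
```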
32 changes: 16 additions & 16 deletions examples/README-examples.md
@@ -8,30 +8,30 @@ This note outlines a few things that are needed to run these examples on a local cluster.

Create a new local cluster (e.g. with [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/) and the [stackablectl tool](https://github.com/stackabletech/stackablectl)). This creates a cluster named `stackable-data-platform`.

-````text
+```text
kind create cluster --name stackable-data-platform
stackablectl operator install spark-k8s commons secret
-````
+```

Load the `ny-tlc-report` image to the cluster:

-````text
+```text
kind load docker-image oci.stackable.tech/stackable/ny-tlc-report:0.2.0 --name stackable-data-platform
-````
+```

## Set up the `PersistentVolumeClaim`

The PVC should contain a few dependencies that Spark needs to access S3:

-````text
+```text
kubectl apply -f kind/kind-pvc.yaml
-````
+```

## Set up the `minio` object store

Use a local object store to avoid external dependencies:

-````text
+```text
helm install test-minio \
--version 4.0.15 \
--set mode=standalone \
@@ -40,26 +40,26 @@
--set 'buckets[0].name=my-bucket,buckets[0].policy=public' \
--set resources.requests.memory=1Gi \
--repo https://charts.min.io/ minio
-````
+```

-````text
+```text
kubectl apply -f kind/minio.yaml
-````
+```

Several resources are needed in this store. These can be loaded like this:

-````text
+```text
kubectl exec minio-mc-0 -- sh -c 'mc alias set test-minio http://test-minio:9000/'
-kubectl cp examples/ny-tlc-report-1.1.0-3.5.6.jar minio-mc-0:/tmp
+kubectl cp examples/ny-tlc-report-1.1.0-3.5.7.jar minio-mc-0:/tmp
kubectl cp apps/ny_tlc_report.py minio-mc-0:/tmp
kubectl cp examples/yellow_tripdata_2021-07.csv minio-mc-0:/tmp
-kubectl exec minio-mc-0 -- mc cp /tmp/ny-tlc-report-1.1.0-3.5.6.jar test-minio/my-bucket
+kubectl exec minio-mc-0 -- mc cp /tmp/ny-tlc-report-1.1.0-3.5.7.jar test-minio/my-bucket
kubectl exec minio-mc-0 -- mc cp /tmp/ny_tlc_report.py test-minio/my-bucket
kubectl exec minio-mc-0 -- mc cp /tmp/yellow_tripdata_2021-07.csv test-minio/my-bucket
-````
+```

The bucket of the local S3 store is now populated with the resources the examples need; run an example like this:

-````text
+```text
kubectl apply -f examples/ny-tlc-report.yaml
-````
+```
2 changes: 1 addition & 1 deletion examples/ny-tlc-report-external-dependencies.yaml
@@ -6,7 +6,7 @@ metadata:
namespace: default
spec:
sparkImage:
-productVersion: 3.5.6
+productVersion: 3.5.7
pullPolicy: IfNotPresent
mode: cluster
mainApplicationFile: s3a://my-bucket/ny_tlc_report.py
2 changes: 1 addition & 1 deletion examples/ny-tlc-report-image.yaml
@@ -7,7 +7,7 @@ metadata:
spec:
# everything under /jobs will be copied to /stackable/spark/jobs
image: oci.stackable.tech/stackable/ny-tlc-report:0.2.0
-sparkImage: oci.stackable.tech/sdp/spark-k8s:3.5.6-stackable0.0.0-dev
+sparkImage: oci.stackable.tech/sdp/spark-k8s:3.5.7-stackable0.0.0-dev
sparkImagePullPolicy: IfNotPresent
mode: cluster
mainApplicationFile: local:///stackable/spark/jobs/ny_tlc_report.py
4 changes: 2 additions & 2 deletions examples/ny-tlc-report.yaml
@@ -13,9 +13,9 @@ metadata:
name: spark-ny-cm
spec:
sparkImage:
-productVersion: 3.5.6
+productVersion: 3.5.7
mode: cluster
-mainApplicationFile: s3a://my-bucket/ny-tlc-report-1.1.0-3.5.6.jar
+mainApplicationFile: s3a://my-bucket/ny-tlc-report-1.1.0-3.5.7.jar
mainClass: tech.stackable.demo.spark.NYTLCReport
volumes:
- name: cm-job-arguments
2 changes: 1 addition & 1 deletion rust/operator-binary/src/crd/affinity.rs
@@ -47,7 +47,7 @@ mod test {
name: spark-history
spec:
image:
-productVersion: 3.5.6
+productVersion: 3.5.7
logFileDirectory:
s3:
prefix: eventlogs/
2 changes: 1 addition & 1 deletion rust/operator-binary/src/crd/history.rs
@@ -447,7 +447,7 @@ mod test {
name: spark-history
spec:
image:
-productVersion: 3.5.6
+productVersion: 3.5.7
logFileDirectory:
s3:
prefix: eventlogs/
4 changes: 2 additions & 2 deletions rust/operator-binary/src/history/config/jvm.rs
@@ -65,7 +65,7 @@ mod tests {
name: spark-history
spec:
image:
-productVersion: 3.5.6
+productVersion: 3.5.7
logFileDirectory:
s3:
prefix: eventlogs/
@@ -98,7 +98,7 @@ mod tests {
name: spark-history
spec:
image:
-productVersion: 3.5.6
+productVersion: 3.5.7
logFileDirectory:
s3:
prefix: eventlogs/
@@ -17,13 +17,15 @@ spec:
config:
logging:
enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+gracefulShutdownTimeout: 30s # speed up tests
roleGroups:
default:
replicas: 2
regionServers:
config:
logging:
enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+gracefulShutdownTimeout: 30s # speed up tests
roleGroups:
default:
replicas: 1
@@ -14,8 +14,7 @@ spec:
{% else %}
productVersion: "{{ test_scenario['values']['spark-hbase-connector'] }}"
{% endif %}
-# pullPolicy: IfNotPresent
-pullPolicy: Always
+pullPolicy: IfNotPresent
mode: cluster
mainApplicationFile: local:///stackable/spark/jobs/test-hbase.py
sparkConf:
Expand Down
3 binary files changed (contents not shown).