diff --git a/spiceaidocs/docs/data-connectors/spark.md b/spiceaidocs/docs/data-connectors/spark.md new file mode 100644 index 00000000..2868c407 --- /dev/null +++ b/spiceaidocs/docs/data-connectors/spark.md @@ -0,0 +1,109 @@ +--- +title: 'Apache Spark Connector' +sidebar_label: 'Apache Spark Connector' +description: 'Apache Spark Connector Documentation' +pagination_prev: null +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Use Apache Spark as a connector for federated SQL queries against a Spark cluster using [Spark Connect](https://spark.apache.org/docs/latest/spark-connect-overview.html). + +## Configuration + +The Apache Spark Connector can be used in two ways: specifying a plaintext connection string using the `spark_remote` parameter or specifying a `spark_remote` secret. The connector will fail if both configurations are set. + + +### Parameters +- `spark_remote`: A [Spark remote](https://spark.apache.org/docs/latest/spark-connect-overview.html#set-sparkremote-environment-variable) connection URI, e.g. `sc://localhost`. + +### Auth + +For Spark clusters configured to accept authenticated requests, do not set `spark_remote` as an inline dataset parameter, as it will contain sensitive data. Instead, use a secret named `spark` with key `spark_remote`. + +See [Secret Stores](/secret-stores) for more details. + + + + ```bash + spice login spark --spark_remote <spark-remote-uri> + ``` + + Learn more about [File Secret Store](/secret-stores/file). + + + ```bash + SPICE_SECRET_SPARK_SPARK_REMOTE=<spark-remote-uri> \ + spice run + ``` + + `spicepod.yaml` + ```yaml + version: v1beta1 + kind: Spicepod + name: spice-app + + secrets: + store: env + + # <...> + ``` + + Learn more about [Env Secret Store](/secret-stores/env). 
+ + + ```bash + kubectl create secret generic spark \ + --from-literal=spark_remote='<spark-remote-uri>' + ``` + + `spicepod.yaml` + ```yaml + version: v1beta1 + kind: Spicepod + name: spice-app + + secrets: + store: kubernetes + + # <...> + ``` + + Learn more about [Kubernetes Secret Store](/secret-stores/kubernetes). + + + Add a new keychain entry (macOS), with the `spark_remote` connection URI in a JSON string: + + ```bash + security add-generic-password -l "Spark Remote" \ + -a spiced -s spice_secret_spark \ + -w $(echo -n '{"spark_remote": "spark"}') + ``` + + `spicepod.yaml` + ```yaml + version: v1beta1 + kind: Spicepod + name: spice-app + + secrets: + store: keyring + + # <...> + ``` + + Learn more about [Keyring Secret Store](/secret-stores/keyring). + + + +## Example + +```yaml +datasets: + - from: spark:spiceai.datasets.my_awesome_table + name: my_table + params: + spark_remote: sc://localhost + +``` diff --git a/spiceaidocs/docs/reference/spicepod/datasets.md b/spiceaidocs/docs/reference/spicepod/datasets.md index 08f1b6c4..28f8dd8a 100644 --- a/spiceaidocs/docs/reference/spicepod/datasets.md +++ b/spiceaidocs/docs/reference/spicepod/datasets.md @@ -78,6 +78,7 @@ Where: - [`spiceai`](../../data-connectors/spiceai.md) - [`dremio`](../../data-connectors/dremio.md) + - [`spark`](../../data-connectors/spark.md) - [`databricks`](../../data-connectors/databricks.md) - [`s3`](../../data-connectors/s3.md) - [`postgres`](../../data-connectors/postgres/index.md)