From f7233645bf43868c87453fe3441de2b86b12d262 Mon Sep 17 00:00:00 2001 From: josh-wong Date: Thu, 30 Oct 2025 04:10:34 +0000 Subject: [PATCH 1/2] AUTO: Sync ScalarDB docs in English to docs site repo --- .../deploy-scalardb-analytics-server.mdx | 545 ++++++++++++++++++ 1 file changed, 545 insertions(+) create mode 100644 docs/scalardb-analytics/deploy-scalardb-analytics-server.mdx diff --git a/docs/scalardb-analytics/deploy-scalardb-analytics-server.mdx b/docs/scalardb-analytics/deploy-scalardb-analytics-server.mdx new file mode 100644 index 00000000..a9cc07b4 --- /dev/null +++ b/docs/scalardb-analytics/deploy-scalardb-analytics-server.mdx @@ -0,0 +1,545 @@ +--- +tags: + - Enterprise +displayed_sidebar: docsEnglish +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import WarningLicenseKeyContact from '/src/components/en-us/_warning-license-key-contact.mdx'; +import HelmCommandUsage from '/src/components/en-us/_helm-command-usage.mdx'; + +# Deploy a ScalarDB Analytics server + +This document explains how to deploy a ScalarDB Analytics server in your local or production environment. + +## Step 1. Decide on the billing method for ScalarDB Analytics + +You can get the ScalarDB Analytics server in several ways: + + + + You can use ScalarDB Analytics in a pay-as-you-go plan. In this case, you will pay the license fee based on your query usage. + + + You can use ScalarDB Analytics in the pay-as-you-go plan in AWS Marketplace. + + + To deploy the ScalarDB Analytics server from AWS Marketplace with a pay-as-you-go plan: + + 1. Go to the AWS Marketplace page [ScalarDB Analytics server](https://aws.amazon.com/marketplace/pp/prodview-53ik57autkmci). + 1. Subscribe to the ScalarDB Analytics server. + 1. Select **View purchase options**. + 1. Select **Subscribe**. + +:::tip + +After subscribing, you'll have permission to pull the container image of the ScalarDB Analytics server from the following container registry. 
You will specify this container registry and pull the container image in a later step, so keep note of it. + +```console +709825985650.dkr.ecr.us-east-1.amazonaws.com/scalar/scalardb-analytics-server-aws-payg +``` + +::: + + + + + + + You can use ScalarDB Analytics in a fixed-price method. In this case, you will pay the fixed license fee based on your contract, with an upper limit on the queries you can run. + + + You can use ScalarDB Analytics in the fixed-price method on any supported Kubernetes platforms. You can see the supported Kubernetes platforms in [Requirements](../requirements.mdx#kubernetes). + + + + You can deploy the ScalarDB Analytics server by using a container image with a license key that is provided at a fixed price, also known as bring your own license (BYOL). You can pull the container image of the ScalarDB Analytics server from the following container registry. + +:::note + +You will specify this container registry in a later step, so keep note of it. + +::: + + ```console + ghcr.io/scalar-labs/scalardb-analytics-server-byol + ``` + + + + + + + +## Step 2. Deploy a Kubernetes cluster + +Deploy a cluster on your preferred Kubernetes platform based on the following requirements and checkpoints: + +1. Decide which Kubernetes platform to use based on the billing method and purpose. + + - If you chose **Pay as you go (container offer - AWS Marketplace)** in [Step 1. Decide on the billing method for ScalarDB Analytics](#step-1-decide-on-the-billing-method-for-scalardb-analytics), you need to deploy Amazon Elastic Kubernetes Service (EKS) in the supported regions. The supported regions will be referred to in a later step. + - If you chose **Fixed price w/bring your own license (container offer - any supported Kubernetes platform)** in [Step 1. Decide on the billing method for ScalarDB Analytics](#step-1-decide-on-the-billing-method-for-scalardb-analytics), you can use any of the supported Kubernetes platforms. 
+ + :::note + + You should use minikube for testing or development purposes only. minikube is not recommended for production use. + + ::: + +1. Check the general recommendations and requirements of the Kubernetes cluster for the ScalarDB Analytics server. + + - Recommendations + - You should use a worker node that has at least 2 CPUs and 4 GB of memory. + - Currently, the ScalarDB Analytics server does not have a clustering feature. Therefore, only one worker node is enough. + - If you want to make the Kubernetes cluster itself highly available, you can deploy it with multiple worker nodes. + - Requirements + - You must allow your Spark application to connect to the ScalarDB Analytics server deployed on the Kubernetes cluster from a network perspective. To see which port the ScalarDB Analytics server uses, see [Requirements](../requirements.mdx). + - You must allow the ScalarDB Analytics server to read from and write to the backend database to store the catalog information. These procedures will be described in detail in [Step 3. Deploy a backend database](#step-3-deploy-a-backend-database). + - You must allow the ScalarDB Analytics server to read from and write to the object storage to store metering information. These procedures will be described in detail in [Step 4. Deploy an object storage](#step-4-deploy-an-object-storage). + +1. Deploy a Kubernetes cluster for the ScalarDB Analytics server. + + + + For testing or development purposes, you can use minikube as a local Kubernetes cluster. For details on how to install and start minikube, see the [official minikube documentation](https://minikube.sigs.k8s.io/docs/start/). + + + For production environments, please deploy the Kubernetes cluster based on the above requirements of the ScalarDB Analytics server and your system's requirements, for example, security, availability, backup/restore, cost, and scalability amongst your other requirements. 
+ + + - If you chose **Fixed price w/ bring your own license (container offer - any supported Kubernetes platform)**, you can use Amazon Elastic Kubernetes Service (EKS). + - If you chose **Pay as you go (container offer - AWS Marketplace)** in [Step 1. Decide on the billing method for ScalarDB Analytics](#step-1-decide-on-the-billing-method-for-scalardb-analytics), you need to do the following: + 1. Deploy EKS in one of the supported regions that are described in the AWS documentation [MeterUsage Region support for Amazon ECS and Amazon EKS](https://docs.aws.amazon.com/marketplace/latest/APIReference/metering-regions.html#meterusage-region-support-ecs-eks). + 1. Run the following two commands after you deploy EKS: + - `eksctl utils associate-iam-oidc-provider` + + ```console + eksctl utils associate-iam-oidc-provider --region <REGION> --cluster <CLUSTER_NAME> --approve + ``` + + - `eksctl create iamserviceaccount` + + ```console + eksctl create iamserviceaccount \ + --name <SERVICE_ACCOUNT_NAME> \ + --namespace <NAMESPACE> \ + --region <REGION> \ + --cluster <CLUSTER_NAME> \ + --attach-policy-arn arn:aws:iam::aws:policy/AWSMarketplaceMeteringFullAccess \ + --approve \ + --override-existing-serviceaccounts + ``` + + You can set `SERVICE_ACCOUNT_NAME` to an arbitrary name that follows the [Kubernetes resource naming rule](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/). + + :::note + + Keep note of the value that you set for `SERVICE_ACCOUNT_NAME` because you will specify this service account name in a later step. + + ::: + + + :::important + + For production environments, you must use a supported Kubernetes platform. You can see the supported Kubernetes platforms in [Requirements](../requirements.mdx#kubernetes). + + ::: + + + + + +## Step 3. Deploy a backend database + +Deploy your preferred backend database based on the following requirements and checkpoints: + +1. Decide which backend database to use. + - You can see the supported backend database for the ScalarDB Analytics server in [Requirements](../requirements.mdx). 
+ - Unless you have a special reason not to, you should use a database that you are familiar with. +1. Check the backend database requirements for the ScalarDB Analytics server. + - You can see the requirements of each backend database in the [Requirements](../requirements.mdx) page. +1. Deploy the backend database in your environment. + + + + For testing or development purposes, you can deploy a backend database in the Kubernetes cluster as a Pod. For example, if you use PostgreSQL, you can deploy it as follows: + + 1. Add the Bitnami Helm repository by running the following command: + + ```console + helm repo add bitnami https://charts.bitnami.com/bitnami + ``` + + 1. Deploy PostgreSQL by running the following command: + + ```console + helm install postgresql-scalardb-cluster bitnami/postgresql \ + --set auth.postgresPassword=postgres \ + --set primary.persistence.enabled=false + ``` + + 1. Check if the PostgreSQL container is running by running the following command: + + ```console + kubectl get pod + ``` + + You should see the following output: + + ```console + NAME READY STATUS RESTARTS AGE + postgresql-scalardb-cluster-0 1/1 Running 0 17s + ``` + + + For production environments, please deploy the backend database based on the above requirements of the ScalarDB Analytics server and your system's requirements, for example, security, availability, backup/restore, cost, and scalability amongst your other requirements. + + + +## Step 4. Deploy an object storage + +Deploy an object storage based on the following requirements and checkpoints: + +1. Decide which object storage to use. + - You can use [Amazon S3](https://aws.amazon.com/s3/), [Azure Blob Storage](https://azure.microsoft.com/products/storage/blobs), or [Google's Cloud Storage](https://cloud.google.com/storage) as a data store for metering information for the ScalarDB Analytics server. 
+ - You should use the object storage that is provided by the same cloud service provider as the Kubernetes cluster that you chose in [Step 2. Deploy a Kubernetes cluster](#step-2-deploy-a-kubernetes-cluster). For example, if you chose EKS, you should use Amazon S3. + +1. Check the object storage requirements for the ScalarDB Analytics server. + - You must allow the ScalarDB Analytics server to read from and write to the object storage. +1. Deploy the object storage in your environment. + + + + For testing or development purposes, you can store metering information on the filesystem in the ScalarDB Analytics server container. In other words, you don't need to use the object storage. In this case, you need to set `scalar.db.analytics.server.metering.storage.provider=filesystem` in the properties file. For more details, see [Step 5. Create a custom values file](#step-5-create-a-custom-values-file). + + + For production environments, please deploy the object storage based on the above requirements of the ScalarDB Analytics server and your system's requirements, for example, security, availability, backup/restore, cost, and scalability amongst your other requirements. + + + +## Step 5. Create a custom values file + +Create your custom values file `scalardb-analytics-server.yaml` based on your environment and your decisions in the previous steps. + +### Set the required configurations + +1. Set the container image and the license configurations + + Based on the billing method you chose in [Step 1. Decide on the billing method for ScalarDB Analytics](#step-1-decide-on-the-billing-method-for-scalardb-analytics), set the container image configuration to `scalarDbAnalyticsServer.image.repository`. Select one of the following billing methods to see an example of this configuration. 
+ + + + ```yaml + scalarDbAnalyticsServer: + image: + repository: 709825985650.dkr.ecr.us-east-1.amazonaws.com/scalar/scalardb-analytics-server-aws-payg + ``` + + + + ```yaml + scalarDbAnalyticsServer: + image: + repository: ghcr.io/scalar-labs/scalardb-analytics-server-byol + properties: | + scalar.db.analytics.server.licensing.license_key= + scalar.db.analytics.server.licensing.license_check_cert_pem=-----BEGIN CERTIFICATE-----\nMIID...certificate content...\n-----END CERTIFICATE----- + ``` + + + +1. Set the service account configurations + + Based on the billing method you chose in [Step 1. Decide on the billing method for ScalarDB Analytics](#step-1-decide-on-the-billing-method-for-scalardb-analytics), set the service account configurations in `scalarDbAnalyticsServer.serviceAccount`. Select one of the following billing methods to see an example of this configuration. + + + + ```yaml + scalarDbAnalyticsServer: + serviceAccount: + serviceAccountName: <SERVICE_ACCOUNT_NAME> + automountServiceAccountToken: true + ``` + + :::note + + Change `<SERVICE_ACCOUNT_NAME>` to the name of the service account that you created by using the `eksctl create iamserviceaccount` command in [Step 2. Deploy a Kubernetes cluster](#step-2-deploy-a-kubernetes-cluster). + + ::: + + + You don't need to set a service account configuration. + + + +1. Set the database configurations + + Based on the backend database you chose in [Step 3. Deploy a backend database](#step-3-deploy-a-backend-database), set the database configurations in `scalarDbAnalyticsServer.properties`. Select one of the following databases to see an example of these configurations. 
+ + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.db.url=jdbc:postgresql://:/ + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + ``` + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.db.url=jdbc:mysql://:/ + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + ``` + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.db.url=jdbc:sqlserver://:;databaseName=;encrypt=true;trustServerCertificate=true + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + ``` + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.db.url=jdbc:oracle:thin:@//:/ + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + ``` + + + +1. Set the object storage configurations + + Based on the object storage you chose in [Step 4. Deploy an object storage](#step-4-deploy-an-object-storage), please set object storage configurations in `scalarDbAnalyticsServer.properties`. Select one of the following object storages to see an example of these configurations. 
+ + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.metering.storage.provider=aws-s3 + scalar.db.analytics.server.metering.storage.accessKeyId= + scalar.db.analytics.server.metering.storage.secretAccessKey= + ``` + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.metering.storage.provider=azureblob + scalar.db.analytics.server.metering.storage.accessKeyId= + scalar.db.analytics.server.metering.storage.secretAccessKey= + ``` + + + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.metering.storage.provider=google-cloud-storage + scalar.db.analytics.server.metering.storage.accessKeyId= + scalar.db.analytics.server.metering.storage.secretAccessKey= + ``` + + + :::note + + You can use `filesystem` for testing or development purposes only. Filesystem is not recommended for production use. + + ::: + ```yaml + scalarDbAnalyticsServer: + properties: | + scalar.db.analytics.server.metering.storage.provider=filesystem + scalar.db.analytics.server.metering.storage.path=/tmp/scalardb-analytics-metering + ``` + + + +1. Set the service configurations + + Based on the connectivity of the ScalarDB Analytics server, you need to set `scalarDbAnalyticsServer.service.type`. Select one of the following types of connections to see an example of this configuration. + + + + If your Spark application accesses the ScalarDB Analytics server from outside of the Kubernetes cluster, set `scalarDbAnalyticsServer.service.type` to `LoadBalancer`. + + ```yaml + scalarDbAnalyticsServer: + service: + type: "LoadBalancer" + ``` + + + If your Spark application accesses the ScalarDB Analytics server from inside of the Kubernetes cluster, set `scalarDbAnalyticsServer.service.type` to `ClusterIP`. + + ```yaml + scalarDbAnalyticsServer: + service: + type: "ClusterIP" + ``` + + + +1. 
Check the required configurations + + After completing the above steps, you should have the following configurations, depending on your environment, for example: + + :::note + + These configurations are just examples. The actual configurations may be different from these examples. Please make sure to set configurations based on your environment. + + ::: + + + + ```yaml + scalarDbAnalyticsServer: + image: + repository: ghcr.io/scalar-labs/scalardb-analytics-server-byol + properties: | + # License configurations + scalar.db.analytics.server.licensing.license_key= + scalar.db.analytics.server.licensing.license_check_cert_pem=-----BEGIN CERTIFICATE-----\nMIID...certificate content...\n-----END CERTIFICATE----- + # Database configurations + scalar.db.analytics.server.db.url=jdbc:postgresql://:/ + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + # Object storage configurations + scalar.db.analytics.server.metering.storage.provider=azureblob + scalar.db.analytics.server.metering.storage.accessKeyId= + scalar.db.analytics.server.metering.storage.secretAccessKey= + service: + type: "LoadBalancer" + ``` + + + ```yaml + scalarDbAnalyticsServer: + image: + repository: 709825985650.dkr.ecr.us-east-1.amazonaws.com/scalar/scalardb-analytics-server-aws-payg + properties: | + # Database configurations + scalar.db.analytics.server.db.url=jdbc:mysql://:/ + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + # Object storage configurations + scalar.db.analytics.server.metering.storage.provider=aws-s3 + scalar.db.analytics.server.metering.storage.accessKeyId= + scalar.db.analytics.server.metering.storage.secretAccessKey= + service: + type: "ClusterIP" + serviceAccount: + serviceAccountName: "scalardb-analytics-payg-sa" + automountServiceAccountToken: true + ``` + + + :::note + + You can use `filesystem` for testing or development purposes only. Filesystem is not recommended for production use. 
+ + ::: + + ```yaml + scalarDbAnalyticsServer: + image: + repository: ghcr.io/scalar-labs/scalardb-analytics-server-byol + properties: | + # License configurations + scalar.db.analytics.server.licensing.license_key= + scalar.db.analytics.server.licensing.license_check_cert_pem=-----BEGIN CERTIFICATE-----\nMIID...certificate content...\n-----END CERTIFICATE----- + # Database configurations + scalar.db.analytics.server.db.url=jdbc:sqlserver://:;databaseName=;encrypt=true;trustServerCertificate=true + scalar.db.analytics.server.db.username= + scalar.db.analytics.server.db.password= + # Filesystem configurations + scalar.db.analytics.server.metering.storage.provider=filesystem + scalar.db.analytics.server.metering.storage.path=/tmp/scalardb-analytics-metering + service: + type: "ClusterIP" + ``` + + + +### Set the optional configurations + +You can see the optional configurations in [Optional configurations](../helm-charts/configure-custom-values-scalardb-analytics.mdx#optional-configurations). Set the optional configurations based on your environment if necessary. + +## Step 6. Deploy a ScalarDB Analytics server by using Helm Chart + +Deploy, upgrade, or uninstall the ScalarDB Analytics server deployment by using the `helm` command with your custom values file `scalardb-analytics-server.yaml` that you created in [Step 5. Create a custom values file](#step-5-create-a-custom-values-file). + + + +## Step 7. Check your deployment + +After deploying the ScalarDB Analytics server or upgrading it, you should check the following points: + +1. Check if the pod status is `Running` by running the following command: + + ```console + kubectl get pod --namespace <NAMESPACE> + ``` + + :::note + + For the `--namespace` option, change `<NAMESPACE>` to the name of the Kubernetes namespace that you deployed the ScalarDB Analytics server to. 
+ + ::: + + For example, you can see `Running` in the `STATUS` column and `1/1` in the `READY` column as follows: + + ```console + $ kubectl get pod + NAME READY STATUS RESTARTS AGE + scalardb-analytics-server-86767fff4c-p6nkq 1/1 Running 0 22m + ``` + +1. Check if the service is exposed by running the following command: + + ```console + kubectl get svc --namespace <NAMESPACE> + ``` + + :::note + + For the `--namespace` option, change `<NAMESPACE>` to the name of the Kubernetes namespace that you deployed the ScalarDB Analytics server to. + + ::: + + + + If you set `scalarDbAnalyticsServer.service.type` to `LoadBalancer` in [Step 5. Create a custom values file](#step-5-create-a-custom-values-file), you'll see the IP address or FQDN (depending on your Kubernetes cluster) in the `EXTERNAL-IP` column as follows: + + ```console + $ kubectl get svc + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 4h54m + scalardb-analytics-server LoadBalancer 10.98.116.121 127.0.0.1 11051:32619/TCP,11052:32598/TCP 2m43s + ``` + + :::note + + If you're using minikube for testing or development purposes, you'll need to run the [minikube tunnel](https://minikube.sigs.k8s.io/docs/commands/tunnel/) command to expose the `LoadBalancer` service. + + ::: + + + + If you set `scalarDbAnalyticsServer.service.type` to `ClusterIP` in [Step 5. 
Create a custom values file](#step-5-create-a-custom-values-file), you'll see the IP address in the `CLUSTER-IP` column as follows: + ```console + $ kubectl get svc + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 4h56m + scalardb-analytics-server ClusterIP 10.102.141.240 <none> 11051/TCP,11052/TCP 3s + ``` + + From e0c3505800913437f76ff1476d9eacf855d6c574 Mon Sep 17 00:00:00 2001 From: Josh Wong <23216828+josh-wong@users.noreply.github.com> Date: Thu, 30 Oct 2025 13:34:40 +0900 Subject: [PATCH 2/2] Add deployment doc for ScalarDB Analytics server --- sidebars.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sidebars.js b/sidebars.js index 28b349a6..dbfd7068 100644 --- a/sidebars.js +++ b/sidebars.js @@ -645,6 +645,11 @@ const sidebars = { id: 'scalardb-analytics/deployment', label: 'Deploy ScalarDB Analytics in Public Cloud Environments', }, + { + type: 'doc', + id: 'scalardb-analytics/deploy-scalardb-analytics-server', + label: 'Deploy a ScalarDB Analytics Server', + }, { type: 'category', label: 'Reference',