From fcbda0730d720f73eeb223a11938cf9d3eaea1fd Mon Sep 17 00:00:00 2001 From: kbatuigas <36839689+kbatuigas@users.noreply.github.com> Date: Fri, 1 Aug 2025 13:35:00 -0700 Subject: [PATCH 1/7] Cloud users must use specific AWS rest catalog credential properties --- local-antora-playbook.yml | 2 +- .../iceberg/iceberg-topics-aws-glue.adoc | 26 ++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index e15360d1a9..a23cae41ea 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -17,7 +17,7 @@ content: - url: https://github.com/redpanda-data/docs branches: [v/*, api, shared, site-search,'!v-end-of-life/*'] - url: https://github.com/redpanda-data/cloud-docs - branches: 'main' + branches: 'DOC-1377-cloud-aws-glue' - url: https://github.com/redpanda-data/redpanda-labs branches: main start_paths: [docs,'*/docs'] diff --git a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc index af4c531f48..82419a9e19 100644 --- a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc +++ b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc @@ -78,7 +78,8 @@ For more information on configuring IAM permissions, see the https://docs.aws.am == Configure authentication and credentials -You can configure credentials for the AWS Glue Data Catalog integration in either of the following ways: +ifndef::env-cloud[] +You must configure credentials for the AWS Glue Data Catalog integration in either of the following ways: * Allow Redpanda to use the same `cloud_storage_*` credential properties configured for S3. If you do not configure the overrides listed below, Redpanda uses the same credentials for both S3 and AWS Glue. This is the recommended approach. 
* If you want to configure authentication to AWS Glue separately from authentication to S3, there are equivalent credential configuration properties named `iceberg_rest_catalog_aws_*` that override the object storage credentials. These properties only apply to REST catalog authentication, and never to S3 authentication: @@ -86,6 +87,15 @@ You can configure credentials for the AWS Glue Data Catalog integration in eithe ** config_ref:iceberg_rest_catalog_aws_secret_key,true,properties/cluster-properties[`iceberg_rest_catalog_aws_secret_key`] overrides config_ref:cloud_storage_secret_key,true,properties/cluster-properties[`cloud_storage_secret_key`] ** config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] overrides config_ref:cloud_storage_region,true,properties/cluster-properties[`cloud_storage_region`] ** config_ref:iceberg_rest_catalog_aws_credentials_source,true,properties/cluster-properties[`iceberg_rest_catalog_aws_credentials_source`] overrides config_ref:cloud_storage_credentials_source,true,properties/cluster-properties[`cloud_storage_credentials_source`] +endif::[] + +ifdef::env-cloud[] +You must configure credentials for the AWS Glue Data Catalog integration using the following properties: + +* config_ref:iceberg_rest_catalog_aws_access_key,true,properties/cluster-properties[`iceberg_rest_catalog_aws_access_key`] +* config_ref:iceberg_rest_catalog_aws_secret_key,true,properties/cluster-properties[`iceberg_rest_catalog_aws_secret_key`], added as a secret value +* config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] +endif::[] == Update cluster configuration @@ -103,12 +113,18 @@ iceberg_catalog_type: rest iceberg_rest_catalog_endpoint: https://glue..amazonaws.com/iceberg iceberg_rest_catalog_authentication_mode: aws_sigv4 iceberg_rest_catalog_base_location: s3:/// +# Also configure the following if you want to use separate AWS credentials +# 
for the catalog: +# iceberg_rest_catalog_aws_access_key +# iceberg_rest_catalog_aws_secret_key +# iceberg_rest_catalog_aws_region +# iceberg_rest_catalog_aws_credentials_source ---- endif::[] ifdef::env-cloud[] Use `rpk` like in the following example, or use the Cloud API to xref:manage:cluster-maintenance/config-cluster.adoc#set-cluster-configuration-properties[update these cluster properties]. The update might take several minutes to complete. + -To reference a secret in a cluster property, you must first xref:manage:iceberg/use-iceberg-catalogs.adoc#store-a-secret-for-rest-catalog-authentication[store the secret value]. +To reference a secret in a cluster property, for example `iceberg_rest_catalog_aws_secret_key`, you must first xref:manage:iceberg/use-iceberg-catalogs.adoc#store-a-secret-for-rest-catalog-authentication[store the secret value]. + [,bash] ---- @@ -122,7 +138,11 @@ rpk cluster config set \ iceberg_rest_catalog_endpoint=https://glue..amazonaws.com/iceberg \ iceberg_rest_catalog_authentication_mode=aws_sigv4 \ iceberg_rest_catalog_base_location=s3:/// - + # Also configure the following if you want to use separate AWS credentials + # for the catalog: + # iceberg_rest_catalog_aws_access_key + # iceberg_rest_catalog_aws_secret_key + # iceberg_rest_catalog_aws_region ---- endif::[] + From 2cdfad4d4cc9dfd73067c3bce0fe74547010168c Mon Sep 17 00:00:00 2001 From: kbatuigas <36839689+kbatuigas@users.noreply.github.com> Date: Fri, 1 Aug 2025 14:19:37 -0700 Subject: [PATCH 2/7] Display correct RP version and rpk install page based on context --- .../manage/pages/iceberg/iceberg-topics-aws-glue.adoc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc index 82419a9e19..14a211c7b5 100644 --- a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc +++ b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc @@ -3,10 
+3,12 @@ :page-categories: Iceberg, Tiered Storage, Management, High Availability, Data Replication, Integration :page-beta: true ifdef::env-cloud[] -:rpk-install-doc: manage:rpk/rpk-install.adoc +:rp_version: 25.2 +:rpk_install_doc: manage:rpk/rpk-install.adoc endif::[] ifndef::env-cloud[] -:rpk-install-doc: get-started:rpk-install.adoc +:rp_version: 25.1.7 +:rpk_install_doc: get-started:rpk-install.adoc endif::[] @@ -21,8 +23,8 @@ This guide walks you through querying Redpanda topics as Iceberg tables stored i == Prerequisites -* Redpanda version 25.1.7 or later. -* xref:{rpk-install-doc}[`rpk`] installed or updated to the latest version. +* Redpanda version {rp_version} or later. +* xref:{rpk_install_doc}[`rpk`] installed or updated to the latest version. ifdef::env-cloud[] ** You can also use the Redpanda Cloud API to xref:manage:cluster-maintenance/config-cluster.adoc#set-cluster-configuration-properties[reference secrets in your cluster configuration]. endif::[] From fe550b0edb2d5179c2decffa8d7dbc729f056a75 Mon Sep 17 00:00:00 2001 From: kbatuigas <36839689+kbatuigas@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:06:08 -0700 Subject: [PATCH 3/7] Add more cloud-specific guidance for Glue integration --- .../iceberg/iceberg-topics-aws-glue.adoc | 49 +++++++++++-------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc index 14a211c7b5..bf7cb85105 100644 --- a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc +++ b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc @@ -2,23 +2,23 @@ :description: Add Redpanda topics as Iceberg tables that you can query from AWS Glue Data Catalog. 
:page-categories: Iceberg, Tiered Storage, Management, High Availability, Data Replication, Integration :page-beta: true + +[NOTE] +==== +include::shared:partial$enterprise-license.adoc[] +==== + +// tag::single-source[] ifdef::env-cloud[] :rp_version: 25.2 :rpk_install_doc: manage:rpk/rpk-install.adoc endif::[] + ifndef::env-cloud[] :rp_version: 25.1.7 :rpk_install_doc: get-started:rpk-install.adoc endif::[] - -[NOTE] -==== -include::shared:partial$enterprise-license.adoc[] -==== - -// tag::single-source[] - This guide walks you through querying Redpanda topics as Iceberg tables stored in AWS S3, using a catalog integration with https://docs.aws.amazon.com/glue/latest/dg/components-overview.html#data-catalog-intro[AWS Glue^]. For general information about Iceberg catalog integrations in Redpanda, see xref:manage:iceberg/use-iceberg-catalogs.adoc[]. == Prerequisites @@ -95,7 +95,7 @@ ifdef::env-cloud[] You must configure credentials for the AWS Glue Data Catalog integration using the following properties: * config_ref:iceberg_rest_catalog_aws_access_key,true,properties/cluster-properties[`iceberg_rest_catalog_aws_access_key`] -* config_ref:iceberg_rest_catalog_aws_secret_key,true,properties/cluster-properties[`iceberg_rest_catalog_aws_secret_key`], added as a secret value +* config_ref:iceberg_rest_catalog_aws_secret_key,true,properties/cluster-properties[`iceberg_rest_catalog_aws_secret_key`], added as a secret value (see the <<update-cluster-configuration,next section>> for details) * config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] endif::[] @@ -117,17 +117,22 @@ iceberg_rest_catalog_authentication_mode: aws_sigv4 iceberg_rest_catalog_base_location: s3:/// # Also configure the following if you want to use separate AWS credentials # for the catalog: -# iceberg_rest_catalog_aws_access_key -# iceberg_rest_catalog_aws_secret_key -# iceberg_rest_catalog_aws_region -# iceberg_rest_catalog_aws_credentials_source +# 
iceberg_rest_catalog_aws_access_key: +# iceberg_rest_catalog_aws_secret_key: +# iceberg_rest_catalog_aws_region: +# iceberg_rest_catalog_aws_credentials_source: ---- ++ +Use your own values for the following placeholders: ++ +-- +- ``: The AWS region where your Data Catalog is located. The region in the AWS Glue endpoint must match the region specified in either your config_ref:cloud_storage_region,true,properties/cluster-properties[`cloud_storage_region`] or config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] property. +- `` and ``: AWS Glue requires you to specify the base location where Redpanda stores Iceberg data and metadata files. You must use an S3 URI; for example, `s3:///iceberg`. As a security best practice, Redpanda Data recommends specifying a subfolder (using prefixes) rather than the root of the bucket. +-- endif::[] ifdef::env-cloud[] Use `rpk` like in the following example, or use the Cloud API to xref:manage:cluster-maintenance/config-cluster.adoc#set-cluster-configuration-properties[update these cluster properties]. The update might take several minutes to complete. + -To reference a secret in a cluster property, for example `iceberg_rest_catalog_aws_secret_key`, you must first xref:manage:iceberg/use-iceberg-catalogs.adoc#store-a-secret-for-rest-catalog-authentication[store the secret value]. 
-+ [,bash] ---- rpk cloud login @@ -140,20 +145,22 @@ rpk cluster config set \ iceberg_rest_catalog_endpoint=https://glue..amazonaws.com/iceberg \ iceberg_rest_catalog_authentication_mode=aws_sigv4 \ iceberg_rest_catalog_base_location=s3:/// - # Also configure the following if you want to use separate AWS credentials - # for the catalog: - # iceberg_rest_catalog_aws_access_key - # iceberg_rest_catalog_aws_secret_key - # iceberg_rest_catalog_aws_region + + iceberg_rest_catalog_aws_access_key= + iceberg_rest_catalog_aws_secret_key=${secrets.} + iceberg_rest_catalog_aws_region= ---- -endif::[] + Use your own values for the following placeholders: + -- - ``: The AWS region where your Data Catalog is located. The region in the AWS Glue endpoint must match the region specified in either your config_ref:cloud_storage_region,true,properties/cluster-properties[`cloud_storage_region`] or config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] property. - `` and ``: AWS Glue requires you to specify the base location where Redpanda stores Iceberg data and metadata files. You must use an S3 URI; for example, `s3:///iceberg`. As a security best practice, Redpanda Data recommends specifying a subfolder (using prefixes) rather than the root of the bucket. +- ``: The AWS access key ID for your Glue service account. +- ``: The name of the secret that stores the AWS secret access key for your Glue service account. To reference a secret in a cluster property, for example `iceberg_rest_catalog_aws_secret_key`, you must first xref:manage:iceberg/use-iceberg-catalogs.adoc#store-a-secret-for-rest-catalog-authentication[store the secret value]. +- ``: The AWS region where your Glue service is located. 
-- +endif::[] + [,bash,role=no-copy] ---- From 70cf11fe20248f5d54e8d5a8925ce57cfd1bfb0c Mon Sep 17 00:00:00 2001 From: kbatuigas <36839689+kbatuigas@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:40:01 -0700 Subject: [PATCH 4/7] Apply suggestions from automated review --- .../pages/iceberg/iceberg-topics-aws-glue.adoc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc index bf7cb85105..5ca2fa3772 100644 --- a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc +++ b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc @@ -112,11 +112,11 @@ Run `rpk cluster config edit` to update these properties: ---- iceberg_enabled: true iceberg_catalog_type: rest -iceberg_rest_catalog_endpoint: https://glue..amazonaws.com/iceberg +iceberg_rest_catalog_endpoint: https://glue..amazonaws.com/iceberg iceberg_rest_catalog_authentication_mode: aws_sigv4 iceberg_rest_catalog_base_location: s3:/// -# Also configure the following if you want to use separate AWS credentials -# for the catalog: +# Use the following properties if you want to use separate AWS credentials +# for the catalog, or delete to reuse S3 (cloud_storage) credentials: # iceberg_rest_catalog_aws_access_key: # iceberg_rest_catalog_aws_secret_key: # iceberg_rest_catalog_aws_region: @@ -126,7 +126,7 @@ iceberg_rest_catalog_base_location: s3:/// Use your own values for the following placeholders: + -- -- ``: The AWS region where your Data Catalog is located. The region in the AWS Glue endpoint must match the region specified in either your config_ref:cloud_storage_region,true,properties/cluster-properties[`cloud_storage_region`] or config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] property. +- ``: The AWS region where your Data Catalog is located. 
The region in the AWS Glue endpoint must match the region specified in either your config_ref:cloud_storage_region,true,properties/cluster-properties[`cloud_storage_region`] or config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] property. - `` and ``: AWS Glue requires you to specify the base location where Redpanda stores Iceberg data and metadata files. You must use an S3 URI; for example, `s3:///iceberg`. As a security best practice, Redpanda Data recommends specifying a subfolder (using prefixes) rather than the root of the bucket. -- endif::[] @@ -142,23 +142,21 @@ rpk profile create --from-cloud rpk cluster config set \ iceberg_enabled=true \ iceberg_catalog_type=rest \ - iceberg_rest_catalog_endpoint=https://glue..amazonaws.com/iceberg \ + iceberg_rest_catalog_endpoint=https://glue..amazonaws.com/iceberg \ iceberg_rest_catalog_authentication_mode=aws_sigv4 \ iceberg_rest_catalog_base_location=s3:/// iceberg_rest_catalog_aws_access_key= iceberg_rest_catalog_aws_secret_key=${secrets.} - iceberg_rest_catalog_aws_region= ---- + Use your own values for the following placeholders: + -- -- ``: The AWS region where your Data Catalog is located. The region in the AWS Glue endpoint must match the region specified in either your config_ref:cloud_storage_region,true,properties/cluster-properties[`cloud_storage_region`] or config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] property. +- ``: The AWS region where your Data Catalog is located. The region in the AWS Glue endpoint must match the region specified in your config_ref:iceberg_rest_catalog_aws_region,true,properties/cluster-properties[`iceberg_rest_catalog_aws_region`] property. - `` and ``: AWS Glue requires you to specify the base location where Redpanda stores Iceberg data and metadata files. You must use an S3 URI; for example, `s3:///iceberg`. 
As a security best practice, Redpanda Data recommends specifying a subfolder (using prefixes) rather than the root of the bucket. - ``: The AWS access key ID for your Glue service account. - ``: The name of the secret that stores the AWS secret access key for your Glue service account. To reference a secret in a cluster property, for example `iceberg_rest_catalog_aws_secret_key`, you must first xref:manage:iceberg/use-iceberg-catalogs.adoc#store-a-secret-for-rest-catalog-authentication[store the secret value]. -- ``: The AWS region where your Glue service is located. -- endif::[] + From 93374774f3487a5e18bdc4d200c40efed292fa5a Mon Sep 17 00:00:00 2001 From: kbatuigas <36839689+kbatuigas@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:00:08 -0700 Subject: [PATCH 5/7] Recommend separate policy or role for access to Glue for Cloud --- .../pages/iceberg/iceberg-topics-aws-glue.adoc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc index 5ca2fa3772..81430556fd 100644 --- a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc +++ b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc @@ -46,15 +46,21 @@ If you want to use partitioning, you must specify a custom partition specificati == Authorize access to AWS Glue +ifndef::env-cloud[] You must allow Redpanda access to AWS Glue services in your AWS account. You can use the same access credentials that you configured for S3 (IAM role, access keys, and KMS key), as long as you have also added read and write access to AWS Glue Data Catalog. For example, you could create a separate IAM policy that manages access to AWS Glue, and attach it to the IAM role that Redpanda also uses to access S3. 
It is recommended to add all AWS Glue API actions in the policy (`"glue:*"`) on the following resources: +endif::[] + +ifdef::env-cloud[] +You must allow Redpanda access to AWS Glue services in your AWS account. It is recommended to create a new IAM policy or role that manages access to AWS Glue, allowing all AWS Glue API actions (`"glue:*"`) on the following resources: +endif::[] - Root catalog (`catalog`) - All databases (`database/*`) - All tables (`table/\*/*`) -Your policy should include a statement similar to the following: +Your IAM policy should include a statement similar to the following: [,json] ---- @@ -116,10 +122,13 @@ iceberg_rest_catalog_endpoint: https://glue..amazonaws.com/iceberg iceberg_rest_catalog_authentication_mode: aws_sigv4 iceberg_rest_catalog_base_location: s3:/// # Use the following properties if you want to use separate AWS credentials -# for the catalog, or delete to reuse S3 (cloud_storage) credentials: +# for the catalog, or delete to reuse S3 (cloud_storage) credentials. +# iceberg_rest_catalog_aws_region: +# For access using access keys only, use iceberg_rest_catalog_aws_access_key +# and iceberg_rest_catalog_aws_secret_key. For access with an IAM role, use +# iceberg_rest_catalog_aws_credentials_source only. 
# iceberg_rest_catalog_aws_access_key: # iceberg_rest_catalog_aws_secret_key: -# iceberg_rest_catalog_aws_region: # iceberg_rest_catalog_aws_credentials_source: ---- + From 59b8b83a639e39ee1c82e79dfae225edb1ce1d2c Mon Sep 17 00:00:00 2001 From: kbatuigas <36839689+kbatuigas@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:46:57 -0700 Subject: [PATCH 6/7] Minor cleanup for required properties --- .../manage/pages/iceberg/iceberg-topics-aws-glue.adoc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc index 81430556fd..7351e97bab 100644 --- a/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc +++ b/modules/manage/pages/iceberg/iceberg-topics-aws-glue.adoc @@ -121,12 +121,13 @@ iceberg_catalog_type: rest iceberg_rest_catalog_endpoint: https://glue..amazonaws.com/iceberg iceberg_rest_catalog_authentication_mode: aws_sigv4 iceberg_rest_catalog_base_location: s3:/// -# Use the following properties if you want to use separate AWS credentials -# for the catalog, or delete to reuse S3 (cloud_storage) credentials. -# iceberg_rest_catalog_aws_region: +# Use the iceberg_rest_catalog_aws_* properties if you want to +# use separate AWS credentials for the catalog, or delete them to reuse S3 +# (cloud_storage_*) credentials. # For access using access keys only, use iceberg_rest_catalog_aws_access_key # and iceberg_rest_catalog_aws_secret_key. For access with an IAM role, use # iceberg_rest_catalog_aws_credentials_source only. 
+# iceberg_rest_catalog_aws_region: # iceberg_rest_catalog_aws_access_key: # iceberg_rest_catalog_aws_secret_key: # iceberg_rest_catalog_aws_credentials_source: @@ -154,7 +155,7 @@ rpk cluster config set \ iceberg_rest_catalog_endpoint=https://glue..amazonaws.com/iceberg \ iceberg_rest_catalog_authentication_mode=aws_sigv4 \ iceberg_rest_catalog_base_location=s3:/// - + iceberg_rest_catalog_aws_region= iceberg_rest_catalog_aws_access_key= iceberg_rest_catalog_aws_secret_key=${secrets.} ---- From a99ecc99801d1e38eb5c082ee5982851d643dc84 Mon Sep 17 00:00:00 2001 From: Kat Batuigas <36839689+kbatuigas@users.noreply.github.com> Date: Tue, 5 Aug 2025 16:04:13 -0700 Subject: [PATCH 7/7] Update local-antora-playbook.yml --- local-antora-playbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index a23cae41ea..e15360d1a9 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -17,7 +17,7 @@ content: - url: https://github.com/redpanda-data/docs branches: [v/*, api, shared, site-search,'!v-end-of-life/*'] - url: https://github.com/redpanda-data/cloud-docs - branches: 'DOC-1377-cloud-aws-glue' + branches: 'main' - url: https://github.com/redpanda-data/redpanda-labs branches: main start_paths: [docs,'*/docs']