From 023afa595092aec4767fb27138cfd0b564dbd66c Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 10:56:16 +0200 Subject: [PATCH 01/90] extract common properties to role config --- rust/operator-binary/src/crd/mod.rs | 70 +++++++++++++------ rust/operator-binary/src/kafka_controller.rs | 3 +- .../src/operations/graceful_shutdown.rs | 4 +- 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 56de9409..d6a9e735 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -68,8 +68,6 @@ pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; pub const STACKABLE_KERBEROS_DIR: &str = "/stackable/kerberos"; pub const STACKABLE_KERBEROS_KRB5_PATH: &str = "/stackable/kerberos/krb5.conf"; -const DEFAULT_BROKER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(30); - #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("object has no namespace associated"))] @@ -413,27 +411,21 @@ pub enum Container { Kafka, } -#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] +#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)] #[fragment_attrs( derive( Clone, Debug, Default, Deserialize, - JsonSchema, Merge, + JsonSchema, PartialEq, Serialize ), serde(rename_all = "camelCase") )] -pub struct KafkaConfig { - #[fragment_attrs(serde(default))] - pub logging: Logging, - - #[fragment_attrs(serde(default))] - pub resources: Resources, - +pub struct CommonRoleConfig { #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, @@ -441,24 +433,63 @@ pub struct KafkaConfig { #[fragment_attrs(serde(default))] pub graceful_shutdown_timeout: Option, + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
+ #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, +} + +impl CommonRoleConfig { + const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(30); + // Auto TLS certificate lifetime + const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); + + pub fn default_config(cluster_name: &str, role: &KafkaRole) -> CommonRoleConfigFragment { + CommonRoleConfigFragment { + affinity: get_affinity(cluster_name, role), + graceful_shutdown_timeout: Some(Self::DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT), + requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME), + } + } +} + +#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct KafkaConfig { + #[fragment_attrs(serde(flatten))] + pub common_role_config: CommonRoleConfig, + /// The ListenerClass used for bootstrapping new clients. Should use a stable ListenerClass to avoid unnecessary client restarts (such as `cluster-internal` or `external-stable`). pub bootstrap_listener_class: String, /// The ListenerClass used for connecting to brokers. Should use a direct connection ListenerClass to minimize cost and minimize performance overhead (such as `cluster-internal` or `external-unstable`). pub broker_listener_class: String, - /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. - /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
#[fragment_attrs(serde(default))] - pub requested_secret_lifetime: Option, + pub logging: Logging, + + #[fragment_attrs(serde(default))] + pub resources: Resources, } impl KafkaConfig { - // Auto TLS certificate lifetime - const DEFAULT_BROKER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); - pub fn default_config(cluster_name: &str, role: &KafkaRole) -> KafkaConfigFragment { KafkaConfigFragment { + common_role_config: CommonRoleConfig::default_config(cluster_name, role), + bootstrap_listener_class: Some("cluster-internal".to_string()), + broker_listener_class: Some("cluster-internal".to_string()), logging: product_logging::spec::default_logging(), resources: ResourcesFragment { cpu: CpuLimitsFragment { @@ -477,11 +508,6 @@ impl KafkaConfig { }, }, }, - affinity: get_affinity(cluster_name, role), - graceful_shutdown_timeout: Some(DEFAULT_BROKER_GRACEFUL_SHUTDOWN_TIMEOUT), - bootstrap_listener_class: Some("cluster-internal".to_string()), - broker_listener_class: Some("cluster-internal".to_string()), - requested_secret_lifetime: Some(Self::DEFAULT_BROKER_SECRET_LIFETIME), } } } diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 98e6240b..abcdf50d 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -854,6 +854,7 @@ fn build_broker_rolegroup_statefulset( // Add TLS related volumes and volume mounts let requested_secret_lifetime = merged_config + .common_role_config .requested_secret_lifetime .context(MissingSecretLifetimeSnafu)?; kafka_security @@ -1068,7 +1069,7 @@ fn build_broker_rolegroup_statefulset( .image_pull_secrets_from_product_image(resolved_product_image) .add_container(cb_kafka.build()) .add_container(cb_kcat_prober.build()) - .affinity(&merged_config.affinity) + .affinity(&merged_config.common_role_config.affinity) .add_volume(Volume { name: "config".to_string(), config_map: Some(ConfigMapVolumeSource { diff --git 
a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index a3cc3f02..420ea7a0 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -25,7 +25,9 @@ pub fn add_graceful_shutdown_config( ) -> Result<(), Error> { // This must be always set by the merge mechanism, as we provide a default value, // users can not disable graceful shutdown. - if let Some(graceful_shutdown_timeout) = merged_config.graceful_shutdown_timeout { + if let Some(graceful_shutdown_timeout) = + merged_config.common_role_config.graceful_shutdown_timeout + { pod_builder .termination_grace_period(&graceful_shutdown_timeout) .context(SetTerminationGracePeriodSnafu)?; From 0a21d4441a23cf6a439352a034523d3bc5a43596 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 10:59:14 +0200 Subject: [PATCH 02/90] rename KafkaConfig to BrokerConfig --- rust/operator-binary/src/config/jvm.rs | 18 ++++++++--------- rust/operator-binary/src/crd/mod.rs | 20 +++++++++---------- rust/operator-binary/src/kafka_controller.rs | 10 +++++----- .../src/operations/graceful_shutdown.rs | 4 ++-- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 355a42cb..129dd9e0 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -5,7 +5,7 @@ use stackable_operator::{ }; use crate::crd::{ - JVM_SECURITY_PROPERTIES_FILE, KafkaConfig, KafkaConfigFragment, METRICS_PORT, + BrokerConfig, BrokerConfigFragment, JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, STACKABLE_CONFIG_DIR, }; @@ -27,8 +27,8 @@ pub enum Error { /// All JVM arguments. 
fn construct_jvm_args( - merged_config: &KafkaConfig, - role: &Role, + merged_config: &BrokerConfig, + role: &Role, role_group: &str, ) -> Result, Error> { let heap_size = MemoryQuantity::try_from( @@ -69,8 +69,8 @@ fn construct_jvm_args( /// Arguments that go into `EXTRA_ARGS`, so *not* the heap settings (which you can get using /// [`construct_heap_jvm_args`]). pub fn construct_non_heap_jvm_args( - merged_config: &KafkaConfig, - role: &Role, + merged_config: &BrokerConfig, + role: &Role, role_group: &str, ) -> Result { let mut jvm_args = construct_jvm_args(merged_config, role, role_group)?; @@ -82,8 +82,8 @@ pub fn construct_non_heap_jvm_args( /// Arguments that go into `KAFKA_HEAP_OPTS`. /// You can get the normal JVM arguments using [`construct_non_heap_jvm_args`]. pub fn construct_heap_jvm_args( - merged_config: &KafkaConfig, - role: &Role, + merged_config: &BrokerConfig, + role: &Role, role_group: &str, ) -> Result { let mut jvm_args = construct_jvm_args(merged_config, role, role_group)?; @@ -186,8 +186,8 @@ mod tests { fn construct_boilerplate( kafka_cluster: &str, ) -> ( - KafkaConfig, - Role, + BrokerConfig, + Role, String, ) { let kafka: v1alpha1::KafkaCluster = diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index d6a9e735..7c962a30 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -130,7 +130,7 @@ pub mod versioned { pub image: ProductImage, // no doc - docs in Role struct. - pub brokers: Option>, + pub brokers: Option>, /// Kafka settings that affect all roles and role groups. 
/// @@ -203,7 +203,7 @@ impl v1alpha1::KafkaCluster { pub fn role( &self, role_variant: &KafkaRole, - ) -> Result<&Role, Error> { + ) -> Result<&Role, Error> { match role_variant { KafkaRole::Broker => self.spec.brokers.as_ref(), } @@ -215,7 +215,7 @@ impl v1alpha1::KafkaCluster { pub fn rolegroup( &self, rolegroup_ref: &RoleGroupRef, - ) -> Result<&RoleGroup, Error> { + ) -> Result<&RoleGroup, Error> { let role_variant = KafkaRole::from_str(&rolegroup_ref.role).with_context(|_| UnknownKafkaRoleSnafu { role: rolegroup_ref.role.to_owned(), @@ -266,9 +266,9 @@ impl v1alpha1::KafkaCluster { &self, role: &KafkaRole, rolegroup_ref: &RoleGroupRef, - ) -> Result { + ) -> Result { // Initialize the result with all default values as baseline - let conf_defaults = KafkaConfig::default_config(&self.name_any(), role); + let conf_defaults = BrokerConfig::default_config(&self.name_any(), role); // Retrieve role resource config let role = self.role(role)?; @@ -467,7 +467,7 @@ impl CommonRoleConfig { ), serde(rename_all = "camelCase") )] -pub struct KafkaConfig { +pub struct BrokerConfig { #[fragment_attrs(serde(flatten))] pub common_role_config: CommonRoleConfig, @@ -484,9 +484,9 @@ pub struct KafkaConfig { pub resources: Resources, } -impl KafkaConfig { - pub fn default_config(cluster_name: &str, role: &KafkaRole) -> KafkaConfigFragment { - KafkaConfigFragment { +impl BrokerConfig { + pub fn default_config(cluster_name: &str, role: &KafkaRole) -> BrokerConfigFragment { + BrokerConfigFragment { common_role_config: CommonRoleConfig::default_config(cluster_name, role), bootstrap_listener_class: Some("cluster-internal".to_string()), broker_listener_class: Some("cluster-internal".to_string()), @@ -512,7 +512,7 @@ impl KafkaConfig { } } -impl Configuration for KafkaConfigFragment { +impl Configuration for BrokerConfigFragment { type Configurable = v1alpha1::KafkaCluster; fn compute_env( diff --git a/rust/operator-binary/src/kafka_controller.rs 
b/rust/operator-binary/src/kafka_controller.rs index abcdf50d..b10ae850 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -75,8 +75,8 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ - APP_NAME, Container, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, - KafkaClusterStatus, KafkaConfig, KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, + APP_NAME, BrokerConfig, Container, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, + KAFKA_HEAP_OPTS, KafkaClusterStatus, KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, SERVER_PROPERTIES_FILE, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, @@ -646,7 +646,7 @@ pub fn build_broker_rolegroup_bootstrap_listener( resolved_product_image: &ResolvedProductImage, kafka_security: &KafkaTlsSecurity, rolegroup: &RoleGroupRef, - merged_config: &KafkaConfig, + merged_config: &BrokerConfig, ) -> Result { Ok(listener::v1alpha1::Listener { metadata: ObjectMetaBuilder::new() @@ -679,7 +679,7 @@ fn build_broker_rolegroup_config_map( kafka_security: &KafkaTlsSecurity, rolegroup: &RoleGroupRef, broker_config: &HashMap>, - merged_config: &KafkaConfig, + merged_config: &BrokerConfig, ) -> Result { let mut server_cfg = broker_config .get(&PropertyNameKind::File(SERVER_PROPERTIES_FILE.to_string())) @@ -810,7 +810,7 @@ fn build_broker_rolegroup_statefulset( broker_config: &HashMap>, opa_connect_string: Option<&str>, kafka_security: &KafkaTlsSecurity, - merged_config: &KafkaConfig, + merged_config: &BrokerConfig, service_account: &ServiceAccount, cluster_info: &KubernetesClusterInfo, ) -> Result { diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs 
b/rust/operator-binary/src/operations/graceful_shutdown.rs index 420ea7a0..9a0c3b95 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use snafu::{ResultExt, Snafu}; use stackable_operator::builder::pod::PodBuilder; -use crate::crd::KafkaConfig; +use crate::crd::BrokerConfig; #[derive(Debug, Snafu)] pub enum Error { @@ -20,7 +20,7 @@ pub fn graceful_shutdown_config_properties() -> BTreeMap { } pub fn add_graceful_shutdown_config( - merged_config: &KafkaConfig, + merged_config: &BrokerConfig, pod_builder: &mut PodBuilder, ) -> Result<(), Error> { // This must be always set by the merge mechanism, as we provide a default value, From cd6e03becb6f9d6cd2bb1aabe1eaeb7e2e4439f3 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 11:24:08 +0200 Subject: [PATCH 03/90] rename Container to BrokerContainer --- rust/operator-binary/src/crd/mod.rs | 4 +-- rust/operator-binary/src/kafka_controller.rs | 26 ++++++++++++-------- rust/operator-binary/src/product_logging.rs | 10 ++++---- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 7c962a30..01e98b6a 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -404,7 +404,7 @@ impl Storage { )] #[serde(rename_all = "kebab-case")] #[strum(serialize_all = "kebab-case")] -pub enum Container { +pub enum BrokerContainer { Vector, KcatProber, GetService, @@ -478,7 +478,7 @@ pub struct BrokerConfig { pub broker_listener_class: String, #[fragment_attrs(serde(default))] - pub logging: Logging, + pub logging: Logging, #[fragment_attrs(serde(default))] pub resources: Resources, diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index b10ae850..c0ff2f34 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ 
b/rust/operator-binary/src/kafka_controller.rs @@ -75,12 +75,12 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ - APP_NAME, BrokerConfig, Container, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, - KAFKA_HEAP_OPTS, KafkaClusterStatus, KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, - LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, - OPERATOR_NAME, SERVER_PROPERTIES_FILE, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, - STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, - STACKABLE_LOG_DIR, + APP_NAME, BrokerConfig, BrokerContainer, DOCKER_IMAGE_BASE_NAME, + JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, KafkaClusterStatus, KafkaRole, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, + METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, SERVER_PROPERTIES_FILE, + STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, + STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, listener::{KafkaListenerError, get_kafka_listener_config}, security::KafkaTlsSecurity, v1alpha1, @@ -838,13 +838,13 @@ fn build_broker_rolegroup_statefulset( )) .context(LabelBuildSnafu)?; - let kcat_prober_container_name = Container::KcatProber.to_string(); + let kcat_prober_container_name = BrokerContainer::KcatProber.to_string(); let mut cb_kcat_prober = ContainerBuilder::new(&kcat_prober_container_name).context(InvalidContainerNameSnafu { name: kcat_prober_container_name.clone(), })?; - let kafka_container_name = Container::Kafka.to_string(); + let kafka_container_name = BrokerContainer::Kafka.to_string(); let mut cb_kafka = ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu { name: kafka_container_name.clone(), @@ -1040,7 +1040,10 @@ fn build_broker_rolegroup_statefulset( Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig 
{ custom: ConfigMapLogConfig { config_map }, })), - }) = merged_config.logging.containers.get(&Container::Kafka) + }) = merged_config + .logging + .containers + .get(&BrokerContainer::Kafka) { pod_builder .add_volume( @@ -1105,7 +1108,10 @@ fn build_broker_rolegroup_statefulset( resolved_product_image, "config", "log", - merged_config.logging.containers.get(&Container::Vector), + merged_config + .logging + .containers + .get(&BrokerContainer::Vector), ResourceRequirementsBuilder::new() .with_cpu_request("250m") .with_cpu_limit("500m") diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 2ba77898..f5d2d22f 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -9,7 +9,7 @@ use stackable_operator::{ role_utils::RoleGroupRef, }; -use crate::crd::{Container, STACKABLE_LOG_DIR, v1alpha1}; +use crate::crd::{BrokerContainer, STACKABLE_LOG_DIR, v1alpha1}; #[derive(Snafu, Debug)] pub enum Error { @@ -47,19 +47,19 @@ const CONSOLE_CONVERSION_PATTERN: &str = "[%d] %p %m (%c)%n"; /// Extend the role group ConfigMap with logging and Vector configurations pub fn extend_role_group_config_map( rolegroup: &RoleGroupRef, - logging: &Logging, + logging: &Logging, cm_builder: &mut ConfigMapBuilder, ) -> Result<()> { if let Some(ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = logging.containers.get(&Container::Kafka) + }) = logging.containers.get(&BrokerContainer::Kafka) { cm_builder.add_data( LOG4J_CONFIG_FILE, product_logging::framework::create_log4j_config( &format!( "{STACKABLE_LOG_DIR}/{container}", - container = Container::Kafka + container = BrokerContainer::Kafka ), KAFKA_LOG_FILE, MAX_KAFKA_LOG_FILES_SIZE @@ -74,7 +74,7 @@ pub fn extend_role_group_config_map( let vector_log_config = if let Some(ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = logging.containers.get(&Container::Vector) 
+ }) = logging.containers.get(&BrokerContainer::Vector) { Some(log_config) } else { From 2a27fbca2d28bca08a18774c3860872ec9926ec8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 11:34:34 +0200 Subject: [PATCH 04/90] make zookeeper config map ref optional --- deploy/helm/kafka-operator/crds/crds.yaml | 5 ++-- rust/operator-binary/src/crd/mod.rs | 5 ++-- rust/operator-binary/src/kafka_controller.rs | 24 +++++++++++--------- rust/operator-binary/src/main.rs | 2 +- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 4d695e91..2f4400d0 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -685,10 +685,9 @@ spec: nullable: true type: string zookeeperConfigMapName: - description: Kafka requires a ZooKeeper cluster connection to run. Provide the name of the ZooKeeper [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) here. When using the [Stackable operator for Apache ZooKeeper](https://docs.stackable.tech/home/nightly/zookeeper/) to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. + description: Provide the name of the ZooKeeper [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) here. When using the [Stackable operator for Apache ZooKeeper](https://docs.stackable.tech/home/nightly/zookeeper/) to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper support was dropped. Please use the 'controller' role instead. 
+ nullable: true type: string - required: - - zookeeperConfigMapName type: object clusterOperation: default: diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 01e98b6a..7474a085 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -167,11 +167,12 @@ pub mod versioned { #[serde(skip_serializing_if = "Option::is_none")] pub vector_aggregator_config_map_name: Option, - /// Kafka requires a ZooKeeper cluster connection to run. /// Provide the name of the ZooKeeper [discovery ConfigMap](DOCS_BASE_URL_PLACEHOLDER/concepts/service_discovery) /// here. When using the [Stackable operator for Apache ZooKeeper](DOCS_BASE_URL_PLACEHOLDER/zookeeper/) /// to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. - pub zookeeper_config_map_name: String, + /// This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper support was dropped. /// Please use the 'controller' role instead. 
+ pub zookeeper_config_map_name: Option, } } diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index c0ff2f34..1d30269e 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -903,18 +903,20 @@ fn build_broker_rolegroup_statefulset( }) .collect::>(); - env.push(EnvVar { - name: "ZOOKEEPER".to_string(), - value_from: Some(EnvVarSource { - config_map_key_ref: Some(ConfigMapKeySelector { - name: kafka.spec.cluster_config.zookeeper_config_map_name.clone(), - key: "ZOOKEEPER".to_string(), - ..ConfigMapKeySelector::default() + if let Some(zookeeper_config_map_name) = &kafka.spec.cluster_config.zookeeper_config_map_name { + env.push(EnvVar { + name: "ZOOKEEPER".to_string(), + value_from: Some(EnvVarSource { + config_map_key_ref: Some(ConfigMapKeySelector { + name: zookeeper_config_map_name.to_string(), + key: "ZOOKEEPER".to_string(), + ..ConfigMapKeySelector::default() + }), + ..EnvVarSource::default() }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }); + ..EnvVar::default() + }) + }; env.push(EnvVar { name: "POD_NAME".to_string(), diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 2d36a64f..ef9c8ce7 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -207,7 +207,7 @@ fn references_config_map( return false; }; - kafka.spec.cluster_config.zookeeper_config_map_name == config_map.name_any() + kafka.spec.cluster_config.zookeeper_config_map_name == Some(config_map.name_any()) || match &kafka.spec.cluster_config.authorization.opa { Some(opa_config) => opa_config.config_map_name == config_map.name_any(), None => false, From f29baa173cf45aa1f990cb3e7e4e59c88e5376d0 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 11:58:42 +0200 Subject: [PATCH 05/90] remove zookeeper.connect and timeout from product config --- deploy/config-spec/properties.yaml | 36 
------------------- .../kafka-operator/configs/properties.yaml | 36 ------------------- 2 files changed, 72 deletions(-) diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index 52fb205b..8ee5b297 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -58,42 +58,6 @@ properties: comment: "TTL for domain names that cannot be resolved." description: "TTL for domain names that cannot be resolved." - - property: &zookeeperConnect - propertyNames: - - name: "zookeeper.connect" - kind: - type: "file" - file: "server.properties" - datatype: - type: "string" - unit: *unitUrl - defaultValues: - - fromVersion: "0.0.0" - value: "localhost:2181" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "The zookeeper connection string" - - - property: &zookeeperTimeout - propertyNames: - - name: "zookeeper.connection.timeout.ms" - kind: - type: "file" - file: "server.properties" - datatype: - type: "integer" - unit: *unitMilliseconds - defaultValues: - - fromVersion: "0.0.0" - value: "18000" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "Zookeeper connection timeout in milliseconds." - - property: &opaAuthorizerClassName propertyNames: - name: "authorizer.class.name" diff --git a/deploy/helm/kafka-operator/configs/properties.yaml b/deploy/helm/kafka-operator/configs/properties.yaml index 52fb205b..8ee5b297 100644 --- a/deploy/helm/kafka-operator/configs/properties.yaml +++ b/deploy/helm/kafka-operator/configs/properties.yaml @@ -58,42 +58,6 @@ properties: comment: "TTL for domain names that cannot be resolved." description: "TTL for domain names that cannot be resolved." 
- - property: &zookeeperConnect - propertyNames: - - name: "zookeeper.connect" - kind: - type: "file" - file: "server.properties" - datatype: - type: "string" - unit: *unitUrl - defaultValues: - - fromVersion: "0.0.0" - value: "localhost:2181" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "The zookeeper connection string" - - - property: &zookeeperTimeout - propertyNames: - - name: "zookeeper.connection.timeout.ms" - kind: - type: "file" - file: "server.properties" - datatype: - type: "integer" - unit: *unitMilliseconds - defaultValues: - - fromVersion: "0.0.0" - value: "18000" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "Zookeeper connection timeout in milliseconds." - - property: &opaAuthorizerClassName propertyNames: - name: "authorizer.class.name" From 2a12e3fe264c7dd46b4f6a64576930f95374f017 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 15:19:09 +0200 Subject: [PATCH 06/90] add controller role and move configs in own module --- rust/operator-binary/src/config/jvm.rs | 4 +- rust/operator-binary/src/crd/affinity.rs | 9 +- rust/operator-binary/src/crd/mod.rs | 292 ++---------------- rust/operator-binary/src/crd/role/broker.rs | 150 +++++++++ rust/operator-binary/src/crd/role/commons.rs | 82 +++++ .../src/crd/role/controller.rs | 124 ++++++++ rust/operator-binary/src/crd/role/mod.rs | 63 ++++ rust/operator-binary/src/crd/security.rs | 3 +- rust/operator-binary/src/discovery.rs | 2 +- rust/operator-binary/src/kafka_controller.rs | 20 +- rust/operator-binary/src/kerberos.rs | 4 +- .../src/operations/graceful_shutdown.rs | 6 +- rust/operator-binary/src/operations/pdb.rs | 8 +- rust/operator-binary/src/product_logging.rs | 2 +- 14 files changed, 474 insertions(+), 295 deletions(-) create mode 100644 rust/operator-binary/src/crd/role/broker.rs create mode 100644 rust/operator-binary/src/crd/role/commons.rs create mode 100644 rust/operator-binary/src/crd/role/controller.rs 
create mode 100644 rust/operator-binary/src/crd/role/mod.rs diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 129dd9e0..f88ccd83 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -5,8 +5,8 @@ use stackable_operator::{ }; use crate::crd::{ - BrokerConfig, BrokerConfigFragment, JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, - STACKABLE_CONFIG_DIR, + JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, STACKABLE_CONFIG_DIR, + role::broker::{BrokerConfig, BrokerConfigFragment}, }; const JAVA_HEAP_FACTOR: f32 = 0.8; diff --git a/rust/operator-binary/src/crd/affinity.rs b/rust/operator-binary/src/crd/affinity.rs index 392c0085..b228f7ce 100644 --- a/rust/operator-binary/src/crd/affinity.rs +++ b/rust/operator-binary/src/crd/affinity.rs @@ -3,14 +3,14 @@ use stackable_operator::{ k8s_openapi::api::core::v1::PodAntiAffinity, }; -use crate::crd::{APP_NAME, KafkaRole}; +use crate::crd::APP_NAME; -pub fn get_affinity(cluster_name: &str, role: &KafkaRole) -> StackableAffinityFragment { +pub fn get_affinity(cluster_name: &str, role: &str) -> StackableAffinityFragment { StackableAffinityFragment { pod_affinity: None, pod_anti_affinity: Some(PodAntiAffinity { preferred_during_scheduling_ignored_during_execution: Some(vec![ - affinity_between_role_pods(APP_NAME, cluster_name, &role.to_string(), 70), + affinity_between_role_pods(APP_NAME, cluster_name, role, 70), ]), required_during_scheduling_ignored_during_execution: None, }), @@ -32,8 +32,7 @@ mod tests { }, }; - use super::*; - use crate::crd::v1alpha1; + use crate::crd::{KafkaRole, v1alpha1}; #[rstest] #[case(KafkaRole::Broker)] diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 7474a085..c1aa1635 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -2,45 +2,38 @@ pub mod affinity; pub mod authentication; pub mod authorization; pub mod listener; +pub mod role; 
pub mod security; pub mod tls; use std::{collections::BTreeMap, str::FromStr}; -use affinity::get_affinity; use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ - commons::{ - affinity::StackableAffinity, - cluster_operation::ClusterOperation, - product_image_selection::ProductImage, - resources::{ - CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, - PvcConfig, PvcConfigFragment, Resources, ResourcesFragment, - }, - }, + commons::{cluster_operation::ClusterOperation, product_image_selection::ProductImage}, config::{ - fragment::{self, Fragment, ValidationError}, + fragment::{self, ValidationError}, merge::Merge, }, - k8s_openapi::{ - api::core::v1::PersistentVolumeClaim, apimachinery::pkg::api::resource::Quantity, - }, kube::{CustomResource, ResourceExt, runtime::reflector::ObjectRef}, - product_config_utils::Configuration, - product_logging::{self, spec::Logging}, role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, - shared::time::Duration, status::condition::{ClusterCondition, HasStatusCondition}, utils::cluster_info::KubernetesClusterInfo, versioned::versioned, }; -use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; -use crate::crd::{authorization::KafkaAuthorization, tls::KafkaTls}; +use crate::crd::{ + authorization::KafkaAuthorization, + role::{ + KafkaRole, + broker::{BrokerConfig, BrokerConfigFragment}, + controller::ControllerConfigFragment, + }, + tls::KafkaTls, +}; pub const DOCKER_IMAGE_BASE_NAME: &str = "kafka"; pub const APP_NAME: &str = "kafka"; @@ -132,6 +125,10 @@ pub mod versioned { // no doc - docs in Role struct. pub brokers: Option>, + // no doc - docs in Role struct. + pub controllers: + Option>, + /// Kafka settings that affect all roles and role groups. 
/// /// The settings in the `clusterConfig` are cluster wide settings that do not need to be configurable at role or role group level. @@ -203,13 +200,14 @@ impl v1alpha1::KafkaCluster { pub fn role( &self, - role_variant: &KafkaRole, + role: &KafkaRole, ) -> Result<&Role, Error> { - match role_variant { + match role { KafkaRole::Broker => self.spec.brokers.as_ref(), + KafkaRole::Controller => todo!(), } .with_context(|| CannotRetrieveKafkaRoleSnafu { - role: role_variant.to_string(), + role: role.to_string(), }) } @@ -234,6 +232,7 @@ impl v1alpha1::KafkaCluster { pub fn role_config(&self, role: &KafkaRole) -> Option<&GenericRoleConfig> { match role { KafkaRole::Broker => self.spec.brokers.as_ref().map(|b| &b.role_config), + KafkaRole::Controller => self.spec.controllers.as_ref().map(|b| &b.role_config), } } @@ -269,7 +268,7 @@ impl v1alpha1::KafkaCluster { rolegroup_ref: &RoleGroupRef, ) -> Result { // Initialize the result with all default values as baseline - let conf_defaults = BrokerConfig::default_config(&self.name_any(), role); + let conf_defaults = BrokerConfig::default_config(&self.name_any(), &role.to_string()); // Retrieve role resource config let role = self.role(role)?; @@ -314,253 +313,6 @@ impl KafkaPodRef { } } -#[derive( - Clone, - Debug, - Deserialize, - Display, - EnumIter, - Eq, - Hash, - JsonSchema, - PartialEq, - Serialize, - EnumString, -)] -pub enum KafkaRole { - #[strum(serialize = "broker")] - Broker, -} - -impl KafkaRole { - /// Metadata about a rolegroup - pub fn rolegroup_ref( - &self, - kafka: &v1alpha1::KafkaCluster, - group_name: impl Into, - ) -> RoleGroupRef { - RoleGroupRef { - cluster: ObjectRef::from_obj(kafka), - role: self.to_string(), - role_group: group_name.into(), - } - } - - pub fn roles() -> Vec { - let mut roles = vec![]; - for role in Self::iter() { - roles.push(role.to_string()) - } - roles - } - - /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. 
- /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, - /// but is similar to HBase). - pub fn kerberos_service_name(&self) -> &'static str { - "kafka" - } -} - -#[derive(Clone, Debug, Default, PartialEq, Fragment, JsonSchema)] -#[fragment_attrs( - derive( - Clone, - Debug, - Default, - Deserialize, - JsonSchema, - Merge, - PartialEq, - Serialize - ), - serde(rename_all = "camelCase") -)] -pub struct Storage { - #[fragment_attrs(serde(default))] - pub log_dirs: PvcConfig, -} - -impl Storage { - pub fn build_pvcs(&self) -> Vec { - let data_pvc = self - .log_dirs - .build_pvc(LOG_DIRS_VOLUME_NAME, Some(vec!["ReadWriteOnce"])); - vec![data_pvc] - } -} - -#[derive( - Clone, - Debug, - Deserialize, - Display, - Eq, - EnumIter, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -#[serde(rename_all = "kebab-case")] -#[strum(serialize_all = "kebab-case")] -pub enum BrokerContainer { - Vector, - KcatProber, - GetService, - Kafka, -} - -#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)] -#[fragment_attrs( - derive( - Clone, - Debug, - Default, - Deserialize, - Merge, - JsonSchema, - PartialEq, - Serialize - ), - serde(rename_all = "camelCase") -)] -pub struct CommonRoleConfig { - #[fragment_attrs(serde(default))] - pub affinity: StackableAffinity, - - /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. - #[fragment_attrs(serde(default))] - pub graceful_shutdown_timeout: Option, - - /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. - /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
- #[fragment_attrs(serde(default))] - pub requested_secret_lifetime: Option, -} - -impl CommonRoleConfig { - const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(30); - // Auto TLS certificate lifetime - const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); - - pub fn default_config(cluster_name: &str, role: &KafkaRole) -> CommonRoleConfigFragment { - CommonRoleConfigFragment { - affinity: get_affinity(cluster_name, role), - graceful_shutdown_timeout: Some(Self::DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT), - requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME), - } - } -} - -#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] -#[fragment_attrs( - derive( - Clone, - Debug, - Default, - Deserialize, - JsonSchema, - Merge, - PartialEq, - Serialize - ), - serde(rename_all = "camelCase") -)] -pub struct BrokerConfig { - #[fragment_attrs(serde(flatten))] - pub common_role_config: CommonRoleConfig, - - /// The ListenerClass used for bootstrapping new clients. Should use a stable ListenerClass to avoid unnecessary client restarts (such as `cluster-internal` or `external-stable`). - pub bootstrap_listener_class: String, - - /// The ListenerClass used for connecting to brokers. Should use a direct connection ListenerClass to minimize cost and minimize performance overhead (such as `cluster-internal` or `external-unstable`). 
- pub broker_listener_class: String, - - #[fragment_attrs(serde(default))] - pub logging: Logging, - - #[fragment_attrs(serde(default))] - pub resources: Resources, -} - -impl BrokerConfig { - pub fn default_config(cluster_name: &str, role: &KafkaRole) -> BrokerConfigFragment { - BrokerConfigFragment { - common_role_config: CommonRoleConfig::default_config(cluster_name, role), - bootstrap_listener_class: Some("cluster-internal".to_string()), - broker_listener_class: Some("cluster-internal".to_string()), - logging: product_logging::spec::default_logging(), - resources: ResourcesFragment { - cpu: CpuLimitsFragment { - min: Some(Quantity("250m".to_owned())), - max: Some(Quantity("1000m".to_owned())), - }, - memory: MemoryLimitsFragment { - limit: Some(Quantity("1Gi".to_owned())), - runtime_limits: NoRuntimeLimitsFragment {}, - }, - storage: StorageFragment { - log_dirs: PvcConfigFragment { - capacity: Some(Quantity("2Gi".to_owned())), - storage_class: None, - selectors: None, - }, - }, - }, - } - } -} - -impl Configuration for BrokerConfigFragment { - type Configurable = v1alpha1::KafkaCluster; - - fn compute_env( - &self, - _resource: &Self::Configurable, - _role_name: &str, - ) -> Result>, stackable_operator::product_config_utils::Error> - { - Ok(BTreeMap::new()) - } - - fn compute_cli( - &self, - _resource: &Self::Configurable, - _role_name: &str, - ) -> Result>, stackable_operator::product_config_utils::Error> - { - Ok(BTreeMap::new()) - } - - fn compute_files( - &self, - resource: &Self::Configurable, - _role_name: &str, - file: &str, - ) -> Result>, stackable_operator::product_config_utils::Error> - { - let mut config = BTreeMap::new(); - - if file == SERVER_PROPERTIES_FILE { - // OPA - if resource.spec.cluster_config.authorization.opa.is_some() { - config.insert( - "authorizer.class.name".to_string(), - Some("org.openpolicyagent.kafka.OpaAuthorizer".to_string()), - ); - config.insert( - "opa.authorizer.metrics.enabled".to_string(), - Some("true".to_string()), - 
); - } - } - - Ok(config) - } -} - #[derive(Clone, Default, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct KafkaClusterStatus { diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs new file mode 100644 index 00000000..a66f8127 --- /dev/null +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -0,0 +1,150 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + commons::resources::{ + CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, + PvcConfigFragment, Resources, ResourcesFragment, + }, + config::{fragment::Fragment, merge::Merge}, + k8s_openapi::apimachinery::pkg::api::resource::Quantity, + product_config_utils::Configuration, + product_logging::{self, spec::Logging}, + schemars::{self, JsonSchema}, +}; +use strum::{Display, EnumIter}; + +use crate::crd::{ + SERVER_PROPERTIES_FILE, + role::commons::{CommonConfig, Storage, StorageFragment}, + v1alpha1, +}; + +#[derive( + Clone, + Debug, + Deserialize, + Display, + Eq, + EnumIter, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum BrokerContainer { + Vector, + KcatProber, + GetService, + Kafka, +} + +#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct BrokerConfig { + #[fragment_attrs(serde(flatten))] + pub common_config: CommonConfig, + + /// The ListenerClass used for bootstrapping new clients. Should use a stable ListenerClass to avoid unnecessary client restarts (such as `cluster-internal` or `external-stable`). + pub bootstrap_listener_class: String, + + /// The ListenerClass used for connecting to brokers. 
Should use a direct connection ListenerClass to minimize cost and minimize performance overhead (such as `cluster-internal` or `external-unstable`). + pub broker_listener_class: String, + + #[fragment_attrs(serde(default))] + pub logging: Logging, + + #[fragment_attrs(serde(default))] + pub resources: Resources, +} + +impl BrokerConfig { + pub fn default_config(cluster_name: &str, role: &str) -> BrokerConfigFragment { + BrokerConfigFragment { + common_config: CommonConfig::default_config(cluster_name, role), + bootstrap_listener_class: Some("cluster-internal".to_string()), + broker_listener_class: Some("cluster-internal".to_string()), + logging: product_logging::spec::default_logging(), + resources: ResourcesFragment { + cpu: CpuLimitsFragment { + min: Some(Quantity("250m".to_owned())), + max: Some(Quantity("1000m".to_owned())), + }, + memory: MemoryLimitsFragment { + limit: Some(Quantity("1Gi".to_owned())), + runtime_limits: NoRuntimeLimitsFragment {}, + }, + storage: StorageFragment { + log_dirs: PvcConfigFragment { + capacity: Some(Quantity("2Gi".to_owned())), + storage_class: None, + selectors: None, + }, + }, + }, + } + } +} + +impl Configuration for BrokerConfigFragment { + type Configurable = v1alpha1::KafkaCluster; + + fn compute_env( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_cli( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_files( + &self, + resource: &Self::Configurable, + _role_name: &str, + file: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + let mut config = BTreeMap::new(); + + if file == SERVER_PROPERTIES_FILE { + // OPA + if resource.spec.cluster_config.authorization.opa.is_some() { + config.insert( + "authorizer.class.name".to_string(), + 
Some("org.openpolicyagent.kafka.OpaAuthorizer".to_string()), + ); + config.insert( + "opa.authorizer.metrics.enabled".to_string(), + Some("true".to_string()), + ); + } + } + + Ok(config) + } +} diff --git a/rust/operator-binary/src/crd/role/commons.rs b/rust/operator-binary/src/crd/role/commons.rs new file mode 100644 index 00000000..1ef6f3dc --- /dev/null +++ b/rust/operator-binary/src/crd/role/commons.rs @@ -0,0 +1,82 @@ +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + commons::{affinity::StackableAffinity, resources::PvcConfig}, + config::{fragment::Fragment, merge::Merge}, + k8s_openapi::api::core::v1::PersistentVolumeClaim, + schemars::{self, JsonSchema}, + shared::time::Duration, +}; + +use crate::crd::affinity::get_affinity; + +#[derive(Clone, Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct Storage { + #[fragment_attrs(serde(default))] + pub log_dirs: PvcConfig, +} + +impl Storage { + pub const LOG_DIRS_VOLUME_NAME: &str = "log-dirs"; + + pub fn build_pvcs(&self) -> Vec { + let data_pvc = self + .log_dirs + .build_pvc(Self::LOG_DIRS_VOLUME_NAME, Some(vec!["ReadWriteOnce"])); + vec![data_pvc] + } +} + +#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + Merge, + JsonSchema, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct CommonConfig { + #[fragment_attrs(serde(default))] + pub affinity: StackableAffinity, + + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + #[fragment_attrs(serde(default))] + pub graceful_shutdown_timeout: Option, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. 
+ /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, +} + +impl CommonConfig { + const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(30); + // Auto TLS certificate lifetime + const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); + + pub fn default_config(cluster_name: &str, role: &str) -> CommonConfigFragment { + CommonConfigFragment { + affinity: get_affinity(cluster_name, role), + graceful_shutdown_timeout: Some(Self::DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT), + requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME), + } + } +} diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs new file mode 100644 index 00000000..0425e0b8 --- /dev/null +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -0,0 +1,124 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + commons::resources::{ + CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, + PvcConfigFragment, Resources, ResourcesFragment, + }, + config::{fragment::Fragment, merge::Merge}, + k8s_openapi::apimachinery::pkg::api::resource::Quantity, + product_config_utils::Configuration, + product_logging::{self, spec::Logging}, + schemars::{self, JsonSchema}, +}; +use strum::{Display, EnumIter}; + +use crate::crd::{ + role::commons::{CommonConfig, Storage, StorageFragment}, + v1alpha1, +}; + +#[derive( + Clone, + Debug, + Deserialize, + Display, + Eq, + EnumIter, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum ControllerContainer { + Vector, + // TODO: Kafka, Kraft, Controller? 
+ Kafka, +} + +#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct ControllerConfig { + #[fragment_attrs(serde(flatten))] + pub common_role_config: CommonConfig, + + #[fragment_attrs(serde(default))] + pub logging: Logging, + + #[fragment_attrs(serde(default))] + pub resources: Resources, +} + +impl ControllerConfig { + pub fn default_config(cluster_name: &str, role: &str) -> ControllerConfigFragment { + ControllerConfigFragment { + common_role_config: CommonConfig::default_config(cluster_name, role), + logging: product_logging::spec::default_logging(), + resources: ResourcesFragment { + cpu: CpuLimitsFragment { + min: Some(Quantity("250m".to_owned())), + max: Some(Quantity("1000m".to_owned())), + }, + memory: MemoryLimitsFragment { + limit: Some(Quantity("1Gi".to_owned())), + runtime_limits: NoRuntimeLimitsFragment {}, + }, + storage: StorageFragment { + log_dirs: PvcConfigFragment { + capacity: Some(Quantity("2Gi".to_owned())), + storage_class: None, + selectors: None, + }, + }, + }, + } + } +} + +impl Configuration for ControllerConfigFragment { + type Configurable = v1alpha1::KafkaCluster; + + fn compute_env( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_cli( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_files( + &self, + _resource: &Self::Configurable, + _role_name: &str, + _file: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } +} diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs new file mode 100644 index 00000000..d0ab26a2 --- 
/dev/null +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -0,0 +1,63 @@ +pub mod broker; +pub mod commons; +pub mod controller; + +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + kube::runtime::reflector::ObjectRef, + role_utils::RoleGroupRef, + schemars::{self, JsonSchema}, +}; +use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; + +use crate::crd::v1alpha1; + +#[derive( + Clone, + Debug, + Deserialize, + Display, + EnumIter, + Eq, + Hash, + JsonSchema, + PartialEq, + Serialize, + EnumString, +)] +pub enum KafkaRole { + #[strum(serialize = "broker")] + Broker, + #[strum(serialize = "controller")] + Controller, +} + +impl KafkaRole { + /// Metadata about a rolegroup + pub fn rolegroup_ref( + &self, + kafka: &v1alpha1::KafkaCluster, + group_name: impl Into, + ) -> RoleGroupRef { + RoleGroupRef { + cluster: ObjectRef::from_obj(kafka), + role: self.to_string(), + role_group: group_name.into(), + } + } + + pub fn roles() -> Vec { + let mut roles = vec![]; + for role in Self::iter() { + roles.push(role.to_string()) + } + roles + } + + /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. + /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, + /// but is similar to HBase). 
+ pub fn kerberos_service_name(&self) -> &'static str { + "kafka" + } +} diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 4e846655..031e0673 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -29,11 +29,12 @@ use stackable_operator::{ use super::listener::node_port_cmd; use crate::crd::{ - KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, SERVER_PROPERTIES_FILE, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, SERVER_PROPERTIES_FILE, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, authentication::{self, ResolvedAuthenticationClasses}, listener::{self, KafkaListenerConfig, node_address_cmd}, + role::KafkaRole, tls, v1alpha1, }; diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index 7a6d01b4..ce311f77 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -10,7 +10,7 @@ use stackable_operator::{ }; use crate::{ - crd::{KafkaRole, security::KafkaTlsSecurity, v1alpha1}, + crd::{role::KafkaRole, security::KafkaTlsSecurity, v1alpha1}, kafka_controller::KAFKA_CONTROLLER_NAME, utils::build_recommended_labels, }; diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 1d30269e..e3d48cb4 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -75,13 +75,17 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ - APP_NAME, BrokerConfig, BrokerContainer, DOCKER_IMAGE_BASE_NAME, - JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, KafkaClusterStatus, KafkaRole, - LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, - METRICS_PORT, METRICS_PORT_NAME, 
OPERATOR_NAME, SERVER_PROPERTIES_FILE, - STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, + APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, + KafkaClusterStatus, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, + LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, + SERVER_PROPERTIES_FILE, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, + STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, + STACKABLE_LOG_DIR, listener::{KafkaListenerError, get_kafka_listener_config}, + role::{ + KafkaRole, + broker::{BrokerConfig, BrokerContainer}, + }, security::KafkaTlsSecurity, v1alpha1, }, @@ -854,7 +858,7 @@ fn build_broker_rolegroup_statefulset( // Add TLS related volumes and volume mounts let requested_secret_lifetime = merged_config - .common_role_config + .common_config .requested_secret_lifetime .context(MissingSecretLifetimeSnafu)?; kafka_security @@ -1074,7 +1078,7 @@ fn build_broker_rolegroup_statefulset( .image_pull_secrets_from_product_image(resolved_product_image) .add_container(cb_kafka.build()) .add_container(cb_kcat_prober.build()) - .affinity(&merged_config.common_role_config.affinity) + .affinity(&merged_config.common_config.affinity) .add_volume(Volume { name: "config".to_string(), config_map: Some(ConfigMapVolumeSource { diff --git a/rust/operator-binary/src/kerberos.rs b/rust/operator-binary/src/kerberos.rs index e96ceea9..e22de94a 100644 --- a/rust/operator-binary/src/kerberos.rs +++ b/rust/operator-binary/src/kerberos.rs @@ -12,8 +12,8 @@ use stackable_operator::builder::{ }; use crate::crd::{ - KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_DIR, - STACKABLE_KERBEROS_KRB5_PATH, security::KafkaTlsSecurity, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_DIR, + 
STACKABLE_KERBEROS_KRB5_PATH, role::KafkaRole, security::KafkaTlsSecurity, }; #[derive(Snafu, Debug)] diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index 9a0c3b95..9a9974ef 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use snafu::{ResultExt, Snafu}; use stackable_operator::builder::pod::PodBuilder; -use crate::crd::BrokerConfig; +use crate::crd::role::broker::BrokerConfig; #[derive(Debug, Snafu)] pub enum Error { @@ -25,9 +25,7 @@ pub fn add_graceful_shutdown_config( ) -> Result<(), Error> { // This must be always set by the merge mechanism, as we provide a default value, // users can not disable graceful shutdown. - if let Some(graceful_shutdown_timeout) = - merged_config.common_role_config.graceful_shutdown_timeout - { + if let Some(graceful_shutdown_timeout) = merged_config.common_config.graceful_shutdown_timeout { pod_builder .termination_grace_period(&graceful_shutdown_timeout) .context(SetTerminationGracePeriodSnafu)?; diff --git a/rust/operator-binary/src/operations/pdb.rs b/rust/operator-binary/src/operations/pdb.rs index 31c760bf..d6211fa0 100644 --- a/rust/operator-binary/src/operations/pdb.rs +++ b/rust/operator-binary/src/operations/pdb.rs @@ -5,7 +5,7 @@ use stackable_operator::{ }; use crate::{ - crd::{APP_NAME, KafkaRole, OPERATOR_NAME, v1alpha1}, + crd::{APP_NAME, OPERATOR_NAME, role::KafkaRole, v1alpha1}, kafka_controller::KAFKA_CONTROLLER_NAME, }; @@ -35,6 +35,7 @@ pub async fn add_pdbs( } let max_unavailable = pdb.max_unavailable.unwrap_or(match role { KafkaRole::Broker => max_unavailable_brokers(), + KafkaRole::Controller => max_unavailable_controllers(), }); let pdb = PodDisruptionBudgetBuilder::new_with_role( kafka, @@ -61,3 +62,8 @@ fn max_unavailable_brokers() -> u16 { // We can not make any assumptions about topic replication 
factors. 1 } + +fn max_unavailable_controllers() -> u16 { + // TODO: what do we want here? + 1 +} diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index f5d2d22f..84cc704b 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -9,7 +9,7 @@ use stackable_operator::{ role_utils::RoleGroupRef, }; -use crate::crd::{BrokerContainer, STACKABLE_LOG_DIR, v1alpha1}; +use crate::crd::{STACKABLE_LOG_DIR, role::broker::BrokerContainer, v1alpha1}; #[derive(Snafu, Debug)] pub enum Error { From 213939321ed48274c2136ff86fc8a95a36786861 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 15:22:22 +0200 Subject: [PATCH 07/90] fix tests --- rust/operator-binary/src/config/jvm.rs | 2 +- rust/operator-binary/src/crd/affinity.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index f88ccd83..c3e36a5f 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -101,7 +101,7 @@ fn is_heap_jvm_argument(jvm_argument: &str) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::crd::{KafkaRole, v1alpha1}; + use crate::crd::{role::KafkaRole, v1alpha1}; #[test] fn test_construct_jvm_arguments_defaults() { diff --git a/rust/operator-binary/src/crd/affinity.rs b/rust/operator-binary/src/crd/affinity.rs index b228f7ce..3485fdb0 100644 --- a/rust/operator-binary/src/crd/affinity.rs +++ b/rust/operator-binary/src/crd/affinity.rs @@ -60,7 +60,7 @@ mod tests { .unwrap(); assert_eq!( - merged_config.affinity, + merged_config.common_config.affinity, StackableAffinity { pod_affinity: None, pod_anti_affinity: Some(PodAntiAffinity { From f58e01d5e03febada260ef38fa80334caabacb92 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 15:22:43 +0200 Subject: [PATCH 08/90] regenerated charts --- 
deploy/helm/kafka-operator/crds/crds.yaml | 567 ++++++++++++++++++++++ 1 file changed, 567 insertions(+) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 2f4400d0..26fc4ff3 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -704,6 +704,573 @@ spec: description: Flag to stop the cluster. This means all deployed resources (e.g. Services, StatefulSets, ConfigMaps) are kept but all deployed Pods (e.g. replicas from a StatefulSet) are scaled to 0 and therefore stopped and removed. If applied at the same time with `reconciliationPaused`, the latter will pause reconciliation and `stopped` will take no effect until `reconciliationPaused` is set to false or removed. type: boolean type: object + controllers: + description: This struct represents a role - e.g. HDFS datanodes or Trino workers. It has a key-value-map containing all the roleGroups that are part of this role. Additionally, there is a `config`, which is configurable at the role *and* roleGroup level. Everything at roleGroup level is merged on top of what is configured on role level. There is also a second form of config, which can only be configured at role level, the `roleConfig`. You can learn more about this in the [Roles and role group concept documentation](https://docs.stackable.tech/home/nightly/concepts/roles-and-role-groups). + nullable: true + properties: + cliOverrides: + additionalProperties: + type: string + default: {} + type: object + config: + default: {} + properties: + affinity: + default: + nodeAffinity: null + nodeSelector: null + podAffinity: null + podAntiAffinity: null + description: These configuration settings control [Pod placement](https://docs.stackable.tech/home/nightly/concepts/operations/pod_placement). 
+ properties: + nodeAffinity: + description: Same as the `spec.affinity.nodeAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + nodeSelector: + additionalProperties: + type: string + description: Simple key-value pairs forming a nodeSelector, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + podAffinity: + description: Same as the `spec.affinity.podAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + podAntiAffinity: + description: Same as the `spec.affinity.podAntiAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string + logging: + default: + containers: {} + enableVectorAgent: null + description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging). + properties: + containers: + additionalProperties: + anyOf: + - required: + - custom + - {} + description: Log configuration of the container + properties: + console: + description: Configuration for the console appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. 
+ enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + custom: + description: Custom log configuration provided in a ConfigMap + properties: + configMap: + description: ConfigMap containing the log configuration files + nullable: true + type: string + type: object + file: + description: Configuration for the file appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + loggers: + additionalProperties: + description: Configuration of a logger + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + default: {} + description: Configuration per logger + type: object + type: object + description: Log configuration per container. + type: object + enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent. + nullable: true + type: boolean + type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string + resources: + default: + cpu: + max: null + min: null + memory: + limit: null + runtimeLimits: {} + storage: + logDirs: + capacity: null + description: Resource usage is configured here, this includes CPU usage, memory usage and disk storage usage, if this role needs any. + properties: + cpu: + default: + max: null + min: null + properties: + max: + description: The maximum amount of CPU cores that can be requested by Pods. 
Equivalent to the `limit` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + min: + description: The minimal amount of CPU cores that Pods need to run. Equivalent to the `request` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + type: object + memory: + properties: + limit: + description: 'The maximum amount of memory that should be available to the Pod. Specified as a byte [Quantity](https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/quantity/), which means these suffixes are supported: E, P, T, G, M, k. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. For example, the following represent roughly the same value: `128974848, 129e6, 129M, 128974848000m, 123Mi`' + nullable: true + type: string + runtimeLimits: + description: Additional options that can be specified. + type: object + type: object + storage: + properties: + logDirs: + default: + capacity: null + properties: + capacity: + description: "Quantity is a fixed-point representation of a number. It provides convenient marshaling/unmarshaling in JSON and YAML, in addition to String() and AsInt64() accessors.\n\nThe serialization format is:\n\n``` ::= \n\n\t(Note that may be empty, from the \"\" case in .)\n\n ::= 0 | 1 | ... | 9 ::= | ::= | . | . | . 
::= \"+\" | \"-\" ::= | ::= | | ::= Ki | Mi | Gi | Ti | Pi | Ei\n\n\t(International System of units; See: http://physics.nist.gov/cuu/Units/binary.html)\n\n ::= m | \"\" | k | M | G | T | P | E\n\n\t(Note that 1024 = 1Ki but 1000 = 1k; I didn't choose the capitalization.)\n\n ::= \"e\" | \"E\" ```\n\nNo matter which of the three exponent forms is used, no quantity may represent a number greater than 2^63-1 in magnitude, nor may it have more than 3 decimal places. Numbers larger or more precise will be capped or rounded up. (E.g.: 0.1m will rounded up to 1m.) This may be extended in the future if we require larger or smaller quantities.\n\nWhen a Quantity is parsed from a string, it will remember the type of suffix it had, and will use the same type again when it is serialized.\n\nBefore serializing, Quantity will be put in \"canonical form\". This means that Exponent/suffix will be adjusted up or down (with a corresponding increase or decrease in Mantissa) such that:\n\n- No precision is lost - No fractional digits will be emitted - The exponent (or suffix) is as large as possible.\n\nThe sign will be omitted unless the number is negative.\n\nExamples:\n\n- 1.5 will be serialized as \"1500m\" - 1.5Gi will be serialized as \"1536Mi\"\n\nNote that the quantity will NEVER be internally represented by a floating point number. That is the whole point of this exercise.\n\nNon-canonical values will still parse as long as they are well formed, but will be re-emitted in their canonical form. (So always use canonical form, or don't diff.)\n\nThis format is intended to make it difficult to use these numbers without writing some sort of special handling code in the hopes that that will cause implementors to also use a fixed point implementation." + nullable: true + type: string + selectors: + description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. 
A null label selector matches no objects. + nullable: true + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + storageClass: + nullable: true + type: string + type: object + type: object + type: object + type: object + configOverrides: + additionalProperties: + additionalProperties: + type: string + type: object + default: {} + description: The `configOverrides` can be used to configure properties in product config files that are not exposed in the CRD. Read the [config overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#config-overrides) and consult the operator specific usage guide documentation for details on the available config files and settings for the specific product. 
+ type: object + envOverrides: + additionalProperties: + type: string + default: {} + description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' + type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object + podOverrides: + default: {} + description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. + type: object + x-kubernetes-preserve-unknown-fields: true + roleConfig: + default: + podDisruptionBudget: + enabled: true + maxUnavailable: null + description: This is a product-agnostic RoleConfig, which is sufficient for most of the products. 
+ properties: + podDisruptionBudget: + default: + enabled: true + maxUnavailable: null + description: |- + This struct is used to configure: + + 1. If PodDisruptionBudgets are created by the operator 2. The allowed number of Pods to be unavailable (`maxUnavailable`) + + Learn more in the [allowed Pod disruptions documentation](https://docs.stackable.tech/home/nightly/concepts/operations/pod_disruptions). + properties: + enabled: + default: true + description: Whether a PodDisruptionBudget should be written out for this role. Disabling this enables you to specify your own - custom - one. Defaults to true. + type: boolean + maxUnavailable: + description: The number of Pods that are allowed to be down because of voluntary disruptions. If you don't explicitly set this, the operator will use a sane default based upon knowledge about the individual product. + format: uint16 + minimum: 0.0 + nullable: true + type: integer + type: object + type: object + roleGroups: + additionalProperties: + properties: + cliOverrides: + additionalProperties: + type: string + default: {} + type: object + config: + default: {} + properties: + affinity: + default: + nodeAffinity: null + nodeSelector: null + podAffinity: null + podAntiAffinity: null + description: These configuration settings control [Pod placement](https://docs.stackable.tech/home/nightly/concepts/operations/pod_placement). 
+ properties: + nodeAffinity: + description: Same as the `spec.affinity.nodeAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + nodeSelector: + additionalProperties: + type: string + description: Simple key-value pairs forming a nodeSelector, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + podAffinity: + description: Same as the `spec.affinity.podAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + podAntiAffinity: + description: Same as the `spec.affinity.podAntiAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string + logging: + default: + containers: {} + enableVectorAgent: null + description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging). + properties: + containers: + additionalProperties: + anyOf: + - required: + - custom + - {} + description: Log configuration of the container + properties: + console: + description: Configuration for the console appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. 
+ enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + custom: + description: Custom log configuration provided in a ConfigMap + properties: + configMap: + description: ConfigMap containing the log configuration files + nullable: true + type: string + type: object + file: + description: Configuration for the file appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + loggers: + additionalProperties: + description: Configuration of a logger + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + default: {} + description: Configuration per logger + type: object + type: object + description: Log configuration per container. + type: object + enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent. + nullable: true + type: boolean + type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string + resources: + default: + cpu: + max: null + min: null + memory: + limit: null + runtimeLimits: {} + storage: + logDirs: + capacity: null + description: Resource usage is configured here, this includes CPU usage, memory usage and disk storage usage, if this role needs any. + properties: + cpu: + default: + max: null + min: null + properties: + max: + description: The maximum amount of CPU cores that can be requested by Pods. 
Equivalent to the `limit` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + min: + description: The minimal amount of CPU cores that Pods need to run. Equivalent to the `request` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + type: object + memory: + properties: + limit: + description: 'The maximum amount of memory that should be available to the Pod. Specified as a byte [Quantity](https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/quantity/), which means these suffixes are supported: E, P, T, G, M, k. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. For example, the following represent roughly the same value: `128974848, 129e6, 129M, 128974848000m, 123Mi`' + nullable: true + type: string + runtimeLimits: + description: Additional options that can be specified. + type: object + type: object + storage: + properties: + logDirs: + default: + capacity: null + properties: + capacity: + description: "Quantity is a fixed-point representation of a number. It provides convenient marshaling/unmarshaling in JSON and YAML, in addition to String() and AsInt64() accessors.\n\nThe serialization format is:\n\n``` ::= \n\n\t(Note that may be empty, from the \"\" case in .)\n\n ::= 0 | 1 | ... | 9 ::= | ::= | . | . | . 
::= \"+\" | \"-\" ::= | ::= | | ::= Ki | Mi | Gi | Ti | Pi | Ei\n\n\t(International System of units; See: http://physics.nist.gov/cuu/Units/binary.html)\n\n ::= m | \"\" | k | M | G | T | P | E\n\n\t(Note that 1024 = 1Ki but 1000 = 1k; I didn't choose the capitalization.)\n\n ::= \"e\" | \"E\" ```\n\nNo matter which of the three exponent forms is used, no quantity may represent a number greater than 2^63-1 in magnitude, nor may it have more than 3 decimal places. Numbers larger or more precise will be capped or rounded up. (E.g.: 0.1m will rounded up to 1m.) This may be extended in the future if we require larger or smaller quantities.\n\nWhen a Quantity is parsed from a string, it will remember the type of suffix it had, and will use the same type again when it is serialized.\n\nBefore serializing, Quantity will be put in \"canonical form\". This means that Exponent/suffix will be adjusted up or down (with a corresponding increase or decrease in Mantissa) such that:\n\n- No precision is lost - No fractional digits will be emitted - The exponent (or suffix) is as large as possible.\n\nThe sign will be omitted unless the number is negative.\n\nExamples:\n\n- 1.5 will be serialized as \"1500m\" - 1.5Gi will be serialized as \"1536Mi\"\n\nNote that the quantity will NEVER be internally represented by a floating point number. That is the whole point of this exercise.\n\nNon-canonical values will still parse as long as they are well formed, but will be re-emitted in their canonical form. (So always use canonical form, or don't diff.)\n\nThis format is intended to make it difficult to use these numbers without writing some sort of special handling code in the hopes that that will cause implementors to also use a fixed point implementation." + nullable: true + type: string + selectors: + description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. 
A null label selector matches no objects. + nullable: true + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + storageClass: + nullable: true + type: string + type: object + type: object + type: object + type: object + configOverrides: + additionalProperties: + additionalProperties: + type: string + type: object + default: {} + description: The `configOverrides` can be used to configure properties in product config files that are not exposed in the CRD. Read the [config overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#config-overrides) and consult the operator specific usage guide documentation for details on the available config files and settings for the specific product. 
+ type: object + envOverrides: + additionalProperties: + type: string + default: {} + description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' + type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object + podOverrides: + default: {} + description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. 
+ type: object + x-kubernetes-preserve-unknown-fields: true + replicas: + format: uint16 + minimum: 0.0 + nullable: true + type: integer + type: object + type: object + required: + - roleGroups + type: object image: anyOf: - required: From 0b34d01c4c30562971e020dbcf4ca2f6e1610687 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 15:45:15 +0200 Subject: [PATCH 09/90] use broker.properties and controller.properties --- rust/operator-binary/src/crd/mod.rs | 1 - rust/operator-binary/src/crd/role/broker.rs | 5 +- .../src/crd/role/controller.rs | 2 + rust/operator-binary/src/crd/security.rs | 10 +- rust/operator-binary/src/kafka_controller.rs | 161 +++++++++--------- 5 files changed, 92 insertions(+), 87 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index c1aa1635..cc751aa6 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -42,7 +42,6 @@ pub const OPERATOR_NAME: &str = "kafka.stackable.tech"; pub const METRICS_PORT_NAME: &str = "metrics"; pub const METRICS_PORT: u16 = 9606; // config files -pub const SERVER_PROPERTIES_FILE: &str = "server.properties"; pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties"; // env vars pub const KAFKA_HEAP_OPTS: &str = "KAFKA_HEAP_OPTS"; diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs index a66f8127..4a98af2f 100644 --- a/rust/operator-binary/src/crd/role/broker.rs +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -15,11 +15,12 @@ use stackable_operator::{ use strum::{Display, EnumIter}; use crate::crd::{ - SERVER_PROPERTIES_FILE, role::commons::{CommonConfig, Storage, StorageFragment}, v1alpha1, }; +pub const BROKER_PROPERTIES_FILE: &str = "broker.properties"; + #[derive( Clone, Debug, @@ -131,7 +132,7 @@ impl Configuration for BrokerConfigFragment { { let mut config = BTreeMap::new(); - if file == SERVER_PROPERTIES_FILE { + if file == 
BROKER_PROPERTIES_FILE { // OPA if resource.spec.cluster_config.authorization.opa.is_some() { config.insert( diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index 0425e0b8..1772a404 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -19,6 +19,8 @@ use crate::crd::{ v1alpha1, }; +pub const CONTROLLER_PROPERTIES_FILE: &str = "controller.properties"; + #[derive( Clone, Debug, diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 031e0673..ffe29ae4 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -29,12 +29,12 @@ use stackable_operator::{ use super::listener::node_port_cmd; use crate::crd::{ - LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, SERVER_PROPERTIES_FILE, - STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_CONFIG_DIR, + STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, + STACKABLE_LOG_DIR, authentication::{self, ResolvedAuthenticationClasses}, listener::{self, KafkaListenerConfig, node_address_cmd}, - role::KafkaRole, + role::{KafkaRole, broker::BROKER_PROPERTIES_FILE}, tls, v1alpha1, }; @@ -364,7 +364,7 @@ impl KafkaTlsSecurity { prepare_signal_handlers containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & {set_realm_env} - bin/kafka-server-start.sh {STACKABLE_CONFIG_DIR}/{SERVER_PROPERTIES_FILE} --override \"zookeeper.connect=$ZOOKEEPER\" --override \"listeners={listeners}\" --override \"advertised.listeners={advertised_listeners}\" --override \"listener.security.protocol.map={listener_security_protocol_map}\"{opa_config}{jaas_config} & + bin/kafka-server-start.sh 
{STACKABLE_CONFIG_DIR}/{BROKER_PROPERTIES_FILE} --override \"zookeeper.connect=$ZOOKEEPER\" --override \"listeners={listeners}\" --override \"advertised.listeners={advertised_listeners}\" --override \"listener.security.protocol.map={listener_security_protocol_map}\"{opa_config}{jaas_config} & wait_for_termination $! {create_vector_shutdown_file_command} ", diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index e3d48cb4..9068cd80 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -1,8 +1,8 @@ //! Ensures that `Pod`s are configured and running for each [`v1alpha1::KafkaCluster`]. use std::{ - borrow::Cow, collections::{BTreeMap, HashMap}, + str::FromStr, sync::Arc, }; @@ -77,14 +77,13 @@ use crate::{ crd::{ APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, KafkaClusterStatus, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, - LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, - SERVER_PROPERTIES_FILE, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, - STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, - STACKABLE_LOG_DIR, + LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, STACKABLE_CONFIG_DIR, + STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, + STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, listener::{KafkaListenerError, get_kafka_listener_config}, role::{ KafkaRole, - broker::{BrokerConfig, BrokerContainer}, + broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerContainer}, }, security::KafkaTlsSecurity, v1alpha1, @@ -352,6 +351,9 @@ pub enum Error { ResolveProductImage { source: product_image_selection::Error, }, + + #[snafu(display("failed to parse role: {source}"))] + ParseRole { source: strum::ParseError }, } type Result = std::result::Result; @@ -419,6 +421,7 @@ impl ReconcilerError for Error { 
Error::InvalidKafkaCluster { .. } => None, Error::ConstructJvmArguments { .. } => None, Error::ResolveProductImage { .. } => None, + Error::ParseRole { .. } => None, } } } @@ -436,7 +439,6 @@ pub async fn reconcile_kafka( .context(InvalidKafkaClusterSnafu)?; let client = &ctx.client; - let kafka_role = KafkaRole::Broker; let resolved_product_image = kafka .spec @@ -461,12 +463,13 @@ pub async fn reconcile_kafka( KafkaRole::Broker.to_string(), ( vec![ - PropertyNameKind::File(SERVER_PROPERTIES_FILE.to_string()), + PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), PropertyNameKind::Env, ], kafka.spec.brokers.clone().context(NoBrokerRoleSnafu)?, ), + // TODO: ADD controller )] .into(), ) @@ -476,10 +479,6 @@ pub async fn reconcile_kafka( false, ) .context(InvalidProductConfigSnafu)?; - let role_broker_config = validated_config - .get(&KafkaRole::Broker.to_string()) - .map(Cow::Borrowed) - .unwrap_or_default(); let kafka_security = KafkaTlsSecurity::new_from_kafka_cluster(client, kafka) .await @@ -532,80 +531,84 @@ pub async fn reconcile_kafka( let mut bootstrap_listeners = Vec::::new(); - for (rolegroup_name, rolegroup_config) in role_broker_config.iter() { - let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); + for (kafka_role_str, role_config) in &validated_config { + let kafka_role = KafkaRole::from_str(&kafka_role_str).context(ParseRoleSnafu)?; - let merged_config = kafka - .merged_config(&KafkaRole::Broker, &rolegroup_ref) - .context(FailedToResolveConfigSnafu)?; + for (rolegroup_name, rolegroup_config) in role_config.iter() { + let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); - let rg_service = - build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref)?; - let rg_configmap = build_broker_rolegroup_config_map( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - rolegroup_config, - &merged_config, - )?; - let 
rg_statefulset = build_broker_rolegroup_statefulset( - kafka, - &kafka_role, - &resolved_product_image, - &rolegroup_ref, - rolegroup_config, - opa_connect.as_deref(), - &kafka_security, - &merged_config, - &rbac_sa, - &client.kubernetes_cluster_info, - )?; - let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - &merged_config, - )?; + let merged_config = kafka + .merged_config(&KafkaRole::Broker, &rolegroup_ref) + .context(FailedToResolveConfigSnafu)?; - bootstrap_listeners.push( + let rg_service = + build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref)?; + let rg_configmap = build_broker_rolegroup_config_map( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + rolegroup_config, + &merged_config, + )?; + let rg_statefulset = build_broker_rolegroup_statefulset( + kafka, + &kafka_role, + &resolved_product_image, + &rolegroup_ref, + rolegroup_config, + opa_connect.as_deref(), + &kafka_security, + &merged_config, + &rbac_sa, + &client.kubernetes_cluster_info, + )?; + let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + &merged_config, + )?; + + bootstrap_listeners.push( + cluster_resources + .add(client, rg_bootstrap_listener) + .await + .context(ApplyRoleServiceSnafu)?, + ); cluster_resources - .add(client, rg_bootstrap_listener) + .add(client, rg_service) .await - .context(ApplyRoleServiceSnafu)?, - ); - cluster_resources - .add(client, rg_service) - .await - .with_context(|_| ApplyRoleGroupServiceSnafu { - rolegroup: rolegroup_ref.clone(), - })?; - cluster_resources - .add(client, rg_configmap) - .await - .with_context(|_| ApplyRoleGroupConfigSnafu { - rolegroup: rolegroup_ref.clone(), - })?; - - ss_cond_builder.add( + .with_context(|_| ApplyRoleGroupServiceSnafu { + rolegroup: rolegroup_ref.clone(), + })?; cluster_resources - 
.add(client, rg_statefulset) + .add(client, rg_configmap) .await - .with_context(|_| ApplyRoleGroupStatefulSetSnafu { + .with_context(|_| ApplyRoleGroupConfigSnafu { rolegroup: rolegroup_ref.clone(), - })?, - ); - } + })?; + + ss_cond_builder.add( + cluster_resources + .add(client, rg_statefulset) + .await + .with_context(|_| ApplyRoleGroupStatefulSetSnafu { + rolegroup: rolegroup_ref.clone(), + })?, + ); + } - let role_config = kafka.role_config(&kafka_role); - if let Some(GenericRoleConfig { - pod_disruption_budget: pdb, - }) = role_config - { - add_pdbs(pdb, kafka, &kafka_role, client, &mut cluster_resources) - .await - .context(FailedToCreatePdbSnafu)?; + let role_config = kafka.role_config(&kafka_role); + if let Some(GenericRoleConfig { + pod_disruption_budget: pdb, + }) = role_config + { + add_pdbs(pdb, kafka, &kafka_role, client, &mut cluster_resources) + .await + .context(FailedToCreatePdbSnafu)?; + } } let discovery_cm = build_discovery_configmap( @@ -686,7 +689,7 @@ fn build_broker_rolegroup_config_map( merged_config: &BrokerConfig, ) -> Result { let mut server_cfg = broker_config - .get(&PropertyNameKind::File(SERVER_PROPERTIES_FILE.to_string())) + .get(&PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string())) .cloned() .unwrap_or_default(); @@ -727,7 +730,7 @@ fn build_broker_rolegroup_config_map( .build(), ) .add_data( - SERVER_PROPERTIES_FILE, + BROKER_PROPERTIES_FILE, to_java_properties_string(server_cfg.iter().map(|(k, v)| (k, v))).with_context( |_| SerializeZooCfgSnafu { rolegroup: rolegroup.clone(), From 2d49b4595a2c65168aba68340e5ad4b6e15bf069 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 15:52:30 +0200 Subject: [PATCH 10/90] replace missing server.properties --- deploy/config-spec/properties.yaml | 12 ++++++------ deploy/helm/kafka-operator/configs/properties.yaml | 12 ++++++------ docs/modules/kafka/pages/index.adoc | 2 +- docs/modules/kafka/pages/usage-guide/overrides.adoc | 6 +++--- 
docs/modules/kafka/pages/usage-guide/security.adoc | 2 +- examples/logging/simple-kafka-cluster-opa-log4j.yaml | 2 +- examples/opa/simple-kafka-cluster-opa-allow-all.yaml | 2 +- rust/operator-binary/src/crd/listener.rs | 6 +++--- rust/operator-binary/src/crd/security.rs | 2 +- tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 | 4 ++-- tests/templates/kuttl/smoke/31-assert.yaml | 6 +++--- 11 files changed, 28 insertions(+), 28 deletions(-) diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index 8ee5b297..1e92fbd7 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -63,7 +63,7 @@ properties: - name: "authorizer.class.name" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" defaultValues: @@ -82,7 +82,7 @@ properties: - name: "opa.authorizer.url" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" unit: *unitUrl @@ -97,7 +97,7 @@ properties: - name: "opa.authorizer.cache.initial.capacity" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -115,7 +115,7 @@ properties: - name: "opa.authorizer.cache.maximum.size" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -133,7 +133,7 @@ properties: - name: "opa.authorizer.cache.expire.after.seconds" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -151,7 +151,7 @@ properties: - name: "log.dirs" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" recommendedValues: diff --git a/deploy/helm/kafka-operator/configs/properties.yaml b/deploy/helm/kafka-operator/configs/properties.yaml index 8ee5b297..1e92fbd7 100644 --- a/deploy/helm/kafka-operator/configs/properties.yaml +++ 
b/deploy/helm/kafka-operator/configs/properties.yaml @@ -63,7 +63,7 @@ properties: - name: "authorizer.class.name" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" defaultValues: @@ -82,7 +82,7 @@ properties: - name: "opa.authorizer.url" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" unit: *unitUrl @@ -97,7 +97,7 @@ properties: - name: "opa.authorizer.cache.initial.capacity" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -115,7 +115,7 @@ properties: - name: "opa.authorizer.cache.maximum.size" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -133,7 +133,7 @@ properties: - name: "opa.authorizer.cache.expire.after.seconds" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -151,7 +151,7 @@ properties: - name: "log.dirs" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" recommendedValues: diff --git a/docs/modules/kafka/pages/index.adoc b/docs/modules/kafka/pages/index.adoc index 6b15fa8c..163c2d5d 100644 --- a/docs/modules/kafka/pages/index.adoc +++ b/docs/modules/kafka/pages/index.adoc @@ -33,7 +33,7 @@ image::kafka_overview.drawio.svg[A diagram depicting the Kubernetes resources cr For every xref:concepts:roles-and-role-groups.adoc#_role_groups[role group] in the `broker` role the operator creates a StatefulSet. Multiple Services are created - one at role level, one per role group as well as one for every individual Pod - to allow access to the entire Kafka cluster, parts of it or just individual brokers. 
-For every StatefulSet (role group) a ConfigMap is deployed containing a `log4j.properties` file for xref:usage-guide/logging.adoc[logging] configuration and a `server.properties` file containing the whole Kafka configuration which is derived from the KafkaCluster resource. +For every StatefulSet (role group) a ConfigMap is deployed containing a `log4j.properties` file for xref:usage-guide/logging.adoc[logging] configuration and a `broker.properties` file containing the whole Kafka configuration which is derived from the KafkaCluster resource. The operator creates a xref:concepts:service_discovery.adoc[] for the whole KafkaCluster which references the Service for the whole cluster. Other operators use this ConfigMap to connect to a Kafka cluster simply by name and it can also be used by custom third party applications to find the connection endpoint. diff --git a/docs/modules/kafka/pages/usage-guide/overrides.adoc b/docs/modules/kafka/pages/usage-guide/overrides.adoc index 2abd5543..4a24d67c 100644 --- a/docs/modules/kafka/pages/usage-guide/overrides.adoc +++ b/docs/modules/kafka/pages/usage-guide/overrides.adoc @@ -8,7 +8,7 @@ IMPORTANT: Overriding operator-set properties (such as the ports) can interfere For a role or role group, at the same level of `config`, you can specify: `configOverrides` for the following files: -* `server.properties` +* `broker.properties` * `security.properties` For example, if you want to set the `auto.create.topics.enable` to disable automatic topic creation, it can be configured in the KafkaCluster resource like so: @@ -19,7 +19,7 @@ brokers: roleGroups: default: configOverrides: - server.properties: + broker.properties: auto.create.topics.enable: "false" replicas: 1 ---- @@ -30,7 +30,7 @@ Just as for the `config`, it is possible to specify this at role level as well: ---- brokers: configOverrides: - server.properties: + broker.properties: auto.create.topics.enable: "false" roleGroups: default: diff --git 
a/docs/modules/kafka/pages/usage-guide/security.adoc b/docs/modules/kafka/pages/usage-guide/security.adoc index ffe7b151..8afb107b 100644 --- a/docs/modules/kafka/pages/usage-guide/security.adoc +++ b/docs/modules/kafka/pages/usage-guide/security.adoc @@ -217,7 +217,7 @@ spec: zookeeperConfigMapName: simple-kafka-znode brokers: configOverrides: - server.properties: + broker.properties: opa.authorizer.cache.initial.capacity: "100" opa.authorizer.cache.maximum.size: "100" opa.authorizer.cache.expire.after.seconds: "10" diff --git a/examples/logging/simple-kafka-cluster-opa-log4j.yaml b/examples/logging/simple-kafka-cluster-opa-log4j.yaml index dcda3e95..59b6df13 100644 --- a/examples/logging/simple-kafka-cluster-opa-log4j.yaml +++ b/examples/logging/simple-kafka-cluster-opa-log4j.yaml @@ -62,7 +62,7 @@ spec: logging: enableVectorAgent: true configOverrides: - server.properties: + broker.properties: opa.authorizer.cache.expire.after.seconds: "10" roleGroups: default: diff --git a/examples/opa/simple-kafka-cluster-opa-allow-all.yaml b/examples/opa/simple-kafka-cluster-opa-allow-all.yaml index 0890ba11..bd5768dc 100644 --- a/examples/opa/simple-kafka-cluster-opa-allow-all.yaml +++ b/examples/opa/simple-kafka-cluster-opa-allow-all.yaml @@ -59,7 +59,7 @@ spec: zookeeperConfigMapName: simple-kafka-znode brokers: configOverrides: - server.properties: + broker.properties: opa.authorizer.cache.expire.after.seconds: "0" roleGroups: default: diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 5de0bde1..b337461e 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -52,7 +52,7 @@ pub struct KafkaListenerConfig { } impl KafkaListenerConfig { - /// Returns the `listeners` for the Kafka `server.properties` config. + /// Returns the `listeners` for the Kafka `broker.properties` config. 
pub fn listeners(&self) -> String { self.listeners .iter() @@ -61,7 +61,7 @@ impl KafkaListenerConfig { .join(",") } - /// Returns the `advertised.listeners` for the Kafka `server.properties` config. + /// Returns the `advertised.listeners` for the Kafka `broker.properties` config. /// May contain ENV variables and therefore should be used as cli argument /// like --override \"advertised.listeners=xxx\". pub fn advertised_listeners(&self) -> String { @@ -72,7 +72,7 @@ impl KafkaListenerConfig { .join(",") } - /// Returns the `listener.security.protocol.map` for the Kafka `server.properties` config. + /// Returns the `listener.security.protocol.map` for the Kafka `broker.properties` config. pub fn listener_security_protocol_map(&self) -> String { self.listener_security_protocol_map .iter() diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index ffe29ae4..1384bac3 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -455,7 +455,7 @@ impl KafkaTlsSecurity { Ok(()) } - /// Returns required Kafka configuration settings for the `server.properties` file + /// Returns required Kafka configuration settings for the `broker.properties` file /// depending on the tls and authentication settings. 
pub fn config_settings(&self) -> BTreeMap { let mut config = BTreeMap::new(); diff --git a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 index 9b5f2482..4f3b95a0 100644 --- a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 @@ -29,7 +29,7 @@ spec: zookeeperConfigMapName: test-zk brokers: configOverrides: - server.properties: + broker.properties: compression.type: uncompressed # overridden by role group below controller.quorum.election.backoff.max.ms: "2000" envOverrides: @@ -46,7 +46,7 @@ spec: COMMON_VAR: group-value # overrides role value GROUP_VAR: group-value # only defined here at group level configOverrides: - server.properties: + broker.properties: compression.type: snappy controller.quorum.fetch.timeout.ms: "3000" podOverrides: diff --git a/tests/templates/kuttl/smoke/31-assert.yaml b/tests/templates/kuttl/smoke/31-assert.yaml index 63804de1..26a55394 100644 --- a/tests/templates/kuttl/smoke/31-assert.yaml +++ b/tests/templates/kuttl/smoke/31-assert.yaml @@ -14,6 +14,6 @@ commands: # Test configOverrides # - script: | - kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."server.properties"' | grep "compression.type=snappy" - kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."server.properties"' | grep "controller.quorum.election.backoff.max.ms=2000" - kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."server.properties"' | grep "controller.quorum.fetch.timeout.ms=3000" + kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "compression.type=snappy" + kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.election.backoff.max.ms=2000" + kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' 
| grep "controller.quorum.fetch.timeout.ms=3000" From baae4ce5726a8e5ea952d9e9b90380f85f8f90fc Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 17:03:40 +0200 Subject: [PATCH 11/90] remove unused errors --- rust/operator-binary/src/crd/mod.rs | 9 --- rust/operator-binary/src/kafka_controller.rs | 69 ++------------------ 2 files changed, 7 insertions(+), 71 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index cc751aa6..430aca74 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -65,21 +65,12 @@ pub enum Error { #[snafu(display("object has no namespace associated"))] NoNamespace, - #[snafu(display("failed to validate config of rolegroup {rolegroup}"))] - RoleGroupValidation { - rolegroup: RoleGroupRef, - source: ValidationError, - }, - #[snafu(display("the Kafka role [{role}] is missing from spec"))] MissingKafkaRole { role: String }, #[snafu(display("the role {role} is not defined"))] CannotRetrieveKafkaRole { role: String }, - #[snafu(display("the Kafka node role group [{role_group}] is missing from spec"))] - MissingKafkaRoleGroup { role_group: String }, - #[snafu(display("the role group {role_group} is not defined"))] CannotRetrieveKafkaRoleGroup { role_group: String }, diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 9068cd80..b2793a0b 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -32,7 +32,7 @@ use stackable_operator::{ product_image_selection::{self, ResolvedProductImage}, rbac::build_rbac_resources, }, - crd::{authentication::core, listener}, + crd::listener, k8s_openapi::{ DeepMerge, api::{ @@ -80,7 +80,7 @@ use crate::{ LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, 
STACKABLE_LOG_DIR, - listener::{KafkaListenerError, get_kafka_listener_config}, + listener::get_kafka_listener_config, role::{ KafkaRole, broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerContainer}, @@ -113,12 +113,6 @@ pub enum Error { #[snafu(display("missing secret lifetime"))] MissingSecretLifetime, - #[snafu(display("object has no name"))] - ObjectHasNoName, - - #[snafu(display("object has no namespace"))] - ObjectHasNoNamespace, - #[snafu(display("object defines no broker role"))] NoBrokerRole, @@ -127,16 +121,6 @@ pub enum Error { source: stackable_operator::cluster_resources::Error, }, - #[snafu(display("failed to apply role ServiceAccount"))] - ApplyRoleServiceAccount { - source: stackable_operator::cluster_resources::Error, - }, - - #[snafu(display("failed to apply global RoleBinding"))] - ApplyRoleRoleBinding { - source: stackable_operator::cluster_resources::Error, - }, - #[snafu(display("failed to apply Service for {}", rolegroup))] ApplyRoleGroupService { source: stackable_operator::cluster_resources::Error, @@ -171,8 +155,8 @@ pub enum Error { source: stackable_operator::product_config_utils::Error, }, - #[snafu(display("failed to serialize zoo.cfg for {}", rolegroup))] - SerializeZooCfg { + #[snafu(display("failed to serialize config for {}", rolegroup))] + SerializeConfig { source: PropertiesWriterError, rolegroup: RoleGroupRef, }, @@ -190,33 +174,11 @@ pub enum Error { source: stackable_operator::cluster_resources::Error, }, - #[snafu(display("failed to find rolegroup {}", rolegroup))] - RoleGroupNotFound { - rolegroup: RoleGroupRef, - }, - #[snafu(display("invalid OpaConfig"))] InvalidOpaConfig { source: stackable_operator::commons::opa::Error, }, - #[snafu(display("failed to retrieve {}", authentication_class))] - AuthenticationClassRetrieval { - source: stackable_operator::commons::opa::Error, - authentication_class: ObjectRef, - }, - - #[snafu(display( - "failed to use authentication provider {} - supported methods: {:?}", - provider, - 
supported - ))] - AuthenticationProviderNotSupported { - authentication_class: ObjectRef, - supported: Vec, - provider: String, - }, - #[snafu(display("invalid kafka listeners"))] InvalidKafkaListeners { source: crate::crd::listener::KafkaListenerError, @@ -319,9 +281,6 @@ pub enum Error { #[snafu(display("failed to add Secret Volumes and VolumeMounts"))] AddVolumesAndVolumeMounts { source: crate::crd::security::Error }, - #[snafu(display("failed to resolve the fully-qualified pod name"))] - ResolveNamespace { source: KafkaListenerError }, - #[snafu(display("failed to add kerberos config"))] AddKerberosConfig { source: kerberos::Error }, @@ -365,32 +324,19 @@ impl ReconcilerError for Error { fn secondary_object(&self) -> Option> { match self { Error::MissingSecretLifetime => None, - Error::ObjectHasNoName => None, - Error::ObjectHasNoNamespace => None, Error::NoBrokerRole => None, Error::ApplyRoleService { .. } => None, - Error::ApplyRoleServiceAccount { .. } => None, - Error::ApplyRoleRoleBinding { .. } => None, Error::ApplyRoleGroupService { .. } => None, Error::BuildRoleGroupConfig { .. } => None, Error::ApplyRoleGroupConfig { .. } => None, Error::ApplyRoleGroupStatefulSet { .. } => None, Error::GenerateProductConfig { .. } => None, Error::InvalidProductConfig { .. } => None, - Error::SerializeZooCfg { .. } => None, + Error::SerializeConfig { .. } => None, Error::ObjectMissingMetadataForOwnerRef { .. } => None, Error::BuildDiscoveryConfig { .. } => None, Error::ApplyDiscoveryConfig { .. } => None, - Error::RoleGroupNotFound { .. } => None, Error::InvalidOpaConfig { .. } => None, - Error::AuthenticationClassRetrieval { - authentication_class, - .. - } => Some(authentication_class.clone().erase()), - Error::AuthenticationProviderNotSupported { - authentication_class, - .. - } => Some(authentication_class.clone().erase()), Error::InvalidKafkaListeners { .. } => None, Error::AddListenerVolume { .. } => None, Error::InvalidContainerName { .. 
} => None, @@ -415,7 +361,6 @@ impl ReconcilerError for Error { Error::ConfigureLogging { .. } => None, Error::AddVolume { .. } => None, Error::AddVolumeMount { .. } => None, - Error::ResolveNamespace { .. } => None, Error::AddKerberosConfig { .. } => None, Error::FailedToValidateAuthenticationMethod { .. } => None, Error::InvalidKafkaCluster { .. } => None, @@ -538,7 +483,7 @@ pub async fn reconcile_kafka( let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); let merged_config = kafka - .merged_config(&KafkaRole::Broker, &rolegroup_ref) + .merged_config(&kafka_role, &rolegroup_ref) .context(FailedToResolveConfigSnafu)?; let rg_service = @@ -732,7 +677,7 @@ fn build_broker_rolegroup_config_map( .add_data( BROKER_PROPERTIES_FILE, to_java_properties_string(server_cfg.iter().map(|(k, v)| (k, v))).with_context( - |_| SerializeZooCfgSnafu { + |_| SerializeConfigSnafu { rolegroup: rolegroup.clone(), }, )?, From f74c2550fbd6db462a535b76de4f7e32adcbc721 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 18:12:38 +0200 Subject: [PATCH 12/90] add controller to product confif machinery --- rust/operator-binary/src/crd/mod.rs | 31 +++++ rust/operator-binary/src/crd/role/mod.rs | 2 + rust/operator-binary/src/kafka_controller.rs | 117 ++++++++++++++----- 3 files changed, 123 insertions(+), 27 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 430aca74..c77e26d1 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -83,6 +83,16 @@ pub enum Error { #[snafu(display("fragment validation failure"))] FragmentValidationFailure { source: ValidationError }, + + #[snafu(display( + "Kafka version 4 and higher requires a Kraft controller (configured via `spec.controller`)" + ))] + Kafka4RequiresKraft, + + #[snafu(display( + "Kraft controller (`spec.controller`) and ZooKeeper (`spec.clusterConfig.zookeeperConfigMapName`) are configured. 
Please only choose one" + ))] + KraftAndZookeeperConfigured, } #[versioned( @@ -173,6 +183,27 @@ impl HasStatusCondition for v1alpha1::KafkaCluster { } impl v1alpha1::KafkaCluster { + /// Supporting Kraft alongside Zookeeper requires a couple of CRD checks + /// - If Kafka 4 and higher is used, no zookeeper config map ref has to be provided + /// - Configuring the controller role means no zookeeper config map ref has to be provided + pub fn check_kraft_vs_zookeeper(&self, product_version: &str) -> Result<(), Error> { + if product_version.starts_with("4.") && self.spec.controllers.is_none() { + return Err(Error::Kafka4RequiresKraft); + } + + if self.spec.controllers.is_some() + && self.spec.cluster_config.zookeeper_config_map_name.is_some() + { + return Err(Error::KraftAndZookeeperConfigured); + } + + Ok(()) + } + + pub fn is_controller_configured(&self) -> bool { + self.spec.controllers.is_some() + } + /// The name of the load-balanced Kubernetes Service providing the bootstrap address. Kafka clients will use this /// to get a list of broker addresses and will use those to transmit data to the correct broker. 
pub fn bootstrap_service_name(&self, rolegroup: &RoleGroupRef) -> String { diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index d0ab26a2..59f3015c 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -60,4 +60,6 @@ impl KafkaRole { pub fn kerberos_service_name(&self) -> &'static str { "kafka" } + + RoleConfigByPropertyKind } diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index b2793a0b..6145675d 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -53,7 +53,10 @@ use stackable_operator::{ }, kvp::{Label, Labels}, logging::controller::ReconcilerError, - product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, + product_config_utils::{ + ValidatedRoleConfigByPropertyKind, transform_all_roles_to_config, + validate_all_roles_and_groups_config, + }, product_logging::{ self, framework::LoggingError, @@ -75,7 +78,7 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ - APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, + self, APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, KafkaClusterStatus, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, @@ -84,6 +87,7 @@ use crate::{ role::{ KafkaRole, broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerContainer}, + controller::CONTROLLER_PROPERTIES_FILE, }, security::KafkaTlsSecurity, v1alpha1, @@ -113,8 +117,8 @@ pub enum Error { #[snafu(display("missing secret lifetime"))] MissingSecretLifetime, - #[snafu(display("object defines no broker role"))] 
- NoBrokerRole, + #[snafu(display("cluster object defines no '{role}' role"))] + MissingKafkaRole { role: KafkaRole }, #[snafu(display("failed to apply role Service"))] ApplyRoleService { @@ -303,6 +307,9 @@ pub enum Error { source: error_boundary::InvalidObject, }, + #[snafu(display("KafkaCluster object is misconfigured"))] + MisconfiguredKafkaCluster { source: crd::Error }, + #[snafu(display("failed to construct JVM arguments"))] ConstructJvmArguments { source: crate::config::jvm::Error }, @@ -324,7 +331,7 @@ impl ReconcilerError for Error { fn secondary_object(&self) -> Option> { match self { Error::MissingSecretLifetime => None, - Error::NoBrokerRole => None, + Error::MissingKafkaRole { .. } => None, Error::ApplyRoleService { .. } => None, Error::ApplyRoleGroupService { .. } => None, Error::BuildRoleGroupConfig { .. } => None, @@ -364,6 +371,7 @@ impl ReconcilerError for Error { Error::AddKerberosConfig { .. } => None, Error::FailedToValidateAuthenticationMethod { .. } => None, Error::InvalidKafkaCluster { .. } => None, + Error::MisconfiguredKafkaCluster { .. } => None, Error::ConstructJvmArguments { .. } => None, Error::ResolveProductImage { .. } => None, Error::ParseRole { .. 
} => None, @@ -391,6 +399,11 @@ pub async fn reconcile_kafka( .resolve(DOCKER_IMAGE_BASE_NAME, crate::built_info::PKG_VERSION) .context(ResolveProductImageSnafu)?; + // check Kraft vs ZooKeeper and fail if misconfigured + kafka + .check_kraft_vs_zookeeper(&resolved_product_image.product_version) + .context(MisconfiguredKafkaClusterSnafu)?; + let mut cluster_resources = ClusterResources::new( APP_NAME, OPERATOR_NAME, @@ -400,30 +413,11 @@ pub async fn reconcile_kafka( ) .context(CreateClusterResourcesSnafu)?; - let validated_config = validate_all_roles_and_groups_config( + let validated_config = validated_product_config( + kafka, &resolved_product_image.product_version, - &transform_all_roles_to_config( - kafka, - [( - KafkaRole::Broker.to_string(), - ( - vec![ - PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string()), - PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), - PropertyNameKind::Env, - ], - kafka.spec.brokers.clone().context(NoBrokerRoleSnafu)?, - ), - // TODO: ADD controller - )] - .into(), - ) - .context(GenerateProductConfigSnafu)?, &ctx.product_config, - false, - false, - ) - .context(InvalidProductConfigSnafu)?; + )?; let kafka_security = KafkaTlsSecurity::new_from_kafka_cluster(client, kafka) .await @@ -1195,3 +1189,72 @@ fn container_ports(kafka_security: &KafkaTlsSecurity) -> Vec { } ports } + +/// Defines all required roles and their required configuration. +/// +/// The roles and their configs are then validated and complemented by the product config. +/// +/// # Arguments +/// * `kafka` - The KafkaCluster containing the role definitions. +/// * `product_version` - The KafkaCluster product version. +/// * `product_config` - The product config to validate and complement the user config. 
+/// +fn validated_product_config( + kafka: &v1alpha1::KafkaCluster, + product_version: &str, + product_config: &ProductConfigManager, +) -> Result { + let mut roles = HashMap::new(); + + roles.insert( + KafkaRole::Broker.to_string(), + ( + vec![ + PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string()), + PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), + PropertyNameKind::Env, + ], + kafka + .spec + .brokers + .clone() + .context(MissingKafkaRoleSnafu { + role: KafkaRole::Broker, + })? + .erase(), + ), + ); + + if kafka.is_controller_configured() { + roles.insert( + KafkaRole::Controller.to_string(), + ( + vec![ + PropertyNameKind::File(CONTROLLER_PROPERTIES_FILE.to_string()), + PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), + PropertyNameKind::Env, + ], + kafka + .spec + .controllers + .clone() + .context(MissingKafkaRoleSnafu { + role: KafkaRole::Controller, + })? + .erase(), + ), + ); + } + + let role_config = + transform_all_roles_to_config(kafka, roles).context(GenerateProductConfigSnafu)?; + + validate_all_roles_and_groups_config( + product_version, + &role_config, + product_config, + false, + false, + ) + .context(InvalidProductConfigSnafu) +} From 8c8e4fcbcc86ea944e107c1330ecbb05deef5d50 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 25 Aug 2025 18:13:09 +0200 Subject: [PATCH 13/90] remove cp --- rust/operator-binary/src/crd/role/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 59f3015c..d0ab26a2 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -60,6 +60,4 @@ impl KafkaRole { pub fn kerberos_service_name(&self) -> &'static str { "kafka" } - - RoleConfigByPropertyKind } From b01b93173c8f2174ef87a2a5fc7955c8de5ca198 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 11:00:56 +0200 Subject: [PATCH 14/90] add controller to security.properties --- 
deploy/config-spec/properties.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index 1e92fbd7..8e67978b 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -35,6 +35,8 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for successfully resolved domain names." description: "TTL for successfully resolved domain names." @@ -54,6 +56,8 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for domain names that cannot be resolved." description: "TTL for domain names that cannot be resolved." From 8e95e72db5b0c1e3237753ff6c74e20723fb282d Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 14:19:01 +0200 Subject: [PATCH 15/90] add controller to security properties --- deploy/helm/kafka-operator/configs/properties.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deploy/helm/kafka-operator/configs/properties.yaml b/deploy/helm/kafka-operator/configs/properties.yaml index 1e92fbd7..8e67978b 100644 --- a/deploy/helm/kafka-operator/configs/properties.yaml +++ b/deploy/helm/kafka-operator/configs/properties.yaml @@ -35,6 +35,8 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for successfully resolved domain names." description: "TTL for successfully resolved domain names." @@ -54,6 +56,8 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for domain names that cannot be resolved." description: "TTL for domain names that cannot be resolved." 
From a87c3f295bf6f6169ec64a59cd3e474996979891 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 14:19:48 +0200 Subject: [PATCH 16/90] wip - add AnyConfig to access different role configs --- rust/operator-binary/src/config/jvm.rs | 10 +- rust/operator-binary/src/crd/mod.rs | 47 +--- .../src/crd/role/controller.rs | 4 +- rust/operator-binary/src/crd/role/mod.rs | 215 +++++++++++++++++- rust/operator-binary/src/kafka_controller.rs | 82 +++---- .../src/operations/graceful_shutdown.rs | 6 +- rust/operator-binary/src/product_logging.rs | 68 ++++-- tests/templates/kuttl/smoke/30-assert.yaml.j2 | 2 +- 8 files changed, 303 insertions(+), 131 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index c3e36a5f..0ee24fc9 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -6,7 +6,7 @@ use stackable_operator::{ use crate::crd::{ JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, STACKABLE_CONFIG_DIR, - role::broker::{BrokerConfig, BrokerConfigFragment}, + role::{AnyConfig, broker::BrokerConfigFragment}, }; const JAVA_HEAP_FACTOR: f32 = 0.8; @@ -27,13 +27,13 @@ pub enum Error { /// All JVM arguments. fn construct_jvm_args( - merged_config: &BrokerConfig, + merged_config: &AnyConfig, role: &Role, role_group: &str, ) -> Result, Error> { let heap_size = MemoryQuantity::try_from( merged_config - .resources + .resources() .memory .limit .as_ref() @@ -69,7 +69,7 @@ fn construct_jvm_args( /// Arguments that go into `EXTRA_ARGS`, so *not* the heap settings (which you can get using /// [`construct_heap_jvm_args`]). pub fn construct_non_heap_jvm_args( - merged_config: &BrokerConfig, + merged_config: &AnyConfig, role: &Role, role_group: &str, ) -> Result { @@ -82,7 +82,7 @@ pub fn construct_non_heap_jvm_args( /// Arguments that go into `KAFKA_HEAP_OPTS`. /// You can get the normal JVM arguments using [`construct_non_heap_jvm_args`]. 
pub fn construct_heap_jvm_args( - merged_config: &BrokerConfig, + merged_config: &AnyConfig, role: &Role, role_group: &str, ) -> Result { diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index c77e26d1..df5bb49a 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -13,11 +13,7 @@ use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ commons::{cluster_operation::ClusterOperation, product_image_selection::ProductImage}, - config::{ - fragment::{self, ValidationError}, - merge::Merge, - }, - kube::{CustomResource, ResourceExt, runtime::reflector::ObjectRef}, + kube::{CustomResource, runtime::reflector::ObjectRef}, role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, @@ -27,11 +23,7 @@ use stackable_operator::{ use crate::crd::{ authorization::KafkaAuthorization, - role::{ - KafkaRole, - broker::{BrokerConfig, BrokerConfigFragment}, - controller::ControllerConfigFragment, - }, + role::{KafkaRole, broker::BrokerConfigFragment, controller::ControllerConfigFragment}, tls::KafkaTls, }; @@ -65,9 +57,6 @@ pub enum Error { #[snafu(display("object has no namespace associated"))] NoNamespace, - #[snafu(display("the Kafka role [{role}] is missing from spec"))] - MissingKafkaRole { role: String }, - #[snafu(display("the role {role} is not defined"))] CannotRetrieveKafkaRole { role: String }, @@ -81,9 +70,6 @@ pub enum Error { roles: Vec, }, - #[snafu(display("fragment validation failure"))] - FragmentValidationFailure { source: ValidationError }, - #[snafu(display( "Kafka version 4 and higher requires a Kraft controller (configured via `spec.controller`)" ))] @@ -281,35 +267,6 @@ impl v1alpha1::KafkaCluster { }) })) } - - /// Retrieve and merge resource configs for role and role groups - pub fn merged_config( - &self, - role: 
&KafkaRole, - rolegroup_ref: &RoleGroupRef, - ) -> Result { - // Initialize the result with all default values as baseline - let conf_defaults = BrokerConfig::default_config(&self.name_any(), &role.to_string()); - - // Retrieve role resource config - let role = self.role(role)?; - let mut conf_role = role.config.config.to_owned(); - - // Retrieve rolegroup specific resource config - let role_group = self.rolegroup(rolegroup_ref)?; - let mut conf_role_group = role_group.config.config.to_owned(); - - // Merge more specific configs into default config - // Hierarchy is: - // 1. RoleGroup - // 2. Role - // 3. Default - conf_role.merge(&conf_defaults); - conf_role_group.merge(&conf_role); - - tracing::debug!("Merged config: {:?}", conf_role_group); - fragment::validate(conf_role_group).context(FragmentValidationFailureSnafu) - } } /// Reference to a single `Pod` that is a component of a [`KafkaCluster`] diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index 1772a404..fd66ec7b 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -58,7 +58,7 @@ pub enum ControllerContainer { )] pub struct ControllerConfig { #[fragment_attrs(serde(flatten))] - pub common_role_config: CommonConfig, + pub common_config: CommonConfig, #[fragment_attrs(serde(default))] pub logging: Logging, @@ -70,7 +70,7 @@ pub struct ControllerConfig { impl ControllerConfig { pub fn default_config(cluster_name: &str, role: &str) -> ControllerConfigFragment { ControllerConfigFragment { - common_role_config: CommonConfig::default_config(cluster_name, role), + common_config: CommonConfig::default_config(cluster_name, role), logging: product_logging::spec::default_logging(), resources: ResourcesFragment { cpu: CpuLimitsFragment { diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index d0ab26a2..c56d2e21 100644 --- 
a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -2,15 +2,43 @@ pub mod broker; pub mod commons; pub mod controller; +use std::{borrow::Cow, ops::Deref}; + use serde::{Deserialize, Serialize}; +use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ - kube::runtime::reflector::ObjectRef, + commons::resources::{NoRuntimeLimits, Resources}, + config::{ + fragment::{self, ValidationError}, + merge::Merge, + }, + kube::{ResourceExt, runtime::reflector::ObjectRef}, + product_logging::spec::ContainerLogConfig, role_utils::RoleGroupRef, schemars::{self, JsonSchema}, }; use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; -use crate::crd::v1alpha1; +use crate::{ + crd::role::{ + broker::BrokerConfig, + commons::{CommonConfig, Storage}, + controller::ControllerConfig, + }, + v1alpha1, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("fragment validation failure"))] + FragmentValidationFailure { source: ValidationError }, + + #[snafu(display("the Kafka role [{role}] is missing from spec"))] + MissingRole { role: String }, + + #[snafu(display("missing role group {role_group:?} for role {role:?}"))] + MissingRoleGroup { role: String, role_group: String }, +} #[derive( Clone, @@ -33,6 +61,15 @@ pub enum KafkaRole { } impl KafkaRole { + /// Return all available roles + pub fn roles() -> Vec { + let mut roles = vec![]; + for role in Self::iter() { + roles.push(role.to_string()) + } + roles + } + /// Metadata about a rolegroup pub fn rolegroup_ref( &self, @@ -46,18 +83,176 @@ impl KafkaRole { } } - pub fn roles() -> Vec { - let mut roles = vec![]; - for role in Self::iter() { - roles.push(role.to_string()) - } - roles - } - /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, /// but is similar to HBase). 
pub fn kerberos_service_name(&self) -> &'static str { "kafka" } + + /// Merge the [Broker|Controller]ConfigFragment defaults, role and role group settings. + /// The priority is: default < role config < role_group config + pub fn merged_config( + &self, + kafka: &v1alpha1::KafkaCluster, + role_group: &str, + ) -> Result { + match self { + Self::Broker => { + // Initialize the result with all default values as baseline + let default_config = + BrokerConfig::default_config(&kafka.name_any(), &self.to_string()); + + // Retrieve role resource config + let role = kafka + .spec + .brokers + .as_ref() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })?; + + let mut role_config = role.config.config.clone(); + // Retrieve rolegroup specific resource config + let mut role_group_config = role + .role_groups + .get(role_group) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + role_group: role_group.to_string(), + })? + .config + .config + .clone(); + + // Merge more specific configs into default config + // Hierarchy is: + // 1. RoleGroup + // 2. Role + // 3. Default + role_config.merge(&default_config); + role_group_config.merge(&role_config); + Ok(AnyConfig::Broker( + fragment::validate::(role_group_config) + .context(FragmentValidationFailureSnafu)?, + )) + } + Self::Controller => { + // Initialize the result with all default values as baseline + let default_config = + ControllerConfig::default_config(&kafka.name_any(), &self.to_string()); + + // Retrieve role resource config + let role = kafka + .spec + .controllers + .as_ref() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })?; + + let mut role_config = role.config.config.clone(); + // Retrieve rolegroup specific resource config + let mut role_group_config = role + .role_groups + .get(role_group) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + role_group: role_group.to_string(), + })? 
+ .config + .config + .clone(); + + // Merge more specific configs into default config + // Hierarchy is: + // 1. RoleGroup + // 2. Role + // 3. Default + role_config.merge(&default_config); + role_group_config.merge(&role_config); + Ok(AnyConfig::Controller( + fragment::validate::(role_group_config) + .context(FragmentValidationFailureSnafu)?, + )) + } + } + } +} + +/// Configuration for a role and rolegroup of an unknown type. +#[derive(Debug)] +pub enum AnyConfig { + Broker(BrokerConfig), + Controller(ControllerConfig), +} + +// impl From for AnyConfig { +// fn from(broker_config: BrokerConfig) -> Self { +// Self::Broker(broker_config) +// } +// } + +// impl From for AnyConfig { +// fn from(controller_config: ControllerConfig) -> Self { +// Self::Controller(controller_config) +// } +// } + +impl Deref for AnyConfig { + type Target = CommonConfig; + + fn deref(&self) -> &Self::Target { + match self { + AnyConfig::Broker(broker_config) => &broker_config.common_config, + AnyConfig::Controller(controller_config) => &controller_config.common_config, + } + } +} + +impl AnyConfig { + pub fn resources(&self) -> &Resources { + match self { + AnyConfig::Broker(broker_config) => &broker_config.resources, + AnyConfig::Controller(controller_config) => &controller_config.resources, + } + } + + // Logging config is distinct between each role, due to the different enum types, + // so provide helpers for containers that are common between all roles. 
+ pub fn kafka_logging(&self) -> Cow { + match self { + AnyConfig::Broker(node) => node.logging.for_container(&broker::BrokerContainer::Kafka), + AnyConfig::Controller(node) => node + .logging + .for_container(&controller::ControllerContainer::Kafka), + } + } + + pub fn vector_logging(&self) -> Cow { + match &self { + AnyConfig::Broker(broker_config) => broker_config + .logging + .for_container(&broker::BrokerContainer::Vector), + AnyConfig::Controller(controller_config) => controller_config + .logging + .for_container(&controller::ControllerContainer::Vector), + } + } + + pub fn vector_logging_enabled(&self) -> bool { + match self { + AnyConfig::Broker(broker_config) => broker_config.logging.enable_vector_agent, + AnyConfig::Controller(controller_config) => { + controller_config.logging.enable_vector_agent + } + } + } + + pub fn listener_class(&self) -> Option<&String> { + match self { + AnyConfig::Broker(broker_config) => Some(&broker_config.broker_listener_class), + AnyConfig::Controller(_) => None, + } + } } diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 6145675d..76ca49b1 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -2,6 +2,7 @@ use std::{ collections::{BTreeMap, HashMap}, + ops::Deref, str::FromStr, sync::Arc, }; @@ -85,7 +86,7 @@ use crate::{ STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, listener::get_kafka_listener_config, role::{ - KafkaRole, + AnyConfig, KafkaRole, broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerContainer}, controller::CONTROLLER_PROPERTIES_FILE, }, @@ -213,7 +214,7 @@ pub enum Error { }, #[snafu(display("failed to resolve and merge config for role and role group"))] - FailedToResolveConfig { source: crate::crd::Error }, + FailedToResolveConfig { source: crate::crd::role::Error }, #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] VectorAggregatorConfigMapMissing, @@ 
-476,8 +477,8 @@ pub async fn reconcile_kafka( for (rolegroup_name, rolegroup_config) in role_config.iter() { let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); - let merged_config = kafka - .merged_config(&kafka_role, &rolegroup_ref) + let merged_config = kafka_role + .merged_config(&kafka, &rolegroup_ref.role_group) .context(FailedToResolveConfigSnafu)?; let rg_service = @@ -502,20 +503,24 @@ pub async fn reconcile_kafka( &rbac_sa, &client.kubernetes_cluster_info, )?; - let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - &merged_config, - )?; - bootstrap_listeners.push( - cluster_resources - .add(client, rg_bootstrap_listener) - .await - .context(ApplyRoleServiceSnafu)?, - ); + // TODO: broker / controller? + if let AnyConfig::Broker(broker_config) = merged_config { + let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + &broker_config, + )?; + bootstrap_listeners.push( + cluster_resources + .add(client, rg_bootstrap_listener) + .await + .context(ApplyRoleServiceSnafu)?, + ); + } + cluster_resources .add(client, rg_service) .await @@ -625,7 +630,7 @@ fn build_broker_rolegroup_config_map( kafka_security: &KafkaTlsSecurity, rolegroup: &RoleGroupRef, broker_config: &HashMap>, - merged_config: &BrokerConfig, + merged_config: &AnyConfig, ) -> Result { let mut server_cfg = broker_config .get(&PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string())) @@ -688,7 +693,7 @@ fn build_broker_rolegroup_config_map( tracing::debug!(?server_cfg, "Applied server config"); tracing::debug!(?jvm_sec_props, "Applied JVM config"); - extend_role_group_config_map(rolegroup, &merged_config.logging, &mut cm_builder).context( + extend_role_group_config_map(rolegroup, &merged_config, &mut cm_builder).context( InvalidLoggingConfigSnafu { cm_name: rolegroup.object_name(), }, @@ -756,7 +761,7 @@ 
fn build_broker_rolegroup_statefulset( broker_config: &HashMap>, opa_connect_string: Option<&str>, kafka_security: &KafkaTlsSecurity, - merged_config: &BrokerConfig, + merged_config: &AnyConfig, service_account: &ServiceAccount, cluster_info: &KubernetesClusterInfo, ) -> Result { @@ -800,7 +805,7 @@ fn build_broker_rolegroup_statefulset( // Add TLS related volumes and volume mounts let requested_secret_lifetime = merged_config - .common_config + .deref() .requested_secret_lifetime .context(MissingSecretLifetimeSnafu)?; kafka_security @@ -812,7 +817,7 @@ fn build_broker_rolegroup_statefulset( ) .context(AddVolumesAndVolumeMountsSnafu)?; - let mut pvcs = merged_config.resources.storage.build_pvcs(); + let mut pvcs = merged_config.resources().storage.build_pvcs(); // bootstrap listener should be persistent, // main broker listener is an ephemeral PVC instead @@ -938,7 +943,7 @@ fn build_broker_rolegroup_statefulset( .context(AddVolumeMountSnafu)? .add_volume_mount("log", STACKABLE_LOG_DIR) .context(AddVolumeMountSnafu)? - .resources(merged_config.resources.clone().into()); + .resources(merged_config.resources().clone().into()); // Use kcat sidecar for probing container status rather than the official Kafka tools, since they incur a lot of // unacceptable perf overhead @@ -983,15 +988,12 @@ fn build_broker_rolegroup_statefulset( ..Probe::default() }); - if let Some(ContainerLogConfig { + if let ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { custom: ConfigMapLogConfig { config_map }, })), - }) = merged_config - .logging - .containers - .get(&BrokerContainer::Kafka) + } = &*merged_config.kafka_logging() { pod_builder .add_volume( @@ -1015,12 +1017,21 @@ fn build_broker_rolegroup_statefulset( .context(MetadataBuildSnafu)? 
.build(); + if let Some(listener_class) = merged_config.listener_class() { + pod_builder + .add_listener_volume_by_listener_class( + LISTENER_BROKER_VOLUME_NAME, + listener_class, + &recommended_labels, + ) + .context(AddListenerVolumeSnafu)?; + } pod_builder .metadata(metadata) .image_pull_secrets_from_product_image(resolved_product_image) .add_container(cb_kafka.build()) .add_container(cb_kcat_prober.build()) - .affinity(&merged_config.common_config.affinity) + .affinity(&merged_config.affinity) .add_volume(Volume { name: "config".to_string(), config_map: Some(ConfigMapVolumeSource { @@ -1031,12 +1042,6 @@ fn build_broker_rolegroup_statefulset( }) .context(AddVolumeSnafu)? // bootstrap volume is a persistent volume template instead, to keep addresses persistent - .add_listener_volume_by_listener_class( - LISTENER_BROKER_VOLUME_NAME, - &merged_config.broker_listener_class, - &recommended_labels, - ) - .context(AddListenerVolumeSnafu)? .add_empty_dir_volume( "log", Some(product_logging::framework::calculate_log_volume_size_limit( @@ -1048,7 +1053,7 @@ fn build_broker_rolegroup_statefulset( .security_context(PodSecurityContextBuilder::new().fs_group(1000).build()); // Add vector container after kafka container to keep the defaulting into kafka container - if merged_config.logging.enable_vector_agent { + if merged_config.vector_logging_enabled() { match &kafka.spec.cluster_config.vector_aggregator_config_map_name { Some(vector_aggregator_config_map_name) => { pod_builder.add_container( @@ -1056,10 +1061,7 @@ fn build_broker_rolegroup_statefulset( resolved_product_image, "config", "log", - merged_config - .logging - .containers - .get(&BrokerContainer::Vector), + Some(&*merged_config.vector_logging()), ResourceRequirementsBuilder::new() .with_cpu_request("250m") .with_cpu_limit("500m") diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index 9a9974ef..ec4f7611 100644 --- 
a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use snafu::{ResultExt, Snafu}; use stackable_operator::builder::pod::PodBuilder; -use crate::crd::role::broker::BrokerConfig; +use crate::crd::role::AnyConfig; #[derive(Debug, Snafu)] pub enum Error { @@ -20,12 +20,12 @@ pub fn graceful_shutdown_config_properties() -> BTreeMap { } pub fn add_graceful_shutdown_config( - merged_config: &BrokerConfig, + merged_config: &AnyConfig, pod_builder: &mut PodBuilder, ) -> Result<(), Error> { // This must be always set by the merge mechanism, as we provide a default value, // users can not disable graceful shutdown. - if let Some(graceful_shutdown_timeout) = merged_config.common_config.graceful_shutdown_timeout { + if let Some(graceful_shutdown_timeout) = &merged_config.graceful_shutdown_timeout { pod_builder .termination_grace_period(&graceful_shutdown_timeout) .context(SetTerminationGracePeriodSnafu)?; diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 84cc704b..05bdcfdd 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -1,15 +1,17 @@ +use std::{borrow::Cow, fmt::Display}; + use snafu::Snafu; use stackable_operator::{ builder::configmap::ConfigMapBuilder, memory::{BinaryMultiple, MemoryQuantity}, product_logging::{ self, - spec::{ContainerLogConfig, ContainerLogConfigChoice, Logging}, + spec::{ContainerLogConfig, ContainerLogConfigChoice}, }, role_utils::RoleGroupRef, }; -use crate::crd::{STACKABLE_LOG_DIR, role::broker::BrokerContainer, v1alpha1}; +use crate::crd::{STACKABLE_LOG_DIR, role::AnyConfig, v1alpha1}; #[derive(Snafu, Debug)] pub enum Error { @@ -47,41 +49,57 @@ const CONSOLE_CONVERSION_PATTERN: &str = "[%d] %p %m (%c)%n"; /// Extend the role group ConfigMap with logging and Vector configurations pub fn extend_role_group_config_map( 
rolegroup: &RoleGroupRef, - logging: &Logging, + merged_config: &AnyConfig, cm_builder: &mut ConfigMapBuilder, ) -> Result<()> { - if let Some(ContainerLogConfig { - choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = logging.containers.get(&BrokerContainer::Kafka) - { - cm_builder.add_data( - LOG4J_CONFIG_FILE, - product_logging::framework::create_log4j_config( - &format!( - "{STACKABLE_LOG_DIR}/{container}", - container = BrokerContainer::Kafka + fn add_log4j_config_if_automatic( + cm_builder: &mut ConfigMapBuilder, + log_config: Option>, + log_config_file: &str, + container_name: impl Display, + log_file: &str, + max_log_file_size: MemoryQuantity, + ) { + if let Some(ContainerLogConfig { + choice: Some(ContainerLogConfigChoice::Automatic(log_config)), + }) = log_config.as_deref() + { + cm_builder.add_data( + log_config_file, + product_logging::framework::create_log4j_config( + &format!("{STACKABLE_LOG_DIR}/{container_name}"), + log_file, + max_log_file_size + .scale_to(BinaryMultiple::Mebi) + .floor() + .value as u32, + CONSOLE_CONVERSION_PATTERN, + log_config, ), - KAFKA_LOG_FILE, - MAX_KAFKA_LOG_FILES_SIZE - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32, - CONSOLE_CONVERSION_PATTERN, - log_config, - ), - ); + ); + } } + add_log4j_config_if_automatic( + cm_builder, + Some(merged_config.kafka_logging()), + LOG4J_CONFIG_FILE, + // TODO: configure? 
+ "kafka", + KAFKA_LOG_FILE, + MAX_KAFKA_LOG_FILES_SIZE, + ); - let vector_log_config = if let Some(ContainerLogConfig { + let vector_log_config = merged_config.vector_logging(); + let vector_log_config = if let ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = logging.containers.get(&BrokerContainer::Vector) + } = &*vector_log_config { Some(log_config) } else { None }; - if logging.enable_vector_agent { + if merged_config.vector_logging_enabled() { cm_builder.add_data( product_logging::framework::VECTOR_CONFIG_FILE, product_logging::framework::create_vector_config(rolegroup, vector_log_config), diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 b/tests/templates/kuttl/smoke/30-assert.yaml.j2 index 1520c7dc..96d71c3d 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2 @@ -60,8 +60,8 @@ spec: annotations: secrets.stackable.tech/backend.autotls.cert.lifetime: 7d - name: log-config - - name: config - name: listener-broker + - name: config - name: log status: readyReplicas: 1 From 5d1f9b1edbd7424b74556a6ceff6ae7f90030706 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 14:23:40 +0200 Subject: [PATCH 17/90] remove errors from product logging --- rust/operator-binary/src/crd/role/mod.rs | 12 --------- rust/operator-binary/src/kafka_controller.rs | 13 +-------- rust/operator-binary/src/product_logging.rs | 28 +------------------- 3 files changed, 2 insertions(+), 51 deletions(-) diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index c56d2e21..3736656a 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -187,18 +187,6 @@ pub enum AnyConfig { Controller(ControllerConfig), } -// impl From for AnyConfig { -// fn from(broker_config: BrokerConfig) -> Self { -// Self::Broker(broker_config) -// } -// } - -// impl From for AnyConfig { -// fn 
from(controller_config: ControllerConfig) -> Self { -// Self::Controller(controller_config) -// } -// } - impl Deref for AnyConfig { type Target = CommonConfig; diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 76ca49b1..a353a3f2 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -219,12 +219,6 @@ pub enum Error { #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] VectorAggregatorConfigMapMissing, - #[snafu(display("failed to add the logging configuration to the ConfigMap [{cm_name}]"))] - InvalidLoggingConfig { - source: crate::product_logging::Error, - cm_name: String, - }, - #[snafu(display("failed to patch service account"))] ApplyServiceAccount { source: stackable_operator::cluster_resources::Error, @@ -353,7 +347,6 @@ impl ReconcilerError for Error { Error::CreateClusterResources { .. } => None, Error::FailedToResolveConfig { .. } => None, Error::VectorAggregatorConfigMapMissing => None, - Error::InvalidLoggingConfig { .. } => None, Error::ApplyServiceAccount { .. } => None, Error::ApplyRoleBinding { .. } => None, Error::ApplyStatus { .. 
} => None, @@ -693,11 +686,7 @@ fn build_broker_rolegroup_config_map( tracing::debug!(?server_cfg, "Applied server config"); tracing::debug!(?jvm_sec_props, "Applied JVM config"); - extend_role_group_config_map(rolegroup, &merged_config, &mut cm_builder).context( - InvalidLoggingConfigSnafu { - cm_name: rolegroup.object_name(), - }, - )?; + extend_role_group_config_map(rolegroup, &merged_config, &mut cm_builder); cm_builder .build() diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 05bdcfdd..94d21659 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -1,6 +1,5 @@ use std::{borrow::Cow, fmt::Display}; -use snafu::Snafu; use stackable_operator::{ builder::configmap::ConfigMapBuilder, memory::{BinaryMultiple, MemoryQuantity}, @@ -13,29 +12,6 @@ use stackable_operator::{ use crate::crd::{STACKABLE_LOG_DIR, role::AnyConfig, v1alpha1}; -#[derive(Snafu, Debug)] -pub enum Error { - #[snafu(display("object has no namespace"))] - ObjectHasNoNamespace, - - #[snafu(display("failed to retrieve the ConfigMap {cm_name}"))] - ConfigMapNotFound { - source: stackable_operator::client::Error, - cm_name: String, - }, - - #[snafu(display("failed to retrieve the entry {entry} for ConfigMap {cm_name}"))] - MissingConfigMapEntry { - entry: &'static str, - cm_name: String, - }, - - #[snafu(display("crd validation failure"))] - CrdValidationFailure { source: crate::crd::Error }, -} - -type Result = std::result::Result; - pub const LOG4J_CONFIG_FILE: &str = "log4j.properties"; pub const KAFKA_LOG_FILE: &str = "kafka.log4j.xml"; @@ -51,7 +27,7 @@ pub fn extend_role_group_config_map( rolegroup: &RoleGroupRef, merged_config: &AnyConfig, cm_builder: &mut ConfigMapBuilder, -) -> Result<()> { +) { fn add_log4j_config_if_automatic( cm_builder: &mut ConfigMapBuilder, log_config: Option>, @@ -105,6 +81,4 @@ pub fn extend_role_group_config_map( 
product_logging::framework::create_vector_config(rolegroup, vector_log_config), ); } - - Ok(()) } From 2fb197ea1e25e7d9488ba1c5dd41ce47230260a6 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 15:45:24 +0200 Subject: [PATCH 18/90] finished controller prep work --- rust/operator-binary/src/config/jvm.rs | 26 ++- rust/operator-binary/src/crd/affinity.rs | 6 +- rust/operator-binary/src/crd/mod.rs | 47 +--- rust/operator-binary/src/crd/role/mod.rs | 207 +++++++++++++++++- rust/operator-binary/src/kafka_controller.rs | 48 ++-- .../src/operations/graceful_shutdown.rs | 2 +- 6 files changed, 250 insertions(+), 86 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 0ee24fc9..4ad8dd4f 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -5,8 +5,7 @@ use stackable_operator::{ }; use crate::crd::{ - JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, STACKABLE_CONFIG_DIR, - role::{AnyConfig, broker::BrokerConfigFragment}, + JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, STACKABLE_CONFIG_DIR, role::AnyConfig, }; const JAVA_HEAP_FACTOR: f32 = 0.8; @@ -26,9 +25,9 @@ pub enum Error { } /// All JVM arguments. -fn construct_jvm_args( +fn construct_jvm_args( merged_config: &AnyConfig, - role: &Role, + role: &Role, role_group: &str, ) -> Result, Error> { let heap_size = MemoryQuantity::try_from( @@ -68,9 +67,9 @@ fn construct_jvm_args( /// Arguments that go into `EXTRA_ARGS`, so *not* the heap settings (which you can get using /// [`construct_heap_jvm_args`]). -pub fn construct_non_heap_jvm_args( +pub fn construct_non_heap_jvm_args( merged_config: &AnyConfig, - role: &Role, + role: &Role, role_group: &str, ) -> Result { let mut jvm_args = construct_jvm_args(merged_config, role, role_group)?; @@ -81,9 +80,9 @@ pub fn construct_non_heap_jvm_args( /// Arguments that go into `KAFKA_HEAP_OPTS`. /// You can get the normal JVM arguments using [`construct_non_heap_jvm_args`]. 
-pub fn construct_heap_jvm_args( +pub fn construct_heap_jvm_args( merged_config: &AnyConfig, - role: &Role, + role: &Role, role_group: &str, ) -> Result { let mut jvm_args = construct_jvm_args(merged_config, role, role_group)?; @@ -101,7 +100,10 @@ fn is_heap_jvm_argument(jvm_argument: &str) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::crd::{role::KafkaRole, v1alpha1}; + use crate::crd::{ + role::{KafkaRole, broker::BrokerConfigFragment}, + v1alpha1, + }; #[test] fn test_construct_jvm_arguments_defaults() { @@ -186,7 +188,7 @@ mod tests { fn construct_boilerplate( kafka_cluster: &str, ) -> ( - BrokerConfig, + AnyConfig, Role, String, ) { @@ -195,7 +197,9 @@ mod tests { let kafka_role = KafkaRole::Broker; let rolegroup_ref = kafka.broker_rolegroup_ref("default"); - let merged_config = kafka.merged_config(&kafka_role, &rolegroup_ref).unwrap(); + let merged_config = kafka_role + .merged_config(&kafka, &rolegroup_ref.role_group) + .unwrap(); let role = kafka.spec.brokers.unwrap(); (merged_config, role, "default".to_owned()) diff --git a/rust/operator-binary/src/crd/affinity.rs b/rust/operator-binary/src/crd/affinity.rs index 3485fdb0..01c07d86 100644 --- a/rust/operator-binary/src/crd/affinity.rs +++ b/rust/operator-binary/src/crd/affinity.rs @@ -55,12 +55,10 @@ mod tests { let kafka: v1alpha1::KafkaCluster = serde_yaml::from_str(input).expect("illegal test input"); - let merged_config = kafka - .merged_config(&role, &role.rolegroup_ref(&kafka, "default")) - .unwrap(); + let merged_config = role.merged_config(&kafka, "default").unwrap(); assert_eq!( - merged_config.common_config.affinity, + merged_config.affinity, StackableAffinity { pod_affinity: None, pod_anti_affinity: Some(PodAntiAffinity { diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index df5bb49a..3ef484bf 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -6,15 +6,15 @@ pub mod role; pub mod security; pub mod 
tls; -use std::{collections::BTreeMap, str::FromStr}; +use std::collections::BTreeMap; use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; +use snafu::{OptionExt, Snafu}; use stackable_operator::{ commons::{cluster_operation::ClusterOperation, product_image_selection::ProductImage}, kube::{CustomResource, runtime::reflector::ObjectRef}, - role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, + role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, utils::cluster_info::KubernetesClusterInfo, @@ -60,16 +60,6 @@ pub enum Error { #[snafu(display("the role {role} is not defined"))] CannotRetrieveKafkaRole { role: String }, - #[snafu(display("the role group {role_group} is not defined"))] - CannotRetrieveKafkaRoleGroup { role_group: String }, - - #[snafu(display("unknown role {role}. Should be one of {roles:?}"))] - UnknownKafkaRole { - source: strum::ParseError, - role: String, - roles: Vec, - }, - #[snafu(display( "Kafka version 4 and higher requires a Kraft controller (configured via `spec.controller`)" ))] @@ -205,37 +195,6 @@ impl v1alpha1::KafkaCluster { } } - pub fn role( - &self, - role: &KafkaRole, - ) -> Result<&Role, Error> { - match role { - KafkaRole::Broker => self.spec.brokers.as_ref(), - KafkaRole::Controller => todo!(), - } - .with_context(|| CannotRetrieveKafkaRoleSnafu { - role: role.to_string(), - }) - } - - pub fn rolegroup( - &self, - rolegroup_ref: &RoleGroupRef, - ) -> Result<&RoleGroup, Error> { - let role_variant = - KafkaRole::from_str(&rolegroup_ref.role).with_context(|_| UnknownKafkaRoleSnafu { - role: rolegroup_ref.role.to_owned(), - roles: KafkaRole::roles(), - })?; - - let role = self.role(&role_variant)?; - role.role_groups - .get(&rolegroup_ref.role_group) - .with_context(|| CannotRetrieveKafkaRoleGroupSnafu { - role_group: 
rolegroup_ref.role_group.to_owned(), - }) - } - pub fn role_config(&self, role: &KafkaRole) -> Option<&GenericRoleConfig> { match role { KafkaRole::Broker => self.spec.brokers.as_ref().map(|b| &b.role_config), diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 3736656a..87e145bc 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -12,6 +12,7 @@ use stackable_operator::{ fragment::{self, ValidationError}, merge::Merge, }, + k8s_openapi::api::core::v1::PodTemplateSpec, kube::{ResourceExt, runtime::reflector::ObjectRef}, product_logging::spec::ContainerLogConfig, role_utils::RoleGroupRef, @@ -20,8 +21,9 @@ use stackable_operator::{ use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; use crate::{ + config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::role::{ - broker::BrokerConfig, + broker::{BrokerConfig, BrokerConfigFragment}, commons::{CommonConfig, Storage}, controller::ControllerConfig, }, @@ -36,8 +38,11 @@ pub enum Error { #[snafu(display("the Kafka role [{role}] is missing from spec"))] MissingRole { role: String }, - #[snafu(display("missing role group {role_group:?} for role {role:?}"))] - MissingRoleGroup { role: String, role_group: String }, + #[snafu(display("missing role group {rolegroup:?} for role {role:?}"))] + MissingRoleGroup { role: String, rolegroup: String }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::config::jvm::Error }, } #[derive( @@ -95,7 +100,7 @@ impl KafkaRole { pub fn merged_config( &self, kafka: &v1alpha1::KafkaCluster, - role_group: &str, + rolegroup: &str, ) -> Result { match self { Self::Broker => { @@ -116,10 +121,10 @@ impl KafkaRole { // Retrieve rolegroup specific resource config let mut role_group_config = role .role_groups - .get(role_group) + .get(rolegroup) .with_context(|| MissingRoleGroupSnafu { role: self.to_string(), - role_group: 
role_group.to_string(), + rolegroup: rolegroup.to_string(), })? .config .config @@ -155,10 +160,10 @@ impl KafkaRole { // Retrieve rolegroup specific resource config let mut role_group_config = role .role_groups - .get(role_group) + .get(rolegroup) .with_context(|| MissingRoleGroupSnafu { role: self.to_string(), - role_group: role_group.to_string(), + rolegroup: rolegroup.to_string(), })? .config .config @@ -178,6 +183,192 @@ impl KafkaRole { } } } + + pub fn construct_non_heap_jvm_args( + &self, + merged_config: &AnyConfig, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + match self { + Self::Broker => construct_non_heap_jvm_args::( + merged_config, + &kafka + .spec + .brokers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + Self::Controller => construct_non_heap_jvm_args( + merged_config, + &kafka + .spec + .controllers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + } + } + + pub fn construct_heap_jvm_args( + &self, + merged_config: &AnyConfig, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + match self { + Self::Broker => construct_heap_jvm_args::( + merged_config, + &kafka + .spec + .brokers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + Self::Controller => construct_heap_jvm_args( + merged_config, + &kafka + .spec + .controllers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + } + } + + pub fn role_pod_overrides( + &self, + kafka: &v1alpha1::KafkaCluster, + ) -> Result { + let pod_overrides = match self { + Self::Broker => { + kafka + .spec + .brokers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? 
+ .config + .pod_overrides + } + Self::Controller => { + kafka + .spec + .controllers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .config + .pod_overrides + } + }; + + Ok(pod_overrides) + } + + pub fn role_group_pod_overrides( + &self, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + let pod_overrides = match self { + Self::Broker => kafka + .spec + .brokers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .config + .pod_overrides + .clone(), + Self::Controller => kafka + .spec + .controllers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .config + .pod_overrides + .clone(), + }; + + Ok(pod_overrides) + } + + pub fn replicas( + &self, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result, Error> { + let replicas = match self { + Self::Broker => { + kafka + .spec + .brokers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .replicas + } + Self::Controller => { + kafka + .spec + .controllers + .clone() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .replicas + } + }; + + Ok(replicas) + } } /// Configuration for a role and rolegroup of an unknown type. 
diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index a353a3f2..b1e4b495 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -77,7 +77,6 @@ use stackable_operator::{ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ - config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ self, APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, KafkaClusterStatus, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, @@ -239,9 +238,6 @@ pub enum Error { source: stackable_operator::commons::rbac::Error, }, - #[snafu(display("internal operator failure"))] - InternalOperatorError { source: crate::crd::Error }, - #[snafu(display( "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", rolegroup @@ -306,7 +302,7 @@ pub enum Error { MisconfiguredKafkaCluster { source: crd::Error }, #[snafu(display("failed to construct JVM arguments"))] - ConstructJvmArguments { source: crate::config::jvm::Error }, + ConstructJvmArguments { source: crate::crd::role::Error }, #[snafu(display("failed to resolve product image"))] ResolveProductImage { @@ -315,6 +311,12 @@ pub enum Error { #[snafu(display("failed to parse role: {source}"))] ParseRole { source: strum::ParseError }, + + #[snafu(display("failed to merge pod overrides"))] + MergePodOverrides { source: crd::role::Error }, + + #[snafu(display("failed to retrieve rolegroup replicas"))] + RoleGroupReplicas { source: crd::role::Error }, } type Result = std::result::Result; @@ -351,7 +353,6 @@ impl ReconcilerError for Error { Error::ApplyRoleBinding { .. } => None, Error::ApplyStatus { .. } => None, Error::BuildRbacResources { .. } => None, - Error::InternalOperatorError { .. } => None, Error::JvmSecurityPoperties { .. } => None, Error::FailedToCreatePdb { .. } => None, Error::GracefulShutdown { .. 
} => None, @@ -369,6 +370,8 @@ impl ReconcilerError for Error { Error::ConstructJvmArguments { .. } => None, Error::ResolveProductImage { .. } => None, Error::ParseRole { .. } => None, + Error::MergePodOverrides { .. } => None, + Error::RoleGroupReplicas { .. } => None, } } } @@ -465,13 +468,13 @@ pub async fn reconcile_kafka( let mut bootstrap_listeners = Vec::::new(); for (kafka_role_str, role_config) in &validated_config { - let kafka_role = KafkaRole::from_str(&kafka_role_str).context(ParseRoleSnafu)?; + let kafka_role = KafkaRole::from_str(kafka_role_str).context(ParseRoleSnafu)?; for (rolegroup_name, rolegroup_config) in role_config.iter() { let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); let merged_config = kafka_role - .merged_config(&kafka, &rolegroup_ref.role_group) + .merged_config(kafka, &rolegroup_ref.role_group) .context(FailedToResolveConfigSnafu)?; let rg_service = @@ -686,7 +689,7 @@ fn build_broker_rolegroup_config_map( tracing::debug!(?server_cfg, "Applied server config"); tracing::debug!(?jvm_sec_props, "Applied JVM config"); - extend_role_group_config_map(rolegroup, &merged_config, &mut cm_builder); + extend_role_group_config_map(rolegroup, merged_config, &mut cm_builder); cm_builder .build() @@ -754,10 +757,6 @@ fn build_broker_rolegroup_statefulset( service_account: &ServiceAccount, cluster_info: &KubernetesClusterInfo, ) -> Result { - let role = kafka.role(kafka_role).context(InternalOperatorSnafu)?; - let rolegroup = kafka - .rolegroup(rolegroup_ref) - .context(InternalOperatorSnafu)?; let recommended_object_labels = build_recommended_labels( kafka, KAFKA_CONTROLLER_NAME, @@ -898,12 +897,14 @@ fn build_broker_rolegroup_statefulset( ]) .add_env_var( "EXTRA_ARGS", - construct_non_heap_jvm_args(merged_config, role, &rolegroup_ref.role_group) + kafka_role + .construct_non_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) .context(ConstructJvmArgumentsSnafu)?, ) .add_env_var( KAFKA_HEAP_OPTS, - 
construct_heap_jvm_args(merged_config, role, &rolegroup_ref.role_group) + kafka_role + .construct_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) .context(ConstructJvmArgumentsSnafu)?, ) .add_env_var( @@ -1076,8 +1077,16 @@ fn build_broker_rolegroup_statefulset( // Don't run kcat pod as PID 1, to ensure that default signal handlers apply pod_template_spec.share_process_namespace = Some(true); - pod_template.merge_from(role.config.pod_overrides.clone()); - pod_template.merge_from(rolegroup.config.pod_overrides.clone()); + pod_template.merge_from( + kafka_role + .role_pod_overrides(kafka) + .context(MergePodOverridesSnafu)?, + ); + pod_template.merge_from( + kafka_role + .role_group_pod_overrides(kafka, &rolegroup_ref.role_group) + .context(MergePodOverridesSnafu)?, + ); Ok(StatefulSet { metadata: ObjectMetaBuilder::new() @@ -1096,7 +1105,10 @@ fn build_broker_rolegroup_statefulset( .build(), spec: Some(StatefulSetSpec { pod_management_policy: Some("Parallel".to_string()), - replicas: rolegroup.replicas.map(i32::from), + replicas: kafka_role + .replicas(kafka, &rolegroup_ref.role_group) + .context(RoleGroupReplicasSnafu)? + .map(i32::from), selector: LabelSelector { match_labels: Some( Labels::role_group_selector( diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index ec4f7611..b03187f4 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -27,7 +27,7 @@ pub fn add_graceful_shutdown_config( // users can not disable graceful shutdown. 
if let Some(graceful_shutdown_timeout) = &merged_config.graceful_shutdown_timeout { pod_builder - .termination_grace_period(&graceful_shutdown_timeout) + .termination_grace_period(graceful_shutdown_timeout) .context(SetTerminationGracePeriodSnafu)?; } From 0296ede63e99063cc5b6264287edd147e83fa62e Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 15:52:39 +0200 Subject: [PATCH 19/90] remove clones --- rust/operator-binary/src/crd/role/mod.rs | 50 ++++++++++++------------ 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 87e145bc..adf7626d 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -257,28 +257,26 @@ impl KafkaRole { kafka: &v1alpha1::KafkaCluster, ) -> Result { let pod_overrides = match self { - Self::Broker => { - kafka - .spec - .brokers - .clone() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })? - .config - .pod_overrides - } - Self::Controller => { - kafka - .spec - .controllers - .clone() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })? - .config - .pod_overrides - } + Self::Broker => kafka + .spec + .brokers + .as_ref() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .config + .pod_overrides + .clone(), + Self::Controller => kafka + .spec + .controllers + .as_ref() + .with_context(|| MissingRoleSnafu { + role: self.to_string(), + })? + .config + .pod_overrides + .clone(), }; Ok(pod_overrides) @@ -293,7 +291,7 @@ impl KafkaRole { Self::Broker => kafka .spec .brokers - .clone() + .as_ref() .with_context(|| MissingRoleSnafu { role: self.to_string(), })? @@ -309,7 +307,7 @@ impl KafkaRole { Self::Controller => kafka .spec .controllers - .clone() + .as_ref() .with_context(|| MissingRoleSnafu { role: self.to_string(), })? 
@@ -337,7 +335,7 @@ impl KafkaRole { kafka .spec .brokers - .clone() + .as_ref() .with_context(|| MissingRoleSnafu { role: self.to_string(), })? @@ -353,7 +351,7 @@ impl KafkaRole { kafka .spec .controllers - .clone() + .as_ref() .with_context(|| MissingRoleSnafu { role: self.to_string(), })? From a44c05656253a70fa87339ca64af0f7129e1fb06 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 15:52:49 +0200 Subject: [PATCH 20/90] remove obsolete methods --- rust/operator-binary/src/crd/mod.rs | 52 +---------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 3ef484bf..de3e9c7a 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -6,18 +6,15 @@ pub mod role; pub mod security; pub mod tls; -use std::collections::BTreeMap; - use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, Snafu}; +use snafu::Snafu; use stackable_operator::{ commons::{cluster_operation::ClusterOperation, product_image_selection::ProductImage}, kube::{CustomResource, runtime::reflector::ObjectRef}, role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, - utils::cluster_info::KubernetesClusterInfo, versioned::versioned, }; @@ -201,53 +198,6 @@ impl v1alpha1::KafkaCluster { KafkaRole::Controller => self.spec.controllers.as_ref().map(|b| &b.role_config), } } - - /// List all pods expected to form the cluster - /// - /// We try to predict the pods here rather than looking at the current cluster state in order to - /// avoid instance churn. 
- pub fn pods(&self) -> Result + '_, Error> { - let ns = self.metadata.namespace.clone().context(NoNamespaceSnafu)?; - Ok(self - .spec - .brokers - .iter() - .flat_map(|role| &role.role_groups) - // Order rolegroups consistently, to avoid spurious downstream rewrites - .collect::>() - .into_iter() - .flat_map(move |(rolegroup_name, rolegroup)| { - let rolegroup_ref = self.broker_rolegroup_ref(rolegroup_name); - let ns = ns.clone(); - (0..rolegroup.replicas.unwrap_or(0)).map(move |i| KafkaPodRef { - namespace: ns.clone(), - role_group_service_name: rolegroup_ref.object_name(), - pod_name: format!("{}-{}", rolegroup_ref.object_name(), i), - }) - })) - } -} - -/// Reference to a single `Pod` that is a component of a [`KafkaCluster`] -/// -/// Used for service discovery. -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct KafkaPodRef { - pub namespace: String, - pub role_group_service_name: String, - pub pod_name: String, -} - -impl KafkaPodRef { - pub fn fqdn(&self, cluster_info: &KubernetesClusterInfo) -> String { - format!( - "{pod_name}.{service_name}.{namespace}.svc.{cluster_domain}", - pod_name = self.pod_name, - service_name = self.role_group_service_name, - namespace = self.namespace, - cluster_domain = cluster_info.cluster_domain - ) - } } #[derive(Clone, Default, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] From ab61dd1a738c86f5aa79fbef0427e2574f67aef4 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 26 Aug 2025 19:06:10 +0200 Subject: [PATCH 21/90] cleanup --- rust/operator-binary/src/config/jvm.rs | 2 +- rust/operator-binary/src/crd/mod.rs | 35 ++++-- rust/operator-binary/src/crd/role/mod.rs | 112 +++++++------------ rust/operator-binary/src/kafka_controller.rs | 24 ++-- 4 files changed, 83 insertions(+), 90 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 4ad8dd4f..75039767 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ 
b/rust/operator-binary/src/config/jvm.rs @@ -196,7 +196,7 @@ mod tests { serde_yaml::from_str(kafka_cluster).expect("illegal test input"); let kafka_role = KafkaRole::Broker; - let rolegroup_ref = kafka.broker_rolegroup_ref("default"); + let rolegroup_ref = kafka.rolegroup_ref(&kafka_role, "default"); let merged_config = kafka_role .merged_config(&kafka, &rolegroup_ref.role_group) .unwrap(); diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index de3e9c7a..255baa12 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -8,7 +8,7 @@ pub mod tls; use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; -use snafu::Snafu; +use snafu::{OptionExt, Snafu}; use stackable_operator::{ commons::{cluster_operation::ClusterOperation, product_image_selection::ProductImage}, kube::{CustomResource, runtime::reflector::ObjectRef}, @@ -51,11 +51,8 @@ pub const STACKABLE_KERBEROS_KRB5_PATH: &str = "/stackable/kerberos/krb5.conf"; #[derive(Snafu, Debug)] pub enum Error { - #[snafu(display("object has no namespace associated"))] - NoNamespace, - - #[snafu(display("the role {role} is not defined"))] - CannotRetrieveKafkaRole { role: String }, + #[snafu(display("the Kafka role [{role}] is missing from spec"))] + MissingRole { role: String }, #[snafu(display( "Kafka version 4 and higher requires a Kraft controller (configured via `spec.controller`)" @@ -183,11 +180,15 @@ impl v1alpha1::KafkaCluster { format!("{}-bootstrap", rolegroup.object_name()) } - /// Metadata about a broker rolegroup - pub fn broker_rolegroup_ref(&self, group_name: impl Into) -> RoleGroupRef { + /// Metadata about a rolegroup + pub fn rolegroup_ref( + &self, + role: &KafkaRole, + group_name: impl Into, + ) -> RoleGroupRef { RoleGroupRef { cluster: ObjectRef::from_obj(self), - role: KafkaRole::Broker.to_string(), + role: role.to_string(), role_group: group_name.into(), } } @@ -198,6 +199,22 @@ impl v1alpha1::KafkaCluster 
{ KafkaRole::Controller => self.spec.controllers.as_ref().map(|b| &b.role_config), } } + + pub fn broker_role( + &self, + ) -> Result<&Role, Error> { + self.spec.brokers.as_ref().context(MissingRoleSnafu { + role: KafkaRole::Broker.to_string(), + }) + } + + pub fn controller_role( + &self, + ) -> Result<&Role, Error> { + self.spec.controllers.as_ref().context(MissingRoleSnafu { + role: KafkaRole::Controller.to_string(), + }) + } } #[derive(Clone, Default, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index adf7626d..16df9217 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -23,9 +23,9 @@ use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; use crate::{ config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::role::{ - broker::{BrokerConfig, BrokerConfigFragment}, + broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerConfigFragment}, commons::{CommonConfig, Storage}, - controller::ControllerConfig, + controller::{CONTROLLER_PROPERTIES_FILE, ControllerConfig}, }, v1alpha1, }; @@ -36,7 +36,10 @@ pub enum Error { FragmentValidationFailure { source: ValidationError }, #[snafu(display("the Kafka role [{role}] is missing from spec"))] - MissingRole { role: String }, + MissingRole { + source: crate::crd::Error, + role: String, + }, #[snafu(display("missing role group {rolegroup:?} for role {role:?}"))] MissingRoleGroup { role: String, rolegroup: String }, @@ -109,13 +112,9 @@ impl KafkaRole { BrokerConfig::default_config(&kafka.name_any(), &self.to_string()); // Retrieve role resource config - let role = kafka - .spec - .brokers - .as_ref() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })?; + let role = kafka.broker_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?; let mut role_config = role.config.config.clone(); // Retrieve rolegroup 
specific resource config @@ -148,13 +147,9 @@ impl KafkaRole { ControllerConfig::default_config(&kafka.name_any(), &self.to_string()); // Retrieve role resource config - let role = kafka - .spec - .controllers - .as_ref() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })?; + let role = kafka.controller_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?; let mut role_config = role.config.config.clone(); // Retrieve rolegroup specific resource config @@ -193,25 +188,17 @@ impl KafkaRole { match self { Self::Broker => construct_non_heap_jvm_args::( merged_config, - &kafka - .spec - .brokers - .clone() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })?, + kafka.broker_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, rolegroup, ) .context(ConstructJvmArgumentsSnafu), Self::Controller => construct_non_heap_jvm_args( merged_config, - &kafka - .spec - .controllers - .clone() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })?, + kafka.controller_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, rolegroup, ) .context(ConstructJvmArgumentsSnafu), @@ -227,25 +214,17 @@ impl KafkaRole { match self { Self::Broker => construct_heap_jvm_args::( merged_config, - &kafka - .spec - .brokers - .clone() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })?, + kafka.broker_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, rolegroup, ) .context(ConstructJvmArgumentsSnafu), Self::Controller => construct_heap_jvm_args( merged_config, - &kafka - .spec - .controllers - .clone() - .with_context(|| MissingRoleSnafu { - role: self.to_string(), - })?, + kafka.controller_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, rolegroup, ) .context(ConstructJvmArgumentsSnafu), @@ -258,20 +237,16 @@ impl KafkaRole { ) -> Result { let pod_overrides = match self { Self::Broker => kafka - .spec - .brokers - 
.as_ref() - .with_context(|| MissingRoleSnafu { + .broker_role() + .with_context(|_| MissingRoleSnafu { role: self.to_string(), })? .config .pod_overrides .clone(), Self::Controller => kafka - .spec - .controllers - .as_ref() - .with_context(|| MissingRoleSnafu { + .controller_role() + .with_context(|_| MissingRoleSnafu { role: self.to_string(), })? .config @@ -289,10 +264,8 @@ impl KafkaRole { ) -> Result { let pod_overrides = match self { Self::Broker => kafka - .spec - .brokers - .as_ref() - .with_context(|| MissingRoleSnafu { + .broker_role() + .with_context(|_| MissingRoleSnafu { role: self.to_string(), })? .role_groups @@ -305,10 +278,8 @@ impl KafkaRole { .pod_overrides .clone(), Self::Controller => kafka - .spec - .controllers - .as_ref() - .with_context(|| MissingRoleSnafu { + .controller_role() + .with_context(|_| MissingRoleSnafu { role: self.to_string(), })? .role_groups @@ -333,10 +304,8 @@ impl KafkaRole { let replicas = match self { Self::Broker => { kafka - .spec - .brokers - .as_ref() - .with_context(|| MissingRoleSnafu { + .broker_role() + .with_context(|_| MissingRoleSnafu { role: self.to_string(), })? .role_groups @@ -349,10 +318,8 @@ impl KafkaRole { } Self::Controller => { kafka - .spec - .controllers - .as_ref() - .with_context(|| MissingRoleSnafu { + .controller_role() + .with_context(|_| MissingRoleSnafu { role: self.to_string(), })? 
.role_groups @@ -432,4 +399,11 @@ impl AnyConfig { AnyConfig::Controller(_) => None, } } + + pub fn config_file_name(&self) -> &str { + match self { + AnyConfig::Broker(_) => BROKER_PROPERTIES_FILE, + AnyConfig::Controller(_) => CONTROLLER_PROPERTIES_FILE, + } + } } diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index b1e4b495..1e6ea062 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -471,7 +471,7 @@ pub async fn reconcile_kafka( let kafka_role = KafkaRole::from_str(kafka_role_str).context(ParseRoleSnafu)?; for (rolegroup_name, rolegroup_config) in role_config.iter() { - let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); + let rolegroup_ref = kafka.rolegroup_ref(&kafka_role, rolegroup_name); let merged_config = kafka_role .merged_config(kafka, &rolegroup_ref.role_group) @@ -479,7 +479,7 @@ pub async fn reconcile_kafka( let rg_service = build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref)?; - let rg_configmap = build_broker_rolegroup_config_map( + let rg_configmap = build_rolegroup_config_map( kafka, &resolved_product_image, &kafka_security, @@ -620,28 +620,30 @@ pub fn build_broker_rolegroup_bootstrap_listener( } /// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator -fn build_broker_rolegroup_config_map( +fn build_rolegroup_config_map( kafka: &v1alpha1::KafkaCluster, resolved_product_image: &ResolvedProductImage, kafka_security: &KafkaTlsSecurity, rolegroup: &RoleGroupRef, - broker_config: &HashMap>, + rolegroup_config: &HashMap>, merged_config: &AnyConfig, ) -> Result { - let mut server_cfg = broker_config - .get(&PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string())) + let kafka_config_file_name = merged_config.config_file_name(); + + let mut kafka_config = rolegroup_config + .get(&PropertyNameKind::File(kafka_config_file_name.to_string())) 
.cloned() .unwrap_or_default(); - server_cfg.extend(kafka_security.config_settings()); - server_cfg.extend(graceful_shutdown_config_properties()); + kafka_config.extend(kafka_security.config_settings()); + kafka_config.extend(graceful_shutdown_config_properties()); - let server_cfg = server_cfg + let server_cfg = kafka_config .into_iter() .map(|(k, v)| (k, Some(v))) .collect::>(); - let jvm_sec_props: BTreeMap> = broker_config + let jvm_sec_props: BTreeMap> = rolegroup_config .get(&PropertyNameKind::File( JVM_SECURITY_PROPERTIES_FILE.to_string(), )) @@ -670,7 +672,7 @@ fn build_broker_rolegroup_config_map( .build(), ) .add_data( - BROKER_PROPERTIES_FILE, + kafka_config_file_name, to_java_properties_string(server_cfg.iter().map(|(k, v)| (k, v))).with_context( |_| SerializeConfigSnafu { rolegroup: rolegroup.clone(), From 25b95e889d10688dffdb148f7b46a8b4013ee42a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 27 Aug 2025 12:38:20 +0200 Subject: [PATCH 22/90] use role methods --- rust/operator-binary/src/kafka_controller.rs | 21 ++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 1e6ea062..9873cc5d 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -118,7 +118,10 @@ pub enum Error { MissingSecretLifetime, #[snafu(display("cluster object defines no '{role}' role"))] - MissingKafkaRole { role: KafkaRole }, + MissingKafkaRole { + source: crate::crd::Error, + role: KafkaRole, + }, #[snafu(display("failed to apply role Service"))] ApplyRoleService { @@ -638,7 +641,7 @@ fn build_rolegroup_config_map( kafka_config.extend(kafka_security.config_settings()); kafka_config.extend(graceful_shutdown_config_properties()); - let server_cfg = kafka_config + let kafka_config = kafka_config .into_iter() .map(|(k, v)| (k, Some(v))) .collect::>(); @@ -673,7 +676,7 @@ fn 
build_rolegroup_config_map( ) .add_data( kafka_config_file_name, - to_java_properties_string(server_cfg.iter().map(|(k, v)| (k, v))).with_context( + to_java_properties_string(kafka_config.iter().map(|(k, v)| (k, v))).with_context( |_| SerializeConfigSnafu { rolegroup: rolegroup.clone(), }, @@ -688,7 +691,7 @@ fn build_rolegroup_config_map( })?, ); - tracing::debug!(?server_cfg, "Applied server config"); + tracing::debug!(?kafka_config, "Applied kafka config"); tracing::debug!(?jvm_sec_props, "Applied JVM config"); extend_role_group_config_map(rolegroup, merged_config, &mut cm_builder); @@ -1220,9 +1223,8 @@ fn validated_product_config( PropertyNameKind::Env, ], kafka - .spec - .brokers - .clone() + .broker_role() + .cloned() .context(MissingKafkaRoleSnafu { role: KafkaRole::Broker, })? @@ -1240,9 +1242,8 @@ fn validated_product_config( PropertyNameKind::Env, ], kafka - .spec - .controllers - .clone() + .controller_role() + .cloned() .context(MissingKafkaRoleSnafu { role: KafkaRole::Controller, })? 
From 50d4a430df2f09fcdb7d761de7ac8b97d30bfe50 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 27 Aug 2025 12:44:09 +0200 Subject: [PATCH 23/90] remove obsolete errors --- rust/operator-binary/src/discovery.rs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index ce311f77..ca76eafd 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -5,7 +5,7 @@ use stackable_operator::{ builder::{configmap::ConfigMapBuilder, meta::ObjectMetaBuilder}, commons::product_image_selection::ResolvedProductImage, crd::listener, - k8s_openapi::api::core::v1::{ConfigMap, Service}, + k8s_openapi::api::core::v1::ConfigMap, kube::{Resource, ResourceExt, runtime::reflector::ObjectRef}, }; @@ -26,21 +26,9 @@ pub enum Error { #[snafu(display("object has no name associated"))] NoName, - #[snafu(display("object has no namespace associated"))] - NoNamespace, - #[snafu(display("could not find service port with name {}", port_name))] NoServicePort { port_name: String }, - #[snafu(display("service port with name {} does not have a nodePort", port_name))] - NoNodePort { port_name: String }, - - #[snafu(display("could not find Endpoints for {}", svc))] - FindEndpoints { - source: stackable_operator::client::Error, - svc: ObjectRef, - }, - #[snafu(display("nodePort was out of range"))] InvalidNodePort { source: TryFromIntError }, From c712ff75cf649a39e87073b50cf7da8726805e02 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 27 Aug 2025 13:04:19 +0200 Subject: [PATCH 24/90] remove unwrap --- rust/operator-binary/src/kafka_controller.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 9873cc5d..a53fe4f5 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ 
-320,6 +320,11 @@ pub enum Error {
     #[snafu(display("failed to retrieve rolegroup replicas"))]
     RoleGroupReplicas { source: crd::role::Error },
+
+    #[snafu(display("failed to build bootstrap listener pvc"))]
+    BuildBootstrapListenerPvc {
+        source: stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError,
+    },
 }
 
 type Result = std::result::Result;
@@ -375,6 +380,7 @@ impl ReconcilerError for Error {
             Error::ParseRole { .. } => None,
             Error::MergePodOverrides { .. } => None,
             Error::RoleGroupReplicas { .. } => None,
+            Error::BuildBootstrapListenerPvc { .. } => None,
         }
     }
 }
@@ -820,9 +826,7 @@ fn build_broker_rolegroup_statefulset(
             &unversioned_recommended_labels,
         )
         .build_pvc(LISTENER_BOOTSTRAP_VOLUME_NAME)
-        // FIXME (@Techassi): This should either be an expect (if it can never fail) or should be
-        // handled via a proper error handling
-        .unwrap(),
+        .context(BuildBootstrapListenerPvcSnafu)?,
     );
 
     if kafka_security.has_kerberos_enabled() {

From f646b1e1c7fe6e1c6bd822080ed6e0f2cb1b2be2 Mon Sep 17 00:00:00 2001
From: Malte Sander
Date: Wed, 27 Aug 2025 15:27:07 +0200
Subject: [PATCH 25/90] wip - kafka controller cleanup, move resources to own module

---
 deploy/config-spec/properties.yaml | 16 -
 rust/operator-binary/src/crd/role/broker.rs | 18 +-
 .../src/crd/role/controller.rs | 40 +-
 rust/operator-binary/src/crd/role/mod.rs | 17 +-
 rust/operator-binary/src/kafka_controller.rs | 870 ++----------------
 rust/operator-binary/src/main.rs | 1 +
 .../operator-binary/src/resource/configmap.rs | 133 +++
 rust/operator-binary/src/resource/listener.rs | 85 ++
 rust/operator-binary/src/resource/mod.rs | 4 +
 rust/operator-binary/src/resource/service.rs | 75 ++
 .../src/resource/statefulset.rs | 795 ++++++++++++++++
 11 files changed, 1241 insertions(+), 813 deletions(-)
 create mode 100644 rust/operator-binary/src/resource/configmap.rs
 create mode 100644 rust/operator-binary/src/resource/listener.rs
 create mode 100644 rust/operator-binary/src/resource/mod.rs
 create 
mode 100644 rust/operator-binary/src/resource/service.rs create mode 100644 rust/operator-binary/src/resource/statefulset.rs diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index 8e67978b..8ebbbeb0 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -149,19 +149,3 @@ properties: required: false asOfVersion: "0.0.0" description: "The number of seconds after which the OPA authorizer cache expires" - - - property: &logDirs - propertyNames: - - name: "log.dirs" - kind: - type: "file" - file: "broker.properties" - datatype: - type: "string" - recommendedValues: - - value: "/stackable/data/topicdata" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "A comma separated list of directories under which to store log files" diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs index 4a98af2f..e9c467bc 100644 --- a/rust/operator-binary/src/crd/role/broker.rs +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -15,7 +15,10 @@ use stackable_operator::{ use strum::{Display, EnumIter}; use crate::crd::{ - role::commons::{CommonConfig, Storage, StorageFragment}, + role::{ + KafkaRole, LOG_DIRS, NODE_ID, PROCESS_ROLES, + commons::{CommonConfig, Storage, StorageFragment}, + }, v1alpha1, }; @@ -133,6 +136,19 @@ impl Configuration for BrokerConfigFragment { let mut config = BTreeMap::new(); if file == BROKER_PROPERTIES_FILE { + // TODO: generate? 
+ config.insert(NODE_ID.to_string(), Some("1".to_string())); + + config.insert( + PROCESS_ROLES.to_string(), + Some(KafkaRole::Broker.to_string()), + ); + + config.insert( + LOG_DIRS.to_string(), + Some("/stackable/data/topicdata".to_string()), + ); + // OPA if resource.spec.cluster_config.authorization.opa.is_some() { config.insert( diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index fd66ec7b..a2d8a9f3 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -15,7 +15,10 @@ use stackable_operator::{ use strum::{Display, EnumIter}; use crate::crd::{ - role::commons::{CommonConfig, Storage, StorageFragment}, + role::{ + KafkaRole, LOG_DIRS, NODE_ID, PROCESS_ROLES, + commons::{CommonConfig, Storage, StorageFragment}, + }, v1alpha1, }; @@ -118,9 +121,40 @@ impl Configuration for ControllerConfigFragment { &self, _resource: &Self::Configurable, _role_name: &str, - _file: &str, + file: &str, ) -> Result>, stackable_operator::product_config_utils::Error> { - Ok(BTreeMap::new()) + let mut config = BTreeMap::new(); + + if file == CONTROLLER_PROPERTIES_FILE { + // TODO: generate? 
+ config.insert(NODE_ID.to_string(), Some("2".to_string())); + + config.insert( + PROCESS_ROLES.to_string(), + Some(KafkaRole::Controller.to_string()), + ); + + config.insert( + LOG_DIRS.to_string(), + Some("/stackable/data/kraft".to_string()), + ); + + // TEST: + config.insert( + "listeners".to_string(), + Some("listeners=INTERNAL://simple-kafka-controller-default-0.simple-kafka-controller-default.default.svc.cluster.local:9093".to_string()), + ); + config.insert( + "controller.quorum.bootstrap.servers".to_string(), + Some("simple-kafka-controller-default-0.simple-kafka-controller-default.default.svc.cluster.local:9093".to_string()), + ); + config.insert( + "listener.security.protocol.map".to_string(), + Some("INTERNAL:PLAINTEXT".to_string()), + ); + } + + Ok(config) } } diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 16df9217..0c292ee5 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -29,6 +29,15 @@ use crate::{ }, v1alpha1, }; +// See: https://kafka.apache.org/documentation/#brokerconfigs + +// The node ID associated with the roles this process is playing when process.roles is non-empty. +// This is required configuration when running in KRaft mode. +pub const NODE_ID: &str = "node.id"; +// The roles that this process plays: 'broker', 'controller', or 'broker,controller' if it is both. +pub const PROCESS_ROLES: &str = "process.roles"; +// A comma-separated list of the directories where the log data is stored. If not set, the value in log.dir is used. 
+pub const LOG_DIRS: &str = "log.dirs"; #[derive(Snafu, Debug)] pub enum Error { @@ -231,7 +240,13 @@ impl KafkaRole { } } - pub fn role_pod_overrides( + pub fn role_pod_overrides(JvmArgumentsSnafu), + } + } + + pub fn construct_heap_jvm_args( + &self, + merged_config: &AnyCon &self, kafka: &v1alpha1::KafkaCluster, ) -> Result { diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index a53fe4f5..018ac52d 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -1,105 +1,57 @@ //! Ensures that `Pod`s are configured and running for each [`v1alpha1::KafkaCluster`]. -use std::{ - collections::{BTreeMap, HashMap}, - ops::Deref, - str::FromStr, - sync::Arc, -}; +use std::{collections::HashMap, str::FromStr, sync::Arc}; use const_format::concatcp; -use product_config::{ - ProductConfigManager, - types::PropertyNameKind, - writer::{PropertiesWriterError, to_java_properties_string}, -}; -use snafu::{OptionExt, ResultExt, Snafu}; +use product_config::{ProductConfigManager, types::PropertyNameKind}; +use snafu::{ResultExt, Snafu}; use stackable_operator::{ - builder::{ - self, - configmap::ConfigMapBuilder, - meta::ObjectMetaBuilder, - pod::{ - PodBuilder, - container::ContainerBuilder, - resources::ResourceRequirementsBuilder, - security::PodSecurityContextBuilder, - volume::{ListenerOperatorVolumeSourceBuilder, ListenerReference, VolumeBuilder}, - }, - }, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, commons::{ opa::OpaApiVersion, - product_image_selection::{self, ResolvedProductImage}, + product_image_selection::{self}, rbac::build_rbac_resources, }, crd::listener, - k8s_openapi::{ - DeepMerge, - api::{ - apps::v1::{StatefulSet, StatefulSetSpec}, - core::v1::{ - ConfigMap, ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, - EnvVarSource, ExecAction, ObjectFieldSelector, PodSpec, Probe, Service, - ServiceAccount, ServiceSpec, 
Volume, - }, - }, - apimachinery::pkg::apis::meta::v1::LabelSelector, - }, kube::{ - Resource, ResourceExt, + Resource, api::DynamicObject, core::{DeserializeGuard, error_boundary}, runtime::{controller::Action, reflector::ObjectRef}, }, - kvp::{Label, Labels}, logging::controller::ReconcilerError, product_config_utils::{ ValidatedRoleConfigByPropertyKind, transform_all_roles_to_config, validate_all_roles_and_groups_config, }, - product_logging::{ - self, - framework::LoggingError, - spec::{ - ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, - CustomContainerLogConfig, - }, - }, role_utils::{GenericRoleConfig, RoleGroupRef}, shared::time::Duration, status::condition::{ compute_conditions, operations::ClusterOperationsConditionBuilder, statefulset::StatefulSetConditionBuilder, }, - utils::cluster_info::KubernetesClusterInfo, }; use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ crd::{ - self, APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, - KafkaClusterStatus, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, - LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, STACKABLE_CONFIG_DIR, - STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, - STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, - listener::get_kafka_listener_config, + self, APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KafkaClusterStatus, + OPERATOR_NAME, role::{ - AnyConfig, KafkaRole, - broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerContainer}, + AnyConfig, KafkaRole, broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE, }, security::KafkaTlsSecurity, v1alpha1, }, discovery::{self, build_discovery_configmap}, - kerberos::{self, add_kerberos_pod_config}, - operations::{ - graceful_shutdown::{add_graceful_shutdown_config, graceful_shutdown_config_properties}, - pdb::add_pdbs, + operations::pdb::add_pdbs, + resource::{ + 
configmap::build_rolegroup_config_map, + listener::build_broker_rolegroup_bootstrap_listener, + service::build_broker_rolegroup_service, + statefulset::{build_broker_rolegroup_statefulset, build_controller_rolegroup_statefulset}, }, - product_logging::{LOG4J_CONFIG_FILE, MAX_KAFKA_LOG_FILES_SIZE, extend_role_group_config_map}, - utils::build_recommended_labels, }; pub const KAFKA_CONTROLLER_NAME: &str = "kafkacluster"; @@ -114,9 +66,6 @@ pub struct Ctx { #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { - #[snafu(display("missing secret lifetime"))] - MissingSecretLifetime, - #[snafu(display("cluster object defines no '{role}' role"))] MissingKafkaRole { source: crate::crd::Error, @@ -134,12 +83,6 @@ pub enum Error { rolegroup: RoleGroupRef, }, - #[snafu(display("failed to build ConfigMap for {}", rolegroup))] - BuildRoleGroupConfig { - source: stackable_operator::builder::configmap::Error, - rolegroup: RoleGroupRef, - }, - #[snafu(display("failed to apply ConfigMap for {}", rolegroup))] ApplyRoleGroupConfig { source: stackable_operator::cluster_resources::Error, @@ -162,17 +105,6 @@ pub enum Error { source: stackable_operator::product_config_utils::Error, }, - #[snafu(display("failed to serialize config for {}", rolegroup))] - SerializeConfig { - source: PropertiesWriterError, - rolegroup: RoleGroupRef, - }, - - #[snafu(display("object is missing metadata to build owner reference"))] - ObjectMissingMetadataForOwnerRef { - source: stackable_operator::builder::meta::Error, - }, - #[snafu(display("failed to build discovery ConfigMap"))] BuildDiscoveryConfig { source: discovery::Error }, @@ -186,22 +118,6 @@ pub enum Error { source: stackable_operator::commons::opa::Error, }, - #[snafu(display("invalid kafka listeners"))] - InvalidKafkaListeners { - source: crate::crd::listener::KafkaListenerError, - }, - - #[snafu(display("failed to add listener volume"))] - AddListenerVolume { - source: 
stackable_operator::builder::pod::Error, - }, - - #[snafu(display("invalid container name [{name}]"))] - InvalidContainerName { - name: String, - source: stackable_operator::builder::pod::container::Error, - }, - #[snafu(display("failed to delete orphaned resources"))] DeleteOrphans { source: stackable_operator::cluster_resources::Error, @@ -218,9 +134,6 @@ pub enum Error { #[snafu(display("failed to resolve and merge config for role and role group"))] FailedToResolveConfig { source: crate::crd::role::Error }, - #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] - VectorAggregatorConfigMapMissing, - #[snafu(display("failed to patch service account"))] ApplyServiceAccount { source: stackable_operator::cluster_resources::Error, @@ -241,61 +154,20 @@ pub enum Error { source: stackable_operator::commons::rbac::Error, }, - #[snafu(display( - "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", - rolegroup - ))] - JvmSecurityPoperties { - source: PropertiesWriterError, - rolegroup: String, - }, - #[snafu(display("failed to create PodDisruptionBudget"))] FailedToCreatePdb { source: crate::operations::pdb::Error, }, - #[snafu(display("failed to configure graceful shutdown"))] - GracefulShutdown { - source: crate::operations::graceful_shutdown::Error, - }, - #[snafu(display("failed to get required Labels"))] GetRequiredLabels { source: stackable_operator::kvp::KeyValuePairError, }, - #[snafu(display("failed to build Metadata"))] - MetadataBuild { - source: stackable_operator::builder::meta::Error, - }, - - #[snafu(display("failed to build Labels"))] - LabelBuild { - source: stackable_operator::kvp::LabelError, - }, - - #[snafu(display("failed to add Secret Volumes and VolumeMounts"))] - AddVolumesAndVolumeMounts { source: crate::crd::security::Error }, - - #[snafu(display("failed to add kerberos config"))] - AddKerberosConfig { source: kerberos::Error }, - #[snafu(display("failed to validate authentication method"))] 
FailedToValidateAuthenticationMethod { source: crate::crd::security::Error }, - #[snafu(display("failed to add needed volume"))] - AddVolume { source: builder::pod::Error }, - - #[snafu(display("failed to add needed volumeMount"))] - AddVolumeMount { - source: builder::pod::container::Error, - }, - - #[snafu(display("failed to configure logging"))] - ConfigureLogging { source: LoggingError }, - #[snafu(display("KafkaCluster object is invalid"))] InvalidKafkaCluster { source: error_boundary::InvalidObject, @@ -304,9 +176,6 @@ pub enum Error { #[snafu(display("KafkaCluster object is misconfigured"))] MisconfiguredKafkaCluster { source: crd::Error }, - #[snafu(display("failed to construct JVM arguments"))] - ConstructJvmArguments { source: crate::crd::role::Error }, - #[snafu(display("failed to resolve product image"))] ResolveProductImage { source: product_image_selection::Error, @@ -315,15 +184,24 @@ pub enum Error { #[snafu(display("failed to parse role: {source}"))] ParseRole { source: strum::ParseError }, - #[snafu(display("failed to merge pod overrides"))] - MergePodOverrides { source: crd::role::Error }, + #[snafu(display("failed to build statefulset"))] + BuildStatefulset { + source: crate::resource::statefulset::Error, + }, - #[snafu(display("failed to retrieve rolegroup replicas"))] - RoleGroupReplicas { source: crd::role::Error }, + #[snafu(display("failed to build configmap"))] + BuildConfigMap { + source: crate::resource::configmap::Error, + }, + + #[snafu(display("failed to build service"))] + BuildService { + source: crate::resource::service::Error, + }, - #[snafu(display("failed to builld bootstrap listener pvc"))] - BuildBootstrapListenerPvc { - source: stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, + #[snafu(display("failed to build listener"))] + BuildListener { + source: crate::resource::listener::Error, }, } type Result = std::result::Result; @@ -335,52 +213,35 @@ impl ReconcilerError for Error { fn 
secondary_object(&self) -> Option> { match self { - Error::MissingSecretLifetime => None, Error::MissingKafkaRole { .. } => None, Error::ApplyRoleService { .. } => None, Error::ApplyRoleGroupService { .. } => None, - Error::BuildRoleGroupConfig { .. } => None, Error::ApplyRoleGroupConfig { .. } => None, Error::ApplyRoleGroupStatefulSet { .. } => None, Error::GenerateProductConfig { .. } => None, Error::InvalidProductConfig { .. } => None, - Error::SerializeConfig { .. } => None, - Error::ObjectMissingMetadataForOwnerRef { .. } => None, Error::BuildDiscoveryConfig { .. } => None, Error::ApplyDiscoveryConfig { .. } => None, Error::InvalidOpaConfig { .. } => None, - Error::InvalidKafkaListeners { .. } => None, - Error::AddListenerVolume { .. } => None, - Error::InvalidContainerName { .. } => None, Error::DeleteOrphans { .. } => None, Error::FailedToInitializeSecurityContext { .. } => None, Error::CreateClusterResources { .. } => None, Error::FailedToResolveConfig { .. } => None, - Error::VectorAggregatorConfigMapMissing => None, Error::ApplyServiceAccount { .. } => None, Error::ApplyRoleBinding { .. } => None, Error::ApplyStatus { .. } => None, Error::BuildRbacResources { .. } => None, - Error::JvmSecurityPoperties { .. } => None, Error::FailedToCreatePdb { .. } => None, - Error::GracefulShutdown { .. } => None, Error::GetRequiredLabels { .. } => None, - Error::MetadataBuild { .. } => None, - Error::LabelBuild { .. } => None, - Error::AddVolumesAndVolumeMounts { .. } => None, - Error::ConfigureLogging { .. } => None, - Error::AddVolume { .. } => None, - Error::AddVolumeMount { .. } => None, - Error::AddKerberosConfig { .. } => None, Error::FailedToValidateAuthenticationMethod { .. } => None, Error::InvalidKafkaCluster { .. } => None, Error::MisconfiguredKafkaCluster { .. } => None, - Error::ConstructJvmArguments { .. } => None, Error::ResolveProductImage { .. } => None, Error::ParseRole { .. } => None, - Error::MergePodOverrides { .. 
} => None, - Error::RoleGroupReplicas { .. } => None, - Error::BuildBootstrapListenerPvc { .. } => None, + Error::BuildStatefulset { .. } => None, + Error::BuildConfigMap { .. } => None, + Error::BuildService { .. } => None, + Error::BuildListener { .. } => None, } } } @@ -487,27 +348,48 @@ pub async fn reconcile_kafka( .context(FailedToResolveConfigSnafu)?; let rg_service = - build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref)?; - let rg_configmap = build_rolegroup_config_map( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - rolegroup_config, - &merged_config, - )?; - let rg_statefulset = build_broker_rolegroup_statefulset( - kafka, - &kafka_role, - &resolved_product_image, - &rolegroup_ref, - rolegroup_config, - opa_connect.as_deref(), - &kafka_security, - &merged_config, - &rbac_sa, - &client.kubernetes_cluster_info, - )?; + build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref) + .context(BuildServiceSnafu)?; + + let rg_configmap = match kafka_role { + KafkaRole::Broker => build_rolegroup_config_map( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + rolegroup_config, + &merged_config, + ) + .context(BuildConfigMapSnafu)?, + KafkaRole::Controller => todo!(), + }; + + let rg_statefulset = match kafka_role { + KafkaRole::Broker => build_broker_rolegroup_statefulset( + kafka, + &kafka_role, + &resolved_product_image, + &rolegroup_ref, + rolegroup_config, + opa_connect.as_deref(), + &kafka_security, + &merged_config, + &rbac_sa, + &client.kubernetes_cluster_info, + ) + .context(BuildStatefulsetSnafu)?, + KafkaRole::Controller => build_controller_rolegroup_statefulset( + kafka, + &kafka_role, + &resolved_product_image, + &rolegroup_ref, + rolegroup_config, + &kafka_security, + &merged_config, + &rbac_sa, + ) + .context(BuildStatefulsetSnafu)?, + }; // TODO: broker / controller? 
if let AnyConfig::Broker(broker_config) = merged_config { @@ -517,7 +399,8 @@ pub async fn reconcile_kafka( &kafka_security, &rolegroup_ref, &broker_config, - )?; + ) + .context(BuildListenerSnafu)?; bootstrap_listeners.push( cluster_resources .add(client, rg_bootstrap_listener) @@ -594,552 +477,6 @@ pub async fn reconcile_kafka( Ok(Action::await_change()) } -/// Kafka clients will use the load-balanced bootstrap listener to get a list of broker addresses and will use those to -/// transmit data to the correct broker. -// TODO (@NickLarsenNZ): Move shared functionality to stackable-operator -pub fn build_broker_rolegroup_bootstrap_listener( - kafka: &v1alpha1::KafkaCluster, - resolved_product_image: &ResolvedProductImage, - kafka_security: &KafkaTlsSecurity, - rolegroup: &RoleGroupRef, - merged_config: &BrokerConfig, -) -> Result { - Ok(listener::v1alpha1::Listener { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(kafka.bootstrap_service_name(rolegroup)) - .ownerreference_from_resource(kafka, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? 
- .build(), - spec: listener::v1alpha1::ListenerSpec { - class_name: Some(merged_config.bootstrap_listener_class.clone()), - ports: Some(listener_ports(kafka_security)), - ..listener::v1alpha1::ListenerSpec::default() - }, - status: None, - }) -} - -/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator -fn build_rolegroup_config_map( - kafka: &v1alpha1::KafkaCluster, - resolved_product_image: &ResolvedProductImage, - kafka_security: &KafkaTlsSecurity, - rolegroup: &RoleGroupRef, - rolegroup_config: &HashMap>, - merged_config: &AnyConfig, -) -> Result { - let kafka_config_file_name = merged_config.config_file_name(); - - let mut kafka_config = rolegroup_config - .get(&PropertyNameKind::File(kafka_config_file_name.to_string())) - .cloned() - .unwrap_or_default(); - - kafka_config.extend(kafka_security.config_settings()); - kafka_config.extend(graceful_shutdown_config_properties()); - - let kafka_config = kafka_config - .into_iter() - .map(|(k, v)| (k, Some(v))) - .collect::>(); - - let jvm_sec_props: BTreeMap> = rolegroup_config - .get(&PropertyNameKind::File( - JVM_SECURITY_PROPERTIES_FILE.to_string(), - )) - .cloned() - .unwrap_or_default() - .into_iter() - .map(|(k, v)| (k, Some(v))) - .collect(); - - let mut cm_builder = ConfigMapBuilder::new(); - cm_builder - .metadata( - ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(rolegroup.object_name()) - .ownerreference_from_resource(kafka, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? 
- .build(), - ) - .add_data( - kafka_config_file_name, - to_java_properties_string(kafka_config.iter().map(|(k, v)| (k, v))).with_context( - |_| SerializeConfigSnafu { - rolegroup: rolegroup.clone(), - }, - )?, - ) - .add_data( - JVM_SECURITY_PROPERTIES_FILE, - to_java_properties_string(jvm_sec_props.iter()).with_context(|_| { - JvmSecurityPopertiesSnafu { - rolegroup: rolegroup.role_group.clone(), - } - })?, - ); - - tracing::debug!(?kafka_config, "Applied kafka config"); - tracing::debug!(?jvm_sec_props, "Applied JVM config"); - - extend_role_group_config_map(rolegroup, merged_config, &mut cm_builder); - - cm_builder - .build() - .with_context(|_| BuildRoleGroupConfigSnafu { - rolegroup: rolegroup.clone(), - }) -} - -/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup -/// -/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. -fn build_broker_rolegroup_service( - kafka: &v1alpha1::KafkaCluster, - resolved_product_image: &ResolvedProductImage, - rolegroup: &RoleGroupRef, -) -> Result { - Ok(Service { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(rolegroup.object_name()) - .ownerreference_from_resource(kafka, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? - .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?) - .build(), - spec: Some(ServiceSpec { - cluster_ip: Some("None".to_string()), - selector: Some( - Labels::role_group_selector( - kafka, - APP_NAME, - &rolegroup.role, - &rolegroup.role_group, - ) - .context(LabelBuildSnafu)? 
- .into(), - ), - publish_not_ready_addresses: Some(true), - ..ServiceSpec::default() - }), - status: None, - }) -} - -/// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. -/// -/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_broker_rolegroup_service`]). -#[allow(clippy::too_many_arguments)] -fn build_broker_rolegroup_statefulset( - kafka: &v1alpha1::KafkaCluster, - kafka_role: &KafkaRole, - resolved_product_image: &ResolvedProductImage, - rolegroup_ref: &RoleGroupRef, - broker_config: &HashMap>, - opa_connect_string: Option<&str>, - kafka_security: &KafkaTlsSecurity, - merged_config: &AnyConfig, - service_account: &ServiceAccount, - cluster_info: &KubernetesClusterInfo, -) -> Result { - let recommended_object_labels = build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup_ref.role, - &rolegroup_ref.role_group, - ); - let recommended_labels = - Labels::recommended(recommended_object_labels.clone()).context(LabelBuildSnafu)?; - // Used for PVC templates that cannot be modified once they are deployed - let unversioned_recommended_labels = Labels::recommended(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - // A version value is required, and we do want to use the "recommended" format for the other desired labels - "none", - &rolegroup_ref.role, - &rolegroup_ref.role_group, - )) - .context(LabelBuildSnafu)?; - - let kcat_prober_container_name = BrokerContainer::KcatProber.to_string(); - let mut cb_kcat_prober = - ContainerBuilder::new(&kcat_prober_container_name).context(InvalidContainerNameSnafu { - name: kcat_prober_container_name.clone(), - })?; - - let kafka_container_name = BrokerContainer::Kafka.to_string(); - let mut cb_kafka = - ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu { - name: kafka_container_name.clone(), - })?; - 
- let mut pod_builder = PodBuilder::new(); - - // Add TLS related volumes and volume mounts - let requested_secret_lifetime = merged_config - .deref() - .requested_secret_lifetime - .context(MissingSecretLifetimeSnafu)?; - kafka_security - .add_volume_and_volume_mounts( - &mut pod_builder, - &mut cb_kcat_prober, - &mut cb_kafka, - &requested_secret_lifetime, - ) - .context(AddVolumesAndVolumeMountsSnafu)?; - - let mut pvcs = merged_config.resources().storage.build_pvcs(); - - // bootstrap listener should be persistent, - // main broker listener is an ephemeral PVC instead - pvcs.push( - ListenerOperatorVolumeSourceBuilder::new( - &ListenerReference::ListenerName(kafka.bootstrap_service_name(rolegroup_ref)), - &unversioned_recommended_labels, - ) - .build_pvc(LISTENER_BOOTSTRAP_VOLUME_NAME) - .context(BuildBootstrapListenerPvcSnafu)?, - ); - - if kafka_security.has_kerberos_enabled() { - add_kerberos_pod_config( - kafka_security, - kafka_role, - &mut cb_kcat_prober, - &mut cb_kafka, - &mut pod_builder, - ) - .context(AddKerberosConfigSnafu)?; - } - - let mut env = broker_config - .get(&PropertyNameKind::Env) - .into_iter() - .flatten() - .map(|(k, v)| EnvVar { - name: k.clone(), - value: Some(v.clone()), - ..EnvVar::default() - }) - .collect::>(); - - if let Some(zookeeper_config_map_name) = &kafka.spec.cluster_config.zookeeper_config_map_name { - env.push(EnvVar { - name: "ZOOKEEPER".to_string(), - value_from: Some(EnvVarSource { - config_map_key_ref: Some(ConfigMapKeySelector { - name: zookeeper_config_map_name.to_string(), - key: "ZOOKEEPER".to_string(), - ..ConfigMapKeySelector::default() - }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }) - }; - - env.push(EnvVar { - name: "POD_NAME".to_string(), - value_from: Some(EnvVarSource { - field_ref: Some(ObjectFieldSelector { - api_version: Some("v1".to_string()), - field_path: "metadata.name".to_string(), - }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }); - - let kafka_listeners = 
get_kafka_listener_config( - kafka, - kafka_security, - &rolegroup_ref.object_name(), - cluster_info, - ) - .context(InvalidKafkaListenersSnafu)?; - - cb_kafka - .image_from_product_image(resolved_product_image) - .command(vec![ - "/bin/bash".to_string(), - "-x".to_string(), - "-euo".to_string(), - "pipefail".to_string(), - "-c".to_string(), - ]) - .args(vec![ - kafka_security - .kafka_container_commands( - &kafka_listeners, - opa_connect_string, - kafka_security.has_kerberos_enabled(), - ) - .join("\n"), - ]) - .add_env_var( - "EXTRA_ARGS", - kafka_role - .construct_non_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) - .context(ConstructJvmArgumentsSnafu)?, - ) - .add_env_var( - KAFKA_HEAP_OPTS, - kafka_role - .construct_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) - .context(ConstructJvmArgumentsSnafu)?, - ) - .add_env_var( - "KAFKA_LOG4J_OPTS", - format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), - ) - // Needed for the `containerdebug` process to log it's tracing information to. - .add_env_var( - "CONTAINERDEBUG_LOG_DIRECTORY", - format!("{STACKABLE_LOG_DIR}/containerdebug"), - ) - .add_env_vars(env) - .add_container_ports(container_ports(kafka_security)) - .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount("config", STACKABLE_CONFIG_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount( - LISTENER_BOOTSTRAP_VOLUME_NAME, - STACKABLE_LISTENER_BOOTSTRAP_DIR, - ) - .context(AddVolumeMountSnafu)? - .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount("log-config", STACKABLE_LOG_CONFIG_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount("log", STACKABLE_LOG_DIR) - .context(AddVolumeMountSnafu)? 
- .resources(merged_config.resources().clone().into()); - - // Use kcat sidecar for probing container status rather than the official Kafka tools, since they incur a lot of - // unacceptable perf overhead - cb_kcat_prober - .image_from_product_image(resolved_product_image) - .command(vec!["sleep".to_string(), "infinity".to_string()]) - .add_env_vars(vec![EnvVar { - name: "POD_NAME".to_string(), - value_from: Some(EnvVarSource { - field_ref: Some(ObjectFieldSelector { - api_version: Some("v1".to_string()), - field_path: "metadata.name".to_string(), - }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }]) - .resources( - ResourceRequirementsBuilder::new() - .with_cpu_request("100m") - .with_cpu_limit("200m") - .with_memory_request("128Mi") - .with_memory_limit("128Mi") - .build(), - ) - .add_volume_mount( - LISTENER_BOOTSTRAP_VOLUME_NAME, - STACKABLE_LISTENER_BOOTSTRAP_DIR, - ) - .context(AddVolumeMountSnafu)? - .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR) - .context(AddVolumeMountSnafu)? 
- // Only allow the global load balancing service to send traffic to pods that are members of the quorum - // This also acts as a hint to the StatefulSet controller to wait for each pod to enter quorum before taking down the next - .readiness_probe(Probe { - exec: Some(ExecAction { - // If the broker is able to get its fellow cluster members then it has at least completed basic registration at some point - command: Some(kafka_security.kcat_prober_container_commands()), - }), - timeout_seconds: Some(5), - period_seconds: Some(2), - ..Probe::default() - }); - - if let ContainerLogConfig { - choice: - Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { - custom: ConfigMapLogConfig { config_map }, - })), - } = &*merged_config.kafka_logging() - { - pod_builder - .add_volume( - VolumeBuilder::new("log-config") - .with_config_map(config_map) - .build(), - ) - .context(AddVolumeSnafu)?; - } else { - pod_builder - .add_volume( - VolumeBuilder::new("log-config") - .with_config_map(rolegroup_ref.object_name()) - .build(), - ) - .context(AddVolumeSnafu)?; - } - - let metadata = ObjectMetaBuilder::new() - .with_recommended_labels(recommended_object_labels) - .context(MetadataBuildSnafu)? - .build(); - - if let Some(listener_class) = merged_config.listener_class() { - pod_builder - .add_listener_volume_by_listener_class( - LISTENER_BROKER_VOLUME_NAME, - listener_class, - &recommended_labels, - ) - .context(AddListenerVolumeSnafu)?; - } - pod_builder - .metadata(metadata) - .image_pull_secrets_from_product_image(resolved_product_image) - .add_container(cb_kafka.build()) - .add_container(cb_kcat_prober.build()) - .affinity(&merged_config.affinity) - .add_volume(Volume { - name: "config".to_string(), - config_map: Some(ConfigMapVolumeSource { - name: rolegroup_ref.object_name(), - ..ConfigMapVolumeSource::default() - }), - ..Volume::default() - }) - .context(AddVolumeSnafu)? 
- // bootstrap volume is a persistent volume template instead, to keep addresses persistent - .add_empty_dir_volume( - "log", - Some(product_logging::framework::calculate_log_volume_size_limit( - &[MAX_KAFKA_LOG_FILES_SIZE], - )), - ) - .context(AddVolumeSnafu)? - .service_account_name(service_account.name_any()) - .security_context(PodSecurityContextBuilder::new().fs_group(1000).build()); - - // Add vector container after kafka container to keep the defaulting into kafka container - if merged_config.vector_logging_enabled() { - match &kafka.spec.cluster_config.vector_aggregator_config_map_name { - Some(vector_aggregator_config_map_name) => { - pod_builder.add_container( - product_logging::framework::vector_container( - resolved_product_image, - "config", - "log", - Some(&*merged_config.vector_logging()), - ResourceRequirementsBuilder::new() - .with_cpu_request("250m") - .with_cpu_limit("500m") - .with_memory_request("128Mi") - .with_memory_limit("128Mi") - .build(), - vector_aggregator_config_map_name, - ) - .context(ConfigureLoggingSnafu)?, - ); - } - None => { - VectorAggregatorConfigMapMissingSnafu.fail()?; - } - } - } - - add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; - - let mut pod_template = pod_builder.build_template(); - - let pod_template_spec = pod_template.spec.get_or_insert_with(PodSpec::default); - // Don't run kcat pod as PID 1, to ensure that default signal handlers apply - pod_template_spec.share_process_namespace = Some(true); - - pod_template.merge_from( - kafka_role - .role_pod_overrides(kafka) - .context(MergePodOverridesSnafu)?, - ); - pod_template.merge_from( - kafka_role - .role_group_pod_overrides(kafka, &rolegroup_ref.role_group) - .context(MergePodOverridesSnafu)?, - ); - - Ok(StatefulSet { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(rolegroup_ref.object_name()) - .ownerreference_from_resource(kafka, None, Some(true)) - 
.context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup_ref.role, - &rolegroup_ref.role_group, - )) - .context(MetadataBuildSnafu)? - .build(), - spec: Some(StatefulSetSpec { - pod_management_policy: Some("Parallel".to_string()), - replicas: kafka_role - .replicas(kafka, &rolegroup_ref.role_group) - .context(RoleGroupReplicasSnafu)? - .map(i32::from), - selector: LabelSelector { - match_labels: Some( - Labels::role_group_selector( - kafka, - APP_NAME, - &rolegroup_ref.role, - &rolegroup_ref.role_group, - ) - .context(LabelBuildSnafu)? - .into(), - ), - ..LabelSelector::default() - }, - service_name: Some(rolegroup_ref.object_name()), - template: pod_template, - volume_claim_templates: Some(pvcs), - ..StatefulSetSpec::default() - }), - status: None, - }) -} - pub fn error_policy( _obj: Arc>, error: &Error, @@ -1151,57 +488,6 @@ pub fn error_policy( } } -/// We only expose client HTTP / HTTPS and Metrics ports. -fn listener_ports(kafka_security: &KafkaTlsSecurity) -> Vec { - let mut ports = vec![ - listener::v1alpha1::ListenerPort { - name: METRICS_PORT_NAME.to_string(), - port: METRICS_PORT.into(), - protocol: Some("TCP".to_string()), - }, - listener::v1alpha1::ListenerPort { - name: kafka_security.client_port_name().to_string(), - port: kafka_security.client_port().into(), - protocol: Some("TCP".to_string()), - }, - ]; - if kafka_security.has_kerberos_enabled() { - ports.push(listener::v1alpha1::ListenerPort { - name: kafka_security.bootstrap_port_name().to_string(), - port: kafka_security.bootstrap_port().into(), - protocol: Some("TCP".to_string()), - }); - } - ports -} - -/// We only expose client HTTP / HTTPS and Metrics ports. 
-fn container_ports(kafka_security: &KafkaTlsSecurity) -> Vec { - let mut ports = vec![ - ContainerPort { - name: Some(METRICS_PORT_NAME.to_string()), - container_port: METRICS_PORT.into(), - protocol: Some("TCP".to_string()), - ..ContainerPort::default() - }, - ContainerPort { - name: Some(kafka_security.client_port_name().to_string()), - container_port: kafka_security.client_port().into(), - protocol: Some("TCP".to_string()), - ..ContainerPort::default() - }, - ]; - if kafka_security.has_kerberos_enabled() { - ports.push(ContainerPort { - name: Some(kafka_security.bootstrap_port_name().to_string()), - container_port: kafka_security.bootstrap_port().into(), - protocol: Some("TCP".to_string()), - ..ContainerPort::default() - }); - } - ports -} - /// Defines all required roles and their required configuration. /// /// The roles and their configs are then validated and complemented by the product config. diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index ef9c8ce7..2404e690 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -45,6 +45,7 @@ mod kafka_controller; mod kerberos; mod operations; mod product_logging; +mod resource; mod utils; mod built_info { diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs new file mode 100644 index 00000000..533955ef --- /dev/null +++ b/rust/operator-binary/src/resource/configmap.rs @@ -0,0 +1,133 @@ +use std::collections::{BTreeMap, HashMap}; + +use product_config::{types::PropertyNameKind, writer::to_java_properties_string}; +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + builder::{configmap::ConfigMapBuilder, meta::ObjectMetaBuilder}, + commons::product_image_selection::ResolvedProductImage, + k8s_openapi::api::core::v1::ConfigMap, + role_utils::RoleGroupRef, +}; + +use crate::{ + crd::{JVM_SECURITY_PROPERTIES_FILE, role::AnyConfig, security::KafkaTlsSecurity, v1alpha1}, + 
kafka_controller::KAFKA_CONTROLLER_NAME, + operations::graceful_shutdown::graceful_shutdown_config_properties, + product_logging::extend_role_group_config_map, + utils::build_recommended_labels, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to build ConfigMap for {}", rolegroup))] + BuildRoleGroupConfig { + source: stackable_operator::builder::configmap::Error, + rolegroup: RoleGroupRef, + }, + + #[snafu(display( + "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", + rolegroup + ))] + JvmSecurityPoperties { + source: product_config::writer::PropertiesWriterError, + rolegroup: String, + }, + + #[snafu(display("failed to build Metadata"))] + MetadataBuild { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to serialize config for {rolegroup}"))] + SerializeConfig { + source: product_config::writer::PropertiesWriterError, + rolegroup: RoleGroupRef, + }, +} + +/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator +pub fn build_rolegroup_config_map( + kafka: &v1alpha1::KafkaCluster, + resolved_product_image: &ResolvedProductImage, + kafka_security: &KafkaTlsSecurity, + rolegroup: &RoleGroupRef, + rolegroup_config: &HashMap>, + merged_config: &AnyConfig, +) -> Result { + let kafka_config_file_name = merged_config.config_file_name(); + + let mut kafka_config = rolegroup_config + .get(&PropertyNameKind::File(kafka_config_file_name.to_string())) + .cloned() + .unwrap_or_default(); + + kafka_config.extend(kafka_security.config_settings()); + kafka_config.extend(graceful_shutdown_config_properties()); + + let kafka_config = kafka_config + .into_iter() + .map(|(k, v)| (k, Some(v))) + .collect::>(); + + let jvm_sec_props: BTreeMap> = rolegroup_config + 
.get(&PropertyNameKind::File( + JVM_SECURITY_PROPERTIES_FILE.to_string(), + )) + .cloned() + .unwrap_or_default() + .into_iter() + .map(|(k, v)| (k, Some(v))) + .collect(); + + let mut cm_builder = ConfigMapBuilder::new(); + cm_builder + .metadata( + ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(rolegroup.object_name()) + .ownerreference_from_resource(kafka, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(MetadataBuildSnafu)? + .build(), + ) + .add_data( + kafka_config_file_name, + to_java_properties_string(kafka_config.iter().map(|(k, v)| (k, v))).with_context( + |_| SerializeConfigSnafu { + rolegroup: rolegroup.clone(), + }, + )?, + ) + .add_data( + JVM_SECURITY_PROPERTIES_FILE, + to_java_properties_string(jvm_sec_props.iter()).with_context(|_| { + JvmSecurityPopertiesSnafu { + rolegroup: rolegroup.role_group.clone(), + } + })?, + ); + + tracing::debug!(?kafka_config, "Applied kafka config"); + tracing::debug!(?jvm_sec_props, "Applied JVM config"); + + extend_role_group_config_map(rolegroup, merged_config, &mut cm_builder); + + cm_builder + .build() + .with_context(|_| BuildRoleGroupConfigSnafu { + rolegroup: rolegroup.clone(), + }) +} diff --git a/rust/operator-binary/src/resource/listener.rs b/rust/operator-binary/src/resource/listener.rs new file mode 100644 index 00000000..23cc254f --- /dev/null +++ b/rust/operator-binary/src/resource/listener.rs @@ -0,0 +1,85 @@ +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + builder::meta::ObjectMetaBuilder, commons::product_image_selection::ResolvedProductImage, + crd::listener, role_utils::RoleGroupRef, +}; + +use crate::{ + crd::{ + METRICS_PORT, METRICS_PORT_NAME, role::broker::BrokerConfig, security::KafkaTlsSecurity, + v1alpha1, + }, + 
kafka_controller::KAFKA_CONTROLLER_NAME, + utils::build_recommended_labels, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to build Metadata"))] + MetadataBuild { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, +} + +/// Kafka clients will use the load-balanced bootstrap listener to get a list of broker addresses and will use those to +/// transmit data to the correct broker. +// TODO (@NickLarsenNZ): Move shared functionality to stackable-operator +pub fn build_broker_rolegroup_bootstrap_listener( + kafka: &v1alpha1::KafkaCluster, + resolved_product_image: &ResolvedProductImage, + kafka_security: &KafkaTlsSecurity, + rolegroup: &RoleGroupRef, + merged_config: &BrokerConfig, +) -> Result { + Ok(listener::v1alpha1::Listener { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(kafka.bootstrap_service_name(rolegroup)) + .ownerreference_from_resource(kafka, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(MetadataBuildSnafu)? + .build(), + spec: listener::v1alpha1::ListenerSpec { + class_name: Some(merged_config.bootstrap_listener_class.clone()), + ports: Some(listener_ports(kafka_security)), + ..listener::v1alpha1::ListenerSpec::default() + }, + status: None, + }) +} + +/// We only expose client HTTP / HTTPS and Metrics ports. 
+fn listener_ports(kafka_security: &KafkaTlsSecurity) -> Vec { + let mut ports = vec![ + listener::v1alpha1::ListenerPort { + name: METRICS_PORT_NAME.to_string(), + port: METRICS_PORT.into(), + protocol: Some("TCP".to_string()), + }, + listener::v1alpha1::ListenerPort { + name: kafka_security.client_port_name().to_string(), + port: kafka_security.client_port().into(), + protocol: Some("TCP".to_string()), + }, + ]; + if kafka_security.has_kerberos_enabled() { + ports.push(listener::v1alpha1::ListenerPort { + name: kafka_security.bootstrap_port_name().to_string(), + port: kafka_security.bootstrap_port().into(), + protocol: Some("TCP".to_string()), + }); + } + ports +} diff --git a/rust/operator-binary/src/resource/mod.rs b/rust/operator-binary/src/resource/mod.rs new file mode 100644 index 00000000..a79483f8 --- /dev/null +++ b/rust/operator-binary/src/resource/mod.rs @@ -0,0 +1,4 @@ +pub mod configmap; +pub mod listener; +pub mod service; +pub mod statefulset; diff --git a/rust/operator-binary/src/resource/service.rs b/rust/operator-binary/src/resource/service.rs new file mode 100644 index 00000000..38f7b34b --- /dev/null +++ b/rust/operator-binary/src/resource/service.rs @@ -0,0 +1,75 @@ +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + builder::meta::ObjectMetaBuilder, + commons::product_image_selection::ResolvedProductImage, + k8s_openapi::api::core::v1::{Service, ServiceSpec}, + kvp::{Label, Labels}, + role_utils::RoleGroupRef, +}; + +use crate::{ + crd::{APP_NAME, v1alpha1}, + kafka_controller::KAFKA_CONTROLLER_NAME, + utils::build_recommended_labels, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to build Metadata"))] + MetadataBuild { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build Labels"))] + LabelBuild { + source: stackable_operator::kvp::LabelError, + }, + + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + 
source: stackable_operator::builder::meta::Error, + }, +} + +/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup +/// +/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. +pub fn build_broker_rolegroup_service( + kafka: &v1alpha1::KafkaCluster, + resolved_product_image: &ResolvedProductImage, + rolegroup: &RoleGroupRef, +) -> Result { + Ok(Service { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(rolegroup.object_name()) + .ownerreference_from_resource(kafka, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(MetadataBuildSnafu)? + .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?) + .build(), + spec: Some(ServiceSpec { + cluster_ip: Some("None".to_string()), + selector: Some( + Labels::role_group_selector( + kafka, + APP_NAME, + &rolegroup.role, + &rolegroup.role_group, + ) + .context(LabelBuildSnafu)? 
+ .into(), + ), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }), + status: None, + }) +} diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs new file mode 100644 index 00000000..b051cfab --- /dev/null +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -0,0 +1,795 @@ +use std::{ + collections::{BTreeMap, HashMap}, + ops::Deref, +}; + +use product_config::types::PropertyNameKind; +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + builder::{ + meta::ObjectMetaBuilder, + pod::{ + PodBuilder, + container::ContainerBuilder, + resources::ResourceRequirementsBuilder, + security::PodSecurityContextBuilder, + volume::{ListenerOperatorVolumeSourceBuilder, ListenerReference, VolumeBuilder}, + }, + }, + commons::product_image_selection::ResolvedProductImage, + k8s_openapi::{ + DeepMerge, + api::{ + apps::v1::{StatefulSet, StatefulSetSpec}, + core::v1::{ + ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, EnvVarSource, + ExecAction, ObjectFieldSelector, PodSpec, Probe, ServiceAccount, Volume, + }, + }, + apimachinery::pkg::apis::meta::v1::LabelSelector, + }, + kube::ResourceExt, + kvp::Labels, + product_logging::{ + self, + spec::{ + ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, + CustomContainerLogConfig, + }, + }, + role_utils::RoleGroupRef, + utils::cluster_info::KubernetesClusterInfo, +}; + +use crate::{ + crd::{ + self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, + LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, + STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, + STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, + listener::get_kafka_listener_config, + role::{AnyConfig, KafkaRole, broker::BrokerContainer}, + security::KafkaTlsSecurity, + v1alpha1, + }, + kafka_controller::KAFKA_CONTROLLER_NAME, + 
kerberos::add_kerberos_pod_config, + operations::graceful_shutdown::add_graceful_shutdown_config, + product_logging::{LOG4J_CONFIG_FILE, MAX_KAFKA_LOG_FILES_SIZE}, + utils::build_recommended_labels, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to add kerberos config"))] + AddKerberosConfig { source: crate::kerberos::Error }, + + #[snafu(display("failed to add listener volume"))] + AddListenerVolume { + source: stackable_operator::builder::pod::Error, + }, + + #[snafu(display("failed to add Secret Volumes and VolumeMounts"))] + AddVolumesAndVolumeMounts { source: crate::crd::security::Error }, + + #[snafu(display("failed to add needed volumeMount"))] + AddVolumeMount { + source: stackable_operator::builder::pod::container::Error, + }, + + #[snafu(display("failed to add needed volume"))] + AddVolume { + source: stackable_operator::builder::pod::Error, + }, + + #[snafu(display("failed to builld bootstrap listener pvc"))] + BuildBootstrapListenerPvc { + source: stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, + }, + + #[snafu(display("failed to configure logging"))] + ConfigureLogging { + source: stackable_operator::product_logging::framework::LoggingError, + }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::crd::role::Error }, + + #[snafu(display("failed to configure graceful shutdown"))] + GracefulShutdown { + source: crate::operations::graceful_shutdown::Error, + }, + + #[snafu(display("invalid Container name [{name}]"))] + InvalidContainerName { + name: String, + source: stackable_operator::builder::pod::container::Error, + }, + + #[snafu(display("invalid kafka listeners"))] + InvalidKafkaListeners { + source: crate::crd::listener::KafkaListenerError, + }, + + #[snafu(display("failed to build Labels"))] + LabelBuild { + source: stackable_operator::kvp::LabelError, + }, + + #[snafu(display("failed to merge pod overrides"))] + MergePodOverrides { 
source: crd::role::Error }, + + #[snafu(display("failed to build Metadata"))] + MetadataBuild { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to retrieve rolegroup replicas"))] + RoleGroupReplicas { source: crd::role::Error }, + + #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] + VectorAggregatorConfigMapMissing, +} + +/// The broker rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. +/// +/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_broker_rolegroup_service`]). +#[allow(clippy::too_many_arguments)] +pub fn build_broker_rolegroup_statefulset( + kafka: &v1alpha1::KafkaCluster, + kafka_role: &KafkaRole, + resolved_product_image: &ResolvedProductImage, + rolegroup_ref: &RoleGroupRef, + broker_config: &HashMap>, + opa_connect_string: Option<&str>, + kafka_security: &KafkaTlsSecurity, + merged_config: &AnyConfig, + service_account: &ServiceAccount, + cluster_info: &KubernetesClusterInfo, +) -> Result { + let recommended_object_labels = build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + ); + let recommended_labels = + Labels::recommended(recommended_object_labels.clone()).context(LabelBuildSnafu)?; + // Used for PVC templates that cannot be modified once they are deployed + let unversioned_recommended_labels = Labels::recommended(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + // A version value is required, and we do want to use the "recommended" format for the other desired labels + "none", + 
&rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(LabelBuildSnafu)?; + + let kcat_prober_container_name = BrokerContainer::KcatProber.to_string(); + let mut cb_kcat_prober = + ContainerBuilder::new(&kcat_prober_container_name).context(InvalidContainerNameSnafu { + name: kcat_prober_container_name.clone(), + })?; + + let kafka_container_name = BrokerContainer::Kafka.to_string(); + let mut cb_kafka = + ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu { + name: kafka_container_name.clone(), + })?; + + let mut pod_builder = PodBuilder::new(); + + // Add TLS related volumes and volume mounts + let requested_secret_lifetime = merged_config + .deref() + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; + kafka_security + .add_volume_and_volume_mounts( + &mut pod_builder, + &mut cb_kcat_prober, + &mut cb_kafka, + &requested_secret_lifetime, + ) + .context(AddVolumesAndVolumeMountsSnafu)?; + + let mut pvcs = merged_config.resources().storage.build_pvcs(); + + // bootstrap listener should be persistent, + // main broker listener is an ephemeral PVC instead + pvcs.push( + ListenerOperatorVolumeSourceBuilder::new( + &ListenerReference::ListenerName(kafka.bootstrap_service_name(rolegroup_ref)), + &unversioned_recommended_labels, + ) + .build_pvc(LISTENER_BOOTSTRAP_VOLUME_NAME) + .context(BuildBootstrapListenerPvcSnafu)?, + ); + + if kafka_security.has_kerberos_enabled() { + add_kerberos_pod_config( + kafka_security, + kafka_role, + &mut cb_kcat_prober, + &mut cb_kafka, + &mut pod_builder, + ) + .context(AddKerberosConfigSnafu)?; + } + + let mut env = broker_config + .get(&PropertyNameKind::Env) + .into_iter() + .flatten() + .map(|(k, v)| EnvVar { + name: k.clone(), + value: Some(v.clone()), + ..EnvVar::default() + }) + .collect::>(); + + if let Some(zookeeper_config_map_name) = &kafka.spec.cluster_config.zookeeper_config_map_name { + env.push(EnvVar { + name: "ZOOKEEPER".to_string(), + value_from: Some(EnvVarSource 
{ + config_map_key_ref: Some(ConfigMapKeySelector { + name: zookeeper_config_map_name.to_string(), + key: "ZOOKEEPER".to_string(), + ..ConfigMapKeySelector::default() + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }) + }; + + env.push(EnvVar { + name: "POD_NAME".to_string(), + value_from: Some(EnvVarSource { + field_ref: Some(ObjectFieldSelector { + api_version: Some("v1".to_string()), + field_path: "metadata.name".to_string(), + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }); + + let kafka_listeners = get_kafka_listener_config( + kafka, + kafka_security, + &rolegroup_ref.object_name(), + cluster_info, + ) + .context(InvalidKafkaListenersSnafu)?; + + cb_kafka + .image_from_product_image(resolved_product_image) + .command(vec![ + "/bin/bash".to_string(), + "-x".to_string(), + "-euo".to_string(), + "pipefail".to_string(), + "-c".to_string(), + ]) + .args(vec![ + kafka_security + .kafka_container_commands( + &kafka_listeners, + opa_connect_string, + kafka_security.has_kerberos_enabled(), + ) + .join("\n"), + ]) + .add_env_var( + "EXTRA_ARGS", + kafka_role + .construct_non_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + KAFKA_HEAP_OPTS, + kafka_role + .construct_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + "KAFKA_LOG4J_OPTS", + format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), + ) + // Needed for the `containerdebug` process to log it's tracing information to. + .add_env_var( + "CONTAINERDEBUG_LOG_DIRECTORY", + format!("{STACKABLE_LOG_DIR}/containerdebug"), + ) + .add_env_vars(env) + .add_container_ports(container_ports(kafka_security)) + .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("config", STACKABLE_CONFIG_DIR) + .context(AddVolumeMountSnafu)? 
+ .add_volume_mount( + LISTENER_BOOTSTRAP_VOLUME_NAME, + STACKABLE_LISTENER_BOOTSTRAP_DIR, + ) + .context(AddVolumeMountSnafu)? + .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("log-config", STACKABLE_LOG_CONFIG_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("log", STACKABLE_LOG_DIR) + .context(AddVolumeMountSnafu)? + .resources(merged_config.resources().clone().into()); + + // Use kcat sidecar for probing container status rather than the official Kafka tools, since they incur a lot of + // unacceptable perf overhead + cb_kcat_prober + .image_from_product_image(resolved_product_image) + .command(vec!["sleep".to_string(), "infinity".to_string()]) + .add_env_vars(vec![EnvVar { + name: "POD_NAME".to_string(), + value_from: Some(EnvVarSource { + field_ref: Some(ObjectFieldSelector { + api_version: Some("v1".to_string()), + field_path: "metadata.name".to_string(), + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }]) + .resources( + ResourceRequirementsBuilder::new() + .with_cpu_request("100m") + .with_cpu_limit("200m") + .with_memory_request("128Mi") + .with_memory_limit("128Mi") + .build(), + ) + .add_volume_mount( + LISTENER_BOOTSTRAP_VOLUME_NAME, + STACKABLE_LISTENER_BOOTSTRAP_DIR, + ) + .context(AddVolumeMountSnafu)? + .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR) + .context(AddVolumeMountSnafu)? 
+ // Only allow the global load balancing service to send traffic to pods that are members of the quorum + // This also acts as a hint to the StatefulSet controller to wait for each pod to enter quorum before taking down the next + .readiness_probe(Probe { + exec: Some(ExecAction { + // If the broker is able to get its fellow cluster members then it has at least completed basic registration at some point + command: Some(kafka_security.kcat_prober_container_commands()), + }), + timeout_seconds: Some(5), + period_seconds: Some(2), + ..Probe::default() + }); + + if let ContainerLogConfig { + choice: + Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { + custom: ConfigMapLogConfig { config_map }, + })), + } = &*merged_config.kafka_logging() + { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(config_map) + .build(), + ) + .context(AddVolumeSnafu)?; + } else { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(rolegroup_ref.object_name()) + .build(), + ) + .context(AddVolumeSnafu)?; + } + + let metadata = ObjectMetaBuilder::new() + .with_recommended_labels(recommended_object_labels) + .context(MetadataBuildSnafu)? + .build(); + + if let Some(listener_class) = merged_config.listener_class() { + pod_builder + .add_listener_volume_by_listener_class( + LISTENER_BROKER_VOLUME_NAME, + listener_class, + &recommended_labels, + ) + .context(AddListenerVolumeSnafu)?; + } + pod_builder + .metadata(metadata) + .image_pull_secrets_from_product_image(resolved_product_image) + .add_container(cb_kafka.build()) + .add_container(cb_kcat_prober.build()) + .affinity(&merged_config.affinity) + .add_volume(Volume { + name: "config".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: rolegroup_ref.object_name(), + ..ConfigMapVolumeSource::default() + }), + ..Volume::default() + }) + .context(AddVolumeSnafu)? 
+ // bootstrap volume is a persistent volume template instead, to keep addresses persistent + .add_empty_dir_volume( + "log", + Some(product_logging::framework::calculate_log_volume_size_limit( + &[MAX_KAFKA_LOG_FILES_SIZE], + )), + ) + .context(AddVolumeSnafu)? + .service_account_name(service_account.name_any()) + .security_context(PodSecurityContextBuilder::new().fs_group(1000).build()); + + // Add vector container after kafka container to keep the defaulting into kafka container + if merged_config.vector_logging_enabled() { + match &kafka.spec.cluster_config.vector_aggregator_config_map_name { + Some(vector_aggregator_config_map_name) => { + pod_builder.add_container( + product_logging::framework::vector_container( + resolved_product_image, + "config", + "log", + Some(&*merged_config.vector_logging()), + ResourceRequirementsBuilder::new() + .with_cpu_request("250m") + .with_cpu_limit("500m") + .with_memory_request("128Mi") + .with_memory_limit("128Mi") + .build(), + vector_aggregator_config_map_name, + ) + .context(ConfigureLoggingSnafu)?, + ); + } + None => { + VectorAggregatorConfigMapMissingSnafu.fail()?; + } + } + } + + add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; + + let mut pod_template = pod_builder.build_template(); + + let pod_template_spec = pod_template.spec.get_or_insert_with(PodSpec::default); + // Don't run kcat pod as PID 1, to ensure that default signal handlers apply + pod_template_spec.share_process_namespace = Some(true); + + pod_template.merge_from( + kafka_role + .role_pod_overrides(kafka) + .context(MergePodOverridesSnafu)?, + ); + pod_template.merge_from( + kafka_role + .role_group_pod_overrides(kafka, &rolegroup_ref.role_group) + .context(MergePodOverridesSnafu)?, + ); + + Ok(StatefulSet { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(rolegroup_ref.object_name()) + .ownerreference_from_resource(kafka, None, Some(true)) + 
.context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(MetadataBuildSnafu)? + .build(), + spec: Some(StatefulSetSpec { + pod_management_policy: Some("Parallel".to_string()), + replicas: kafka_role + .replicas(kafka, &rolegroup_ref.role_group) + .context(RoleGroupReplicasSnafu)? + .map(i32::from), + selector: LabelSelector { + match_labels: Some( + Labels::role_group_selector( + kafka, + APP_NAME, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + ) + .context(LabelBuildSnafu)? + .into(), + ), + ..LabelSelector::default() + }, + service_name: Some(rolegroup_ref.object_name()), + template: pod_template, + volume_claim_templates: Some(pvcs), + ..StatefulSetSpec::default() + }), + status: None, + }) +} + +/// The controller rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. 
+#[allow(clippy::too_many_arguments)] +pub fn build_controller_rolegroup_statefulset( + kafka: &v1alpha1::KafkaCluster, + kafka_role: &KafkaRole, + resolved_product_image: &ResolvedProductImage, + rolegroup_ref: &RoleGroupRef, + controller_config: &HashMap>, + kafka_security: &KafkaTlsSecurity, + merged_config: &AnyConfig, + service_account: &ServiceAccount, +) -> Result { + let recommended_object_labels = build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + ); + + let kafka_container_name = BrokerContainer::Kafka.to_string(); + let mut cb_kafka = + ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu { + name: kafka_container_name.clone(), + })?; + + let mut pod_builder = PodBuilder::new(); + + let mut env = controller_config + .get(&PropertyNameKind::Env) + .into_iter() + .flatten() + .map(|(k, v)| EnvVar { + name: k.clone(), + value: Some(v.clone()), + ..EnvVar::default() + }) + .collect::>(); + + env.push(EnvVar { + name: "POD_NAME".to_string(), + value_from: Some(EnvVarSource { + field_ref: Some(ObjectFieldSelector { + api_version: Some("v1".to_string()), + field_path: "metadata.name".to_string(), + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }); + + cb_kafka + .image_from_product_image(resolved_product_image) + .command(vec![ + "/bin/bash".to_string(), + "-x".to_string(), + "-euo".to_string(), + "pipefail".to_string(), + "-c".to_string(), + ]) + .args(vec![ + "bin/kafka-server-start.sh /stackable/config/controller.properties".to_string(), + ]) + .add_env_var( + "EXTRA_ARGS", + kafka_role + .construct_non_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + KAFKA_HEAP_OPTS, + kafka_role + .construct_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + 
"KAFKA_LOG4J_OPTS", + format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), + ) + // Needed for the `containerdebug` process to log it's tracing information to. + .add_env_var( + "CONTAINERDEBUG_LOG_DIRECTORY", + format!("{STACKABLE_LOG_DIR}/containerdebug"), + ) + .add_env_vars(env) + .add_container_ports(container_ports(kafka_security)) + .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("config", STACKABLE_CONFIG_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("log-config", STACKABLE_LOG_CONFIG_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("log", STACKABLE_LOG_DIR) + .context(AddVolumeMountSnafu)? + .resources(merged_config.resources().clone().into()); + + if let ContainerLogConfig { + choice: + Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { + custom: ConfigMapLogConfig { config_map }, + })), + } = &*merged_config.kafka_logging() + { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(config_map) + .build(), + ) + .context(AddVolumeSnafu)?; + } else { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(rolegroup_ref.object_name()) + .build(), + ) + .context(AddVolumeSnafu)?; + } + + let metadata = ObjectMetaBuilder::new() + .with_recommended_labels(recommended_object_labels) + .context(MetadataBuildSnafu)? + .build(); + + pod_builder + .metadata(metadata) + .image_pull_secrets_from_product_image(resolved_product_image) + .add_container(cb_kafka.build()) + .affinity(&merged_config.affinity) + .add_volume(Volume { + name: "config".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: rolegroup_ref.object_name(), + ..ConfigMapVolumeSource::default() + }), + ..Volume::default() + }) + .context(AddVolumeSnafu)? 
+ // bootstrap volume is a persistent volume template instead, to keep addresses persistent + .add_empty_dir_volume( + "log", + Some(product_logging::framework::calculate_log_volume_size_limit( + &[MAX_KAFKA_LOG_FILES_SIZE], + )), + ) + .context(AddVolumeSnafu)? + .service_account_name(service_account.name_any()) + .security_context(PodSecurityContextBuilder::new().fs_group(1000).build()); + + // Add vector container after kafka container to keep the defaulting into kafka container + if merged_config.vector_logging_enabled() { + match &kafka.spec.cluster_config.vector_aggregator_config_map_name { + Some(vector_aggregator_config_map_name) => { + pod_builder.add_container( + product_logging::framework::vector_container( + resolved_product_image, + "config", + "log", + Some(&*merged_config.vector_logging()), + ResourceRequirementsBuilder::new() + .with_cpu_request("250m") + .with_cpu_limit("500m") + .with_memory_request("128Mi") + .with_memory_limit("128Mi") + .build(), + vector_aggregator_config_map_name, + ) + .context(ConfigureLoggingSnafu)?, + ); + } + None => { + VectorAggregatorConfigMapMissingSnafu.fail()?; + } + } + } + + add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; + + let mut pod_template = pod_builder.build_template(); + let pod_template_spec = pod_template.spec.get_or_insert_with(PodSpec::default); + + // Don't run kcat pod as PID 1, to ensure that default signal handlers apply + // TODO: we need that? 
+ pod_template_spec.share_process_namespace = Some(true); + + pod_template.merge_from( + kafka_role + .role_pod_overrides(kafka) + .context(MergePodOverridesSnafu)?, + ); + pod_template.merge_from( + kafka_role + .role_group_pod_overrides(kafka, &rolegroup_ref.role_group) + .context(MergePodOverridesSnafu)?, + ); + + Ok(StatefulSet { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(rolegroup_ref.object_name()) + .ownerreference_from_resource(kafka, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(MetadataBuildSnafu)? + .build(), + spec: Some(StatefulSetSpec { + pod_management_policy: Some("Parallel".to_string()), + replicas: kafka_role + .replicas(kafka, &rolegroup_ref.role_group) + .context(RoleGroupReplicasSnafu)? + .map(i32::from), + selector: LabelSelector { + match_labels: Some( + Labels::role_group_selector( + kafka, + APP_NAME, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + ) + .context(LabelBuildSnafu)? + .into(), + ), + ..LabelSelector::default() + }, + service_name: Some(rolegroup_ref.object_name()), + template: pod_template, + volume_claim_templates: Some(merged_config.resources().storage.build_pvcs()), + ..StatefulSetSpec::default() + }), + status: None, + }) +} + +/// We only expose client HTTP / HTTPS and Metrics ports. 
+fn container_ports(kafka_security: &KafkaTlsSecurity) -> Vec { + let mut ports = vec![ + ContainerPort { + name: Some(METRICS_PORT_NAME.to_string()), + container_port: METRICS_PORT.into(), + protocol: Some("TCP".to_string()), + ..ContainerPort::default() + }, + ContainerPort { + name: Some(kafka_security.client_port_name().to_string()), + container_port: kafka_security.client_port().into(), + protocol: Some("TCP".to_string()), + ..ContainerPort::default() + }, + ]; + if kafka_security.has_kerberos_enabled() { + ports.push(ContainerPort { + name: Some(kafka_security.bootstrap_port_name().to_string()), + container_port: kafka_security.bootstrap_port().into(), + protocol: Some("TCP".to_string()), + ..ContainerPort::default() + }); + } + ports +} From 1a0b5d49f911d44c56aba31d72231e8b8ff2c065 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 28 Aug 2025 15:28:16 +0200 Subject: [PATCH 26/90] wip - controller working --- rust/operator-binary/src/config/command.rs | 92 +++++++++++++ rust/operator-binary/src/config/mod.rs | 1 + rust/operator-binary/src/crd/mod.rs | 125 +++++++++++++++++- .../src/crd/role/controller.rs | 22 +-- rust/operator-binary/src/crd/role/mod.rs | 37 ++++-- rust/operator-binary/src/kafka_controller.rs | 22 ++- .../operator-binary/src/resource/configmap.rs | 5 +- .../src/resource/statefulset.rs | 52 +++++++- 8 files changed, 305 insertions(+), 51 deletions(-) create mode 100644 rust/operator-binary/src/config/command.rs diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs new file mode 100644 index 00000000..a37937c2 --- /dev/null +++ b/rust/operator-binary/src/config/command.rs @@ -0,0 +1,92 @@ +use std::collections::BTreeMap; + +use indoc::formatdoc; +use stackable_operator::{ + product_logging::framework::{ + create_vector_shutdown_file_command, remove_vector_shutdown_file_command, + }, + utils::COMMON_BASH_TRAP_FUNCTIONS, +}; + +use crate::crd::{ + KafkaPodDescriptor, STACKABLE_CONFIG_DIR, 
STACKABLE_LOG_DIR, + role::{ + KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, + KAFKA_LISTENERS, KAFKA_NODE_ID, controller::CONTROLLER_PROPERTIES_FILE, + }, +}; + +pub fn controller_kafka_container_command( + cluster_id: &str, + controller_descriptors: Vec, + server_start_overrides: BTreeMap, +) -> String { + // TODO: copy to tmp? mount readwrite folder? + formatdoc! {" + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} + + echo \"{KAFKA_NODE_ID}=$REPLICA_ID\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + bin/kafka-server-start.sh /tmp/{properties_file} {overrides} & + + wait_for_termination $! 
+        {create_vector_shutdown_file_command}
+        ",
+        remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR),
+        config_dir = STACKABLE_CONFIG_DIR,
+        properties_file = CONTROLLER_PROPERTIES_FILE,
+        bootstrap_servers = to_bootstrap_servers(&controller_descriptors),
+        listeners = to_listeners(),
+        listener_security_protocol_map = to_listener_security_protocol_map(),
+        initial_controllers = to_initial_controllers(&controller_descriptors),
+        overrides = to_kafka_overrides(server_start_overrides),
+        create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR)
+    }
+}
+
+fn to_listeners() -> String {
+    // TODO:
+    //  - document that variables are set in stateful set
+    //  - customize listener (CONTROLLER)
+    "CONTROLLER://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:9093".to_string()
+}
+
+fn to_listener_security_protocol_map() -> String {
+    // TODO: make configurable
+    "CONTROLLER:PLAINTEXT".to_string()
+}
+
+fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor]) -> String {
+    controller_descriptors
+        .iter()
+        .map(|desc| desc.as_voter())
+        .collect::<Vec<String>>()
+        .join(",")
+}
+
+fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor]) -> String {
+    controller_descriptors
+        .iter()
+        // TODO: make port configurable
+        .map(|desc| format!("{fqdn}:{port}", fqdn = desc.fqdn(), port = 9093))
+        .collect::<Vec<String>>()
+        .join(",")
+}
+
+fn to_kafka_overrides(overrides: BTreeMap<String, String>) -> String {
+    overrides
+        .iter()
+        .map(|(key, value)| format!("--override \"{key}={value}\""))
+        .collect::<Vec<String>>()
+        .join(" ")
+}
diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs
index 271c6d99..7a4b4e4a 100644
--- a/rust/operator-binary/src/config/mod.rs
+++ b/rust/operator-binary/src/config/mod.rs
@@ -1 +1,2 @@
+pub mod command;
 pub mod jvm;
diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs
index 255baa12..9e833f58 100644
---
a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -6,15 +6,21 @@ pub mod role; pub mod security; pub mod tls; +use std::collections::BTreeMap; + use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, Snafu}; use stackable_operator::{ - commons::{cluster_operation::ClusterOperation, product_image_selection::ProductImage}, + commons::{ + cluster_operation::ClusterOperation, networking::DomainName, + product_image_selection::ProductImage, + }, kube::{CustomResource, runtime::reflector::ObjectRef}, role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, + utils::cluster_info::KubernetesClusterInfo, versioned::versioned, }; @@ -54,6 +60,9 @@ pub enum Error { #[snafu(display("the Kafka role [{role}] is missing from spec"))] MissingRole { role: String }, + #[snafu(display("object has no namespace associated"))] + NoNamespace, + #[snafu(display( "Kafka version 4 and higher requires a Kraft controller (configured via `spec.controller`)" ))] @@ -174,6 +183,10 @@ impl v1alpha1::KafkaCluster { self.spec.controllers.is_some() } + pub fn uid(&self) -> Option<&str> { + self.metadata.uid.as_deref() + } + /// The name of the load-balanced Kubernetes Service providing the bootstrap address. Kafka clients will use this /// to get a list of broker addresses and will use those to transmit data to the correct broker. pub fn bootstrap_service_name(&self, rolegroup: &RoleGroupRef) -> String { @@ -215,6 +228,116 @@ impl v1alpha1::KafkaCluster { role: KafkaRole::Controller.to_string(), }) } + + /// List all pod descriptors of a provided role expected to form the cluster. + /// + /// We try to predict the pods here rather than looking at the current cluster state in order to + /// avoid instance churn. 
+    pub fn pod_descriptors(
+        &self,
+        kafka_role: &KafkaRole,
+        cluster_info: &KubernetesClusterInfo,
+    ) -> Result<Vec<KafkaPodDescriptor>, Error> {
+        let ns = self.metadata.namespace.clone().context(NoNamespaceSnafu)?;
+        Ok(match kafka_role {
+            KafkaRole::Broker => self
+                .broker_role()
+                .iter()
+                .flat_map(|role| &role.role_groups)
+                // Order rolegroups consistently, to avoid spurious downstream rewrites
+                .collect::<BTreeMap<_, _>>()
+                .into_iter()
+                .flat_map(move |(rolegroup_name, rolegroup)| {
+                    let rolegroup_ref = self.rolegroup_ref(kafka_role, rolegroup_name);
+                    let ns = ns.clone();
+                    (0..rolegroup.replicas.unwrap_or(0)).map(move |i| KafkaPodDescriptor {
+                        namespace: ns.clone(),
+                        role_group_service_name: rolegroup_ref.object_name(),
+                        replica: i,
+                        cluster_domain: cluster_info.cluster_domain.clone(),
+                    })
+                })
+                .collect(),
+
+            KafkaRole::Controller => self
+                .controller_role()
+                .iter()
+                .flat_map(|role| &role.role_groups)
+                // Order rolegroups consistently, to avoid spurious downstream rewrites
+                .collect::<BTreeMap<_, _>>()
+                .into_iter()
+                .flat_map(move |(rolegroup_name, rolegroup)| {
+                    let rolegroup_ref = self.rolegroup_ref(kafka_role, rolegroup_name);
+                    let ns = ns.clone();
+                    (0..rolegroup.replicas.unwrap_or(0)).map(move |i| KafkaPodDescriptor {
+                        namespace: ns.clone(),
+                        role_group_service_name: rolegroup_ref.object_name(),
+                        replica: i,
+                        cluster_domain: cluster_info.cluster_domain.clone(),
+                    })
+                })
+                .collect(),
+        })
+    }
+}
+
+/// Reference to a single `Pod` that is a component of a [`KafkaCluster`]
+///
+/// Used for service discovery.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct KafkaPodDescriptor {
+    namespace: String,
+    role_group_service_name: String,
+    replica: u16,
+    cluster_domain: DomainName,
+}
+
+impl KafkaPodDescriptor {
+    /// Return the fully qualified domain name
+    /// Format: <pod-name>.<role-group-service-name>.<namespace>.svc.<cluster-domain>
+ pub fn fqdn(&self) -> String { + format!( + "{pod_name}.{service_name}.{namespace}.svc.{cluster_domain}", + pod_name = self.pod_name(), + service_name = self.role_group_service_name, + namespace = self.namespace, + cluster_domain = self.cluster_domain + ) + } + + /// Return the fully qualified domain name for "replica" + /// Format: -...svc. + pub fn fqdn_for_replica(&self, replica: u16) -> String { + format!( + "{service_name}-{replica}.{service_name}.{namespace}.svc.{cluster_domain}", + service_name = self.role_group_service_name, + namespace = self.namespace, + cluster_domain = self.cluster_domain + ) + } + + pub fn pod_name(&self) -> String { + format!("{}-{}", self.role_group_service_name, self.replica) + } + + /// Build the Kraft voter String + /// See: https://kafka.apache.org/documentation/#kraft_storage_voters + /// Example: 0@controller-0:1234:0000000000-00000000000 + /// * 0 is the replica id + /// * 0000000000-00000000000 is the replica directory id (even though the used Uuid states to be type 4 it does not work) + /// See: https://github.com/apache/kafka/blob/c5169ca805bd03d870a5bcd49744dcc34891cf15/clients/src/main/java/org/apache/kafka/common/Uuid.java#L29 + /// * controller-0 is the replica's host, + /// * 1234 is the replica's port. + // TODO(@maltesander): Even though the used Uuid states to be type 4 it does not work... 0000000000-00000000000 works... 
+ pub fn as_voter(&self) -> String { + format!( + "{replica}@{fqdn}:{port}:0000000000-{replica:0>11}", + replica = self.replica, + fqdn = self.fqdn(), + // TODO: make port configureable + port = 9093 + ) + } } #[derive(Clone, Default, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index a2d8a9f3..52e9d972 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -16,7 +16,7 @@ use strum::{Display, EnumIter}; use crate::crd::{ role::{ - KafkaRole, LOG_DIRS, NODE_ID, PROCESS_ROLES, + KAFKA_LOG_DIRS, KAFKA_PROCESS_ROLES, KafkaRole, commons::{CommonConfig, Storage, StorageFragment}, }, v1alpha1, @@ -127,31 +127,19 @@ impl Configuration for ControllerConfigFragment { let mut config = BTreeMap::new(); if file == CONTROLLER_PROPERTIES_FILE { - // TODO: generate? - config.insert(NODE_ID.to_string(), Some("2".to_string())); - config.insert( - PROCESS_ROLES.to_string(), + KAFKA_PROCESS_ROLES.to_string(), Some(KafkaRole::Controller.to_string()), ); config.insert( - LOG_DIRS.to_string(), + KAFKA_LOG_DIRS.to_string(), Some("/stackable/data/kraft".to_string()), ); - // TEST: - config.insert( - "listeners".to_string(), - Some("listeners=INTERNAL://simple-kafka-controller-default-0.simple-kafka-controller-default.default.svc.cluster.local:9093".to_string()), - ); - config.insert( - "controller.quorum.bootstrap.servers".to_string(), - Some("simple-kafka-controller-default-0.simple-kafka-controller-default.default.svc.cluster.local:9093".to_string()), - ); config.insert( - "listener.security.protocol.map".to_string(), - Some("INTERNAL:PLAINTEXT".to_string()), + "controller.listener.names".to_string(), + Some("CONTROLLER".to_string()), ); } diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 0c292ee5..192f786c 100644 --- 
a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -29,15 +29,30 @@ use crate::{ }, v1alpha1, }; + // See: https://kafka.apache.org/documentation/#brokerconfigs +/// The node ID associated with the roles this process is playing when process.roles is non-empty. +/// This is required configuration when running in KRaft mode. +pub const KAFKA_NODE_ID: &str = "node.id"; + +/// The roles that this process plays: 'broker', 'controller', or 'broker,controller' if it is both. +pub const KAFKA_PROCESS_ROLES: &str = "process.roles"; + +/// A comma-separated list of the directories where the log data is stored. If not set, the value in log.dir is used. +pub const KAFKA_LOG_DIRS: &str = "log.dirs"; + +/// Listener List - Comma-separated list of URIs we will listen on and the listener names. +/// If the listener name is not a security protocol, listener.security.protocol.map must also be set. +pub const KAFKA_LISTENERS: &str = "listeners"; + +/// Map between listener names and security protocols. This must be defined for the same security protocol to be usable in more than one port or IP. +/// For example, internal and external traffic can be separated even if SSL is required for both. +/// Concretely, the user could define listeners with names INTERNAL and EXTERNAL and this property as: INTERNAL:SSL,EXTERNAL:SSL +pub const KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: &str = "listener.security.protocol.map"; -// The node ID associated with the roles this process is playing when process.roles is non-empty. -// This is required configuration when running in KRaft mode. -pub const NODE_ID: &str = "node.id"; -// The roles that this process plays: 'broker', 'controller', or 'broker,controller' if it is both. -pub const PROCESS_ROLES: &str = "process.roles"; -// A comma-separated list of the directories where the log data is stored. If not set, the value in log.dir is used. 
-pub const LOG_DIRS: &str = "log.dirs"; +/// List of endpoints to use for bootstrapping the cluster metadata. The endpoints are specified in comma-separated list of {host}:{port} entries. +/// For example: localhost:9092,localhost:9093,localhost:9094. +pub const KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS: &str = "controller.quorum.bootstrap.servers"; #[derive(Snafu, Debug)] pub enum Error { @@ -240,13 +255,7 @@ impl KafkaRole { } } - pub fn role_pod_overrides(JvmArgumentsSnafu), - } - } - - pub fn construct_heap_jvm_args( - &self, - merged_config: &AnyCon + pub fn role_pod_overrides( &self, kafka: &v1alpha1::KafkaCluster, ) -> Result { diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 018ac52d..b7adccd8 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -351,18 +351,15 @@ pub async fn reconcile_kafka( build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref) .context(BuildServiceSnafu)?; - let rg_configmap = match kafka_role { - KafkaRole::Broker => build_rolegroup_config_map( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - rolegroup_config, - &merged_config, - ) - .context(BuildConfigMapSnafu)?, - KafkaRole::Controller => todo!(), - }; + let rg_configmap = build_rolegroup_config_map( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + rolegroup_config, + &merged_config, + ) + .context(BuildConfigMapSnafu)?; let rg_statefulset = match kafka_role { KafkaRole::Broker => build_broker_rolegroup_statefulset( @@ -387,6 +384,7 @@ pub async fn reconcile_kafka( &kafka_security, &merged_config, &rbac_sa, + &client.kubernetes_cluster_info, ) .context(BuildStatefulsetSnafu)?, }; diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index 533955ef..da42e95b 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ 
b/rust/operator-binary/src/resource/configmap.rs @@ -67,7 +67,10 @@ pub fn build_rolegroup_config_map( .cloned() .unwrap_or_default(); - kafka_config.extend(kafka_security.config_settings()); + if let AnyConfig::Broker(_) = merged_config { + kafka_config.extend(kafka_security.config_settings()) + } + kafka_config.extend(graceful_shutdown_config_properties()); let kafka_config = kafka_config diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index b051cfab..34b2f767 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -42,13 +42,14 @@ use stackable_operator::{ }; use crate::{ + config::command::controller_kafka_container_command, crd::{ self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, listener::get_kafka_listener_config, - role::{AnyConfig, KafkaRole, broker::BrokerContainer}, + role::{AnyConfig, KafkaRole, broker::BrokerContainer, controller::ControllerContainer}, security::KafkaTlsSecurity, v1alpha1, }, @@ -82,11 +83,14 @@ pub enum Error { source: stackable_operator::builder::pod::Error, }, - #[snafu(display("failed to builld bootstrap listener pvc"))] + #[snafu(display("failed to build bootstrap listener pvc"))] BuildBootstrapListenerPvc { source: stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, }, + #[snafu(display("failed to build pod descriptors"))] + BuildPodDescriptors { source: crate::crd::Error }, + #[snafu(display("failed to configure logging"))] ConfigureLogging { source: stackable_operator::product_logging::framework::LoggingError, @@ -135,6 +139,9 @@ pub enum Error { #[snafu(display("failed to retrieve rolegroup replicas"))] RoleGroupReplicas { 
source: crd::role::Error }, + #[snafu(display("cluster does not define UID"))] + ClusterUidMissing, + #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] VectorAggregatorConfigMapMissing, } @@ -273,6 +280,8 @@ pub fn build_broker_rolegroup_statefulset( ) .context(InvalidKafkaListenersSnafu)?; + let cluster_id = kafka.uid().context(ClusterUidMissingSnafu)?; + cb_kafka .image_from_product_image(resolved_product_image) .command(vec![ @@ -288,6 +297,7 @@ pub fn build_broker_rolegroup_statefulset( &kafka_listeners, opa_connect_string, kafka_security.has_kerberos_enabled(), + cluster_id, ) .join("\n"), ]) @@ -538,6 +548,7 @@ pub fn build_controller_rolegroup_statefulset( kafka_security: &KafkaTlsSecurity, merged_config: &AnyConfig, service_account: &ServiceAccount, + cluster_info: &KubernetesClusterInfo, ) -> Result { let recommended_object_labels = build_recommended_labels( kafka, @@ -547,7 +558,7 @@ pub fn build_controller_rolegroup_statefulset( &rolegroup_ref.role_group, ); - let kafka_container_name = BrokerContainer::Kafka.to_string(); + let kafka_container_name = ControllerContainer::Kafka.to_string(); let mut cb_kafka = ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu { name: kafka_container_name.clone(), @@ -566,6 +577,18 @@ pub fn build_controller_rolegroup_statefulset( }) .collect::>(); + env.push(EnvVar { + name: "NAMESPACE".to_string(), + value_from: Some(EnvVarSource { + field_ref: Some(ObjectFieldSelector { + api_version: Some("v1".to_string()), + field_path: "metadata.namespace".to_string(), + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }); + env.push(EnvVar { name: "POD_NAME".to_string(), value_from: Some(EnvVarSource { @@ -578,6 +601,18 @@ pub fn build_controller_rolegroup_statefulset( ..EnvVar::default() }); + env.push(EnvVar { + name: "ROLEGROUP_REF".to_string(), + value: Some(rolegroup_ref.object_name()), + ..EnvVar::default() + }); + + env.push(EnvVar { + name: 
"CLUSTER_DOMAIN".to_string(), + value: Some(cluster_info.cluster_domain.to_string()), + ..EnvVar::default() + }); + cb_kafka .image_from_product_image(resolved_product_image) .command(vec![ @@ -587,9 +622,14 @@ pub fn build_controller_rolegroup_statefulset( "pipefail".to_string(), "-c".to_string(), ]) - .args(vec![ - "bin/kafka-server-start.sh /stackable/config/controller.properties".to_string(), - ]) + .args(vec![controller_kafka_container_command( + kafka.uid().context(ClusterUidMissingSnafu)?, + kafka + .pod_descriptors(kafka_role, cluster_info) + .context(BuildPodDescriptorsSnafu)?, + // TODO: fix overrides + BTreeMap::new(), + )]) .add_env_var( "EXTRA_ARGS", kafka_role From 3a42eca8b52f15348b234062c049c559c729c7d7 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 28 Aug 2025 17:22:16 +0200 Subject: [PATCH 27/90] wip - combine broker & controller --- rust/operator-binary/src/config/command.rs | 68 ++++++++++++++++++- rust/operator-binary/src/crd/listener.rs | 13 +++- rust/operator-binary/src/crd/role/broker.rs | 17 +++-- rust/operator-binary/src/crd/role/mod.rs | 12 ++++ rust/operator-binary/src/crd/security.rs | 58 ++-------------- .../src/resource/statefulset.rs | 21 +++--- 6 files changed, 116 insertions(+), 73 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index a37937c2..8c66cf22 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -9,13 +9,77 @@ use stackable_operator::{ }; use crate::crd::{ - KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_LOG_DIR, + KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, + STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, + listener::{KafkaListenerConfig, node_address_cmd}, role::{ + KAFKA_ADVERTISED_LISTENERS, KAFKA_BROKER_ID_OFFSET, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, - KAFKA_LISTENERS, 
KAFKA_NODE_ID, controller::CONTROLLER_PROPERTIES_FILE, + KAFKA_LISTENERS, KAFKA_NODE_ID, KafkaRole, broker::BROKER_PROPERTIES_FILE, + controller::CONTROLLER_PROPERTIES_FILE, }, }; +/// Returns the commands to start the main Kafka container +pub fn broker_kafka_container_commands( + cluster_id: &str, + controller_descriptors: Vec, + kafka_listeners: &KafkaListenerConfig, + opa_connect_string: Option<&str>, + kerberos_enabled: bool, +) -> Vec { + // TODO: fix the "10$REPLICA_ID" fix to not clash with controller ids + vec![formatdoc! {" + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + {set_realm_env} + + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} + + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + {KAFKA_BROKER_ID_OFFSET}))\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & + + wait_for_termination $! 
+ {create_vector_shutdown_file_command} + ", + remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR), + set_realm_env = match kerberos_enabled { + true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), + false => "".to_string(), + }, + config_dir = STACKABLE_CONFIG_DIR, + properties_file = BROKER_PROPERTIES_FILE, + bootstrap_servers = to_bootstrap_servers(&controller_descriptors), + initial_controllers = to_initial_controllers(&controller_descriptors), + listeners = kafka_listeners.listeners(), + advertised_listeners = kafka_listeners.advertised_listeners(), + listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), + opa_config = match opa_connect_string { + None => "".to_string(), + Some(opa_connect_string) => format!(" --override \"opa.authorizer.url={opa_connect_string}\""), + }, + jaas_config = match kerberos_enabled { + true => { + let service_name = KafkaRole::Broker.kerberos_service_name(); + let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); + let bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR); + // TODO replace client and bootstrap below with constants + format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" --override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string()}, + false => "".to_string(), + }, + }] +} + pub fn controller_kafka_container_command( 
cluster_id: &str, controller_descriptors: Vec, diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index b337461e..2bff4d4b 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -42,6 +42,8 @@ pub enum KafkaListenerName { Internal, #[strum(serialize = "BOOTSTRAP")] Bootstrap, + #[strum(serialize = "CONTROLLER")] + Controller, } #[derive(Debug)] @@ -104,7 +106,16 @@ pub fn get_kafka_listener_config( let pod_fqdn = pod_fqdn(kafka, object_name, cluster_info)?; let mut listeners = vec![]; let mut advertised_listeners = vec![]; - let mut listener_security_protocol_map = BTreeMap::new(); + let mut listener_security_protocol_map: BTreeMap = + BTreeMap::new(); + + // TODO: REMOVE - Testing + listener_security_protocol_map.insert( + KafkaListenerName::Controller, + KafkaListenerProtocol::Plaintext, + ); + // TODO: REMOVE - Testing + listener_security_protocol_map.insert(KafkaListenerName::Internal, KafkaListenerProtocol::Ssl); // CLIENT if kafka_security.tls_client_authentication_class().is_some() { diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs index e9c467bc..c79c80b8 100644 --- a/rust/operator-binary/src/crd/role/broker.rs +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -15,8 +15,9 @@ use stackable_operator::{ use strum::{Display, EnumIter}; use crate::crd::{ + listener::KafkaListenerName, role::{ - KafkaRole, LOG_DIRS, NODE_ID, PROCESS_ROLES, + KAFKA_LOG_DIRS, KAFKA_PROCESS_ROLES, KafkaRole, commons::{CommonConfig, Storage, StorageFragment}, }, v1alpha1, @@ -90,7 +91,7 @@ impl BrokerConfig { max: Some(Quantity("1000m".to_owned())), }, memory: MemoryLimitsFragment { - limit: Some(Quantity("1Gi".to_owned())), + limit: Some(Quantity("2Gi".to_owned())), runtime_limits: NoRuntimeLimitsFragment {}, }, storage: StorageFragment { @@ -136,19 +137,21 @@ impl Configuration for BrokerConfigFragment { let mut config = 
BTreeMap::new(); if file == BROKER_PROPERTIES_FILE { - // TODO: generate? - config.insert(NODE_ID.to_string(), Some("1".to_string())); - config.insert( - PROCESS_ROLES.to_string(), + KAFKA_PROCESS_ROLES.to_string(), Some(KafkaRole::Broker.to_string()), ); config.insert( - LOG_DIRS.to_string(), + KAFKA_LOG_DIRS.to_string(), Some("/stackable/data/topicdata".to_string()), ); + config.insert( + "controller.listener.names".to_string(), + Some(KafkaListenerName::Controller.to_string()), + ); + // OPA if resource.spec.cluster_config.authorization.opa.is_some() { config.insert( diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 192f786c..6e2fc757 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -30,6 +30,9 @@ use crate::{ v1alpha1, }; +/// Broker and Kafka node.id properties should not clash; This is an offset for brokers. +pub const KAFKA_BROKER_ID_OFFSET: u16 = 1000; + // See: https://kafka.apache.org/documentation/#brokerconfigs /// The node ID associated with the roles this process is playing when process.roles is non-empty. /// This is required configuration when running in KRaft mode. @@ -45,6 +48,15 @@ pub const KAFKA_LOG_DIRS: &str = "log.dirs"; /// If the listener name is not a security protocol, listener.security.protocol.map must also be set. pub const KAFKA_LISTENERS: &str = "listeners"; +/// Specifies the listener addresses that the Kafka brokers will advertise to clients and other brokers. +/// The config is useful where the actual listener configuration listeners does not represent the addresses that clients should use to connect, +/// such as in cloud environments. The addresses are published to and managed by the controller, the brokers pull these data from the controller as needed. +/// In IaaS environments, this may need to be different from the interface to which the broker binds. If this is not set, the value for listeners will be used. 
+/// Unlike listeners, it is not valid to advertise the 0.0.0.0 meta-address. +/// Also unlike listeners, there can be duplicated ports in this property, so that one listener can be configured to advertise another listener's address. +/// This can be useful in some cases where external load balancers are used. +pub const KAFKA_ADVERTISED_LISTENERS: &str = "advertised.listeners"; + /// Map between listener names and security protocols. This must be defined for the same security protocol to be usable in more than one port or IP. /// For example, internal and external traffic can be separated even if SSL is required for both. /// Concretely, the user could define listeners with names INTERNAL and EXTERNAL and this property as: INTERNAL:SSL,EXTERNAL:SSL diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 1384bac3..66ca4f44 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -6,7 +6,6 @@ //! This is required due to overlaps between TLS encryption and e.g. 
mTLS authentication or Kerberos use std::collections::BTreeMap; -use indoc::formatdoc; use snafu::{ResultExt, Snafu, ensure}; use stackable_operator::{ builder::{ @@ -20,21 +19,16 @@ use stackable_operator::{ client::Client, crd::authentication::core, k8s_openapi::api::core::v1::Volume, - product_logging::framework::{ - create_vector_shutdown_file_command, remove_vector_shutdown_file_command, - }, shared::time::Duration, - utils::COMMON_BASH_TRAP_FUNCTIONS, }; use super::listener::node_port_cmd; use crate::crd::{ - LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_CONFIG_DIR, - STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, - STACKABLE_LOG_DIR, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_KRB5_PATH, + STACKABLE_LISTENER_BROKER_DIR, authentication::{self, ResolvedAuthenticationClasses}, - listener::{self, KafkaListenerConfig, node_address_cmd}, - role::{KafkaRole, broker::BROKER_PROPERTIES_FILE}, + listener::{self, node_address_cmd}, + role::KafkaRole, tls, v1alpha1, }; @@ -351,50 +345,6 @@ impl KafkaTlsSecurity { args } - /// Returns the commands to start the main Kafka container - pub fn kafka_container_commands( - &self, - kafka_listeners: &KafkaListenerConfig, - opa_connect_string: Option<&str>, - kerberos_enabled: bool, - ) -> Vec { - vec![formatdoc! {" - {COMMON_BASH_TRAP_FUNCTIONS} - {remove_vector_shutdown_file_command} - prepare_signal_handlers - containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & - {set_realm_env} - bin/kafka-server-start.sh {STACKABLE_CONFIG_DIR}/{BROKER_PROPERTIES_FILE} --override \"zookeeper.connect=$ZOOKEEPER\" --override \"listeners={listeners}\" --override \"advertised.listeners={advertised_listeners}\" --override \"listener.security.protocol.map={listener_security_protocol_map}\"{opa_config}{jaas_config} & - wait_for_termination $! 
- {create_vector_shutdown_file_command} - ", - remove_vector_shutdown_file_command = - remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), - create_vector_shutdown_file_command = - create_vector_shutdown_file_command(STACKABLE_LOG_DIR), - set_realm_env = match kerberos_enabled { - true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), - false => "".to_string(), - }, - listeners = kafka_listeners.listeners(), - advertised_listeners = kafka_listeners.advertised_listeners(), - listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), - opa_config = match opa_connect_string { - None => "".to_string(), - Some(opa_connect_string) => format!(" --override \"opa.authorizer.url={opa_connect_string}\""), - }, - jaas_config = match kerberos_enabled { - true => { - let service_name = KafkaRole::Broker.kerberos_service_name(); - let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); - let bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR); - // TODO replace client and bootstrap below with constants - format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" --override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string()}, - false => "".to_string(), - }, - }] - } - /// Adds required volumes and volume mounts to the pod and container builders /// depending on the tls and authentication settings. 
pub fn add_volume_and_volume_mounts( diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 34b2f767..8b2ad27e 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -42,7 +42,7 @@ use stackable_operator::{ }; use crate::{ - config::command::controller_kafka_container_command, + config::command::{broker_kafka_container_commands, controller_kafka_container_command}, crd::{ self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, @@ -292,14 +292,17 @@ pub fn build_broker_rolegroup_statefulset( "-c".to_string(), ]) .args(vec![ - kafka_security - .kafka_container_commands( - &kafka_listeners, - opa_connect_string, - kafka_security.has_kerberos_enabled(), - cluster_id, - ) - .join("\n"), + broker_kafka_container_commands( + cluster_id, + // we need controller pods + kafka + .pod_descriptors(&KafkaRole::Controller, cluster_info) + .context(BuildPodDescriptorsSnafu)?, + &kafka_listeners, + opa_connect_string, + kafka_security.has_kerberos_enabled(), + ) + .join("\n"), ]) .add_env_var( "EXTRA_ARGS", From f32440998ae97db7101432109e81044d8aa03570 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 28 Aug 2025 18:52:51 +0200 Subject: [PATCH 28/90] fix todos --- rust/operator-binary/src/config/command.rs | 81 ++++++++++--------- rust/operator-binary/src/crd/listener.rs | 16 ++-- .../src/resource/statefulset.rs | 23 +++--- 3 files changed, 59 insertions(+), 61 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 8c66cf22..79bd9fb8 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -27,30 +27,30 @@ pub fn broker_kafka_container_commands( kafka_listeners: &KafkaListenerConfig, opa_connect_string: Option<&str>, kerberos_enabled: bool, -) 
-> Vec { - // TODO: fix the "10$REPLICA_ID" fix to not clash with controller ids - vec![formatdoc! {" - {COMMON_BASH_TRAP_FUNCTIONS} - {remove_vector_shutdown_file_command} - prepare_signal_handlers - containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & - {set_realm_env} - - export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') - cp {config_dir}/{properties_file} /tmp/{properties_file} - - echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + {KAFKA_BROKER_ID_OFFSET}))\" >> /tmp/{properties_file} - echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} - echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} - echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} - echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} - - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted - bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & - - wait_for_termination $! - {create_vector_shutdown_file_command} - ", +) -> String { + // TODO: copy to tmp? mount readwrite folder? + formatdoc! 
{" + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + {set_realm_env} + + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} + + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + {KAFKA_BROKER_ID_OFFSET}))\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & + + wait_for_termination $! 
+ {create_vector_shutdown_file_command} + ", remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR), set_realm_env = match kerberos_enabled { @@ -77,7 +77,7 @@ pub fn broker_kafka_container_commands( format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" --override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string()}, false => "".to_string(), }, - }] + } } pub fn controller_kafka_container_command( @@ -87,25 +87,25 @@ pub fn controller_kafka_container_command( ) -> String { // TODO: copy to tmp? mount readwrite folder? formatdoc! 
{" - {COMMON_BASH_TRAP_FUNCTIONS} - {remove_vector_shutdown_file_command} - prepare_signal_handlers - containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & - export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') - cp {config_dir}/{properties_file} /tmp/{properties_file} + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} - echo \"{KAFKA_NODE_ID}=$REPLICA_ID\" >> /tmp/{properties_file} - echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} - echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} - echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + echo \"{KAFKA_NODE_ID}=$REPLICA_ID\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted - bin/kafka-server-start.sh /tmp/{properties_file} {overrides} & + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + bin/kafka-server-start.sh /tmp/{properties_file} {overrides} & - wait_for_termination $! - {create_vector_shutdown_file_command} - ", + wait_for_termination $! 
+ {create_vector_shutdown_file_command} + ", remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), config_dir = STACKABLE_CONFIG_DIR, properties_file = CONTROLLER_PROPERTIES_FILE, @@ -122,6 +122,7 @@ fn to_listeners() -> String { // TODO: // - document that variables are set in stateful set // - customize listener (CONTROLLER) + // - customize port "CONTROLLER://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:9093".to_string() } diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 2bff4d4b..111b7525 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -109,14 +109,6 @@ pub fn get_kafka_listener_config( let mut listener_security_protocol_map: BTreeMap = BTreeMap::new(); - // TODO: REMOVE - Testing - listener_security_protocol_map.insert( - KafkaListenerName::Controller, - KafkaListenerProtocol::Plaintext, - ); - // TODO: REMOVE - Testing - listener_security_protocol_map.insert(KafkaListenerName::Internal, KafkaListenerProtocol::Ssl); - // CLIENT if kafka_security.tls_client_authentication_class().is_some() { // 1) If client authentication required, we expose only CLIENT_AUTH connection with SSL @@ -241,6 +233,14 @@ pub fn get_kafka_listener_config( .insert(KafkaListenerName::Bootstrap, KafkaListenerProtocol::SaslSsl); } + // CONTROLLER + if kafka.is_controller_configured() { + listener_security_protocol_map.insert( + KafkaListenerName::Controller, + KafkaListenerProtocol::Plaintext, + ); + } + Ok(KafkaListenerConfig { listeners, advertised_listeners, diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 8b2ad27e..53e6b06d 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -291,19 +291,16 @@ pub fn build_broker_rolegroup_statefulset( "pipefail".to_string(), "-c".to_string(), ]) - 
.args(vec![ - broker_kafka_container_commands( - cluster_id, - // we need controller pods - kafka - .pod_descriptors(&KafkaRole::Controller, cluster_info) - .context(BuildPodDescriptorsSnafu)?, - &kafka_listeners, - opa_connect_string, - kafka_security.has_kerberos_enabled(), - ) - .join("\n"), - ]) + .args(vec![broker_kafka_container_commands( + cluster_id, + // we need controller pods + kafka + .pod_descriptors(&KafkaRole::Controller, cluster_info) + .context(BuildPodDescriptorsSnafu)?, + &kafka_listeners, + opa_connect_string, + kafka_security.has_kerberos_enabled(), + )]) .add_env_var( "EXTRA_ARGS", kafka_role From 934f6e14655df22c9acfe4e1baa16ee9751cb68e Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 29 Aug 2025 13:22:03 +0200 Subject: [PATCH 29/90] zookeeper working again --- rust/operator-binary/src/config/command.rs | 90 +++++++++++++------ rust/operator-binary/src/crd/listener.rs | 1 + rust/operator-binary/src/crd/role/broker.rs | 20 +++-- .../src/crd/role/controller.rs | 11 ++- .../src/resource/statefulset.rs | 27 +++++- 5 files changed, 110 insertions(+), 39 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 79bd9fb8..332ecf39 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -18,17 +18,18 @@ use crate::crd::{ KAFKA_LISTENERS, KAFKA_NODE_ID, KafkaRole, broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE, }, + v1alpha1, }; /// Returns the commands to start the main Kafka container pub fn broker_kafka_container_commands( + kafka: &v1alpha1::KafkaCluster, cluster_id: &str, controller_descriptors: Vec, kafka_listeners: &KafkaListenerConfig, opa_connect_string: Option<&str>, kerberos_enabled: bool, ) -> String { - // TODO: copy to tmp? mount readwrite folder? formatdoc! 
{" {COMMON_BASH_TRAP_FUNCTIONS} {remove_vector_shutdown_file_command} @@ -36,17 +37,7 @@ pub fn broker_kafka_container_commands( containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & {set_realm_env} - export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') - cp {config_dir}/{properties_file} /tmp/{properties_file} - - echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + {KAFKA_BROKER_ID_OFFSET}))\" >> /tmp/{properties_file} - echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} - echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} - echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} - echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} - - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted - bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & + {broker_start_command} wait_for_termination $! 
{create_vector_shutdown_file_command} @@ -57,6 +48,51 @@ pub fn broker_kafka_container_commands( true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), false => "".to_string(), }, + broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kerberos_enabled), + } +} + +fn broker_start_command( + kafka: &v1alpha1::KafkaCluster, + cluster_id: &str, + controller_descriptors: Vec, + kafka_listeners: &KafkaListenerConfig, + opa_connect_string: Option<&str>, + kerberos_enabled: bool, +) -> String { + let opa_config = match opa_connect_string { + None => "".to_string(), + Some(opa_connect_string) => { + format!(" --override \"opa.authorizer.url={opa_connect_string}\"") + } + }; + + let jaas_config = match kerberos_enabled { + true => { + let service_name = KafkaRole::Broker.kerberos_service_name(); + let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); + let bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR); + // TODO replace client and bootstrap below with constants + format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" --override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string() + } + false => "".to_string(), + }; + + // TODO: copy to tmp? mount readwrite folder? + if kafka.is_controller_configured() { + formatdoc! 
{" + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} + + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + {KAFKA_BROKER_ID_OFFSET}))\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & + ", config_dir = STACKABLE_CONFIG_DIR, properties_file = BROKER_PROPERTIES_FILE, bootstrap_servers = to_bootstrap_servers(&controller_descriptors), @@ -64,19 +100,23 @@ pub fn broker_kafka_container_commands( listeners = kafka_listeners.listeners(), advertised_listeners = kafka_listeners.advertised_listeners(), listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), - opa_config = match opa_connect_string { - None => "".to_string(), - Some(opa_connect_string) => format!(" --override \"opa.authorizer.url={opa_connect_string}\""), - }, - jaas_config = match kerberos_enabled { - true => { - let service_name = KafkaRole::Broker.kerberos_service_name(); - let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); - let bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR); - // TODO replace client and bootstrap below with constants - format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" 
--override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string()}, - false => "".to_string(), - }, + } + } else { + formatdoc! {" + bin/kafka-server-start.sh {config_dir}/{properties_file} \ + --override \"zookeeper.connect=$ZOOKEEPER\" \ + --override \"listeners={listeners}\" \ + --override \"advertised.listeners={advertised_listeners}\" \ + --override \"listener.security.protocol.map={listener_security_protocol_map}\" \ + {opa_config} \ + {jaas_config} \ + &", + config_dir = STACKABLE_CONFIG_DIR, + properties_file = BROKER_PROPERTIES_FILE, + listeners = kafka_listeners.listeners(), + advertised_listeners = kafka_listeners.advertised_listeners(), + listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), + } } } diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 111b7525..93aab294 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -235,6 +235,7 @@ pub fn get_kafka_listener_config( // CONTROLLER if kafka.is_controller_configured() { + // TODO: SSL? 
listener_security_protocol_map.insert( KafkaListenerName::Controller, KafkaListenerProtocol::Plaintext, diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs index c79c80b8..837ddd80 100644 --- a/rust/operator-binary/src/crd/role/broker.rs +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -137,21 +137,23 @@ impl Configuration for BrokerConfigFragment { let mut config = BTreeMap::new(); if file == BROKER_PROPERTIES_FILE { - config.insert( - KAFKA_PROCESS_ROLES.to_string(), - Some(KafkaRole::Broker.to_string()), - ); - config.insert( KAFKA_LOG_DIRS.to_string(), Some("/stackable/data/topicdata".to_string()), ); - config.insert( - "controller.listener.names".to_string(), - Some(KafkaListenerName::Controller.to_string()), - ); + // KRAFT + if resource.is_controller_configured() { + config.insert( + KAFKA_PROCESS_ROLES.to_string(), + Some(KafkaRole::Broker.to_string()), + ); + config.insert( + "controller.listener.names".to_string(), + Some(KafkaListenerName::Controller.to_string()), + ); + } // OPA if resource.spec.cluster_config.authorization.opa.is_some() { config.insert( diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index 52e9d972..e0b49d72 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -15,6 +15,7 @@ use stackable_operator::{ use strum::{Display, EnumIter}; use crate::crd::{ + listener::KafkaListenerName, role::{ KAFKA_LOG_DIRS, KAFKA_PROCESS_ROLES, KafkaRole, commons::{CommonConfig, Storage, StorageFragment}, @@ -127,14 +128,20 @@ impl Configuration for ControllerConfigFragment { let mut config = BTreeMap::new(); if file == CONTROLLER_PROPERTIES_FILE { + config.insert( + KAFKA_LOG_DIRS.to_string(), + Some("/stackable/data/kraft".to_string()), + ); + + // KRAFT config.insert( KAFKA_PROCESS_ROLES.to_string(), Some(KafkaRole::Controller.to_string()), ); config.insert( - 
KAFKA_LOG_DIRS.to_string(), - Some("/stackable/data/kraft".to_string()), + "controller.listener.names".to_string(), + Some(KafkaListenerName::Controller.to_string()), ); config.insert( diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 53e6b06d..258eddb4 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -23,10 +23,11 @@ use stackable_operator::{ apps::v1::{StatefulSet, StatefulSetSpec}, core::v1::{ ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, EnvVarSource, - ExecAction, ObjectFieldSelector, PodSpec, Probe, ServiceAccount, Volume, + ExecAction, ObjectFieldSelector, PodSpec, Probe, ServiceAccount, TCPSocketAction, + Volume, }, }, - apimachinery::pkg::apis::meta::v1::LabelSelector, + apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, }, kube::ResourceExt, kvp::Labels, @@ -292,6 +293,7 @@ pub fn build_broker_rolegroup_statefulset( "-c".to_string(), ]) .args(vec![broker_kafka_container_commands( + kafka, cluster_id, // we need controller pods kafka @@ -661,7 +663,26 @@ pub fn build_controller_rolegroup_statefulset( .context(AddVolumeMountSnafu)? .add_volume_mount("log", STACKABLE_LOG_DIR) .context(AddVolumeMountSnafu)? 
- .resources(merged_config.resources().clone().into()); + .resources(merged_config.resources().clone().into()) + // TODO: improve probes + .liveness_probe(Probe { + tcp_socket: Some(TCPSocketAction { + port: IntOrString::Int(kafka_security.client_port().into()), + ..Default::default() + }), + timeout_seconds: Some(5), + period_seconds: Some(5), + ..Probe::default() + }) + .readiness_probe(Probe { + tcp_socket: Some(TCPSocketAction { + port: IntOrString::Int(kafka_security.client_port().into()), + ..Default::default() + }), + timeout_seconds: Some(5), + period_seconds: Some(5), + ..Probe::default() + }); if let ContainerLogConfig { choice: From 03bcb419b15caf22dc02562af5acfa5962ab218d Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 29 Aug 2025 14:57:38 +0200 Subject: [PATCH 30/90] fix todos --- rust/operator-binary/src/config/command.rs | 59 +++++++++---------- rust/operator-binary/src/crd/mod.rs | 7 +-- rust/operator-binary/src/kafka_controller.rs | 1 - rust/operator-binary/src/product_logging.rs | 12 +++- .../src/resource/statefulset.rs | 5 +- 5 files changed, 43 insertions(+), 41 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 332ecf39..1065df42 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -1,5 +1,3 @@ -use std::collections::BTreeMap; - use indoc::formatdoc; use stackable_operator::{ product_logging::framework::{ @@ -18,6 +16,7 @@ use crate::crd::{ KAFKA_LISTENERS, KAFKA_NODE_ID, KafkaRole, broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE, }, + security::KafkaTlsSecurity, v1alpha1, }; @@ -28,7 +27,7 @@ pub fn broker_kafka_container_commands( controller_descriptors: Vec, kafka_listeners: &KafkaListenerConfig, opa_connect_string: Option<&str>, - kerberos_enabled: bool, + kafka_security: &KafkaTlsSecurity, ) -> String { formatdoc! 
{" {COMMON_BASH_TRAP_FUNCTIONS} @@ -44,11 +43,11 @@ pub fn broker_kafka_container_commands( ", remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR), - set_realm_env = match kerberos_enabled { + set_realm_env = match kafka_security.has_kerberos_enabled() { true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), false => "".to_string(), }, - broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kerberos_enabled), + broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kafka_security), } } @@ -58,7 +57,7 @@ fn broker_start_command( controller_descriptors: Vec, kafka_listeners: &KafkaListenerConfig, opa_connect_string: Option<&str>, - kerberos_enabled: bool, + kafka_security: &KafkaTlsSecurity, ) -> String { let opa_config = match opa_connect_string { None => "".to_string(), @@ -67,7 +66,7 @@ fn broker_start_command( } }; - let jaas_config = match kerberos_enabled { + let jaas_config = match kafka_security.has_kerberos_enabled() { true => { let service_name = KafkaRole::Broker.kerberos_service_name(); let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); @@ -78,6 +77,8 @@ fn broker_start_command( false => "".to_string(), }; + let client_port = kafka_security.client_port(); + // TODO: copy to tmp? mount readwrite folder? if kafka.is_controller_configured() { formatdoc! 
{" @@ -95,8 +96,8 @@ fn broker_start_command( ", config_dir = STACKABLE_CONFIG_DIR, properties_file = BROKER_PROPERTIES_FILE, - bootstrap_servers = to_bootstrap_servers(&controller_descriptors), - initial_controllers = to_initial_controllers(&controller_descriptors), + bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), + initial_controllers = to_initial_controllers(&controller_descriptors, client_port), listeners = kafka_listeners.listeners(), advertised_listeners = kafka_listeners.advertised_listeners(), listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), @@ -123,8 +124,9 @@ fn broker_start_command( pub fn controller_kafka_container_command( cluster_id: &str, controller_descriptors: Vec, - server_start_overrides: BTreeMap, + kafka_security: &KafkaTlsSecurity, ) -> String { + let client_port = kafka_security.client_port(); // TODO: copy to tmp? mount readwrite folder? formatdoc! {" {COMMON_BASH_TRAP_FUNCTIONS} @@ -141,7 +143,7 @@ pub fn controller_kafka_container_command( echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted - bin/kafka-server-start.sh /tmp/{properties_file} {overrides} & + bin/kafka-server-start.sh /tmp/{properties_file} & wait_for_termination $! 
{create_vector_shutdown_file_command} @@ -149,21 +151,19 @@ pub fn controller_kafka_container_command( remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), config_dir = STACKABLE_CONFIG_DIR, properties_file = CONTROLLER_PROPERTIES_FILE, - bootstrap_servers = to_bootstrap_servers(&controller_descriptors), - listeners = to_listeners(), + bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), + listeners = to_listeners(client_port), listener_security_protocol_map = to_listener_security_protocol_map(), - initial_controllers = to_initial_controllers(&controller_descriptors), - overrides = to_kafka_overrides(server_start_overrides), + initial_controllers = to_initial_controllers(&controller_descriptors, client_port), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) } } -fn to_listeners() -> String { +fn to_listeners(port: u16) -> String { // TODO: // - document that variables are set in stateful set // - customize listener (CONTROLLER) - // - customize port - "CONTROLLER://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:9093".to_string() + format!("CONTROLLER://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}") } fn to_listener_security_protocol_map() -> String { @@ -171,27 +171,26 @@ fn to_listener_security_protocol_map() -> String { "CONTROLLER:PLAINTEXT".to_string() } -fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor]) -> String { +fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { controller_descriptors .iter() - .map(|desc| desc.as_voter()) + .map(|desc| desc.as_voter(port)) .collect::>() .join(",") } -fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor]) -> String { +fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { controller_descriptors .iter() - // TODO: make port configureable - .map(|desc| 
format!("{fqdn}:{port}", fqdn = desc.fqdn(), port = 9093)) + .map(|desc| format!("{fqdn}:{port}", fqdn = desc.fqdn())) .collect::>() .join(",") } -fn to_kafka_overrides(overrides: BTreeMap) -> String { - overrides - .iter() - .map(|(key, value)| format!("--override \"{key}={value}\"")) - .collect::>() - .join(" ") -} +// fn to_kafka_overrides(overrides: BTreeMap) -> String { +// overrides +// .iter() +// .map(|(key, value)| format!("--override \"{key}={value}\"")) +// .collect::>() +// .join(" ") +// } diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 9e833f58..2a2e9ac2 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -259,6 +259,7 @@ impl v1alpha1::KafkaCluster { }) .collect(), + // TODO: this does not work for multiple rolegroups (the index / replica) KafkaRole::Controller => self .controller_role() .iter() @@ -269,7 +270,7 @@ impl v1alpha1::KafkaCluster { .flat_map(move |(rolegroup_name, rolegroup)| { let rolegroup_ref = self.rolegroup_ref(kafka_role, rolegroup_name); let ns = ns.clone(); - (0..rolegroup.replicas.unwrap_or(0)).map(move |i| KafkaPodDescriptor { + (0..rolegroup.replicas.unwrap_or(0)).map(move |i: u16| KafkaPodDescriptor { namespace: ns.clone(), role_group_service_name: rolegroup_ref.object_name(), replica: i, @@ -329,13 +330,11 @@ impl KafkaPodDescriptor { /// * controller-0 is the replica's host, /// * 1234 is the replica's port. // TODO(@maltesander): Even though the used Uuid states to be type 4 it does not work... 0000000000-00000000000 works... 
- pub fn as_voter(&self) -> String { + pub fn as_voter(&self, port: u16) -> String { format!( "{replica}@{fqdn}:{port}:0000000000-{replica:0>11}", replica = self.replica, fqdn = self.fqdn(), - // TODO: make port configureable - port = 9093 ) } } diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index b7adccd8..539d66a5 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -389,7 +389,6 @@ pub async fn reconcile_kafka( .context(BuildStatefulsetSnafu)?, }; - // TODO: broker / controller? if let AnyConfig::Broker(broker_config) = merged_config { let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( kafka, diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 94d21659..20d9e1ef 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -10,7 +10,11 @@ use stackable_operator::{ role_utils::RoleGroupRef, }; -use crate::crd::{STACKABLE_LOG_DIR, role::AnyConfig, v1alpha1}; +use crate::crd::{ + STACKABLE_LOG_DIR, + role::{AnyConfig, broker::BrokerContainer, controller::ControllerContainer}, + v1alpha1, +}; pub const LOG4J_CONFIG_FILE: &str = "log4j.properties"; pub const KAFKA_LOG_FILE: &str = "kafka.log4j.xml"; @@ -59,8 +63,10 @@ pub fn extend_role_group_config_map( cm_builder, Some(merged_config.kafka_logging()), LOG4J_CONFIG_FILE, - // TODO: configure? 
- "kafka", + match merged_config { + AnyConfig::Broker(_) => BrokerContainer::Kafka.to_string(), + AnyConfig::Controller(_) => ControllerContainer::Kafka.to_string(), + }, KAFKA_LOG_FILE, MAX_KAFKA_LOG_FILES_SIZE, ); diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 258eddb4..f70cc806 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -301,7 +301,7 @@ pub fn build_broker_rolegroup_statefulset( .context(BuildPodDescriptorsSnafu)?, &kafka_listeners, opa_connect_string, - kafka_security.has_kerberos_enabled(), + kafka_security, )]) .add_env_var( "EXTRA_ARGS", @@ -629,8 +629,7 @@ pub fn build_controller_rolegroup_statefulset( kafka .pod_descriptors(kafka_role, cluster_info) .context(BuildPodDescriptorsSnafu)?, - // TODO: fix overrides - BTreeMap::new(), + kafka_security, )]) .add_env_var( "EXTRA_ARGS", From b7d9738b5548fdd7ffcdbe06f60afb98e976d9a8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 29 Aug 2025 17:39:29 +0200 Subject: [PATCH 31/90] kraftify smoke test --- .../{20-assert.yaml => 20-assert.yaml.j2} | 2 + .../kuttl/smoke/20-install-zk.yaml.j2 | 2 + tests/templates/kuttl/smoke/30-assert.yaml.j2 | 56 ++++++++++++++++++- .../kuttl/smoke/30-install-kafka.yaml.j2 | 14 +++++ tests/templates/kuttl/smoke/test_heap.sh | 4 +- 5 files changed, 74 insertions(+), 4 deletions(-) rename tests/templates/kuttl/smoke/{20-assert.yaml => 20-assert.yaml.j2} (70%) diff --git a/tests/templates/kuttl/smoke/20-assert.yaml b/tests/templates/kuttl/smoke/20-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/smoke/20-assert.yaml rename to tests/templates/kuttl/smoke/20-assert.yaml.j2 index c9cfcf5c..5d46bbff 100644 --- a/tests/templates/kuttl/smoke/20-assert.yaml +++ b/tests/templates/kuttl/smoke/20-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: 
kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 b/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 index 5ab2c212..850aa4cc 100644 --- a/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 +++ b/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -18,3 +19,4 @@ spec: roleGroups: default: replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 b/tests/templates/kuttl/smoke/30-assert.yaml.j2 index 96d71c3d..76b7f28f 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2 @@ -15,10 +15,10 @@ spec: resources: limits: cpu: 1100m # From podOverrides - memory: 1Gi + memory: 2Gi requests: cpu: 300m # From podOverrides - memory: 1Gi + memory: 2Gi - name: kcat-prober resources: limits: @@ -86,3 +86,55 @@ status: expectedPods: 1 currentHealthy: 1 disruptionsAllowed: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: log-dirs-test-kafka-controller-default-0 +spec: + resources: + requests: + storage: 2Gi +status: + phase: Bound +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: log-dirs-test-kafka-controller-default-1 +spec: + resources: + requests: + storage: 2Gi +status: + phase: Bound +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: log-dirs-test-kafka-controller-default-2 +spec: + resources: + requests: + storage: 2Gi +status: + phase: Bound +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: test-kafka-controller +status: + 
expectedPods: 3 + currentHealthy: 3 + disruptionsAllowed: 1 +{% endif %} diff --git a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 index 4f3b95a0..4b3133bb 100644 --- a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 @@ -26,7 +26,21 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk +{% else %} + controllers: + envOverrides: + COMMON_VAR: role-value # overridden by role group below + ROLE_VAR: role-value # only defined here at role level + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + requestedSecretLifetime: 7d + roleGroups: + default: + replicas: 3 +{% endif %} brokers: configOverrides: broker.properties: diff --git a/tests/templates/kuttl/smoke/test_heap.sh b/tests/templates/kuttl/smoke/test_heap.sh index 71193850..cd76d42a 100755 --- a/tests/templates/kuttl/smoke/test_heap.sh +++ b/tests/templates/kuttl/smoke/test_heap.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # Usage: test_heap.sh -# 1Gi * 0.8 -> 819 -EXPECTED_HEAP="-Xmx819m -Xms819m" +# 2Gi * 0.8 -> 1638 +EXPECTED_HEAP="-Xmx1638m -Xms1638m" # Check if ZK_SERVER_HEAP is set to the correct calculated value if [[ $KAFKA_HEAP_OPTS == "$EXPECTED_HEAP" ]] From 1f42b832c6db70455698174298a5fc00e98039df Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 29 Aug 2025 17:40:07 +0200 Subject: [PATCH 32/90] kraftify tests definition --- .vscode/launch.json | 23 ++++++-- examples/tls/simple-kafka-cluster-tls.yaml | 66 ++-------------------- tests/test-definition.yaml | 5 ++ 3 files changed, 29 insertions(+), 65 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index e2a19cfd..fb235e6d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,16 +4,29 @@ 
{ "type": "lldb", "request": "launch", - "name": "Debug operator binary", + "name": "Debug stackable-kafka-operator", "cargo": { - "args": ["build"], + "args": [ + "build" + ], "filter": { - "name": "stackable-{[ operator.name }]", + "name": "stackable-kafka-operator", "kind": "bin" } }, - "args": ["run"], + "program": "${workspaceFolder}/target/debug/stackable-kafka-operator", + "args": [ + "run", + "--kafka-broker-clusterrole", + "kafka-operator", + "--operator-namespace", + "default", + "--operator-service-name", + "kafka-operator", + "--kubernetes-node-name", + "kind-control-plane" + ], "cwd": "${workspaceFolder}" } ] -} +} \ No newline at end of file diff --git a/examples/tls/simple-kafka-cluster-tls.yaml b/examples/tls/simple-kafka-cluster-tls.yaml index 2e19771b..5e3d0499 100644 --- a/examples/tls/simple-kafka-cluster-tls.yaml +++ b/examples/tls/simple-kafka-cluster-tls.yaml @@ -1,73 +1,19 @@ --- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperCluster -metadata: - name: simple-zk -spec: - image: - productVersion: 3.8.3 - servers: - roleGroups: - default: - replicas: 3 ---- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperZnode -metadata: - name: simple-kafka-znode -spec: - clusterRef: - name: simple-zk ---- -apiVersion: secrets.stackable.tech/v1alpha1 -kind: SecretClass -metadata: - name: kafka-internal-tls -spec: - backend: - autoTls: - ca: - secret: - name: secret-provisioner-kafka-internal-tls-ca - namespace: default - autoGenerate: true ---- -apiVersion: authentication.stackable.tech/v1alpha1 -kind: AuthenticationClass -metadata: - name: kafka-client-auth-tls -spec: - provider: - tls: - clientCertSecretClass: kafka-client-auth-secret ---- -apiVersion: secrets.stackable.tech/v1alpha1 -kind: SecretClass -metadata: - name: kafka-client-auth-secret -spec: - backend: - autoTls: - ca: - secret: - name: secret-provisioner-tls-kafka-client-ca - namespace: default - autoGenerate: true ---- apiVersion: kafka.stackable.tech/v1alpha1 
kind: KafkaCluster metadata: name: simple-kafka spec: image: - productVersion: 3.7.2 + productVersion: 3.9.1 clusterConfig: - authentication: - - authenticationClass: kafka-client-auth-tls tls: - internalSecretClass: kafka-internal-tls + internalSecretClass: tls serverSecretClass: tls - zookeeperConfigMapName: simple-kafka-znode + controllers: + roleGroups: + default: + replicas: 3 brokers: roleGroups: default: diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index da16182f..f0b19698 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -62,12 +62,17 @@ dimensions: - "cluster-internal" - "external-stable" - "external-unstable" + - name: use-kraft-controller + values: + - "true" + - "false" tests: - name: smoke dimensions: - kafka - zookeeper - use-client-tls + - use-kraft-controller - openshift - name: configuration dimensions: From 104c1b12432a7837efca7a46cdc5f1777c47b3e7 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 29 Aug 2025 17:42:58 +0200 Subject: [PATCH 33/90] revert test changes --- .vscode/launch.json | 15 +---- examples/tls/simple-kafka-cluster-tls.yaml | 66 ++++++++++++++++++++-- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index fb235e6d..0df32d9c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,27 +4,18 @@ { "type": "lldb", "request": "launch", - "name": "Debug stackable-kafka-operator", + "name": "Debug operator binary", "cargo": { "args": [ "build" ], "filter": { - "name": "stackable-kafka-operator", + "name": "stackable-{[ operator.name }]", "kind": "bin" } }, - "program": "${workspaceFolder}/target/debug/stackable-kafka-operator", "args": [ - "run", - "--kafka-broker-clusterrole", - "kafka-operator", - "--operator-namespace", - "default", - "--operator-service-name", - "kafka-operator", - "--kubernetes-node-name", - "kind-control-plane" + "run" ], "cwd": "${workspaceFolder}" } diff --git 
a/examples/tls/simple-kafka-cluster-tls.yaml b/examples/tls/simple-kafka-cluster-tls.yaml index 5e3d0499..2e19771b 100644 --- a/examples/tls/simple-kafka-cluster-tls.yaml +++ b/examples/tls/simple-kafka-cluster-tls.yaml @@ -1,19 +1,73 @@ --- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperCluster +metadata: + name: simple-zk +spec: + image: + productVersion: 3.8.3 + servers: + roleGroups: + default: + replicas: 3 +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: simple-kafka-znode +spec: + clusterRef: + name: simple-zk +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: kafka-internal-tls +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-kafka-internal-tls-ca + namespace: default + autoGenerate: true +--- +apiVersion: authentication.stackable.tech/v1alpha1 +kind: AuthenticationClass +metadata: + name: kafka-client-auth-tls +spec: + provider: + tls: + clientCertSecretClass: kafka-client-auth-secret +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: kafka-client-auth-secret +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-tls-kafka-client-ca + namespace: default + autoGenerate: true +--- apiVersion: kafka.stackable.tech/v1alpha1 kind: KafkaCluster metadata: name: simple-kafka spec: image: - productVersion: 3.9.1 + productVersion: 3.7.2 clusterConfig: + authentication: + - authenticationClass: kafka-client-auth-tls tls: - internalSecretClass: tls + internalSecretClass: kafka-internal-tls serverSecretClass: tls - controllers: - roleGroups: - default: - replicas: 3 + zookeeperConfigMapName: simple-kafka-znode brokers: roleGroups: default: From f33fbc1d0204e58c2768960a2d8c77bdf7dcd066 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 1 Sep 2025 15:51:41 +0200 Subject: [PATCH 34/90] wip - controller tls --- rust/operator-binary/src/config/command.rs | 35 ++- 
rust/operator-binary/src/crd/listener.rs | 88 ++++++- .../src/crd/role/controller.rs | 5 - rust/operator-binary/src/crd/role/mod.rs | 1 + rust/operator-binary/src/crd/security.rs | 231 ++++++++++++------ .../operator-binary/src/resource/configmap.rs | 7 +- .../src/resource/statefulset.rs | 24 +- .../tls/{10-assert.yaml => 10-assert.yaml.j2} | 2 + .../kuttl/tls/10-install-zookeeper.yaml.j2 | 2 + tests/templates/kuttl/tls/20-assert.yaml | 12 - tests/templates/kuttl/tls/20-assert.yaml.j2 | 22 ++ .../kuttl/tls/20-install-kafka.yaml.j2 | 7 + tests/test-definition.yaml | 1 + 13 files changed, 313 insertions(+), 124 deletions(-) rename tests/templates/kuttl/tls/{10-assert.yaml => 10-assert.yaml.j2} (70%) delete mode 100644 tests/templates/kuttl/tls/20-assert.yaml create mode 100644 tests/templates/kuttl/tls/20-assert.yaml.j2 diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 1065df42..15a355ef 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -9,7 +9,7 @@ use stackable_operator::{ use crate::crd::{ KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, - listener::{KafkaListenerConfig, node_address_cmd}, + listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, role::{ KAFKA_ADVERTISED_LISTENERS, KAFKA_BROKER_ID_OFFSET, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, @@ -106,9 +106,9 @@ fn broker_start_command( formatdoc! 
{" bin/kafka-server-start.sh {config_dir}/{properties_file} \ --override \"zookeeper.connect=$ZOOKEEPER\" \ - --override \"listeners={listeners}\" \ - --override \"advertised.listeners={advertised_listeners}\" \ - --override \"listener.security.protocol.map={listener_security_protocol_map}\" \ + --override \"{KAFKA_LISTENERS}={listeners}\" \ + --override \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" \ + --override \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" \ {opa_config} \ {jaas_config} \ &", @@ -124,6 +124,7 @@ fn broker_start_command( pub fn controller_kafka_container_command( cluster_id: &str, controller_descriptors: Vec, + kafka_listeners: &KafkaListenerConfig, kafka_security: &KafkaTlsSecurity, ) -> String { let client_port = kafka_security.client_port(); @@ -153,7 +154,7 @@ pub fn controller_kafka_container_command( properties_file = CONTROLLER_PROPERTIES_FILE, bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), listeners = to_listeners(client_port), - listener_security_protocol_map = to_listener_security_protocol_map(), + listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), initial_controllers = to_initial_controllers(&controller_descriptors, client_port), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) } @@ -162,13 +163,19 @@ pub fn controller_kafka_container_command( fn to_listeners(port: u16) -> String { // TODO: // - document that variables are set in stateful set - // - customize listener (CONTROLLER) - format!("CONTROLLER://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}") + // - customize listener (CONTROLLER / CONTROLLER_AUTH?) 
+ format!( + "{listener_name}://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}", + listener_name = KafkaListenerName::Controller + ) } -fn to_listener_security_protocol_map() -> String { - // TODO: make configurable - "CONTROLLER:PLAINTEXT".to_string() +fn to_listener_security_protocol_map(kafka_listeners: &KafkaListenerConfig) -> String { + // TODO: make configurable - CONTROLLER_AUTH + kafka_listeners + .listener_security_protocol_map_for_listener(&KafkaListenerName::Controller) + // todo better error + .unwrap_or("".to_string()) } fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { @@ -186,11 +193,3 @@ fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor], port: u16 .collect::>() .join(",") } - -// fn to_kafka_overrides(overrides: BTreeMap) -> String { -// overrides -// .iter() -// .map(|(key, value)| format!("--override \"{key}={value}\"")) -// .collect::>() -// .join(" ") -// } diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 93aab294..f63bd132 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -44,6 +44,59 @@ pub enum KafkaListenerName { Bootstrap, #[strum(serialize = "CONTROLLER")] Controller, + #[strum(serialize = "CONTROLLER_AUTH")] + ControllerAuth, +} + +impl KafkaListenerName { + pub fn listener_ssl_keystore_location(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.keystore.location", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_keystore_password(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.keystore.password", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_keystore_type(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.keystore.type", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_truststore_location(&self) -> 
String { + format!( + "listener.name.{listener_name}.ssl.truststore.location", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_truststore_password(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.truststore.password", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_truststore_type(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.truststore.type", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_client_auth(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.client.auth", + listener_name = self.to_string().to_lowercase() + ) + } } #[derive(Debug)] @@ -82,6 +135,16 @@ impl KafkaListenerConfig { .collect::>() .join(",") } + + /// Returns the `listener.security.protocol.map` for the Kafka `broker.properties` config. + pub fn listener_security_protocol_map_for_listener( + &self, + listener_name: &KafkaListenerName, + ) -> Option { + self.listener_security_protocol_map + .get(listener_name) + .map(|protocol| format!("{listener_name}:{protocol}")) + } } #[derive(Debug)] @@ -127,6 +190,10 @@ pub fn get_kafka_listener_config( }); listener_security_protocol_map .insert(KafkaListenerName::ClientAuth, KafkaListenerProtocol::Ssl); + listener_security_protocol_map.insert( + KafkaListenerName::ControllerAuth, + KafkaListenerProtocol::Ssl, + ); } else if kafka_security.has_kerberos_enabled() { // 2) Kerberos and TLS authentication classes are mutually exclusive listeners.push(KafkaListener { @@ -144,6 +211,10 @@ pub fn get_kafka_listener_config( }); listener_security_protocol_map .insert(KafkaListenerName::Client, KafkaListenerProtocol::SaslSsl); + listener_security_protocol_map.insert( + KafkaListenerName::Controller, + KafkaListenerProtocol::SaslSsl, + ); } else if kafka_security.tls_server_secret_class().is_some() { // 3) If no client authentication but tls is required we expose CLIENT with SSL listeners.push(KafkaListener { @@ 
-180,7 +251,7 @@ pub fn get_kafka_listener_config( .insert(KafkaListenerName::Client, KafkaListenerProtocol::Plaintext); } - // INTERNAL + // INTERNAL / CONTROLLER if kafka_security.has_kerberos_enabled() || kafka_security.tls_internal_secret_class().is_some() { // 5) & 6) Kerberos and TLS authentication classes are mutually exclusive but both require internal tls to be used @@ -196,6 +267,8 @@ pub fn get_kafka_listener_config( }); listener_security_protocol_map .insert(KafkaListenerName::Internal, KafkaListenerProtocol::Ssl); + listener_security_protocol_map + .insert(KafkaListenerName::Controller, KafkaListenerProtocol::Ssl); } else { // 7) If no internal tls is required we expose INTERNAL as PLAINTEXT listeners.push(KafkaListener { @@ -212,6 +285,10 @@ pub fn get_kafka_listener_config( KafkaListenerName::Internal, KafkaListenerProtocol::Plaintext, ); + listener_security_protocol_map.insert( + KafkaListenerName::Controller, + KafkaListenerProtocol::Plaintext, + ); } // BOOTSTRAP @@ -233,15 +310,6 @@ pub fn get_kafka_listener_config( .insert(KafkaListenerName::Bootstrap, KafkaListenerProtocol::SaslSsl); } - // CONTROLLER - if kafka.is_controller_configured() { - // TODO: SSL? 
- listener_security_protocol_map.insert( - KafkaListenerName::Controller, - KafkaListenerProtocol::Plaintext, - ); - } - Ok(KafkaListenerConfig { listeners, advertised_listeners, diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index e0b49d72..0113c4d2 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -143,11 +143,6 @@ impl Configuration for ControllerConfigFragment { "controller.listener.names".to_string(), Some(KafkaListenerName::Controller.to_string()), ); - - config.insert( - "controller.listener.names".to_string(), - Some("CONTROLLER".to_string()), - ); } Ok(config) diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 6e2fc757..9e28b7f4 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -130,6 +130,7 @@ impl KafkaRole { /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, /// but is similar to HBase). + // TODO: split into broker / controller? 
pub fn kerberos_service_name(&self) -> &'static str { "kafka" } diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 66ca4f44..16c132b6 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -27,7 +27,7 @@ use crate::crd::{ LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BROKER_DIR, authentication::{self, ResolvedAuthenticationClasses}, - listener::{self, node_address_cmd}, + listener::{self, KafkaListenerName, node_address_cmd}, role::KafkaRole, tls, v1alpha1, }; @@ -68,59 +68,13 @@ impl KafkaTlsSecurity { // be able to expose principals for both the broker and bootstrap in the // JAAS configuration, so that clients can use both. pub const BOOTSTRAP_PORT_NAME: &'static str = "bootstrap"; - // - Bootstrapper - const BOOTSTRAP_SSL_KEYSTORE_LOCATION: &'static str = - "listener.name.bootstrap.ssl.keystore.location"; - const BOOTSTRAP_SSL_KEYSTORE_PASSWORD: &'static str = - "listener.name.bootstrap.ssl.keystore.password"; - const BOOTSTRAP_SSL_KEYSTORE_TYPE: &'static str = "listener.name.bootstrap.ssl.keystore.type"; - const BOOTSTRAP_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.bootstrap.ssl.truststore.location"; - const BOOTSTRAP_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.bootstrap.ssl.truststore.password"; - const BOOTSTRAP_SSL_TRUSTSTORE_TYPE: &'static str = - "listener.name.bootstrap.ssl.truststore.type"; - const CLIENT_AUTH_SSL_CLIENT_AUTH: &'static str = "listener.name.client_auth.ssl.client.auth"; - // - TLS client authentication - const CLIENT_AUTH_SSL_KEYSTORE_LOCATION: &'static str = - "listener.name.client_auth.ssl.keystore.location"; - const CLIENT_AUTH_SSL_KEYSTORE_PASSWORD: &'static str = - "listener.name.client_auth.ssl.keystore.password"; - const CLIENT_AUTH_SSL_KEYSTORE_TYPE: &'static str = - "listener.name.client_auth.ssl.keystore.type"; - const 
CLIENT_AUTH_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.client_auth.ssl.truststore.location"; - const CLIENT_AUTH_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.client_auth.ssl.truststore.password"; - const CLIENT_AUTH_SSL_TRUSTSTORE_TYPE: &'static str = - "listener.name.client_auth.ssl.truststore.type"; pub const CLIENT_PORT: u16 = 9092; // ports pub const CLIENT_PORT_NAME: &'static str = "kafka"; - // - TLS client - const CLIENT_SSL_KEYSTORE_LOCATION: &'static str = "listener.name.client.ssl.keystore.location"; - const CLIENT_SSL_KEYSTORE_PASSWORD: &'static str = "listener.name.client.ssl.keystore.password"; - const CLIENT_SSL_KEYSTORE_TYPE: &'static str = "listener.name.client.ssl.keystore.type"; - const CLIENT_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.client.ssl.truststore.location"; - const CLIENT_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.client.ssl.truststore.password"; - const CLIENT_SSL_TRUSTSTORE_TYPE: &'static str = "listener.name.client.ssl.truststore.type"; // internal pub const INTERNAL_PORT: u16 = 19092; // - TLS internal const INTER_BROKER_LISTENER_NAME: &'static str = "inter.broker.listener.name"; - const INTER_SSL_CLIENT_AUTH: &'static str = "listener.name.internal.ssl.client.auth"; - const INTER_SSL_KEYSTORE_LOCATION: &'static str = - "listener.name.internal.ssl.keystore.location"; - const INTER_SSL_KEYSTORE_PASSWORD: &'static str = - "listener.name.internal.ssl.keystore.password"; - const INTER_SSL_KEYSTORE_TYPE: &'static str = "listener.name.internal.ssl.keystore.type"; - const INTER_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.internal.ssl.truststore.location"; - const INTER_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.internal.ssl.truststore.password"; - const INTER_SSL_TRUSTSTORE_TYPE: &'static str = "listener.name.internal.ssl.truststore.type"; pub const SECURE_BOOTSTRAP_PORT: u16 = 9095; pub const SECURE_CLIENT_PORT: u16 = 9093; pub const SECURE_CLIENT_PORT_NAME: 
&'static str = "kafka-tls"; @@ -345,9 +299,9 @@ impl KafkaTlsSecurity { args } - /// Adds required volumes and volume mounts to the pod and container builders + /// Adds required volumes and volume mounts to the broker pod and container builders /// depending on the tls and authentication settings. - pub fn add_volume_and_volume_mounts( + pub fn add_broker_volume_and_volume_mounts( &self, pod_builder: &mut PodBuilder, cb_kcat_prober: &mut ContainerBuilder, @@ -405,9 +359,43 @@ impl KafkaTlsSecurity { Ok(()) } + /// Adds required volumes and volume mounts to the controller pod and container builders + /// depending on the tls and authentication settings. + pub fn add_controller_volume_and_volume_mounts( + &self, + pod_builder: &mut PodBuilder, + cb_kafka: &mut ContainerBuilder, + requested_secret_lifetime: &Duration, + ) -> Result<(), Error> { + if let Some(tls_internal_secret_class) = self.tls_internal_secret_class() { + pod_builder + .add_volume( + VolumeBuilder::new(Self::STACKABLE_TLS_KAFKA_INTERNAL_VOLUME_NAME) + .ephemeral( + SecretOperatorVolumeSourceBuilder::new(tls_internal_secret_class) + .with_pod_scope() + .with_format(SecretFormat::TlsPkcs12) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime) + .build() + .context(SecretVolumeBuildSnafu)?, + ) + .build(), + ) + .context(AddVolumeSnafu)?; + cb_kafka + .add_volume_mount( + Self::STACKABLE_TLS_KAFKA_INTERNAL_VOLUME_NAME, + Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR, + ) + .context(AddVolumeMountSnafu)?; + } + + Ok(()) + } + /// Returns required Kafka configuration settings for the `broker.properties` file /// depending on the tls and authentication settings. - pub fn config_settings(&self) -> BTreeMap { + pub fn broker_config_settings(&self) -> BTreeMap { let mut config = BTreeMap::new(); // We set either client tls with authentication or client tls without authentication @@ -415,57 +403,57 @@ impl KafkaTlsSecurity { // be trusted. 
if self.tls_client_authentication_class().is_some() { config.insert( - Self::CLIENT_AUTH_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_AUTH_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_AUTH_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::CLIENT_AUTH_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_AUTH_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_AUTH_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_truststore_type(), "PKCS12".to_string(), ); // client auth required config.insert( - Self::CLIENT_AUTH_SSL_CLIENT_AUTH.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_client_auth(), "required".to_string(), ); } else if self.tls_server_secret_class().is_some() { config.insert( - Self::CLIENT_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::Client.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::Client.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::Client.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::CLIENT_SSL_TRUSTSTORE_LOCATION.to_string(), + 
KafkaListenerName::Client.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::Client.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::Client.listener_ssl_truststore_type(), "PKCS12".to_string(), ); } @@ -473,59 +461,86 @@ impl KafkaTlsSecurity { if self.has_kerberos_enabled() { // Bootstrap config.insert( - Self::BOOTSTRAP_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::BOOTSTRAP_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::BOOTSTRAP_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::BOOTSTRAP_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::BOOTSTRAP_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::BOOTSTRAP_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_truststore_type(), "PKCS12".to_string(), ); } // Internal TLS if self.tls_internal_secret_class().is_some() { + // BROKERS + config.insert( + KafkaListenerName::Internal.listener_ssl_keystore_location(), + format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_keystore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + 
KafkaListenerName::Internal.listener_ssl_keystore_type(), + "PKCS12".to_string(), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_truststore_location(), + format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); config.insert( - Self::INTER_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::Internal.listener_ssl_truststore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_truststore_type(), + "PKCS12".to_string(), + ); + // CONTROLLERS + config.insert( + KafkaListenerName::Controller.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), ); config.insert( - Self::INTER_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::Controller.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::INTER_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::Controller.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::INTER_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::Controller.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), ); config.insert( - Self::INTER_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::Controller.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::INTER_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::Controller.listener_ssl_truststore_type(), "PKCS12".to_string(), ); + // client auth required config.insert( - Self::INTER_SSL_CLIENT_AUTH.to_string(), + KafkaListenerName::Internal.listener_ssl_client_auth(), "required".to_string(), ); } @@ -533,6 +548,7 @@ impl KafkaTlsSecurity { // Kerberos if self.has_kerberos_enabled() { config.insert("sasl.enabled.mechanisms".to_string(), "GSSAPI".to_string()); + // TODO: what service name? 
config.insert( "sasl.kerberos.service.name".to_string(), KafkaRole::Broker.kerberos_service_name().to_string(), @@ -553,6 +569,69 @@ impl KafkaTlsSecurity { config } + /// Returns required Kafka configuration settings for the `controller.properties` file + /// depending on the tls and authentication settings. + pub fn controller_config_settings(&self) -> BTreeMap { + let mut config = BTreeMap::new(); + + // We set either client tls with authentication or client tls without authentication + // If authentication is explicitly required we do not want to have any other CAs to + // be trusted. + if self.tls_client_authentication_class().is_some() { + // client auth required + config.insert( + KafkaListenerName::ControllerAuth.listener_ssl_client_auth(), + "required".to_string(), + ); + } + + if self.tls_client_authentication_class().is_some() + || self.tls_internal_secret_class().is_some() + { + config.insert( + KafkaListenerName::Controller.listener_ssl_keystore_location(), + format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_keystore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_keystore_type(), + "PKCS12".to_string(), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_truststore_location(), + format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_truststore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_truststore_type(), + "PKCS12".to_string(), + ); + } + + // Kerberos + if self.has_kerberos_enabled() { + config.insert("sasl.enabled.mechanisms".to_string(), "GSSAPI".to_string()); + // TODO: what service name? 
+ config.insert( + "sasl.kerberos.service.name".to_string(), + KafkaRole::Broker.kerberos_service_name().to_string(), + ); + config.insert( + "sasl.mechanism.inter.broker.protocol".to_string(), + "GSSAPI".to_string(), + ); + tracing::debug!("Kerberos configs added: [{:#?}]", config); + } + + config + } + /// Returns the `SecretClass` provided in a `AuthenticationClass` for TLS. fn get_tls_secret_class(&self) -> Option<&String> { self.resolved_authentication_classes diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index da42e95b..e96f42bd 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -67,8 +67,11 @@ pub fn build_rolegroup_config_map( .cloned() .unwrap_or_default(); - if let AnyConfig::Broker(_) = merged_config { - kafka_config.extend(kafka_security.config_settings()) + match merged_config { + AnyConfig::Broker(_) => kafka_config.extend(kafka_security.broker_config_settings()), + AnyConfig::Controller(_) => { + kafka_config.extend(kafka_security.controller_config_settings()) + } } kafka_config.extend(graceful_shutdown_config_properties()); diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index f70cc806..878ce8f7 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -203,7 +203,7 @@ pub fn build_broker_rolegroup_statefulset( .requested_secret_lifetime .context(MissingSecretLifetimeSnafu)?; kafka_security - .add_volume_and_volume_mounts( + .add_broker_volume_and_volume_mounts( &mut pod_builder, &mut cb_kcat_prober, &mut cb_kafka, @@ -615,6 +615,14 @@ pub fn build_controller_rolegroup_statefulset( ..EnvVar::default() }); + let kafka_listeners = get_kafka_listener_config( + kafka, + kafka_security, + &rolegroup_ref.object_name(), + cluster_info, + ) + .context(InvalidKafkaListenersSnafu)?; + cb_kafka 
.image_from_product_image(resolved_product_image) .command(vec![ @@ -629,6 +637,7 @@ pub fn build_controller_rolegroup_statefulset( kafka .pod_descriptors(kafka_role, cluster_info) .context(BuildPodDescriptorsSnafu)?, + &kafka_listeners, kafka_security, )]) .add_env_var( @@ -712,6 +721,19 @@ pub fn build_controller_rolegroup_statefulset( .context(MetadataBuildSnafu)? .build(); + // Add TLS related volumes and volume mounts + let requested_secret_lifetime = merged_config + .deref() + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; + kafka_security + .add_controller_volume_and_volume_mounts( + &mut pod_builder, + &mut cb_kafka, + &requested_secret_lifetime, + ) + .context(AddVolumesAndVolumeMountsSnafu)?; + pod_builder .metadata(metadata) .image_pull_secrets_from_product_image(resolved_product_image) diff --git a/tests/templates/kuttl/tls/10-assert.yaml b/tests/templates/kuttl/tls/10-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/tls/10-assert.yaml rename to tests/templates/kuttl/tls/10-assert.yaml.j2 index e0766c49..656007a4 100644 --- a/tests/templates/kuttl/tls/10-assert.yaml +++ b/tests/templates/kuttl/tls/10-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 b/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 index 6b462fa4..2479c30b 100644 --- a/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 +++ b/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -18,3 +19,4 @@ spec: roleGroups: default: replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/tls/20-assert.yaml 
b/tests/templates/kuttl/tls/20-assert.yaml deleted file mode 100644 index 01ba15d1..00000000 --- a/tests/templates/kuttl/tls/20-assert.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 600 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-default -status: - readyReplicas: 3 - replicas: 3 diff --git a/tests/templates/kuttl/tls/20-assert.yaml.j2 b/tests/templates/kuttl/tls/20-assert.yaml.j2 new file mode 100644 index 00000000..ee11bdb7 --- /dev/null +++ b/tests/templates/kuttl/tls/20-assert.yaml.j2 @@ -0,0 +1,22 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 +{% endif %} diff --git a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 index 54a7a49d..f69b1c8e 100644 --- a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 @@ -58,7 +58,14 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-kafka-znode +{% else %} + controllers: + roleGroups: + default: + replicas: 3 +{% endif %} brokers: config: logging: diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index f0b19698..9521ab8d 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -93,6 +93,7 @@ tests: - zookeeper-latest - use-client-tls - use-client-auth-tls + - use-kraft-controller - openshift - name: delete-rolegroup dimensions: From 
e64d9aa1e88b6a2193ea2935de7165da0a771cd6 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 2 Sep 2025 16:06:28 +0200 Subject: [PATCH 35/90] fix 3.7.x commands --- rust/operator-binary/src/config/command.rs | 70 ++++++++++++++++--- rust/operator-binary/src/crd/mod.rs | 8 +++ rust/operator-binary/src/crd/role/mod.rs | 4 ++ .../src/resource/statefulset.rs | 2 + 4 files changed, 75 insertions(+), 9 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 15a355ef..4f6049bf 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -12,9 +12,9 @@ use crate::crd::{ listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, role::{ KAFKA_ADVERTISED_LISTENERS, KAFKA_BROKER_ID_OFFSET, - KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, - KAFKA_LISTENERS, KAFKA_NODE_ID, KafkaRole, broker::BROKER_PROPERTIES_FILE, - controller::CONTROLLER_PROPERTIES_FILE, + KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_CONTROLLER_QUORUM_VOTERS, + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, KAFKA_NODE_ID, KafkaRole, + broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE, }, security::KafkaTlsSecurity, v1alpha1, @@ -28,6 +28,7 @@ pub fn broker_kafka_container_commands( kafka_listeners: &KafkaListenerConfig, opa_connect_string: Option<&str>, kafka_security: &KafkaTlsSecurity, + product_version: &str, ) -> String { formatdoc! 
{" {COMMON_BASH_TRAP_FUNCTIONS} @@ -47,7 +48,7 @@ pub fn broker_kafka_container_commands( true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), false => "".to_string(), }, - broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kafka_security), + broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kafka_security, product_version), } } @@ -58,6 +59,7 @@ fn broker_start_command( kafka_listeners: &KafkaListenerConfig, opa_connect_string: Option<&str>, kafka_security: &KafkaTlsSecurity, + product_version: &str, ) -> String { let opa_config = match opa_connect_string { None => "".to_string(), @@ -90,17 +92,23 @@ fn broker_start_command( echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} - - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + {controller_quorum_voters} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & ", config_dir = STACKABLE_CONFIG_DIR, properties_file = BROKER_PROPERTIES_FILE, bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), - initial_controllers = to_initial_controllers(&controller_descriptors, client_port), listeners = kafka_listeners.listeners(), advertised_listeners = kafka_listeners.advertised_listeners(), listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), + controller_quorum_voters = controller_quorum_voters_command( + 
product_version, + BROKER_PROPERTIES_FILE, + &to_quorum_voters(&controller_descriptors, client_port) + ), + initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), } } else { formatdoc! {" @@ -126,8 +134,10 @@ pub fn controller_kafka_container_command( controller_descriptors: Vec<KafkaPodDescriptor>, kafka_listeners: &KafkaListenerConfig, kafka_security: &KafkaTlsSecurity, + product_version: &str, ) -> String { let client_port = kafka_security.client_port(); + // TODO: copy to tmp? mount readwrite folder? formatdoc! {" {COMMON_BASH_TRAP_FUNCTIONS} @@ -142,8 +152,10 @@ pub fn controller_kafka_container_command( echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + {controller_quorum_voters} + cat /tmp/{properties_file} - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --initial-controllers {initial_controllers} --ignore-formatted + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} & wait_for_termination $! 
@@ -155,7 +167,12 @@ pub fn controller_kafka_container_command( bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), listeners = to_listeners(client_port), listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), - initial_controllers = to_initial_controllers(&controller_descriptors, client_port), + initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), + controller_quorum_voters = controller_quorum_voters_command( + product_version, + CONTROLLER_PROPERTIES_FILE, + &to_quorum_voters(&controller_descriptors, client_port) + ), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) } } @@ -186,6 +203,14 @@ fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor], port: u .join(",") } +fn to_quorum_voters(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { + controller_descriptors + .iter() + .map(|desc| desc.as_quorum_voter(port)) + .collect::<Vec<String>>() + .join(",") +} + fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { controller_descriptors .iter() @@ -193,3 +218,30 @@ fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor], port: u16 .collect::<Vec<String>>() .join(",") } + +fn initial_controllers_command( + controller_descriptors: &[KafkaPodDescriptor], + product_version: &str, + client_port: u16, +) -> String { + match product_version.starts_with("3.7") { + true => "".to_string(), + false => format!( + "--initial-controllers {initial_controllers}", + initial_controllers = to_initial_controllers(controller_descriptors, client_port), + ), + } +} + +fn controller_quorum_voters_command( + product_version: &str, + properties_file: &str, + quorum_voters: &str, +) -> String { + match product_version.starts_with("3.7") { + true => format!( + "echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={quorum_voters}\" >> /tmp/{properties_file}" + ), + false => 
"".to_string(), + } +} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 2a2e9ac2..374bda27 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -337,6 +337,14 @@ impl KafkaPodDescriptor { fqdn = self.fqdn(), ) } + + pub fn as_quorum_voter(&self, port: u16) -> String { + format!( + "{replica}@{fqdn}:{port}", + replica = self.replica, + fqdn = self.fqdn(), + ) + } } #[derive(Clone, Default, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 9e28b7f4..69dc8bfc 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -66,6 +66,10 @@ pub const KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: &str = "listener.security.protoc /// For example: localhost:9092,localhost:9093,localhost:9094. pub const KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS: &str = "controller.quorum.bootstrap.servers"; +/// Map of id/endpoint information for the set of voters in a comma-separated list of {id}@{host}:{port} entries. 
+/// For example: 1@localhost:9092,2@localhost:9093,3@localhost:9094 +pub const KAFKA_CONTROLLER_QUORUM_VOTERS: &str = "controller.quorum.voters"; + #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("fragment validation failure"))] diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 878ce8f7..a6c7178c 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -302,6 +302,7 @@ pub fn build_broker_rolegroup_statefulset( &kafka_listeners, opa_connect_string, kafka_security, + &resolved_product_image.product_version, )]) .add_env_var( "EXTRA_ARGS", @@ -639,6 +640,7 @@ pub fn build_controller_rolegroup_statefulset( .context(BuildPodDescriptorsSnafu)?, &kafka_listeners, kafka_security, + &resolved_product_image.product_version, )]) .add_env_var( "EXTRA_ARGS", From b660793cadb62e4dcc519036eb590705dee2d6b5 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 2 Sep 2025 18:13:45 +0200 Subject: [PATCH 36/90] hash node.id offset from rolegroup --- rust/operator-binary/src/config/command.rs | 12 +++++----- rust/operator-binary/src/config/mod.rs | 1 + .../src/config/node_id_hasher.rs | 19 ++++++++++++++++ rust/operator-binary/src/crd/mod.rs | 22 ++++++++++++------- rust/operator-binary/src/crd/role/mod.rs | 4 ++-- .../src/resource/statefulset.rs | 18 +++++++++++++-- 6 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 rust/operator-binary/src/config/node_id_hasher.rs diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 4f6049bf..f1d518b4 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -11,10 +11,10 @@ use crate::crd::{ STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, role::{ - KAFKA_ADVERTISED_LISTENERS, 
KAFKA_BROKER_ID_OFFSET, - KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_CONTROLLER_QUORUM_VOTERS, - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, KAFKA_NODE_ID, KafkaRole, - broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE, + KAFKA_ADVERTISED_LISTENERS, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, + KAFKA_CONTROLLER_QUORUM_VOTERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, + KAFKA_NODE_ID, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BROKER_PROPERTIES_FILE, + controller::CONTROLLER_PROPERTIES_FILE, }, security::KafkaTlsSecurity, v1alpha1, @@ -87,7 +87,7 @@ fn broker_start_command( export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') cp {config_dir}/{properties_file} /tmp/{properties_file} - echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + {KAFKA_BROKER_ID_OFFSET}))\" >> /tmp/{properties_file} + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + ${KAFKA_NODE_ID_OFFSET}))\" >> /tmp/{properties_file} echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} @@ -148,7 +148,7 @@ pub fn controller_kafka_container_command( export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') cp {config_dir}/{properties_file} /tmp/{properties_file} - echo \"{KAFKA_NODE_ID}=$REPLICA_ID\" >> /tmp/{properties_file} + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + ${KAFKA_NODE_ID_OFFSET}))\" >> /tmp/{properties_file} echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs index 7a4b4e4a..ae92b3c2 100644 --- a/rust/operator-binary/src/config/mod.rs +++ 
b/rust/operator-binary/src/config/mod.rs @@ -1,2 +1,3 @@ pub mod command; pub mod jvm; +pub mod node_id_hasher; diff --git a/rust/operator-binary/src/config/node_id_hasher.rs b/rust/operator-binary/src/config/node_id_hasher.rs new file mode 100644 index 00000000..d59bb418 --- /dev/null +++ b/rust/operator-binary/src/config/node_id_hasher.rs @@ -0,0 +1,19 @@ +pub fn node_id_hash32_offset(rolegroup: &str) -> u32 { + let hash = fnv_hash32(rolegroup); + let range = hash & 0x0000FFFF; + let offset = range * 0x0000FFFF; + offset +} + +/// Simple FNV-1a hash impl +fn fnv_hash32(input: &str) -> u32 { + const FNV_OFFSET: u32 = 0x811c9dc5; + const FNV_PRIME: u32 = 0x01000193; + + let mut hash = FNV_OFFSET; + for byte in input.as_bytes() { + hash ^= u32::from(*byte); + hash = hash.wrapping_mul(FNV_PRIME); + } + hash +} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 374bda27..156a40c9 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -24,10 +24,13 @@ use stackable_operator::{ versioned::versioned, }; -use crate::crd::{ - authorization::KafkaAuthorization, - role::{KafkaRole, broker::BrokerConfigFragment, controller::ControllerConfigFragment}, - tls::KafkaTls, +use crate::{ + config::node_id_hasher::node_id_hash32_offset, + crd::{ + authorization::KafkaAuthorization, + role::{KafkaRole, broker::BrokerConfigFragment, controller::ControllerConfigFragment}, + tls::KafkaTls, + }, }; pub const DOCKER_IMAGE_BASE_NAME: &str = "kafka"; @@ -255,6 +258,7 @@ impl v1alpha1::KafkaCluster { role_group_service_name: rolegroup_ref.object_name(), replica: i, cluster_domain: cluster_info.cluster_domain.clone(), + node_id: node_id_hash32_offset(rolegroup_name) + u32::from(i), }) }) .collect(), @@ -275,6 +279,7 @@ impl v1alpha1::KafkaCluster { role_group_service_name: rolegroup_ref.object_name(), replica: i, cluster_domain: cluster_info.cluster_domain.clone(), + node_id: node_id_hash32_offset(rolegroup_name) 
+ u32::from(i), }) }) .collect(), @@ -291,6 +296,7 @@ pub struct KafkaPodDescriptor { role_group_service_name: String, replica: u16, cluster_domain: DomainName, + node_id: u32, } impl KafkaPodDescriptor { @@ -332,16 +338,16 @@ impl KafkaPodDescriptor { // TODO(@maltesander): Even though the used Uuid states to be type 4 it does not work... 0000000000-00000000000 works... pub fn as_voter(&self, port: u16) -> String { format!( - "{replica}@{fqdn}:{port}:0000000000-{replica:0>11}", - replica = self.replica, + "{node_id}@{fqdn}:{port}:0000000000-{node_id:0>11}", + node_id = self.node_id, fqdn = self.fqdn(), ) } pub fn as_quorum_voter(&self, port: u16) -> String { format!( - "{replica}@{fqdn}:{port}", - replica = self.replica, + "{node_id}@{fqdn}:{port}", + node_id = self.node_id, fqdn = self.fqdn(), ) } diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 69dc8bfc..47ed5b3d 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -30,8 +30,8 @@ use crate::{ v1alpha1, }; -/// Broker and Kafka node.id properties should not clash; This is an offset for brokers. -pub const KAFKA_BROKER_ID_OFFSET: u16 = 1000; +/// Env var +pub const KAFKA_NODE_ID_OFFSET: &str = "NODE_ID_OFFSET"; // See: https://kafka.apache.org/documentation/#brokerconfigs /// The node ID associated with the roles this process is playing when process.roles is non-empty. 
diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index a6c7178c..470d6a90 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -43,14 +43,20 @@ use stackable_operator::{ }; use crate::{ - config::command::{broker_kafka_container_commands, controller_kafka_container_command}, + config::{ + command::{broker_kafka_container_commands, controller_kafka_container_command}, + node_id_hasher::node_id_hash32_offset, + }, crd::{ self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, listener::get_kafka_listener_config, - role::{AnyConfig, KafkaRole, broker::BrokerContainer, controller::ControllerContainer}, + role::{ + AnyConfig, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BrokerContainer, + controller::ControllerContainer, + }, security::KafkaTlsSecurity, v1alpha1, }, @@ -325,6 +331,10 @@ pub fn build_broker_rolegroup_statefulset( "CONTAINERDEBUG_LOG_DIRECTORY", format!("{STACKABLE_LOG_DIR}/containerdebug"), ) + .add_env_var( + KAFKA_NODE_ID_OFFSET, + node_id_hash32_offset(&rolegroup_ref.role_group).to_string(), + ) .add_env_vars(env) .add_container_ports(container_ports(kafka_security)) .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR) @@ -663,6 +673,10 @@ pub fn build_controller_rolegroup_statefulset( "CONTAINERDEBUG_LOG_DIRECTORY", format!("{STACKABLE_LOG_DIR}/containerdebug"), ) + .add_env_var( + KAFKA_NODE_ID_OFFSET, + node_id_hash32_offset(&rolegroup_ref.role_group).to_string(), + ) .add_env_vars(env) .add_container_ports(container_ports(kafka_security)) .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR) From d38d6ba3dae77323a29589bd50aa1ba9ce429dc9 Mon Sep 17 00:00:00 2001 From: 
Malte Sander Date: Tue, 2 Sep 2025 19:01:11 +0200 Subject: [PATCH 37/90] started kerberos tests --- .../src/config/node_id_hasher.rs | 15 ++++++++++--- rust/operator-binary/src/crd/mod.rs | 18 ++++----------- .../src/resource/statefulset.rs | 4 ++-- .../{10-assert.yaml => 10-assert.yaml.j2} | 2 ++ .../kuttl/kerberos/10-install-zk.yaml.j2 | 2 ++ tests/templates/kuttl/kerberos/20-assert.yaml | 12 ---------- .../kuttl/kerberos/20-assert.yaml.j2 | 22 +++++++++++++++++++ .../kuttl/kerberos/20-install-kafka.yaml.j2 | 12 +++++++++- tests/test-definition.yaml | 1 + 9 files changed, 56 insertions(+), 32 deletions(-) rename tests/templates/kuttl/kerberos/{10-assert.yaml => 10-assert.yaml.j2} (70%) delete mode 100644 tests/templates/kuttl/kerberos/20-assert.yaml create mode 100644 tests/templates/kuttl/kerberos/20-assert.yaml.j2 diff --git a/rust/operator-binary/src/config/node_id_hasher.rs b/rust/operator-binary/src/config/node_id_hasher.rs index d59bb418..808c0563 100644 --- a/rust/operator-binary/src/config/node_id_hasher.rs +++ b/rust/operator-binary/src/config/node_id_hasher.rs @@ -1,7 +1,16 @@ -pub fn node_id_hash32_offset(rolegroup: &str) -> u32 { - let hash = fnv_hash32(rolegroup); +use stackable_operator::role_utils::RoleGroupRef; + +use crate::crd::v1alpha1::KafkaCluster; + +pub fn node_id_hash32_offset(rolegroup_ref: &RoleGroupRef<KafkaCluster>) -> u32 { + let hash = fnv_hash32(&format!( + "{role}-{rolegroup}", + role = rolegroup_ref.role, + rolegroup = rolegroup_ref.role_group + )); let range = hash & 0x0000FFFF; - let offset = range * 0x0000FFFF; + // unsigned in kafka + let offset = range * 0x00007FFF; offset } diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 156a40c9..3ca329b0 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -258,12 +258,12 @@ impl v1alpha1::KafkaCluster { role_group_service_name: rolegroup_ref.object_name(), replica: i, cluster_domain: 
cluster_info.cluster_domain.clone(), - node_id: node_id_hash32_offset(rolegroup_name) + u32::from(i), + // TODO: check for hash collisions? + node_id: node_id_hash32_offset(&rolegroup_ref) + u32::from(i), }) }) .collect(), - // TODO: this does not work for multiple rolegroups (the index / replica) KafkaRole::Controller => self .controller_role() .iter() @@ -279,7 +279,8 @@ impl v1alpha1::KafkaCluster { role_group_service_name: rolegroup_ref.object_name(), replica: i, cluster_domain: cluster_info.cluster_domain.clone(), - node_id: node_id_hash32_offset(rolegroup_name) + u32::from(i), + // TODO: check for hash collisions? + node_id: node_id_hash32_offset(&rolegroup_ref) + u32::from(i), }) }) .collect(), @@ -312,17 +313,6 @@ impl KafkaPodDescriptor { ) } - /// Return the fully qualified domain name for "replica" - /// Format: -...svc. - pub fn fqdn_for_replica(&self, replica: u16) -> String { - format!( - "{service_name}-{replica}.{service_name}.{namespace}.svc.{cluster_domain}", - service_name = self.role_group_service_name, - namespace = self.namespace, - cluster_domain = self.cluster_domain - ) - } - pub fn pod_name(&self) -> String { format!("{}-{}", self.role_group_service_name, self.replica) } diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 470d6a90..2cd587a7 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -333,7 +333,7 @@ pub fn build_broker_rolegroup_statefulset( ) .add_env_var( KAFKA_NODE_ID_OFFSET, - node_id_hash32_offset(&rolegroup_ref.role_group).to_string(), + node_id_hash32_offset(&rolegroup_ref).to_string(), ) .add_env_vars(env) .add_container_ports(container_ports(kafka_security)) @@ -675,7 +675,7 @@ pub fn build_controller_rolegroup_statefulset( ) .add_env_var( KAFKA_NODE_ID_OFFSET, - node_id_hash32_offset(&rolegroup_ref.role_group).to_string(), + node_id_hash32_offset(&rolegroup_ref).to_string(), ) 
.add_env_vars(env) .add_container_ports(container_ports(kafka_security)) diff --git a/tests/templates/kuttl/kerberos/10-assert.yaml b/tests/templates/kuttl/kerberos/10-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/kerberos/10-assert.yaml rename to tests/templates/kuttl/kerberos/10-assert.yaml.j2 index e0766c49..656007a4 100644 --- a/tests/templates/kuttl/kerberos/10-assert.yaml +++ b/tests/templates/kuttl/kerberos/10-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 b/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 index 6b462fa4..2479c30b 100644 --- a/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 +++ b/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -18,3 +19,4 @@ spec: roleGroups: default: replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/kerberos/20-assert.yaml b/tests/templates/kuttl/kerberos/20-assert.yaml deleted file mode 100644 index 01ba15d1..00000000 --- a/tests/templates/kuttl/kerberos/20-assert.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 600 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-default -status: - readyReplicas: 3 - replicas: 3 diff --git a/tests/templates/kuttl/kerberos/20-assert.yaml.j2 b/tests/templates/kuttl/kerberos/20-assert.yaml.j2 new file mode 100644 index 00000000..ee11bdb7 --- /dev/null +++ b/tests/templates/kuttl/kerberos/20-assert.yaml.j2 @@ -0,0 +1,22 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: 
test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 +{% endif %} diff --git a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 index a03d03ed..e6a5c343 100644 --- a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 @@ -36,7 +36,6 @@ commands: {% endif %} pullPolicy: IfNotPresent clusterConfig: - zookeeperConfigMapName: test-kafka-znode authentication: - authenticationClass: kerberos-auth-$NAMESPACE tls: @@ -44,6 +43,17 @@ commands: serverSecretClass: tls {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} + zookeeperConfigMapName: test-kafka-znode +{% else %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 {% endif %} brokers: config: diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 9521ab8d..4985c32b 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -120,6 +120,7 @@ tests: - openshift - broker-listener-class - bootstrap-listener-class + - use-kraft-controller suites: - name: nightly From 6d7383e8bdb45394289c00cd7fc5bd925682594c Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 3 Sep 2025 08:26:44 +0200 Subject: [PATCH 38/90] adapt logging tests --- .../{02-assert.yaml => 02-assert.yaml.j2} | 2 + .../logging/02-install-zookeeper.yaml.j2 | 2 + tests/templates/kuttl/logging/04-assert.yaml | 20 -------- .../templates/kuttl/logging/04-assert.yaml.j2 | 38 +++++++++++++++ .../kuttl/logging/04-install-kafka.yaml.j2 | 47 +++++++++++++++++++ 
.../kafka-vector-aggregator-values.yaml.j2 | 26 ++++++++++ tests/test-definition.yaml | 1 + 7 files changed, 116 insertions(+), 20 deletions(-) rename tests/templates/kuttl/logging/{02-assert.yaml => 02-assert.yaml.j2} (70%) delete mode 100644 tests/templates/kuttl/logging/04-assert.yaml create mode 100644 tests/templates/kuttl/logging/04-assert.yaml.j2 diff --git a/tests/templates/kuttl/logging/02-assert.yaml b/tests/templates/kuttl/logging/02-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/logging/02-assert.yaml rename to tests/templates/kuttl/logging/02-assert.yaml.j2 index e0766c49..656007a4 100644 --- a/tests/templates/kuttl/logging/02-assert.yaml +++ b/tests/templates/kuttl/logging/02-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 b/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 index 96078f76..827064d7 100644 --- a/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 +++ b/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -26,3 +27,4 @@ metadata: spec: clusterRef: name: test-zk +{% endif %} diff --git a/tests/templates/kuttl/logging/04-assert.yaml b/tests/templates/kuttl/logging/04-assert.yaml deleted file mode 100644 index e445cb01..00000000 --- a/tests/templates/kuttl/logging/04-assert.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 600 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-automatic-log-config -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: 
test-kafka-broker-custom-log-config -status: - readyReplicas: 1 - replicas: 1 diff --git a/tests/templates/kuttl/logging/04-assert.yaml.j2 b/tests/templates/kuttl/logging/04-assert.yaml.j2 new file mode 100644 index 00000000..3bc3f09b --- /dev/null +++ b/tests/templates/kuttl/logging/04-assert.yaml.j2 @@ -0,0 +1,38 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-automatic-log-config +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-custom-log-config +status: + readyReplicas: 1 + replicas: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-automatic-log-config +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-custom-log-config +status: + readyReplicas: 1 + replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 index e83fc50b..e804cf16 100644 --- a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 @@ -36,7 +36,54 @@ spec: tls: serverSecretClass: null vectorAggregatorConfigMapName: kafka-vector-aggregator-discovery +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-kafka-znode +{% else %} + controllers: + roleGroups: + automatic-log-config: + replicas: 1 + config: + logging: + enableVectorAgent: true + containers: + kafka: + console: + level: INFO + file: + level: INFO + loggers: + ROOT: + level: INFO + vector: + console: + level: INFO + file: + level: INFO + loggers: + ROOT: + level: INFO + podOverrides: + spec: + containers: + - name: vector + volumeMounts: + - name: prepared-logs + mountPath: 
/stackable/log/prepared-logs + volumes: + - name: prepared-logs + configMap: + name: prepared-logs + custom-log-config: + replicas: 1 + config: + logging: + enableVectorAgent: true + containers: + kafka: + custom: + configMap: kafka-log-config +{% endif %} brokers: roleGroups: automatic-log-config: diff --git a/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 index f30e142e..e8fac339 100644 --- a/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 @@ -48,6 +48,32 @@ customConfig: condition: >- .pod == "test-kafka-broker-custom-log-config-0" && .container == "vector" +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} + filteredAutomaticLogConfigControllerKafka: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-automatic-log-config-0" && + .container == "kafka" + filteredAutomaticLogConfigControllerVector: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-automatic-log-config-0" && + .container == "vector" + filteredCustomLogConfigControllerKafka: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-custom-log-config-0" && + .container == "kafka" + filteredCustomLogConfigControllerVector: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-custom-log-config-0" && + .container == "vector" +{% endif %} filteredInvalidEvents: type: filter inputs: [vector] diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 4985c32b..38b4712b 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -104,6 +104,7 @@ tests: dimensions: - kafka - zookeeper-latest + - use-kraft-controller - openshift - name: cluster-operation dimensions: From f2b97a5b6ed901f4012e4ac651d998b2cbd31f2e Mon Sep 17 00:00:00 2001 
From: Malte Sander Date: Wed, 3 Sep 2025 14:26:52 +0200 Subject: [PATCH 39/90] error out on node id hash collision --- .../src/config/node_id_hasher.rs | 5 +- rust/operator-binary/src/crd/mod.rs | 93 ++++++++++++------- .../src/resource/statefulset.rs | 4 +- 3 files changed, 63 insertions(+), 39 deletions(-) diff --git a/rust/operator-binary/src/config/node_id_hasher.rs b/rust/operator-binary/src/config/node_id_hasher.rs index 808c0563..eebee090 100644 --- a/rust/operator-binary/src/config/node_id_hasher.rs +++ b/rust/operator-binary/src/config/node_id_hasher.rs @@ -9,9 +9,8 @@ pub fn node_id_hash32_offset(rolegroup_ref: &RoleGroupRef) -> u32 rolegroup = rolegroup_ref.role_group )); let range = hash & 0x0000FFFF; - // unsigned in kafka - let offset = range * 0x00007FFF; - offset + // Kafka uses signed integer + range * 0x00007FFF } /// Simple FNV-1a hash impl diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 3ca329b0..759a6770 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -6,7 +6,7 @@ pub mod role; pub mod security; pub mod tls; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; @@ -60,10 +60,10 @@ pub const STACKABLE_KERBEROS_KRB5_PATH: &str = "/stackable/kerberos/krb5.conf"; #[derive(Snafu, Debug)] pub enum Error { - #[snafu(display("the Kafka role [{role}] is missing from spec"))] + #[snafu(display("The Kafka role [{role}] is missing from spec"))] MissingRole { role: String }, - #[snafu(display("object has no namespace associated"))] + #[snafu(display("Object has no namespace associated"))] NoNamespace, #[snafu(display( @@ -75,6 +75,15 @@ pub enum Error { "Kraft controller (`spec.controller`) and ZooKeeper (`spec.clusterConfig.zookeeperConfigMapName`) are configured. 
Please only choose one" ))] KraftAndZookeeperConfigured, + + #[snafu(display( + "Could not calculate ({role}) 'node.id' hash offset for rolegroup '{rolegroup}' which collides with rolegroup '{colliding_rolegroup}'. Please try to rename one of the rolegroups." + ))] + KafkaNodeIdHashCollision { + role: KafkaRole, + rolegroup: String, + colliding_rolegroup: String, + }, } #[versioned( @@ -241,49 +250,65 @@ impl v1alpha1::KafkaCluster { kafka_role: &KafkaRole, cluster_info: &KubernetesClusterInfo, ) -> Result, Error> { - let ns = self.metadata.namespace.clone().context(NoNamespaceSnafu)?; + let namespace = self.metadata.namespace.clone().context(NoNamespaceSnafu)?; + let rolegroup_replicas = self.extract_rolegroup_replicas(kafka_role)?; + let mut pod_descriptors = Vec::new(); + let mut seen_hashes = HashMap::::new(); + + for (rolegroup, replicas) in rolegroup_replicas { + let rolegroup_ref = self.rolegroup_ref(kafka_role, &rolegroup); + let node_id_hash_offset = node_id_hash32_offset(&rolegroup_ref); + + match seen_hashes.get(&node_id_hash_offset) { + Some(colliding_rolegroup) => { + return KafkaNodeIdHashCollisionSnafu { + role: kafka_role.clone(), + rolegroup: rolegroup.clone(), + colliding_rolegroup: colliding_rolegroup.clone(), + } + .fail(); + } + None => seen_hashes.insert(node_id_hash_offset, rolegroup), + }; + + for replica in 0..replicas { + pod_descriptors.push(KafkaPodDescriptor { + namespace: namespace.clone(), + role_group_service_name: rolegroup_ref.object_name(), + replica, + cluster_domain: cluster_info.cluster_domain.clone(), + node_id: node_id_hash_offset + u32::from(replica), + }); + } + } + + Ok(pod_descriptors) + } + + fn extract_rolegroup_replicas( + &self, + kafka_role: &KafkaRole, + ) -> Result, Error> { Ok(match kafka_role { KafkaRole::Broker => self .broker_role() .iter() .flat_map(|role| &role.role_groups) - // Order rolegroups consistently, to avoid spurious downstream rewrites - .collect::>() - .into_iter() - .flat_map(move 
|(rolegroup_name, rolegroup)| { - let rolegroup_ref = self.rolegroup_ref(kafka_role, rolegroup_name); - let ns = ns.clone(); - (0..rolegroup.replicas.unwrap_or(0)).map(move |i| KafkaPodDescriptor { - namespace: ns.clone(), - role_group_service_name: rolegroup_ref.object_name(), - replica: i, - cluster_domain: cluster_info.cluster_domain.clone(), - // TODO: check for hash collisions? - node_id: node_id_hash32_offset(&rolegroup_ref) + u32::from(i), - }) + .flat_map(|(rolegroup_name, rolegroup)| { + std::iter::once((rolegroup_name.to_string(), rolegroup.replicas.unwrap_or(0))) }) - .collect(), + // Order rolegroups consistently, to avoid spurious downstream rewrites + .collect::>(), KafkaRole::Controller => self .controller_role() .iter() .flat_map(|role| &role.role_groups) - // Order rolegroups consistently, to avoid spurious downstream rewrites - .collect::>() - .into_iter() - .flat_map(move |(rolegroup_name, rolegroup)| { - let rolegroup_ref = self.rolegroup_ref(kafka_role, rolegroup_name); - let ns = ns.clone(); - (0..rolegroup.replicas.unwrap_or(0)).map(move |i: u16| KafkaPodDescriptor { - namespace: ns.clone(), - role_group_service_name: rolegroup_ref.object_name(), - replica: i, - cluster_domain: cluster_info.cluster_domain.clone(), - // TODO: check for hash collisions? 
- node_id: node_id_hash32_offset(&rolegroup_ref) + u32::from(i), - }) + .flat_map(|(rolegroup_name, rolegroup)| { + std::iter::once((rolegroup_name.to_string(), rolegroup.replicas.unwrap_or(0))) }) - .collect(), + // Order rolegroups consistently, to avoid spurious downstream rewrites + .collect::>(), }) } } diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 2cd587a7..5f287058 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -333,7 +333,7 @@ pub fn build_broker_rolegroup_statefulset( ) .add_env_var( KAFKA_NODE_ID_OFFSET, - node_id_hash32_offset(&rolegroup_ref).to_string(), + node_id_hash32_offset(rolegroup_ref).to_string(), ) .add_env_vars(env) .add_container_ports(container_ports(kafka_security)) @@ -675,7 +675,7 @@ pub fn build_controller_rolegroup_statefulset( ) .add_env_var( KAFKA_NODE_ID_OFFSET, - node_id_hash32_offset(&rolegroup_ref).to_string(), + node_id_hash32_offset(rolegroup_ref).to_string(), ) .add_env_vars(env) .add_container_ports(container_ports(kafka_security)) From 34bde59c1ac6946a6972dc63d0addb29866a929a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 3 Sep 2025 15:02:23 +0200 Subject: [PATCH 40/90] fix cluster operation tests --- .../kafka-operator/configs/properties.yaml | 16 ---------------- deploy/helm/kafka-operator/crds/crds.yaml | 9 ++++++++- rust/operator-binary/src/crd/mod.rs | 13 +++++++++++++ .../{10-assert.yaml => 10-assert.yaml.j2} | 2 ++ .../cluster-operation/10-install-zk.yaml.j2 | 2 ++ .../{50-assert.yaml => 20-assert.yaml.j2} | 10 ++++++++++ .../cluster-operation/20-install-kafka.yaml.j2 | 12 ++++++++++++ .../{30-assert.yaml => 30-assert.yaml.j2} | 9 +++++++++ .../cluster-operation/30-stop-kafka.yaml.j2 | 18 +++++++++++++++--- .../{40-assert.yaml => 40-assert.yaml.j2} | 9 +++++++++ .../cluster-operation/40-pause-kafka.yaml.j2 | 18 +++++++++++++++--- .../{20-assert.yaml => 
50-assert.yaml.j2} | 10 ++++++++++ .../cluster-operation/50-restart-kafka.yaml.j2 | 18 +++++++++++++++--- tests/test-definition.yaml | 1 + 14 files changed, 121 insertions(+), 26 deletions(-) rename tests/templates/kuttl/cluster-operation/{10-assert.yaml => 10-assert.yaml.j2} (70%) rename tests/templates/kuttl/cluster-operation/{50-assert.yaml => 20-assert.yaml.j2} (60%) rename tests/templates/kuttl/cluster-operation/{30-assert.yaml => 30-assert.yaml.j2} (61%) rename tests/templates/kuttl/cluster-operation/{40-assert.yaml => 40-assert.yaml.j2} (62%) rename tests/templates/kuttl/cluster-operation/{20-assert.yaml => 50-assert.yaml.j2} (60%) diff --git a/deploy/helm/kafka-operator/configs/properties.yaml b/deploy/helm/kafka-operator/configs/properties.yaml index 8e67978b..8ebbbeb0 100644 --- a/deploy/helm/kafka-operator/configs/properties.yaml +++ b/deploy/helm/kafka-operator/configs/properties.yaml @@ -149,19 +149,3 @@ properties: required: false asOfVersion: "0.0.0" description: "The number of seconds after which the OPA authorizer cache expires" - - - property: &logDirs - propertyNames: - - name: "log.dirs" - kind: - type: "file" - file: "broker.properties" - datatype: - type: "string" - recommendedValues: - - value: "/stackable/data/topicdata" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "A comma separated list of directories under which to store log files" diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 26fc4ff3..9e03950a 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -609,6 +609,14 @@ spec: - roleGroups type: object clusterConfig: + default: + authentication: [] + authorization: + opa: null + tls: + internalSecretClass: tls + serverSecretClass: tls + zookeeperConfigMapName: null description: |- Kafka settings that affect all roles and role groups. 
@@ -1320,7 +1328,6 @@ spec: type: string type: object required: - - clusterConfig - image type: object status: diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 759a6770..728bf14b 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -123,6 +123,7 @@ pub mod versioned { /// Kafka settings that affect all roles and role groups. /// /// The settings in the `clusterConfig` are cluster wide settings that do not need to be configurable at role or role group level. + #[serde(default)] pub cluster_config: v1alpha1::KafkaClusterConfig, // no doc - docs in ClusterOperation struct. @@ -164,6 +165,18 @@ pub mod versioned { } } +impl Default for v1alpha1::KafkaClusterConfig { + fn default() -> Self { + Self { + authentication: vec![], + authorization: KafkaAuthorization::default(), + tls: tls::default_kafka_tls(), + vector_aggregator_config_map_name: None, + zookeeper_config_map_name: None, + } + } +} + impl HasStatusCondition for v1alpha1::KafkaCluster { fn conditions(&self) -> Vec { match &self.status { diff --git a/tests/templates/kuttl/cluster-operation/10-assert.yaml b/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/cluster-operation/10-assert.yaml rename to tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 index c9cfcf5c..5d46bbff 100644 --- a/tests/templates/kuttl/cluster-operation/10-assert.yaml +++ b/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 b/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 index 6b462fa4..2479c30b 100644 --- a/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 +++ 
b/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -18,3 +19,4 @@ spec: roleGroups: default: replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/50-assert.yaml b/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 similarity index 60% rename from tests/templates/kuttl/cluster-operation/50-assert.yaml rename to tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 index c6be6814..9076d21f 100644 --- a/tests/templates/kuttl/cluster-operation/50-assert.yaml +++ b/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 @@ -12,3 +12,13 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 1 + replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 index 6d391b65..2079522a 100644 --- a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 @@ -16,11 +16,23 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: +{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk +{% else %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 +{% endif %} brokers: 
config: logging: diff --git a/tests/templates/kuttl/cluster-operation/30-assert.yaml b/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 similarity index 61% rename from tests/templates/kuttl/cluster-operation/30-assert.yaml rename to tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 index 5b92f6da..8cb8a023 100644 --- a/tests/templates/kuttl/cluster-operation/30-assert.yaml +++ b/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 @@ -11,3 +11,12 @@ metadata: name: test-kafka-broker-default status: replicas: 0 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + replicas: 0 +{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 index 08f1661a..54d7d242 100644 --- a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 @@ -16,14 +16,23 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: +{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk - clusterOperation: - stopped: true - reconciliationPaused: false +{% else %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 +{% endif %} brokers: config: logging: @@ -31,3 +40,6 @@ spec: roleGroups: default: replicas: 1 + clusterOperation: + stopped: true + reconciliationPaused: false diff --git a/tests/templates/kuttl/cluster-operation/40-assert.yaml 
b/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 similarity index 62% rename from tests/templates/kuttl/cluster-operation/40-assert.yaml rename to tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 index 293cb5c8..171bc856 100644 --- a/tests/templates/kuttl/cluster-operation/40-assert.yaml +++ b/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 @@ -11,3 +11,12 @@ metadata: name: test-kafka-broker-default status: replicas: 0 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + replicas: 0 +{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 index c961ca4d..a9b27d01 100644 --- a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 @@ -16,14 +16,23 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: +{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk - clusterOperation: - stopped: false - reconciliationPaused: true +{% else %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 +{% endif %} brokers: config: logging: @@ -31,3 +40,6 @@ spec: roleGroups: default: replicas: 1 + clusterOperation: + stopped: false + reconciliationPaused: true diff --git a/tests/templates/kuttl/cluster-operation/20-assert.yaml b/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 similarity index 60% rename from 
tests/templates/kuttl/cluster-operation/20-assert.yaml rename to tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 index c6be6814..9076d21f 100644 --- a/tests/templates/kuttl/cluster-operation/20-assert.yaml +++ b/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 @@ -12,3 +12,13 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 1 + replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 index 81d43637..ac29774f 100644 --- a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 @@ -15,14 +15,23 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: +{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk - clusterOperation: - stopped: false - reconciliationPaused: false +{% else %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 +{% endif %} brokers: config: logging: @@ -30,3 +39,6 @@ spec: roleGroups: default: replicas: 1 + clusterOperation: + stopped: false + reconciliationPaused: false diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 38b4712b..4cf03486 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -110,6 +110,7 @@ tests: dimensions: - zookeeper-latest - kafka-latest + - 
use-kraft-controller - openshift - name: kerberos dimensions: From 094ecaa265b10e179220e79f2dd03b10e2fcf7bb Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 3 Sep 2025 15:14:07 +0200 Subject: [PATCH 41/90] fix configuration tests --- .../kuttl/configuration/10-assert.yaml | 12 ------ .../{20-assert.yaml.j2 => 10-assert.yaml.j2} | 35 +++++++++++++++- ...kafka.yaml.j2 => 10-install-kafka.yaml.j2} | 40 +++++++++++++++++-- .../kuttl/configuration/10-install-zk.yaml.j2 | 24 ----------- tests/test-definition.yaml | 1 - 5 files changed, 71 insertions(+), 41 deletions(-) delete mode 100644 tests/templates/kuttl/configuration/10-assert.yaml rename tests/templates/kuttl/configuration/{20-assert.yaml.j2 => 10-assert.yaml.j2} (53%) rename tests/templates/kuttl/configuration/{20-install-kafka.yaml.j2 => 10-install-kafka.yaml.j2} (59%) delete mode 100644 tests/templates/kuttl/configuration/10-install-zk.yaml.j2 diff --git a/tests/templates/kuttl/configuration/10-assert.yaml b/tests/templates/kuttl/configuration/10-assert.yaml deleted file mode 100644 index c9cfcf5c..00000000 --- a/tests/templates/kuttl/configuration/10-assert.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 300 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-zk-server-default -status: - readyReplicas: 1 - replicas: 1 diff --git a/tests/templates/kuttl/configuration/20-assert.yaml.j2 b/tests/templates/kuttl/configuration/10-assert.yaml.j2 similarity index 53% rename from tests/templates/kuttl/configuration/20-assert.yaml.j2 rename to tests/templates/kuttl/configuration/10-assert.yaml.j2 index f3f09708..0f6272bd 100644 --- a/tests/templates/kuttl/configuration/20-assert.yaml.j2 +++ b/tests/templates/kuttl/configuration/10-assert.yaml.j2 @@ -6,7 +6,7 @@ timeout: 300 apiVersion: apps/v1 kind: StatefulSet metadata: - name: test-kafka-broker-default + name: test-kafka-controller-default spec: template: spec: @@ -23,6 +23,39 @@ spec: cpu: 
250m # value set in the rolegroup configuration memory: 2Gi +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + - name: vector +{% endif %} + volumeClaimTemplates: + - metadata: + name: log-dirs + spec: + resources: + requests: + # value set in the role configuration and overridden in + # the rolegroup configuration + storage: 1Gi +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +spec: + template: + spec: + containers: + - name: kafka + resources: + limits: + # value set in the role configuration + cpu: 500m + # value set in the rolegroup configuration + memory: 3Gi + requests: + # default value set by the operator + cpu: 250m + # value set in the rolegroup configuration + memory: 3Gi - name: kcat-prober {% if lookup('env', 'VECTOR_AGGREGATOR') %} - name: vector diff --git a/tests/templates/kuttl/configuration/20-install-kafka.yaml.j2 b/tests/templates/kuttl/configuration/10-install-kafka.yaml.j2 similarity index 59% rename from tests/templates/kuttl/configuration/20-install-kafka.yaml.j2 rename to tests/templates/kuttl/configuration/10-install-kafka.yaml.j2 index eefa7550..af9f97e8 100644 --- a/tests/templates/kuttl/configuration/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/configuration/10-install-kafka.yaml.j2 @@ -18,10 +18,10 @@ spec: pullPolicy: IfNotPresent clusterConfig: {% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} - zookeeperConfigMapName: test-zk - brokers: + controllers: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} @@ -37,7 +37,7 @@ spec: storage: logDirs: # Override the default value '2Gi' set by the operator - capacity: '1Gi' + capacity: '1.5Gi' roleGroups: default: config: @@ -50,6 +50,40 @@ spec: memory: # Override the default value '1Gi' set by the operator limit: '2Gi' + storage: + logDirs: + # Override the value '1.5Gi' set in the role configuration + capacity: '1Gi' + 
replicas: 1 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + resources: + cpu: + # Inherit the default value '250m' set by the operator + # min: '250m' + # Override the default value '1' set by the operator + max: '500m' + # memory: + # Inherit the default value '2Gi' set by the operator + # limit: '2Gi' + storage: + logDirs: + # Override the default value '2Gi' set by the operator + capacity: '1.5Gi' + roleGroups: + default: + config: + resources: + # cpu: + # Inherit the default value '250m' set by the operator + # min: '250m' + # Inherit the value '1000m' set in the role configuration + # max: '1000m' + memory: + # Override the default value '2Gi' set by the operator + limit: '3Gi' storage: logDirs: # Override the value '2Gi' set in the role configuration diff --git a/tests/templates/kuttl/configuration/10-install-zk.yaml.j2 b/tests/templates/kuttl/configuration/10-install-zk.yaml.j2 deleted file mode 100644 index b402b25d..00000000 --- a/tests/templates/kuttl/configuration/10-install-zk.yaml.j2 +++ /dev/null @@ -1,24 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -timeout: 300 ---- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperCluster -metadata: - name: test-zk -spec: - image: - productVersion: "{{ test_scenario['values']['zookeeper-latest'] }}" - pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - clusterConfig: - vectorAggregatorConfigMapName: vector-aggregator-discovery -{% endif %} - servers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 1 diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 4cf03486..3817f971 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -77,7 +77,6 @@ tests: - name: configuration dimensions: - kafka-latest - - zookeeper-latest - openshift - name: upgrade dimensions: From 
ec9fd3cc4f92b31a537373c79fb74dc817428342 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 3 Sep 2025 19:02:42 +0200 Subject: [PATCH 42/90] fix upgrade tests --- rust/operator-binary/src/config/command.rs | 31 +++---------------- .../src/resource/statefulset.rs | 6 +++- .../{01-assert.yaml => 01-assert.yaml.j2} | 2 ++ .../kuttl/upgrade/01-install-zk.yaml.j2 | 2 ++ tests/templates/kuttl/upgrade/02-assert.yaml | 12 ------- .../templates/kuttl/upgrade/02-assert.yaml.j2 | 22 +++++++++++++ .../kuttl/upgrade/02-install-kafka.yaml.j2 | 14 ++++++++- .../templates/kuttl/upgrade/04-assert.yaml.j2 | 14 +++++++++ tests/test-definition.yaml | 1 + 9 files changed, 64 insertions(+), 40 deletions(-) rename tests/templates/kuttl/upgrade/{01-assert.yaml => 01-assert.yaml.j2} (70%) delete mode 100644 tests/templates/kuttl/upgrade/02-assert.yaml create mode 100644 tests/templates/kuttl/upgrade/02-assert.yaml.j2 diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index f1d518b4..f4248092 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -82,6 +82,7 @@ fn broker_start_command( let client_port = kafka_security.client_port(); // TODO: copy to tmp? mount readwrite folder? + // TODO: do "cat /tmp/{properties_file}" ? if kafka.is_controller_configured() { formatdoc! 
{" export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') @@ -92,7 +93,7 @@ fn broker_start_command( echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} - {controller_quorum_voters} + echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={controller_quorum_voters}\" >> /tmp/{properties_file} bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & @@ -103,11 +104,7 @@ fn broker_start_command( listeners = kafka_listeners.listeners(), advertised_listeners = kafka_listeners.advertised_listeners(), listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), - controller_quorum_voters = controller_quorum_voters_command( - product_version, - BROKER_PROPERTIES_FILE, - &to_quorum_voters(&controller_descriptors, client_port) - ), + controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), } } else { @@ -152,8 +149,7 @@ pub fn controller_kafka_container_command( echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} - {controller_quorum_voters} - cat /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={controller_quorum_voters}\" >> /tmp/{properties_file} bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} & @@ 
-168,11 +164,7 @@ pub fn controller_kafka_container_command( listeners = to_listeners(client_port), listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), - controller_quorum_voters = controller_quorum_voters_command( - product_version, - CONTROLLER_PROPERTIES_FILE, - &to_quorum_voters(&controller_descriptors, client_port) - ), + controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) } } @@ -232,16 +224,3 @@ fn initial_controllers_command( ), } } - -fn controller_quorum_voters_command( - product_version: &str, - properties_file: &str, - quorum_voters: &str, -) -> String { - match product_version.starts_with("3.7") { - true => format!( - "echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={quorum_voters}\" >> /tmp/{properties_file}" - ), - false => "".to_string(), - } -} diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 5f287058..66850359 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -20,7 +20,7 @@ use stackable_operator::{ k8s_openapi::{ DeepMerge, api::{ - apps::v1::{StatefulSet, StatefulSetSpec}, + apps::v1::{StatefulSet, StatefulSetSpec, StatefulSetUpdateStrategy}, core::v1::{ ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, EnvVarSource, ExecAction, ObjectFieldSelector, PodSpec, Probe, ServiceAccount, TCPSocketAction, @@ -839,6 +839,10 @@ pub fn build_controller_rolegroup_statefulset( .build(), spec: Some(StatefulSetSpec { pod_management_policy: Some("Parallel".to_string()), + update_strategy: Some(StatefulSetUpdateStrategy { + type_: Some("RollingUpdate".to_string()), + ..StatefulSetUpdateStrategy::default() + }), replicas: kafka_role .replicas(kafka, 
&rolegroup_ref.role_group) .context(RoleGroupReplicasSnafu)? diff --git a/tests/templates/kuttl/upgrade/01-assert.yaml b/tests/templates/kuttl/upgrade/01-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/upgrade/01-assert.yaml rename to tests/templates/kuttl/upgrade/01-assert.yaml.j2 index c9cfcf5c..5d46bbff 100644 --- a/tests/templates/kuttl/upgrade/01-assert.yaml +++ b/tests/templates/kuttl/upgrade/01-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 index 5ab2c212..dba37759 100644 --- a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -18,3 +19,4 @@ spec: roleGroups: default: replicas: 1 +{% endif %} \ No newline at end of file diff --git a/tests/templates/kuttl/upgrade/02-assert.yaml b/tests/templates/kuttl/upgrade/02-assert.yaml deleted file mode 100644 index 7c231d57..00000000 --- a/tests/templates/kuttl/upgrade/02-assert.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 300 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-default -status: - readyReplicas: 1 - replicas: 1 diff --git a/tests/templates/kuttl/upgrade/02-assert.yaml.j2 b/tests/templates/kuttl/upgrade/02-assert.yaml.j2 new file mode 100644 index 00000000..264b636c --- /dev/null +++ b/tests/templates/kuttl/upgrade/02-assert.yaml.j2 @@ -0,0 +1,22 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: 
test-kafka-broker-default +status: + readyReplicas: 1 + replicas: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 +{% endif %} diff --git a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 index b7b1ec7d..e448d77a 100644 --- a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 @@ -53,9 +53,21 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} - zookeeperConfigMapName: test-kafka-znode +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} + zookeeperConfigMapName: test-zk +{% else %} + controllers: + config: + gracefulShutdownTimeout: 30s # speed up tests + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 +{% endif %} brokers: config: + gracefulShutdownTimeout: 30s # speed up tests logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: diff --git a/tests/templates/kuttl/upgrade/04-assert.yaml.j2 b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 index 2a95af44..19434a7c 100644 --- a/tests/templates/kuttl/upgrade/04-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 @@ -14,3 +14,17 @@ status: replicas: 1 currentReplicas: 1 updatedReplicas: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default + labels: + app.kubernetes.io/version: "{{ test_scenario['values']['upgrade_new'] }}-stackable0.0.0-dev" +status: + readyReplicas: 3 + replicas: 3 + currentReplicas: 3 + updatedReplicas: 3 +{% endif %} diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 
3817f971..d89fda74 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -85,6 +85,7 @@ tests: - upgrade_old - use-client-tls - use-client-auth-tls + - use-kraft-controller - openshift - name: tls dimensions: From f20d5f32811ad61503c8e0d3f96a3eb5812f8edf Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 3 Sep 2025 20:37:10 +0200 Subject: [PATCH 43/90] start fix remove rolegroup tests --- rust/operator-binary/src/crd/mod.rs | 1 + .../{01-assert.yaml => 01-assert.yaml.j2} | 2 + .../delete-rolegroup/01-install-zk.yaml.j2 | 2 + .../kuttl/delete-rolegroup/02-assert.yaml.j2 | 38 +++++++++++++++++++ .../delete-rolegroup/02-install-kafka.yaml.j2 | 14 +++++++ .../kuttl/delete-rolegroup/03-assert.yaml | 12 ------ .../{02-assert.yaml => 03-assert.yaml.j2} | 8 ++-- .../03-delete-secondary.yaml.j2 | 13 +++++++ .../kuttl/delete-rolegroup/03-errors.yaml | 5 --- .../kuttl/delete-rolegroup/03-errors.yaml.j2 | 12 ++++++ .../kuttl/upgrade/01-install-zk.yaml.j2 | 2 +- tests/test-definition.yaml | 1 + 12 files changed, 89 insertions(+), 21 deletions(-) rename tests/templates/kuttl/delete-rolegroup/{01-assert.yaml => 01-assert.yaml.j2} (70%) create mode 100644 tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 delete mode 100644 tests/templates/kuttl/delete-rolegroup/03-assert.yaml rename tests/templates/kuttl/delete-rolegroup/{02-assert.yaml => 03-assert.yaml.j2} (62%) delete mode 100644 tests/templates/kuttl/delete-rolegroup/03-errors.yaml create mode 100644 tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 728bf14b..42691ae4 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -258,6 +258,7 @@ impl v1alpha1::KafkaCluster { /// /// We try to predict the pods here rather than looking at the current cluster state in order to /// avoid instance churn. 
+ // TODO: this currently only checks within each role, node.id must be unique for all brokers and controllers pub fn pod_descriptors( &self, kafka_role: &KafkaRole, diff --git a/tests/templates/kuttl/delete-rolegroup/01-assert.yaml b/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 similarity index 70% rename from tests/templates/kuttl/delete-rolegroup/01-assert.yaml rename to tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 index c9cfcf5c..5d46bbff 100644 --- a/tests/templates/kuttl/delete-rolegroup/01-assert.yaml +++ b/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -10,3 +11,4 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 index 6b462fa4..2479c30b 100644 --- a/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -18,3 +19,4 @@ spec: roleGroups: default: replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 new file mode 100644 index 00000000..36236617 --- /dev/null +++ b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 @@ -0,0 +1,38 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-secondary +status: + readyReplicas: 1 + replicas: 1 +{% if 
test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 2 + replicas: 2 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-secondary +status: + readyReplicas: 1 + replicas: 1 +{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 index be8651b3..b88e8b7d 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 @@ -20,9 +20,23 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk +{% else %} + controllers: + config: + gracefulShutdownTimeout: 30s # speed up tests + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 2 + secondary: + replicas: 1 +{% endif %} brokers: config: + gracefulShutdownTimeout: 30s # speed up tests logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: diff --git a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml deleted file mode 100644 index 7c231d57..00000000 --- a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 300 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-default -status: - readyReplicas: 1 - replicas: 1 diff --git a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 similarity index 62% rename from tests/templates/kuttl/delete-rolegroup/02-assert.yaml rename to 
tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 index f88993ed..e9d67757 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml +++ b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 @@ -10,11 +10,13 @@ metadata: status: readyReplicas: 1 replicas: 1 +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} --- apiVersion: apps/v1 kind: StatefulSet metadata: - name: test-kafka-broker-secondary + name: test-kafka-controller-default status: - readyReplicas: 1 - replicas: 1 + readyReplicas: 2 + replicas: 2 +{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 index 3c7824a8..3951c7b3 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 @@ -19,9 +19,22 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} +{% if test_scenario['values']['use-kraft-controller'] == 'false' %} zookeeperConfigMapName: test-zk +{% else %} + controllers: + config: + gracefulShutdownTimeout: 30s # speed up tests + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 2 + secondary: null +{% endif %} brokers: config: + gracefulShutdownTimeout: 30s # speed up tests logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: diff --git a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml deleted file mode 100644 index 6a1a6cf0..00000000 --- a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml +++ /dev/null @@ -1,5 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-secondary diff --git a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 
b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 new file mode 100644 index 00000000..35d9e39d --- /dev/null +++ b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 @@ -0,0 +1,12 @@ +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-secondary +{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-secondary +{% endif %} diff --git a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 index dba37759..850aa4cc 100644 --- a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 @@ -19,4 +19,4 @@ spec: roleGroups: default: replicas: 1 -{% endif %} \ No newline at end of file +{% endif %} diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index d89fda74..160e2f6c 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -99,6 +99,7 @@ tests: dimensions: - kafka - zookeeper-latest + - use-kraft-controller - openshift - name: logging dimensions: From 1f8d9a5a2d633b4847d01160aae65c7892ffdcbe Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 4 Sep 2025 14:30:00 +0200 Subject: [PATCH 44/90] started kraft docs --- .../pages/usage-guide/kraft-controller.adoc | 104 ++++++++++++++++++ docs/modules/kafka/partials/nav.adoc | 1 + 2 files changed, 105 insertions(+) create mode 100644 docs/modules/kafka/pages/usage-guide/kraft-controller.adoc diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc new file mode 100644 index 00000000..362e35d9 --- /dev/null +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -0,0 +1,104 @@ += KRaft +:description: Apache Kafka KRaft mode with the Stackable Operator for Apache Kafka + +Apache Kafka's KRaft mode (Kafka Raft Metadata mode) replaces Apache ZooKeeper with 
Kafka’s own built-in consensus mechanism based on the Raft protocol. +This simplifies Kafka’s architecture, reducing operational complexity by consolidating cluster metadata management into Kafka itself. + +WARNING: The Stackable Operator for Apache Kafka currently does not support automatic cluster upgrades from Apache ZooKeeper to KRaft. + +== Overview + +* Introduced: Kafka 2.8.0 (early preview, not production-ready). +* Matured: Kafka 3.3.x (production-ready, though ZooKeeper is still supported). +* Default & Recommended: Kafka 3.5+ strongly recommends KRaft for new clusters. +* Full Replacement: Kafka 4.0.0 (2025) removes ZooKeeper completely. +* Migration: Tools exist to migrate from ZooKeeper to KRaft, but new deployments should start with KRaft. + +== Configuration + +The Stackable Kafka operator introduces a new xref:concepts:roles-and-role-groups.adoc[role] in the KafkaCluster CRD called KRaft `Controller`. +Configuring the `Controller` will put Kafka into KRaft mode. Apache ZooKeeper will not be required anymore. + +[source,yaml] +---- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: kafka +spec: + image: + productVersion: "3.9.1" + brokers: + roleGroups: + default: + replicas: 1 + controllers: + roleGroups: + default: + replicas: 3 +---- + +NOTE: This is mutually exclusive with `spec.clusterConfig.zookeeperConfigMapName`. + +=== Recommendations + +A minimal KRaft setup consisting of at least 3 Controllers has the following https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/[resource requirements]: + +* `600m` CPU request +* `3000m` CPU limit +* `3000Mi` memory request and limit +* `6Gi` persistent storage + +NOTE: The Controller replicas should sum up to an odd number for the Raft consensus. 
+ +=== Resources + +Corresponding to the values above, the operator uses the following resource defaults: + +[source,yaml] +---- +controllers: + config: + resources: + memory: + limit: 1Gi + cpu: + min: 250m + max: 1000m + storage: + logDirs: + capacity: 2Gi +---- + +=== Affinities + +=== PDBs + +=== Overrides + +== Internal operator details + +KRaft mode requires major configuration changes compared to ZooKeeper: + +* `cluster-id`: This is set to the `metadata.uid` of the KafkaCluster resource during initial formatting +* `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and `replica` id. +* `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not possible. + +== Troubleshooting + +=== Cluster does not start + +Check that at least a quorum (majority) of controllers are reachable. + +=== Frequent leader elections + +Likely caused by controller resource starvation or unstable Kubernetes scheduling. + +=== Migration issues (ZooKeeper to KRaft) + +Ensure Kafka version 3.9.x or higher and follow the official migration documentation. + +=== Scaling issues + +https://developers.redhat.com/articles/2024/11/27/dynamic-kafka-controller-quorum[Dynamic scaling] is only supported from Kafka version 3.9.0. +If you are using older versions, automatic scaling may not work properly (e.g. adding or removing controller replicas). 
diff --git a/docs/modules/kafka/partials/nav.adoc b/docs/modules/kafka/partials/nav.adoc index 7de36dbd..faaeab95 100644 --- a/docs/modules/kafka/partials/nav.adoc +++ b/docs/modules/kafka/partials/nav.adoc @@ -2,6 +2,7 @@ ** xref:kafka:getting_started/installation.adoc[] ** xref:kafka:getting_started/first_steps.adoc[] * xref:kafka:usage-guide/index.adoc[] +** xref:kafka:usage-guide/kraft-controller.adoc[] ** xref:kafka:usage-guide/listenerclass.adoc[] ** xref:kafka:usage-guide/storage-resources.adoc[] ** xref:kafka:usage-guide/security.adoc[] From ce171c0012024f662045427fd111a50452af2aaa Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 5 Sep 2025 15:56:27 +0200 Subject: [PATCH 45/90] fix unit tests --- rust/operator-binary/src/config/jvm.rs | 2 +- rust/operator-binary/src/crd/listener.rs | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 75039767..f3ecb9e8 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -132,7 +132,7 @@ mod tests { "-Djava.security.properties=/stackable/config/security.properties \ -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9606:/stackable/jmx/broker.yaml" ); - assert_eq!(heap_jvm_args, "-Xmx819m -Xms819m"); + assert_eq!(heap_jvm_args, "-Xmx1638m -Xms1638m"); } #[test] diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index f63bd132..124d0342 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -429,11 +429,15 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{controller_name}:{controller_protocol},{controller_auth_name}:{controller_auth_protocol}", name = KafkaListenerName::ClientAuth, protocol = 
KafkaListenerProtocol::Ssl, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Ssl, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Ssl, + controller_auth_name = KafkaListenerName::ControllerAuth, + controller_auth_protocol = KafkaListenerProtocol::Ssl, ) ); @@ -477,11 +481,13 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{controller_name}:{controller_protocol}", name = KafkaListenerName::Client, protocol = KafkaListenerProtocol::Ssl, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Ssl, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Ssl, ) ); @@ -526,11 +532,13 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{controller_name}:{controller_protocol}", name = KafkaListenerName::Client, protocol = KafkaListenerProtocol::Plaintext, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Plaintext, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Plaintext, ) ); } @@ -617,13 +625,15 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol},{bootstrap_name}:{bootstrap_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{bootstrap_name}:{bootstrap_protocol},{controller_name}:{controller_protocol}", name = KafkaListenerName::Client, protocol = KafkaListenerProtocol::SaslSsl, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Ssl, bootstrap_name = KafkaListenerName::Bootstrap, bootstrap_protocol = 
KafkaListenerProtocol::SaslSsl, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Ssl, ) ); } From e5f366b7759e614d8ac06e4fca9ff268bc5df90e Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 5 Sep 2025 16:04:19 +0200 Subject: [PATCH 46/90] fix docs --- rust/operator-binary/src/crd/mod.rs | 6 +++--- rust/operator-binary/src/kafka_controller.rs | 4 ++-- rust/operator-binary/src/resource/service.rs | 2 +- rust/operator-binary/src/resource/statefulset.rs | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 42691ae4..dbf00e90 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -341,7 +341,7 @@ pub struct KafkaPodDescriptor { impl KafkaPodDescriptor { /// Return the fully qualified domain name - /// Format: ...svc. + /// Format: `...svc.` pub fn fqdn(&self) -> String { format!( "{pod_name}.{service_name}.{namespace}.svc.{cluster_domain}", @@ -357,11 +357,11 @@ impl KafkaPodDescriptor { } /// Build the Kraft voter String - /// See: https://kafka.apache.org/documentation/#kraft_storage_voters + /// See: /// Example: 0@controller-0:1234:0000000000-00000000000 /// * 0 is the replica id /// * 0000000000-00000000000 is the replica directory id (even though the used Uuid states to be type 4 it does not work) - /// See: https://github.com/apache/kafka/blob/c5169ca805bd03d870a5bcd49744dcc34891cf15/clients/src/main/java/org/apache/kafka/common/Uuid.java#L29 + /// See: /// * controller-0 is the replica's host, /// * 1234 is the replica's port. // TODO(@maltesander): Even though the used Uuid states to be type 4 it does not work... 0000000000-00000000000 works... 
diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 539d66a5..47b701a7 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -49,7 +49,7 @@ use crate::{ resource::{ configmap::build_rolegroup_config_map, listener::build_broker_rolegroup_bootstrap_listener, - service::build_broker_rolegroup_service, + service::build_rolegroup_service, statefulset::{build_broker_rolegroup_statefulset, build_controller_rolegroup_statefulset}, }, }; @@ -348,7 +348,7 @@ pub async fn reconcile_kafka( .context(FailedToResolveConfigSnafu)?; let rg_service = - build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref) + build_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref) .context(BuildServiceSnafu)?; let rg_configmap = build_rolegroup_config_map( diff --git a/rust/operator-binary/src/resource/service.rs b/rust/operator-binary/src/resource/service.rs index 38f7b34b..d9c7c7c9 100644 --- a/rust/operator-binary/src/resource/service.rs +++ b/rust/operator-binary/src/resource/service.rs @@ -34,7 +34,7 @@ pub enum Error { /// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup /// /// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. -pub fn build_broker_rolegroup_service( +pub fn build_rolegroup_service( kafka: &v1alpha1::KafkaCluster, resolved_product_image: &ResolvedProductImage, rolegroup: &RoleGroupRef, diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 66850359..29fbc97f 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -155,7 +155,8 @@ pub enum Error { /// The broker rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. 
/// -/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_broker_rolegroup_service`]). +/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding +/// [`Service`](`stackable_operator::k8s_openapi::api::core::v1::Service`) from [`build_rolegroup_service`](`crate::resource::service::build_rolegroup_service`). #[allow(clippy::too_many_arguments)] pub fn build_broker_rolegroup_statefulset( kafka: &v1alpha1::KafkaCluster, From d654a978a5bc023cf8443bab9cf9d7d22f705263 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 5 Sep 2025 16:12:15 +0200 Subject: [PATCH 47/90] run pre commit --- .vscode/launch.json | 2 +- .../pages/usage-guide/kraft-controller.adoc | 4 +-- rust/operator-binary/src/config/command.rs | 4 +-- .../20-install-kafka.yaml.j2 | 2 +- .../cluster-operation/30-stop-kafka.yaml.j2 | 2 +- .../cluster-operation/40-pause-kafka.yaml.j2 | 2 +- .../50-restart-kafka.yaml.j2 | 2 +- .../kuttl/delete-rolegroup/02-assert.yaml.j2 | 2 +- .../delete-rolegroup/02-install-kafka.yaml.j2 | 4 +-- .../kuttl/delete-rolegroup/03-assert.yaml.j2 | 2 +- .../03-delete-secondary.yaml.j2 | 4 +-- .../kuttl/logging/test_log_aggregation.py | 35 ++++++++++--------- .../kuttl/smoke/30-install-kafka.yaml.j2 | 2 +- tests/templates/kuttl/smoke/metrics.py | 8 +++-- .../kuttl/upgrade/02-install-kafka.yaml.j2 | 4 +-- 15 files changed, 42 insertions(+), 37 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0df32d9c..8c151dc2 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -20,4 +20,4 @@ "cwd": "${workspaceFolder}" } ] -} \ No newline at end of file +} diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index 362e35d9..cb1a9b3e 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ 
b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -1,7 +1,7 @@ = KRaft :description: Apache Kafka KRaft mode with the Stackable Operator for Apache Kafka -Apache Kafka's KRaft mode (Kafka Raft Metadata mode) replaces Apache ZooKeeper with Kafka’s own built-in consensus mechanism based on the Raft protocol. +Apache Kafka's KRaft mode (Kafka Raft Metadata mode) replaces Apache ZooKeeper with Kafka’s own built-in consensus mechanism based on the Raft protocol. This simplifies Kafka’s architecture, reducing operational complexity by consolidating cluster metadata management into Kafka itself. WARNING: The Stackable Operator for Apache Kafka currently does not support automatic cluster upgrades from Apache ZooKeeper to KRaft. @@ -76,7 +76,7 @@ controllers: === Overrides -== Internal operator details +== Internal operator details KRaft mode requires major configuration changes compared to ZooKeeper: diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index f4248092..1f324397 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -151,12 +151,12 @@ pub fn controller_kafka_container_command( echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={controller_quorum_voters}\" >> /tmp/{properties_file} - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} & wait_for_termination $! 
{create_vector_shutdown_file_command} - ", + ", remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), config_dir = STACKABLE_CONFIG_DIR, properties_file = CONTROLLER_PROPERTIES_FILE, diff --git a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 index 2079522a..4c49ef6d 100644 --- a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 @@ -16,7 +16,7 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} diff --git a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 index 54d7d242..d27784c7 100644 --- a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 @@ -16,7 +16,7 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} diff --git a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 index a9b27d01..48feb7d3 100644 --- a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 @@ -16,7 +16,7 @@ 
spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} diff --git a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 index ac29774f..0148b052 100644 --- a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 @@ -15,7 +15,7 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} diff --git a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 index 36236617..4160b291 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 @@ -35,4 +35,4 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} +{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 index b88e8b7d..6d00f8d2 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 @@ -27,13 +27,13 @@ spec: config: gracefulShutdownTimeout: 30s # speed up tests logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + enableVectorAgent: {{ 
lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: default: replicas: 2 secondary: replicas: 1 -{% endif %} +{% endif %} brokers: config: gracefulShutdownTimeout: 30s # speed up tests diff --git a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 index e9d67757..9565546f 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 @@ -19,4 +19,4 @@ metadata: status: readyReplicas: 2 replicas: 2 -{% endif %} +{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 index 3951c7b3..aee1bba7 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 @@ -26,12 +26,12 @@ spec: config: gracefulShutdownTimeout: 30s # speed up tests logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: default: replicas: 2 secondary: null -{% endif %} +{% endif %} brokers: config: gracefulShutdownTimeout: 30s # speed up tests diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py index ebc38cd8..c27b1992 100755 --- a/tests/templates/kuttl/logging/test_log_aggregation.py +++ b/tests/templates/kuttl/logging/test_log_aggregation.py @@ -4,9 +4,9 @@ def check_sent_events(): response = requests.post( - 'http://kafka-vector-aggregator:8686/graphql', + "http://kafka-vector-aggregator:8686/graphql", json={ - 'query': """ + "query": """ { transforms(first:100) { nodes { @@ -20,29 +20,30 @@ def check_sent_events(): } } """ - } + }, ) - assert response.status_code == 200, \ - 'Cannot access the API of the vector aggregator.' 
+ assert response.status_code == 200, ( + "Cannot access the API of the vector aggregator." + ) result = response.json() - transforms = result['data']['transforms']['nodes'] + transforms = result["data"]["transforms"]["nodes"] for transform in transforms: - sentEvents = transform['metrics']['sentEventsTotal'] - componentId = transform['componentId'] + sentEvents = transform["metrics"]["sentEventsTotal"] + componentId = transform["componentId"] - if componentId == 'filteredInvalidEvents': - assert sentEvents is None or \ - sentEvents['sentEventsTotal'] == 0, \ - 'Invalid log events were sent.' + if componentId == "filteredInvalidEvents": + assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, ( + "Invalid log events were sent." + ) else: - assert sentEvents is not None and \ - sentEvents['sentEventsTotal'] > 0, \ - f'No events were sent in "{componentId}".' + assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, ( + f'No events were sent in "{componentId}".' + ) -if __name__ == '__main__': +if __name__ == "__main__": check_sent_events() - print('Test successful!') + print("Test successful!") diff --git a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 index 4b3133bb..430d549e 100644 --- a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 @@ -40,7 +40,7 @@ spec: roleGroups: default: replicas: 3 -{% endif %} +{% endif %} brokers: configOverrides: broker.properties: diff --git a/tests/templates/kuttl/smoke/metrics.py b/tests/templates/kuttl/smoke/metrics.py index 8afffe6b..7c9f8027 100644 --- a/tests/templates/kuttl/smoke/metrics.py +++ b/tests/templates/kuttl/smoke/metrics.py @@ -5,8 +5,12 @@ if __name__ == "__main__": result = 0 - LOG_LEVEL = 'DEBUG' # if args.debug else 'INFO' - logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stdout) + LOG_LEVEL = "DEBUG" # if args.debug 
else 'INFO' + logging.basicConfig( + level=LOG_LEVEL, + format="%(asctime)s %(levelname)s: %(message)s", + stream=sys.stdout, + ) http_code = requests.get("http://test-kafka-broker-default:9606").status_code if http_code != 200: diff --git a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 index e448d77a..9a3c4071 100644 --- a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 @@ -60,11 +60,11 @@ spec: config: gracefulShutdownTimeout: 30s # speed up tests logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: default: replicas: 3 -{% endif %} +{% endif %} brokers: config: gracefulShutdownTimeout: 30s # speed up tests From 53121decb7f6035f499cfdb623888aa7f669e29b Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 5 Sep 2025 16:21:00 +0200 Subject: [PATCH 48/90] fix yaml lint --- deploy/config-spec/properties.yaml | 1 + deploy/helm/kafka-operator/configs/properties.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index 8ebbbeb0..b6f80cdd 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -1,3 +1,4 @@ +--- version: 0.1.0 spec: units: diff --git a/deploy/helm/kafka-operator/configs/properties.yaml b/deploy/helm/kafka-operator/configs/properties.yaml index 8ebbbeb0..b6f80cdd 100644 --- a/deploy/helm/kafka-operator/configs/properties.yaml +++ b/deploy/helm/kafka-operator/configs/properties.yaml @@ -1,3 +1,4 @@ +--- version: 0.1.0 spec: units: From cfac3fcebad6b54371e518293d4f6280a44d753e Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 8 Sep 2025 08:32:54 +0200 Subject: [PATCH 49/90] clarify docs, clean up --- .../pages/usage-guide/kraft-controller.adoc | 19 +++++++++---------- 
.../kafka/pages/usage-guide/overrides.adoc | 3 ++- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index cb1a9b3e..d68fa54c 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -1,7 +1,7 @@ -= KRaft += KRaft mode :description: Apache Kafka KRaft mode with the Stackable Operator for Apache Kafka -Apache Kafka's KRaft mode (Kafka Raft Metadata mode) replaces Apache ZooKeeper with Kafka’s own built-in consensus mechanism based on the Raft protocol. +Apache Kafka's KRaft mode replaces Apache ZooKeeper with Kafka’s own built-in consensus mechanism based on the Raft protocol. This simplifies Kafka’s architecture, reducing operational complexity by consolidating cluster metadata management into Kafka itself. WARNING: The Stackable Operator for Apache Kafka currently does not support automatic cluster upgrades from Apache ZooKeeper to KRaft. @@ -38,7 +38,7 @@ spec: replicas: 3 ---- -NOTE: This is mutally exclusive with `spec.clusterConfig.zookeeperConfigMapName`. +NOTE: Using `spec.controllers` is mutally exclusive with `spec.clusterConfig.zookeeperConfigMapName`. === Recommendations @@ -70,19 +70,17 @@ controllers: capacity: 2Gi ---- -=== Affinities - -=== PDBs - === Overrides +The configuration of overrides, JVM arguments etc. is similar to the Broker and documented on the xref:concepts:overrides.adoc[concepts page]. + == Internal operator details KRaft mode requires major configuration changes compared to ZooKeeper: * `cluster-id`: This is set to the `metadata.uid` of the KafkaCluster resource during initial formatting -* `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and `replica` id. -* `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not possible. 
+* `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and added `replica` id. +* `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not supported. == Troubleshooting @@ -96,7 +94,8 @@ Likely caused by controller resource starvation or unstable Kubernetes schedulin === Migration issues (ZooKeeper to KRaft) -Ensure Kafka version 3.9.x and higher and follow the official migration documentation. +Ensure Kafka version 3.9.x and higher and follow the official migration documentation. +The Stackable Kafka operator currently does not support the migration. === Scaling issues diff --git a/docs/modules/kafka/pages/usage-guide/overrides.adoc b/docs/modules/kafka/pages/usage-guide/overrides.adoc index 4a24d67c..4c874743 100644 --- a/docs/modules/kafka/pages/usage-guide/overrides.adoc +++ b/docs/modules/kafka/pages/usage-guide/overrides.adoc @@ -8,7 +8,8 @@ IMPORTANT: Overriding operator-set properties (such as the ports) can interfere For a role or role group, at the same level of `config`, you can specify: `configOverrides` for the following files: -* `broker.properties` +* `broker.properties` (brokers only) +* `controller.properties` (KRaft controllers only) * `security.properties` For example, if you want to set the `auto.create.topics.enable` to disable automatic topic creation, it can be configured in the KafkaCluster resource like so: From 6054d1c8435504d344c266a5637b331b1eb42f38 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 8 Sep 2025 08:38:16 +0200 Subject: [PATCH 50/90] linter --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index d68fa54c..0a667f9d 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc 
@@ -94,7 +94,7 @@ Likely caused by controller resource starvation or unstable Kubernetes schedulin === Migration issues (ZooKeeper to KRaft) -Ensure Kafka version 3.9.x and higher and follow the official migration documentation. +Ensure Kafka version 3.9.x and higher and follow the official migration documentation. The Stackable Kafka operator currently does not support the migration. === Scaling issues From 0e6244cfabb45267ac5aca620fd07326c40ae495 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 8 Sep 2025 17:42:30 +0200 Subject: [PATCH 51/90] improve jaas config --- rust/operator-binary/src/config/command.rs | 15 ++++++++++----- rust/operator-binary/src/crd/listener.rs | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 1f324397..12754195 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -70,11 +70,16 @@ fn broker_start_command( let jaas_config = match kafka_security.has_kerberos_enabled() { true => { - let service_name = KafkaRole::Broker.kerberos_service_name(); - let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); - let bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR); - // TODO replace client and bootstrap below with constants - format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" --override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string() + formatdoc! 
{" + --override \"{client_jaas_config}=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" \ + --override \"{bootstrap_jaas_config}=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\" + ", + client_jaas_config = KafkaListenerName::Client.listener_gssapi_sasl_jaas_config(), + bootstrap_jaas_config = KafkaListenerName::Bootstrap.listener_gssapi_sasl_jaas_config(), + service_name = KafkaRole::Broker.kerberos_service_name(), + broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR), + bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR), + } } false => "".to_string(), }; diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 124d0342..96b995a9 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -97,6 +97,13 @@ impl KafkaListenerName { listener_name = self.to_string().to_lowercase() ) } + + pub fn listener_gssapi_sasl_jaas_config(&self) -> String { + format!( + "listener.name.{listener_name}.gssapi.sasl.jaas.config", + listener_name = self.to_string().to_lowercase() + ) + } } #[derive(Debug)] From d413bb8366176d84478ba08e3a76ace331f3e9e5 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 8 Sep 2025 18:20:55 +0200 Subject: [PATCH 52/90] add comments, remove todo --- rust/operator-binary/src/config/command.rs | 6 +----- rust/operator-binary/src/crd/role/controller.rs | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 12754195..c25463f1 100644 --- a/rust/operator-binary/src/config/command.rs +++ 
b/rust/operator-binary/src/config/command.rs @@ -175,9 +175,7 @@ pub fn controller_kafka_container_command( } fn to_listeners(port: u16) -> String { - // TODO: - // - document that variables are set in stateful set - // - customize listener (CONTROLLER / CONTROLLER_AUTH?) + // The environment variables are set in the statefulset of the controller format!( "{listener_name}://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}", listener_name = KafkaListenerName::Controller @@ -185,10 +183,8 @@ fn to_listeners(port: u16) -> String { } fn to_listener_security_protocol_map(kafka_listeners: &KafkaListenerConfig) -> String { - // TODO: make configurable - CONTROLLER_AUTH kafka_listeners .listener_security_protocol_map_for_listener(&KafkaListenerName::Controller) - // todo better error .unwrap_or("".to_string()) } diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index 0113c4d2..9be5464f 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -42,7 +42,6 @@ pub const CONTROLLER_PROPERTIES_FILE: &str = "controller.properties"; #[strum(serialize_all = "kebab-case")] pub enum ControllerContainer { Vector, - // TODO: Kafka, Kraft, Controller? 
Kafka, } From 91659bb2b3e97a937f3a0eba5dd4e5c4f393dd56 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 9 Sep 2025 15:49:49 +0200 Subject: [PATCH 53/90] remove redundant memory increase --- tests/templates/kuttl/tls/20-install-kafka.yaml.j2 | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 index f69b1c8e..824dc6c0 100644 --- a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 @@ -62,6 +62,9 @@ spec: zookeeperConfigMapName: test-kafka-znode {% else %} controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: default: replicas: 3 @@ -70,11 +73,6 @@ spec: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - resources: - # There are some test failures that kill the container due to error 137 (not enough memory) - # using the default (currently 1Gi), when running the tls test scripts. - memory: - limit: '1.3Gi' roleGroups: default: replicas: 3 From 43a175968a99d8491bbea892075bd4b5da1653f1 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 9 Sep 2025 15:50:23 +0200 Subject: [PATCH 54/90] implement node.id hashing check for all roles --- rust/operator-binary/src/crd/mod.rs | 65 ++++++++++++++---------- rust/operator-binary/src/crd/role/mod.rs | 4 +- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index dbf00e90..90824544 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -77,11 +77,12 @@ pub enum Error { KraftAndZookeeperConfigured, #[snafu(display( - "Could not calculate ({role}) 'node.id' hash offset for rolegroup '{rolegroup}' which collides with rolegroup '{colliding_rolegroup}'. Please try to rename one of the rolegroups." 
+ "Could not calculate 'node.id' hash offset for role '{role}' and rolegroup '{rolegroup}' which collides with role '{coliding_role}' and rolegroup '{colliding_rolegroup}'. Please try to rename one of the rolegroups." ))] KafkaNodeIdHashCollision { role: KafkaRole, rolegroup: String, + coliding_role: KafkaRole, colliding_rolegroup: String, }, } @@ -258,41 +259,49 @@ impl v1alpha1::KafkaCluster { /// /// We try to predict the pods here rather than looking at the current cluster state in order to /// avoid instance churn. - // TODO: this currently only checks within each role, node.id must be unique for all brokers and controllers pub fn pod_descriptors( &self, - kafka_role: &KafkaRole, + requested_kafka_role: &KafkaRole, cluster_info: &KubernetesClusterInfo, ) -> Result, Error> { let namespace = self.metadata.namespace.clone().context(NoNamespaceSnafu)?; - let rolegroup_replicas = self.extract_rolegroup_replicas(kafka_role)?; let mut pod_descriptors = Vec::new(); - let mut seen_hashes = HashMap::::new(); - - for (rolegroup, replicas) in rolegroup_replicas { - let rolegroup_ref = self.rolegroup_ref(kafka_role, &rolegroup); - let node_id_hash_offset = node_id_hash32_offset(&rolegroup_ref); - - match seen_hashes.get(&node_id_hash_offset) { - Some(colliding_rolegroup) => { - return KafkaNodeIdHashCollisionSnafu { - role: kafka_role.clone(), - rolegroup: rolegroup.clone(), - colliding_rolegroup: colliding_rolegroup.clone(), + let mut seen_hashes = HashMap::::new(); + + for current_role in KafkaRole::roles() { + let rolegroup_replicas = self.extract_rolegroup_replicas(¤t_role)?; + for (rolegroup, replicas) in rolegroup_replicas { + let rolegroup_ref = self.rolegroup_ref(¤t_role, &rolegroup); + let node_id_hash_offset = node_id_hash32_offset(&rolegroup_ref); + + // check collisions + match seen_hashes.get(&node_id_hash_offset) { + Some((coliding_role, coliding_rolegroup)) => { + return KafkaNodeIdHashCollisionSnafu { + role: current_role.clone(), + rolegroup: 
rolegroup.clone(), + coliding_role: coliding_role.clone(), + colliding_rolegroup: coliding_rolegroup.to_string(), + } + .fail(); + } + None => { + seen_hashes.insert(node_id_hash_offset, (current_role.clone(), rolegroup)) + } + }; + + // only return descriptors for selected role + if current_role == *requested_kafka_role { + for replica in 0..replicas { + pod_descriptors.push(KafkaPodDescriptor { + namespace: namespace.clone(), + role_group_service_name: rolegroup_ref.object_name(), + replica, + cluster_domain: cluster_info.cluster_domain.clone(), + node_id: node_id_hash_offset + u32::from(replica), + }); } - .fail(); } - None => seen_hashes.insert(node_id_hash_offset, rolegroup), - }; - - for replica in 0..replicas { - pod_descriptors.push(KafkaPodDescriptor { - namespace: namespace.clone(), - role_group_service_name: rolegroup_ref.object_name(), - replica, - cluster_domain: cluster_info.cluster_domain.clone(), - node_id: node_id_hash_offset + u32::from(replica), - }); } } diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 47ed5b3d..ec4e1ea2 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -110,10 +110,10 @@ pub enum KafkaRole { impl KafkaRole { /// Return all available roles - pub fn roles() -> Vec { + pub fn roles() -> Vec { let mut roles = vec![]; for role in Self::iter() { - roles.push(role.to_string()) + roles.push(role) } roles } From d5ee2fd513edae3d81ab15234eee5cdf8f0423b9 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 9 Sep 2025 16:38:56 +0200 Subject: [PATCH 55/90] fix linter --- tests/templates/kuttl/tls/20-install-kafka.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 index 824dc6c0..c90299c2 100644 --- a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 
@@ -64,7 +64,7 @@ spec: controllers: config: logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: default: replicas: 3 From 87809b50d0ca56078a978c60071228a4c4768730 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 10 Sep 2025 15:59:21 +0200 Subject: [PATCH 56/90] prepare log4j2 config --- rust/operator-binary/src/config/command.rs | 27 ++-- rust/operator-binary/src/crd/mod.rs | 2 - rust/operator-binary/src/product_logging.rs | 147 +++++++++++++----- .../operator-binary/src/resource/configmap.rs | 7 +- .../src/resource/statefulset.rs | 15 +- 5 files changed, 136 insertions(+), 62 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index c25463f1..a7a4cd97 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -6,18 +6,21 @@ use stackable_operator::{ utils::COMMON_BASH_TRAP_FUNCTIONS, }; -use crate::crd::{ - KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, - STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, - listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, - role::{ - KAFKA_ADVERTISED_LISTENERS, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, - KAFKA_CONTROLLER_QUORUM_VOTERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, - KAFKA_NODE_ID, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BROKER_PROPERTIES_FILE, - controller::CONTROLLER_PROPERTIES_FILE, +use crate::{ + crd::{ + KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, + STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, + listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, + role::{ + KAFKA_ADVERTISED_LISTENERS, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, + KAFKA_CONTROLLER_QUORUM_VOTERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, + 
KAFKA_NODE_ID, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BROKER_PROPERTIES_FILE, + controller::CONTROLLER_PROPERTIES_FILE, + }, + security::KafkaTlsSecurity, + v1alpha1, }, - security::KafkaTlsSecurity, - v1alpha1, + product_logging::STACKABLE_LOG_DIR, }; /// Returns the commands to start the main Kafka container @@ -45,7 +48,7 @@ pub fn broker_kafka_container_commands( remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR), set_realm_env = match kafka_security.has_kerberos_enabled() { - true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), + true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {STACKABLE_KERBEROS_KRB5_PATH})"), false => "".to_string(), }, broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kafka_security, product_version), diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 90824544..8d778124 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -52,8 +52,6 @@ pub const STACKABLE_LISTENER_BROKER_DIR: &str = "/stackable/listener-broker"; pub const STACKABLE_LISTENER_BOOTSTRAP_DIR: &str = "/stackable/listener-bootstrap"; pub const STACKABLE_DATA_DIR: &str = "/stackable/data"; pub const STACKABLE_CONFIG_DIR: &str = "/stackable/config"; -pub const STACKABLE_LOG_CONFIG_DIR: &str = "/stackable/log_config"; -pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; // kerberos pub const STACKABLE_KERBEROS_DIR: &str = "/stackable/kerberos"; pub const STACKABLE_KERBEROS_KRB5_PATH: &str = "/stackable/kerberos/krb5.conf"; diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 20d9e1ef..ddf480dc 100644 --- a/rust/operator-binary/src/product_logging.rs +++ 
b/rust/operator-binary/src/product_logging.rs @@ -11,65 +11,74 @@ use stackable_operator::{ }; use crate::crd::{ - STACKABLE_LOG_DIR, role::{AnyConfig, broker::BrokerContainer, controller::ControllerContainer}, v1alpha1, }; +pub const STACKABLE_LOG_CONFIG_DIR: &str = "/stackable/log_config"; +pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; +// log4j pub const LOG4J_CONFIG_FILE: &str = "log4j.properties"; -pub const KAFKA_LOG_FILE: &str = "kafka.log4j.xml"; - +pub const KAFKA_LOG4J_FILE: &str = "kafka.log4j.xml"; +// log4j2 +pub const LOG4J2_CONFIG_FILE: &str = "log4j2.properties"; +pub const KAFKA_LOG4J2_FILE: &str = "kafka.log4j2.xml"; +// max size pub const MAX_KAFKA_LOG_FILES_SIZE: MemoryQuantity = MemoryQuantity { value: 10.0, unit: BinaryMultiple::Mebi, }; -const CONSOLE_CONVERSION_PATTERN: &str = "[%d] %p %m (%c)%n"; +const CONSOLE_CONVERSION_PATTERN_LOG4J: &str = "[%d] %p %m (%c)%n"; +const CONSOLE_CONVERSION_PATTERN_LOG4J2: &str = "%d{ISO8601} %p [%t] %c - %m%n"; + +pub fn kafka_log_opts(product_version: &str) -> String { + if product_version.starts_with("4.") { + format!("-Dlog4j2.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J2_CONFIG_FILE}") + } else { + format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}") + } +} + +pub fn kafka_log_opts_env_var(product_version: &str) -> String { + if product_version.starts_with("4.") { + "KAFKA_LOG4J2_OPTS".to_string() + } else { + "KAFKA_LOG4J_OPTS".to_string() + } +} /// Extend the role group ConfigMap with logging and Vector configurations pub fn extend_role_group_config_map( + product_version: &str, rolegroup: &RoleGroupRef, merged_config: &AnyConfig, cm_builder: &mut ConfigMapBuilder, ) { - fn add_log4j_config_if_automatic( - cm_builder: &mut ConfigMapBuilder, - log_config: Option>, - log_config_file: &str, - container_name: impl Display, - log_file: &str, - max_log_file_size: MemoryQuantity, - ) { - if let Some(ContainerLogConfig { - choice: 
Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = log_config.as_deref() - { - cm_builder.add_data( - log_config_file, - product_logging::framework::create_log4j_config( - &format!("{STACKABLE_LOG_DIR}/{container_name}"), - log_file, - max_log_file_size - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32, - CONSOLE_CONVERSION_PATTERN, - log_config, - ), - ); - } + let container_name = match merged_config { + AnyConfig::Broker(_) => BrokerContainer::Kafka.to_string(), + AnyConfig::Controller(_) => ControllerContainer::Kafka.to_string(), + }; + + // Starting with Kafka 4.0, log4j2 is used instead of log4j. + match product_version.starts_with("4.") { + true => add_log4j2_config_if_automatic( + cm_builder, + Some(merged_config.kafka_logging()), + LOG4J2_CONFIG_FILE, + container_name, + KAFKA_LOG4J2_FILE, + MAX_KAFKA_LOG_FILES_SIZE, + ), + false => add_log4j_config_if_automatic( + cm_builder, + Some(merged_config.kafka_logging()), + LOG4J_CONFIG_FILE, + container_name, + KAFKA_LOG4J_FILE, + MAX_KAFKA_LOG_FILES_SIZE, + ), } - add_log4j_config_if_automatic( - cm_builder, - Some(merged_config.kafka_logging()), - LOG4J_CONFIG_FILE, - match merged_config { - AnyConfig::Broker(_) => BrokerContainer::Kafka.to_string(), - AnyConfig::Controller(_) => ControllerContainer::Kafka.to_string(), - }, - KAFKA_LOG_FILE, - MAX_KAFKA_LOG_FILES_SIZE, - ); let vector_log_config = merged_config.vector_logging(); let vector_log_config = if let ContainerLogConfig { @@ -88,3 +97,59 @@ pub fn extend_role_group_config_map( ); } } + +fn add_log4j_config_if_automatic( + cm_builder: &mut ConfigMapBuilder, + log_config: Option>, + log_config_file: &str, + container_name: impl Display, + log_file: &str, + max_log_file_size: MemoryQuantity, +) { + if let Some(ContainerLogConfig { + choice: Some(ContainerLogConfigChoice::Automatic(log_config)), + }) = log_config.as_deref() + { + cm_builder.add_data( + log_config_file, + product_logging::framework::create_log4j_config( + 
&format!("{STACKABLE_LOG_DIR}/{container_name}"), + log_file, + max_log_file_size + .scale_to(BinaryMultiple::Mebi) + .floor() + .value as u32, + CONSOLE_CONVERSION_PATTERN_LOG4J, + log_config, + ), + ); + } +} + +fn add_log4j2_config_if_automatic( + cm_builder: &mut ConfigMapBuilder, + log_config: Option>, + log_config_file: &str, + container_name: impl Display, + log_file: &str, + max_log_file_size: MemoryQuantity, +) { + if let Some(ContainerLogConfig { + choice: Some(ContainerLogConfigChoice::Automatic(log_config)), + }) = log_config.as_deref() + { + cm_builder.add_data( + log_config_file, + product_logging::framework::create_log4j2_config( + &format!("{STACKABLE_LOG_DIR}/{container_name}",), + log_file, + max_log_file_size + .scale_to(BinaryMultiple::Mebi) + .floor() + .value as u32, + CONSOLE_CONVERSION_PATTERN_LOG4J2, + log_config, + ), + ); + } +} diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index e96f42bd..54a06921 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -129,7 +129,12 @@ pub fn build_rolegroup_config_map( tracing::debug!(?kafka_config, "Applied kafka config"); tracing::debug!(?jvm_sec_props, "Applied JVM config"); - extend_role_group_config_map(rolegroup, merged_config, &mut cm_builder); + extend_role_group_config_map( + &resolved_product_image.product_version, + rolegroup, + merged_config, + &mut cm_builder, + ); cm_builder .build() diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 29fbc97f..f490efa9 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -51,7 +51,7 @@ use crate::{ self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, 
STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, + STACKABLE_LISTENER_BROKER_DIR, listener::get_kafka_listener_config, role::{ AnyConfig, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BrokerContainer, @@ -63,7 +63,10 @@ use crate::{ kafka_controller::KAFKA_CONTROLLER_NAME, kerberos::add_kerberos_pod_config, operations::graceful_shutdown::add_graceful_shutdown_config, - product_logging::{LOG4J_CONFIG_FILE, MAX_KAFKA_LOG_FILES_SIZE}, + product_logging::{ + MAX_KAFKA_LOG_FILES_SIZE, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, kafka_log_opts, + kafka_log_opts_env_var, + }, utils::build_recommended_labels, }; @@ -324,8 +327,8 @@ pub fn build_broker_rolegroup_statefulset( .context(ConstructJvmArgumentsSnafu)?, ) .add_env_var( - "KAFKA_LOG4J_OPTS", - format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), + kafka_log_opts_env_var(&resolved_product_image.product_version), + kafka_log_opts(&resolved_product_image.product_version), ) // Needed for the `containerdebug` process to log it's tracing information to. .add_env_var( @@ -666,8 +669,8 @@ pub fn build_controller_rolegroup_statefulset( .context(ConstructJvmArgumentsSnafu)?, ) .add_env_var( - "KAFKA_LOG4J_OPTS", - format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), + kafka_log_opts_env_var(&resolved_product_image.product_version), + kafka_log_opts(&resolved_product_image.product_version), ) // Needed for the `containerdebug` process to log it's tracing information to. 
.add_env_var( From 34f6c67f9eefc077907159437b4e32f3e62952c6 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 14:37:40 +0200 Subject: [PATCH 57/90] consolidate cluster operation tests --- tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 | 2 +- tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 | 2 +- tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 | 2 +- .../kuttl/cluster-operation/20-install-kafka.yaml.j2 | 4 ++-- tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 | 2 +- tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 | 4 ++-- tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 | 2 +- .../templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 | 4 ++-- tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 | 2 +- .../kuttl/cluster-operation/50-restart-kafka.yaml.j2 | 4 ++-- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 index 5d46bbff..c9e55603 100644 --- a/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 b/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 index 2479c30b..43ecacca 100644 --- a/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git 
a/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 index 9076d21f..8c199963 100644 --- a/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 @@ -12,7 +12,7 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 index 4c49ef6d..5cce455a 100644 --- a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 @@ -16,13 +16,13 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: diff --git a/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 index 8cb8a023..49854a9c 100644 --- a/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 @@ -11,7 +11,7 @@ metadata: name: test-kafka-broker-default status: replicas: 0 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] 
== 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 index d27784c7..7a7a4c88 100644 --- a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 @@ -16,13 +16,13 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} brokers: diff --git a/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 index 171bc856..d0160254 100644 --- a/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 @@ -11,7 +11,7 @@ metadata: name: test-kafka-broker-default status: replicas: 0 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 index 48feb7d3..2775a363 100644 --- a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 @@ -16,13 +16,13 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: 
IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: diff --git a/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 index 9076d21f..8c199963 100644 --- a/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 @@ -12,7 +12,7 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 index 0148b052..64551ae0 100644 --- a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 @@ -15,13 +15,13 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 
'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: From 98307edad9fd44e1b7a7d2a2cb4daa8c08407b50 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 14:49:20 +0200 Subject: [PATCH 58/90] consolidate delete rolegroup --- tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 | 2 +- tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 | 2 +- tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 | 2 +- tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 | 2 +- tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 | 2 +- .../kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 | 2 +- tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 index 5d46bbff..c9e55603 100644 --- a/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 index 2479c30b..43ecacca 100644 --- a/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 index 4160b291..07871b56 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 +++ 
b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 @@ -18,7 +18,7 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 index 6d00f8d2..01a7d2f6 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 @@ -20,7 +20,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: diff --git a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 index 9565546f..6e00dfea 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 @@ -10,7 +10,7 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 index aee1bba7..18373063 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 @@ -19,7 +19,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 
'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: diff --git a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 index 35d9e39d..e713dfe9 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 @@ -3,7 +3,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: test-kafka-broker-secondary -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet From aca7a240782174fe0aec00294c87138fbfefc0d5 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 17:32:12 +0200 Subject: [PATCH 59/90] adjust kerberos tests --- tests/templates/kuttl/kerberos/10-assert.yaml.j2 | 2 +- tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 | 2 +- tests/templates/kuttl/kerberos/20-assert.yaml.j2 | 2 +- tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/templates/kuttl/kerberos/10-assert.yaml.j2 b/tests/templates/kuttl/kerberos/10-assert.yaml.j2 index 656007a4..d0f1fce4 100644 --- a/tests/templates/kuttl/kerberos/10-assert.yaml.j2 +++ b/tests/templates/kuttl/kerberos/10-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 b/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 index 2479c30b..43ecacca 100644 --- a/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 +++ b/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if 
test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git a/tests/templates/kuttl/kerberos/20-assert.yaml.j2 b/tests/templates/kuttl/kerberos/20-assert.yaml.j2 index ee11bdb7..0e47f477 100644 --- a/tests/templates/kuttl/kerberos/20-assert.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-assert.yaml.j2 @@ -10,7 +10,7 @@ metadata: status: readyReplicas: 3 replicas: 3 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 index e6a5c343..ad941a15 100644 --- a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 @@ -44,7 +44,7 @@ commands: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-kafka-znode {% else %} controllers: From 14eb096eb2b40b5b210411bda7f76e6cf9c90bac Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 18:00:04 +0200 Subject: [PATCH 60/90] fix logging tests --- tests/templates/kuttl/logging/02-assert.yaml.j2 | 2 +- tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 | 2 +- tests/templates/kuttl/logging/04-assert.yaml.j2 | 2 +- tests/templates/kuttl/logging/04-install-kafka.yaml.j2 | 2 +- .../kuttl/logging/kafka-vector-aggregator-values.yaml.j2 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/templates/kuttl/logging/02-assert.yaml.j2 b/tests/templates/kuttl/logging/02-assert.yaml.j2 index 656007a4..d0f1fce4 100644 --- a/tests/templates/kuttl/logging/02-assert.yaml.j2 +++ 
b/tests/templates/kuttl/logging/02-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 b/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 index 827064d7..71cb7972 100644 --- a/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 +++ b/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git a/tests/templates/kuttl/logging/04-assert.yaml.j2 b/tests/templates/kuttl/logging/04-assert.yaml.j2 index 3bc3f09b..c9152f62 100644 --- a/tests/templates/kuttl/logging/04-assert.yaml.j2 +++ b/tests/templates/kuttl/logging/04-assert.yaml.j2 @@ -18,7 +18,7 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 index e804cf16..a0bf4efc 100644 --- a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 @@ -36,7 +36,7 @@ spec: tls: serverSecretClass: null vectorAggregatorConfigMapName: kafka-vector-aggregator-discovery -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-kafka-znode {% else %} controllers: diff --git a/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 
b/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 index e8fac339..f0278d46 100644 --- a/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 @@ -48,7 +48,7 @@ customConfig: condition: >- .pod == "test-kafka-broker-custom-log-config-0" && .container == "vector" -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} filteredAutomaticLogConfigControllerKafka: type: filter inputs: [validEvents] From 3e49961a06d48d5189a0319c04744629fe18c2a5 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 18:21:40 +0200 Subject: [PATCH 61/90] fix smoke test --- tests/templates/kuttl/smoke/20-assert.yaml.j2 | 2 +- tests/templates/kuttl/smoke/20-install-zk.yaml.j2 | 2 +- tests/templates/kuttl/smoke/30-assert.yaml.j2 | 2 +- tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/templates/kuttl/smoke/20-assert.yaml.j2 b/tests/templates/kuttl/smoke/20-assert.yaml.j2 index 5d46bbff..1654015f 100644 --- a/tests/templates/kuttl/smoke/20-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/20-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 b/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 index 850aa4cc..f5762fc5 100644 --- a/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 +++ b/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 
b/tests/templates/kuttl/smoke/30-assert.yaml.j2 index 76b7f28f..6a53a5e3 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2 @@ -86,7 +86,7 @@ status: expectedPods: 1 currentHealthy: 1 disruptionsAllowed: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 index 430d549e..1774d928 100644 --- a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 @@ -26,7 +26,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: From c91ce2088bb93e3dc8bba9c838dfdad554c23263 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 19:38:56 +0200 Subject: [PATCH 62/90] fix tls tests --- tests/templates/kuttl/tls/10-assert.yaml.j2 | 2 +- tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 | 2 +- tests/templates/kuttl/tls/20-assert.yaml.j2 | 2 +- tests/templates/kuttl/tls/20-install-kafka.yaml.j2 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/templates/kuttl/tls/10-assert.yaml.j2 b/tests/templates/kuttl/tls/10-assert.yaml.j2 index 656007a4..d0f1fce4 100644 --- a/tests/templates/kuttl/tls/10-assert.yaml.j2 +++ b/tests/templates/kuttl/tls/10-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 
b/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 index 2479c30b..43ecacca 100644 --- a/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 +++ b/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git a/tests/templates/kuttl/tls/20-assert.yaml.j2 b/tests/templates/kuttl/tls/20-assert.yaml.j2 index ee11bdb7..0e47f477 100644 --- a/tests/templates/kuttl/tls/20-assert.yaml.j2 +++ b/tests/templates/kuttl/tls/20-assert.yaml.j2 @@ -10,7 +10,7 @@ metadata: status: readyReplicas: 3 replicas: 3 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 index c90299c2..41dcb72b 100644 --- a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 @@ -58,7 +58,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-kafka-znode {% else %} controllers: From 604de9fbddaae4856b1728aba16624fdd2936462 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 11 Sep 2025 19:52:26 +0200 Subject: [PATCH 63/90] fix upgrade tests --- tests/templates/kuttl/upgrade/01-assert.yaml.j2 | 2 +- .../templates/kuttl/upgrade/01-install-zk.yaml.j2 | 2 +- tests/templates/kuttl/upgrade/02-assert.yaml.j2 | 2 +- .../kuttl/upgrade/02-install-kafka.yaml.j2 | 4 +++- tests/templates/kuttl/upgrade/04-assert.yaml.j2 | 2 +- tests/test-definition.yaml | 15 
++++----------- 6 files changed, 11 insertions(+), 16 deletions(-) diff --git a/tests/templates/kuttl/upgrade/01-assert.yaml.j2 b/tests/templates/kuttl/upgrade/01-assert.yaml.j2 index 5d46bbff..1654015f 100644 --- a/tests/templates/kuttl/upgrade/01-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/01-assert.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert diff --git a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 index 850aa4cc..f5762fc5 100644 --- a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 @@ -1,4 +1,4 @@ -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster diff --git a/tests/templates/kuttl/upgrade/02-assert.yaml.j2 b/tests/templates/kuttl/upgrade/02-assert.yaml.j2 index 264b636c..e6b48c8c 100644 --- a/tests/templates/kuttl/upgrade/02-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-assert.yaml.j2 @@ -10,7 +10,7 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 index 9a3c4071..728f4be2 100644 --- a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 @@ -1,3 +1,4 @@ +{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperZnode @@ -6,6 +7,7 @@ metadata: spec: clusterRef: name: test-zk +{% endif %} {% if 
test_scenario['values']['use-client-auth-tls'] == 'true' %} --- apiVersion: authentication.stackable.tech/v1alpha1 @@ -53,7 +55,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['use-kraft-controller'] == 'false' %} +{% if test_scenario['values']['zookeeper'] != 'false' %} zookeeperConfigMapName: test-zk {% else %} controllers: diff --git a/tests/templates/kuttl/upgrade/04-assert.yaml.j2 b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 index 19434a7c..391c6c31 100644 --- a/tests/templates/kuttl/upgrade/04-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 @@ -14,7 +14,7 @@ status: replicas: 1 currentReplicas: 1 updatedReplicas: 1 -{% if test_scenario['values']['use-kraft-controller'] == 'true' %} +{% if test_scenario['values']['zookeeper'] == 'false' %} --- apiVersion: apps/v1 kind: StatefulSet diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 160e2f6c..31135aca 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -21,9 +21,13 @@ dimensions: - name: zookeeper values: - 3.9.3 + # This enables KRaft mode + - "false" - name: zookeeper-latest values: - 3.9.3 + # This enables KRaft mode + - "false" - name: upgrade_old values: - 3.7.2 @@ -62,17 +66,12 @@ dimensions: - "cluster-internal" - "external-stable" - "external-unstable" - - name: use-kraft-controller - values: - - "true" - - "false" tests: - name: smoke dimensions: - kafka - zookeeper - use-client-tls - - use-kraft-controller - openshift - name: configuration dimensions: @@ -85,7 +84,6 @@ tests: - upgrade_old - use-client-tls - use-client-auth-tls - - use-kraft-controller - openshift - name: tls dimensions: @@ -93,25 +91,21 @@ tests: - zookeeper-latest - use-client-tls - use-client-auth-tls - - use-kraft-controller - openshift - name: delete-rolegroup dimensions: - kafka - zookeeper-latest - - use-kraft-controller - openshift - name: logging 
dimensions: - kafka - zookeeper-latest - - use-kraft-controller - openshift - name: cluster-operation dimensions: - zookeeper-latest - kafka-latest - - use-kraft-controller - openshift - name: kerberos dimensions: @@ -123,7 +117,6 @@ tests: - openshift - broker-listener-class - bootstrap-listener-class - - use-kraft-controller suites: - name: nightly From 1ed9489cc926a74cce9b2b8cf4e1623e6be74a02 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 12 Sep 2025 08:54:28 +0200 Subject: [PATCH 64/90] Apply suggestions from code review Co-authored-by: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 4 ++-- rust/operator-binary/src/crd/mod.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index 0a667f9d..3dda22af 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -38,7 +38,7 @@ spec: replicas: 3 ---- -NOTE: Using `spec.controllers` is mutally exclusive with `spec.clusterConfig.zookeeperConfigMapName`. +NOTE: Using `spec.controllers` is mutually exclusive with `spec.clusterConfig.zookeeperConfigMapName`. === Recommendations @@ -99,5 +99,5 @@ The Stackable Kafka operator currently does not support the migration. === Scaling issues -The https://developers.redhat.com/articles/2024/11/27/dynamic-kafka-controller-quorum?utm_source=chatgpt.com#[Dynamic scaling] is only supported from Kafka version 3.9.0. +The https://developers.redhat.com/articles/2024/11/27/dynamic-kafka-controller-quorum[Dynamic scaling] is only supported from Kafka version 3.9.0. If you are using older versions, automatic scaling may not work properly (e.g. adding or removing controller replicas). 
diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 8d778124..8333ceb6 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -364,7 +364,7 @@ impl KafkaPodDescriptor { } /// Build the Kraft voter String - /// See: + /// See: /// Example: 0@controller-0:1234:0000000000-00000000000 /// * 0 is the replica id /// * 0000000000-00000000000 is the replica directory id (even though the used Uuid states to be type 4 it does not work) From 3decd232c5508fd159ed7cce68f46e018c636d5a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 12 Sep 2025 09:57:58 +0200 Subject: [PATCH 65/90] do not apply znodes in kraftmode --- tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 | 2 ++ tests/templates/kuttl/tls/20-install-kafka.yaml.j2 | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 index ad941a15..7a529c59 100644 --- a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 @@ -4,6 +4,7 @@ kind: TestStep commands: - script: | kubectl apply -n $NAMESPACE -f - < Date: Fri, 12 Sep 2025 13:32:22 +0200 Subject: [PATCH 66/90] use cluster name instead of uid for kafka cluster-id --- rust/operator-binary/src/crd/mod.rs | 5 +++-- rust/operator-binary/src/resource/statefulset.rs | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 8333ceb6..27d5b125 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -207,8 +207,9 @@ impl v1alpha1::KafkaCluster { self.spec.controllers.is_some() } - pub fn uid(&self) -> Option<&str> { - self.metadata.uid.as_deref() + // The cluster-id for Kafka + pub fn cluster_id(&self) -> Option<&str> { + self.metadata.name.as_deref() } /// The name of the load-balanced 
Kubernetes Service providing the bootstrap address. Kafka clients will use this diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index f490efa9..eebc2103 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -149,8 +149,10 @@ pub enum Error { #[snafu(display("failed to retrieve rolegroup replicas"))] RoleGroupReplicas { source: crd::role::Error }, - #[snafu(display("cluster does not define UID"))] - ClusterUidMissing, + #[snafu(display( + "cluster does not define 'metadata.name' which is required for the Kafka cluster id" + ))] + ClusterIdMissing, #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] VectorAggregatorConfigMapMissing, @@ -291,7 +293,7 @@ pub fn build_broker_rolegroup_statefulset( ) .context(InvalidKafkaListenersSnafu)?; - let cluster_id = kafka.uid().context(ClusterUidMissingSnafu)?; + let cluster_id = kafka.cluster_id().context(ClusterIdMissingSnafu)?; cb_kafka .image_from_product_image(resolved_product_image) @@ -648,7 +650,7 @@ pub fn build_controller_rolegroup_statefulset( "-c".to_string(), ]) .args(vec![controller_kafka_container_command( - kafka.uid().context(ClusterUidMissingSnafu)?, + kafka.cluster_id().context(ClusterIdMissingSnafu)?, kafka .pod_descriptors(kafka_role, cluster_info) .context(BuildPodDescriptorsSnafu)?, From 4f387efa13057cf8e65c523b0c1500b4c7d68e12 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 12 Sep 2025 13:33:52 +0200 Subject: [PATCH 67/90] replace uid with name for cluster-id --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index 3dda22af..38c78e56 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ 
b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -78,7 +78,7 @@ The configuration of overrides, JVM arguments etc. is similar to the Broker and KRaft mode requires major configuration changes compared to ZooKeeper: -* `cluster-id`: This is set to the `metadata.uid` of the KafkaCluster resource during initial formatting +* `cluster-id`: This is set to the `metadata.name` of the KafkaCluster resource during initial formatting * `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and added `replica` id. * `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not supported. From 079fe5a85fa8e279f4b17de22e5d518febd3fd7a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 18 Sep 2025 12:12:28 +0200 Subject: [PATCH 68/90] add pre-stop sleep hook to controller kafka container --- .../src/resource/statefulset.rs | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index eebc2103..5548e767 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -23,8 +23,8 @@ use stackable_operator::{ apps::v1::{StatefulSet, StatefulSetSpec, StatefulSetUpdateStrategy}, core::v1::{ ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, EnvVarSource, - ExecAction, ObjectFieldSelector, PodSpec, Probe, ServiceAccount, TCPSocketAction, - Volume, + ExecAction, Lifecycle, LifecycleHandler, ObjectFieldSelector, PodSpec, Probe, + ServiceAccount, SleepAction, TCPSocketAction, Volume, }, }, apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, @@ -756,10 +756,24 @@ pub fn build_controller_rolegroup_statefulset( ) .context(AddVolumesAndVolumeMountsSnafu)?; + // Currently, Controllers shut down very fast, too fast most of the time (flakiness) for the Brokers + to offload properly.
The Brokers then try to connect to any controllers until the + // `gracefulShutdownTimeout` is reached and the pod is finally killed. + // The `pre-stop` hook will delay the kill signal to the Controllers to provide the Brokers more + // time to offload data. + let mut kafka_container = cb_kafka.build(); + kafka_container.lifecycle = Some(Lifecycle { + pre_stop: Some(LifecycleHandler { + sleep: Some(SleepAction { seconds: 10 }), + ..Default::default() + }), + ..Default::default() + }); + pod_builder .metadata(metadata) .image_pull_secrets_from_product_image(resolved_product_image) - .add_container(cb_kafka.build()) + .add_container(kafka_container) .affinity(&merged_config.affinity) .add_volume(Volume { name: "config".to_string(), From cceb2be2c2173c72004bce587ecd3e2db76a4095 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 18 Sep 2025 13:40:54 +0200 Subject: [PATCH 69/90] add "kraft" test suite --- tests/test-definition.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 31135aca..977e6d65 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -151,3 +151,18 @@ suites: expr: "true" - name: use-client-auth-tls expr: "true" + - name: kraft + patch: + - dimensions: + - name: zookeeper + expr: "false" + - name: zookeeper-latest + expr: "false" + - name: use-client-tls + expr: "true" + - name: use-client-auth-tls + expr: "true" + - name: bootstrap-listener-class + expr: "cluster-internal" + - name: kerberos-realm + expr: "PROD.MYCORP" From c1d1dfa525d04b3cb697a0625c3ed728324d5faa Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 19 Sep 2025 08:19:17 +0200 Subject: [PATCH 70/90] add experimental warning to kraft docs --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc 
b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index 38c78e56..7fd2b9ad 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -1,6 +1,8 @@ -= KRaft mode += KRaft mode (experimental) :description: Apache Kafka KRaft mode with the Stackable Operator for Apache Kafka +WARNING: The Kafka KRaft mode is currently experimental, and subject to change. + Apache Kafka's KRaft mode replaces Apache ZooKeeper with Kafka’s own built-in consensus mechanism based on the Raft protocol. This simplifies Kafka’s architecture, reducing operational complexity by consolidating cluster metadata management into Kafka itself. From 4819744c0a88192827f12f046d040d6d2876ad87 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 23 Sep 2025 10:33:38 +0200 Subject: [PATCH 71/90] add kafka 4.0.0 and log4j2 --- rust/operator-binary/src/product_logging.rs | 29 ++++---- .../src/resource/statefulset.rs | 4 +- ...eate-configmap-with-prepared-logs.yaml.j2} | 4 ++ .../kuttl/logging/04-install-kafka.yaml.j2 | 14 ++++ .../kuttl/logging/prepared-logs.log4j2.xml | 72 +++++++++++++++++++ tests/test-definition.yaml | 2 +- 6 files changed, 106 insertions(+), 19 deletions(-) rename tests/templates/kuttl/logging/{03-create-configmap-with-prepared-logs.yaml => 03-create-configmap-with-prepared-logs.yaml.j2} (60%) create mode 100644 tests/templates/kuttl/logging/prepared-logs.log4j2.xml diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index ddf480dc..b4a02960 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -33,19 +33,16 @@ const CONSOLE_CONVERSION_PATTERN_LOG4J: &str = "[%d] %p %m (%c)%n"; const CONSOLE_CONVERSION_PATTERN_LOG4J2: &str = "%d{ISO8601} %p [%t] %c - %m%n"; pub fn kafka_log_opts(product_version: &str) -> String { - if product_version.starts_with("4.") { - 
format!("-Dlog4j2.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J2_CONFIG_FILE}") - } else { + if product_version.starts_with("3.") { format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}") + } else { + // TODO: -Dlog4j2 vs -Dlog4j + format!("-Dlog4j2.configurationFile=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J2_CONFIG_FILE}") } } -pub fn kafka_log_opts_env_var(product_version: &str) -> String { - if product_version.starts_with("4.") { - "KAFKA_LOG4J2_OPTS".to_string() - } else { - "KAFKA_LOG4J_OPTS".to_string() - } +pub fn kafka_log_opts_env_var() -> String { + "KAFKA_LOG4J_OPTS".to_string() } /// Extend the role group ConfigMap with logging and Vector configurations @@ -61,21 +58,21 @@ pub fn extend_role_group_config_map( }; // Starting with Kafka 4.0, log4j2 is used instead of log4j. - match product_version.starts_with("4.") { - true => add_log4j2_config_if_automatic( + match product_version.starts_with("3.") { + true => add_log4j_config_if_automatic( cm_builder, Some(merged_config.kafka_logging()), - LOG4J2_CONFIG_FILE, + LOG4J_CONFIG_FILE, container_name, - KAFKA_LOG4J2_FILE, + KAFKA_LOG4J_FILE, MAX_KAFKA_LOG_FILES_SIZE, ), - false => add_log4j_config_if_automatic( + false => add_log4j2_config_if_automatic( cm_builder, Some(merged_config.kafka_logging()), - LOG4J_CONFIG_FILE, + LOG4J2_CONFIG_FILE, container_name, - KAFKA_LOG4J_FILE, + KAFKA_LOG4J2_FILE, MAX_KAFKA_LOG_FILES_SIZE, ), } diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 5548e767..450977fe 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -329,7 +329,7 @@ pub fn build_broker_rolegroup_statefulset( .context(ConstructJvmArgumentsSnafu)?, ) .add_env_var( - kafka_log_opts_env_var(&resolved_product_image.product_version), + kafka_log_opts_env_var(), kafka_log_opts(&resolved_product_image.product_version), ) // Needed for the 
`containerdebug` process to log it's tracing information to. @@ -671,7 +671,7 @@ pub fn build_controller_rolegroup_statefulset( .context(ConstructJvmArgumentsSnafu)?, ) .add_env_var( - kafka_log_opts_env_var(&resolved_product_image.product_version), + kafka_log_opts_env_var(), kafka_log_opts(&resolved_product_image.product_version), ) // Needed for the `containerdebug` process to log it's tracing information to. diff --git a/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml b/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml.j2 similarity index 60% rename from tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml rename to tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml.j2 index df71fa4d..38a2a86b 100644 --- a/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml +++ b/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml.j2 @@ -4,5 +4,9 @@ kind: TestStep commands: - script: > kubectl create configmap prepared-logs +{% if test_scenario['values']['kafka'].startswith('3.') %} --from-file=prepared-logs.log4j.xml +{% else %} + --from-file=prepared-logs.log4j2.xml +{% endif %} --namespace=$NAMESPACE diff --git a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 index a0bf4efc..9dd6e590 100644 --- a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 @@ -4,6 +4,7 @@ kind: ConfigMap metadata: name: kafka-log-config data: +{% if test_scenario['values']['kafka'].startswith('3.') %} log4j.properties: | log4j.rootLogger=INFO, CONSOLE, FILE @@ -18,6 +19,19 @@ data: log4j.appender.FILE.MaxFileSize=5MB log4j.appender.FILE.MaxBackupIndex=1 log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout +{% else %} + log4j2.properties: |- + appenders = FILE + + appender.FILE.type = File + appender.FILE.name = FILE + 
appender.FILE.fileName = /stackable/log/kafka/kafka.log4j2.xml + appender.FILE.layout.type = XMLLayout + + rootLogger.level=INFO + rootLogger.appenderRefs = FILE + rootLogger.appenderRef.FILE.ref = FILE +{% endif %} --- apiVersion: kafka.stackable.tech/v1alpha1 kind: KafkaCluster diff --git a/tests/templates/kuttl/logging/prepared-logs.log4j2.xml b/tests/templates/kuttl/logging/prepared-logs.log4j2.xml new file mode 100644 index 00000000..37474797 --- /dev/null +++ b/tests/templates/kuttl/logging/prepared-logs.log4j2.xml @@ -0,0 +1,72 @@ + + + Valid log event with all possible tags and attributes + + + + + + + + + + Valid log event without the Instant tag + + + + + Invalid log event without epochSecond + + + + + Invalid log event without nanoOfSecond + + + + + Invalid log event with invalid epochSecond + + + + Invalid log event without a timestamp + + + + Invalid log event with invalid timeMillis + + + + + Invalid log event without a logger + + + + + Invalid log event without a level + + + + + Invalid log event with an unknown level + + + + + + + + + + Invalid log event without the Event tag + + + + Unparsable log event + + + + + Valid log event after the unparsable one + diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 977e6d65..d0175129 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -7,8 +7,8 @@ dimensions: - name: kafka values: - 3.7.2 - - 3.9.0 - 3.9.1 + - 4.0.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
# - 3.8.0,oci.stackable.tech/sdp/kafka:3.8.0-stackable0.0.0-dev From bb27d446c2f963b0bd589339e9ff553e4df900b1 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 23 Sep 2025 14:08:36 +0200 Subject: [PATCH 72/90] Apply suggestions from code review Co-authored-by: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 2 +- rust/operator-binary/src/crd/role/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index 7fd2b9ad..bf17be2e 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -83,7 +83,7 @@ KRaft mode requires major configuration changes compared to ZooKeeper: * `cluster-id`: This is set to the `metadata.name` of the KafkaCluster resource during initial formatting * `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and added `replica` id. * `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not supported. - +* The operator configures a static voter list containing the controller pods. Controllers are not dynamically managed. == Troubleshooting === Cluster does not start diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index ec4e1ea2..055f4c4a 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -41,7 +41,7 @@ pub const KAFKA_NODE_ID: &str = "node.id"; /// The roles that this process plays: 'broker', 'controller', or 'broker,controller' if it is both. pub const KAFKA_PROCESS_ROLES: &str = "process.roles"; -/// A comma-separated list of the directories where the log data is stored. If not set, the value in log.dir is used.
+/// A comma-separated list of the directories where the topic data is stored. pub const KAFKA_LOG_DIRS: &str = "log.dirs"; /// Listener List - Comma-separated list of URIs we will listen on and the listener names. @@ -132,7 +132,6 @@ impl KafkaRole { } /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. - /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, /// but is similar to HBase). // TODO: split into broker / controller? pub fn kerberos_service_name(&self) -> &'static str { From 39ca68a9552ff14e7ceaa50045f6b9e9368d7321 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 23 Sep 2025 14:13:22 +0200 Subject: [PATCH 73/90] remove explicit filenames from doc paragraph --- docs/modules/kafka/pages/index.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/kafka/pages/index.adoc b/docs/modules/kafka/pages/index.adoc index 163c2d5d..43671170 100644 --- a/docs/modules/kafka/pages/index.adoc +++ b/docs/modules/kafka/pages/index.adoc @@ -33,7 +33,7 @@ image::kafka_overview.drawio.svg[A diagram depicting the Kubernetes resources cr For every xref:concepts:roles-and-role-groups.adoc#_role_groups[role group] in the `broker` role the operator creates a StatefulSet. Multiple Services are created - one at role level, one per role group as well as one for every individual Pod - to allow access to the entire Kafka cluster, parts of it or just individual brokers. -For every StatefulSet (role group) a ConfigMap is deployed containing a `log4j.properties` file for xref:usage-guide/logging.adoc[logging] configuration and a `broker.properties` or file containing the whole Kafka configuration which is derived from the KafkaCluster resource. +For every StatefulSet, a ConfigMap is deployed containing xref:usage-guide/logging.adoc[logging] properties and a Kafka configuration file which is derived from the KafkaCluster resource. 
The operator creates a xref:concepts:service_discovery.adoc[] for the whole KafkaCluster which references the Service for the whole cluster. Other operators use this ConfigMap to connect to a Kafka cluster simply by name and it can also be used by custom third party applications to find the connection endpoint. From 2ca172be2c136518da6cfb1ed2eb1a04164da462 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 23 Sep 2025 14:16:54 +0200 Subject: [PATCH 74/90] add quotes to clarify code docs --- rust/operator-binary/src/crd/role/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 055f4c4a..174026c0 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -49,11 +49,11 @@ pub const KAFKA_LOG_DIRS: &str = "log.dirs"; pub const KAFKA_LISTENERS: &str = "listeners"; /// Specifies the listener addresses that the Kafka brokers will advertise to clients and other brokers. -/// The config is useful where the actual listener configuration listeners does not represent the addresses that clients should use to connect, +/// The config is useful where the actual listener configuration 'listeners' does not represent the addresses that clients should use to connect, /// such as in cloud environments. The addresses are published to and managed by the controller, the brokers pull these data from the controller as needed. -/// In IaaS environments, this may need to be different from the interface to which the broker binds. If this is not set, the value for listeners will be used. -/// Unlike listeners, it is not valid to advertise the 0.0.0.0 meta-address. -/// Also unlike listeners, there can be duplicated ports in this property, so that one listener can be configured to advertise another listener's address. +/// In IaaS environments, this may need to be different from the interface to which the broker binds. 
If this is not set, the value for 'listeners' will be used. +/// Unlike 'listeners', it is not valid to advertise the 0.0.0.0 meta-address. +/// Also unlike 'listeners', there can be duplicated ports in this property, so that one listener can be configured to advertise another listener's address. /// This can be useful in some cases where external load balancers are used. pub const KAFKA_ADVERTISED_LISTENERS: &str = "advertised.listeners"; From f01244a6d6435db5afd2b7212ee981263587768f Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:52:09 +0200 Subject: [PATCH 75/90] add "smoke-kraft" test --- .../kuttl/smoke-kraft/00-limit-range.yaml | 11 + .../kuttl/smoke-kraft/00-patch-ns.yaml.j2 | 9 + .../kuttl/smoke-kraft/00-rbac.yaml.j2 | 29 +++ .../kuttl/smoke-kraft/01-assert.yaml.j2 | 8 + .../01-install-kafka-vector-aggregator.yaml | 17 ++ ...reate-configmap-with-prepared-logs.yaml.j2 | 12 ++ .../kuttl/smoke-kraft/30-assert.yaml.j2 | 44 ++++ .../smoke-kraft/30-install-kafka.yaml.j2 | 200 ++++++++++++++++++ .../kuttl/smoke-kraft/31-assert.yaml | 19 ++ .../kuttl/smoke-kraft/32-assert.yaml | 7 + .../kuttl/smoke-kraft/40-assert.yaml | 14 ++ .../40-install-test-container.yaml | 35 +++ .../kuttl/smoke-kraft/50-assert.yaml | 9 + .../kuttl/smoke-kraft/50-prepare-test.yaml | 9 + .../kuttl/smoke-kraft/60-assert.yaml | 8 + .../60-install-test-scripts-configmap.yaml | 10 + .../smoke-kraft/60_test_client_auth_tls.sh | 72 +++++++ .../kuttl/smoke-kraft/60_wrong_keystore.p12 | Bin 0 -> 3533 bytes .../kuttl/smoke-kraft/60_wrong_truststore.p12 | Bin 0 -> 1351 bytes .../kuttl/smoke-kraft/70-assert.yaml | 11 + .../kuttl/smoke-kraft/70-run-tests.yaml.j2 | 13 ++ .../kuttl/smoke-kraft/70_test-tls-job.yaml.j2 | 73 +++++++ tests/templates/kuttl/smoke-kraft/README.md | 10 + .../kafka-vector-aggregator-values.yaml.j2 | 104 +++++++++ tests/templates/kuttl/smoke-kraft/metrics.py | 19 ++ .../kuttl/smoke-kraft/prepared-logs.log4j.xml | 
67 ++++++ .../smoke-kraft/prepared-logs.log4j2.xml | 72 +++++++ .../templates/kuttl/smoke-kraft/test_heap.sh | 16 ++ .../kuttl/smoke-kraft/test_log_aggregation.py | 49 +++++ tests/test-definition.yaml | 10 +- 30 files changed, 956 insertions(+), 1 deletion(-) create mode 100644 tests/templates/kuttl/smoke-kraft/00-limit-range.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/31-assert.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/32-assert.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/40-assert.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/50-assert.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/60-assert.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml create mode 100755 tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh create mode 100644 tests/templates/kuttl/smoke-kraft/60_wrong_keystore.p12 create mode 100644 tests/templates/kuttl/smoke-kraft/60_wrong_truststore.p12 create mode 100644 tests/templates/kuttl/smoke-kraft/70-assert.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/README.md create 
mode 100644 tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2 create mode 100644 tests/templates/kuttl/smoke-kraft/metrics.py create mode 100644 tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml create mode 100644 tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml create mode 100755 tests/templates/kuttl/smoke-kraft/test_heap.sh create mode 100755 tests/templates/kuttl/smoke-kraft/test_log_aggregation.py diff --git a/tests/templates/kuttl/smoke-kraft/00-limit-range.yaml b/tests/templates/kuttl/smoke-kraft/00-limit-range.yaml new file mode 100644 index 00000000..7b6cb30e --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/00-limit-range.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: v1 +kind: LimitRange +metadata: + name: limit-request-ratio +spec: + limits: + - type: "Container" + maxLimitRequestRatio: + cpu: 5 + memory: 1 diff --git a/tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2 b/tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2 new file mode 100644 index 00000000..67185acf --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2 @@ -0,0 +1,9 @@ +{% if test_scenario['values']['openshift'] == 'true' %} +# see https://github.com/stackabletech/issues/issues/566 +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: kubectl patch namespace $NAMESPACE -p '{"metadata":{"labels":{"pod-security.kubernetes.io/enforce":"privileged"}}}' + timeout: 120 +{% endif %} diff --git a/tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2 b/tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2 new file mode 100644 index 00000000..7ee61d23 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2 @@ -0,0 +1,29 @@ +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: test-role +rules: +{% if test_scenario['values']['openshift'] == "true" %} + - apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + resourceNames: ["privileged"] + verbs: ["use"] 
+{% endif %} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: test-sa +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: test-rb +subjects: + - kind: ServiceAccount + name: test-sa +roleRef: + kind: Role + name: test-role + apiGroup: rbac.authorization.k8s.io diff --git a/tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2 b/tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2 new file mode 100644 index 00000000..892ae718 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2 @@ -0,0 +1,8 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery diff --git a/tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml b/tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml new file mode 100644 index 00000000..6c7b01cc --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: >- + helm install kafka-vector-aggregator vector + --namespace $NAMESPACE + --version 0.43.0 + --repo https://helm.vector.dev + --values kafka-vector-aggregator-values.yaml +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +data: + ADDRESS: kafka-vector-aggregator:6123 diff --git a/tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2 b/tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2 new file mode 100644 index 00000000..11b3d373 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2 @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: > + kubectl create configmap prepared-logs +{% if test_scenario['values']['kafka-kraft'].startswith('3.') %} + --from-file=prepared-logs.log4j.xml +{% else %} + 
--from-file=prepared-logs.log4j2.xml +{% endif %} + --namespace=$NAMESPACE diff --git a/tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2 b/tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2 new file mode 100644 index 00000000..02f55756 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2 @@ -0,0 +1,44 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-automatic-log-config +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-custom-log-config +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-automatic-log-config +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-custom-log-config +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 new file mode 100644 index 00000000..282686e9 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 @@ -0,0 +1,200 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kafka-log-config +data: +{% if test_scenario['values']['kafka-kraft'].startswith('3.') %} + log4j.properties: | + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=[%d] %p %m (%c)%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/kafka/kafka.log4j.xml + 
log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout +{% else %} + log4j2.properties: |- + appenders = FILE + + appender.FILE.type = File + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/kafka/kafka.log4j2.xml + appender.FILE.layout.type = XMLLayout + + rootLogger.level=INFO + rootLogger.appenderRefs = FILE + rootLogger.appenderRef.FILE.ref = FILE +{% endif %} +--- +apiVersion: authentication.stackable.tech/v1alpha1 +kind: AuthenticationClass +metadata: + name: test-kafka-client-auth-tls +spec: + provider: + tls: + clientCertSecretClass: test-kafka-client-auth-tls +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: test-kafka-client-auth-tls +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-tls-kafka-client-auth-ca + namespace: default + autoGenerate: true +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} + pullPolicy: IfNotPresent + clusterConfig: + authentication: + - authenticationClass: test-kafka-client-auth-tls + tls: + serverSecretClass: tls + vectorAggregatorConfigMapName: vector-aggregator-discovery + controllers: + envOverrides: + COMMON_VAR: role-value # overridden by role group below + ROLE_VAR: role-value # only defined here at role level + config: + logging: + enableVectorAgent: true + requestedSecretLifetime: 7d + roleGroups: + automatic-log-config: + replicas: 1 + config: + logging: + enableVectorAgent: true + containers: + kafka: + console: + level: INFO + file: + level: INFO + loggers: + ROOT: + level: INFO + vector: + console: + 
level: INFO + file: + level: INFO + loggers: + ROOT: + level: INFO + podOverrides: + spec: + containers: + - name: vector + volumeMounts: + - name: prepared-logs + mountPath: /stackable/log/prepared-logs + volumes: + - name: prepared-logs + configMap: + name: prepared-logs + custom-log-config: + replicas: 1 + config: + logging: + enableVectorAgent: true + containers: + kafka: + custom: + configMap: kafka-log-config + brokers: + configOverrides: + broker.properties: + compression.type: uncompressed # overridden by role group below + controller.quorum.election.backoff.max.ms: "2000" + envOverrides: + COMMON_VAR: role-value # overridden by role group below + ROLE_VAR: role-value # only defined here at role level + config: + logging: + enableVectorAgent: true + requestedSecretLifetime: 7d + roleGroups: + default: + replicas: 1 + envOverrides: + COMMON_VAR: group-value # overrides role value + GROUP_VAR: group-value # only defined here at group level + configOverrides: + broker.properties: + compression.type: snappy + controller.quorum.fetch.timeout.ms: "3000" + podOverrides: + spec: + containers: + - name: kafka + resources: + requests: + cpu: 300m + limits: + cpu: 1100m + automatic-log-config: + replicas: 1 + config: + logging: + enableVectorAgent: true + containers: + kafka: + console: + level: INFO + file: + level: INFO + loggers: + ROOT: + level: INFO + vector: + console: + level: INFO + file: + level: INFO + loggers: + ROOT: + level: INFO + podOverrides: + spec: + containers: + - name: vector + volumeMounts: + - name: prepared-logs + mountPath: /stackable/log/prepared-logs + volumes: + - name: prepared-logs + configMap: + name: prepared-logs + custom-log-config: + replicas: 1 + config: + logging: + enableVectorAgent: true + containers: + kafka: + custom: + configMap: kafka-log-config diff --git a/tests/templates/kuttl/smoke-kraft/31-assert.yaml b/tests/templates/kuttl/smoke-kraft/31-assert.yaml new file mode 100644 index 00000000..26a55394 --- /dev/null +++ 
b/tests/templates/kuttl/smoke-kraft/31-assert.yaml @@ -0,0 +1,19 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + # + # Test envOverrides + # + - script: | + kubectl -n $NAMESPACE get sts test-kafka-broker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "kafka") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts test-kafka-broker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "kafka") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts test-kafka-broker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "kafka") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' + # + # Test configOverrides + # + - script: | + kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "compression.type=snappy" + kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.election.backoff.max.ms=2000" + kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.fetch.timeout.ms=3000" diff --git a/tests/templates/kuttl/smoke-kraft/32-assert.yaml b/tests/templates/kuttl/smoke-kraft/32-assert.yaml new file mode 100644 index 00000000..32d3ca11 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/32-assert.yaml @@ -0,0 +1,7 @@ +--- +# This test checks if the containerdebug-state.json file is present and valid +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + - script: kubectl exec -n $NAMESPACE --container kafka test-kafka-broker-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' diff --git a/tests/templates/kuttl/smoke-kraft/40-assert.yaml b/tests/templates/kuttl/smoke-kraft/40-assert.yaml new file mode 100644 
index 00000000..58987778 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/40-assert.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: install-test-container +timeout: 300 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: python +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml b/tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml new file mode 100644 index 00000000..d2d79cc7 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml @@ -0,0 +1,35 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: install-test-container +timeout: 300 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: python + labels: + app: python +spec: + replicas: 1 + selector: + matchLabels: + app: python + template: + metadata: + labels: + app: python + spec: + containers: + - name: webhdfs + image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev + stdin: true + tty: true + resources: + requests: + memory: "128Mi" + cpu: "512m" + limits: + memory: "128Mi" + cpu: "1" diff --git a/tests/templates/kuttl/smoke-kraft/50-assert.yaml b/tests/templates/kuttl/smoke-kraft/50-assert.yaml new file mode 100644 index 00000000..7cb89e3d --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/50-assert.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: metrics +commands: + - script: kubectl exec -n $NAMESPACE python-0 -- python /tmp/metrics.py + - script: kubectl exec -n $NAMESPACE test-kafka-broker-default-0 -- /tmp/test_heap.sh + - script: kubectl exec -n $NAMESPACE python-0 -- python /tmp/test_log_aggregation.py -n $NAMESPACE diff --git a/tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml b/tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml new file mode 100644 index 00000000..6895678f --- /dev/null +++ 
b/tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: kafka-tests +commands: + - script: kubectl cp -n $NAMESPACE ./metrics.py python-0:/tmp + - script: kubectl cp -n $NAMESPACE ./test_heap.sh test-kafka-broker-default-0:/tmp + - script: kubectl cp -n $NAMESPACE ./test_log_aggregation.py python-0:/tmp diff --git a/tests/templates/kuttl/smoke-kraft/60-assert.yaml b/tests/templates/kuttl/smoke-kraft/60-assert.yaml new file mode 100644 index 00000000..828b4be9 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/60-assert.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: test-scripts diff --git a/tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml b/tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml new file mode 100644 index 00000000..959bbce5 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl create configmap test-scripts \ + --namespace $NAMESPACE \ + --from-file=test_client_auth_tls.sh=60_test_client_auth_tls.sh \ + --from-file=wrong_keystore.p12=60_wrong_keystore.p12 \ + --from-file=wrong_truststore.p12=60_wrong_truststore.p12 diff --git a/tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh b/tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh new file mode 100755 index 00000000..bae7473b --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Usage: test_client_auth_tls.sh namespace + +# to be safe +unset TOPIC +unset BAD_TOPIC + +echo "Connecting to boostrap address $KAFKA" + +echo "Start client auth TLS testing..." 
+############################################################################ +# Test the secured connection +############################################################################ +# create random topics +TOPIC=$(tr -dc A-Za-z0-9 /tmp/client.config + +if /stackable/kafka/bin/kafka-topics.sh --create --topic "$TOPIC" --bootstrap-server "$KAFKA" --command-config /tmp/client.config +then + echo "[SUCCESS] Secure client topic created!" +else + echo "[ERROR] Secure client topic creation failed!" + exit 1 +fi + +if /stackable/kafka/bin/kafka-topics.sh --list --topic "$TOPIC" --bootstrap-server "$KAFKA" --command-config /tmp/client.config | grep "$TOPIC" +then + echo "[SUCCESS] Secure client topic read!" +else + echo "[ERROR] Secure client topic read failed!" + exit 1 +fi + +############################################################################ +# Test the connection without certificates +############################################################################ +if /stackable/kafka/bin/kafka-topics.sh --create --topic "$BAD_TOPIC" --bootstrap-server "$KAFKA" &> /dev/null +then + echo "[ERROR] Secure client topic created without certificates!" + exit 1 +else + echo "[SUCCESS] Secure client topic creation failed without certificates!" +fi + +############################################################################ +# Test the connection with bad host name +############################################################################ +if /stackable/kafka/bin/kafka-topics.sh --create --topic "$BAD_TOPIC" --bootstrap-server localhost:9093 --command-config /tmp/client.config &> /dev/null +then + echo "[ERROR] Secure client topic created with bad host name!" + exit 1 +else + echo "[SUCCESS] Secure client topic creation failed with bad host name!" 
+fi + +############################################################################ +# Test the connection with bad certificate +############################################################################ +echo $'security.protocol=SSL\nssl.keystore.location=/tmp/wrong_keystore.p12\nssl.keystore.password=changeit\nssl.truststore.location=/tmp/wrong_truststore.p12\nssl.truststore.password=changeit' > /tmp/client.config +if /stackable/kafka/bin/kafka-topics.sh --create --topic "$BAD_TOPIC" --bootstrap-server "$KAFKA" --command-config /tmp/client.config &> /dev/null +then + echo "[ERROR] Secure client topic created with wrong certificate!" + exit 1 +else + echo "[SUCCESS] Secure client topic creation failed with wrong certificate!" +fi + +echo "All client auth TLS tests successful!" +exit 0 diff --git a/tests/templates/kuttl/smoke-kraft/60_wrong_keystore.p12 b/tests/templates/kuttl/smoke-kraft/60_wrong_keystore.p12 new file mode 100644 index 0000000000000000000000000000000000000000..e5dc3a42ed431b53b82ca1d0e197610bbdad685b GIT binary patch literal 3533 zcmV;;4KngDf(^+60Ru3C4UYy1Duzgg_YDCD0ic2nfCPdKd@zCycrbzpF9r!JhDe6@ z4FLxRpn?b>FoFmi0s#Opf(R7`2`Yw2hW8Bt2LUh~1_~;MNQU2N!?wn6-r-*jxW(9pCs_f{Mj>1zM1&XzJ^)Y zUR5C*{eA?M)dHWTmI=EpWMqH08(y%lDn*z~H&uotJNSv^|lO)&csuL{h}L>_whv%-d0`6Xk&O&vnQY zZ-|)84;LMBQK89N%K7na>d?Z+fjEN0A9l4yG!`4Mp%HP%ZXWRSYpeSdad15#0h)de zUO+Cw8t|be+W>9t`K2(Ae%m8Ea?z7?y{r^T^K%db2W%$)f{j8LOiGzL(}x>IZCYi+ z%Pm(a)`GEeeIur)UZHbM_!X(wLK7}cEtgZ_+&oY+H;TF8Tj*2?rr>~iebnEtCihI2 zHIq<^T1<5w)f_`i2# z^WVJ`3lY2A3VEsm0azogj=B@tXY((|OgtQtG^YIXyf4pyO{234yR)Ap@s(lbPKUyZ z@+$>IZfz6-ObOmrp!v0s`{)*o#XryWv;EkxQSWio$es`JVm4d3ShSbXW~l1TZ$nZk zn)$aG7J*2yV-vIv>ORV5Lve%s5qEgp;J=m#qwVmW6Ayux5nR!#vo&%p~< zo@oz$^h+$TNV(^~FB7`}Bh@xxN0oPWt3iO?^FDnT*7`Q7`u(72@x9g0XNXHqBk)92 z4KEGcyu(ijl!Cv4!0LUW^9YWDKn5K|=Gu|aYpDFXXR*GlL#3=xG*eGb1*v*oG5g{@bGvp-FIEb8Z2;9Dd=LdDfY!nJq2 z1DVAZr(yR#LHBojnzx~Q9D(JKF{Xxux&|zZq&(Xclvsrfz zoSj?17#O>&yBRc_vN?t 
zlo-xKF)$Xnu9n@ZGRAxbwuxU$WeRc1#&reeg0j~}{nLa7v97ky*G)f!KwX!1w=PHE zVf8QlA>E)A0UyQl6>9T2i^KHRTcJa|zz=@tAYxd$pZL$@>3Nw{?sz23*d5Q0@+Qtb zXCrsdW2O-$rsA)F7LmpziS{6NFK79up++lnpWXZ(JwB=#|!eaumGISO^G;bHnlnV8_0X9NN?F3Py-?k{b-T(^_% z4Z8D=M}*CYgCB(FvH(8vd7wOolzNe0UbccJppckem}x4#n%LaxjprB*gQSjtICvl& zx6`gJr^m|G#4~~7(27I~%_A&FWA~VTZ=9j$#S)@F=E;oSyc^{-L)9uxX)jEN`1mm; zSSh*OwuI>CR%gnqA>gS3*5rFWzR6I%p>oRRJ!1?#ZCh$3T^tF zY%`TR>#`uWdsxR7W(|@*OjG>|OkmP>f>Es*AY7}C?vyb`Ht#OFVWEXqhl6fQy|I~c zjOOhQV88x56xB6aX(x_tqS076$U>qf8uR7Y7zi+b6yRJhM%&^C+Ce?Na z?337;Hefma>~j#I;NYJC3esB@AXi~kef&K2t5TAeVmtA@RBdjO$TzKX!o6PL zy^H=z zg|e#M*9ug7rLeHCuQ@_Hp7o)#;j-~s%RYzdhv?0;;o8F7LyFp7@>1y=1?P6=Yg&PO zN>Tn72qtM0M8Y0&LNQUX!TKk!Kfjqv|EW z*)&dye3nl^)J}IZ)w^478#S0Pj=Pl2VdBdE&Ah3x1aDyi6~eBFV@a&Wfwsn075jNh zqOSMNr)u@gspCPxE!+<~XR^pkx#d)syQjixXHBKe)7-I{Aq#*2ayb560ewp0^MIn83jr#)hzJcw=(Trmya$|& zF45|uBjF_9g5IHMgGJLXl21m=kNNS?_45Oy8vBA&3r_7*1MsM?Se+3rp*8M18Ftt8 z6C^cDQSGP|u1mKTN5spDyj~joIu$@o$0>K@hZE7ZdeD-A8XU(uC_$ykt8$Hp+n8AY z1nbz1t|OU9Ym@VoThLNEF0TmbNnipE2g(*V!kDz0n&U2JD9QW6=IAEe&1Q=2b!4TT zFe6c=6=)p^Gw7Eww%0N`5A4q0@D`z|O%|oqpgP2WAXh7QJ+#~J02dSy+ck$=>Va?A z+zpaB;MLicUGuuibb%c8LEnM+!wgC8&VnFt&}MjN^}WXCp}5r#wjlhV2fxDBBjG-m0lnvI5wXiFztHTxoHc*@2v@O2sf;9)W%x7whWb>yr10`;svwFe3&DDuzgg_YDCF6)_eB6x>WwUomPTOXM3>m~?|4 z03KdF#4s^1AutIB1uG5%0vZJX1QczDTIUmUzYe literal 0 HcmV?d00001 diff --git a/tests/templates/kuttl/smoke-kraft/60_wrong_truststore.p12 b/tests/templates/kuttl/smoke-kraft/60_wrong_truststore.p12 new file mode 100644 index 0000000000000000000000000000000000000000..0eca7262c6024a7fc89b7b62399524cf8a620ba2 GIT binary patch literal 1351 zcmV-N1-SY!f(1hY0Ru3C1pEdGDuzgg_YDCD0ic2e?F51Z=`ex>&LNQUTf+hZClCSwATSID2r7n1hW8Bu2?YQ! 
z9R>+thDZTr0|Wso1Q6vv2<-sx`1rze2+V*~6hwf61W@quw;KM2-=Xr6LtH<_ue?#S z;E`?{n*Gi`N=b=j)6h^ha9R7}9SW=yF~hmT(advc6E0uGQxpnmob;*8^Jc+_93D#| zCc><)xR_q_WDg|Nb+{wICC}bhtGad?M!4j1(gMfKOtXm68D7CiNCLvT`au<93i+#x z%D)uV>cpRoAV!0=E1`EwryPKV1hxZAZ>!mX0Q(j{wJe5)B5aGz9n0S*SSbxiC+|LXA%Vg}v7sDT+n6B`tA%|SLGlst}fM?!>%vn_IjlF^L{ z9XJeTBO9yKeLH$Cl+Ao#Acq=h+X+GupGMM)J%2)h&)#1eZ&jiD%xU`niG2-y`gLhz z7ucTkkaOox&Uz`dnCG6GOOPzY86R~0dGc|*(ia*Q(5wD>SqaaT;E9I+NAYdP2rD=^2hzGX=o)O< zaRjS?ZO==0<7QQPMt0I3tiN^W9B#EBh|FB^MC>J5ONu}u)BO2CSAH9J|M#o0hMHP0 zjfl*m4b8M4xt#@0LLi;U%Cv4-znX~cw$mzMt67W${BY%T4UV>f9nxlR2TK^gK{l?V z<1ll=X#wgJ`2~u$R&SVTTOC|bg%HnRS1p~ba_TiCUG2-L&=(plfa7O6Uu^o8FdWh7 zQJ@kOUgKH($v(~W-N!R*Y-WKnOW7md-)#XqhVzf|UaI^d0hpfvejft^5V)F6WC5fv z`Gw71FuWZJ+1gWSJ}}xSItCSDsCuUZQ8aws5&t8WB#>8c$n-J3(TGqeC|o;EiY{Rl z)Z9nI^Pa4{&e)Zs_am`TY%S$iqGH0n(R)I8(aN~}OZ<2?=h7y}Vwwv73CDJ0gN?2( z8gF}zKMn832x-`?Xncmh$>w&7bwe;ky>5RTnOCL*W!2efdZ{d!PR9u1^6fO8Z@YFV z^F2tqDeh?<*buov&xgR~lzfqyyz`cqh9f_j8Dxkvq$Zc6bMknc)8~=+^PT?1qJ$&NNchp}n=#-X4g;?VHOb<*Y}1 zZo9}U4$1yGAB;7HEpfA5Yq+${u03haT15M;X?LHhR{{~Tg1vy|=mQU%8U%%@h8L-g z?@KKcAMyxU+&Tn}T@kf;T9ieW8}+)(NC-5M?4$~ABaP71u>6OZY$%*CMLPOl>uGCB4R;MNKSb0{1=gn5Jwi}#Ax6LWR<8Ok$-W1PdB8H+GJFd;Ar z1_dh)0|FWa00b0XN1yBz#2!y7s+?;R`~fE(Kl=Lw6mJ=tN7Wx=c%-?|!2(v6xu!Of Jr~(54hM+RlWjp`? 
literal 0 HcmV?d00001 diff --git a/tests/templates/kuttl/smoke-kraft/70-assert.yaml b/tests/templates/kuttl/smoke-kraft/70-assert.yaml new file mode 100644 index 00000000..52cbcc7b --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/70-assert.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: test-tls +status: + succeeded: 1 diff --git a/tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2 b/tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2 new file mode 100644 index 00000000..22b3e3f2 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2 @@ -0,0 +1,13 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # Use the same Kafka image in the test Job as in the Kafka broker StatefulSet + - script: >- + KAFKA_IMAGE=$( + kubectl get statefulsets.apps test-kafka-broker-default + --namespace $NAMESPACE + --output=jsonpath='{.spec.template.spec.containers[?(.name=="kafka")].image}' + ) + envsubst < 70_test-tls-job.yaml | + kubectl apply --namespace $NAMESPACE --filename - diff --git a/tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2 b/tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2 new file mode 100644 index 00000000..f9da65e1 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2 @@ -0,0 +1,73 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: test-tls +spec: + template: + spec: + containers: + - name: kafka + image: ${KAFKA_IMAGE} + workingDir: /stackable/test + command: + - ./test_client_auth_tls.sh + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 200m + memory: 512Mi + env: + - name: KAFKA + valueFrom: + configMapKeyRef: + name: test-kafka + key: KAFKA + volumeMounts: + - name: test-scripts + mountPath: /stackable/test + - mountPath: /stackable/tls_keystore_internal + name: tls-keystore-internal + - mountPath: /stackable/tls_keystore_server + name: 
tls-keystore-server + volumes: + - name: test-scripts + configMap: + name: test-scripts + defaultMode: 0777 + - name: tls-keystore-server + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: test-kafka-client-auth-tls + secrets.stackable.tech/format: tls-pkcs12 + secrets.stackable.tech/scope: pod,node + spec: + storageClassName: secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + - name: tls-keystore-internal + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: tls + secrets.stackable.tech/format: tls-pkcs12 + secrets.stackable.tech/scope: pod,node + spec: + storageClassName: secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + securityContext: + fsGroup: 1000 + serviceAccountName: test-sa + restartPolicy: OnFailure diff --git a/tests/templates/kuttl/smoke-kraft/README.md b/tests/templates/kuttl/smoke-kraft/README.md new file mode 100644 index 00000000..319d8f04 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/README.md @@ -0,0 +1,10 @@ + +# Kraft test bundle + +To reduce the number of tests, this one ("smoke-kraft") bundles multiple tests into one: + +* smoke +* logging +* tls (always enabled) + +This test doesn't install any zookeeper servers and only runs in Kraft mode (as the name implies). diff --git a/tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2 new file mode 100644 index 00000000..67eed310 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2 @@ -0,0 +1,104 @@ +--- +role: Aggregator +service: + ports: + - name: api + port: 8686 + protocol: TCP + targetPort: 8686 + - name: vector + port: 6123 + protocol: TCP + targetPort: 6000 +# resources -- Set Vector resource requests and limits. 
+resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 200m + memory: 512Mi +customConfig: + api: + address: 0.0.0.0:8686 + enabled: true + sources: + vector: + address: 0.0.0.0:6000 + type: vector + version: "2" + transforms: + validEvents: + type: filter + inputs: [vector] + condition: is_null(.errors) + filteredAutomaticLogConfigBrokerKafka: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-broker-automatic-log-config-0" && + .container == "kafka" + filteredAutomaticLogConfigBrokerVector: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-broker-automatic-log-config-0" && + .container == "vector" + filteredCustomLogConfigBrokerKafka: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-broker-custom-log-config-0" && + .container == "kafka" + filteredCustomLogConfigBrokerVector: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-broker-custom-log-config-0" && + .container == "vector" + filteredAutomaticLogConfigControllerKafka: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-automatic-log-config-0" && + .container == "kafka" + filteredAutomaticLogConfigControllerVector: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-automatic-log-config-0" && + .container == "vector" + filteredCustomLogConfigControllerKafka: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-custom-log-config-0" && + .container == "kafka" + filteredCustomLogConfigControllerVector: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-kafka-controller-custom-log-config-0" && + .container == "vector" + filteredInvalidEvents: + type: filter + inputs: [vector] + condition: |- + .timestamp == from_unix_timestamp!(0) || + is_null(.level) || + is_null(.logger) || + is_null(.message) + sinks: + test: + inputs: [filtered*] + type: blackhole +{% if 
lookup('env', 'VECTOR_AGGREGATOR') %} + aggregator: + inputs: [vector] + type: vector + address: {{ lookup('env', 'VECTOR_AGGREGATOR') }} + buffer: + # Avoid back pressure from VECTOR_AGGREGATOR. The test should + # not fail if the aggregator is not available. + when_full: drop_newest +{% endif %} diff --git a/tests/templates/kuttl/smoke-kraft/metrics.py b/tests/templates/kuttl/smoke-kraft/metrics.py new file mode 100644 index 00000000..7c9f8027 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/metrics.py @@ -0,0 +1,19 @@ +import sys +import logging +import requests + +if __name__ == "__main__": + result = 0 + + LOG_LEVEL = "DEBUG" # if args.debug else 'INFO' + logging.basicConfig( + level=LOG_LEVEL, + format="%(asctime)s %(levelname)s: %(message)s", + stream=sys.stdout, + ) + + http_code = requests.get("http://test-kafka-broker-default:9606").status_code + if http_code != 200: + result = 1 + + sys.exit(result) diff --git a/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml new file mode 100644 index 00000000..707d10c9 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml @@ -0,0 +1,67 @@ + + Valid log event with all possible tags and attributes + TestException + + + + + Invalid log event without a timestamp + + + + + Invalid log event with an unparsable timestamp + + + + + Invalid log event without a logger + + + + + Invalid log event without a level + + + + + Invalid log event with an unknown level + + + + + + + + + + Valid log event before the one with the noevent tag + + + + + Invalid log event without the event tag + + + + + Unparsable log event + + + + Valid log event after the unparsable one + + diff --git a/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml new file mode 100644 index 00000000..1562bce9 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml @@ -0,0 +1,72 @@ 
+ + + Valid log event with all possible tags and attributes + + + + + + + + + + Valid log event without the Instant tag + + + + + Invalid log event without epochSecond + + + + + Invalid log event without nanoOfSecond + + + + + Invalid log event with invalid epochSecond + + + + Invalid log event without a timestamp + + + + Invalid log event with invalid timeMillis + + + + + Invalid log event without a logger + + + + + Invalid log event without a level + + + + + Invalid log event with an unknown level + + + + + + + + + + Invalid log event without the Event tag + + + + Unparsable log event + + + + + Valid log event after the unparsable one + diff --git a/tests/templates/kuttl/smoke-kraft/test_heap.sh b/tests/templates/kuttl/smoke-kraft/test_heap.sh new file mode 100755 index 00000000..cd76d42a --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/test_heap.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Usage: test_heap.sh + +# 2Gi * 0.8 -> 1638 +EXPECTED_HEAP="-Xmx1638m -Xms1638m" + +# Check if ZK_SERVER_HEAP is set to the correct calculated value +if [[ $KAFKA_HEAP_OPTS == "$EXPECTED_HEAP" ]] +then + echo "[SUCCESS] KAFKA_HEAP_OPTS set to $EXPECTED_HEAP" +else + echo "[ERROR] KAFKA_HEAP_OPTS not set or set with wrong value: $ZK_SERVER_HEAP" + exit 1 +fi + +echo "[SUCCESS] All heap settings tests successful!" 
diff --git a/tests/templates/kuttl/smoke-kraft/test_log_aggregation.py b/tests/templates/kuttl/smoke-kraft/test_log_aggregation.py new file mode 100755 index 00000000..c27b1992 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/test_log_aggregation.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +import requests + + +def check_sent_events(): + response = requests.post( + "http://kafka-vector-aggregator:8686/graphql", + json={ + "query": """ + { + transforms(first:100) { + nodes { + componentId + metrics { + sentEventsTotal { + sentEventsTotal + } + } + } + } + } + """ + }, + ) + + assert response.status_code == 200, ( + "Cannot access the API of the vector aggregator." + ) + + result = response.json() + + transforms = result["data"]["transforms"]["nodes"] + for transform in transforms: + sentEvents = transform["metrics"]["sentEventsTotal"] + componentId = transform["componentId"] + + if componentId == "filteredInvalidEvents": + assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, ( + "Invalid log events were sent." + ) + else: + assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, ( + f'No events were sent in "{componentId}".' + ) + + +if __name__ == "__main__": + check_sent_events() + print("Test successful!") diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index d0175129..77b752c3 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -4,11 +4,15 @@ # --- dimensions: - - name: kafka + - name: kafka-kraft values: - 3.7.2 - 3.9.1 - 4.0.0 + - name: kafka + values: + - 3.7.2 + - 3.9.1 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
# - 3.8.0,oci.stackable.tech/sdp/kafka:3.8.0-stackable0.0.0-dev @@ -67,6 +71,10 @@ dimensions: - "external-stable" - "external-unstable" tests: + - name: smoke-kraft + dimensions: + - kafka-kraft + - openshift - name: smoke dimensions: - kafka From a5a700f1a6db3a99e2145ac309f0a4537290bf8e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 24 Sep 2025 14:06:35 +0200 Subject: [PATCH 76/90] add "operations-kraft" test --- .../kuttl/operations-kraft/00-assert.yaml.j2 | 10 +++++ ...tor-aggregator-discovery-configmap.yaml.j2 | 9 +++++ .../operations-kraft/00-patch-ns.yaml.j2 | 9 +++++ .../kuttl/operations-kraft/20-assert.yaml.j2 | 22 +++++++++++ .../operations-kraft/20-install-kafka.yaml.j2 | 36 +++++++++++++++++ .../kuttl/operations-kraft/25-assert.yaml.j2 | 20 ++++++++++ .../operations-kraft/25-pause-kafka.yaml.j2 | 39 +++++++++++++++++++ .../kuttl/operations-kraft/30-assert.yaml.j2 | 20 ++++++++++ .../operations-kraft/30-stop-kafka.yaml.j2 | 39 +++++++++++++++++++ .../kuttl/operations-kraft/50-assert.yaml.j2 | 21 ++++++++++ .../operations-kraft/50-restart-kafka.yaml.j2 | 38 ++++++++++++++++++ .../kuttl/operations-kraft/60-assert.yaml.j2 | 21 ++++++++++ .../60-scale-controller-down.yaml.j2 | 38 ++++++++++++++++++ .../kuttl/operations-kraft/README.md | 6 +++ tests/test-definition.yaml | 4 ++ 15 files changed, 332 insertions(+) create mode 100644 tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 create mode 100644 
tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/50-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/README.md diff --git a/tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 new file mode 100644 index 00000000..50b1d4c3 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 new file mode 100644 index 00000000..2d6a0df5 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 @@ -0,0 +1,9 @@ +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +data: + ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }} +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 b/tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 new file mode 100644 index 00000000..67185acf --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 @@ -0,0 +1,9 @@ +{% if test_scenario['values']['openshift'] == 'true' %} +# see https://github.com/stackabletech/issues/issues/566 +--- +apiVersion: 
kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: kubectl patch namespace $NAMESPACE -p '{"metadata":{"labels":{"pod-security.kubernetes.io/enforce":"privileged"}}}' + timeout: 120 +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 new file mode 100644 index 00000000..6ac28598 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 @@ -0,0 +1,22 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 new file mode 100644 index 00000000..655d26a5 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 @@ -0,0 +1,36 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} + pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + 
roleGroups: + default: + replicas: 3 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 new file mode 100644 index 00000000..9ba36657 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=reconciliationPaused kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + replicas: 3 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 new file mode 100644 index 00000000..0e851efa --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 @@ -0,0 +1,39 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} + pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + brokers: + config: 
+ logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + clusterOperation: + stopped: false + reconciliationPaused: true diff --git a/tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 new file mode 100644 index 00000000..eba45c7c --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=stopped kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + replicas: 0 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + replicas: 0 diff --git a/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 new file mode 100644 index 00000000..70e9f713 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 @@ -0,0 +1,39 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} + pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + controllers: + config: + 
logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + clusterOperation: + stopped: true + reconciliationPaused: false diff --git a/tests/templates/kuttl/operations-kraft/50-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/50-assert.yaml.j2 new file mode 100644 index 00000000..ff21429a --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/50-assert.yaml.j2 @@ -0,0 +1,21 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 new file mode 100644 index 00000000..758b9e04 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 @@ -0,0 +1,38 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + brokers: + 
config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + clusterOperation: + stopped: false + reconciliationPaused: false diff --git a/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 new file mode 100644 index 00000000..0766b5cf --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 @@ -0,0 +1,21 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 new file mode 100644 index 00000000..19b0dbb0 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 @@ -0,0 +1,38 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + 
default: + replicas: 1 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + clusterOperation: + stopped: false + reconciliationPaused: false diff --git a/tests/templates/kuttl/operations-kraft/README.md b/tests/templates/kuttl/operations-kraft/README.md new file mode 100644 index 00000000..78a2cbf3 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/README.md @@ -0,0 +1,6 @@ + +Tests Kraft cluster operations: + +- Cluster stop/pause/restart +- Scale brokers up/down +- Scale controllers up/down diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 77b752c3..c8ea6436 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -71,6 +71,10 @@ dimensions: - "external-stable" - "external-unstable" tests: + - name: operations-kraft + dimensions: + - kafka-kraft + - openshift - name: smoke-kraft dimensions: - kafka-kraft From 5fbd0ebc092c233f5003e35990f4ae132a9b8731 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:46:51 +0200 Subject: [PATCH 77/90] update operations test --- .../kuttl/operations-kraft/60-assert.yaml.j2 | 4 +- ...yaml.j2 => 60-scale-controller-up.yaml.j2} | 2 +- .../kuttl/operations-kraft/70-assert.yaml.j2 | 21 ++++++++++ .../70-scale-controller-down.yaml.j2 | 38 +++++++++++++++++++ .../kuttl/operations-kraft/README.md | 7 ++++ 5 files changed, 69 insertions(+), 3 deletions(-) rename tests/templates/kuttl/operations-kraft/{60-scale-controller-down.yaml.j2 => 60-scale-controller-up.yaml.j2} (98%) create mode 100644 tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 create mode 100644 tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 diff --git a/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 index 0766b5cf..f54bbea6 100644 --- 
a/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 @@ -17,5 +17,5 @@ kind: StatefulSet metadata: name: test-kafka-controller-default status: - readyReplicas: 1 - replicas: 1 + readyReplicas: 5 + replicas: 5 diff --git a/tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 similarity index 98% rename from tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 rename to tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 index 19b0dbb0..54505741 100644 --- a/tests/templates/kuttl/operations-kraft/60-scale-controller-down.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 @@ -25,7 +25,7 @@ spec: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: default: - replicas: 1 + replicas: 5 brokers: config: logging: diff --git a/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 new file mode 100644 index 00000000..ff21429a --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 @@ -0,0 +1,21 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 new file mode 100644 index 00000000..758b9e04 --- /dev/null +++ 
b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 @@ -0,0 +1,38 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + clusterOperation: + stopped: false + reconciliationPaused: false diff --git a/tests/templates/kuttl/operations-kraft/README.md b/tests/templates/kuttl/operations-kraft/README.md index 78a2cbf3..e51db9eb 100644 --- a/tests/templates/kuttl/operations-kraft/README.md +++ b/tests/templates/kuttl/operations-kraft/README.md @@ -4,3 +4,10 @@ Tests Kraft cluster operations: - Cluster stop/pause/restart - Scale brokers up/down - Scale controllers up/down + +TODO: + +- Scaling controllers from 3 -> 1 doesn't work. + Both brokers and controllers try to communicate with old controllers. + This is why, the last step scales from 5 -> 3 controllers. + This at least, leaves the cluster in a working state. 
From 133853fbc2707a865b468f2c351fffd4ec66dc8d Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:59:17 +0200 Subject: [PATCH 78/90] tests: remove zookeeper=false tests --- .../kuttl/smoke/20-install-zk.yaml.j2 | 2 - tests/templates/kuttl/smoke/30-assert.yaml.j2 | 52 ------------------- .../kuttl/smoke/30-install-kafka.yaml.j2 | 14 ----- .../templates/kuttl/upgrade/01-assert.yaml.j2 | 2 - .../kuttl/upgrade/01-install-zk.yaml.j2 | 2 - .../templates/kuttl/upgrade/02-assert.yaml.j2 | 10 ---- .../kuttl/upgrade/02-install-kafka.yaml.j2 | 13 ----- .../templates/kuttl/upgrade/04-assert.yaml.j2 | 14 ----- tests/test-definition.yaml | 17 ------ 9 files changed, 126 deletions(-) diff --git a/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 b/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 index f5762fc5..5ab2c212 100644 --- a/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 +++ b/tests/templates/kuttl/smoke/20-install-zk.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -19,4 +18,3 @@ spec: roleGroups: default: replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 b/tests/templates/kuttl/smoke/30-assert.yaml.j2 index 6a53a5e3..3049df31 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2 @@ -86,55 +86,3 @@ status: expectedPods: 1 currentHealthy: 1 disruptionsAllowed: 1 -{% if test_scenario['values']['zookeeper'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 3 - replicas: 3 ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: log-dirs-test-kafka-controller-default-0 -spec: - resources: - requests: - storage: 2Gi -status: - phase: Bound ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: 
log-dirs-test-kafka-controller-default-1 -spec: - resources: - requests: - storage: 2Gi -status: - phase: Bound ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: log-dirs-test-kafka-controller-default-2 -spec: - resources: - requests: - storage: 2Gi -status: - phase: Bound ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: test-kafka-controller -status: - expectedPods: 3 - currentHealthy: 3 - disruptionsAllowed: 1 -{% endif %} diff --git a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 index 1774d928..4f3b95a0 100644 --- a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 @@ -26,21 +26,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - envOverrides: - COMMON_VAR: role-value # overridden by role group below - ROLE_VAR: role-value # only defined here at role level - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - requestedSecretLifetime: 7d - roleGroups: - default: - replicas: 3 -{% endif %} brokers: configOverrides: broker.properties: diff --git a/tests/templates/kuttl/upgrade/01-assert.yaml.j2 b/tests/templates/kuttl/upgrade/01-assert.yaml.j2 index 1654015f..c9cfcf5c 100644 --- a/tests/templates/kuttl/upgrade/01-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/01-assert.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 index f5762fc5..5ab2c212 100644 --- 
a/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/upgrade/01-install-zk.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -19,4 +18,3 @@ spec: roleGroups: default: replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/upgrade/02-assert.yaml.j2 b/tests/templates/kuttl/upgrade/02-assert.yaml.j2 index e6b48c8c..7c231d57 100644 --- a/tests/templates/kuttl/upgrade/02-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-assert.yaml.j2 @@ -10,13 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['zookeeper'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 3 - replicas: 3 -{% endif %} diff --git a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 index 728f4be2..afd99b60 100644 --- a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperZnode @@ -7,7 +6,6 @@ metadata: spec: clusterRef: name: test-zk -{% endif %} {% if test_scenario['values']['use-client-auth-tls'] == 'true' %} --- apiVersion: authentication.stackable.tech/v1alpha1 @@ -55,18 +53,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - config: - gracefulShutdownTimeout: 30s # speed up tests - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 3 -{% endif %} brokers: config: gracefulShutdownTimeout: 30s # speed up tests diff --git 
a/tests/templates/kuttl/upgrade/04-assert.yaml.j2 b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 index 391c6c31..2a95af44 100644 --- a/tests/templates/kuttl/upgrade/04-assert.yaml.j2 +++ b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 @@ -14,17 +14,3 @@ status: replicas: 1 currentReplicas: 1 updatedReplicas: 1 -{% if test_scenario['values']['zookeeper'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default - labels: - app.kubernetes.io/version: "{{ test_scenario['values']['upgrade_new'] }}-stackable0.0.0-dev" -status: - readyReplicas: 3 - replicas: 3 - currentReplicas: 3 - updatedReplicas: 3 -{% endif %} diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index c8ea6436..73fc4720 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -25,8 +25,6 @@ dimensions: - name: zookeeper values: - 3.9.3 - # This enables KRaft mode - - "false" - name: zookeeper-latest values: - 3.9.3 @@ -163,18 +161,3 @@ suites: expr: "true" - name: use-client-auth-tls expr: "true" - - name: kraft - patch: - - dimensions: - - name: zookeeper - expr: "false" - - name: zookeeper-latest - expr: "false" - - name: use-client-tls - expr: "true" - - name: use-client-auth-tls - expr: "true" - - name: bootstrap-listener-class - expr: "cluster-internal" - - name: kerberos-realm - expr: "PROD.MYCORP" From a8edc947cfa8f7abf37854b500e84e1fb41e1930 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 25 Sep 2025 13:30:38 +0200 Subject: [PATCH 79/90] tests: remove zookeeper-latest: false --- .../kuttl/cluster-operation/20-assert.yaml.j2 | 10 ------ .../kuttl/cluster-operation/30-assert.yaml.j2 | 9 ----- .../kuttl/cluster-operation/40-assert.yaml.j2 | 9 ----- .../kuttl/cluster-operation/50-assert.yaml.j2 | 10 ------ .../kuttl/configuration/10-assert.yaml.j2 | 33 ------------------- .../kuttl/delete-rolegroup/02-assert.yaml.j2 | 18 ---------- 
.../kuttl/delete-rolegroup/03-assert.yaml.j2 | 10 ------ .../kuttl/kerberos/20-assert.yaml.j2 | 10 ------ .../kafka-vector-aggregator-values.yaml.j2 | 26 --------------- tests/templates/kuttl/tls/20-assert.yaml.j2 | 10 ------ tests/test-definition.yaml | 2 -- 11 files changed, 147 deletions(-) diff --git a/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 index 8c199963..c6be6814 100644 --- a/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 @@ -12,13 +12,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 1 - replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 index 49854a9c..5b92f6da 100644 --- a/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 @@ -11,12 +11,3 @@ metadata: name: test-kafka-broker-default status: replicas: 0 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - replicas: 0 -{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 index d0160254..293cb5c8 100644 --- a/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 @@ -11,12 +11,3 @@ metadata: name: test-kafka-broker-default status: replicas: 0 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - replicas: 0 -{% endif %} 
diff --git a/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 index 8c199963..c6be6814 100644 --- a/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 @@ -12,13 +12,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 1 - replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/configuration/10-assert.yaml.j2 b/tests/templates/kuttl/configuration/10-assert.yaml.j2 index 0f6272bd..3de5ea66 100644 --- a/tests/templates/kuttl/configuration/10-assert.yaml.j2 +++ b/tests/templates/kuttl/configuration/10-assert.yaml.j2 @@ -5,39 +5,6 @@ timeout: 300 --- apiVersion: apps/v1 kind: StatefulSet -metadata: - name: test-kafka-controller-default -spec: - template: - spec: - containers: - - name: kafka - resources: - limits: - # value set in the role configuration - cpu: 500m - # value set in the rolegroup configuration - memory: 2Gi - requests: - # default value set by the operator - cpu: 250m - # value set in the rolegroup configuration - memory: 2Gi -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - - name: vector -{% endif %} - volumeClaimTemplates: - - metadata: - name: log-dirs - spec: - resources: - requests: - # value set in the role configuration and overridden in - # the rolegroup configuration - storage: 1Gi ---- -apiVersion: apps/v1 -kind: StatefulSet metadata: name: test-kafka-broker-default spec: diff --git a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 index 07871b56..f88993ed 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 @@ -18,21 +18,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if 
test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 2 - replicas: 2 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-secondary -status: - readyReplicas: 1 - replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 index 6e00dfea..7c231d57 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 @@ -10,13 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 2 - replicas: 2 -{% endif %} diff --git a/tests/templates/kuttl/kerberos/20-assert.yaml.j2 b/tests/templates/kuttl/kerberos/20-assert.yaml.j2 index 0e47f477..01ba15d1 100644 --- a/tests/templates/kuttl/kerberos/20-assert.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-assert.yaml.j2 @@ -10,13 +10,3 @@ metadata: status: readyReplicas: 3 replicas: 3 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 3 - replicas: 3 -{% endif %} diff --git a/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 index f0278d46..f30e142e 100644 --- a/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/kafka-vector-aggregator-values.yaml.j2 @@ -48,32 +48,6 @@ customConfig: condition: >- .pod == "test-kafka-broker-custom-log-config-0" && .container == "vector" -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} - 
filteredAutomaticLogConfigControllerKafka: - type: filter - inputs: [validEvents] - condition: >- - .pod == "test-kafka-controller-automatic-log-config-0" && - .container == "kafka" - filteredAutomaticLogConfigControllerVector: - type: filter - inputs: [validEvents] - condition: >- - .pod == "test-kafka-controller-automatic-log-config-0" && - .container == "vector" - filteredCustomLogConfigControllerKafka: - type: filter - inputs: [validEvents] - condition: >- - .pod == "test-kafka-controller-custom-log-config-0" && - .container == "kafka" - filteredCustomLogConfigControllerVector: - type: filter - inputs: [validEvents] - condition: >- - .pod == "test-kafka-controller-custom-log-config-0" && - .container == "vector" -{% endif %} filteredInvalidEvents: type: filter inputs: [vector] diff --git a/tests/templates/kuttl/tls/20-assert.yaml.j2 b/tests/templates/kuttl/tls/20-assert.yaml.j2 index 0e47f477..01ba15d1 100644 --- a/tests/templates/kuttl/tls/20-assert.yaml.j2 +++ b/tests/templates/kuttl/tls/20-assert.yaml.j2 @@ -10,13 +10,3 @@ metadata: status: readyReplicas: 3 replicas: 3 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-default -status: - readyReplicas: 3 - replicas: 3 -{% endif %} diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 73fc4720..94c4f426 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -28,8 +28,6 @@ dimensions: - name: zookeeper-latest values: - 3.9.3 - # This enables KRaft mode - - "false" - name: upgrade_old values: - 3.7.2 From 961842702788659bc62f14ee211dac13d457a41a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 25 Sep 2025 13:40:11 +0200 Subject: [PATCH 80/90] add 4.1.0 --- docs/modules/kafka/partials/supported-versions.adoc | 2 +- tests/test-definition.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/modules/kafka/partials/supported-versions.adoc 
b/docs/modules/kafka/partials/supported-versions.adoc index 4a4b9ab1..e9622e97 100644 --- a/docs/modules/kafka/partials/supported-versions.adoc +++ b/docs/modules/kafka/partials/supported-versions.adoc @@ -2,6 +2,6 @@ // This is a separate file, since it is used by both the direct Kafka documentation, and the overarching // Stackable Platform documentation. +* 4.1.0 (experimental) * 3.9.1 -* 3.9.0 (deprecated) * 3.7.2 (LTS) diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 94c4f426..f3c1315d 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -8,7 +8,7 @@ dimensions: values: - 3.7.2 - 3.9.1 - - 4.0.0 + - 4.1.0 - name: kafka values: - 3.7.2 From e92a75c655e9b7ab32c7825236e4caa12a307340 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 25 Sep 2025 13:44:17 +0200 Subject: [PATCH 81/90] added known issues section --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index bf17be2e..cf2deabd 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -84,6 +84,13 @@ KRaft mode requires major configuration changes compared to ZooKeeper: * `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and added `replica` id. * `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not supported. * The operator configures a static voter list containing the controller pods. Controllers are not dynamicaly managed. + +== Known Issues + +* Controllers in Kafka versions lower than `3.9.x` cannot be scaled at all due to the static controller quorum. +* Scaling controllers down to one single replica does not work. Scaling e.g. from 5 to 3 replicas works. 
+* Kerberos is currently not supported for KRaft in all versions. + == Troubleshooting === Cluster does not start From 0e4c29378d665dcc46498e23db316ef19fc9af06 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 25 Sep 2025 13:56:21 +0200 Subject: [PATCH 82/90] tests: missed one --- tests/templates/kuttl/smoke/20-assert.yaml.j2 | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/templates/kuttl/smoke/20-assert.yaml.j2 b/tests/templates/kuttl/smoke/20-assert.yaml.j2 index 1654015f..c9cfcf5c 100644 --- a/tests/templates/kuttl/smoke/20-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/20-assert.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} From 92c7084ac49028fe1e4e9063e11e682933e226ca Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 25 Sep 2025 14:03:50 +0200 Subject: [PATCH 83/90] improve known issues --- docs/modules/kafka/pages/usage-guide/kraft-controller.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index cf2deabd..ea5c4946 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -87,8 +87,8 @@ KRaft mode requires major configuration changes compared to ZooKeeper: == Known Issues -* Controllers in Kafka versions lower than `3.9.x` cannot be scaled at all due to the static controller quorum. -* Scaling controllers down to one single replica does not work. Scaling e.g. from 5 to 3 replicas works. +* Automatic migration from Apache ZooKeeper to KRaft is not supported. +* Scaling controller replicas might lead to unstable clusters. * Kerberos is currently not supported for KRaft in all versions. 
== Troubleshooting From d8144db983d19fb727439425fda455cca82ad1df Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 25 Sep 2025 16:36:40 +0200 Subject: [PATCH 84/90] remove log4j vs log4j2 java arg todo --- rust/operator-binary/src/product_logging.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index b4a02960..b7990be6 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -36,7 +36,6 @@ pub fn kafka_log_opts(product_version: &str) -> String { if product_version.starts_with("3.") { format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}") } else { - // TODO: -Dlog4j2 vs -Dlog4j format!("-Dlog4j2.configurationFile=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J2_CONFIG_FILE}") } } From 33a0b1b106e3abafaaa265e2d6dea11dda61cdac Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 25 Sep 2025 17:08:57 +0200 Subject: [PATCH 85/90] fix / improve todos --- rust/operator-binary/src/config/command.rs | 42 +++++++++++++------ rust/operator-binary/src/crd/mod.rs | 2 +- rust/operator-binary/src/crd/role/mod.rs | 1 - rust/operator-binary/src/crd/security.rs | 4 +- .../src/resource/statefulset.rs | 5 --- 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index a7a4cd97..9bd4a607 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -89,8 +89,11 @@ fn broker_start_command( let client_port = kafka_security.client_port(); - // TODO: copy to tmp? mount readwrite folder? - // TODO: do "cat /tmp/{properties_file}" ? + // TODO: The properties file from the configmap is copied to the /tmp folder and appended with dynamic properties + // This should be improved: + // - mount emptyDir as readWriteConfig + // - use config-utils for proper replacements? 
+ // - should we print the adapted properties file at startup? if kafka.is_controller_configured() { formatdoc! {" export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') @@ -143,7 +146,11 @@ pub fn controller_kafka_container_command( ) -> String { let client_port = kafka_security.client_port(); - // TODO: copy to tmp? mount readwrite folder? + // TODO: The properties file from the configmap is copied to the /tmp folder and appended with dynamic properties + // This should be improved: + // - mount emptyDir as readWriteConfig + // - use config-utils for proper replacements? + // - should we print the adapted properties file at startup? formatdoc! {" {COMMON_BASH_TRAP_FUNCTIONS} {remove_vector_shutdown_file_command} @@ -165,16 +172,24 @@ pub fn controller_kafka_container_command( wait_for_termination $! {create_vector_shutdown_file_command} ", - remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), - config_dir = STACKABLE_CONFIG_DIR, - properties_file = CONTROLLER_PROPERTIES_FILE, - bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), - listeners = to_listeners(client_port), - listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), - initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), - controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), - create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) - } + remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + config_dir = STACKABLE_CONFIG_DIR, + properties_file = CONTROLLER_PROPERTIES_FILE, + bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), + listeners = to_listeners(client_port), + listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), + initial_controller_command = 
initial_controllers_command(&controller_descriptors, product_version, client_port), + controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), + create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) + + + controller.quorum.bootstrap.servers=test-kafka-controller-default-0.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,test-kafka-controller-default-1.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,test-kafka-controller-default-2.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093 + listeners=CONTROLLER://test-kafka-controller-default-1.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093 + listener.security.protocol.map=CONTROLLER:SSL + controller.quorum.voters=2110489703@test-kafka-controller-default-0.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,2110489704@test-kafka-controller-default-1.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,2110489705@test-kafka-controller-default-2.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093 + + + } } fn to_listeners(port: u16) -> String { @@ -199,6 +214,7 @@ fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor], port: u .join(",") } +// TODO: This can be removed once 3.7.2 is removed. Used in command.rs. fn to_quorum_voters(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { controller_descriptors .iter() diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 27d5b125..fc801bc6 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -372,7 +372,7 @@ impl KafkaPodDescriptor { /// See: /// * controller-0 is the replica's host, /// * 1234 is the replica's port. - // TODO(@maltesander): Even though the used Uuid states to be type 4 it does not work... 
0000000000-00000000000 works... + // NOTE(@maltesander): Even though the used Uuid states to be type 4 it does not work... 0000000000-00000000000 works... pub fn as_voter(&self, port: u16) -> String { format!( "{node_id}@{fqdn}:{port}:0000000000-{node_id:0>11}", diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 174026c0..06ae0b43 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -133,7 +133,6 @@ impl KafkaRole { /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. /// but is similar to HBase). - // TODO: split into broker / controller? pub fn kerberos_service_name(&self) -> &'static str { "kafka" } diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 16c132b6..94fea587 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -548,7 +548,6 @@ impl KafkaTlsSecurity { // Kerberos if self.has_kerberos_enabled() { config.insert("sasl.enabled.mechanisms".to_string(), "GSSAPI".to_string()); - // TODO: what service name? config.insert( "sasl.kerberos.service.name".to_string(), KafkaRole::Broker.kerberos_service_name().to_string(), @@ -617,10 +616,9 @@ impl KafkaTlsSecurity { // Kerberos if self.has_kerberos_enabled() { config.insert("sasl.enabled.mechanisms".to_string(), "GSSAPI".to_string()); - // TODO: what service name? 
config.insert( "sasl.kerberos.service.name".to_string(), - KafkaRole::Broker.kerberos_service_name().to_string(), + KafkaRole::Controller.kerberos_service_name().to_string(), ); config.insert( "sasl.mechanism.inter.broker.protocol".to_string(), diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 450977fe..18bcc04c 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -825,11 +825,6 @@ pub fn build_controller_rolegroup_statefulset( add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; let mut pod_template = pod_builder.build_template(); - let pod_template_spec = pod_template.spec.get_or_insert_with(PodSpec::default); - - // Don't run kcat pod as PID 1, to ensure that default signal handlers apply - // TODO: we need that? - pod_template_spec.share_process_namespace = Some(true); pod_template.merge_from( kafka_role From f2494b4ebaf7f8b6dfda60c15952748f26c65fe8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 25 Sep 2025 17:10:16 +0200 Subject: [PATCH 86/90] fix copy paste --- rust/operator-binary/src/config/command.rs | 28 ++++++++-------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 9bd4a607..8887c904 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -172,24 +172,16 @@ pub fn controller_kafka_container_command( wait_for_termination $! 
{create_vector_shutdown_file_command} ", - remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), - config_dir = STACKABLE_CONFIG_DIR, - properties_file = CONTROLLER_PROPERTIES_FILE, - bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), - listeners = to_listeners(client_port), - listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), - initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), - controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), - create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) - - - controller.quorum.bootstrap.servers=test-kafka-controller-default-0.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,test-kafka-controller-default-1.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,test-kafka-controller-default-2.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093 - listeners=CONTROLLER://test-kafka-controller-default-1.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093 - listener.security.protocol.map=CONTROLLER:SSL - controller.quorum.voters=2110489703@test-kafka-controller-default-0.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,2110489704@test-kafka-controller-default-1.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093,2110489705@test-kafka-controller-default-2.test-kafka-controller-default.kuttl-test-cute-ghoul.svc.cluster.local:9093 - - - } + remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + config_dir = STACKABLE_CONFIG_DIR, + properties_file = CONTROLLER_PROPERTIES_FILE, + bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), + listeners = to_listeners(client_port), + listener_security_protocol_map = 
to_listener_security_protocol_map(kafka_listeners), + initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), + controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), + create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) + } } fn to_listeners(port: u16) -> String { From ffc208ebb6769dd085eaf4e04d0100e6f1c7cb2c Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 25 Sep 2025 18:14:20 +0200 Subject: [PATCH 87/90] tests: fix tls tests --- .../templates/kuttl/tls/10-install-zookeeper.yaml.j2 | 2 -- tests/templates/kuttl/tls/20-install-kafka.yaml.j2 | 12 ------------ 2 files changed, 14 deletions(-) diff --git a/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 b/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 index 43ecacca..6b462fa4 100644 --- a/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 +++ b/tests/templates/kuttl/tls/10-install-zookeeper.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -19,4 +18,3 @@ spec: roleGroups: default: replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 index 244f3166..da660f7f 100644 --- a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperZnode @@ -7,7 +6,6 @@ metadata: spec: clusterRef: name: test-zk -{% endif %} {% if test_scenario['values']['use-client-auth-tls'] == 'true' %} --- apiVersion: authentication.stackable.tech/v1alpha1 @@ -60,17 +58,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: 
vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-kafka-znode -{% else %} - controllers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 3 -{% endif %} brokers: config: logging: From ec249b528fc07079abd9fd2ec1d9e3add8eae251 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:21:25 +0200 Subject: [PATCH 88/90] tests: disable scaling for kraft 3.7 --- tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 | 2 ++ .../kuttl/operations-kraft/60-scale-controller-up.yaml.j2 | 2 ++ tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 | 2 ++ .../kuttl/operations-kraft/70-scale-controller-down.yaml.j2 | 2 ++ tests/templates/kuttl/operations-kraft/README.md | 5 +++-- 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 index f54bbea6..9209b5ea 100644 --- a/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -19,3 +20,4 @@ metadata: status: readyReplicas: 5 replicas: 5 +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 index 54505741..718c760e 100644 --- a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 @@ -1,3 +1,4 @@ +{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %} --- apiVersion: kuttl.dev/v1beta1 kind: TestStep @@ -36,3 +37,4 @@ spec: clusterOperation: stopped: false 
reconciliationPaused: false +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 index ff21429a..d8da05ff 100644 --- a/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 @@ -1,3 +1,4 @@ +{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -19,3 +20,4 @@ metadata: status: readyReplicas: 3 replicas: 3 +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 index 758b9e04..a8073228 100644 --- a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 @@ -1,3 +1,4 @@ +{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %} --- apiVersion: kuttl.dev/v1beta1 kind: TestStep @@ -36,3 +37,4 @@ spec: clusterOperation: stopped: false reconciliationPaused: false +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/README.md b/tests/templates/kuttl/operations-kraft/README.md index e51db9eb..5c0fa86b 100644 --- a/tests/templates/kuttl/operations-kraft/README.md +++ b/tests/templates/kuttl/operations-kraft/README.md @@ -1,12 +1,13 @@ - Tests Kraft cluster operations: - Cluster stop/pause/restart - Scale brokers up/down - Scale controllers up/down -TODO: +Notes +- Kafka 3.7 controllers do not scale at all. + The scaling test steps are disabled for this version. - Scaling controllers from 3 -> 1 doesn't work. Both brokers and controllers try to communicate with old controllers. This is why, the last step scales from 5 -> 3 controllers. 
From 57f31e723e2647c579f5de10ab45963907723980 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 26 Sep 2025 14:56:00 +0200 Subject: [PATCH 89/90] remove obsolete zk/kraft checks --- .../{10-assert.yaml.j2 => 10-assert.yaml} | 2 - .../cluster-operation/10-install-zk.yaml.j2 | 2 - .../{20-assert.yaml.j2 => 20-assert.yaml} | 0 .../20-install-kafka.yaml.j2 | 12 ----- .../{30-assert.yaml.j2 => 30-assert.yaml} | 0 .../cluster-operation/30-stop-kafka.yaml.j2 | 12 ----- .../{40-assert.yaml.j2 => 40-assert.yaml} | 0 .../cluster-operation/40-pause-kafka.yaml.j2 | 12 ----- .../{50-assert.yaml.j2 => 50-assert.yaml} | 0 .../50-restart-kafka.yaml.j2 | 12 ----- .../{01-assert.yaml.j2 => 01-assert.yaml} | 2 - .../delete-rolegroup/01-install-zk.yaml.j2 | 2 - .../{02-assert.yaml.j2 => 02-assert.yaml} | 0 .../delete-rolegroup/02-install-kafka.yaml.j2 | 13 ----- .../{03-assert.yaml.j2 => 03-assert.yaml} | 0 .../03-delete-secondary.yaml.j2 | 12 ----- .../kuttl/delete-rolegroup/03-errors.yaml | 5 ++ .../kuttl/delete-rolegroup/03-errors.yaml.j2 | 12 ----- .../10-assert.yaml} | 2 - .../kuttl/kerberos/10-install-zk.yaml.j2 | 2 - .../{20-assert.yaml.j2 => 20-assert.yaml} | 0 .../kuttl/kerberos/20-install-kafka.yaml.j2 | 12 ----- .../{02-assert.yaml.j2 => 02-assert.yaml} | 2 - .../logging/02-install-zookeeper.yaml.j2 | 2 - tests/templates/kuttl/logging/04-assert.yaml | 20 ++++++++ .../templates/kuttl/logging/04-assert.yaml.j2 | 38 --------------- .../kuttl/logging/04-install-kafka.yaml.j2 | 47 ------------------- .../10-assert.yaml.j2 => tls/10-assert.yaml} | 2 - .../tls/{20-assert.yaml.j2 => 20-assert.yaml} | 0 29 files changed, 25 insertions(+), 200 deletions(-) rename tests/templates/kuttl/cluster-operation/{10-assert.yaml.j2 => 10-assert.yaml} (71%) rename tests/templates/kuttl/cluster-operation/{20-assert.yaml.j2 => 20-assert.yaml} (100%) rename tests/templates/kuttl/cluster-operation/{30-assert.yaml.j2 => 30-assert.yaml} (100%) rename 
tests/templates/kuttl/cluster-operation/{40-assert.yaml.j2 => 40-assert.yaml} (100%) rename tests/templates/kuttl/cluster-operation/{50-assert.yaml.j2 => 50-assert.yaml} (100%) rename tests/templates/kuttl/delete-rolegroup/{01-assert.yaml.j2 => 01-assert.yaml} (71%) rename tests/templates/kuttl/delete-rolegroup/{02-assert.yaml.j2 => 02-assert.yaml} (100%) rename tests/templates/kuttl/delete-rolegroup/{03-assert.yaml.j2 => 03-assert.yaml} (100%) create mode 100644 tests/templates/kuttl/delete-rolegroup/03-errors.yaml delete mode 100644 tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 rename tests/templates/kuttl/{tls/10-assert.yaml.j2 => kerberos/10-assert.yaml} (71%) rename tests/templates/kuttl/kerberos/{20-assert.yaml.j2 => 20-assert.yaml} (100%) rename tests/templates/kuttl/logging/{02-assert.yaml.j2 => 02-assert.yaml} (71%) create mode 100644 tests/templates/kuttl/logging/04-assert.yaml delete mode 100644 tests/templates/kuttl/logging/04-assert.yaml.j2 rename tests/templates/kuttl/{kerberos/10-assert.yaml.j2 => tls/10-assert.yaml} (71%) rename tests/templates/kuttl/tls/{20-assert.yaml.j2 => 20-assert.yaml} (100%) diff --git a/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/10-assert.yaml similarity index 71% rename from tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 rename to tests/templates/kuttl/cluster-operation/10-assert.yaml index c9e55603..c9cfcf5c 100644 --- a/tests/templates/kuttl/cluster-operation/10-assert.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/10-assert.yaml @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 b/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 index 43ecacca..6b462fa4 100644 --- 
a/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/10-install-zk.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -19,4 +18,3 @@ spec: roleGroups: default: replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-assert.yaml similarity index 100% rename from tests/templates/kuttl/cluster-operation/20-assert.yaml.j2 rename to tests/templates/kuttl/cluster-operation/20-assert.yaml diff --git a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 index 5cce455a..6d391b65 100644 --- a/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/20-install-kafka.yaml.j2 @@ -16,23 +16,11 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: -{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 1 -{% endif %} brokers: config: logging: diff --git a/tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-assert.yaml similarity index 100% rename from tests/templates/kuttl/cluster-operation/30-assert.yaml.j2 rename to tests/templates/kuttl/cluster-operation/30-assert.yaml diff --git a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 
b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 index 7a7a4c88..2c8a1532 100644 --- a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 @@ -16,23 +16,11 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: -{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - brokers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 1 -{% endif %} brokers: config: logging: diff --git a/tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-assert.yaml similarity index 100% rename from tests/templates/kuttl/cluster-operation/40-assert.yaml.j2 rename to tests/templates/kuttl/cluster-operation/40-assert.yaml diff --git a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 index 2775a363..18682f32 100644 --- a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 @@ -16,23 +16,11 @@ spec: productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: -{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - 
config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 1 -{% endif %} brokers: config: logging: diff --git a/tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-assert.yaml similarity index 100% rename from tests/templates/kuttl/cluster-operation/50-assert.yaml.j2 rename to tests/templates/kuttl/cluster-operation/50-assert.yaml diff --git a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 index 64551ae0..bece9335 100644 --- a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 @@ -15,23 +15,11 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-latest'] }}" {% endif %} -{% if lookup('env', 'VECTOR_AGGREGATOR') or test_scenario['values']['zookeeper-latest'] != 'false' %} clusterConfig: -{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 1 -{% endif %} brokers: config: logging: diff --git a/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/01-assert.yaml similarity index 71% rename from tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 rename to tests/templates/kuttl/delete-rolegroup/01-assert.yaml index c9e55603..c9cfcf5c 100644 --- a/tests/templates/kuttl/delete-rolegroup/01-assert.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/01-assert.yaml @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert 
@@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 index 43ecacca..6b462fa4 100644 --- a/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/01-install-zk.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -19,4 +18,3 @@ spec: roleGroups: default: replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-assert.yaml similarity index 100% rename from tests/templates/kuttl/delete-rolegroup/02-assert.yaml.j2 rename to tests/templates/kuttl/delete-rolegroup/02-assert.yaml diff --git a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 index 01a7d2f6..854c3734 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 @@ -20,20 +20,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - config: - gracefulShutdownTimeout: 30s # speed up tests - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 2 - secondary: - replicas: 1 -{% endif %} brokers: config: gracefulShutdownTimeout: 30s # speed up tests diff --git a/tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-assert.yaml similarity index 100% rename from tests/templates/kuttl/delete-rolegroup/03-assert.yaml.j2 rename to 
tests/templates/kuttl/delete-rolegroup/03-assert.yaml diff --git a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 index 18373063..38853bd6 100644 --- a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 @@ -19,19 +19,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-zk -{% else %} - controllers: - config: - gracefulShutdownTimeout: 30s # speed up tests - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 2 - secondary: null -{% endif %} brokers: config: gracefulShutdownTimeout: 30s # speed up tests diff --git a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml new file mode 100644 index 00000000..6a1a6cf0 --- /dev/null +++ b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-secondary diff --git a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 deleted file mode 100644 index e713dfe9..00000000 --- a/tests/templates/kuttl/delete-rolegroup/03-errors.yaml.j2 +++ /dev/null @@ -1,12 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-secondary -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-secondary -{% endif %} diff --git a/tests/templates/kuttl/tls/10-assert.yaml.j2 b/tests/templates/kuttl/kerberos/10-assert.yaml similarity index 71% rename from tests/templates/kuttl/tls/10-assert.yaml.j2 
rename to tests/templates/kuttl/kerberos/10-assert.yaml index d0f1fce4..e0766c49 100644 --- a/tests/templates/kuttl/tls/10-assert.yaml.j2 +++ b/tests/templates/kuttl/kerberos/10-assert.yaml @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 b/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 index 43ecacca..6b462fa4 100644 --- a/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 +++ b/tests/templates/kuttl/kerberos/10-install-zk.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -19,4 +18,3 @@ spec: roleGroups: default: replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/kerberos/20-assert.yaml.j2 b/tests/templates/kuttl/kerberos/20-assert.yaml similarity index 100% rename from tests/templates/kuttl/kerberos/20-assert.yaml.j2 rename to tests/templates/kuttl/kerberos/20-assert.yaml diff --git a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 index 7a529c59..e30c5056 100644 --- a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 @@ -4,7 +4,6 @@ kind: TestStep commands: - script: | kubectl apply -n $NAMESPACE -f - < 0 }} - roleGroups: - default: - replicas: 3 -{% endif %} brokers: config: logging: diff --git a/tests/templates/kuttl/logging/02-assert.yaml.j2 b/tests/templates/kuttl/logging/02-assert.yaml similarity index 71% rename from tests/templates/kuttl/logging/02-assert.yaml.j2 rename to tests/templates/kuttl/logging/02-assert.yaml index d0f1fce4..e0766c49 100644 --- a/tests/templates/kuttl/logging/02-assert.yaml.j2 +++ b/tests/templates/kuttl/logging/02-assert.yaml @@ -1,4 +1,3 @@ -{% 
if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert @@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 b/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 index 71cb7972..96078f76 100644 --- a/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 +++ b/tests/templates/kuttl/logging/02-install-zookeeper.yaml.j2 @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperCluster @@ -27,4 +26,3 @@ metadata: spec: clusterRef: name: test-zk -{% endif %} diff --git a/tests/templates/kuttl/logging/04-assert.yaml b/tests/templates/kuttl/logging/04-assert.yaml new file mode 100644 index 00000000..e445cb01 --- /dev/null +++ b/tests/templates/kuttl/logging/04-assert.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-automatic-log-config +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-custom-log-config +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/logging/04-assert.yaml.j2 b/tests/templates/kuttl/logging/04-assert.yaml.j2 deleted file mode 100644 index c9152f62..00000000 --- a/tests/templates/kuttl/logging/04-assert.yaml.j2 +++ /dev/null @@ -1,38 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 600 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-automatic-log-config -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-broker-custom-log-config -status: - readyReplicas: 1 - replicas: 1 -{% if test_scenario['values']['zookeeper-latest'] == 'false' %} ---- -apiVersion: apps/v1 -kind: 
StatefulSet -metadata: - name: test-kafka-controller-automatic-log-config -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: test-kafka-controller-custom-log-config -status: - readyReplicas: 1 - replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 index 9dd6e590..42588924 100644 --- a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 @@ -50,54 +50,7 @@ spec: tls: serverSecretClass: null vectorAggregatorConfigMapName: kafka-vector-aggregator-discovery -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} zookeeperConfigMapName: test-kafka-znode -{% else %} - controllers: - roleGroups: - automatic-log-config: - replicas: 1 - config: - logging: - enableVectorAgent: true - containers: - kafka: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO - vector: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO - podOverrides: - spec: - containers: - - name: vector - volumeMounts: - - name: prepared-logs - mountPath: /stackable/log/prepared-logs - volumes: - - name: prepared-logs - configMap: - name: prepared-logs - custom-log-config: - replicas: 1 - config: - logging: - enableVectorAgent: true - containers: - kafka: - custom: - configMap: kafka-log-config -{% endif %} brokers: roleGroups: automatic-log-config: diff --git a/tests/templates/kuttl/kerberos/10-assert.yaml.j2 b/tests/templates/kuttl/tls/10-assert.yaml similarity index 71% rename from tests/templates/kuttl/kerberos/10-assert.yaml.j2 rename to tests/templates/kuttl/tls/10-assert.yaml index d0f1fce4..e0766c49 100644 --- a/tests/templates/kuttl/kerberos/10-assert.yaml.j2 +++ b/tests/templates/kuttl/tls/10-assert.yaml @@ -1,4 +1,3 @@ -{% if test_scenario['values']['zookeeper-latest'] != 'false' %} --- apiVersion: kuttl.dev/v1beta1 kind: 
TestAssert @@ -11,4 +10,3 @@ metadata: status: readyReplicas: 1 replicas: 1 -{% endif %} diff --git a/tests/templates/kuttl/tls/20-assert.yaml.j2 b/tests/templates/kuttl/tls/20-assert.yaml similarity index 100% rename from tests/templates/kuttl/tls/20-assert.yaml.j2 rename to tests/templates/kuttl/tls/20-assert.yaml From c392406a1d2c52bbbddbb834abc113742428a406 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sun, 28 Sep 2025 14:04:35 +0200 Subject: [PATCH 90/90] add gracefulShutdownTimeout to stabilize kerberos test --- tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 index e30c5056..0099c2b2 100644 --- a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 @@ -51,6 +51,7 @@ commands: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} brokerListenerClass: {{ test_scenario['values']['broker-listener-class'] }} bootstrapListenerClass: {{ test_scenario['values']['bootstrap-listener-class'] }} + gracefulShutdownTimeout: 30s # speed up tests roleGroups: default: replicas: 3