diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e04d9ef131..12469ae91d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -143,6 +143,7 @@ jobs: --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.2-2' \ --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.2-3.4-2' \ --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-1' \ + --env 'RELATED_IMAGE_COLLECTOR=ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0' \ --env 'PGO_FEATURE_GATES=TablespaceVolumes=true' \ --name 'postgres-operator' ubuntu \ postgres-operator diff --git a/.golangci.yaml b/.golangci.yaml index 1631433a43..da19e26976 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -114,11 +114,6 @@ issues: path: internal/kubernetes/discovery.go text: k8s.io/client-go/discovery - # PGO-2010 - - linters: [exhaustive] - path: internal/controller/postgrescluster/volumes.go - text: 'v1.PersistentVolumeClaimConditionType: v1.PersistentVolumeClaimControllerResizeError, v1.PersistentVolumeClaimNodeResizeError$' - # These value types have unmarshal methods. # https://github.com/raeperd/recvcheck/issues/7 - linters: [recvcheck] diff --git a/Makefile b/Makefile index 7e55cbd7c0..a4bf44629b 100644 --- a/Makefile +++ b/Makefile @@ -254,16 +254,16 @@ generate-kuttl: ## Generate kuttl tests ##@ Generate .PHONY: check-generate -check-generate: ## Check crd, deepcopy functions, and rbac generation -check-generate: generate-crd -check-generate: generate-deepcopy -check-generate: generate-rbac +check-generate: ## Check everything generated is also committed +check-generate: generate git diff --exit-code -- config/crd git diff --exit-code -- config/rbac + git diff --exit-code -- internal/collector git diff --exit-code -- pkg/apis .PHONY: generate -generate: ## Generate crd, deepcopy functions, and rbac +generate: ## Generate everything +generate: generate-collector generate: generate-crd generate: generate-deepcopy generate: generate-rbac @@ -276,6 +276,10 @@ generate-crd: tools/controller-gen paths='./pkg/apis/...' 
\ output:dir='config/crd/bases' # {directory}/{group}_{plural}.yaml +.PHONY: generate-collector +generate-collector: ## Generate OTel Collector files + $(GO) generate ./internal/collector + .PHONY: generate-deepcopy generate-deepcopy: ## Generate DeepCopy functions generate-deepcopy: tools/controller-gen @@ -304,7 +308,7 @@ endef CONTROLLER ?= hack/tools/controller-gen tools: tools/controller-gen tools/controller-gen: - $(call go-get-tool,$(CONTROLLER),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5) + $(call go-get-tool,$(CONTROLLER),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.17.2) ENVTEST ?= hack/tools/setup-envtest tools: tools/setup-envtest diff --git a/config/crd/bases/postgres-operator.crunchydata.com_crunchybridgeclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_crunchybridgeclusters.yaml index 6938d25da0..080683f01b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_crunchybridgeclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_crunchybridgeclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.5 + controller-gen.kubebuilder.io/version: v0.17.2 name: crunchybridgeclusters.postgres-operator.crunchydata.com spec: group: postgres-operator.crunchydata.com diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index 9b322b1365..e07621a2a7 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.5 + controller-gen.kubebuilder.io/version: v0.17.2 name: pgadmins.postgres-operator.crunchydata.com spec: group: postgres-operator.crunchydata.com @@ -973,24 +973,27 @@ spec: More info: https://www.pgadmin.org/docs/pgadmin4/latest/external_database.html properties: key: - description: The key of the secret to select from. Must be - a valid secret key. + description: Name of the data field within the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ type: string + x-kubernetes-validations: + - message: cannot be "." or start with ".." + rule: self != "." && !self.startsWith("..") name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + description: Name of the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?([.][a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string optional: - description: Specify whether the Secret or its key must be - defined + description: Whether or not the Secret or its data must be + defined. Defaults to false. type: boolean required: - key + - name type: object x-kubernetes-map-type: atomic files: @@ -1327,24 +1330,27 @@ spec: More info: https://www.pgadmin.org/docs/pgadmin4/latest/ldap.html properties: key: - description: The key of the secret to select from. Must be - a valid secret key. + description: Name of the data field within the Secret. 
+ maxLength: 253 + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ type: string + x-kubernetes-validations: + - message: cannot be "." or start with ".." + rule: self != "." && !self.startsWith("..") name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + description: Name of the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?([.][a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string optional: - description: Specify whether the Secret or its key must be - defined + description: Whether or not the Secret or its data must be + defined. Defaults to false. type: boolean required: - key + - name type: object x-kubernetes-map-type: atomic settings: @@ -1590,6 +1596,439 @@ spec: type: object x-kubernetes-map-type: atomic type: array + instrumentation: + description: |- + Configuration for the OpenTelemetry collector container used to collect + logs and metrics. + properties: + config: + description: Config is the place for users to configure exporters + and provide files. + properties: + exporters: + description: |- + Exporters allows users to configure OpenTelemetry exporters that exist + in the collector image. + type: object + x-kubernetes-preserve-unknown-fields: true + files: + description: |- + Files allows the user to mount projected volumes into the collector + Pod so that files can be referenced by the collector as needed. + items: + description: |- + Projection that may be projected along with other supported volume types. + Exactly one of these fields must be set. + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. + + Alpha, gated by the ClusterTrustBundleProjection feature gate. + + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. + + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. + properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. + type: boolean + path: + description: Relative path from the volume root + to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. + type: string + required: + - path + type: object + configMap: + description: configMap information about the configMap + data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional specify whether the ConfigMap + or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + description: downwardAPI information about the downwardAPI + data to project + properties: + items: + description: Items is a list of DownwardAPIVolume + file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing the + pod field + properties: + fieldRef: + description: 'Required: Selects a field of + the pod: only annotations, labels, name, + namespace and uid are supported.' + properties: + apiVersion: + description: Version of the schema the + FieldPath is written in terms of, defaults + to "v1". + type: string + fieldPath: + description: Path of the field to select + in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the relative + path name of the file to be created. Must + not be absolute or contain the ''..'' path. + Must be utf-8 encoded. The first item of + the relative path must not start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format + of the exposed resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + secret: + description: secret information about the secret data + to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. 
+ Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional field specify whether the + Secret or its key must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is information about + the serviceAccountToken data to project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. + format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. + type: string + required: + - path + type: object + type: object + type: array + type: object + image: + description: |- + Image name to use for collector containers. When omitted, the value + comes from an operator environment variable. + type: string + logs: + description: Logs is the place for users to configure the log + collection. + properties: + exporters: + description: |- + Exporters allows users to specify which exporters they want to use in + the logs pipeline. + items: + type: string + type: array + retentionPeriod: + description: |- + How long to retain log files locally. An RFC 3339 duration or a number + and unit: `12 hr`, `3d`, `4 weeks`, etc. + format: duration + maxLength: 20 + minLength: 1 + pattern: ^(PT)?( *[0-9]+ *(?i:(h|hr|d|w|wk)|(hour|day|week)s?))+$ + type: string + x-kubernetes-validations: + - message: must be at least one hour + rule: duration("1h") <= self && self <= duration("8760h") + type: object + resources: + description: Resources holds the resource requirements for the + collector container. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. 
It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + type: object metadata: description: Metadata contains metadata for custom resources properties: diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml index 39b7bdfefd..53d72671bc 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.5 + controller-gen.kubebuilder.io/version: v0.17.2 name: pgupgrades.postgres-operator.crunchydata.com spec: group: postgres-operator.crunchydata.com @@ -963,6 +963,7 @@ spec: type: object fromPostgresVersion: description: The major version of PostgreSQL before the upgrade. + format: int32 maximum: 17 minimum: 11 type: integer @@ -984,7 +985,7 @@ spec: description: |- The image pull secrets used to pull from a private registry. Changing this value causes all running PGUpgrade pods to restart. - https://k8s.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry items: description: |- LocalObjectReference contains enough information to let you locate the @@ -1002,6 +1003,13 @@ spec: type: object x-kubernetes-map-type: atomic type: array + jobs: + description: |- + The number of simultaneous processes pg_upgrade should use. 
+ More info: https://www.postgresql.org/docs/current/pgupgrade.html + format: int32 + minimum: 0 + type: integer metadata: description: Metadata contains metadata for custom resources properties: @@ -1015,14 +1023,14 @@ spec: type: object type: object postgresClusterName: - description: The name of the cluster to be updated + description: The name of the Postgres cluster to upgrade. minLength: 1 type: string priorityClassName: description: |- Priority class name for the PGUpgrade pod. Changing this value causes PGUpgrade pod to restart. - More info: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/ + More info: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption type: string resources: description: Resource requirements for the PGUpgrade container. @@ -1083,13 +1091,9 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object - toPostgresImage: - description: |- - The image name to use for PostgreSQL containers after upgrade. - When omitted, the value comes from an operator environment variable. - type: string toPostgresVersion: description: The major version of PostgreSQL to be upgraded to. + format: int32 maximum: 17 minimum: 11 type: integer @@ -1134,11 +1138,30 @@ spec: type: string type: object type: array + transferMethod: + description: |- + The method pg_upgrade should use to transfer files to the new cluster. + More info: https://www.postgresql.org/docs/current/pgupgrade.html + enum: + - Clone + - Copy + - CopyFileRange + - Link + maxLength: 15 + type: string required: - fromPostgresVersion - postgresClusterName - toPostgresVersion type: object + x-kubernetes-validations: + - rule: self.fromPostgresVersion < self.toPostgresVersion + - message: Only Copy or Link before PostgreSQL 12 + rule: '!has(self.transferMethod) || (self.toPostgresVersion < 12 ? self.transferMethod + in ["Copy","Link"] : true)' + - message: Only Clone, Copy, or Link before PostgreSQL 17 + rule: '!has(self.transferMethod) || (self.toPostgresVersion < 17 ? self.transferMethod + in ["Clone","Copy","Link"] : true)' status: description: PGUpgradeStatus defines the observed state of PGUpgrade properties: diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index edae909760..474dd8da30 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.5 + controller-gen.kubebuilder.io/version: v0.17.2 name: postgresclusters.postgres-operator.crunchydata.com spec: group: postgres-operator.crunchydata.com @@ -4370,6 +4370,7 @@ spec: config: properties: files: + description: Files to mount under "/etc/postgres". items: description: |- Projection that may be projected along with other supported volume types. @@ -4688,6 +4689,54 @@ spec: type: object type: object type: array + parameters: + additionalProperties: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + description: |- + Configuration parameters for the PostgreSQL server. Some values will + be reloaded without validation and some cause PostgreSQL to restart. + Some values cannot be changed at all. 
+ More info: https://www.postgresql.org/docs/current/runtime-config.html + maxProperties: 50 + type: object + x-kubernetes-map-type: granular + x-kubernetes-validations: + - message: 'cannot change PGDATA path: config_file, data_directory' + rule: '!has(self.config_file) && !has(self.data_directory)' + - message: cannot change external_pid_file + rule: '!has(self.external_pid_file)' + - message: 'cannot change authentication path: hba_file, ident_file' + rule: '!has(self.hba_file) && !has(self.ident_file)' + - message: 'network connectivity is always enabled: listen_addresses' + rule: '!has(self.listen_addresses)' + - message: change port using .spec.port instead + rule: '!has(self.port)' + - message: TLS is always enabled + rule: '!has(self.ssl) && !self.exists(k, k.startsWith("ssl_"))' + - message: domain socket paths cannot be changed + rule: '!self.exists(k, k.startsWith("unix_socket_"))' + - message: wal_level must be "replica" or higher + rule: '!has(self.wal_level) || self.wal_level in ["logical"]' + - message: wal_log_hints are always enabled + rule: '!has(self.wal_log_hints)' + - rule: '!has(self.archive_mode) && !has(self.archive_command) + && !has(self.restore_command)' + - rule: '!has(self.recovery_target) && !self.exists(k, k.startsWith("recovery_target_"))' + - message: hot_standby is always enabled + rule: '!has(self.hot_standby)' + - rule: '!has(self.synchronous_standby_names)' + - rule: '!has(self.primary_conninfo) && !has(self.primary_slot_name)' + - message: delayed replication is not supported at this time + rule: '!has(self.recovery_min_apply_delay)' + - message: cluster_name is derived from the PostgresCluster name + rule: '!has(self.cluster_name)' + - message: disabling logging_collector is unsafe + rule: '!has(self.logging_collector)' + - message: log_file_mode cannot be changed + rule: '!has(self.log_file_mode)' type: object customReplicationTLSSecret: description: |- @@ -11123,6 +11172,439 @@ spec: x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map + instrumentation: + description: |- + Configuration for the OpenTelemetry collector container used to collect + logs and metrics. + properties: + config: + description: Config is the place for users to configure exporters + and provide files. + properties: + exporters: + description: |- + Exporters allows users to configure OpenTelemetry exporters that exist + in the collector image. + type: object + x-kubernetes-preserve-unknown-fields: true + files: + description: |- + Files allows the user to mount projected volumes into the collector + Pod so that files can be referenced by the collector as needed. + items: + description: |- + Projection that may be projected along with other supported volume types. + Exactly one of these fields must be set. + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. + + Alpha, gated by the ClusterTrustBundleProjection feature gate. + + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. + + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. 
+ properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. + type: boolean + path: + description: Relative path from the volume root + to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. + type: string + required: + - path + type: object + configMap: + description: configMap information about the configMap + data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. 
+ YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional specify whether the ConfigMap + or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + description: downwardAPI information about the downwardAPI + data to project + properties: + items: + description: Items is a list of DownwardAPIVolume + file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing the + pod field + properties: + fieldRef: + description: 'Required: Selects a field of + the pod: only annotations, labels, name, + namespace and uid are supported.' + properties: + apiVersion: + description: Version of the schema the + FieldPath is written in terms of, defaults + to "v1". + type: string + fieldPath: + description: Path of the field to select + in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the relative + path name of the file to be created. Must + not be absolute or contain the ''..'' path. + Must be utf-8 encoded. The first item of + the relative path must not start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. 
+ properties: + containerName: + description: 'Container name: required + for volumes, optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format + of the exposed resources, defaults to + "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + secret: + description: secret information about the secret data + to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path within + a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional field specify whether the + Secret or its key must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is information about + the serviceAccountToken data to project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. 
The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. + format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. + type: string + required: + - path + type: object + type: object + type: array + type: object + image: + description: |- + Image name to use for collector containers. When omitted, the value + comes from an operator environment variable. + type: string + logs: + description: Logs is the place for users to configure the log + collection. + properties: + exporters: + description: |- + Exporters allows users to specify which exporters they want to use in + the logs pipeline. + items: + type: string + type: array + retentionPeriod: + description: |- + How long to retain log files locally. An RFC 3339 duration or a number + and unit: `12 hr`, `3d`, `4 weeks`, etc. + format: duration + maxLength: 20 + minLength: 1 + pattern: ^(PT)?( *[0-9]+ *(?i:(h|hr|d|w|wk)|(hour|day|week)s?))+$ + type: string + x-kubernetes-validations: + - message: must be at least one hour + rule: duration("1h") <= self && self <= duration("8760h") + type: object + resources: + description: Resources holds the resource requirements for the + collector container. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + type: object metadata: description: Metadata contains metadata for custom resources properties: @@ -16462,24 +16944,27 @@ spec: More info: https://www.pgadmin.org/docs/pgadmin4/latest/ldap.html properties: key: - description: The key of the secret to select from. Must - be a valid secret key. + description: Name of the data field within the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ type: string + x-kubernetes-validations: + - message: cannot be "." or start with ".." + rule: self != "." && !self.startsWith("..") name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + description: Name of the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?([.][a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string optional: - description: Specify whether the Secret or its key - must be defined + description: Whether or not the Secret or its data + must be defined. Defaults to false. type: boolean required: - key + - name type: object x-kubernetes-map-type: atomic settings: @@ -17087,9 +17572,6 @@ spec: database from this list does NOT revoke access. This field is ignored for the "postgres" user. items: - description: |- - PostgreSQL identifiers are limited in length but may contain any character. - More info: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS maxLength: 63 minLength: 1 type: string diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 8fb6bcf007..98a771bb32 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -44,6 +44,8 @@ spec: value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest" - name: RELATED_IMAGE_STANDALONE_PGADMIN value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-1" + - name: RELATED_IMAGE_COLLECTOR + value: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0" securityContext: allowPrivilegeEscalation: false capabilities: { drop: [ALL] } diff --git a/go.mod b/go.mod index 7ae46f070c..0db97ac83d 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,7 @@ module github.com/crunchydata/postgres-operator -go 1.22.7 +// If this is changing when you don't want it to, see hack/go-get.sh +go 1.23.0 require ( github.com/go-logr/logr v1.4.2 @@ -20,14 +21,16 @@ require ( go.opentelemetry.io/otel v1.32.0 go.opentelemetry.io/otel/sdk v1.32.0 go.opentelemetry.io/otel/trace v1.32.0 - golang.org/x/crypto v0.31.0 + golang.org/x/crypto v0.35.0 golang.org/x/tools v0.28.0 gotest.tools/v3 v3.5.1 k8s.io/api v0.31.0 k8s.io/apimachinery v0.31.0 k8s.io/client-go v0.31.0 k8s.io/component-base v0.31.0 + k8s.io/kube-openapi v0.0.0-20240521193020-835d969ad83a sigs.k8s.io/controller-runtime v0.19.3 + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd sigs.k8s.io/yaml v1.4.0 ) @@ -102,11 +105,11 @@ require ( golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect golang.org/x/mod v0.22.0 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.10.0 // indirect - golang.org/x/sys v0.28.0 // indirect - golang.org/x/term v0.27.0 // 
indirect - golang.org/x/text v0.21.0 // indirect + golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/sync v0.11.0 // indirect + golang.org/x/sys v0.30.0 // indirect + golang.org/x/term v0.29.0 // indirect + golang.org/x/text v0.22.0 // indirect golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect @@ -120,9 +123,7 @@ require ( k8s.io/apiextensions-apiserver v0.31.0 // indirect k8s.io/apiserver v0.31.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20240521193020-835d969ad83a // indirect k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/go.sum b/go.sum index b1f66001ba..0fa2adc5a3 100644 --- a/go.sum +++ b/go.sum @@ -210,8 +210,8 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= +golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 h1:LoYXNGAShUG3m/ehNk4iFctuhGX/+R1ZpfJ4/ia80JM= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -224,26 +224,26 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= +golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= +golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/hack/go-get.sh b/hack/go-get.sh new file mode 100755 index 0000000000..9dbfb96640 --- /dev/null +++ b/hack/go-get.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# +# Copyright 2025 Crunchy Data Solutions, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This runs `$GO get` without changing the "go" directive in the "go.mod" file. +# To change that, pass a "go@go{version}" argument. 
+# +# https://go.dev/doc/toolchain +# +# Usage: $0 help +# Usage: $0 -u golang.org/x/crypto +# Usage: $0 -u golang.org/x/crypto go@go1.99.0 +# + +set -eu +: "${GO:=go}" + +if [[ "$#" -eq 0 ]] || [[ "$1" == 'help' ]] || [[ "$*" == *'--help'* ]] || [[ "$*" == *'--version'* ]] +then + self=$(command -v -- "$0") + content=$(< "${self}") + content="${content%%$'\n\n'*}" + content="#${content#*$'\n#'}" + content="${content//$'$GO'/${GO}}" + exec echo "${content//$'$0'/$0}" +fi + +version=$(${GO} list -m -f 'go@go{{.GoVersion}}') + +for arg in "$@" +do case "${arg}" in go@go*) version="${arg}" ;; *) esac +done + +${GO} get "$@" "${version}" 'toolchain@none' +${GO} mod tidy diff --git a/internal/bridge/installation_test.go b/internal/bridge/installation_test.go index 28317e07f4..766233b8bb 100644 --- a/internal/bridge/installation_test.go +++ b/internal/bridge/installation_test.go @@ -18,10 +18,10 @@ import ( corev1 "k8s.io/api/core/v1" corev1apply "k8s.io/client-go/applyconfigurations/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/yaml" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" ) func TestExtractSecretContract(t *testing.T) { @@ -136,7 +136,7 @@ func TestInstallationReconcile(t *testing.T) { assert.Assert(t, cmp.Contains(applies[0], `"kind":"Secret"`)) var decoded corev1.Secret - assert.NilError(t, yaml.Unmarshal([]byte(applies[0]), &decoded)) + require.UnmarshalInto(t, &decoded, applies[0]) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"id":"abc"`)) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"secret":"xyz"`)) }) @@ -230,7 +230,7 @@ func TestInstallationReconcile(t *testing.T) { assert.Assert(t, cmp.Contains(applies[0], `"kind":"Secret"`)) var decoded corev1.Secret - assert.NilError(t, yaml.Unmarshal([]byte(applies[0]), &decoded)) + require.UnmarshalInto(t, &decoded, applies[0]) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"id":"asdf"`)) }) } @@ -326,7 +326,7 @@ func TestInstallationReconcile(t *testing.T) { assert.Assert(t, cmp.Contains(applies[0], `"kind":"Secret"`)) var decoded corev1.Secret - assert.NilError(t, yaml.Unmarshal([]byte(applies[0]), &decoded)) + require.UnmarshalInto(t, &decoded, applies[0]) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"id":"xyz"`)) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"secret":"def"`)) }) @@ -373,7 +373,7 @@ func TestInstallationReconcile(t *testing.T) { assert.Assert(t, cmp.Contains(applies[0], `"kind":"Secret"`)) var decoded corev1.Secret - assert.NilError(t, yaml.Unmarshal([]byte(applies[0]), &decoded)) + require.UnmarshalInto(t, &decoded, applies[0]) assert.Equal(t, len(decoded.Data["bridge-token"]), 0) archived := string(decoded.Data["bridge-token--2020-10-28"]) @@ -463,7 +463,7 @@ func TestInstallationReconcile(t *testing.T) { assert.Assert(t, cmp.Contains(applies[0], `"kind":"Secret"`)) var decoded corev1.Secret - assert.NilError(t, yaml.Unmarshal([]byte(applies[0]), &decoded)) + require.UnmarshalInto(t, &decoded, applies[0]) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"id":"ddd"`)) assert.Assert(t, cmp.Contains(string(decoded.Data["bridge-token"]), `"secret":"fresh"`)) }) diff --git a/internal/collector/config.go b/internal/collector/config.go new file mode 100644 index 0000000000..d288380aea --- /dev/null +++ 
b/internal/collector/config.go @@ -0,0 +1,178 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + _ "embed" + "fmt" + "math" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/yaml" + + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +// The contents of "logrotate.conf" as a string. +// See: https://pkg.go.dev/embed +// +//go:embed "logrotate.conf" +var logrotateConfigFormatString string + +// ComponentID represents a component identifier within an OpenTelemetry +// Collector YAML configuration. Each value is a "type" followed by an optional +// slash-then-name: `type[/name]` +type ComponentID = string + +// PipelineID represents a pipeline identifier within an OpenTelemetry Collector +// YAML configuration. Each value is a signal followed by an optional +// slash-then-name: `signal[/name]` +type PipelineID = string + +// Config represents an OpenTelemetry Collector YAML configuration. +// See: https://opentelemetry.io/docs/collector/configuration +type Config struct { + Exporters map[ComponentID]any + Extensions map[ComponentID]any + Processors map[ComponentID]any + Receivers map[ComponentID]any + + Pipelines map[PipelineID]Pipeline +} + +// Pipeline represents the YAML configuration of a flow of telemetry data +// through an OpenTelemetry Collector. +// See: https://opentelemetry.io/docs/collector/configuration#pipelines +type Pipeline struct { + Extensions []ComponentID + Exporters []ComponentID + Processors []ComponentID + Receivers []ComponentID +} + +// LogrotateConfig represents the configurable pieces of a log rotate config +// that can vary based on the specific component whose logs are being rotated +type LogrotateConfig struct { + LogFiles []string + PostrotateScript string +} + +func (c *Config) ToYAML() (string, error) { + const yamlGeneratedWarning = "" + + "# Generated by postgres-operator. DO NOT EDIT.\n" + + "# Your changes will not be saved.\n" + + extensions := sets.New[ComponentID]() + pipelines := make(map[PipelineID]any, len(c.Pipelines)) + + for id, p := range c.Pipelines { + extensions.Insert(p.Extensions...) 
+ pipelines[id] = map[string]any{ + "exporters": p.Exporters, + "processors": p.Processors, + "receivers": p.Receivers, + } + } + + b, err := yaml.Marshal(map[string]any{ + "exporters": c.Exporters, + "extensions": c.Extensions, + "processors": c.Processors, + "receivers": c.Receivers, + "service": map[string]any{ + "extensions": sets.List(extensions), // Sorted + "pipelines": pipelines, + }, + }) + return string(append([]byte(yamlGeneratedWarning), b...)), err +} + +// NewConfig creates a base config for an OTel collector container +func NewConfig(spec *v1beta1.InstrumentationSpec) *Config { + config := &Config{ + Exporters: map[ComponentID]any{ + // https://pkg.go.dev/go.opentelemetry.io/collector/exporter/debugexporter#section-readme + DebugExporter: map[string]any{"verbosity": "detailed"}, + }, + Extensions: map[ComponentID]any{}, + Processors: map[ComponentID]any{ + // https://pkg.go.dev/go.opentelemetry.io/collector/processor/batchprocessor#section-readme + OneSecondBatchProcessor: map[string]any{"timeout": "1s"}, + SubSecondBatchProcessor: map[string]any{"timeout": "200ms"}, + + // https://pkg.go.dev/github.com/open-telemetry/opentelemetry-collector-contrib/processor/groupbyattrsprocessor#readme-compaction + CompactingProcessor: map[string]any{}, + }, + Receivers: map[ComponentID]any{}, + Pipelines: map[PipelineID]Pipeline{}, + } + + // If there are exporters defined in the spec, add them to the config. + if spec != nil && spec.Config != nil && spec.Config.Exporters != nil { + for k, v := range spec.Config.Exporters { + config.Exporters[k] = v + } + } + + return config +} + +// AddLogrotateConfigs generates a logrotate configuration for each LogrotateConfig +// provided via the configs parameter and adds them to the provided configmap. +func AddLogrotateConfigs(ctx context.Context, spec *v1beta1.InstrumentationSpec, + outInstanceConfigMap *corev1.ConfigMap, configs []LogrotateConfig, +) { + if outInstanceConfigMap.Data == nil { + outInstanceConfigMap.Data = make(map[string]string) + } + + // If retentionPeriod is set in the spec, use that value; otherwise, we want + // to use a reasonably short duration. Defaulting to 1 day. + retentionPeriod := metav1.Duration{Duration: 24 * time.Hour} + if spec != nil && spec.Logs != nil && spec.Logs.RetentionPeriod != nil { + retentionPeriod = spec.Logs.RetentionPeriod.AsDuration() + } + + logrotateConfig := "" + for _, config := range configs { + logrotateConfig += generateLogrotateConfig(config, retentionPeriod) + } + + outInstanceConfigMap.Data["logrotate.conf"] = logrotateConfig +} + +// generateLogrotateConfig generates a configuration string for logrotate based +// on the provided full log file path, retention period, and postrotate script +func generateLogrotateConfig( + config LogrotateConfig, retentionPeriod metav1.Duration, +) string { + number, interval := parseDurationForLogrotate(retentionPeriod) + + return fmt.Sprintf( + logrotateConfigFormatString, + strings.Join(config.LogFiles, " "), + number, + interval, + config.PostrotateScript, + ) +} + +// parseDurationForLogrotate takes a retention period and returns the rotate +// number and interval string that should be used in the logrotate config. 
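+// (For example, a 12 hour retention period yields rotate 12 with an "hourly"
+// interval, while 36 hours rounds up to rotate 2 with a "daily" interval.)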
+// If the retentionPeriod is less than 24 hours, the function will return the +// number of hours and "hourly"; otherwise, we will round up to the nearest day +// and return the day count and "daily" +func parseDurationForLogrotate(retentionPeriod metav1.Duration) (int, string) { + hours := math.Ceil(retentionPeriod.Hours()) + if hours < 24 { + return int(hours), "hourly" + } + return int(math.Ceil(hours / 24)), "daily" +} diff --git a/internal/collector/config_test.go b/internal/collector/config_test.go new file mode 100644 index 0000000000..c621a14aad --- /dev/null +++ b/internal/collector/config_test.go @@ -0,0 +1,205 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "testing" + + "gotest.tools/v3/assert" + + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestConfigToYAML(t *testing.T) { + t.Run("NilInstrumentationSpec", func(t *testing.T) { + result, err := NewConfig(nil).ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} +receivers: {} +service: + extensions: [] + pipelines: {} +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + spec := testInstrumentationSpec() + + result, err := NewConfig(spec).ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} +receivers: {} +service: + extensions: [] + pipelines: {} +`) + }) +} + +func TestGenerateLogrotateConfig(t *testing.T) { + for _, tt := range []struct { + config LogrotateConfig + retentionPeriod string + result string + }{ + { + config: LogrotateConfig{ + LogFiles: []string{"/this/is/a/file.path"}, + PostrotateScript: "echo 'Hello, World'", + }, + retentionPeriod: "12h", + result: `/this/is/a/file.path { + rotate 12 + missingok + sharedscripts + notifempty + nocompress + hourly + postrotate + echo 'Hello, World' + endscript +} +`, + }, + { + config: LogrotateConfig{ + LogFiles: []string{"/tmp/test.log"}, + PostrotateScript: "", + }, + retentionPeriod: "5 days", + result: `/tmp/test.log { + rotate 5 + missingok + sharedscripts + notifempty + nocompress + daily + postrotate + + endscript +} +`, + }, + { + config: LogrotateConfig{ + LogFiles: []string{"/tmp/test.csv", "/tmp/test.json"}, + PostrotateScript: "pkill -HUP --exact pgbouncer", + }, + retentionPeriod: "5wk", + result: `/tmp/test.csv /tmp/test.json { + rotate 35 + missingok + sharedscripts + notifempty + nocompress + daily + postrotate + pkill -HUP --exact pgbouncer + endscript +} +`, + }, + } { + t.Run(tt.retentionPeriod, func(t *testing.T) { + duration, err := v1beta1.NewDuration(tt.retentionPeriod) + assert.NilError(t, err) + result := generateLogrotateConfig(tt.config, duration.AsDuration()) + assert.Equal(t, tt.result, result) + }) + } +} + +func TestParseDurationForLogrotate(t *testing.T) { + for _, tt := range []struct { + retentionPeriod string + number int + interval string + }{ + { + retentionPeriod: "1 
h 20 min", + number: 2, + interval: "hourly", + }, + { + retentionPeriod: "12h", + number: 12, + interval: "hourly", + }, + { + retentionPeriod: "24hr", + number: 1, + interval: "daily", + }, + { + retentionPeriod: "35hour", + number: 2, + interval: "daily", + }, + { + retentionPeriod: "36 hours", + number: 2, + interval: "daily", + }, + { + retentionPeriod: "3d", + number: 3, + interval: "daily", + }, + { + retentionPeriod: "365day", + number: 365, + interval: "daily", + }, + { + retentionPeriod: "1w", + number: 7, + interval: "daily", + }, + { + retentionPeriod: "4wk", + number: 28, + interval: "daily", + }, + { + retentionPeriod: "52week", + number: 364, + interval: "daily", + }, + } { + t.Run(tt.retentionPeriod, func(t *testing.T) { + duration, err := v1beta1.NewDuration(tt.retentionPeriod) + assert.NilError(t, err) + number, interval := parseDurationForLogrotate(duration.AsDuration()) + assert.Equal(t, tt.number, number) + assert.Equal(t, tt.interval, interval) + }) + } +} diff --git a/internal/collector/generate.go b/internal/collector/generate.go new file mode 100644 index 0000000000..3593a96f9d --- /dev/null +++ b/internal/collector/generate.go @@ -0,0 +1,60 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +// [pg_query.Parse] requires CGO to compile and call https://github.com/pganalyze/libpg_query +//go:build cgo && generate + +//go:generate go run generate.go + +package main + +import ( + "bytes" + "log/slog" + "os" + "path/filepath" + "strings" + + pg_query "github.com/pganalyze/pg_query_go/v5" + "sigs.k8s.io/yaml" +) + +func main() { + cwd := need(os.Getwd()) + fileNames := map[string][]string{} + + slog.Info("Reading", "directory", cwd) + for _, entry := range need(os.ReadDir(cwd)) { + if entry.Type() == 0 { + ext := filepath.Ext(entry.Name()) + fileNames[ext] = append(fileNames[ext], entry.Name()) + } + } + + for _, sqlName := range fileNames[".sql"] { + slog.Info("Reading", "file", sqlName) + sqlData := need(pg_query.Parse(string(need(os.ReadFile(sqlName))))) + sqlPath := filepath.Join("generated", sqlName) + + slog.Info("Writing", "file", sqlPath) + must(os.WriteFile(sqlPath, []byte(need(pg_query.Deparse(sqlData))+"\n"), 0o644)) + } + + for _, yamlName := range fileNames[".yaml"] { + slog.Info("Reading", "file", yamlName) + jsonData := need(yaml.YAMLToJSONStrict(need(os.ReadFile(yamlName)))) + jsonPath := filepath.Join("generated", strings.TrimSuffix(yamlName, ".yaml")+".json") + + slog.Info("Writing", "file", jsonPath) + must(os.WriteFile(jsonPath, append(bytes.TrimSpace(jsonData), '\n'), 0o644)) + } +} + +func must(err error) { need(0, err) } +func need[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} diff --git a/internal/collector/generated/.gitattributes b/internal/collector/generated/.gitattributes new file mode 100644 index 0000000000..49e9f142dd --- /dev/null +++ b/internal/collector/generated/.gitattributes @@ -0,0 +1,2 @@ +# https://docs.github.com/en/repositories/working-with-files/managing-files/customizing-how-changed-files-appear-on-github +/*.json linguist-generated=true diff --git a/internal/collector/generated/gte_pg16_metrics.json b/internal/collector/generated/gte_pg16_metrics.json new file mode 100644 index 0000000000..3b27be7bc0 --- /dev/null +++ b/internal/collector/generated/gte_pg16_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually 
analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n 
current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , p.n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\n FROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/gte_pg17_metrics.json b/internal/collector/generated/gte_pg17_metrics.json new file mode 100644 index 0000000000..563abf01b3 --- /dev/null +++ b/internal/collector/generated/gte_pg17_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_written FROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"}] diff --git a/internal/collector/generated/lt_pg16_metrics.json b/internal/collector/generated/lt_pg16_metrics.json new file mode 100644 index 0000000000..98bb0cc213 --- /dev/null +++ b/internal/collector/generated/lt_pg16_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually 
analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n 
current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/lt_pg17_metrics.json b/internal/collector/generated/lt_pg17_metrics.json new file mode 100644 index 0000000000..d6266ffacb --- /dev/null +++ b/internal/collector/generated/lt_pg17_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_checkpoint AS buffers_written FROM pg_catalog.pg_stat_bgwriter c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] diff --git a/internal/collector/generated/pgbackrest_logs_transforms.json b/internal/collector/generated/pgbackrest_logs_transforms.json new file mode 100644 index 0000000000..adf3b09af9 --- /dev/null +++ b/internal/collector/generated/pgbackrest_logs_transforms.json @@ -0,0 +1 @@ +[{"context":"log","statements":["set(instrumentation_scope.name, \"pgbackrest\")","set(instrumentation_scope.schema_url, 
\"https://opentelemetry.io/schemas/1.29.0\")","merge_maps(cache, ExtractPatterns(body, \"^(?\u003ctimestamp\u003e\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) (?\u003cprocess_id\u003eP\\\\d{2,3})\\\\s*(?\u003cerror_severity\u003e\\\\S*): (?\u003cmessage\u003e(?s).*)$\"), \"insert\") where Len(body) \u003e 0","set(severity_text, cache[\"error_severity\"]) where IsString(cache[\"error_severity\"])","set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == \"TRACE\"","set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == \"DEBUG\"","set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == \"DETAIL\"","set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == \"INFO\"","set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == \"WARN\"","set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == \"ERROR\"","set(time, Time(cache[\"timestamp\"], \"%Y-%m-%d %H:%M:%S.%L\")) where IsString(cache[\"timestamp\"])","set(attributes[\"process.pid\"], cache[\"process_id\"])","set(attributes[\"log.record.original\"], body)","set(body, cache[\"message\"])"]}] diff --git a/internal/collector/generated/pgbackrest_metrics.json b/internal/collector/generated/pgbackrest_metrics.json new file mode 100644 index 0000000000..63114afc03 --- /dev/null +++ b/internal/collector/generated/pgbackrest_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. 
Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] diff --git a/internal/collector/generated/pgbouncer_metrics_queries.json b/internal/collector/generated/pgbouncer_metrics_queries.json new file mode 100644 index 0000000000..0248051d94 --- /dev/null +++ b/internal/collector/generated/pgbouncer_metrics_queries.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS"},{"metrics":[{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Minimum number of server connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool","value_column":"reserve_pool"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW 
DATABASES"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW POOLS"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS"}] diff --git a/internal/collector/generated/postgres_5m_metrics.json b/internal/collector/generated/postgres_5m_metrics.json new file mode 100644 index 0000000000..a9a3500a02 --- /dev/null +++ b/internal/collector/generated/postgres_5m_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled 
due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] diff --git a/internal/collector/generated/postgres_5s_metrics.json b/internal/collector/generated/postgres_5s_metrics.json new file mode 100644 index 0000000000..09ea77846b --- /dev/null +++ b/internal/collector/generated/postgres_5s_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as 
max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle 
and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Return value of 1 means database is in recovery. 
Otherwise 2 it is a primary.","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select 
key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from 
ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets 
transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval 
in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process 
starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in 
bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. 
Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] diff --git a/internal/collector/generated/postgres_logs_transforms.json b/internal/collector/generated/postgres_logs_transforms.json new file mode 100644 index 0000000000..d3a2dbe47f --- /dev/null +++ b/internal/collector/generated/postgres_logs_transforms.json @@ -0,0 +1 @@ +[{"conditions":["body[\"format\"] == \"csv\""],"context":"log","statements":["set(cache, ParseCSV(body[\"original\"], body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(cache, ExtractPatterns(cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(cache[\"connection_from\"]) \u003e 0","set(cache[\"remote_host\"], Substring(cache[\"connection_from\"], 0, Len(cache[\"connection_from\"]) - Len(cache[\"remote_port\"]) - 1)) where Len(cache[\"connection_from\"]) \u003e 0 and IsString(cache[\"remote_port\"])","set(cache[\"remote_host\"], cache[\"connection_from\"]) where Len(cache[\"connection_from\"]) \u003e 0 and not IsString(cache[\"remote_host\"])","merge_maps(cache, ExtractPatterns(cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(cache[\"location\"]) \u003e 0","set(cache[\"cursor_position\"], Double(cache[\"cursor_position\"])) where IsMatch(cache[\"cursor_position\"], \"^[0-9.]+$\")","set(cache[\"file_line_num\"], Double(cache[\"file_line_num\"])) where IsMatch(cache[\"file_line_num\"], \"^[0-9.]+$\")","set(cache[\"internal_position\"], Double(cache[\"internal_position\"])) where IsMatch(cache[\"internal_position\"], \"^[0-9.]+$\")","set(cache[\"leader_pid\"], Double(cache[\"leader_pid\"])) where IsMatch(cache[\"leader_pid\"], \"^[0-9.]+$\")","set(cache[\"line_num\"], Double(cache[\"line_num\"])) where IsMatch(cache[\"line_num\"], \"^[0-9.]+$\")","set(cache[\"pid\"], Double(cache[\"pid\"])) where IsMatch(cache[\"pid\"], \"^[0-9.]+$\")","set(cache[\"query_id\"], Double(cache[\"query_id\"])) where IsMatch(cache[\"query_id\"], \"^[0-9.]+$\")","set(cache[\"remote_port\"], Double(cache[\"remote_port\"])) where IsMatch(cache[\"remote_port\"], \"^[0-9.]+$\")","set(body[\"parsed\"], 
cache)"]},{"context":"log","statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(cache, body[\"parsed\"]) where body[\"format\"] == \"csv\"","set(cache, ParseJSON(body[\"original\"])) where body[\"format\"] == \"json\"","set(severity_text, cache[\"error_severity\"])","set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == \"DEBUG5\"","set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == \"DEBUG4\"","set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == \"DEBUG3\"","set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == \"DEBUG2\"","set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == \"DEBUG1\"","set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == \"INFO\" or severity_text == \"LOG\"","set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == \"NOTICE\"","set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == \"WARNING\"","set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == \"ERROR\"","set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == \"FATAL\"","set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == \"PANIC\"","set(time, Time(cache[\"timestamp\"], \"%F %T.%L %Z\"))","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(attributes[\"log.record.original\"], body[\"original\"])","set(body, cache)","set(attributes[\"client.address\"], body[\"remote_host\"]) where IsString(body[\"remote_host\"])","set(attributes[\"client.port\"], Int(body[\"remote_port\"])) where IsDouble(body[\"remote_port\"])","set(attributes[\"code.filepath\"], body[\"file_name\"]) where IsString(body[\"file_name\"])","set(attributes[\"code.function\"], body[\"func_name\"]) where IsString(body[\"func_name\"])","set(attributes[\"code.lineno\"], Int(body[\"file_line_num\"])) where IsDouble(body[\"file_line_num\"])","set(attributes[\"db.namespace\"], body[\"dbname\"]) where IsString(body[\"dbname\"])","set(attributes[\"db.response.status_code\"], body[\"state_code\"]) where IsString(body[\"state_code\"])","set(attributes[\"process.creation.time\"], Concat([ Substring(body[\"session_start\"], 0, 10), \"T\", Substring(body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(body[\"session_start\"], \"^[^ ]{10} [^ ]{8} UTC$\")","set(attributes[\"process.pid\"], Int(body[\"pid\"])) where IsDouble(body[\"pid\"])","set(attributes[\"process.title\"], body[\"ps\"]) where IsString(body[\"ps\"])","set(attributes[\"user.name\"], body[\"user\"]) where IsString(body[\"user\"])"]},{"conditions":["Len(body[\"message\"]) \u003e 7 and Substring(body[\"message\"], 0, 7) == \"AUDIT: \""],"context":"log","statements":["set(body[\"pgaudit\"], ParseCSV(Substring(body[\"message\"], 7, Len(body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(body[\"pgaudit\"]) \u003e 0"]}] diff --git a/internal/collector/gte_pg16_metrics.yaml b/internal/collector/gte_pg16_metrics.yaml new file mode 100644 index 0000000000..319aad62dc --- /dev/null +++ b/internal/collector/gte_pg16_metrics.yaml @@ -0,0 +1,127 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. 
+# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are idx_scan and idx_tup_fetch and we avoid NULL by using COALESCE. + - sql: > + SELECT + current_database() as dbname + , p.schemaname + , p.relname + , p.seq_scan + , p.seq_tup_read + , COALESCE(p.idx_scan, 0) AS idx_scan + , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch + , p.n_tup_ins + , p.n_tup_upd + , p.n_tup_del + , p.n_tup_hot_upd + , p.n_tup_newpage_upd + , p.n_live_tup + , p.n_dead_tup + , p.vacuum_count + , p.autovacuum_count + , p.analyze_count + , p.autoanalyze_count + FROM pg_catalog.pg_stat_user_tables p; + metrics: + - metric_name: ccp_stat_user_tables_analyze_count + data_type: sum + value_column: analyze_count + description: Number of times this table has been manually analyzed + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_autoanalyze_count + data_type: sum + value_column: autoanalyze_count + description: Number of times this table has been analyzed by the autovacuum daemon + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_autovacuum_count + data_type: sum + value_column: autovacuum_count + description: Number of times this table has been vacuumed by the autovacuum daemon + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_idx_scan + data_type: sum + value_column: idx_scan + description: Number of index scans initiated on this table + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_idx_tup_fetch + data_type: sum + value_column: idx_tup_fetch + description: Number of live rows fetched by index scans + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_dead_tup + value_column: n_dead_tup + description: Estimated number of dead rows + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_live_tup + value_column: n_live_tup + description: Estimated number of live rows + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_del + data_type: sum + value_column: n_tup_del + description: Number of rows deleted + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_hot_upd + data_type: sum + value_column: n_tup_hot_upd + description: Number of rows HOT updated (i.e., with no separate index update required) + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_ins + data_type: sum + value_column: n_tup_ins + description: Number of rows inserted + attribute_columns: ["dbname", "relname", "schemaname"] + 
static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_upd + data_type: sum + value_column: n_tup_upd + description: Number of rows updated + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_seq_scan + data_type: sum + value_column: seq_scan + description: Number of sequential scans initiated on this table + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_seq_tup_read + data_type: sum + value_column: seq_tup_read + description: Number of live rows fetched by sequential scans + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_vacuum_count + data_type: sum + value_column: vacuum_count + description: Number of times this table has been manually vacuumed (not counting VACUUM FULL) + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/gte_pg17_metrics.yaml b/internal/collector/gte_pg17_metrics.yaml new file mode 100644 index 0000000000..de8f6786f5 --- /dev/null +++ b/internal/collector/gte_pg17_metrics.yaml @@ -0,0 +1,72 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + + - sql: > + SELECT c.buffers_written + FROM pg_catalog.pg_stat_checkpointer c; + metrics: + - metric_name: ccp_stat_bgwriter_buffers_checkpoint + value_column: buffers_written + data_type: sum + description: Number of buffers written during checkpoints and restartpoints + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + s.writes + , s.fsyncs + FROM pg_catalog.pg_stat_io s + WHERE backend_type = 'background writer'; + metrics: + - metric_name: ccp_stat_bgwriter_buffers_backend + value_column: writes + data_type: sum + description: Number of write operations, each of the size specified in op_bytes. + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_buffers_backend_fsync + value_column: fsyncs + data_type: sum + description: Number of fsync calls. These are only tracked in context normal. 
+ static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + c.num_timed + , c.num_requested + , c.write_time + , c.sync_time + , c.buffers_written + FROM pg_catalog.pg_stat_checkpointer c; + metrics: + - metric_name: ccp_stat_bgwriter_checkpoint_sync_time + value_column: sync_time + description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_checkpoint_write_time + value_column: write_time + value_type: double + description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_checkpoints_req + value_column: num_requested + description: Number of requested checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_checkpoints_timed + value_column: num_timed + description: Number of scheduled checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_buffers_written + description: Number of buffers written during checkpoints and restartpoints + value_column: buffers_written + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/helpers_test.go b/internal/collector/helpers_test.go new file mode 100644 index 0000000000..7f1e277e9b --- /dev/null +++ b/internal/collector/helpers_test.go @@ -0,0 +1,29 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func testInstrumentationSpec() *v1beta1.InstrumentationSpec { + spec := v1beta1.InstrumentationSpec{ + Config: &v1beta1.InstrumentationConfigSpec{ + Exporters: map[string]any{ + "googlecloud": map[string]any{ + "log": map[string]any{ + "default_log_name": "opentelemetry.io/collector-exported-log", + }, + "project": "google-project-name", + }, + }, + }, + Logs: &v1beta1.InstrumentationLogsSpec{ + Exporters: []string{"googlecloud"}, + }, + } + + return spec.DeepCopy() +} diff --git a/internal/collector/instance.go b/internal/collector/instance.go new file mode 100644 index 0000000000..970f9c9109 --- /dev/null +++ b/internal/collector/instance.go @@ -0,0 +1,198 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + "fmt" + "path" + + corev1 "k8s.io/api/core/v1" + + "github.com/crunchydata/postgres-operator/internal/config" + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/initialize" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/shell" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +const configDirectory = "/etc/otel-collector" + +// AddToConfigMap populates the shared ConfigMap with fields needed to run the Collector. 
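+//
+// A minimal usage sketch, assuming the caller already built a *Config (for
+// example with NewConfig) and owns the instance ConfigMap it reconciles; the
+// variable names below are illustrative, not taken from this package:
+//
+//	cm := &corev1.ConfigMap{}
+//	if err := AddToConfigMap(ctx, collectorConfig, cm); err == nil {
+//		// cm.Data["collector.yaml"] now holds the rendered Collector configuration.
+//	}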
+func AddToConfigMap( + ctx context.Context, + inConfig *Config, + outInstanceConfigMap *corev1.ConfigMap, +) error { + var err error + if outInstanceConfigMap.Data == nil { + outInstanceConfigMap.Data = make(map[string]string) + } + + outInstanceConfigMap.Data["collector.yaml"], err = inConfig.ToYAML() + + return err +} + +// AddToPod adds the OpenTelemetry collector container to a given Pod +func AddToPod( + ctx context.Context, + spec *v1beta1.InstrumentationSpec, + pullPolicy corev1.PullPolicy, + inInstanceConfigMap *corev1.ConfigMap, + outPod *corev1.PodSpec, + volumeMounts []corev1.VolumeMount, + sqlQueryPassword string, + logDirectories []string, + includeLogrotate bool, +) { + if spec == nil || + !(feature.Enabled(ctx, feature.OpenTelemetryLogs) || + feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + return + } + + // Create volume and volume mount for otel collector config + configVolumeMount := corev1.VolumeMount{ + Name: "collector-config", + MountPath: configDirectory, + ReadOnly: true, + } + configVolume := corev1.Volume{Name: configVolumeMount.Name} + configVolume.Projected = &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{{ + ConfigMap: &corev1.ConfigMapProjection{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: inInstanceConfigMap.Name, + }, + Items: []corev1.KeyToPath{{ + Key: "collector.yaml", + Path: "config.yaml", + }}, + }, + }}, + } + + // If the user has specified files to be mounted in the spec, add them to the projected config volume + if spec != nil && spec.Config != nil && spec.Config.Files != nil { + configVolume.Projected.Sources = append(configVolume.Projected.Sources, spec.Config.Files...) + } + + // Add configVolume to the pod's volumes + outPod.Volumes = append(outPod.Volumes, configVolume) + + // Create collector container + container := corev1.Container{ + Name: naming.ContainerCollector, + Image: config.CollectorContainerImage(spec), + ImagePullPolicy: pullPolicy, + Command: startCommand(logDirectories, includeLogrotate), + Env: []corev1.EnvVar{ + { + Name: "K8S_POD_NAMESPACE", + ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }}, + }, + { + Name: "K8S_POD_NAME", + ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }}, + }, + { + Name: "PGPASSWORD", + Value: sqlQueryPassword, + }, + }, + Resources: spec.Resources, + SecurityContext: initialize.RestrictedSecurityContext(), + VolumeMounts: append(volumeMounts, configVolumeMount), + } + + // If this is a pod that uses logrotate for log rotation, add config volume + // and mount for logrotate config + if includeLogrotate { + logrotateConfigVolumeMount := corev1.VolumeMount{ + Name: "logrotate-config", + MountPath: "/etc/logrotate.d", + ReadOnly: true, + } + logrotateConfigVolume := corev1.Volume{Name: logrotateConfigVolumeMount.Name} + logrotateConfigVolume.Projected = &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{{ + ConfigMap: &corev1.ConfigMapProjection{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: inInstanceConfigMap.Name, + }, + Items: []corev1.KeyToPath{{ + Key: "logrotate.conf", + Path: "logrotate.conf", + }}, + }, + }}, + } + container.VolumeMounts = append(container.VolumeMounts, logrotateConfigVolumeMount) + outPod.Volumes = append(outPod.Volumes, logrotateConfigVolume) + } + + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + container.Ports = []corev1.ContainerPort{{ + ContainerPort: int32(8889), + Name: 
"otel-metrics", + Protocol: corev1.ProtocolTCP, + }} + } + + outPod.Containers = append(outPod.Containers, container) +} + +// startCommand generates the command script used by the collector container +func startCommand(logDirectories []string, includeLogrotate bool) []string { + var mkdirScript string + if len(logDirectories) != 0 { + for _, logDir := range logDirectories { + mkdirScript = mkdirScript + ` +` + shell.MakeDirectories(0o775, logDir, + path.Join(logDir, "receiver")) + } + } + + var logrotateCommand string + if includeLogrotate { + logrotateCommand = `logrotate -s /tmp/logrotate.status /etc/logrotate.d/logrotate.conf` + } + + var startScript = fmt.Sprintf(` +%s +OTEL_PIDFILE=/tmp/otel.pid + +start_otel_collector() { + echo "Starting OTel Collector" + /otelcol-contrib --config %s/config.yaml & + echo $! > $OTEL_PIDFILE +} +start_otel_collector + +exec {fd}<> <(:||:) +while read -r -t 5 -u "${fd}" ||:; do + %s + if [[ "${directory}" -nt "/proc/self/fd/${fd}" ]] && kill -HUP $(head -1 ${OTEL_PIDFILE?}); + then + echo "OTel configuration changed..." + exec {fd}>&- && exec {fd}<> <(:||:) + stat --format='Loaded configuration dated %%y' "${directory}" + fi + if [[ ! -e /proc/$(head -1 ${OTEL_PIDFILE?}) ]] ; then + start_otel_collector + fi +done +`, mkdirScript, configDirectory, logrotateCommand) + + wrapper := `monitor() {` + startScript + `}; export directory="$1"; export -f monitor; exec -a "$0" bash -ceu monitor` + + return []string{"bash", "-ceu", "--", wrapper, "collector", configDirectory} +} diff --git a/internal/collector/logrotate.conf b/internal/collector/logrotate.conf new file mode 100644 index 0000000000..5323c45642 --- /dev/null +++ b/internal/collector/logrotate.conf @@ -0,0 +1,11 @@ +%s { + rotate %d + missingok + sharedscripts + notifempty + nocompress + %s + postrotate + %s + endscript +} diff --git a/internal/collector/lt_pg16_metrics.yaml b/internal/collector/lt_pg16_metrics.yaml new file mode 100644 index 0000000000..ca9fe8a0c8 --- /dev/null +++ b/internal/collector/lt_pg16_metrics.yaml @@ -0,0 +1,135 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are idx_scan and idx_tup_fetch and we avoid NULL by using COALESCE. 
+ - sql: > + SELECT + current_database() as dbname + , p.schemaname + , p.relname + , p.seq_scan + , p.seq_tup_read + , COALESCE(p.idx_scan, 0) AS idx_scan + , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch + , p.n_tup_ins + , p.n_tup_upd + , p.n_tup_del + , p.n_tup_hot_upd + , 0::bigint AS n_tup_newpage_upd + , p.n_live_tup + , p.n_dead_tup + , p.vacuum_count + , p.autovacuum_count + , p.analyze_count + , p.autoanalyze_count + FROM pg_catalog.pg_stat_user_tables p; + metrics: + - metric_name: ccp_stat_user_tables_analyze_count + data_type: sum + value_column: analyze_count + description: Number of times this table has been manually analyzed + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_autoanalyze_count + data_type: sum + value_column: autoanalyze_count + description: Number of times this table has been analyzed by the autovacuum daemon + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_autovacuum_count + data_type: sum + value_column: autovacuum_count + description: Number of times this table has been vacuumed by the autovacuum daemon + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_idx_scan + data_type: sum + value_column: idx_scan + description: Number of index scans initiated on this table + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_idx_tup_fetch + data_type: sum + value_column: idx_tup_fetch + description: Number of live rows fetched by index scans + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_dead_tup + value_column: n_dead_tup + description: Estimated number of dead rows + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". + # The issue doesn't occur with gte_pg16. + - metric_name: ccp_stat_user_tables_n_live_tup + value_column: n_live_tup + description: Estimated number of live rows + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_del + data_type: sum + value_column: n_tup_del + description: Number of rows deleted + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_hot_upd + data_type: sum + value_column: n_tup_hot_upd + description: Number of rows HOT updated (i.e., with no separate index update required) + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". + # The issue doesn't occur with gte_pg16. 
+ - metric_name: ccp_stat_user_tables_n_tup_ins + data_type: sum + value_column: n_tup_ins + description: Number of rows inserted + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_n_tup_upd + data_type: sum + value_column: n_tup_upd + description: Number of rows updated + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". + # The issue doesn't occur with gte_pg16. + - metric_name: ccp_stat_user_tables_seq_scan + data_type: sum + value_column: seq_scan + description: Number of sequential scans initiated on this table + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". + # The issue doesn't occur with gte_pg16. + - metric_name: ccp_stat_user_tables_seq_tup_read + data_type: sum + value_column: seq_tup_read + description: Number of live rows fetched by sequential scans + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_vacuum_count + data_type: sum + value_column: vacuum_count + description: Number of times this table has been manually vacuumed (not counting VACUUM FULL) + attribute_columns: ["dbname", "relname", "schemaname"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/lt_pg17_metrics.yaml b/internal/collector/lt_pg17_metrics.yaml new file mode 100644 index 0000000000..330ff7d798 --- /dev/null +++ b/internal/collector/lt_pg17_metrics.yaml @@ -0,0 +1,71 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + + - sql: > + SELECT c.buffers_checkpoint AS buffers_written + FROM pg_catalog.pg_stat_bgwriter c; + metrics: + - metric_name: ccp_stat_bgwriter_buffers_checkpoint + value_column: buffers_written + data_type: sum + description: Number of buffers written during checkpoints and restartpoints + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + s.buffers_backend AS writes + , s.buffers_backend_fsync AS fsyncs + FROM pg_catalog.pg_stat_bgwriter s; + metrics: + - metric_name: ccp_stat_bgwriter_buffers_backend + value_column: writes + data_type: sum + description: Number of write operations, each of the size specified in op_bytes. + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_buffers_backend_fsync + value_column: fsyncs + data_type: sum + description: Number of fsync calls. These are only tracked in context normal. 
+ static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + c.checkpoints_timed AS num_timed + , c.checkpoints_req AS num_requested + , c.checkpoint_write_time AS write_time + , c.checkpoint_sync_time AS sync_time + , c.buffers_checkpoint AS buffers_written + FROM pg_catalog.pg_stat_bgwriter c; + metrics: + - metric_name: ccp_stat_bgwriter_checkpoints_timed + value_column: num_timed + description: Number of scheduled checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_checkpoints_req + value_column: num_requested + description: Number of requested checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_checkpoint_write_time + value_column: write_time + value_type: double + description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_checkpoint_sync_time + value_column: sync_time + description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_buffers_written + description: Number of buffers written during checkpoints and restartpoints + value_column: buffers_written + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/naming.go b/internal/collector/naming.go new file mode 100644 index 0000000000..4a414a9bad --- /dev/null +++ b/internal/collector/naming.go @@ -0,0 +1,24 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +const CompactingProcessor = "groupbyattrs/compact" +const DebugExporter = "debug" +const OneSecondBatchProcessor = "batch/1s" +const SubSecondBatchProcessor = "batch/200ms" +const Prometheus = "prometheus" +const PGBouncerMetrics = "metrics/pgbouncer" +const PostgresMetrics = "metrics/postgres" +const PatroniMetrics = "metrics/patroni" + +const SqlQuery = "sqlquery" + +// For slow queries, we'll use pgMonitor's default 5 minute interval. +// https://github.com/CrunchyData/pgmonitor-extension/blob/main/sql/matviews/matviews.sql +const FiveMinuteSqlQuery = "sqlquery/300s" + +// We'll use pgMonitor's Prometheus collection interval for most queries. +// https://github.com/CrunchyData/pgmonitor/blob/development/prometheus/linux/crunchy-prometheus.yml +const FiveSecondSqlQuery = "sqlquery/5s" diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go new file mode 100644 index 0000000000..1f0846eedb --- /dev/null +++ b/internal/collector/patroni.go @@ -0,0 +1,172 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func EnablePatroniLogging(ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outConfig *Config, +) { + if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + directory := naming.PatroniPGDataLogPath + + // Keep track of what log records and files have been processed. + // Use a subdirectory of the logs directory to stay within the same failure domain. 
+ // TODO(log-rotation): Create this directory during Collector startup. + // + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/extension/storage/filestorage#readme + outConfig.Extensions["file_storage/patroni_logs"] = map[string]any{ + "directory": directory + "/receiver", + "create_directory": true, + "fsync": true, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme + outConfig.Receivers["filelog/patroni_jsonlog"] = map[string]any{ + // Read the JSON files and keep track of what has been processed. + "include": []string{directory + "/*.log"}, + "storage": "file_storage/patroni_logs", + + "operators": []map[string]any{ + {"type": "move", "from": "body", "to": "body.original"}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/resourceprocessor#readme + outConfig.Processors["resource/patroni"] = map[string]any{ + "attributes": []map[string]any{ + // Container and Namespace names need no escaping because they are DNS labels. + // Pod names need no escaping because they are DNS subdomains. + // + // https://kubernetes.io/docs/concepts/overview/working-with-objects/names + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/resource/k8s.md + {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, + {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, + {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme + outConfig.Processors["transform/patroni_logs"] = map[string]any{ + "log_statements": []map[string]any{{ + "context": "log", + "statements": []string{ + `set(instrumentation_scope.name, "patroni")`, + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsejson + `set(cache, ParseJSON(body["original"]))`, + + // The log severity is in the "levelname" field. + // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext + `set(severity_text, cache["levelname"])`, + + // Map Patroni (python) "logging levels" to OpenTelemetry severity levels. + // + // https://docs.python.org/3.6/library/logging.html#logging-levels + // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber + // https://github.com/open-telemetry/opentelemetry-python/blob/v1.29.0/opentelemetry-api/src/opentelemetry/_logs/severity/__init__.py + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums + `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"`, + `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"`, + `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, + `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, + `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"`, + + // Parse the "asctime" field into the record timestamp. + // The format is neither RFC 3339 nor ISO 8601: + // + // The date and time are separated by a single space U+0020, + // followed by a comma U+002C, then milliseconds. 
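+ // An illustrative record (value made up) would carry
+ //   "asctime": "2025-01-02 15:04:05,123"
+ // which the "%F %T,%L" layout below parses into the record timestamp.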
+ // + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/stanza/docs/types/timestamp.md + // https://docs.python.org/3.6/library/logging.html#logging.LogRecord + `set(time, Time(cache["asctime"], "%F %T,%L"))`, + + // Keep the unparsed log record in a standard attribute, and replace + // the log record body with the message field. + // + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md + `set(attributes["log.record.original"], body["original"])`, + `set(body, cache["message"])`, + }, + }}, + } + + // If there are exporters to be added to the logs pipelines defined in + // the spec, add them to the pipeline. Otherwise, add the DebugExporter. + var exporters []ComponentID + if inCluster.Spec.Instrumentation != nil && + inCluster.Spec.Instrumentation.Logs != nil && + inCluster.Spec.Instrumentation.Logs.Exporters != nil { + exporters = inCluster.Spec.Instrumentation.Logs.Exporters + } else { + exporters = []ComponentID{DebugExporter} + } + + outConfig.Pipelines["logs/patroni"] = Pipeline{ + Extensions: []ComponentID{"file_storage/patroni_logs"}, + Receivers: []ComponentID{"filelog/patroni_jsonlog"}, + Processors: []ComponentID{ + "resource/patroni", + "transform/patroni_logs", + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: exporters, + } + } +} + +func EnablePatroniMetrics(ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outConfig *Config, +) { + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + // Add Prometheus exporter + outConfig.Exporters[Prometheus] = map[string]any{ + "endpoint": "0.0.0.0:9187", + } + + // Add Prometheus Receiver + outConfig.Receivers[Prometheus] = map[string]any{ + "config": map[string]any{ + "scrape_configs": []map[string]any{ + { + "job_name": "patroni", + "scheme": "https", + "tls_config": map[string]any{ + "insecure_skip_verify": true, + }, + "scrape_interval": "10s", + "static_configs": []map[string]any{ + { + "targets": []string{ + "0.0.0.0:8008", + }, + }, + }, + }, + }, + }, + } + + // Add Metrics Pipeline + outConfig.Pipelines[PatroniMetrics] = Pipeline{ + Receivers: []ComponentID{Prometheus}, + Processors: []ComponentID{ + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: []ComponentID{Prometheus}, + } + } +} diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go new file mode 100644 index 0000000000..dd5469f07a --- /dev/null +++ b/internal/collector/patroni_test.go @@ -0,0 +1,185 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + "testing" + + "gotest.tools/v3/assert" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestEnablePatroniLogging(t *testing.T) { + t.Run("NilInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + + EnablePatroniLogging(ctx, new(v1beta1.PostgresCluster), config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed +extensions: + file_storage/patroni_logs: + create_directory: true + directory: /pgdata/patroni/log/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/patroni: + attributes: + - action: insert + key: k8s.container.name + value: database + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/patroni_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "patroni") + - set(cache, ParseJSON(body["original"])) + - set(severity_text, cache["levelname"]) + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + - set(time, Time(cache["asctime"], "%F %T,%L")) + - set(attributes["log.record.original"], body["original"]) + - set(body, cache["message"]) +receivers: + filelog/patroni_jsonlog: + include: + - /pgdata/patroni/log/*.log + operators: + - from: body + to: body.original + type: move + storage: file_storage/patroni_logs +service: + extensions: + - file_storage/patroni_logs + pipelines: + logs/patroni: + exporters: + - debug + processors: + - resource/patroni + - transform/patroni_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/patroni_jsonlog +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePatroniLogging(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name +extensions: + file_storage/patroni_logs: + create_directory: true + directory: /pgdata/patroni/log/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/patroni: + attributes: + - action: insert + key: k8s.container.name + value: database + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/patroni_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "patroni") + - set(cache, ParseJSON(body["original"])) + - set(severity_text, cache["levelname"]) + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + - set(time, Time(cache["asctime"], "%F %T,%L")) + - set(attributes["log.record.original"], body["original"]) + - set(body, cache["message"]) +receivers: + filelog/patroni_jsonlog: + include: + - /pgdata/patroni/log/*.log + operators: + - from: body + to: body.original + type: move + storage: file_storage/patroni_logs +service: + extensions: + - file_storage/patroni_logs + pipelines: + logs/patroni: + exporters: + - googlecloud + processors: + - resource/patroni + - transform/patroni_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/patroni_jsonlog +`) + }) +} diff --git a/internal/collector/pgadmin.go b/internal/collector/pgadmin.go new file mode 100644 index 0000000000..b108b3997e --- /dev/null +++ b/internal/collector/pgadmin.go @@ -0,0 +1,128 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec, + configmap *corev1.ConfigMap, +) error { + if !feature.Enabled(ctx, feature.OpenTelemetryLogs) { + return nil + } + otelConfig := NewConfig(spec) + + otelConfig.Extensions["file_storage/pgadmin_data_logs"] = map[string]any{ + "directory": "/var/lib/pgadmin/logs/receiver", + "create_directory": false, + "fsync": true, + } + + otelConfig.Receivers["filelog/pgadmin"] = map[string]any{ + "include": []string{"/var/lib/pgadmin/logs/pgadmin.log"}, + "storage": "file_storage/pgadmin_data_logs", + } + otelConfig.Receivers["filelog/gunicorn"] = map[string]any{ + "include": []string{"/var/lib/pgadmin/logs/gunicorn.log"}, + "storage": "file_storage/pgadmin_data_logs", + } + + otelConfig.Processors["resource/pgadmin"] = map[string]any{ + "attributes": []map[string]any{ + // Container and Namespace names need no escaping because they are DNS labels. + // Pod names need no escaping because they are DNS subdomains. 
+ // + // https://kubernetes.io/docs/concepts/overview/working-with-objects/names + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/resource/k8s.md + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md + {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerPGAdmin}, + {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, + {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + }, + } + + otelConfig.Processors["transform/pgadmin_log"] = map[string]any{ + "log_statements": []map[string]any{ + { + "context": "log", + "statements": []string{ + // Keep the unparsed log record in a standard attribute, and replace + // the log record body with the message field. + // + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md + `set(attributes["log.record.original"], body)`, + `set(cache, ParseJSON(body))`, + `merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert")`, + `set(body, cache["message"])`, + + // Set instrumentation scope to the "name" from each log record. + `set(instrumentation_scope.name, cache["name"])`, + + // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext + `set(severity_text, cache["level"])`, + `set(time_unix_nano, Int(cache["time"]*1000000000))`, + + // Map pgAdmin "logging levels" to OpenTelemetry severity levels. + // + // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber + // https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums + `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"`, + `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"`, + `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, + `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, + `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"`, + }, + }, + }, + } + + // If there are exporters to be added to the logs pipelines defined in + // the spec, add them to the pipeline. Otherwise, add the DebugExporter. 
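+ // As a hedged illustration (mirroring the shape exercised in the tests), an
+ // InstrumentationSpec containing
+ //
+ //	config:
+ //	  exporters:
+ //	    googlecloud: { project: google-project-name }
+ //	logs:
+ //	  exporters: [googlecloud]
+ //
+ // routes both pgAdmin pipelines to the googlecloud exporter instead of the
+ // default debug exporter.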
+ var exporters []ComponentID + if spec != nil && spec.Logs != nil && spec.Logs.Exporters != nil { + exporters = spec.Logs.Exporters + } else { + exporters = []ComponentID{DebugExporter} + } + + otelConfig.Pipelines["logs/pgadmin"] = Pipeline{ + Extensions: []ComponentID{"file_storage/pgadmin_data_logs"}, + Receivers: []ComponentID{"filelog/pgadmin"}, + Processors: []ComponentID{ + "resource/pgadmin", + "transform/pgadmin_log", + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: exporters, + } + + otelConfig.Pipelines["logs/gunicorn"] = Pipeline{ + Extensions: []ComponentID{"file_storage/pgadmin_data_logs"}, + Receivers: []ComponentID{"filelog/gunicorn"}, + Processors: []ComponentID{ + "resource/pgadmin", + "transform/pgadmin_log", + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: exporters, + } + + otelYAML, err := otelConfig.ToYAML() + if err == nil { + configmap.Data["collector.yaml"] = otelYAML + } + return err +} diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go new file mode 100644 index 0000000000..4da886abbc --- /dev/null +++ b/internal/collector/pgadmin_test.go @@ -0,0 +1,230 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector_test + +import ( + "context" + "testing" + + "gotest.tools/v3/assert" + corev1 "k8s.io/api/core/v1" + + "github.com/crunchydata/postgres-operator/internal/collector" + pgadmin "github.com/crunchydata/postgres-operator/internal/controller/standalone_pgadmin" + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/initialize" + "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestEnablePgAdminLogging(t *testing.T) { + t.Run("NilInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + + ctx := feature.NewContext(context.Background(), gate) + + configmap := new(corev1.ConfigMap) + initialize.Map(&configmap.Data) + err := collector.EnablePgAdminLogging(ctx, nil, configmap) + assert.NilError(t, err) + + assert.Assert(t, cmp.MarshalMatches(configmap.Data, ` +collector.yaml: | + # Generated by postgres-operator. DO NOT EDIT. + # Your changes will not be saved. 
+ exporters: + debug: + verbosity: detailed + extensions: + file_storage/pgadmin_data_logs: + create_directory: false + directory: `+pgadmin.LogDirectoryAbsolutePath+`/receiver + fsync: true + processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgadmin: + attributes: + - action: insert + key: k8s.container.name + value: pgadmin + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/pgadmin_log: + log_statements: + - context: log + statements: + - set(attributes["log.record.original"], body) + - set(cache, ParseJSON(body)) + - merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), + "insert") + - set(body, cache["message"]) + - set(instrumentation_scope.name, cache["name"]) + - set(severity_text, cache["level"]) + - set(time_unix_nano, Int(cache["time"]*1000000000)) + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + receivers: + filelog/gunicorn: + include: + - `+pgadmin.GunicornLogFileAbsolutePath+` + storage: file_storage/pgadmin_data_logs + filelog/pgadmin: + include: + - `+pgadmin.LogFileAbsolutePath+` + storage: file_storage/pgadmin_data_logs + service: + extensions: + - file_storage/pgadmin_data_logs + pipelines: + logs/gunicorn: + exporters: + - debug + processors: + - resource/pgadmin + - transform/pgadmin_log + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/gunicorn + logs/pgadmin: + exporters: + - debug + processors: + - resource/pgadmin + - transform/pgadmin_log + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgadmin +`)) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + + ctx := feature.NewContext(context.Background(), gate) + + var spec v1beta1.InstrumentationSpec + require.UnmarshalInto(t, &spec, `{ + config: { + exporters: { + googlecloud: { + log: { default_log_name: opentelemetry.io/collector-exported-log }, + project: google-project-name, + }, + }, + }, + logs: { exporters: [googlecloud] }, + }`) + + configmap := new(corev1.ConfigMap) + initialize.Map(&configmap.Data) + err := collector.EnablePgAdminLogging(ctx, &spec, configmap) + assert.NilError(t, err) + + assert.Assert(t, cmp.MarshalMatches(configmap.Data, ` +collector.yaml: | + # Generated by postgres-operator. DO NOT EDIT. + # Your changes will not be saved. 
+ exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + extensions: + file_storage/pgadmin_data_logs: + create_directory: false + directory: `+pgadmin.LogDirectoryAbsolutePath+`/receiver + fsync: true + processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgadmin: + attributes: + - action: insert + key: k8s.container.name + value: pgadmin + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/pgadmin_log: + log_statements: + - context: log + statements: + - set(attributes["log.record.original"], body) + - set(cache, ParseJSON(body)) + - merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), + "insert") + - set(body, cache["message"]) + - set(instrumentation_scope.name, cache["name"]) + - set(severity_text, cache["level"]) + - set(time_unix_nano, Int(cache["time"]*1000000000)) + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + receivers: + filelog/gunicorn: + include: + - `+pgadmin.GunicornLogFileAbsolutePath+` + storage: file_storage/pgadmin_data_logs + filelog/pgadmin: + include: + - `+pgadmin.LogFileAbsolutePath+` + storage: file_storage/pgadmin_data_logs + service: + extensions: + - file_storage/pgadmin_data_logs + pipelines: + logs/gunicorn: + exporters: + - googlecloud + processors: + - resource/pgadmin + - transform/pgadmin_log + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/gunicorn + logs/pgadmin: + exporters: + - googlecloud + processors: + - resource/pgadmin + - transform/pgadmin_log + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgadmin +`)) + }) +} diff --git a/internal/collector/pgbackrest.go b/internal/collector/pgbackrest.go new file mode 100644 index 0000000000..b847f854fe --- /dev/null +++ b/internal/collector/pgbackrest.go @@ -0,0 +1,116 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + _ "embed" + "encoding/json" + "fmt" + "slices" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +// The contents of "pgbackrest_logs_transforms.yaml" as JSON. +// See: https://pkg.go.dev/embed +// +//go:embed "generated/pgbackrest_logs_transforms.json" +var pgBackRestLogsTransforms json.RawMessage + +func NewConfigForPgBackrestRepoHostPod( + ctx context.Context, + spec *v1beta1.InstrumentationSpec, + repos []v1beta1.PGBackRestRepo, +) *Config { + config := NewConfig(spec) + + if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + + var directory string + for _, repo := range repos { + if repo.Volume != nil { + directory = fmt.Sprintf(naming.PGBackRestRepoLogPath, repo.Name) + break + } + } + + // We should only enter this function if a PVC is assigned for a dedicated repohost + // but if we don't have one, exit early. 
+ if directory == "" { + return config + } + + // Keep track of what log records and files have been processed. + // Use a subdirectory of the logs directory to stay within the same failure domain. + config.Extensions["file_storage/pgbackrest_logs"] = map[string]any{ + "directory": directory + "/receiver", + "create_directory": false, + "fsync": true, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme + config.Receivers["filelog/pgbackrest_log"] = map[string]any{ + // Read the files and keep track of what has been processed. + "include": []string{ + directory + "/*.log", + }, + "storage": "file_storage/pgbackrest_logs", + // pgBackRest prints logs with a log prefix, which includes a timestamp + // as long as the timestamp is not turned off in the configuration. + // When pgBackRest starts a process, it also will print a newline + // (if the file has already been written to) and a process "banner" + // which looks like "-------------------PROCESS START-------------------\n". + // Therefore we break multiline on the timestamp or the 19 dashes that start the banner. + // - https://github.com/pgbackrest/pgbackrest/blob/main/src/common/log.c#L451 + "multiline": map[string]string{ + "line_start_pattern": `^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19}`, + }, + } + + config.Processors["resource/pgbackrest"] = map[string]any{ + "attributes": []map[string]any{ + // Container and Namespace names need no escaping because they are DNS labels. + // Pod names need no escaping because they are DNS subdomains. + // + // https://kubernetes.io/docs/concepts/overview/working-with-objects/names + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/resource/k8s.md + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md + {"action": "insert", "key": "k8s.container.name", "value": naming.PGBackRestRepoContainerName}, + {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, + {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme + config.Processors["transform/pgbackrest_logs"] = map[string]any{ + "log_statements": slices.Clone(pgBackRestLogsTransforms), + } + + // If there are exporters to be added to the logs pipelines defined in + // the spec, add them to the pipeline. Otherwise, add the DebugExporter. + var exporters []ComponentID + if spec != nil && spec.Logs != nil && spec.Logs.Exporters != nil { + exporters = spec.Logs.Exporters + } else { + exporters = []ComponentID{DebugExporter} + } + + config.Pipelines["logs/pgbackrest"] = Pipeline{ + Extensions: []ComponentID{"file_storage/pgbackrest_logs"}, + Receivers: []ComponentID{"filelog/pgbackrest_log"}, + Processors: []ComponentID{ + "resource/pgbackrest", + "transform/pgbackrest_logs", + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: exporters, + } + } + return config +} diff --git a/internal/collector/pgbackrest_logs_transforms.yaml b/internal/collector/pgbackrest_logs_transforms.yaml new file mode 100644 index 0000000000..31f4a48f94 --- /dev/null +++ b/internal/collector/pgbackrest_logs_transforms.yaml @@ -0,0 +1,43 @@ +# This list of transform statements configures an OTel Transform Processor to +# parse pgbackrest logs. 
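The multiline setting above decides where one pgBackRest record ends and the next begins. A small standalone check of that line_start_pattern, with made-up sample lines:

// Illustration only: a timestamped prefix or the process banner starts a new
// record; wrapped lines stay attached to the previous one.
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	start := regexp.MustCompile(`^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19}`)

	lines := []string{
		"2025-01-01 12:00:00.000 P00   INFO: archive-push command begin",
		"        ... continuation of the previous message ...",
		strings.Repeat("-", 19) + "PROCESS START" + strings.Repeat("-", 19),
	}
	for _, line := range lines {
		fmt.Println(start.MatchString(line), line) // true, false, true
	}
}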
+#
+# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme
+
+- context: log
+  statements:
+    - set(instrumentation_scope.name, "pgbackrest")
+    - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0")
+
+    # Regex the pgbackrest log to capture the following groups:
+    # 1) the timestamp (form YYYY-MM-DD HH:MM:SS.sss)
+    # 2) the process id (form `P` + 2 or 3 digits)
+    # 3) the log level (form INFO, WARN, etc.)
+    # 4) the message (anything else, including newline -- we can do this because we have a multiline block on the receiver)
+    - >-
+      merge_maps(cache,
+      ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?<process_id>P\\d{2,3})\\s*(?<error_severity>\\S*): (?<message>(?s).*)$"),
+      "insert")
+      where Len(body) > 0
+
+    # The log severity is the "error_severity" field.
+    # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext
+    # https://pgbackrest.org/configuration.html#section-log/option-log-level-file
+    - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"])
+    - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE"
+    - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"
+    - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL"
+    - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"
+    - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN"
+    - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
+
+    # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-timestamp
+    - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"])
+
+    # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/process.md
+    - set(attributes["process.pid"], cache["process_id"])
+
+    # Keep the unparsed log record in a standard attribute,
+    # and replace the log record body with the message field.
+    # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md
+    - set(attributes["log.record.original"], body)
+    - set(body, cache["message"])
diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go
new file mode 100644
index 0000000000..55276c0c9b
--- /dev/null
+++ b/internal/collector/pgbackrest_test.go
@@ -0,0 +1,197 @@
+// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package collector
+
+import (
+	"context"
+	"testing"
+
+	"gotest.tools/v3/assert"
+
+	"github.com/crunchydata/postgres-operator/internal/feature"
+	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
+)
+
+func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) {
+	t.Run("NilInstrumentationSpec", func(t *testing.T) {
+		gate := feature.NewGate()
+		assert.NilError(t, gate.SetFromMap(map[string]bool{
+			feature.OpenTelemetryLogs: true,
+		}))
+		ctx := feature.NewContext(context.Background(), gate)
+		repos := []v1beta1.PGBackRestRepo{
+			{
+				Name:   "repo1",
+				Volume: new(v1beta1.RepoPVC),
+			},
+		}
+
+		config := NewConfigForPgBackrestRepoHostPod(ctx, nil, repos)
+
+		result, err := config.ToYAML()
+		assert.NilError(t, err)
+		assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT.
+# Your changes will not be saved.
+exporters: + debug: + verbosity: detailed +extensions: + file_storage/pgbackrest_logs: + create_directory: false + directory: /pgbackrest/repo1/log/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgbackrest: + attributes: + - action: insert + key: k8s.container.name + value: pgbackrest + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/pgbackrest_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "pgbackrest") + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): + (?(?s).*)$"), "insert") where Len(body) > 0' + - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) + - set(attributes["process.pid"], cache["process_id"]) + - set(attributes["log.record.original"], body) + - set(body, cache["message"]) +receivers: + filelog/pgbackrest_log: + include: + - /pgbackrest/repo1/log/*.log + multiline: + line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} + storage: file_storage/pgbackrest_logs +service: + extensions: + - file_storage/pgbackrest_logs + pipelines: + logs/pgbackrest: + exporters: + - debug + processors: + - resource/pgbackrest + - transform/pgbackrest_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgbackrest_log +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + repos := []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + Volume: new(v1beta1.RepoPVC), + }, + } + + config := NewConfigForPgBackrestRepoHostPod(ctx, testInstrumentationSpec(), repos) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name +extensions: + file_storage/pgbackrest_logs: + create_directory: false + directory: /pgbackrest/repo1/log/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgbackrest: + attributes: + - action: insert + key: k8s.container.name + value: pgbackrest + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/pgbackrest_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "pgbackrest") + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): + (?(?s).*)$"), "insert") where Len(body) > 0' + - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) + - set(attributes["process.pid"], cache["process_id"]) + - set(attributes["log.record.original"], body) + - set(body, cache["message"]) +receivers: + filelog/pgbackrest_log: + include: + - /pgbackrest/repo1/log/*.log + multiline: + line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} + storage: file_storage/pgbackrest_logs +service: + extensions: + - file_storage/pgbackrest_logs + pipelines: + logs/pgbackrest: + exporters: + - googlecloud + processors: + - resource/pgbackrest + - transform/pgbackrest_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgbackrest_log +`) + }) +} diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go new file mode 100644 index 0000000000..40a501e7f1 --- /dev/null +++ b/internal/collector/pgbouncer.go @@ -0,0 +1,195 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + _ "embed" + "encoding/json" + "fmt" + "slices" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +// The contents of "pgbouncer_metrics_queries.yaml" as JSON. +// See: https://pkg.go.dev/embed +// +//go:embed "generated/pgbouncer_metrics_queries.json" +var pgBouncerMetricsQueries json.RawMessage + +// PGBouncerPostRotateScript is the script that is run after pgBouncer's log +// files have been rotated. The pgbouncer process is sent a sighup signal. 
+const PGBouncerPostRotateScript = "pkill -HUP --exact pgbouncer" + +// NewConfigForPgBouncerPod creates a config for the OTel collector container +// that runs as a sidecar in the pgBouncer Pod +func NewConfigForPgBouncerPod( + ctx context.Context, cluster *v1beta1.PostgresCluster, sqlQueryUsername string, +) *Config { + if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil { + // pgBouncer is disabled; return nil + return nil + } + + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePgBouncerLogging(ctx, cluster, config) + EnablePgBouncerMetrics(ctx, config, sqlQueryUsername) + + return config +} + +// EnablePgBouncerLogging adds necessary configuration to the collector config to collect +// logs from pgBouncer when the OpenTelemetryLogging feature flag is enabled. +func EnablePgBouncerLogging(ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outConfig *Config) { + if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + directory := naming.PGBouncerLogPath + + // Keep track of what log records and files have been processed. + // Use a subdirectory of the logs directory to stay within the same failure domain. + // + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/extension/storage/filestorage#readme + outConfig.Extensions["file_storage/pgbouncer_logs"] = map[string]any{ + "directory": directory + "/receiver", + "create_directory": false, + "fsync": true, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme + outConfig.Receivers["filelog/pgbouncer_log"] = map[string]any{ + // Read the log files and keep track of what has been processed. + // We want to watch the ".log.1" file as well as it is possible that + // a log entry or two will end up there after the original ".log" + // file is renamed to ".log.1" during rotation. OTel will not create + // duplicate log entries. + "include": []string{directory + "/*.log", directory + "/*.log.1"}, + "storage": "file_storage/pgbouncer_logs", + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/resourceprocessor#readme + outConfig.Processors["resource/pgbouncer"] = map[string]any{ + "attributes": []map[string]any{ + // Container and Namespace names need no escaping because they are DNS labels. + // Pod names need no escaping because they are DNS subdomains. + // + // https://kubernetes.io/docs/concepts/overview/working-with-objects/names + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/resource/k8s.md + {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerPGBouncer}, + {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, + {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme + outConfig.Processors["transform/pgbouncer_logs"] = map[string]any{ + "log_statements": []map[string]any{{ + "context": "log", + "statements": []string{ + // Set instrumentation scope + `set(instrumentation_scope.name, "pgbouncer")`, + + // Extract timestamp, pid, log level, and message and store in cache. 
+				`merge_maps(cache, ExtractPatterns(body, ` +
+					`"^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) ` +
+					`\\[(?<pid>\\d+)\\] (?<log_level>[A-Z]+) (?<msg>.*$)"), "insert")`,
+
+				// https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext
+				`set(severity_text, cache["log_level"])`,
+
+				// Map pgBouncer (libusual) "logging levels" to OpenTelemetry severity levels.
+				//
+				// https://github.com/libusual/libusual/blob/master/usual/logging.c
+				// https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
+				// https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings
+				// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums
+				`set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" or severity_text == "DEBUG"`,
+				`set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG"`,
+				`set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`,
+				`set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`,
+				`set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL"`,
+
+				// Parse the timestamp.
+				// The format is neither RFC 3339 nor ISO 8601:
+				//
+				// The date and time are separated by a single space U+0020,
+				// followed by a dot U+002E, milliseconds, another space U+0020,
+				// then a timezone abbreviation.
+				//
+				// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/stanza/docs/types/timestamp.md
+				`set(time, Time(cache["timestamp"], "%F %T.%L %Z"))`,
+
+				// Keep the unparsed log record in a standard attribute, and replace
+				// the log record body with the message field.
+				//
+				// https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md
+				`set(attributes["log.record.original"], body)`,
+
+				// Set pid as attribute
+				`set(attributes["process.pid"], cache["pid"])`,
+
+				// Set the log message to body.
+				`set(body, cache["msg"])`,
+			},
+		}},
+		}
+
+		// If there are exporters to be added to the logs pipelines defined in
+		// the spec, add them to the pipeline. Otherwise, add the DebugExporter.
+		var exporters []ComponentID
+		if inCluster.Spec.Instrumentation != nil &&
+			inCluster.Spec.Instrumentation.Logs != nil &&
+			inCluster.Spec.Instrumentation.Logs.Exporters != nil {
+			exporters = inCluster.Spec.Instrumentation.Logs.Exporters
+		} else {
+			exporters = []ComponentID{DebugExporter}
+		}
+
+		outConfig.Pipelines["logs/pgbouncer"] = Pipeline{
+			Extensions: []ComponentID{"file_storage/pgbouncer_logs"},
+			Receivers:  []ComponentID{"filelog/pgbouncer_log"},
+			Processors: []ComponentID{
+				"resource/pgbouncer",
+				"transform/pgbouncer_logs",
+				SubSecondBatchProcessor,
+				CompactingProcessor,
+			},
+			Exporters: exporters,
+		}
+	}
+}
+
+// EnablePgBouncerMetrics adds necessary configuration to the collector config to scrape
+// metrics from pgBouncer when the OpenTelemetryMetrics feature flag is enabled.
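For reference, a standalone sketch of the same extraction the transform above performs on a single pgBouncer log line. The sample line, and the Go reference layout standing in for "%F %T.%L %Z", are illustrative; Go's regexp needs the (?P<name>) spelling for named groups.

// Sketch only: extract timestamp, pid, log level, and message from one line.
package main

import (
	"fmt"
	"regexp"
	"time"
)

func main() {
	re := regexp.MustCompile(`^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3} [A-Z]{3}) \[(?P<pid>\d+)\] (?P<log_level>[A-Z]+) (?P<msg>.*$)`)

	line := "2025-01-01 12:00:00.000 UTC [7] LOG stats: 0 xacts/s, 0 queries/s"
	m := re.FindStringSubmatch(line)
	if m == nil {
		panic("line did not match")
	}
	fields := map[string]string{}
	for i, name := range re.SubexpNames() {
		if name != "" {
			fields[name] = m[i]
		}
	}

	// pgBouncer's timestamp is "date time.millis TZ", not RFC 3339.
	ts, err := time.Parse("2006-01-02 15:04:05.000 MST", fields["timestamp"])
	fmt.Println(fields["pid"], fields["log_level"], fields["msg"], ts.UTC(), err)
}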
+func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsername string) { + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + // Add Prometheus exporter + config.Exporters[Prometheus] = map[string]any{ + "endpoint": "0.0.0.0:9187", + } + + // Add SqlQuery Receiver + config.Receivers[SqlQuery] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf(`host=localhost dbname=pgbouncer port=5432 user=%s password=${env:PGPASSWORD}`, + sqlQueryUsername), + "queries": slices.Clone(pgBouncerMetricsQueries), + } + + // Add Metrics Pipeline + config.Pipelines[PGBouncerMetrics] = Pipeline{ + Receivers: []ComponentID{SqlQuery}, + Processors: []ComponentID{ + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: []ComponentID{Prometheus}, + } + } +} diff --git a/internal/collector/pgbouncer_metrics_queries.yaml b/internal/collector/pgbouncer_metrics_queries.yaml new file mode 100644 index 0000000000..228fef1cc0 --- /dev/null +++ b/internal/collector/pgbouncer_metrics_queries.yaml @@ -0,0 +1,101 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from PgBouncer. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/v5.1.1/sql_exporter/common/crunchy_pgbouncer_121_collector.yml + + - sql: "SHOW CLIENTS" + metrics: + - metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + attribute_columns: ["database", "user", "state", "application_name", "link"] + description: "Current waiting time in seconds" + + # NOTE: Avoid collecting "host" column because it can be null; the collector will warn against null. + # The host column should always point either to pgBouncer's virtual database (the null case) or to the primary. 
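Since the Prometheus exporter above listens on 0.0.0.0:9187, the resulting pgBouncer metrics can be pulled with a plain HTTP GET once the sidecar is running; /metrics is the exporter's usual scrape path. A minimal sketch:

// Illustration only: read the exported ccp_pgbouncer_* series directly.
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:9187/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("%s\n", body) // Prometheus text format
}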
+ - sql: "SHOW DATABASES" + metrics: + - metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: "Maximum number of server connections" + + - metric_name: ccp_pgbouncer_databases_min_pool_size + value_column: min_pool_size + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: "Minimum number of server connections" + + - metric_name: ccp_pgbouncer_databases_reserve_pool + value_column: reserve_pool + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: "Maximum number of additional connections for this database" + + - metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: >- + Maximum number of allowed connections for this database, + as set by max_db_connections, either globally or per database + + - metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: "Current number of connections for this database" + + - metric_name: ccp_pgbouncer_databases_paused + value_column: paused + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: "1 if this database is currently paused, else 0" + + - metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled + attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + description: "1 if this database is currently disabled, else 0" + + - sql: "SHOW LISTS" + metrics: + - metric_name: ccp_pgbouncer_lists_item_count + value_column: items + attribute_columns: ["list"] + description: "Count of items registered with pgBouncer" + + - sql: "SHOW POOLS" + metrics: + - metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + attribute_columns: ["database", "user"] + description: >- + Client connections that are either linked to server connections or + are idle with no queries waiting to be processed + + - metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + attribute_columns: ["database", "user"] + description: "Client connections that have sent queries but have not yet got a server connection" + + - metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + attribute_columns: ["database", "user"] + description: "Server connections that are linked to a client" + + - metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + attribute_columns: ["database", "user"] + description: "Server connections that are unused and immediately usable for client queries" + + - metric_name: ccp_pgbouncer_pools_server_used + value_column: sv_used + attribute_columns: ["database", "user"] + description: >- + Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + + - sql: "SHOW SERVERS" + metrics: + - metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + attribute_columns: ["database", "user", "state", "application_name", "link"] + description: >- + 1 if the connection will be closed as soon as possible, + because a configuration file reload or DNS update changed the connection information + or RECONNECT was issued diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go 
new file mode 100644 index 0000000000..6e19ebdac2 --- /dev/null +++ b/internal/collector/pgbouncer_test.go @@ -0,0 +1,188 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + "testing" + + "gotest.tools/v3/assert" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestEnablePgBouncerLogging(t *testing.T) { + t.Run("NilInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + + EnablePgBouncerLogging(ctx, new(v1beta1.PostgresCluster), config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed +extensions: + file_storage/pgbouncer_logs: + create_directory: false + directory: /tmp/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgbouncer: + attributes: + - action: insert + key: k8s.container.name + value: pgbouncer + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/pgbouncer_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "pgbouncer") + - merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) \\[(?\\d+)\\] (?[A-Z]+) + (?.*$)"), "insert") + - set(severity_text, cache["log_level"]) + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" + or severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" + - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(attributes["log.record.original"], body) + - set(attributes["process.pid"], cache["pid"]) + - set(body, cache["msg"]) +receivers: + filelog/pgbouncer_log: + include: + - /tmp/*.log + - /tmp/*.log.1 + storage: file_storage/pgbouncer_logs +service: + extensions: + - file_storage/pgbouncer_logs + pipelines: + logs/pgbouncer: + exporters: + - debug + processors: + - resource/pgbouncer + - transform/pgbouncer_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgbouncer_log +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(testInstrumentationSpec()) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + + EnablePgBouncerLogging(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name +extensions: + file_storage/pgbouncer_logs: + create_directory: false + directory: /tmp/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgbouncer: + attributes: + - action: insert + key: k8s.container.name + value: pgbouncer + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + transform/pgbouncer_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "pgbouncer") + - merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) \\[(?\\d+)\\] (?[A-Z]+) + (?.*$)"), "insert") + - set(severity_text, cache["log_level"]) + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" + or severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" + - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(attributes["log.record.original"], body) + - set(attributes["process.pid"], cache["pid"]) + - set(body, cache["msg"]) +receivers: + filelog/pgbouncer_log: + include: + - /tmp/*.log + - /tmp/*.log.1 + storage: file_storage/pgbouncer_logs +service: + extensions: + - file_storage/pgbouncer_logs + pipelines: + logs/pgbouncer: + exporters: + - googlecloud + processors: + - resource/pgbouncer + - transform/pgbouncer_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgbouncer_log +`) + }) +} diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go new file mode 100644 index 0000000000..38f680d369 --- /dev/null +++ b/internal/collector/postgres.go @@ -0,0 +1,322 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + _ "embed" + "encoding/json" + "fmt" + "math" + "slices" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func NewConfigForPostgresPod(ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outParameters *postgres.ParameterSet, +) *Config { + config := NewConfig(inCluster.Spec.Instrumentation) + + // Metrics + EnablePostgresMetrics(ctx, inCluster, config) + EnablePatroniMetrics(ctx, inCluster, config) + + // Logging + EnablePostgresLogging(ctx, inCluster, config, outParameters) + EnablePatroniLogging(ctx, inCluster, config) + + return config +} + +// The contents of "postgres_logs_transforms.yaml" as JSON. +// See: https://pkg.go.dev/embed +// +//go:embed "generated/postgres_logs_transforms.json" +var postgresLogsTransforms json.RawMessage + +// postgresCSVNames returns the names of fields in the CSV logs for version. +func postgresCSVNames(version int) string { + // JSON is the preferred format, so use those names. 
+ // https://www.postgresql.org/docs/current/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-JSONLOG + + // https://www.postgresql.org/docs/8.3/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-CSVLOG + names := `timestamp,user,dbname,pid` + + `,connection_from` + // NOTE: this contains the JSON "remote_host" and "remote_port" values + `,session_id,line_num,ps,session_start,vxid,txid` + + `,error_severity,state_code,message,detail,hint` + + `,internal_query,internal_position,context,statement,cursor_position` + + `,location` // NOTE: this contains the JSON "func_name", "file_name", and "file_line_num" values + + // https://www.postgresql.org/docs/9.0/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-CSVLOG + if version >= 9 { + names += `,application_name` + } + + // https://www.postgresql.org/docs/13/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-CSVLOG + if version >= 13 { + names += `,backend_type` + } + + // https://www.postgresql.org/docs/14/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-CSVLOG + if version >= 14 { + names += `,leader_pid,query_id` + } + + return names +} + +func EnablePostgresLogging( + ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outConfig *Config, + outParameters *postgres.ParameterSet, +) { + if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + directory := postgres.LogDirectory() + + // https://www.postgresql.org/docs/current/runtime-config-logging.html + outParameters.Add("logging_collector", "on") + outParameters.Add("log_directory", directory) + + // PostgreSQL v8.3 adds support for CSV logging, and + // PostgreSQL v15 adds support for JSON logging. The latter is preferred + // because newlines are escaped as "\n", U+005C + U+006E. + if inCluster != nil && inCluster.Spec.PostgresVersion < 15 { + outParameters.Add("log_destination", "csvlog") + } else { + outParameters.Add("log_destination", "jsonlog") + } + + // If retentionPeriod is set in the spec, use that value; otherwise, we want + // to use a reasonably short duration. Defaulting to 1 day. + retentionPeriod := metav1.Duration{Duration: 24 * time.Hour} + if inCluster != nil && inCluster.Spec.Instrumentation != nil && + inCluster.Spec.Instrumentation.Logs != nil && + inCluster.Spec.Instrumentation.Logs.RetentionPeriod != nil { + retentionPeriod = inCluster.Spec.Instrumentation.Logs.RetentionPeriod.AsDuration() + } + logFilename, logRotationAge := generateLogFilenameAndRotationAge(retentionPeriod) + + // NOTE: The automated portions of log_filename are *entirely* based + // on time. There is no spelling that is guaranteed to be unique or + // monotonically increasing. + // + // TODO(logs): Limit the size/bytes of logs without losing messages; + // probably requires another process that deletes the oldest files. + // + // The ".log" suffix is replaced by ".json" for JSON log files. + outParameters.Add("log_filename", logFilename) + outParameters.Add("log_file_mode", "0660") + outParameters.Add("log_rotation_age", logRotationAge) + outParameters.Add("log_rotation_size", "0") + outParameters.Add("log_truncate_on_rotation", "on") + + // Log in a timezone that the OpenTelemetry Collector will understand. + outParameters.Add("log_timezone", "UTC") + + // Keep track of what log records and files have been processed. + // Use a subdirectory of the logs directory to stay within the same failure domain. + // TODO(log-rotation): Create this directory during Collector startup. 
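A rough sketch of why postgresCSVNames is version dependent: the CSV column list Postgres writes has grown over time, so the header used to parse a record must match the server version that produced the file. The counts below follow the concatenation above.

// Sketch only; the field lists mirror the ones built by postgresCSVNames.
package main

import (
	"fmt"
	"strings"
)

func main() {
	base := "timestamp,user,dbname,pid,connection_from,session_id,line_num,ps," +
		"session_start,vxid,txid,error_severity,state_code,message,detail,hint," +
		"internal_query,internal_position,context,statement,cursor_position,location"
	v12 := base + ",application_name"   // 9.0 and later add application_name
	v13 := v12 + ",backend_type"        // 13 adds backend_type
	v17 := v13 + ",leader_pid,query_id" // 14 adds leader_pid and query_id

	for _, header := range []string{v12, v13, v17} {
		fmt.Println(len(strings.Split(header, ","))) // 23, 24, 26
	}
}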
+ // + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/extension/storage/filestorage#readme + outConfig.Extensions["file_storage/postgres_logs"] = map[string]any{ + "directory": directory + "/receiver", + "create_directory": true, + "fsync": true, + } + + // TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme + outConfig.Receivers["filelog/postgres_csvlog"] = map[string]any{ + // Read the CSV files and keep track of what has been processed. + "include": []string{directory + "/*.csv"}, + "storage": "file_storage/postgres_logs", + + // Postgres does not escape newlines in its CSV log format. Search for + // the beginning of every record, starting with an unquoted timestamp. + // The 2nd through 5th fields are optional, so match through to the 7th field. + // This should do a decent job of not matching the middle of some SQL statement. + // + // The number of fields has changed over the years, but the first few + // are always formatted the same way. + // + // NOTE: This regexp is invoked in multi-line mode. https://go.dev/s/re2syntax + "multiline": map[string]string{ + "line_start_pattern": `^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d.\d{3} UTC` + // 1st: timestamp + `,(?:"[_\D](?:[^"]|"")*")?` + // 2nd: user name + `,(?:"[_\D](?:[^"]|"")*")?` + // 3rd: database name + `,\d*,(?:"(?:[^"]|"")+")?` + // 4–5th: process id, connection + `,[0-9a-f]+[.][0-9a-f]+,\d+,`, // 6–7th: session id, session line + }, + + // Differentiate these from the JSON ones below. + "operators": []map[string]any{ + {"type": "move", "from": "body", "to": "body.original"}, + {"type": "add", "field": "body.format", "value": "csv"}, + {"type": "add", "field": "body.headers", "value": postgresCSVNames(inCluster.Spec.PostgresVersion)}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme + outConfig.Receivers["filelog/postgres_jsonlog"] = map[string]any{ + // Read the JSON files and keep track of what has been processed. + "include": []string{directory + "/*.json"}, + "storage": "file_storage/postgres_logs", + + // Differentiate these from the CSV ones above. + // TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. + "operators": []map[string]any{ + {"type": "move", "from": "body", "to": "body.original"}, + {"type": "add", "field": "body.format", "value": "json"}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/resourceprocessor#readme + outConfig.Processors["resource/postgres"] = map[string]any{ + "attributes": []map[string]any{ + // Container and Namespace names need no escaping because they are DNS labels. + // Pod names need no escaping because they are DNS subdomains. 
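To make the CSV line_start_pattern above concrete, here is a standalone check against two made-up lines: a real record start matches, while a wrapped line from the middle of a multi-line SQL statement does not.

// Illustration only: the same record-start pattern, applied to sample lines.
package main

import (
	"fmt"
	"regexp"
)

func main() {
	start := regexp.MustCompile(`^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d.\d{3} UTC` +
		`,(?:"[_\D](?:[^"]|"")*")?` +
		`,(?:"[_\D](?:[^"]|"")*")?` +
		`,\d*,(?:"(?:[^"]|"")+")?` +
		`,[0-9a-f]+[.][0-9a-f]+,\d+,`)

	recordStart := `2025-01-01 12:34:56.789 UTC,"postgres","postgres",123,"[local]",6774e2a0.7b,1,"idle",...`
	continuation := `  AND relname = 'some_table'`

	fmt.Println(start.MatchString(recordStart))  // true
	fmt.Println(start.MatchString(continuation)) // false
}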
+ // + // https://kubernetes.io/docs/concepts/overview/working-with-objects/names + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/resource/k8s.md + {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, + {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, + {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/database#readme + {"action": "insert", "key": "db.system", "value": "postgresql"}, + {"action": "insert", "key": "db.version", "value": fmt.Sprint(inCluster.Spec.PostgresVersion)}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme + outConfig.Processors["transform/postgres_logs"] = map[string]any{ + "log_statements": slices.Clone(postgresLogsTransforms), + } + + // If there are exporters to be added to the logs pipelines defined in + // the spec, add them to the pipeline. Otherwise, add the DebugExporter. + var exporters []ComponentID + if inCluster.Spec.Instrumentation != nil && + inCluster.Spec.Instrumentation.Logs != nil && + inCluster.Spec.Instrumentation.Logs.Exporters != nil { + exporters = inCluster.Spec.Instrumentation.Logs.Exporters + } else { + exporters = []ComponentID{DebugExporter} + } + + outConfig.Pipelines["logs/postgres"] = Pipeline{ + Extensions: []ComponentID{"file_storage/postgres_logs"}, + // TODO(logs): Choose only one receiver, maybe? + Receivers: []ComponentID{ + "filelog/postgres_csvlog", + "filelog/postgres_jsonlog", + }, + Processors: []ComponentID{ + "resource/postgres", + "transform/postgres_logs", + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: exporters, + } + + // pgBackRest pipeline + outConfig.Extensions["file_storage/pgbackrest_logs"] = map[string]any{ + "directory": naming.PGBackRestPGDataLogPath + "/receiver", + "create_directory": false, + "fsync": true, + } + + outConfig.Receivers["filelog/pgbackrest_log"] = map[string]any{ + "include": []string{naming.PGBackRestPGDataLogPath + "/*.log"}, + "storage": "file_storage/pgbackrest_logs", + + // pgBackRest prints logs with a log prefix, which includes a timestamp + // as long as the timestamp is not turned off in the configuration. + // When pgBackRest starts a process, it also will print a newline + // (if the file has already been written to) and a process "banner" + // which looks like "-------------------PROCESS START-------------------\n". + // Therefore we break multiline on the timestamp or the 19 dashes that start the banner. + // - https://github.com/pgbackrest/pgbackrest/blob/main/src/common/log.c#L451 + "multiline": map[string]string{ + "line_start_pattern": `^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19}`, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/resourceprocessor#readme + outConfig.Processors["resource/pgbackrest"] = map[string]any{ + "attributes": []map[string]any{ + // Container and Namespace names need no escaping because they are DNS labels. + // Pod names need no escaping because they are DNS subdomains. 
+ // + // https://kubernetes.io/docs/concepts/overview/working-with-objects/names + // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/resource/k8s.md + {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, + {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, + {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + }, + } + + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme + outConfig.Processors["transform/pgbackrest_logs"] = map[string]any{ + "log_statements": slices.Clone(pgBackRestLogsTransforms), + } + + outConfig.Pipelines["logs/pgbackrest"] = Pipeline{ + Extensions: []ComponentID{"file_storage/pgbackrest_logs"}, + Receivers: []ComponentID{"filelog/pgbackrest_log"}, + Processors: []ComponentID{ + "resource/pgbackrest", + "transform/pgbackrest_logs", + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: exporters, + } + } +} + +// generateLogFilenameAndRotationAge takes a retentionPeriod and returns a +// log_filename and log_rotation_age to be used to configure postgres logging +func generateLogFilenameAndRotationAge( + retentionPeriod metav1.Duration, +) (logFilename, logRotationAge string) { + // Given how postgres does its log rotation with the truncate feature, we + // will always need to make up the total retention period with multiple log + // files that hold subunits of the total time (e.g. if the retentionPeriod + // is an hour, there will be 60 1-minute long files; if the retentionPeriod + // is a day, there will be 24 1-hour long files, etc) + + hours := math.Ceil(retentionPeriod.Hours()) + + switch true { + case hours <= 1: // One hour's worth of logs in 60 minute long log files + logFilename = "postgresql-%M.log" + logRotationAge = "1min" + case hours <= 24: // One day's worth of logs in 24 hour long log files + logFilename = "postgresql-%H.log" + logRotationAge = "1h" + case hours <= 24*7: // One week's worth of logs in 7 day long log files + logFilename = "postgresql-%a.log" + logRotationAge = "1d" + case hours <= 24*28: // One month's worth of logs in 28-31 day long log files + logFilename = "postgresql-%d.log" + logRotationAge = "1d" + default: // One year's worth of logs in 365 day long log files + logFilename = "postgresql-%j.log" + logRotationAge = "1d" + } + + return +} diff --git a/internal/collector/postgres_5m_metrics.yaml b/internal/collector/postgres_5m_metrics.yaml new file mode 100644 index 0000000000..9f5c3212dc --- /dev/null +++ b/internal/collector/postgres_5m_metrics.yaml @@ -0,0 +1,143 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + - sql: > + SELECT datname as dbname + , pg_database_size(datname) as bytes + FROM pg_catalog.pg_database + WHERE datistemplate = false; + metrics: + - metric_name: ccp_database_size_bytes + value_column: bytes + description: Database size in bytes + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + # Returns count of sequences that have used up 75% of what's available. 
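A small illustration of the idea behind generateLogFilenameAndRotationAge: the strftime escapes chosen above produce file names that repeat once the retention window has passed, and with log_truncate_on_rotation = on each reused file is emptied first. For the weekly case:

// Sketch only: "postgresql-%a.log" cycles through seven names and then repeats.
package main

import (
	"fmt"
	"time"
)

func main() {
	day := time.Date(2025, 1, 6, 0, 0, 0, 0, time.UTC) // a Monday
	for i := 0; i < 9; i++ {
		d := day.AddDate(0, 0, i)
		fmt.Printf("postgresql-%s.log\n", d.Format("Mon")) // Mon ... Sun, then Mon again
	}
}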
+ # https://github.com/CrunchyData/pgmonitor-extension/blob/main/sql/functions/functions.sql#L67 + # NOTE: Postgres 13 requires an alias, x below, where PG 17 doesn't. + - sql: > + SELECT count(*) AS count + FROM ( + SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots + , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used + FROM pg_catalog.pg_sequences s + ) x + WHERE (ROUND(used/slots*100)::int) > 75; + metrics: + - metric_name: ccp_sequence_exhaustion_count + value_column: count + description: | + Count of sequences that have reached greater than or equal to 75% of their max available numbers. + Function monitor.sequence_status() can provide more details if run directly on system. + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT s.datname AS dbname + , s.xact_commit + , s.xact_rollback + , s.blks_read + , s.blks_hit + , s.tup_returned + , s.tup_fetched + , s.tup_inserted + , s.tup_updated + , s.tup_deleted + , s.conflicts + , s.temp_files + , s.temp_bytes + , s.deadlocks + FROM pg_catalog.pg_stat_database s + JOIN pg_catalog.pg_database d ON d.datname = s.datname + WHERE d.datistemplate = false; + metrics: + - metric_name: ccp_stat_database_blks_hit + value_column: blks_hit + description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_blks_read + value_column: blks_read + description: Number of disk blocks read in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_conflicts + value_column: conflicts + description: Number of queries canceled due to conflicts with recovery in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_deadlocks + value_column: deadlocks + description: Number of deadlocks detected in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_temp_bytes + value_column: temp_bytes + description: Total amount of data written to temporary files by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_temp_files + value_column: temp_files + description: Number of rows deleted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_deleted + value_column: tup_deleted + description: Number of rows deleted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_fetched + value_column: tup_fetched + description: Number of rows fetched by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_inserted + value_column: tup_inserted + description: Number of rows inserted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_returned + value_column: tup_returned + description: Number of rows returned by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: 
ccp_stat_database_tup_updated + value_column: tup_updated + description: Number of rows updated by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_xact_commit + value_column: xact_commit + description: Number of transactions in this database that have been committed + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_xact_rollback + value_column: xact_rollback + description: Number of transactions in this database that have been rolled back + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + diff --git a/internal/collector/postgres_5s_metrics.yaml b/internal/collector/postgres_5s_metrics.yaml new file mode 100644 index 0000000000..4f1a142782 --- /dev/null +++ b/internal/collector/postgres_5s_metrics.yaml @@ -0,0 +1,949 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml +# + # TODO ccp_pg_stat_activity can be removed after metrics are fully aligned with the latest pgMonitor + - sql: > + SELECT + pg_database.datname, + tmp.state, + COALESCE(tmp2.usename, '') as usename, + COALESCE(tmp2.application_name, '') as application_name, + COALESCE(count,0) as count, + COALESCE(max_tx_duration,0) as max_tx_duration + FROM + ( + VALUES ('active'), + ('idle'), + ('idle in transaction'), + ('idle in transaction (aborted)'), + ('fastpath function call'), + ('disabled') + ) AS tmp(state) CROSS JOIN pg_database + LEFT JOIN + ( + SELECT + datname, + state, + usename, + application_name, + count(*) AS count, + MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration + FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2 + ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname; + metrics: + - metric_name: ccp_pg_stat_activity_count + value_column: count + description: number of connections in this state + attribute_columns: ["application_name", "datname", "state", "usename"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive + FROM pg_catalog.pg_stat_archiver; + + metrics: + - metric_name: ccp_archive_command_status_seconds_since_last_archive + value_column: seconds_since_last_archive + value_type: double + description: Seconds since the last successful archive operation + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT archived_count + FROM pg_catalog.pg_stat_archiver + metrics: + - metric_name: ccp_archive_command_status_archived_count + value_column: archived_count + description: Number of WAL files that have been successfully archived + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT failed_count + FROM pg_catalog.pg_stat_archiver + metrics: + - metric_name: ccp_archive_command_status_failed_count + value_column: failed_count + description: Number of failed attempts for archiving WAL files + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT CASE + WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 + WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0 + ELSE 
EXTRACT(epoch from (last_failed_time - last_archived_time)) + END AS seconds_since_last_fail + FROM pg_catalog.pg_stat_archiver + + metrics: + - metric_name: ccp_archive_command_status_seconds_since_last_fail + value_column: seconds_since_last_fail + description: Seconds since the last recorded failure of the archive_command + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT ((total - idle) - idle_in_txn) as active + , total + , idle + , idle_in_txn + , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time + , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state <> 'idle' ) AS max_query_time + , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time + , max_connections + FROM ( + SELECT COUNT(*) as total + , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle + , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x + JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true); + + metrics: + - metric_name: ccp_connection_stats_active + value_column: active + description: Total non-idle connections + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_idle + value_column: idle + description: Total idle connections + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_idle_in_txn + value_column: idle_in_txn + description: Total idle in transaction connections + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_max_blocked_query_time + value_column: max_blocked_query_time + value_type: double + description: Value of max_connections for the monitored database + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_max_connections + value_column: max_connections + description: Value of max_connections for the monitored database + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_max_idle_in_txn_time + value_column: max_idle_in_txn_time + value_type: double + description: Length of time in seconds of the longest idle in transaction session + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_max_query_time + value_column: max_query_time + value_type: double + description: Length of time in seconds of the longest running query + static_attributes: + server: "localhost:5432" + - metric_name: ccp_connection_stats_total + value_column: total + description: Total idle and non-idle connections + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT datname AS dbname + , checksum_failures AS count + , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds + FROM pg_catalog.pg_stat_database + WHERE pg_stat_database.datname IS NOT NULL; + metrics: + - metric_name: ccp_data_checksum_failure_count + value_column: count + attribute_columns: ["dbname"] + description: Total number of checksum failures on this database + static_attributes: + server: "localhost:5432" + - metric_name: 
ccp_data_checksum_failure_time_since_last_failure_seconds + value_column: time_since_last_failure_seconds + value_type: double + attribute_columns: ["dbname"] + description: Time interval in seconds since the last checksum failure was encountered + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT pg_database.datname as dbname + , tmp.mode + , COALESCE(count,0) as count + FROM + ( + VALUES ('accesssharelock'), + ('rowsharelock'), + ('rowexclusivelock'), + ('shareupdateexclusivelock'), + ('sharelock'), + ('sharerowexclusivelock'), + ('exclusivelock'), + ('accessexclusivelock') + ) AS tmp(mode) CROSS JOIN pg_catalog.pg_database + LEFT JOIN + (SELECT database, lower(mode) AS mode,count(*) AS count + FROM pg_catalog.pg_locks WHERE database IS NOT NULL + GROUP BY database, lower(mode) + ) AS tmp2 + ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database; + metrics: + - metric_name: ccp_locks_count + value_column: count + attribute_columns: ["dbname", "mode"] + description: Return value of 1 means database is in recovery. Otherwise 2 it is a primary. + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request + , monitor.kdapi_scalar_bigint('cpu_limit') AS limit + metrics: + - metric_name: ccp_nodemx_cpu_limit + value_column: limit + description: CPU limit value in milli cores + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_cpu_request + value_column: request + description: CPU request value in milli cores + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT CASE WHEN monitor.cgroup_mode() = 'legacy' + THEN monitor.cgroup_scalar_bigint('cpuacct.usage') + ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000 + END AS usage, + extract(epoch from clock_timestamp()) AS usage_ts; + metrics: + - metric_name: ccp_nodemx_cpuacct_usage + value_column: usage + value_type: double + description: CPU usage in nanoseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_cpuacct_usage_ts + value_column: usage_ts + value_type: double + description: CPU usage snapshot timestamp + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + CASE + WHEN monitor.cgroup_mode() = 'legacy' THEN + monitor.cgroup_scalar_bigint('cpu.cfs_period_us') + ELSE + (monitor.cgroup_array_bigint('cpu.max'))[2] + END AS period_us, + CASE + WHEN monitor.cgroup_mode() = 'legacy' THEN + GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0) + ELSE + GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0) + END AS quota_us; + metrics: + - metric_name: ccp_nodemx_cpucfs_period_us + value_column: period_us + description: The total available run-time within a period (in microseconds) + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_cpucfs_quota_us + value_column: quota_us + value_type: double + description: The length of a period (in microseconds) + static_attributes: + server: "localhost:5432" + + # NOTE: cgroup v2 has throttled_usec, vs. throttled_time. 
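Review note on the cgroup queries above: the CASE expressions switch on monitor.cgroup_mode() because cgroup v1 ("legacy") and v2 expose CPU accounting differently — v1's cpuacct.usage is already in nanoseconds, v2 reports usage_usec inside cpu.stat (multiplied by 1000 in the query), and the CFS quota is clamped with GREATEST(..., 0) when no limit is set. A standalone sketch of that normalization, with invented input values and helper names of my own (not operator code):

```go
package main

import "fmt"

// cpuUsageNanos mirrors the unit handling in the ccp_nodemx_cpuacct_usage query:
// cgroup v1 exposes cpuacct.usage in nanoseconds already, while cgroup v2
// exposes usage_usec in cpu.stat, which the query multiplies by 1000.
func cpuUsageNanos(mode string, v1UsageNanos, v2UsageUsec int64) int64 {
	if mode == "legacy" {
		return v1UsageNanos
	}
	return v2UsageUsec * 1000
}

// clampQuota mirrors GREATEST(quota, 0) in the CFS query: report zero rather
// than a negative sentinel when no CPU limit is configured.
func clampQuota(quotaUs int64) int64 {
	if quotaUs < 0 {
		return 0
	}
	return quotaUs
}

func main() {
	fmt.Println(cpuUsageNanos("legacy", 123456789, 0)) // 123456789
	fmt.Println(cpuUsageNanos("unified", 0, 4567))     // 4567000 (usec -> nsec)
	fmt.Println(clampQuota(-1))                        // 0
}
```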
+ - sql: > + WITH d(key, val) AS + (select key, val from monitor.cgroup_setof_kv('cpu.stat')) + SELECT + (SELECT val FROM d WHERE key='nr_periods') AS nr_periods, + (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled, + (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time, + extract(epoch from clock_timestamp()) as snap_ts; + metrics: + - metric_name: ccp_nodemx_cpustat_nr_periods + value_column: nr_periods + value_type: double + description: Number of periods that any thread was runnable + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_cpustat_nr_throttled + value_column: nr_throttled + description: Number of runnable periods in which the application used its entire quota and was throttled + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_cpustat_snap_ts + value_column: snap_ts + value_type: double + description: CPU stat snapshot timestamp + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_cpustat_throttled_time + value_column: throttled_time + value_type: double # TODO: Is this right? + description: Sum total amount of time individual threads within the monitor.cgroup were throttled + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes + FROM monitor.proc_mountinfo() m + JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number) + WHERE m.mount_point IN ('/pgdata', '/pgwal') OR + m.mount_point like '/tablespaces/%' + metrics: + - metric_name: ccp_nodemx_data_disk_available_bytes + value_column: available_bytes + value_type: double + description: Available size in bytes + attribute_columns: ["fs_type", "mount_point"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_data_disk_free_file_nodes + value_column: free_file_nodes + description: Available file nodes + attribute_columns: ["fs_type", "mount_point"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_data_disk_total_bytes + value_column: total_bytes + description: Size in bytes + attribute_columns: ["fs_type", "mount_point"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_data_disk_total_file_nodes + value_column: total_file_nodes + description: Total file nodes + attribute_columns: ["fs_type", "mount_point"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT mount_point,sectors_read,sectors_written + FROM monitor.proc_mountinfo() m + JOIN monitor.proc_diskstats() d USING (major_number, minor_number) + WHERE m.mount_point IN ('/pgdata', '/pgwal') OR + m.mount_point like '/tablespaces/%'; + metrics: + - metric_name: ccp_nodemx_disk_activity_sectors_read + value_column: sectors_read + description: Total sectors read + attribute_columns: ["mount_point"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_disk_activity_sectors_written + value_column: sectors_written + description: Total sectors written + attribute_columns: ["mount_point"] + static_attributes: + server: "localhost:5432" + + - sql: > + WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) + SELECT + monitor.kdapi_scalar_bigint('mem_request') AS request, + CASE + WHEN monitor.cgroup_mode() = 'legacy' THEN + (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END) + ELSE + (CASE WHEN 
monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END) + END AS limit, + CASE + WHEN monitor.cgroup_mode() = 'legacy' + THEN (SELECT val FROM d WHERE key='cache') + ELSE 0 + END as cache, + CASE + WHEN monitor.cgroup_mode() = 'legacy' + THEN (SELECT val FROM d WHERE key='rss') + ELSE 0 + END as RSS, + (SELECT val FROM d WHERE key='shmem') as shmem, + CASE + WHEN monitor.cgroup_mode() = 'legacy' + THEN (SELECT val FROM d WHERE key='mapped_file') + ELSE 0 + END as mapped_file, + CASE + WHEN monitor.cgroup_mode() = 'legacy' + THEN (SELECT val FROM d WHERE key='dirty') + ELSE (SELECT val FROM d WHERE key='file_dirty') + END as dirty, + (SELECT val FROM d WHERE key='active_anon') as active_anon, + (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon, + (SELECT val FROM d WHERE key='active_file') as active_file, + (SELECT val FROM d WHERE key='inactive_file') as inactive_file, + CASE + WHEN monitor.cgroup_mode() = 'legacy' + THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes') + ELSE monitor.cgroup_scalar_bigint('memory.current') + END as usage_in_bytes, + CASE + WHEN monitor.cgroup_mode() = 'legacy' + THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes') + ELSE 0 + END as kmem_usage_in_byte; + metrics: + - metric_name: ccp_nodemx_mem_active_anon + value_column: active_anon + value_type: double + description: Total bytes of anonymous and swap cache memory on active LRU list + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_active_file + value_column: active_file + value_type: double + description: Total bytes of file-backed memory on active LRU list + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_cache + value_column: cache + value_type: double + description: Total bytes of page cache memory + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_dirty + value_column: dirty + description: Total bytes that are waiting to get written back to the disk + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_inactive_anon + value_column: inactive_anon + value_type: double + description: Total bytes of anonymous and swap cache memory on inactive LRU list + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_inactive_file + value_column: inactive_file + value_type: double + description: Total bytes of file-backed memory on inactive LRU list + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_kmem_usage_in_byte + value_column: kmem_usage_in_byte + description: Unknown metric from ccp_nodemx_mem + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_limit + value_column: limit + description: Memory limit value in bytes + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_mapped_file + value_column: mapped_file + description: Total bytes of mapped file (includes tmpfs/shmem) + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_request + value_column: request + description: Memory request value in bytes + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_rss + value_column: rss + value_type: double + description: Total bytes of anonymous and swap cache memory + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_shmem + value_column: shmem + value_type: double + description: Total bytes of shared memory + 
static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_mem_usage_in_bytes + value_column: usage_in_bytes + description: Total usage in bytes + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT interface + ,tx_bytes + ,tx_packets + ,rx_bytes + ,rx_packets from monitor.proc_network_stats() + metrics: + - metric_name: ccp_nodemx_network_rx_bytes + value_column: rx_bytes + description: Number of bytes received + attribute_columns: ["interface"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_network_rx_packets + value_column: rx_packets + description: Number of packets received + attribute_columns: ["interface"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_network_tx_bytes + value_column: tx_bytes + description: Number of bytes transmitted + attribute_columns: ["interface"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_nodemx_network_tx_packets + value_column: tx_packets + description: Number of packets transmitted + attribute_columns: ["interface"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT monitor.cgroup_process_count() as count; + metrics: + - metric_name: ccp_nodemx_process_count + value_column: count + description: Total number of database processes + static_attributes: + server: "localhost:5432" + + # Setting pg_stat_statements_reset_info to -1 means update as often as possible. + - sql: > + SELECT monitor.pg_stat_statements_reset_info(-1) as time; + metrics: + - metric_name: ccp_pg_stat_statements_reset_time + value_column: time + description: Epoch time when stats were reset + static_attributes: + server: "localhost:5432" + + + # This query against pg_stat_statements is compatible with PG 13 and later. + # https://github.com/CrunchyData/pgmonitor-extension/blob/main/sql/functions/functions.sql + # TODO: Double-check the sorting and the attribute values on the below. 
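Review note: ccp_pg_stat_statements_reset_time above reports the reset moment as epoch seconds, so a consumer that wants "time since stats were reset" subtracts it from the current time. A small illustrative sketch of that calculation (the scraped value here is invented, not something the operator computes):

```go
package main

import (
	"fmt"
	"time"
)

// secondsSinceReset converts the epoch-seconds gauge into an age, the usual
// way to alert on "statistics were reset recently".
func secondsSinceReset(resetEpochSeconds float64, now time.Time) float64 {
	reset := time.Unix(int64(resetEpochSeconds), 0)
	return now.Sub(reset).Seconds()
}

func main() {
	now := time.Now()
	// Pretend the collector scraped a reset timestamp ten minutes ago.
	scraped := float64(now.Add(-10 * time.Minute).Unix())
	fmt.Printf("seconds since pg_stat_statements reset: %.0f\n", secondsSinceReset(scraped, now))
}
```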
+ - sql: > + WITH monitor AS ( + SELECT + pg_get_userbyid(s.userid) AS role + , d.datname AS dbname + , s.queryid AS queryid + , btrim(replace(left(s.query, 40), '\n', '')) AS query + , s.calls + , s.total_exec_time AS total_exec_time + , s.max_exec_time AS max_exec_time + , s.mean_exec_time AS mean_exec_time + , s.rows + , s.wal_records AS records + , s.wal_fpi AS fpi + , s.wal_bytes AS bytes + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d ON d.oid = s.dbid + ) + SELECT role + , dbname + , queryid + , query + , max(monitor.mean_exec_time) AS top_mean_exec_time_ms + FROM monitor + GROUP BY 1,2,3,4 + ORDER BY 5 DESC + LIMIT 20; + metrics: + - metric_name: ccp_pg_stat_statements_top_mean_exec_time_ms + value_column: top_mean_exec_time_ms + value_type: double + description: Average query runtime in milliseconds + attribute_columns: ["dbname", "query", "queryid", "role"] + static_attributes: + server: "localhost:5432" + + - sql: > + WITH monitor AS ( + SELECT + pg_get_userbyid(s.userid) AS role + , d.datname AS dbname + , s.calls + , s.total_exec_time + , s.mean_exec_time + , s.rows + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d ON d.oid = s.dbid + ) + SELECT role + , dbname + , sum(calls) AS calls_count + , sum(total_exec_time) AS exec_time_ms + , avg(mean_exec_time) AS mean_exec_time_ms + , sum(rows) AS row_count + FROM monitor + GROUP BY 1,2; + metrics: + - metric_name: ccp_pg_stat_statements_total_calls_count + value_column: calls_count + value_type: double + description: Total number of queries run per user/database + attribute_columns: ["dbname", "role"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_pg_stat_statements_total_exec_time_ms + value_column: exec_time_ms + value_type: double + description: Total runtime of all queries per user/database + attribute_columns: ["dbname", "role"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_pg_stat_statements_total_mean_exec_time_ms + value_column: mean_exec_time_ms + value_type: double + description: Total runtime of all queries per user/database + attribute_columns: ["dbname", "role"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_pg_stat_statements_total_row_count + value_column: row_count + value_type: double + description: Total rows returned from all queries per user/database + attribute_columns: ["dbname", "role"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT current_setting('server_version_num')::int AS current; + metrics: + - metric_name: ccp_postgresql_version_current + value_column: current + description: The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######). + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds; + metrics: + - metric_name: ccp_postmaster_uptime_seconds + value_column: seconds + value_type: double + description: Time interval in seconds since PostgreSQL database was last restarted. + static_attributes: + server: "localhost:5432" + + # get_replication_lag is created in metrics_setup.sql + - sql: > + SELECT * FROM get_replication_lag(); + metrics: + - metric_name: ccp_replication_lag_size_bytes + value_column: bytes + value_type: double + description: Time interval in seconds since PostgreSQL database was last restarted. 
+ static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + COALESCE( + CASE + WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0 + ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER + END, + 0 + ) AS replay_time, + COALESCE( + CASE + WHEN pg_is_in_recovery() = false THEN 0 + ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER + END, + 0 + ) AS received_time, + CASE + WHEN pg_is_in_recovery() = true THEN 'replica' + ELSE 'primary' + END AS role; + metrics: + - metric_name: ccp_replication_lag_received_time + value_column: received_time + value_type: double + description: | + Length of time since the last WAL file was received and replayed on replica. + Always increases, possibly causing false positives if the primary stops writing. + Monitors for replicas that stop receiving WAL all together. + attribute_columns: ["role"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_lag_replay_time + value_column: replay_time + value_type: double + description: | + Length of time since the last transaction was replayed on replica. + Returns zero if last WAL received equals last WAL replayed. Avoids + false positives when primary stops writing. Monitors for replicas that + cannot keep up with primary WAL generation. + attribute_columns: ["role"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true; + metrics: + - metric_name: ccp_settings_pending_restart_count + value_column: count + description: Number of settings from pg_settings catalog in a pending_restart state + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + buffers_clean + , maxwritten_clean + , buffers_alloc + FROM pg_catalog.pg_stat_bgwriter; + metrics: + - metric_name: ccp_stat_bgwriter_buffers_alloc + value_column: buffers_alloc + description: Number of buffers allocated + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_buffers_clean + value_column: buffers_clean + data_type: sum + description: Number of buffers written by the background writer + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_bgwriter_maxwritten_clean + value_column: maxwritten_clean + description: Number of times the background writer stopped a cleaning scan because it had written too many buffers + static_attributes: + server: "localhost:5432" + + - sql: > + WITH max_age AS ( + SELECT 2000000000 as max_old_xid + , setting AS autovacuum_freeze_max_age + FROM pg_catalog.pg_settings + WHERE name = 'autovacuum_freeze_max_age') + , per_database_stats AS ( + SELECT datname + , m.max_old_xid::int + , m.autovacuum_freeze_max_age::int + , age(d.datfrozenxid) AS oldest_current_xid + FROM pg_catalog.pg_database d + JOIN max_age m ON (true) + WHERE d.datallowconn) + SELECT max(oldest_current_xid) AS oldest_current_xid + , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound + , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac + FROM per_database_stats; + metrics: + - metric_name: ccp_transaction_wraparound_oldest_current_xid + value_column: oldest_current_xid + description: Oldest current transaction ID in cluster + static_attributes: + server: "localhost:5432" + - metric_name: ccp_transaction_wraparound_percent_towards_emergency_autovac + value_column: 
percent_towards_emergency_autovac + description: Percentage towards emergency autovacuum process starting + static_attributes: + server: "localhost:5432" + - metric_name: ccp_transaction_wraparound_percent_towards_wraparound + value_column: percent_towards_wraparound + description: Percentage towards transaction ID wraparound + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT last_5_min_size_bytes, + (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes + FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification > CURRENT_TIMESTAMP - '5 minutes'::interval) x; + metrics: + - metric_name: ccp_wal_activity_total_size_bytes + value_column: total_size_bytes + description: Current size in bytes of the WAL directory + static_attributes: + server: "localhost:5432" + + - sql: > + WITH monitor AS ( + SELECT + pg_get_userbyid(s.userid) AS role + , d.datname AS dbname + , s.queryid AS queryid + , btrim(replace(left(s.query, 40), '\n', '')) AS query + , s.calls + , s.total_exec_time AS total_exec_time + , s.max_exec_time AS max_exec_time_ms + , s.rows + , s.wal_records AS records + , s.wal_fpi AS fpi + , s.wal_bytes AS bytes + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d ON d.oid = s.dbid + ) + SELECT role + , dbname + , queryid + , query + , max_exec_time_ms + , records + FROM monitor + ORDER BY 5 DESC + LIMIT 20; + metrics: + - metric_name: ccp_pg_stat_statements_top_max_exec_time_ms + value_column: max_exec_time_ms + value_type: double + description: Epoch time when stats were reset + attribute_columns: ["dbname", "query", "queryid", "role"] + static_attributes: + server: "localhost:5432" + + - sql: > + WITH monitor AS ( + SELECT + pg_get_userbyid(s.userid) AS role + , d.datname AS dbname + , s.queryid AS queryid + , btrim(replace(left(s.query, 40), '\n', '')) AS query + , s.calls + , s.total_exec_time AS total_exec_time_ms + , s.rows + , s.wal_records AS records + , s.wal_fpi AS fpi + , s.wal_bytes AS bytes + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d ON d.oid = s.dbid + ) + SELECT role + , dbname + , queryid + , query + , total_exec_time_ms + , records + FROM monitor + ORDER BY 5 DESC + LIMIT 20; + metrics: + - metric_name: ccp_pg_stat_statements_top_total_exec_time_ms + value_column: total_exec_time_ms + value_type: double + description: Total time spent in the statement in milliseconds + attribute_columns: ["dbname", "query", "queryid", "role"] + static_attributes: + server: "localhost:5432" + + - sql: > + WITH monitor AS ( + SELECT + pg_get_userbyid(s.userid) AS role + , d.datname AS dbname + , s.queryid AS queryid + , btrim(replace(left(s.query, 40), '\n', '')) AS query + , s.calls + , s.total_exec_time AS total_exec_time + , s.max_exec_time AS max_exec_time + , s.mean_exec_time AS mean_exec_time + , s.rows + , s.wal_records AS records + , s.wal_fpi AS fpi + , s.wal_bytes AS bytes + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d ON d.oid = s.dbid + ) + SELECT role + , dbname + , query + , queryid + , records + , fpi + , bytes + FROM monitor + ORDER BY bytes DESC + LIMIT 20; + metrics: + - metric_name: ccp_pg_stat_statements_top_wal_bytes + value_column: bytes + value_type: double + description: Total amount of WAL generated by the statement in bytes + attribute_columns: ["dbname", "query", "queryid", "role"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_pg_stat_statements_top_wal_fpi + value_column: fpi + 
value_type: double + description: Total number of WAL full page images generated by the statement + attribute_columns: ["dbname", "query", "queryid", "role"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_pg_stat_statements_top_wal_records + value_column: records + value_type: double + description: Total number of WAL records generated by the statement + attribute_columns: ["dbname", "query", "queryid", "role"] + static_attributes: + server: "localhost:5432" + + - sql: | + SELECT * FROM get_pgbackrest_info(); + metrics: + - metric_name: ccp_backrest_last_diff_backup_time_since_completion_seconds + description: Seconds since the last completed full or differential backup. Differential is always based off last full. + value_column: last_diff_backup + attribute_columns: ["repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_full_backup_time_since_completion_seconds + description: Seconds since the last completed full backup + value_column: last_full_backup + attribute_columns: ["repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_incr_backup_time_since_completion_seconds + description: | + Seconds since the last completed full, differential or incremental backup. + Incremental is always based off last full or differential. + value_column: last_incr_backup + attribute_columns: ["repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_info_backrest_repo_version + description: pgBackRest version number when this backup was performed + value_column: last_info_backrest_repo_version + attribute_columns: ["backup_type", "repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_info_backup_error + description: An error has been encountered in the backup. Check logs for more information. 
+ value_column: last_info_backup_error + attribute_columns: ["backup_type", "repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_info_backup_runtime_seconds + description: Total runtime in seconds of this backup + value_column: backup_runtime_seconds + attribute_columns: ["backup_type", "repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_info_repo_backup_size_bytes + description: Actual size of only this individual backup in the pgbackrest repository + value_column: repo_backup_size_bytes + attribute_columns: ["backup_type", "repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_last_info_repo_total_size_bytes + description: Total size of this backup in the pgbackrest repository, including all required previous backups and WAL + value_column: repo_total_size_bytes + attribute_columns: ["backup_type", "repo"] + static_attributes: + server: "localhost:5432" + stanza: "db" + - metric_name: ccp_backrest_oldest_full_backup_time_seconds + description: Seconds since the oldest completed full backup + value_column: oldest_full_backup + attribute_columns: ["repo"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/postgres_logs_transforms.yaml b/internal/collector/postgres_logs_transforms.yaml new file mode 100644 index 0000000000..f397b996e8 --- /dev/null +++ b/internal/collector/postgres_logs_transforms.yaml @@ -0,0 +1,211 @@ +# This list of transform statements configures an OTel Transform Processor to +# parse PostgreSQL logs. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme +# https://www.postgresql.org/docs/current/runtime-config-logging.html + + +# TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme +- context: log + conditions: + - body["format"] == "csv" + statements: + # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv + - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) + + # Extract the optional "remote_port" value from the "connection_from" field. It is either: + # 1. a Unix socket starting with "[local]:" or + # 2. a TCP socket ending with a colon U+003A and a port. + # + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2701 + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/common/ip.c#l227 + # + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/csvlog.c#l108 + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/common/ip.c#l224 + - >- + merge_maps(cache, + ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), + "insert") + where Len(cache["connection_from"]) > 0 + + # When there is a "remote_port" value, everything before it is the "remote_host" value. + - >- + set(cache["remote_host"], + Substring(cache["connection_from"], 0, Len(cache["connection_from"]) - Len(cache["remote_port"]) - 1)) + where Len(cache["connection_from"]) > 0 and IsString(cache["remote_port"]) + + # When there is still no "remote_host" value, copy the "connection_from" value, if any. 
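Review note on the connection_from handling around this point: the statements above and the one that follows this comment split connection_from into remote_host/remote_port — capture a trailing ":port" unless the value is a Unix socket, strip the port and its colon to get the host, and otherwise carry the whole value over as the host. A rough standalone sketch of the same parsing in Go, using capture-group names of my own since the originals are not reproduced here:

```go
package main

import (
	"fmt"
	"regexp"
)

// Same shape of pattern the transform uses; the group names are illustrative.
var connectionFrom = regexp.MustCompile(`(?:^\[local\]:(?P<socket>.+)|:(?P<remote_port>[^:]+))$`)

func parseConnectionFrom(s string) (host, port string) {
	if m := connectionFrom.FindStringSubmatch(s); m != nil {
		port = m[connectionFrom.SubexpIndex("remote_port")]
	}
	if port != "" {
		// Everything before the ":port" suffix is the host.
		host = s[:len(s)-len(port)-1]
	} else {
		// Unix sockets (and anything unparsed) keep the whole value as the host.
		host = s
	}
	return host, port
}

func main() {
	fmt.Println(parseConnectionFrom("10.0.0.5:52414"))             // 10.0.0.5 52414
	fmt.Println(parseConnectionFrom("[local]:/tmp/.s.PGSQL.5432")) // host only, no port
}
```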
+ - >- + set(cache["remote_host"], cache["connection_from"]) + where Len(cache["connection_from"]) > 0 and not IsString(cache["remote_host"]) + + # Extract the values encoded in the "location" field. + # + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2805 + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/csvlog.c#l207 + - >- + merge_maps(cache, + ExtractPatterns(cache["location"], "^(?:(?[^,]+), )?(?[^:]+):(?\\d+)$"), + "insert") + where Len(cache["location"]) > 0 + + # These values are numeric in JSON logs. + - >- + set(cache["cursor_position"], Double(cache["cursor_position"])) + where IsMatch(cache["cursor_position"], "^[0-9.]+$") + - >- + set(cache["file_line_num"], Double(cache["file_line_num"])) + where IsMatch(cache["file_line_num"], "^[0-9.]+$") + - >- + set(cache["internal_position"], Double(cache["internal_position"])) + where IsMatch(cache["internal_position"], "^[0-9.]+$") + - >- + set(cache["leader_pid"], Double(cache["leader_pid"])) + where IsMatch(cache["leader_pid"], "^[0-9.]+$") + - >- + set(cache["line_num"], Double(cache["line_num"])) + where IsMatch(cache["line_num"], "^[0-9.]+$") + - >- + set(cache["pid"], Double(cache["pid"])) + where IsMatch(cache["pid"], "^[0-9.]+$") + - >- + set(cache["query_id"], Double(cache["query_id"])) + where IsMatch(cache["query_id"], "^[0-9.]+$") + - >- + set(cache["remote_port"], Double(cache["remote_port"])) + where IsMatch(cache["remote_port"], "^[0-9.]+$") + + # Pass the results to the next set of statements. + - set(body["parsed"], cache) + + +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme +- context: log + statements: + - set(instrumentation_scope.name, "postgres") + - set(instrumentation_scope.version, resource.attributes["db.version"]) + + # TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. + - set(cache, body["parsed"]) where body["format"] == "csv" + + # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsejson + - set(cache, ParseJSON(body["original"])) where body["format"] == "json" + + # The log severity is in the "error_severity" field. + # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext + - set(severity_text, cache["error_severity"]) + + # Map severity text to OpenTelemetry severity levels. + # Postgres has levels beyond the typical ones: + # - Multiple DEBUG levels, with DEBUG5 being the most detailed. + # - NOTICE is more severe than INFO. + # - PANIC is more severe than FATAL. 
+ # + # https://www.postgresql.org/docs/current/runtime-config-logging.html#RUNTIME-CONFIG-SEVERITY-LEVELS + # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber + # https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings + # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5" + - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4" + - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3" + - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" or severity_text == "LOG" + - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" + - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC" + + # Parse the "timestamp" field into the record timestamp. + # The format is neither RFC 3339 nor ISO 8601: + # + # The date and time are separated by a single space U+0020, + # followed by a dot U+002E, milliseconds, another space U+0020, + # then a timezone abbreviation. + # + # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#time + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2246 + # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/elog.c#l2671 + - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + + # Rename fields emitted by Postgres to align with OpenTelemetry semantic conventions. + # + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs#readme + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/database#readme + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/database/postgresql.md + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/database/sql.md + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - set(resource.attributes["db.system"], "postgresql") + + # Keep the unparsed log record in a standard attribute, + # and replace the log record body with the parsed fields. + # + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md + - set(attributes["log.record.original"], body["original"]) + - set(body, cache) + + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/client.md + - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) + - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) + + # These values are populated when the "log_error_verbosity" parameter is VERBOSE. 
+ # + # https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-LOG-ERROR-VERBOSITY + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/code.md + - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) + - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) + - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) + + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/db.md + - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) + - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) + + # Postgres is multiprocess so some client/backend details align here. + # + # The "session_start" value is formatted as "%F %T UTC", but "process.creation.time" should be ISO 8601. + # + # https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/error/elog.c;hb=REL_10_0#l2256 + # https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/error/elog.c;hb=REL_17_0#l2697 + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/process.md + - >- + set(attributes["process.creation.time"], Concat([ + Substring(body["session_start"], 0, 10), "T", + Substring(body["session_start"], 11, 8), "Z"], "")) + where IsMatch(body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") + - >- + set(attributes["process.pid"], Int(body["pid"])) + where IsDouble(body["pid"]) + - >- + set(attributes["process.title"], body["ps"]) + where IsString(body["ps"]) + + # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/user.md + - >- + set(attributes["user.name"], body["user"]) + where IsString(body["user"]) + + +# Look for and parse the CSV of a pgAudit message. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme +# https://github.com/pgaudit/pgaudit#format +- context: log + conditions: + # Messages from pgAudit have always been prefixed with "AUDIT:", but that + # could change in the future. + # + # https://github.com/pgaudit/pgaudit/blame/17.0/pgaudit.c#L876 + # TODO(postgres-18): Check this prefix and update the URL above. + - >- + Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: " + statements: + # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv + - >- + set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) - 7), + "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", + delimiter=",", mode="strict")) + - >- + set(instrumentation_scope.name, "pgaudit") + where Len(body["pgaudit"]) > 0 diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go new file mode 100644 index 0000000000..5d56afbf00 --- /dev/null +++ b/internal/collector/postgres_metrics.go @@ -0,0 +1,112 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + _ "embed" + "encoding/json" + "fmt" + "slices" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/pgmonitor" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +// https://pkg.go.dev/embed +// +//go:embed "generated/postgres_5s_metrics.json" +var fiveSecondMetrics json.RawMessage + +//go:embed "generated/postgres_5m_metrics.json" +var fiveMinuteMetrics json.RawMessage + +//go:embed "generated/gte_pg17_metrics.json" +var gtePG17 json.RawMessage + +//go:embed "generated/lt_pg17_metrics.json" +var ltPG17 json.RawMessage + +//go:embed "generated/gte_pg16_metrics.json" +var gtePG16 json.RawMessage + +//go:embed "generated/lt_pg16_metrics.json" +var ltPG16 json.RawMessage + +func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, config *Config) { + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + // We must create a copy of the fiveSecondMetrics variable, otherwise we + // will continually append to it and blow up our ConfigMap + fiveSecondMetricsClone := slices.Clone(fiveSecondMetrics) + + if inCluster.Spec.PostgresVersion >= 17 { + fiveSecondMetricsClone, _ = appendToJSONArray(fiveSecondMetricsClone, gtePG17) + } else { + fiveSecondMetricsClone, _ = appendToJSONArray(fiveSecondMetricsClone, ltPG17) + } + + if inCluster.Spec.PostgresVersion >= 16 { + fiveSecondMetricsClone, _ = appendToJSONArray(fiveSecondMetricsClone, gtePG16) + } else { + fiveSecondMetricsClone, _ = appendToJSONArray(fiveSecondMetricsClone, ltPG16) + } + + // Add Prometheus exporter + config.Exporters[Prometheus] = map[string]any{ + "endpoint": "0.0.0.0:9187", + } + + config.Receivers[FiveSecondSqlQuery] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf(`host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, pgmonitor.MonitoringUser), + "collection_interval": "5s", + // Give Postgres time to finish setup. + "initial_delay": "10s", + "queries": slices.Clone(fiveSecondMetricsClone), + } + + config.Receivers[FiveMinuteSqlQuery] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf(`host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, pgmonitor.MonitoringUser), + "collection_interval": "300s", + // Give Postgres time to finish setup. + "initial_delay": "10s", + "queries": slices.Clone(fiveMinuteMetrics), + } + // Add Metrics Pipeline + config.Pipelines[PostgresMetrics] = Pipeline{ + Receivers: []ComponentID{FiveSecondSqlQuery, FiveMinuteSqlQuery}, + Processors: []ComponentID{ + SubSecondBatchProcessor, + CompactingProcessor, + }, + Exporters: []ComponentID{Prometheus}, + } + } +} + +// appendToJSONArray appends elements of a json.RawMessage containing an array +// to another json.RawMessage containing an array. +func appendToJSONArray(a1, a2 json.RawMessage) (json.RawMessage, error) { + var slc1 []json.RawMessage + if err := json.Unmarshal(a1, &slc1); err != nil { + return nil, err + } + + var slc2 []json.RawMessage + if err := json.Unmarshal(a2, &slc2); err != nil { + return nil, err + } + + mergedSlice := append(slc1, slc2...) 
+ + merged, err := json.Marshal(mergedSlice) + if err != nil { + return nil, err + } + + return merged, nil +} diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go new file mode 100644 index 0000000000..1c09d32b28 --- /dev/null +++ b/internal/collector/postgres_test.go @@ -0,0 +1,477 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + "testing" + + "gotest.tools/v3/assert" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestEnablePostgresLogging(t *testing.T) { + t.Run("NilInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = 99 + + config := NewConfig(nil) + params := postgres.NewParameterSet() + + EnablePostgresLogging(ctx, cluster, config, params) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed +extensions: + file_storage/pgbackrest_logs: + create_directory: false + directory: /pgdata/pgbackrest/log/receiver + fsync: true + file_storage/postgres_logs: + create_directory: true + directory: /pgdata/logs/postgres/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgbackrest: + attributes: + - action: insert + key: k8s.container.name + value: database + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + resource/postgres: + attributes: + - action: insert + key: k8s.container.name + value: database + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + - action: insert + key: db.system + value: postgresql + - action: insert + key: db.version + value: "99" + transform/pgbackrest_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "pgbackrest") + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): + (?(?s).*)$"), "insert") where Len(body) > 0' + - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) + - set(attributes["process.pid"], cache["process_id"]) + - set(attributes["log.record.original"], body) + - set(body, 
cache["message"]) + transform/postgres_logs: + log_statements: + - conditions: + - body["format"] == "csv" + context: log + statements: + - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) + - merge_maps(cache, ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), + "insert") where Len(cache["connection_from"]) > 0 + - set(cache["remote_host"], Substring(cache["connection_from"], 0, Len(cache["connection_from"]) + - Len(cache["remote_port"]) - 1)) where Len(cache["connection_from"]) > 0 + and IsString(cache["remote_port"]) + - set(cache["remote_host"], cache["connection_from"]) where Len(cache["connection_from"]) + > 0 and not IsString(cache["remote_host"]) + - merge_maps(cache, ExtractPatterns(cache["location"], "^(?:(?[^,]+), + )?(?[^:]+):(?\\d+)$"), "insert") where Len(cache["location"]) + > 0 + - set(cache["cursor_position"], Double(cache["cursor_position"])) where IsMatch(cache["cursor_position"], + "^[0-9.]+$") + - set(cache["file_line_num"], Double(cache["file_line_num"])) where IsMatch(cache["file_line_num"], + "^[0-9.]+$") + - set(cache["internal_position"], Double(cache["internal_position"])) where + IsMatch(cache["internal_position"], "^[0-9.]+$") + - set(cache["leader_pid"], Double(cache["leader_pid"])) where IsMatch(cache["leader_pid"], + "^[0-9.]+$") + - set(cache["line_num"], Double(cache["line_num"])) where IsMatch(cache["line_num"], + "^[0-9.]+$") + - set(cache["pid"], Double(cache["pid"])) where IsMatch(cache["pid"], "^[0-9.]+$") + - set(cache["query_id"], Double(cache["query_id"])) where IsMatch(cache["query_id"], + "^[0-9.]+$") + - set(cache["remote_port"], Double(cache["remote_port"])) where IsMatch(cache["remote_port"], + "^[0-9.]+$") + - set(body["parsed"], cache) + - context: log + statements: + - set(instrumentation_scope.name, "postgres") + - set(instrumentation_scope.version, resource.attributes["db.version"]) + - set(cache, body["parsed"]) where body["format"] == "csv" + - set(cache, ParseJSON(body["original"])) where body["format"] == "json" + - set(severity_text, cache["error_severity"]) + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5" + - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4" + - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3" + - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + or severity_text == "LOG" + - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" + - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC" + - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - set(resource.attributes["db.system"], "postgresql") + - set(attributes["log.record.original"], body["original"]) + - set(body, cache) + - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) + - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) + - set(attributes["code.filepath"], body["file_name"]) where 
IsString(body["file_name"]) + - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) + - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) + - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) + - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) + - set(attributes["process.creation.time"], Concat([ Substring(body["session_start"], + 0, 10), "T", Substring(body["session_start"], 11, 8), "Z"], "")) where IsMatch(body["session_start"], + "^[^ ]{10} [^ ]{8} UTC$") + - set(attributes["process.pid"], Int(body["pid"])) where IsDouble(body["pid"]) + - set(attributes["process.title"], body["ps"]) where IsString(body["ps"]) + - set(attributes["user.name"], body["user"]) where IsString(body["user"]) + - conditions: + - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: + "' + context: log + statements: + - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) + - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", + delimiter=",", mode="strict")) + - set(instrumentation_scope.name, "pgaudit") where Len(body["pgaudit"]) > 0 +receivers: + filelog/pgbackrest_log: + include: + - /pgdata/pgbackrest/log/*.log + multiline: + line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} + storage: file_storage/pgbackrest_logs + filelog/postgres_csvlog: + include: + - /pgdata/logs/postgres/*.csv + multiline: + line_start_pattern: ^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d.\d{3} UTC,(?:"[_\D](?:[^"]|"")*")?,(?:"[_\D](?:[^"]|"")*")?,\d*,(?:"(?:[^"]|"")+")?,[0-9a-f]+[.][0-9a-f]+,\d+, + operators: + - from: body + to: body.original + type: move + - field: body.format + type: add + value: csv + - field: body.headers + type: add + value: timestamp,user,dbname,pid,connection_from,session_id,line_num,ps,session_start,vxid,txid,error_severity,state_code,message,detail,hint,internal_query,internal_position,context,statement,cursor_position,location,application_name,backend_type,leader_pid,query_id + storage: file_storage/postgres_logs + filelog/postgres_jsonlog: + include: + - /pgdata/logs/postgres/*.json + operators: + - from: body + to: body.original + type: move + - field: body.format + type: add + value: json + storage: file_storage/postgres_logs +service: + extensions: + - file_storage/pgbackrest_logs + - file_storage/postgres_logs + pipelines: + logs/pgbackrest: + exporters: + - debug + processors: + - resource/pgbackrest + - transform/pgbackrest_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgbackrest_log + logs/postgres: + exporters: + - debug + processors: + - resource/postgres + - transform/postgres_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/postgres_csvlog + - filelog/postgres_jsonlog +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = 99 + cluster.Spec.Instrumentation = testInstrumentationSpec() + + config := NewConfig(cluster.Spec.Instrumentation) + params := postgres.NewParameterSet() + + EnablePostgresLogging(ctx, cluster, config, params) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, 
`# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name +extensions: + file_storage/pgbackrest_logs: + create_directory: false + directory: /pgdata/pgbackrest/log/receiver + fsync: true + file_storage/postgres_logs: + create_directory: true + directory: /pgdata/logs/postgres/receiver + fsync: true +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + groupbyattrs/compact: {} + resource/pgbackrest: + attributes: + - action: insert + key: k8s.container.name + value: database + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + resource/postgres: + attributes: + - action: insert + key: k8s.container.name + value: database + - action: insert + key: k8s.namespace.name + value: ${env:K8S_POD_NAMESPACE} + - action: insert + key: k8s.pod.name + value: ${env:K8S_POD_NAME} + - action: insert + key: db.system + value: postgresql + - action: insert + key: db.version + value: "99" + transform/pgbackrest_logs: + log_statements: + - context: log + statements: + - set(instrumentation_scope.name, "pgbackrest") + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): + (?(?s).*)$"), "insert") where Len(body) > 0' + - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" + - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) + - set(attributes["process.pid"], cache["process_id"]) + - set(attributes["log.record.original"], body) + - set(body, cache["message"]) + transform/postgres_logs: + log_statements: + - conditions: + - body["format"] == "csv" + context: log + statements: + - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) + - merge_maps(cache, ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), + "insert") where Len(cache["connection_from"]) > 0 + - set(cache["remote_host"], Substring(cache["connection_from"], 0, Len(cache["connection_from"]) + - Len(cache["remote_port"]) - 1)) where Len(cache["connection_from"]) > 0 + and IsString(cache["remote_port"]) + - set(cache["remote_host"], cache["connection_from"]) where Len(cache["connection_from"]) + > 0 and not IsString(cache["remote_host"]) + - merge_maps(cache, ExtractPatterns(cache["location"], "^(?:(?[^,]+), + )?(?[^:]+):(?\\d+)$"), "insert") where Len(cache["location"]) + > 0 + - set(cache["cursor_position"], Double(cache["cursor_position"])) where IsMatch(cache["cursor_position"], + "^[0-9.]+$") + - set(cache["file_line_num"], Double(cache["file_line_num"])) where IsMatch(cache["file_line_num"], + "^[0-9.]+$") + - set(cache["internal_position"], Double(cache["internal_position"])) where + 
IsMatch(cache["internal_position"], "^[0-9.]+$") + - set(cache["leader_pid"], Double(cache["leader_pid"])) where IsMatch(cache["leader_pid"], + "^[0-9.]+$") + - set(cache["line_num"], Double(cache["line_num"])) where IsMatch(cache["line_num"], + "^[0-9.]+$") + - set(cache["pid"], Double(cache["pid"])) where IsMatch(cache["pid"], "^[0-9.]+$") + - set(cache["query_id"], Double(cache["query_id"])) where IsMatch(cache["query_id"], + "^[0-9.]+$") + - set(cache["remote_port"], Double(cache["remote_port"])) where IsMatch(cache["remote_port"], + "^[0-9.]+$") + - set(body["parsed"], cache) + - context: log + statements: + - set(instrumentation_scope.name, "postgres") + - set(instrumentation_scope.version, resource.attributes["db.version"]) + - set(cache, body["parsed"]) where body["format"] == "csv" + - set(cache, ParseJSON(body["original"])) where body["format"] == "json" + - set(severity_text, cache["error_severity"]) + - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5" + - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4" + - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3" + - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2" + - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1" + - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" + or severity_text == "LOG" + - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE" + - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" + - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" + - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC" + - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") + - set(resource.attributes["db.system"], "postgresql") + - set(attributes["log.record.original"], body["original"]) + - set(body, cache) + - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) + - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) + - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) + - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) + - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) + - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) + - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) + - set(attributes["process.creation.time"], Concat([ Substring(body["session_start"], + 0, 10), "T", Substring(body["session_start"], 11, 8), "Z"], "")) where IsMatch(body["session_start"], + "^[^ ]{10} [^ ]{8} UTC$") + - set(attributes["process.pid"], Int(body["pid"])) where IsDouble(body["pid"]) + - set(attributes["process.title"], body["ps"]) where IsString(body["ps"]) + - set(attributes["user.name"], body["user"]) where IsString(body["user"]) + - conditions: + - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: + "' + context: log + statements: + - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) + - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", + 
delimiter=",", mode="strict")) + - set(instrumentation_scope.name, "pgaudit") where Len(body["pgaudit"]) > 0 +receivers: + filelog/pgbackrest_log: + include: + - /pgdata/pgbackrest/log/*.log + multiline: + line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} + storage: file_storage/pgbackrest_logs + filelog/postgres_csvlog: + include: + - /pgdata/logs/postgres/*.csv + multiline: + line_start_pattern: ^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d.\d{3} UTC,(?:"[_\D](?:[^"]|"")*")?,(?:"[_\D](?:[^"]|"")*")?,\d*,(?:"(?:[^"]|"")+")?,[0-9a-f]+[.][0-9a-f]+,\d+, + operators: + - from: body + to: body.original + type: move + - field: body.format + type: add + value: csv + - field: body.headers + type: add + value: timestamp,user,dbname,pid,connection_from,session_id,line_num,ps,session_start,vxid,txid,error_severity,state_code,message,detail,hint,internal_query,internal_position,context,statement,cursor_position,location,application_name,backend_type,leader_pid,query_id + storage: file_storage/postgres_logs + filelog/postgres_jsonlog: + include: + - /pgdata/logs/postgres/*.json + operators: + - from: body + to: body.original + type: move + - field: body.format + type: add + value: json + storage: file_storage/postgres_logs +service: + extensions: + - file_storage/pgbackrest_logs + - file_storage/postgres_logs + pipelines: + logs/pgbackrest: + exporters: + - googlecloud + processors: + - resource/pgbackrest + - transform/pgbackrest_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/pgbackrest_log + logs/postgres: + exporters: + - googlecloud + processors: + - resource/postgres + - transform/postgres_logs + - batch/200ms + - groupbyattrs/compact + receivers: + - filelog/postgres_csvlog + - filelog/postgres_jsonlog +`) + }) +} diff --git a/internal/config/config.go b/internal/config/config.go index ff3c6507d0..cc72b921ed 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -22,20 +22,26 @@ func defaultFromEnv(value, key string) string { // FetchKeyCommand returns the fetch_key_cmd value stored in the encryption_key_command // variable used to enable TDE. func FetchKeyCommand(spec *v1beta1.PostgresClusterSpec) string { + if config := spec.Config; config != nil { + if parameters := config.Parameters; parameters != nil { + if v, ok := parameters["encryption_key_command"]; ok { + return v.String() + } + } + } + if spec.Patroni != nil { - if spec.Patroni.DynamicConfiguration != nil { - configuration := spec.Patroni.DynamicConfiguration - if configuration != nil { - if postgresql, ok := configuration["postgresql"].(map[string]any); ok { - if parameters, ok := postgresql["parameters"].(map[string]any); ok { - if parameters["encryption_key_command"] != nil { - return fmt.Sprintf("%s", parameters["encryption_key_command"]) - } + if configuration := spec.Patroni.DynamicConfiguration; configuration != nil { + if postgresql, ok := configuration["postgresql"].(map[string]any); ok { + if parameters, ok := postgresql["parameters"].(map[string]any); ok { + if parameters["encryption_key_command"] != nil { + return fmt.Sprintf("%s", parameters["encryption_key_command"]) } } } } } + return "" } @@ -99,6 +105,17 @@ func PGExporterContainerImage(cluster *v1beta1.PostgresCluster) string { return defaultFromEnv(image, "RELATED_IMAGE_PGEXPORTER") } +// CollectorContainerImage returns the container image to use for the +// collector container. 
+func CollectorContainerImage(instrumentation *v1beta1.InstrumentationSpec) string { + var image string + if instrumentation != nil { + image = instrumentation.Image + } + + return defaultFromEnv(image, "RELATED_IMAGE_COLLECTOR") +} + // PostgresContainerImage returns the container image to use for PostgreSQL. func PostgresContainerImage(cluster *v1beta1.PostgresCluster) string { image := cluster.Spec.Image @@ -143,6 +160,10 @@ func VerifyImageValues(cluster *v1beta1.PostgresCluster) error { cluster.Spec.Monitoring.PGMonitor.Exporter != nil { images = append(images, "crunchy-postgres-exporter") } + if CollectorContainerImage(cluster.Spec.Instrumentation) == "" && + cluster.Spec.Instrumentation != nil { + images = append(images, "crunchy-collector") + } if PostgresContainerImage(cluster) == "" { if cluster.Spec.PostGISVersion != "" { images = append(images, "crunchy-postgres-gis") diff --git a/internal/config/config_test.go b/internal/config/config_test.go index de308544f4..a6e40adddd 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -9,74 +9,121 @@ import ( "testing" "gotest.tools/v3/assert" - "sigs.k8s.io/yaml" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestFetchKeyCommand(t *testing.T) { - - spec1 := v1beta1.PostgresClusterSpec{} - assert.Assert(t, FetchKeyCommand(&spec1) == "") - - spec2 := v1beta1.PostgresClusterSpec{ - Patroni: &v1beta1.PatroniSpec{}, - } - assert.Assert(t, FetchKeyCommand(&spec2) == "") - - spec3 := v1beta1.PostgresClusterSpec{ - Patroni: &v1beta1.PatroniSpec{ - DynamicConfiguration: map[string]any{}, - }, - } - assert.Assert(t, FetchKeyCommand(&spec3) == "") - - spec4 := v1beta1.PostgresClusterSpec{ - Patroni: &v1beta1.PatroniSpec{ - DynamicConfiguration: map[string]any{ - "postgresql": map[string]any{}, + t.Run("missing", func(t *testing.T) { + spec1 := v1beta1.PostgresClusterSpec{} + assert.Assert(t, FetchKeyCommand(&spec1) == "") + + spec2 := v1beta1.PostgresClusterSpec{ + Patroni: &v1beta1.PatroniSpec{}, + } + assert.Assert(t, FetchKeyCommand(&spec2) == "") + + spec3 := v1beta1.PostgresClusterSpec{ + Patroni: &v1beta1.PatroniSpec{ + DynamicConfiguration: map[string]any{}, }, - }, - } - assert.Assert(t, FetchKeyCommand(&spec4) == "") + } + assert.Assert(t, FetchKeyCommand(&spec3) == "") - spec5 := v1beta1.PostgresClusterSpec{ - Patroni: &v1beta1.PatroniSpec{ - DynamicConfiguration: map[string]any{ - "postgresql": map[string]any{ - "parameters": map[string]any{}, + spec4 := v1beta1.PostgresClusterSpec{ + Patroni: &v1beta1.PatroniSpec{ + DynamicConfiguration: map[string]any{ + "postgresql": map[string]any{}, }, }, - }, - } - assert.Assert(t, FetchKeyCommand(&spec5) == "") - - spec6 := v1beta1.PostgresClusterSpec{ - Patroni: &v1beta1.PatroniSpec{ - DynamicConfiguration: map[string]any{ - "postgresql": map[string]any{ - "parameters": map[string]any{ - "encryption_key_command": "", + } + assert.Assert(t, FetchKeyCommand(&spec4) == "") + + spec5 := v1beta1.PostgresClusterSpec{ + Patroni: &v1beta1.PatroniSpec{ + DynamicConfiguration: map[string]any{ + "postgresql": map[string]any{ + "parameters": map[string]any{}, }, }, }, - }, - } - assert.Assert(t, FetchKeyCommand(&spec6) == "") - - spec7 := v1beta1.PostgresClusterSpec{ - Patroni: &v1beta1.PatroniSpec{ - DynamicConfiguration: map[string]any{ - "postgresql": map[string]any{ - "parameters": map[string]any{ - "encryption_key_command": "echo mykey", + } + 
assert.Assert(t, FetchKeyCommand(&spec5) == "") + }) + + t.Run("blank", func(t *testing.T) { + var spec1 v1beta1.PostgresClusterSpec + require.UnmarshalInto(t, &spec1, `{ + patroni: { + dynamicConfiguration: { + postgresql: { + parameters: { + encryption_key_command: "", + }, }, }, }, - }, - } - assert.Assert(t, FetchKeyCommand(&spec7) == "echo mykey") + }`) + assert.Equal(t, "", FetchKeyCommand(&spec1)) + + var spec2 v1beta1.PostgresClusterSpec + require.UnmarshalInto(t, &spec2, `{ + config: { + parameters: { + encryption_key_command: "", + }, + }, + }`) + assert.Equal(t, "", FetchKeyCommand(&spec2)) + }) + + t.Run("exists", func(t *testing.T) { + var spec1 v1beta1.PostgresClusterSpec + require.UnmarshalInto(t, &spec1, `{ + patroni: { + dynamicConfiguration: { + postgresql: { + parameters: { + encryption_key_command: "echo mykey", + }, + }, + }, + }, + }`) + assert.Equal(t, "echo mykey", FetchKeyCommand(&spec1)) + + var spec2 v1beta1.PostgresClusterSpec + require.UnmarshalInto(t, &spec2, `{ + config: { + parameters: { + encryption_key_command: "cat somefile", + }, + }, + }`) + assert.Equal(t, "cat somefile", FetchKeyCommand(&spec2)) + }) + t.Run("config.parameters takes precedence", func(t *testing.T) { + var spec v1beta1.PostgresClusterSpec + require.UnmarshalInto(t, &spec, `{ + config: { + parameters: { + encryption_key_command: "cat somefile", + }, + }, + patroni: { + dynamicConfiguration: { + postgresql: { + parameters: { + encryption_key_command: "echo mykey", + }, + }, + }, + }, + }`) + assert.Equal(t, "cat somefile", FetchKeyCommand(&spec)) + }) } func TestPGAdminContainerImage(t *testing.T) { @@ -92,9 +139,9 @@ func TestPGAdminContainerImage(t *testing.T) { t.Setenv("RELATED_IMAGE_PGADMIN", "env-var-pgadmin") assert.Equal(t, PGAdminContainerImage(cluster), "env-var-pgadmin") - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, &cluster.Spec, `{ userInterface: { pgAdmin: { image: spec-image } }, - }`), &cluster.Spec)) + }`) assert.Equal(t, PGAdminContainerImage(cluster), "spec-image") } @@ -111,9 +158,9 @@ func TestPGBackRestContainerImage(t *testing.T) { t.Setenv("RELATED_IMAGE_PGBACKREST", "env-var-pgbackrest") assert.Equal(t, PGBackRestContainerImage(cluster), "env-var-pgbackrest") - assert.NilError(t, yaml.Unmarshal([]byte(`{ - backups: { pgBackRest: { image: spec-image } }, - }`), &cluster.Spec)) + require.UnmarshalInto(t, &cluster.Spec, `{ + backups: { pgbackrest: { image: spec-image } }, + }`) assert.Equal(t, PGBackRestContainerImage(cluster), "spec-image") } @@ -130,9 +177,9 @@ func TestPGBouncerContainerImage(t *testing.T) { t.Setenv("RELATED_IMAGE_PGBOUNCER", "env-var-pgbouncer") assert.Equal(t, PGBouncerContainerImage(cluster), "env-var-pgbouncer") - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, &cluster.Spec, `{ proxy: { pgBouncer: { image: spec-image } }, - }`), &cluster.Spec)) + }`) assert.Equal(t, PGBouncerContainerImage(cluster), "spec-image") } @@ -149,9 +196,9 @@ func TestPGExporterContainerImage(t *testing.T) { t.Setenv("RELATED_IMAGE_PGEXPORTER", "env-var-pgexporter") assert.Equal(t, PGExporterContainerImage(cluster), "env-var-pgexporter") - assert.NilError(t, yaml.Unmarshal([]byte(`{ - monitoring: { pgMonitor: { exporter: { image: spec-image } } }, - }`), &cluster.Spec)) + require.UnmarshalInto(t, &cluster.Spec, `{ + monitoring: { pgmonitor: { exporter: { image: spec-image } } }, + }`) assert.Equal(t, PGExporterContainerImage(cluster), "spec-image") } @@ -168,9 +215,9 @@ func TestStandalonePGAdminContainerImage(t 
*testing.T) { t.Setenv("RELATED_IMAGE_STANDALONE_PGADMIN", "env-var-pgadmin") assert.Equal(t, StandalonePGAdminContainerImage(pgadmin), "env-var-pgadmin") - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, &pgadmin.Spec, `{ image: spec-image - }`), &pgadmin.Spec)) + }`) assert.Equal(t, StandalonePGAdminContainerImage(pgadmin), "spec-image") } diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index bb669d00a2..4879209734 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -5,8 +5,10 @@ package pgupgrade import ( + "cmp" "context" "fmt" + "math" "strings" appsv1 "k8s.io/api/apps/v1" @@ -35,9 +37,16 @@ func pgUpgradeJob(upgrade *v1beta1.PGUpgrade) metav1.ObjectMeta { // upgradeCommand returns an entrypoint that prepares the filesystem for // and performs a PostgreSQL major version upgrade using pg_upgrade. -func upgradeCommand(oldVersion, newVersion int, fetchKeyCommand string, availableCPUs int) []string { - // Use multiple CPUs when three or more are available. - argJobs := fmt.Sprintf(` --jobs=%d`, max(1, availableCPUs-1)) +func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []string { + argJobs := fmt.Sprintf(` --jobs=%d`, max(1, spec.Jobs)) + argMethod := cmp.Or(map[string]string{ + "Clone": ` --clone`, + "Copy": ` --copy`, + "CopyFileRange": ` --copy-file-range`, + }[spec.TransferMethod], ` --link`) + + oldVersion := spec.FromPostgresVersion + newVersion := spec.ToPostgresVersion // if the fetch key command is set for TDE, provide the value during initialization initdb := `/usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}"` @@ -99,14 +108,14 @@ func upgradeCommand(oldVersion, newVersion int, fetchKeyCommand string, availabl `echo -e "Step 5: Running pg_upgrade check...\n"`, `time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`, `--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\`, - ` --new-datadir /pgdata/pg"${new_version}" --link --check` + argJobs, + ` --new-datadir /pgdata/pg"${new_version}" --check` + argMethod + argJobs, // Assuming the check completes successfully, the pg_upgrade command will // be run that actually prepares the upgraded pgdata directory. `echo -e "\nStep 6: Running pg_upgrade...\n"`, `time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`, `--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \`, - `--new-datadir /pgdata/pg"${new_version}" --link` + argJobs, + `--new-datadir /pgdata/pg"${new_version}"` + argMethod + argJobs, // Since we have cleared the Patroni cluster step by removing the EndPoints, we copy patroni.dynamic.json // from the old data dir to help retain PostgreSQL parameters you had set before. @@ -122,12 +131,12 @@ func upgradeCommand(oldVersion, newVersion int, fetchKeyCommand string, availabl // largestWholeCPU returns the maximum CPU request or limit as a non-negative // integer of CPUs. When resources lacks any CPU, the result is zero. -func largestWholeCPU(resources corev1.ResourceRequirements) int { +func largestWholeCPU(resources corev1.ResourceRequirements) int64 { // Read CPU quantities as millicores then divide to get the "floor." // NOTE: [resource.Quantity.Value] looks easier, but it rounds up. 
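// A small runnable sketch of why the NOTE above prefers ScaledValue over
// Quantity.Value when flooring to whole CPUs (k8s.io/apimachinery is already a
// dependency here): Value rounds a fractional quantity up, while reading
// millicores and using integer division floors it.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	q := resource.MustParse("2500m") // a 2.5 CPU request or limit

	fmt.Println(q.Value())                            // 3: rounded up
	fmt.Println(q.ScaledValue(resource.Milli) / 1000) // 2: the largest whole CPU
}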
return max( - int(resources.Limits.Cpu().ScaledValue(resource.Milli)/1000), - int(resources.Requests.Cpu().ScaledValue(resource.Milli)/1000), + resources.Limits.Cpu().ScaledValue(resource.Milli)/1000, + resources.Requests.Cpu().ScaledValue(resource.Milli)/1000, 0) } @@ -180,10 +189,12 @@ func (r *PGUpgradeReconciler) generateUpgradeJob( job.Spec.BackoffLimit = initialize.Int32(0) job.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever - // When enabled, calculate the number of CPUs for pg_upgrade. - wholeCPUs := 0 - if feature.Enabled(ctx, feature.PGUpgradeCPUConcurrency) { - wholeCPUs = largestWholeCPU(upgrade.Spec.Resources) + settings := upgrade.Spec.PGUpgradeSettings.DeepCopy() + + // When jobs is undefined, use one less than the number of CPUs. + if settings.Jobs == 0 && feature.Enabled(ctx, feature.PGUpgradeCPUConcurrency) { + wholeCPUs := int32(min(math.MaxInt32, largestWholeCPU(upgrade.Spec.Resources))) + settings.Jobs = wholeCPUs - 1 } // Replace all containers with one that does the upgrade. @@ -198,11 +209,7 @@ func (r *PGUpgradeReconciler) generateUpgradeJob( VolumeMounts: database.VolumeMounts, // Use our upgrade command and the specified image and resources. - Command: upgradeCommand( - upgrade.Spec.FromPostgresVersion, - upgrade.Spec.ToPostgresVersion, - fetchKeyCommand, - wholeCPUs), + Command: upgradeCommand(settings, fetchKeyCommand), Image: pgUpgradeContainerImage(upgrade), ImagePullPolicy: upgrade.Spec.ImagePullPolicy, Resources: upgrade.Spec.Resources, diff --git a/internal/controller/pgupgrade/jobs_test.go b/internal/controller/pgupgrade/jobs_test.go index 7136fcf5ab..c3f3608e4d 100644 --- a/internal/controller/pgupgrade/jobs_test.go +++ b/internal/controller/pgupgrade/jobs_test.go @@ -19,17 +19,18 @@ import ( "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestLargestWholeCPU(t *testing.T) { - assert.Equal(t, 0, + assert.Equal(t, int64(0), largestWholeCPU(corev1.ResourceRequirements{}), "expected the zero value to be zero") for _, tt := range []struct { Name, ResourcesYAML string - Result int + Result int64 }{ { Name: "Negatives", ResourcesYAML: `{requests: {cpu: -3}, limits: {cpu: -5}}`, @@ -54,7 +55,7 @@ func TestLargestWholeCPU(t *testing.T) { } { t.Run(tt.Name, func(t *testing.T) { var resources corev1.ResourceRequirements - assert.NilError(t, yaml.Unmarshal([]byte(tt.ResourcesYAML), &resources)) + require.UnmarshalInto(t, &resources, tt.ResourcesYAML) assert.Equal(t, tt.Result, largestWholeCPU(resources)) }) } @@ -72,27 +73,53 @@ func TestUpgradeCommand(t *testing.T) { }) } - t.Run("CPUs", func(t *testing.T) { + t.Run("Jobs", func(t *testing.T) { for _, tt := range []struct { - CPUs int - Jobs string + Spec int32 + Args string }{ - {CPUs: 0, Jobs: "--jobs=1"}, - {CPUs: 1, Jobs: "--jobs=1"}, - {CPUs: 2, Jobs: "--jobs=1"}, - {CPUs: 3, Jobs: "--jobs=2"}, - {CPUs: 10, Jobs: "--jobs=9"}, + {Spec: -1, Args: "--jobs=1"}, + {Spec: 0, Args: "--jobs=1"}, + {Spec: 1, Args: "--jobs=1"}, + {Spec: 2, Args: "--jobs=2"}, + {Spec: 10, Args: "--jobs=10"}, } { - command := upgradeCommand(10, 11, "", tt.CPUs) + spec := &v1beta1.PGUpgradeSettings{Jobs: tt.Spec} + command := upgradeCommand(spec, "") assert.Assert(t, len(command) > 3) assert.DeepEqual(t, []string{"bash", 
"-ceu", "--"}, command[:3]) script := command[3] - assert.Assert(t, cmp.Contains(script, tt.Jobs)) + assert.Assert(t, cmp.Contains(script, tt.Args)) expectScript(t, script) } }) + + t.Run("Method", func(t *testing.T) { + for _, tt := range []struct { + Spec string + Args string + }{ + {Spec: "", Args: "--link"}, + {Spec: "mystery!", Args: "--link"}, + {Spec: "Link", Args: "--link"}, + {Spec: "Clone", Args: "--clone"}, + {Spec: "Copy", Args: "--copy"}, + {Spec: "CopyFileRange", Args: "--copy-file-range"}, + } { + spec := &v1beta1.PGUpgradeSettings{TransferMethod: tt.Spec} + command := upgradeCommand(spec, "") + assert.Assert(t, len(command) > 3) + assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3]) + + script := command[3] + assert.Assert(t, cmp.Contains(script, tt.Args)) + + expectScript(t, script) + } + + }) } func TestGenerateUpgradeJob(t *testing.T) { @@ -194,7 +221,7 @@ spec: echo -e "Step 5: Running pg_upgrade check...\n" time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \ --new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\ - --new-datadir /pgdata/pg"${new_version}" --link --check --jobs=1 + --new-datadir /pgdata/pg"${new_version}" --check --link --jobs=1 echo -e "\nStep 6: Running pg_upgrade...\n" time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \ --new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \ @@ -357,8 +384,7 @@ func TestPGUpgradeContainerImage(t *testing.T) { t.Setenv("RELATED_IMAGE_PGUPGRADE", "env-var-pgbackrest") assert.Equal(t, pgUpgradeContainerImage(upgrade), "env-var-pgbackrest") - assert.NilError(t, yaml.Unmarshal( - []byte(`{ image: spec-image }`), &upgrade.Spec)) + require.UnmarshalInto(t, &upgrade.Spec, `{ image: spec-image }`) assert.Equal(t, pgUpgradeContainerImage(upgrade), "spec-image") } diff --git a/internal/controller/pgupgrade/pgupgrade_controller.go b/internal/controller/pgupgrade/pgupgrade_controller.go index e1efb44e50..06a36574f0 100644 --- a/internal/controller/pgupgrade/pgupgrade_controller.go +++ b/internal/controller/pgupgrade/pgupgrade_controller.go @@ -153,6 +153,7 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( setStatusToProgressingIfReasonWas("", upgrade) // The "from" version must be smaller than the "to" version. + // NOTE: CRD validation also rejects these values. // An invalid PGUpgrade should not be requeued. if upgrade.Spec.FromPostgresVersion >= upgrade.Spec.ToPostgresVersion { @@ -418,7 +419,7 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Set the cluster status when we know the upgrade has completed successfully. 
// This will serve to help the user see that the upgrade has completed if they // are only watching the PostgresCluster - patch.Status.PostgresVersion = upgrade.Spec.ToPostgresVersion + patch.Status.PostgresVersion = int(upgrade.Spec.ToPostgresVersion) // Set the pgBackRest status for bootstrapping patch.Status.PGBackRest.Repos = []v1beta1.RepoStatus{} diff --git a/internal/controller/postgrescluster/cluster.go b/internal/controller/postgrescluster/cluster.go index 67544d621b..4cd62f60c8 100644 --- a/internal/controller/postgrescluster/cluster.go +++ b/internal/controller/postgrescluster/cluster.go @@ -15,6 +15,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/patroni" @@ -29,7 +30,7 @@ import ( // files (etc) that apply to the entire cluster. func (r *Reconciler) reconcileClusterConfigMap( ctx context.Context, cluster *v1beta1.PostgresCluster, - pgHBAs postgres.HBAs, pgParameters postgres.Parameters, + pgHBAs postgres.HBAs, pgParameters *postgres.ParameterSet, ) (*corev1.ConfigMap, error) { clusterConfigMap := &corev1.ConfigMap{ObjectMeta: naming.ClusterConfigMap(cluster)} clusterConfigMap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) @@ -44,7 +45,7 @@ func (r *Reconciler) reconcileClusterConfigMap( if err == nil { err = patroni.ClusterConfigMap(ctx, cluster, pgHBAs, pgParameters, - clusterConfigMap, r.patroniLogSize(cluster)) + clusterConfigMap, r.patroniLogSize(ctx, cluster)) } if err == nil { err = errors.WithStack(r.apply(ctx, clusterConfigMap)) @@ -57,25 +58,25 @@ func (r *Reconciler) reconcileClusterConfigMap( // If a value is set, this enables volume based log storage and triggers the // relevant Patroni configuration. If the value given is less than 25M, the log // file size storage limit defaults to 25M and an event is triggered. -func (r *Reconciler) patroniLogSize(cluster *v1beta1.PostgresCluster) int64 { +// If a value is not set, but the OpenTelemetryLogs feature gate is enabled, the +// log file size storage limit will be set to 25M. +func (r *Reconciler) patroniLogSize(ctx context.Context, cluster *v1beta1.PostgresCluster) int64 { + if cluster.Spec.Patroni != nil && cluster.Spec.Patroni.Logging != nil && + cluster.Spec.Patroni.Logging.StorageLimit != nil { - if cluster.Spec.Patroni != nil { - if cluster.Spec.Patroni.Logging != nil { - if cluster.Spec.Patroni.Logging.StorageLimit != nil { + sizeInBytes := cluster.Spec.Patroni.Logging.StorageLimit.Value() - sizeInBytes := cluster.Spec.Patroni.Logging.StorageLimit.Value() + if sizeInBytes < 25000000 { + // TODO(validation): Eventually we should be able to remove this in favor of CEL validation. + // - https://kubernetes.io/docs/reference/using-api/cel/ + r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "PatroniLogStorageLimitTooSmall", + "Configured Patroni log storage limit is too small. File size will default to 25M.") - if sizeInBytes < 25000000 { - // TODO(validation): Eventually we should be able to remove this in favor of CEL validation. - // - https://kubernetes.io/docs/reference/using-api/cel/ - r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "PatroniLogStorageLimitTooSmall", - "Configured Patroni log storage limit is too small. 
File size will default to 25M.") - - sizeInBytes = 25000000 - } - return sizeInBytes - } + sizeInBytes = 25000000 } + return sizeInBytes + } else if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + return 25000000 } return 0 } diff --git a/internal/controller/postgrescluster/cluster_test.go b/internal/controller/postgrescluster/cluster_test.go index e08d4e855c..6882cfa27b 100644 --- a/internal/controller/postgrescluster/cluster_test.go +++ b/internal/controller/postgrescluster/cluster_test.go @@ -21,6 +21,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/crunchydata/postgres-operator/internal/controller/runtime" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/testing/cmp" @@ -787,6 +788,7 @@ postgres-operator.crunchydata.com/role: replica } func TestPatroniLogSize(t *testing.T) { + ctx := context.Background() oneHundredMeg, err := resource.ParseQuantity("100M") assert.NilError(t, err) @@ -805,7 +807,7 @@ func TestPatroniLogSize(t *testing.T) { recorder := events.NewRecorder(t, runtime.Scheme) reconciler := &Reconciler{Recorder: recorder} - size := reconciler.patroniLogSize(&cluster) + size := reconciler.patroniLogSize(ctx, &cluster) assert.Equal(t, size, int64(0)) assert.Equal(t, len(recorder.Events), 0) @@ -818,7 +820,7 @@ func TestPatroniLogSize(t *testing.T) { cluster.Spec.Patroni = &v1beta1.PatroniSpec{ Logging: &v1beta1.PatroniLogConfig{}} - size := reconciler.patroniLogSize(&cluster) + size := reconciler.patroniLogSize(ctx, &cluster) assert.Equal(t, size, int64(0)) assert.Equal(t, len(recorder.Events), 0) @@ -833,7 +835,7 @@ func TestPatroniLogSize(t *testing.T) { StorageLimit: &oneHundredMeg, }} - size := reconciler.patroniLogSize(&cluster) + size := reconciler.patroniLogSize(ctx, &cluster) assert.Equal(t, size, int64(100000000)) assert.Equal(t, len(recorder.Events), 0) @@ -848,7 +850,7 @@ func TestPatroniLogSize(t *testing.T) { StorageLimit: &tooSmall, }} - size := reconciler.patroniLogSize(&cluster) + size := reconciler.patroniLogSize(ctx, &cluster) assert.Equal(t, size, int64(25000000)) assert.Equal(t, len(recorder.Events), 1) @@ -856,4 +858,43 @@ func TestPatroniLogSize(t *testing.T) { assert.Equal(t, recorder.Events[0].Reason, "PatroniLogStorageLimitTooSmall") assert.Equal(t, recorder.Events[0].Note, "Configured Patroni log storage limit is too small. 
File size will default to 25M.") }) + + t.Run("SizeUnsetOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(ctx, gate) + + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + + cluster.Spec.Patroni = nil + + size := reconciler.patroniLogSize(ctx, &cluster) + + assert.Equal(t, size, int64(25000000)) + assert.Equal(t, len(recorder.Events), 0) + }) + + t.Run("SizeSetOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(ctx, gate) + + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + Logging: &v1beta1.PatroniLogConfig{ + StorageLimit: &oneHundredMeg, + }} + + size := reconciler.patroniLogSize(ctx, &cluster) + + assert.Equal(t, size, int64(100000000)) + assert.Equal(t, len(recorder.Events), 0) + }) } diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index 9d880751e2..4de285e559 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -27,13 +27,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/kubernetes" "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/pgaudit" - "github.com/crunchydata/postgres-operator/internal/pgbackrest" "github.com/crunchydata/postgres-operator/internal/pgbouncer" "github.com/crunchydata/postgres-operator/internal/pgmonitor" "github.com/crunchydata/postgres-operator/internal/pki" @@ -233,16 +232,12 @@ func (r *Reconciler) Reconcile( } pgHBAs := postgres.NewHBAs() - pgmonitor.PostgreSQLHBAs(cluster, &pgHBAs) + pgmonitor.PostgreSQLHBAs(ctx, cluster, &pgHBAs) pgbouncer.PostgreSQL(cluster, &pgHBAs) - pgParameters := postgres.NewParameters() - pgaudit.PostgreSQLParameters(&pgParameters) - pgbackrest.PostgreSQL(cluster, &pgParameters, backupsSpecFound) - pgmonitor.PostgreSQLParameters(cluster, &pgParameters) + pgParameters := r.generatePostgresParameters(ctx, cluster, backupsSpecFound) - // Set huge_pages = try if a hugepages resource limit > 0, otherwise set "off" - postgres.SetHugePages(cluster, &pgParameters) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) if err == nil { rootCA, err = r.reconcileRootCertificate(ctx, cluster) @@ -349,7 +344,7 @@ func (r *Reconciler) Reconcile( ctx, cluster, clusterConfigMap, clusterReplicationSecret, rootCA, clusterPodService, instanceServiceAccount, instances, patroniLeaderService, primaryCertificate, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, + backupsSpecFound, otelConfig, ) } @@ -380,7 +375,7 @@ func (r *Reconciler) Reconcile( err = r.reconcilePGBouncer(ctx, cluster, instances, primaryCertificate, rootCA) } if err == nil { - err = r.reconcilePGMonitor(ctx, cluster, instances, monitoringSecret) + err = 
r.reconcilePGMonitorExporter(ctx, cluster, instances, monitoringSecret) } if err == nil { err = r.reconcileDatabaseInitSQL(ctx, cluster, instances) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index f5d9836b1d..6d6509eafb 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -24,6 +24,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" @@ -591,6 +592,7 @@ func (r *Reconciler) reconcileInstanceSets( clusterVolumes []*corev1.PersistentVolumeClaim, exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, + otelConfig *collector.Config, ) error { // Go through the observed instances and check if a primary has been determined. @@ -628,7 +630,7 @@ func (r *Reconciler) reconcileInstanceSets( patroniLeaderService, primaryCertificate, findAvailableInstanceNames(*set, instances, clusterVolumes), numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, + backupsSpecFound, otelConfig, ) if err == nil { @@ -1063,6 +1065,7 @@ func (r *Reconciler) scaleUpInstances( clusterVolumes []*corev1.PersistentVolumeClaim, exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, + otelConfig *collector.Config, ) ([]*appsv1.StatefulSet, error) { log := logging.FromContext(ctx) @@ -1109,7 +1112,7 @@ func (r *Reconciler) scaleUpInstances( rootCA, clusterPodService, instanceServiceAccount, patroniLeaderService, primaryCertificate, instances[i], numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, + backupsSpecFound, otelConfig, ) } if err == nil { @@ -1140,6 +1143,7 @@ func (r *Reconciler) reconcileInstance( clusterVolumes []*corev1.PersistentVolumeClaim, exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, + otelConfig *collector.Config, ) error { log := logging.FromContext(ctx).WithValues("instance", instance.Name) ctx = logging.NewContext(ctx, log) @@ -1164,7 +1168,7 @@ func (r *Reconciler) reconcileInstance( ) if err == nil { - instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance) + instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig) } if err == nil { instanceCertificates, err = r.reconcileInstanceCertificates( @@ -1196,9 +1200,34 @@ func (r *Reconciler) reconcileInstance( spec, instanceCertificates, instanceConfigMap, &instance.Spec.Template) } - // Add pgMonitor resources to the instance Pod spec + // If either OpenTelemetry feature is enabled, we want to add the collector config to the pod + if err == nil && + (feature.Enabled(ctx, feature.OpenTelemetryLogs) || feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + + // If the OpenTelemetryMetrics feature is enabled, we need to get the pgpassword from the + // monitoring user secret + pgPassword := "" + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + monitoringUserSecret := &corev1.Secret{ObjectMeta: naming.MonitoringUserSecret(cluster)} + // Create new err variable to avoid abandoning the rest of the reconcile loop if there + // is an error getting the monitoring user secret + err := 
errors.WithStack( + r.Client.Get(ctx, client.ObjectKeyFromObject(monitoringUserSecret), monitoringUserSecret)) + if err == nil { + pgPassword = string(monitoringUserSecret.Data["password"]) + } + } + + // For now, we are not using logrotate to rotate postgres or patroni logs + // but we are using it for pgbackrest logs in the postgres pod + collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap, &instance.Spec.Template.Spec, + []corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword, + []string{naming.PGBackRestPGDataLogPath}, true) + } + + // Add postgres-exporter to the instance Pod spec if err == nil { - err = addPGMonitorToInstancePodSpec(ctx, cluster, &instance.Spec.Template, exporterQueriesConfig, exporterWebConfig) + addPGMonitorExporterToInstancePodSpec(ctx, cluster, &instance.Spec.Template, exporterQueriesConfig, exporterWebConfig) } // add nss_wrapper init container and add nss_wrapper env vars to the database and pgbackrest @@ -1377,7 +1406,7 @@ func addPGBackRestToInstancePodSpec( // files (etc) that apply to instance of cluster. func (r *Reconciler) reconcileInstanceConfigMap( ctx context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, - instance *appsv1.StatefulSet, + instance *appsv1.StatefulSet, otelConfig *collector.Config, ) (*corev1.ConfigMap, error) { instanceConfigMap := &corev1.ConfigMap{ObjectMeta: naming.InstanceConfigMap(instance)} instanceConfigMap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) @@ -1397,6 +1426,26 @@ func (r *Reconciler) reconcileInstanceConfigMap( naming.LabelInstance: instance.Name, }) + // If OTel logging or metrics is enabled, add collector config + if err == nil && + (feature.Enabled(ctx, feature.OpenTelemetryLogs) || + feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + err = collector.AddToConfigMap(ctx, otelConfig, instanceConfigMap) + + // Add pgbackrest logrotate if OpenTelemetryLogs is enabled and + // local volumes are available + if err == nil && + feature.Enabled(ctx, feature.OpenTelemetryLogs) && + pgbackrest.RepoHostVolumeDefined(cluster) && + cluster.Spec.Instrumentation != nil { + + collector.AddLogrotateConfigs(ctx, cluster.Spec.Instrumentation, + instanceConfigMap, + []collector.LogrotateConfig{{ + LogFiles: []string{naming.PGBackRestPGDataLogPath + "/*.log"}, + }}) + } + } if err == nil { err = patroni.InstanceConfigMap(ctx, cluster, spec, instanceConfigMap) } diff --git a/internal/controller/postgrescluster/metrics_setup.sql b/internal/controller/postgrescluster/metrics_setup.sql new file mode 100644 index 0000000000..728de80c3e --- /dev/null +++ b/internal/controller/postgrescluster/metrics_setup.sql @@ -0,0 +1,222 @@ +-- +-- Copyright © 2017-2025 Crunchy Data Solutions, Inc. All Rights Reserved. +-- + +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'ccp_monitoring') THEN + CREATE ROLE ccp_monitoring WITH LOGIN; + END IF; + + -- The pgmonitor role is required by the pgnodemx extension in PostgreSQL versions 9.5 and 9.6 + -- and should be removed when upgrading to PostgreSQL 10 and above. 
+ IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'pgmonitor') THEN + DROP ROLE pgmonitor; + END IF; +END +$$; + +GRANT pg_monitor to ccp_monitoring; +GRANT pg_execute_server_program TO ccp_monitoring; + +ALTER ROLE ccp_monitoring SET lock_timeout TO '2min'; +ALTER ROLE ccp_monitoring SET jit TO 'off'; + +CREATE SCHEMA IF NOT EXISTS monitor AUTHORIZATION ccp_monitoring; + +DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; +-- Table to store last reset time for pg_stat_statements +CREATE TABLE monitor.pg_stat_statements_reset_info( + reset_time timestamptz +); + +DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); +-- Function to reset pg_stat_statements periodically +CREATE FUNCTION monitor.pg_stat_statements_reset_info(p_throttle_minutes integer DEFAULT 1440) + RETURNS bigint + LANGUAGE plpgsql + SECURITY DEFINER + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + + v_reset_timestamp timestamptz; + v_throttle interval; + +BEGIN + + IF p_throttle_minutes < 0 THEN + RETURN 0; + END IF; + + v_throttle := make_interval(mins := p_throttle_minutes); + + SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; + + IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN + -- Ensure table is empty + DELETE FROM monitor.pg_stat_statements_reset_info; + PERFORM pg_stat_statements_reset(); + INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); + END IF; + + RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); + +EXCEPTION + WHEN others then + RETURN 0; +END +$function$; + +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; +GRANT ALL ON ALL TABLES IN SCHEMA monitor TO ccp_monitoring; + +--- get_pgbackrest_info is used by the OTel collector. +--- get_replication_lag is created as function, so that we can query without warning on a replica. +CREATE OR REPLACE FUNCTION get_replication_lag() RETURNS TABLE(bytes NUMERIC) AS $$ +BEGIN + IF pg_is_in_recovery() THEN + RETURN QUERY SELECT 0::NUMERIC AS bytes; + ELSE + RETURN QUERY SELECT pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes + FROM pg_catalog.pg_stat_replication; + END IF; +END; +$$ LANGUAGE plpgsql; + +--- get_pgbackrest_info is used by the OTel collector. +--- get_pgbackrest_info is created as a function so that no ddl runs on a replica. +--- In the query, the --stanza argument matches DefaultStanzaName, defined in internal/pgbackrest/config.go. 
+CREATE OR REPLACE FUNCTION get_pgbackrest_info() +RETURNS TABLE ( + last_diff_backup BIGINT, + last_full_backup BIGINT, + last_incr_backup BIGINT, + last_info_backrest_repo_version TEXT, + last_info_backup_error INT, + backup_type TEXT, + backup_runtime_seconds BIGINT, + repo_backup_size_bytes TEXT, + repo_total_size_bytes TEXT, + oldest_full_backup BIGINT, + repo TEXT +) AS $$ +BEGIN + IF pg_is_in_recovery() THEN + RETURN QUERY + SELECT + 0::bigint AS last_diff_backup, + 0::bigint AS last_full_backup, + 0::bigint AS last_incr_backup, + '0' AS last_info_backrest_repo_version, + 0::int AS last_info_backup_error, + 'n/a'::text AS backup_type, + 0::bigint AS backup_runtime_seconds, + '0'::text AS repo_backup_size_bytes, + '0'::text AS repo_total_size_bytes, + 0::bigint AS oldest_full_backup, + 'n/a' AS repo; + ELSE + DROP TABLE IF EXISTS pgbackrest_info; + CREATE TEMPORARY TABLE pgbackrest_info (data json); + COPY pgbackrest_info (data) + FROM PROGRAM 'export LC_ALL=C && printf "\f" && pgbackrest info --log-level-console=info --log-level-stderr=warn --output=json --stanza=db && printf "\f"' + WITH (FORMAT csv, HEADER false, QUOTE E'\f'); + + RETURN QUERY + WITH + all_backups (data) AS ( + SELECT jsonb_array_elements(to_jsonb(data)) FROM pgbackrest_info + ), + stanza_backups (stanza, backup) AS ( + SELECT data->>'name', jsonb_array_elements(data->'backup') FROM all_backups + ), + ordered_backups (stanza, backup, seq_oldest, seq_newest) AS ( + SELECT stanza, backup, + ROW_NUMBER() OVER ( + PARTITION BY stanza, backup->'database'->>'repo-key', backup->>'type' + ORDER BY backup->'timestamp'->>'start' ASC, backup->'timestamp'->>'stop' ASC + ), + ROW_NUMBER() OVER ( + PARTITION BY stanza, backup->'database'->>'repo-key', backup->>'type' + ORDER BY backup->'timestamp'->>'start' DESC, backup->'timestamp'->>'stop' DESC + ) + FROM stanza_backups + ), + + ccp_backrest_last_info AS ( + SELECT + stanza, + split_part(backup->'backrest'->>'version', '.', 1) || lpad(split_part(backup->'backrest'->>'version', '.', 2), 2, '0') || lpad(coalesce(nullif(split_part(backup->'backrest'->>'version', '.', 3), ''), '00'), 2, '0') AS backrest_repo_version, + backup->'database'->>'repo-key' AS repo, + backup->>'type' AS backup_type, + backup->'info'->'repository'->>'delta' AS repo_backup_size_bytes, + backup->'info'->'repository'->>'size' AS repo_total_size_bytes, + (backup->'timestamp'->>'stop')::bigint - (backup->'timestamp'->>'start')::bigint AS backup_runtime_seconds, + CASE WHEN backup->>'error' = 'true' THEN 1 ELSE 0 END AS backup_error + FROM ordered_backups + WHERE seq_newest = 1 + ), + + ccp_backrest_oldest_full_backup AS ( + SELECT + stanza, + backup->'database'->>'repo-key' AS repo, + min((backup->'timestamp'->>'stop')::bigint) AS time_seconds + FROM ordered_backups + WHERE seq_oldest = 1 AND backup->>'type' IN ('full') + GROUP BY 1,2 + ), + + ccp_backrest_last_full_backup AS ( + SELECT + stanza, + backup->'database'->>'repo-key' AS repo, + EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)::bigint - max((backup->'timestamp'->>'stop')::bigint) AS time_since_completion_seconds + FROM ordered_backups + WHERE seq_newest = 1 AND backup->>'type' IN ('full') + GROUP BY 1,2 + ), + + ccp_backrest_last_diff_backup AS ( + SELECT + stanza, + backup->'database'->>'repo-key' AS repo, + EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)::bigint - max((backup->'timestamp'->>'stop')::bigint) AS time_since_completion_seconds + FROM ordered_backups + WHERE seq_newest = 1 AND backup->>'type' IN ('full','diff') + GROUP BY 1,2 + ), + + 
ccp_backrest_last_incr_backup AS ( + SELECT + stanza, + backup->'database'->>'repo-key' AS repo, + EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)::bigint - max((backup->'timestamp'->>'stop')::bigint) AS time_since_completion_seconds + FROM ordered_backups + WHERE seq_newest = 1 AND backup->>'type' IN ('full','diff','incr') + GROUP BY 1,2 + ) + + SELECT + ccp_backrest_last_diff_backup.time_since_completion_seconds, + ccp_backrest_last_full_backup.time_since_completion_seconds, + ccp_backrest_last_incr_backup.time_since_completion_seconds, + ccp_backrest_last_info.backrest_repo_version, + ccp_backrest_last_info.backup_error, + ccp_backrest_last_info.backup_type, + ccp_backrest_last_info.backup_runtime_seconds, + ccp_backrest_last_info.repo_backup_size_bytes, + ccp_backrest_last_info.repo_total_size_bytes, + ccp_backrest_oldest_full_backup.time_seconds, + ccp_backrest_last_incr_backup.repo + FROM + ccp_backrest_last_diff_backup + JOIN ccp_backrest_last_full_backup ON ccp_backrest_last_diff_backup.stanza = ccp_backrest_last_full_backup.stanza AND ccp_backrest_last_diff_backup.repo = ccp_backrest_last_full_backup.repo + JOIN ccp_backrest_last_incr_backup ON ccp_backrest_last_diff_backup.stanza = ccp_backrest_last_incr_backup.stanza AND ccp_backrest_last_diff_backup.repo = ccp_backrest_last_incr_backup.repo + JOIN ccp_backrest_last_info ON ccp_backrest_last_diff_backup.stanza = ccp_backrest_last_info.stanza AND ccp_backrest_last_diff_backup.repo = ccp_backrest_last_info.repo + JOIN ccp_backrest_oldest_full_backup ON ccp_backrest_last_diff_backup.stanza = ccp_backrest_oldest_full_backup.stanza AND ccp_backrest_last_diff_backup.repo = ccp_backrest_oldest_full_backup.repo; + END IF; +END; +$$ LANGUAGE plpgsql; + diff --git a/internal/controller/postgrescluster/patroni.go b/internal/controller/postgrescluster/patroni.go index 995de75b61..5242169be6 100644 --- a/internal/controller/postgrescluster/patroni.go +++ b/internal/controller/postgrescluster/patroni.go @@ -173,7 +173,7 @@ func (r *Reconciler) reconcilePatroniDistributedConfiguration( func (r *Reconciler) reconcilePatroniDynamicConfiguration( ctx context.Context, cluster *v1beta1.PostgresCluster, instances *observedInstances, - pgHBAs postgres.HBAs, pgParameters postgres.Parameters, + pgHBAs postgres.HBAs, pgParameters *postgres.ParameterSet, ) error { if !patroni.ClusterBootstrapped(cluster) { // Patroni has not yet bootstrapped. 
Dynamic configuration happens through diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index b823e2bd26..3645871bd5 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -27,6 +27,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" @@ -687,12 +688,25 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster if pgbackrest.RepoHostVolumeDefined(postgresCluster) { // add the init container to make the pgBackRest repo volume log directory - pgbackrest.MakePGBackrestLogDir(&repo.Spec.Template, postgresCluster) + pgBackRestLogPath := pgbackrest.MakePGBackrestLogDir(&repo.Spec.Template, postgresCluster) - // add pgBackRest repo volumes to pod + containersToAdd := []string{naming.PGBackRestRepoContainerName} + + // If OpenTelemetryLogs is enabled, we want to add the collector to the pod + // and also add the RepoVolumes to the container. + if postgresCluster.Spec.Instrumentation != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + collector.AddToPod(ctx, postgresCluster.Spec.Instrumentation, postgresCluster.Spec.ImagePullPolicy, + &corev1.ConfigMap{ObjectMeta: naming.PGBackRestConfig(postgresCluster)}, + &repo.Spec.Template.Spec, []corev1.VolumeMount{}, "", + []string{pgBackRestLogPath}, true) + + containersToAdd = append(containersToAdd, naming.ContainerCollector) + } + + // add pgBackRest repo volumes to pod and to containers if err := pgbackrest.AddRepoVolumesToPod(postgresCluster, &repo.Spec.Template, getRepoPVCNames(postgresCluster, repoResources.pvcs), - naming.PGBackRestRepoContainerName); err != nil { + containersToAdd...); err != nil { return nil, errors.WithStack(err) } } @@ -2003,8 +2017,12 @@ func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, repoHostName, configHash, serviceName, serviceNamespace string, instanceNames []string) error { - backrestConfig := pgbackrest.CreatePGBackRestConfigMapIntent(postgresCluster, repoHostName, + backrestConfig, err := pgbackrest.CreatePGBackRestConfigMapIntent(ctx, postgresCluster, repoHostName, configHash, serviceName, serviceNamespace, instanceNames) + if err != nil { + return err + } + if err := r.setControllerReference(postgresCluster, backrestConfig); err != nil { return err } diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index 1b9bb837f3..2b1dcae779 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -18,6 +18,8 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/collector" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" @@ -40,10 +42,11 @@ func (r *Reconciler) reconcilePGBouncer( service, err := r.reconcilePGBouncerService(ctx, cluster) if err == nil { - configmap, err = r.reconcilePGBouncerConfigMap(ctx, cluster) + secret, err = 
r.reconcilePGBouncerSecret(ctx, cluster, root, service) } if err == nil { - secret, err = r.reconcilePGBouncerSecret(ctx, cluster, root, service) + config := collector.NewConfigForPgBouncerPod(ctx, cluster, pgbouncer.PostgresqlUser) + configmap, err = r.reconcilePGBouncerConfigMap(ctx, cluster, config) } if err == nil { err = r.reconcilePGBouncerDeployment(ctx, cluster, primaryCertificate, configmap, secret) @@ -63,6 +66,7 @@ func (r *Reconciler) reconcilePGBouncer( // reconcilePGBouncerConfigMap writes the ConfigMap for a PgBouncer Pod. func (r *Reconciler) reconcilePGBouncerConfigMap( ctx context.Context, cluster *v1beta1.PostgresCluster, + otelConfig *collector.Config, ) (*corev1.ConfigMap, error) { configmap := &corev1.ConfigMap{ObjectMeta: naming.ClusterPGBouncer(cluster)} configmap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) @@ -92,7 +96,22 @@ func (r *Reconciler) reconcilePGBouncerConfigMap( }) if err == nil { - pgbouncer.ConfigMap(cluster, configmap) + pgbouncer.ConfigMap(ctx, cluster, configmap) + } + // If OTel logging or metrics is enabled, add collector config + if otelConfig != nil && + (feature.Enabled(ctx, feature.OpenTelemetryLogs) || + feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + err = collector.AddToConfigMap(ctx, otelConfig, configmap) + } + // If OTel logging is enabled, add logrotate config + if err == nil && otelConfig != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + logrotateConfig := collector.LogrotateConfig{ + LogFiles: []string{naming.PGBouncerFullLogPath}, + PostrotateScript: collector.PGBouncerPostRotateScript, + } + collector.AddLogrotateConfigs(ctx, cluster.Spec.Instrumentation, configmap, + []collector.LogrotateConfig{logrotateConfig}) } if err == nil { err = errors.WithStack(r.apply(ctx, configmap)) @@ -456,6 +475,9 @@ func (r *Reconciler) generatePGBouncerDeployment( pgbouncer.Pod(ctx, cluster, configmap, primaryCertificate, secret, &deploy.Spec.Template.Spec) } + // Add tmp directory and volume for log files + addTMPEmptyDir(&deploy.Spec.Template) + return deploy, true, err } diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index 956a99bffd..84b955559a 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -6,6 +6,7 @@ package postgrescluster import ( "context" + _ "embed" "fmt" "io" "os" @@ -27,17 +28,8 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) -// If pgMonitor is enabled the pgMonitor sidecar(s) have been added to the -// instance pod. reconcilePGMonitor will update the database to -// create the necessary objects for the tool to run -func (r *Reconciler) reconcilePGMonitor(ctx context.Context, - cluster *v1beta1.PostgresCluster, instances *observedInstances, - monitoringSecret *corev1.Secret) error { - - err := r.reconcilePGMonitorExporter(ctx, cluster, instances, monitoringSecret) - - return err -} +//go:embed "metrics_setup.sql" +var metricsSetupForOTelCollector string // reconcilePGMonitorExporter performs setup the postgres_exporter sidecar // - PodExec to run the sql in the primary database @@ -69,19 +61,24 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, // We use this ImageID and the setup.sql file in the hash we make to see if the operator needs to rerun // the `EnableExporterInPostgreSQL` funcs; that way we are always running // that function against an updated and running pod. 
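// A hedged, self-contained sketch of the go:embed pattern used for
// metrics_setup.sql above, assuming a file named setup.sql sits beside this
// source file: the SQL is compiled into the binary as a string, so the
// reconciler needs no runtime file read before handing it to the executor.
package main

import (
	_ "embed"
	"fmt"
)

//go:embed setup.sql
var setupSQL string

func main() {
	fmt.Printf("embedded %d bytes of SQL\n", len(setupSQL))
}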
- if pgmonitor.ExporterEnabled(cluster) { + + if pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { sql, err := os.ReadFile(fmt.Sprintf("%s/pg%d/setup.sql", pgmonitor.GetQueriesConfigDir(ctx), cluster.Spec.PostgresVersion)) if err != nil { return err } - // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. - // pgMonitor queries expect a path to a script that runs pgBackRest - // info and provides json output. In the queries yaml for pgBackRest - // the default path is `/usr/bin/pgbackrest-info.sh`. We update - // the path to point to the script in our database image. - setup = strings.ReplaceAll(string(sql), "/usr/bin/pgbackrest-info.sh", - "/opt/crunchy/bin/postgres/pgbackrest_info.sh") + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + setup = metricsSetupForOTelCollector + } else { + // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. + // pgMonitor queries expect a path to a script that runs pgBackRest + // info and provides json output. In the queries yaml for pgBackRest + // the default path is `/usr/bin/pgbackrest-info.sh`. We update + // the path to point to the script in our database image. + setup = strings.ReplaceAll(string(sql), "/usr/bin/pgbackrest-info.sh", + "/opt/crunchy/bin/postgres/pgbackrest_info.sh") + } for _, containerStatus := range writablePod.Status.ContainerStatuses { if containerStatus.Name == naming.ContainerDatabase { @@ -102,9 +99,9 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, return pgmonitor.EnableExporterInPostgreSQL(ctx, exec, monitoringSecret, pgmonitor.ExporterDB, setup) } - if !pgmonitor.ExporterEnabled(cluster) { + if !pgmonitor.ExporterEnabled(ctx, cluster) && !feature.Enabled(ctx, feature.OpenTelemetryMetrics) { action = func(ctx context.Context, exec postgres.Executor) error { - return pgmonitor.DisableExporterInPostgreSQL(ctx, exec) + return pgmonitor.DisableMonitoringUserInPostgres(ctx, exec) } } @@ -160,12 +157,11 @@ func (r *Reconciler) reconcileMonitoringSecret( return nil, err } - if !pgmonitor.ExporterEnabled(cluster) { - // TODO: Checking if the exporter is enabled to determine when monitoring - // secret should be created. If more tools are added to the monitoring - // suite, they could need the secret when the exporter is not enabled. - // This check may need to be updated. - // Exporter is disabled; delete monitoring secret if it exists. + // Checking if the exporter is enabled or OpenTelemetryMetrics feature + // is enabled to determine when monitoring secret should be created, + // since our implementation of the SqlQuery receiver in the OTel Collector + // uses the monitoring user as well. 
+ if !pgmonitor.ExporterEnabled(ctx, cluster) && !feature.Enabled(ctx, feature.OpenTelemetryMetrics) { if err == nil { err = errors.WithStack(r.deleteControlled(ctx, cluster, existing)) } @@ -227,19 +223,6 @@ func (r *Reconciler) reconcileMonitoringSecret( return nil, err } -// addPGMonitorToInstancePodSpec performs the necessary setup to add -// pgMonitor resources on a PodTemplateSpec -func addPGMonitorToInstancePodSpec( - ctx context.Context, - cluster *v1beta1.PostgresCluster, - template *corev1.PodTemplateSpec, - exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap) error { - - err := addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, exporterWebConfig) - - return err -} - // addPGMonitorExporterToInstancePodSpec performs the necessary setup to // add pgMonitor exporter resources to a PodTemplateSpec // TODO (jmckulk): refactor to pass around monitoring secret; Without the secret @@ -249,10 +232,10 @@ func addPGMonitorExporterToInstancePodSpec( ctx context.Context, cluster *v1beta1.PostgresCluster, template *corev1.PodTemplateSpec, - exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap) error { + exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap) { - if !pgmonitor.ExporterEnabled(cluster) { - return nil + if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + return } certSecret := cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret @@ -385,21 +368,23 @@ func addPGMonitorExporterToInstancePodSpec( // add the proper label to support Pod discovery by Prometheus per pgMonitor configuration initialize.Labels(template) template.Labels[naming.LabelPGMonitorDiscovery] = "true" - - return nil } // reconcileExporterWebConfig reconciles the configmap containing the webconfig for exporter tls func (r *Reconciler) reconcileExporterWebConfig(ctx context.Context, cluster *v1beta1.PostgresCluster) (*corev1.ConfigMap, error) { + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + return nil, nil + } + existing := &corev1.ConfigMap{ObjectMeta: naming.ExporterWebConfigMap(cluster)} err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing)) if client.IgnoreNotFound(err) != nil { return nil, err } - if !pgmonitor.ExporterEnabled(cluster) || cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret == nil { + if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) || cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret == nil { // We could still have a NotFound error here so check the err. // If no error that means the configmap is found and needs to be deleted if err == nil { @@ -456,7 +441,7 @@ func (r *Reconciler) reconcileExporterQueriesConfig(ctx context.Context, return nil, err } - if !pgmonitor.ExporterEnabled(cluster) { + if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { // We could still have a NotFound error here so check the err. 
// If no error that means the configmap is found and needs to be deleted if err == nil { diff --git a/internal/controller/postgrescluster/pgmonitor_test.go b/internal/controller/postgrescluster/pgmonitor_test.go index 36a5027aaa..bf46dd204b 100644 --- a/internal/controller/postgrescluster/pgmonitor_test.go +++ b/internal/controller/postgrescluster/pgmonitor_test.go @@ -39,7 +39,7 @@ func testExporterCollectorsAnnotation(t *testing.T, ctx context.Context, cluster naming.PostgresExporterCollectorsAnnotation: "wrong-value", }) - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, queriesConfig, webConfig)) + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, queriesConfig, webConfig) assert.Equal(t, len(template.Spec.Containers), 1) container := template.Spec.Containers[0] @@ -56,7 +56,7 @@ func testExporterCollectorsAnnotation(t *testing.T, ctx context.Context, cluster naming.PostgresExporterCollectorsAnnotation: "None", }) - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, queriesConfig, webConfig)) + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, queriesConfig, webConfig) assert.Equal(t, len(template.Spec.Containers), 1) container := template.Spec.Containers[0] @@ -71,7 +71,7 @@ func testExporterCollectorsAnnotation(t *testing.T, ctx context.Context, cluster naming.PostgresExporterCollectorsAnnotation: "none", }) - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, queriesConfig, webConfig)) + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, queriesConfig, webConfig) assert.Assert(t, cmp.Contains(strings.Join(template.Spec.Containers[0].Command, "\n"), "--[no-]collector")) }) }) @@ -100,7 +100,7 @@ func TestAddPGMonitorExporterToInstancePodSpec(t *testing.T) { t.Run("ExporterDisabled", func(t *testing.T) { template := &corev1.PodTemplateSpec{} - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, nil, nil)) + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, nil, nil) assert.DeepEqual(t, template, &corev1.PodTemplateSpec{}) }) @@ -121,8 +121,7 @@ func TestAddPGMonitorExporterToInstancePodSpec(t *testing.T) { }, } - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, nil)) - + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, nil) assert.Equal(t, len(template.Spec.Containers), 2) container := template.Spec.Containers[1] @@ -205,8 +204,7 @@ volumeMounts: }, } - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, nil)) - + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, nil) assert.Equal(t, len(template.Spec.Containers), 2) container := template.Spec.Containers[1] @@ -255,8 +253,7 @@ name: exporter-config }, } - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, nil)) - + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, nil) assert.Equal(t, len(template.Spec.Containers), 2) container := template.Spec.Containers[1] @@ -301,8 +298,7 @@ name: exporter-config testConfigMap := new(corev1.ConfigMap) testConfigMap.Name = "test-web-conf" - assert.NilError(t, addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, testConfigMap)) - + addPGMonitorExporterToInstancePodSpec(ctx, cluster, template, exporterQueriesConfig, testConfigMap) 
assert.Equal(t, len(template.Spec.Containers), 2) container := template.Spec.Containers[1] diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index c0660b9707..25ffeefc99 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -6,6 +6,7 @@ package postgrescluster import ( "bytes" + "cmp" "context" "fmt" "io" @@ -29,7 +30,10 @@ import ( "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/patroni" "github.com/crunchydata/postgres-operator/internal/pgaudit" + "github.com/crunchydata/postgres-operator/internal/pgbackrest" + "github.com/crunchydata/postgres-operator/internal/pgmonitor" "github.com/crunchydata/postgres-operator/internal/postgis" "github.com/crunchydata/postgres-operator/internal/postgres" pgpassword "github.com/crunchydata/postgres-operator/internal/postgres/password" @@ -37,13 +41,74 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +// generatePostgresParameters produces the parameter set for cluster that +// incorporates, from highest to lowest precedence: +// 1. mandatory values determined by controllers +// 2. parameters in cluster.spec.config.parameters +// 3. parameters in cluster.spec.patroni.dynamicConfiguration +// 4. default values determined by controllers +func (*Reconciler) generatePostgresParameters( + ctx context.Context, cluster *v1beta1.PostgresCluster, backupsSpecFound bool, +) *postgres.ParameterSet { + builtin := postgres.NewParameters() + pgaudit.PostgreSQLParameters(&builtin) + pgbackrest.PostgreSQL(cluster, &builtin, backupsSpecFound) + pgmonitor.PostgreSQLParameters(ctx, cluster, &builtin) + postgres.SetHugePages(cluster, &builtin) + + // Last write wins, so start with the recommended defaults. + result := cmp.Or(builtin.Default.DeepCopy(), postgres.NewParameterSet()) + + // Overwrite the above with any parameters specified in the Patroni section. + for k, v := range patroni.PostgresParameters(cluster.Spec.Patroni).AsMap() { + result.Add(k, v) + } + + // Overwrite the above with any parameters specified in the Config section. + if config := cluster.Spec.Config; config != nil { + for k, v := range config.Parameters { + result.Add(k, v.String()) + } + } + + // Overwrite the above with mandatory values. + if builtin.Mandatory != nil { + // This parameter is a comma-separated list. Rather than overwrite the + // user-defined value, we want to combine it with the mandatory one. + preload := result.Value("shared_preload_libraries") + + for k, v := range builtin.Mandatory.AsMap() { + // Load mandatory libraries ahead of user-defined libraries. + if k == "shared_preload_libraries" && len(v) > 0 && len(preload) > 0 { + v = v + "," + preload + } + + result.Add(k, v) + } + } + + // Some preload libraries belong at specific positions in this list. + if preload, ok := result.Get("shared_preload_libraries"); ok { + // Load "citus" ahead of any other libraries.
+ // - https://github.com/citusdata/citus/blob/v12.0.0/src/backend/distributed/shared_library_init.c#L417-L419 + // - https://github.com/citusdata/citus/blob/v13.0.0/src/backend/distributed/shared_library_init.c#L420-L422 + if strings.Contains(preload, "citus") { + preload = "citus," + preload + } + + result.Add("shared_preload_libraries", preload) + } + + return result +} + // generatePostgresUserSecret returns a Secret containing a password and // connection details for the first database in spec. When existing is nil or // lacks a password or verifier, a new password and verifier are generated. func (r *Reconciler) generatePostgresUserSecret( cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresUserSpec, existing *corev1.Secret, ) (*corev1.Secret, error) { - username := string(spec.Name) + username := spec.Name intent := &corev1.Secret{ObjectMeta: naming.PostgresUserSecret(cluster, username)} intent.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret")) initialize.Map(&intent.Data) @@ -100,7 +165,7 @@ func (r *Reconciler) generatePostgresUserSecret( // When a database has been specified, include it and a connection URI. // - https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING if len(spec.Databases) > 0 { - database := string(spec.Databases[0]) + database := spec.Databases[0] intent.Data["dbname"] = []byte(database) intent.Data["uri"] = []byte((&url.URL{ @@ -133,7 +198,7 @@ func (r *Reconciler) generatePostgresUserSecret( intent.Data["pgbouncer-port"] = []byte(port) if len(spec.Databases) > 0 { - database := string(spec.Databases[0]) + database := spec.Databases[0] intent.Data["pgbouncer-uri"] = []byte((&url.URL{ Scheme: "postgresql", @@ -216,9 +281,7 @@ func (r *Reconciler) reconcilePostgresDatabases( } } else { for _, user := range cluster.Spec.Users { - for _, database := range user.Databases { - databases.Insert(string(database)) - } + databases.Insert(user.Databases...) } } @@ -379,10 +442,9 @@ func (r *Reconciler) reconcilePostgresUserSecrets( r.Recorder.Event(cluster, corev1.EventTypeWarning, "InvalidUser", allErrors.ToAggregate().Error()) } else { - identifier := v1beta1.PostgresIdentifier(cluster.Name) specUsers = []v1beta1.PostgresUserSpec{{ - Name: identifier, - Databases: []v1beta1.PostgresIdentifier{identifier}, + Name: cluster.Name, + Databases: []string{cluster.Name}, }} } } @@ -390,7 +452,7 @@ func (r *Reconciler) reconcilePostgresUserSecrets( // Index user specifications by PostgreSQL user name. 
userSpecs := make(map[string]*v1beta1.PostgresUserSpec, len(specUsers)) for i := range specUsers { - userSpecs[string(specUsers[i].Name)] = &specUsers[i] + userSpecs[specUsers[i].Name] = &specUsers[i] } secrets := &corev1.SecretList{} diff --git a/internal/controller/postgrescluster/postgres_test.go b/internal/controller/postgrescluster/postgres_test.go index 5395b6f95f..f6da644a09 100644 --- a/internal/controller/postgrescluster/postgres_test.go +++ b/internal/controller/postgrescluster/postgres_test.go @@ -21,7 +21,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/yaml" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" @@ -35,6 +34,123 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +func TestGeneratePostgresParameters(t *testing.T) { + ctx := context.Background() + reconciler := &Reconciler{} + + builtin := reconciler.generatePostgresParameters(ctx, v1beta1.NewPostgresCluster(), false) + assert.Assert(t, len(builtin.AsMap()) > 0, + "expected an empty cluster to have some builtin parameters") + + assert.Equal(t, builtin.Value("jit"), "off", + "BUG IN TEST: expected JIT to be disabled") + + assert.Equal(t, builtin.Value("shared_preload_libraries"), "pgaudit", + "BUG IN TEST: expected pgAudit to be mandatory") + + t.Run("Config", func(t *testing.T) { + cluster := v1beta1.NewPostgresCluster() + require.UnmarshalInto(t, &cluster.Spec.Config, `{ + parameters: { + something: str, + another: 5, + }, + }`) + + result := reconciler.generatePostgresParameters(ctx, cluster, false) + assert.Assert(t, cmp.LenMap(result.AsMap(), len(builtin.AsMap())+2), + "expected two parameters from the Config section") + + assert.Equal(t, result.Value("another"), "5") + assert.Equal(t, result.Value("something"), "str") + }) + + t.Run("Patroni", func(t *testing.T) { + cluster := v1beta1.NewPostgresCluster() + require.UnmarshalInto(t, &cluster.Spec.Patroni, `{ + dynamicConfiguration: { + postgresql: { parameters: { + something: str, + another: 5.1, + } }, + }, + }`) + + result := reconciler.generatePostgresParameters(ctx, cluster, false) + assert.Assert(t, cmp.LenMap(result.AsMap(), len(builtin.AsMap())+2), + "expected two parameters from the Patroni section") + + assert.Equal(t, result.Value("another"), "5.1") + assert.Equal(t, result.Value("something"), "str") + }) + + t.Run("Precedence", func(t *testing.T) { + cluster := v1beta1.NewPostgresCluster() + require.UnmarshalInto(t, &cluster.Spec.Config, `{ + parameters: { + something: replaced, + unrelated: used, + jit: "on", + }, + }`) + require.UnmarshalInto(t, &cluster.Spec.Patroni, `{ + dynamicConfiguration: { + postgresql: { parameters: { + something: str, + another: 5.1, + } }, + }, + }`) + + result := reconciler.generatePostgresParameters(ctx, cluster, false) + assert.Assert(t, cmp.LenMap(result.AsMap(), len(builtin.AsMap())+3+1-1), + "expected three parameters from the Config section,"+ + "plus one from the Patroni section, minus one default") + + assert.Equal(t, result.Value("another"), "5.1") // Patroni + assert.Equal(t, result.Value("something"), "replaced") // Config + assert.Equal(t, result.Value("unrelated"), "used") // Config + assert.Equal(t, result.Value("jit"), "on") // Config + }) + + t.Run("shared_preload_libraries", func(t *testing.T) { + t.Run("NumericIncluded", func(t *testing.T) { + cluster := 
v1beta1.NewPostgresCluster() + require.UnmarshalInto(t, &cluster.Spec.Config, `{ + parameters: { + shared_preload_libraries: 123, + }, + }`) + + result := reconciler.generatePostgresParameters(ctx, cluster, false) + assert.Assert(t, cmp.Contains(result.Value("shared_preload_libraries"), "123")) + }) + + t.Run("Precedence", func(t *testing.T) { + cluster := v1beta1.NewPostgresCluster() + require.UnmarshalInto(t, &cluster.Spec.Config, `{ + parameters: { + shared_preload_libraries: given, + }, + }`) + + result := reconciler.generatePostgresParameters(ctx, cluster, false) + assert.Equal(t, result.Value("shared_preload_libraries"), "pgaudit,given", + "expected mandatory ahead of specified") + + require.UnmarshalInto(t, &cluster.Spec.Config, `{ + parameters: { + shared_preload_libraries: 'given, citus,other' + }, + }`) + + result = reconciler.generatePostgresParameters(ctx, cluster, false) + assert.Equal(t, result.Value("shared_preload_libraries"), "citus,pgaudit,given, citus,other", + "expected citus in front") + }) + }) +} + func TestGeneratePostgresUserSecret(t *testing.T) { _, tClient := setupKubernetes(t) require.ParallelCapacity(t, 0) @@ -163,7 +279,7 @@ func TestGeneratePostgresUserSecret(t *testing.T) { } // Present when specified. - spec.Databases = []v1beta1.PostgresIdentifier{"db1"} + spec.Databases = []string{"db1"} secret, err = reconciler.generatePostgresUserSecret(cluster, &spec, nil) assert.NilError(t, err) @@ -180,7 +296,7 @@ func TestGeneratePostgresUserSecret(t *testing.T) { } // Only the first in the list. - spec.Databases = []v1beta1.PostgresIdentifier{"first", "asdf"} + spec.Databases = []string{"first", "asdf"} secret, err = reconciler.generatePostgresUserSecret(cluster, &spec, nil) assert.NilError(t, err) @@ -198,9 +314,9 @@ func TestGeneratePostgresUserSecret(t *testing.T) { }) t.Run("PgBouncer", func(t *testing.T) { - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, &cluster.Spec, `{ proxy: { pgBouncer: { port: 10220 } }, - }`), &cluster.Spec)) + }`) secret, err := reconciler.generatePostgresUserSecret(cluster, spec, nil) assert.NilError(t, err) @@ -214,7 +330,7 @@ func TestGeneratePostgresUserSecret(t *testing.T) { // Includes a URI when possible. 
spec := *spec - spec.Databases = []v1beta1.PostgresIdentifier{"yes", "no"} + spec.Databases = []string{"yes", "no"} secret, err = reconciler.generatePostgresUserSecret(cluster, &spec, nil) assert.NilError(t, err) @@ -250,14 +366,14 @@ func TestReconcilePostgresVolumes(t *testing.T) { t.Cleanup(func() { assert.Check(t, tClient.Delete(ctx, cluster)) }) spec := &v1beta1.PostgresInstanceSetSpec{} - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, spec, `{ name: "some-instance", dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } }, storageClassName: "storage-class-for-data", }, - }`), spec)) + }`) instance := &appsv1.StatefulSet{ObjectMeta: naming.GenerateInstance(cluster, spec)} pvc, err := reconciler.reconcilePostgresDataVolume(ctx, cluster, spec, instance, nil, nil) @@ -290,14 +406,14 @@ volumeMode: Filesystem t.Cleanup(func() { assert.Check(t, tClient.Delete(ctx, cluster)) }) spec := &v1beta1.PostgresInstanceSetSpec{} - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, spec, `{ name: "some-instance", dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } }, storageClassName: "storage-class-for-data", }, - }`), spec)) + }`) instance := &appsv1.StatefulSet{ObjectMeta: naming.GenerateInstance(cluster, spec)} recorder := events.NewRecorder(t, runtime.Scheme) @@ -392,14 +508,14 @@ volumeMode: Filesystem t.Cleanup(func() { assert.Check(t, tClient.Delete(ctx, cluster)) }) spec := &v1beta1.PostgresInstanceSetSpec{} - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, spec, `{ name: "some-instance", dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } }, storageClassName: "storage-class-for-data", }, - }`), spec)) + }`) instance := &appsv1.StatefulSet{ObjectMeta: naming.GenerateInstance(cluster, spec)} recorder := events.NewRecorder(t, runtime.Scheme) @@ -455,14 +571,14 @@ volumeMode: Filesystem t.Cleanup(func() { assert.Check(t, tClient.Delete(ctx, cluster)) }) spec := &v1beta1.PostgresInstanceSetSpec{} - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, spec, `{ name: "some-instance", dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } }, storageClassName: "storage-class-for-data", }, - }`), spec)) + }`) instance := &appsv1.StatefulSet{ObjectMeta: naming.GenerateInstance(cluster, spec)} observed := &Instance{} @@ -475,13 +591,13 @@ volumeMode: Filesystem t.Run("Specified", func(t *testing.T) { spec := spec.DeepCopy() - assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, spec, `{ walVolumeClaimSpec: { accessModes: [ReadWriteMany], resources: { requests: { storage: 2Gi } }, storageClassName: "storage-class-for-wal", }, - }`), spec)) + }`) pvc, err := reconciler.reconcilePostgresWALVolume(ctx, cluster, spec, instance, observed, nil) assert.NilError(t, err) diff --git a/internal/controller/postgrescluster/volumes.go b/internal/controller/postgrescluster/volumes.go index c8d3c0a38d..aeeeac6166 100644 --- a/internal/controller/postgrescluster/volumes.go +++ b/internal/controller/postgrescluster/volumes.go @@ -31,7 +31,8 @@ import ( // +kubebuilder:rbac:groups="",resources="persistentvolumeclaims",verbs={list} // observePersistentVolumeClaims reads all PVCs for cluster from the Kubernetes -// API and sets the PersistentVolumeResizing condition as appropriate. 
+// API and sets the PersistentVolumeResizing and/or the PersistentVolumeResizeError +// conditions as appropriate. func (r *Reconciler) observePersistentVolumeClaims( ctx context.Context, cluster *v1beta1.PostgresCluster, ) ([]*corev1.PersistentVolumeClaim, error) { @@ -53,6 +54,12 @@ func (r *Reconciler) observePersistentVolumeClaims( ObservedGeneration: cluster.Generation, } + // create a condition for surfacing any PVC resize error conditions + resizingError := metav1.Condition{ + Type: v1beta1.PersistentVolumeResizeError, + ObservedGeneration: cluster.Generation, + } + minNotZero := func(a, b metav1.Time) metav1.Time { if b.IsZero() || (a.Before(&b) && !a.IsZero()) { return a @@ -119,7 +126,31 @@ func (r *Reconciler) observePersistentVolumeClaims( resizing.LastTransitionTime = minNotZero( resizing.LastTransitionTime, condition.LastTransitionTime) } + case + // The "ControllerResizeError" and "NodeResizeError" conditions were added in + // Kubernetes v1.31 for indicating node and controller failures when resizing + // a volume: + // - https://github.com/kubernetes/enhancements/pull/4692 + // - https://github.com/kubernetes/kubernetes/pull/126108 + corev1.PersistentVolumeClaimControllerResizeError, + corev1.PersistentVolumeClaimNodeResizeError: + + // Add pertinent details from the resize error condition in the PVC to the resize + // error condition in the PostgresCluster status. In the event that there is both + // a controller resize error and a node resize error, only the details from one + // will be displayed at a time in the PostgresCluster condition. + if condition.Status == corev1.ConditionTrue { + resizingError.Status = metav1.ConditionStatus(condition.Status) + resizingError.Reason = condition.Reason + resizingError.Message = condition.Message + resizingError.LastTransitionTime = condition.LastTransitionTime + // corev1.PersistentVolumeClaimCondition.Reason is optional + // while metav1.Condition.Reason is required. + if resizingError.Reason == "" { + resizingError.Reason = string(condition.Type) + } + } case // The "ModifyingVolume" and "ModifyVolumeError" conditions occur // when the attribute class of a PVC is changing. 
These attributes @@ -140,6 +171,12 @@ func (r *Reconciler) observePersistentVolumeClaims( meta.RemoveStatusCondition(&cluster.Status.Conditions, resizing.Type) } + if resizingError.Status != "" { + meta.SetStatusCondition(&cluster.Status.Conditions, resizingError) + } else { + meta.RemoveStatusCondition(&cluster.Status.Conditions, resizingError.Type) + } + return initialize.Pointers(volumes.Items...), err } diff --git a/internal/controller/standalone_pgadmin/configmap.go b/internal/controller/standalone_pgadmin/configmap.go index 9c84ab31f5..8382bbb2ca 100644 --- a/internal/controller/standalone_pgadmin/configmap.go +++ b/internal/controller/standalone_pgadmin/configmap.go @@ -18,6 +18,7 @@ import ( "github.com/pkg/errors" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -32,6 +33,12 @@ func (r *PGAdminReconciler) reconcilePGAdminConfigMap( clusters map[string][]*v1beta1.PostgresCluster, ) (*corev1.ConfigMap, error) { configmap, err := configmap(pgadmin, clusters) + if err != nil { + return configmap, err + } + + err = collector.EnablePgAdminLogging(ctx, pgadmin.Spec.Instrumentation, configmap) + if err == nil { err = errors.WithStack(r.setControllerReference(pgadmin, configmap)) } diff --git a/internal/controller/standalone_pgadmin/pod.go b/internal/controller/standalone_pgadmin/pod.go index 481c89c27f..7590a3a3cc 100644 --- a/internal/controller/standalone_pgadmin/pod.go +++ b/internal/controller/standalone_pgadmin/pod.go @@ -7,6 +7,7 @@ package standalone_pgadmin import ( "context" "fmt" + "path" "strings" corev1 "k8s.io/api/core/v1" @@ -17,6 +18,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/kubernetes" "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/shell" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -28,8 +30,17 @@ const ( ldapFilePath = "~postgres-operator/ldap-bind-password" gunicornConfigFilePath = "~postgres-operator/" + gunicornConfigKey - // Nothing should be mounted to this location except the script our initContainer writes + // scriptMountPath is where to mount a temporary directory that is only + // writable during Pod initialization. + // + // NOTE: No ConfigMap nor Secret should ever be mounted here because they + // could be used to inject code through "config_system.py". scriptMountPath = "/etc/pgadmin" + + dataMountPath = "/var/lib/pgadmin" + LogDirectoryAbsolutePath = dataMountPath + "/logs" + GunicornLogFileAbsolutePath = LogDirectoryAbsolutePath + "/gunicorn.log" + LogFileAbsolutePath = LogDirectoryAbsolutePath + "/pgadmin.log" ) // pod populates a PodSpec with the container and volumes needed to run pgAdmin. @@ -39,19 +50,10 @@ func pod( outPod *corev1.PodSpec, pgAdminVolume *corev1.PersistentVolumeClaim, ) { - const ( - // config and data volume names - configVolumeName = "pgadmin-config" - dataVolumeName = "pgadmin-data" - logVolumeName = "pgadmin-log" - scriptVolumeName = "pgadmin-config-system" - tempVolumeName = "tmp" - ) - // create the projected volume of config maps for use in // 1. dynamic server discovery // 2. 
adding the config variables during pgAdmin startup - configVolume := corev1.Volume{Name: configVolumeName} + configVolume := corev1.Volume{Name: "pgadmin-config"} configVolume.VolumeSource = corev1.VolumeSource{ Projected: &corev1.ProjectedVolumeSource{ Sources: podConfigFiles(inConfigMap, *inPGAdmin), @@ -59,7 +61,7 @@ func pod( } // create the data volume for the persistent database - dataVolume := corev1.Volume{Name: dataVolumeName} + dataVolume := corev1.Volume{Name: "pgadmin-data"} dataVolume.VolumeSource = corev1.VolumeSource{ PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ ClaimName: pgAdminVolume.Name, @@ -67,17 +69,9 @@ func pod( }, } - // create the temp volume for logs - logVolume := corev1.Volume{Name: logVolumeName} - logVolume.VolumeSource = corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{ - Medium: corev1.StorageMediumMemory, - }, - } - // Volume used to write a custom config_system.py file in the initContainer // which then loads the configs found in the `configVolume` - scriptVolume := corev1.Volume{Name: scriptVolumeName} + scriptVolume := corev1.Volume{Name: "pgadmin-config-system"} scriptVolume.VolumeSource = corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{ Medium: corev1.StorageMediumMemory, @@ -92,7 +86,7 @@ func pod( // create a temp volume for restart pid/other/debugging use // TODO: discuss tmp vol vs. persistent vol - tmpVolume := corev1.Volume{Name: tempVolumeName} + tmpVolume := corev1.Volume{Name: "tmp"} tmpVolume.VolumeSource = corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{ Medium: corev1.StorageMediumMemory, @@ -133,25 +127,21 @@ func pod( }, VolumeMounts: []corev1.VolumeMount{ { - Name: configVolumeName, + Name: configVolume.Name, MountPath: configMountPath, ReadOnly: true, }, { - Name: dataVolumeName, - MountPath: "/var/lib/pgadmin", + Name: dataVolume.Name, + MountPath: dataMountPath, }, { - Name: logVolumeName, - MountPath: "/var/log/pgadmin", - }, - { - Name: scriptVolumeName, + Name: scriptVolume.Name, MountPath: scriptMountPath, ReadOnly: true, }, { - Name: tempVolumeName, + Name: tmpVolume.Name, MountPath: "/tmp", }, }, @@ -186,10 +176,14 @@ func pod( VolumeMounts: []corev1.VolumeMount{ // Volume to write a custom `config_system.py` file to. 
{ - Name: scriptVolumeName, + Name: scriptVolume.Name, MountPath: scriptMountPath, ReadOnly: false, }, + { + Name: dataVolume.Name, + MountPath: dataMountPath, + }, }, } @@ -197,7 +191,6 @@ func pod( outPod.Volumes = []corev1.Volume{ configVolume, dataVolume, - logVolume, scriptVolume, tmpVolume, } @@ -236,16 +229,9 @@ func podConfigFiles(configmap *corev1.ConfigMap, pgadmin v1beta1.PGAdmin) []core if pgadmin.Spec.Config.ConfigDatabaseURI != nil { config = append(config, corev1.VolumeProjection{ - Secret: &corev1.SecretProjection{ - LocalObjectReference: pgadmin.Spec.Config.ConfigDatabaseURI.LocalObjectReference, - Optional: pgadmin.Spec.Config.ConfigDatabaseURI.Optional, - Items: []corev1.KeyToPath{ - { - Key: pgadmin.Spec.Config.ConfigDatabaseURI.Key, - Path: configDatabaseURIPath, - }, - }, - }, + Secret: initialize.Pointer( + pgadmin.Spec.Config.ConfigDatabaseURI.AsProjection(configDatabaseURIPath), + ), }) } @@ -259,16 +245,9 @@ func podConfigFiles(configmap *corev1.ConfigMap, pgadmin v1beta1.PGAdmin) []core // - https://www.pgadmin.org/docs/pgadmin4/development/enabling_ldap_authentication.html if pgadmin.Spec.Config.LDAPBindPassword != nil { config = append(config, corev1.VolumeProjection{ - Secret: &corev1.SecretProjection{ - LocalObjectReference: pgadmin.Spec.Config.LDAPBindPassword.LocalObjectReference, - Optional: pgadmin.Spec.Config.LDAPBindPassword.Optional, - Items: []corev1.KeyToPath{ - { - Key: pgadmin.Spec.Config.LDAPBindPassword.Key, - Path: ldapFilePath, - }, - }, - }, + Secret: initialize.Pointer( + pgadmin.Spec.Config.LDAPBindPassword.AsProjection(ldapFilePath), + ), }) } @@ -396,8 +375,10 @@ func startupCommand() []string { // configDatabaseURIPath is the path for mounting the database URI connection string configDatabaseURIPathAbsolutePath = configMountPath + "/" + configDatabaseURIPath + // The constants set in configSystem will not be overridden through + // spec.config.settings. configSystem = ` -import glob, json, re, os +import glob, json, re, os, logging DEFAULT_BINARY_PATHS = {'pg': sorted([''] + glob.glob('/usr/pgsql-*/bin')).pop()} with open('` + configMountPath + `/` + configFilePath + `') as _f: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) @@ -409,32 +390,74 @@ if os.path.isfile('` + ldapPasswordAbsolutePath + `'): if os.path.isfile('` + configDatabaseURIPathAbsolutePath + `'): with open('` + configDatabaseURIPathAbsolutePath + `') as _f: CONFIG_DATABASE_URI = _f.read() + +DATA_DIR = '` + dataMountPath + `' +LOG_FILE = '` + LogFileAbsolutePath + `' +LOG_ROTATION_AGE = 24 * 60 # minutes +LOG_ROTATION_SIZE = 5 # MiB +LOG_ROTATION_MAX_LOG_FILES = 1 + +JSON_LOGGER = True +CONSOLE_LOG_LEVEL = logging.WARNING +FILE_LOG_LEVEL = logging.INFO +FILE_LOG_FORMAT_JSON = {'time': 'created', 'name': 'name', 'level': 'levelname', 'message': 'message'} ` - // gunicorn reads from the `/etc/pgadmin/gunicorn_config.py` file during startup + // Gunicorn reads from the `/etc/pgadmin/gunicorn_config.py` file during startup // after all other config files. // - https://docs.gunicorn.org/en/latest/configure.html#configuration-file // // This command writes a script in `/etc/pgadmin/gunicorn_config.py` that reads // from the `gunicorn-config.json` file and sets those variables globally. - // That way those values are available as settings when gunicorn starts. + // That way those values are available as settings when Gunicorn starts. 
// - // Note: All gunicorn settings are lowercase with underscores, so ignore + // Note: All Gunicorn settings are lowercase with underscores, so ignore // any keys/names that are not. + // + // Gunicorn uses the Python logging package, which sets the following attributes: + // https://docs.python.org/3/library/logging.html#logrecord-attributes. + // JsonFormatter is used to format the log: https://pypi.org/project/jsonformatter/ gunicornConfig = ` -import json, re +import json, re, collections, copy, gunicorn, gunicorn.glogging with open('` + configMountPath + `/` + gunicornConfigFilePath + `') as _f: _conf, _data = re.compile(r'[a-z_]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + +gunicorn.SERVER_SOFTWARE = 'Python' +logconfig_dict = copy.deepcopy(gunicorn.glogging.CONFIG_DEFAULTS) +logconfig_dict['loggers']['gunicorn.access']['handlers'] = ['file'] +logconfig_dict['loggers']['gunicorn.error']['handlers'] = ['file'] +logconfig_dict['handlers']['file'] = { + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': '` + GunicornLogFileAbsolutePath + `', + 'backupCount': 1, 'maxBytes': 2 << 20, # MiB + 'formatter': 'json', +} +logconfig_dict['formatters']['json'] = { + 'class': 'jsonformatter.JsonFormatter', + 'separators': (',', ':'), + 'format': collections.OrderedDict([ + ('time', 'created'), + ('name', 'name'), + ('level', 'levelname'), + ('message', 'message'), + ]) +} ` ) args := []string{strings.TrimLeft(configSystem, "\n"), strings.TrimLeft(gunicornConfig, "\n")} script := strings.Join([]string{ - // Use the initContainer to create this path to avoid the error noted here: + // Create the config directory so Kubernetes can mount it later. // - https://issue.k8s.io/121294 - `mkdir -p ` + configMountPath, + shell.MakeDirectories(0o775, scriptMountPath, configMountPath), + + // Create the logs directory with g+rwx so the OTel Collector can + // write to it as well. + // TODO(log-rotation): Move the last segment into the Collector startup. + shell.MakeDirectories(0o775, dataMountPath, path.Join(LogDirectoryAbsolutePath, "receiver")), + // Write the system and server configurations. 
`echo "$1" > ` + scriptMountPath + `/config_system.py`, `echo "$2" > ` + scriptMountPath + `/gunicorn_config.py`, diff --git a/internal/controller/standalone_pgadmin/pod_test.go b/internal/controller/standalone_pgadmin/pod_test.go index 08d6eb129f..ce3ad076d2 100644 --- a/internal/controller/standalone_pgadmin/pod_test.go +++ b/internal/controller/standalone_pgadmin/pod_test.go @@ -127,8 +127,6 @@ containers: readOnly: true - mountPath: /var/lib/pgadmin name: pgadmin-data - - mountPath: /var/log/pgadmin - name: pgadmin-log - mountPath: /etc/pgadmin name: pgadmin-config-system readOnly: true @@ -140,12 +138,13 @@ initContainers: - -ceu - -- - |- - mkdir -p /etc/pgadmin/conf.d + mkdir -p '/etc/pgadmin/conf.d' && chmod 0775 '/etc/pgadmin/conf.d' + mkdir -p '/var/lib/pgadmin/logs/receiver' && chmod 0775 '/var/lib/pgadmin/logs/receiver' '/var/lib/pgadmin/logs' echo "$1" > /etc/pgadmin/config_system.py echo "$2" > /etc/pgadmin/gunicorn_config.py - startup - | - import glob, json, re, os + import glob, json, re, os, logging DEFAULT_BINARY_PATHS = {'pg': sorted([''] + glob.glob('/usr/pgsql-*/bin')).pop()} with open('/etc/pgadmin/conf.d/~postgres-operator/pgadmin-settings.json') as _f: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) @@ -157,12 +156,44 @@ initContainers: if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri'): with open('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri') as _f: CONFIG_DATABASE_URI = _f.read() + + DATA_DIR = '/var/lib/pgadmin' + LOG_FILE = '/var/lib/pgadmin/logs/pgadmin.log' + LOG_ROTATION_AGE = 24 * 60 # minutes + LOG_ROTATION_SIZE = 5 # MiB + LOG_ROTATION_MAX_LOG_FILES = 1 + + JSON_LOGGER = True + CONSOLE_LOG_LEVEL = logging.WARNING + FILE_LOG_LEVEL = logging.INFO + FILE_LOG_FORMAT_JSON = {'time': 'created', 'name': 'name', 'level': 'levelname', 'message': 'message'} - | - import json, re + import json, re, collections, copy, gunicorn, gunicorn.glogging with open('/etc/pgadmin/conf.d/~postgres-operator/gunicorn-config.json') as _f: _conf, _data = re.compile(r'[a-z_]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + + gunicorn.SERVER_SOFTWARE = 'Python' + logconfig_dict = copy.deepcopy(gunicorn.glogging.CONFIG_DEFAULTS) + logconfig_dict['loggers']['gunicorn.access']['handlers'] = ['file'] + logconfig_dict['loggers']['gunicorn.error']['handlers'] = ['file'] + logconfig_dict['handlers']['file'] = { + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': '/var/lib/pgadmin/logs/gunicorn.log', + 'backupCount': 1, 'maxBytes': 2 << 20, # MiB + 'formatter': 'json', + } + logconfig_dict['formatters']['json'] = { + 'class': 'jsonformatter.JsonFormatter', + 'separators': (',', ':'), + 'format': collections.OrderedDict([ + ('time', 'created'), + ('name', 'name'), + ('level', 'levelname'), + ('message', 'message'), + ]) + } name: pgadmin-startup resources: {} securityContext: @@ -178,6 +209,8 @@ initContainers: volumeMounts: - mountPath: /etc/pgadmin name: pgadmin-config-system + - mountPath: /var/lib/pgadmin + name: pgadmin-data volumes: - name: pgadmin-config projected: @@ -193,9 +226,6 @@ volumes: - name: pgadmin-data persistentVolumeClaim: claimName: "" -- emptyDir: - medium: Memory - name: pgadmin-log - emptyDir: medium: Memory sizeLimit: 32Ki @@ -316,8 +346,6 @@ containers: readOnly: true - mountPath: /var/lib/pgadmin name: pgadmin-data - - mountPath: /var/log/pgadmin - name: pgadmin-log - mountPath: /etc/pgadmin name: pgadmin-config-system readOnly: true 
@@ -329,12 +357,13 @@ initContainers: - -ceu - -- - |- - mkdir -p /etc/pgadmin/conf.d + mkdir -p '/etc/pgadmin/conf.d' && chmod 0775 '/etc/pgadmin/conf.d' + mkdir -p '/var/lib/pgadmin/logs/receiver' && chmod 0775 '/var/lib/pgadmin/logs/receiver' '/var/lib/pgadmin/logs' echo "$1" > /etc/pgadmin/config_system.py echo "$2" > /etc/pgadmin/gunicorn_config.py - startup - | - import glob, json, re, os + import glob, json, re, os, logging DEFAULT_BINARY_PATHS = {'pg': sorted([''] + glob.glob('/usr/pgsql-*/bin')).pop()} with open('/etc/pgadmin/conf.d/~postgres-operator/pgadmin-settings.json') as _f: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) @@ -346,12 +375,44 @@ initContainers: if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri'): with open('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri') as _f: CONFIG_DATABASE_URI = _f.read() + + DATA_DIR = '/var/lib/pgadmin' + LOG_FILE = '/var/lib/pgadmin/logs/pgadmin.log' + LOG_ROTATION_AGE = 24 * 60 # minutes + LOG_ROTATION_SIZE = 5 # MiB + LOG_ROTATION_MAX_LOG_FILES = 1 + + JSON_LOGGER = True + CONSOLE_LOG_LEVEL = logging.WARNING + FILE_LOG_LEVEL = logging.INFO + FILE_LOG_FORMAT_JSON = {'time': 'created', 'name': 'name', 'level': 'levelname', 'message': 'message'} - | - import json, re + import json, re, collections, copy, gunicorn, gunicorn.glogging with open('/etc/pgadmin/conf.d/~postgres-operator/gunicorn-config.json') as _f: _conf, _data = re.compile(r'[a-z_]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + + gunicorn.SERVER_SOFTWARE = 'Python' + logconfig_dict = copy.deepcopy(gunicorn.glogging.CONFIG_DEFAULTS) + logconfig_dict['loggers']['gunicorn.access']['handlers'] = ['file'] + logconfig_dict['loggers']['gunicorn.error']['handlers'] = ['file'] + logconfig_dict['handlers']['file'] = { + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': '/var/lib/pgadmin/logs/gunicorn.log', + 'backupCount': 1, 'maxBytes': 2 << 20, # MiB + 'formatter': 'json', + } + logconfig_dict['formatters']['json'] = { + 'class': 'jsonformatter.JsonFormatter', + 'separators': (',', ':'), + 'format': collections.OrderedDict([ + ('time', 'created'), + ('name', 'name'), + ('level', 'levelname'), + ('message', 'message'), + ]) + } image: new-image imagePullPolicy: Always name: pgadmin-startup @@ -371,6 +432,8 @@ initContainers: volumeMounts: - mountPath: /etc/pgadmin name: pgadmin-config-system + - mountPath: /var/lib/pgadmin + name: pgadmin-data volumes: - name: pgadmin-config projected: @@ -386,9 +449,6 @@ volumes: - name: pgadmin-data persistentVolumeClaim: claimName: "" -- emptyDir: - medium: Memory - name: pgadmin-log - emptyDir: medium: Memory sizeLimit: 32Ki diff --git a/internal/controller/standalone_pgadmin/statefulset.go b/internal/controller/standalone_pgadmin/statefulset.go index 223740b5e6..2c9a17595d 100644 --- a/internal/controller/standalone_pgadmin/statefulset.go +++ b/internal/controller/standalone_pgadmin/statefulset.go @@ -15,6 +15,8 @@ import ( "github.com/pkg/errors" + "github.com/crunchydata/postgres-operator/internal/collector" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -119,5 +121,19 @@ func statefulset( pod(pgadmin, configmap, &sts.Spec.Template.Spec, dataVolume) + if feature.Enabled(ctx, 
feature.OpenTelemetryLogs) { + // Logs for gunicorn and pgadmin write to /var/lib/pgadmin/logs + dataVolumeMount := corev1.VolumeMount{ + Name: "pgadmin-data", + MountPath: "/var/lib/pgadmin", + } + volumeMounts := []corev1.VolumeMount{ + dataVolumeMount, + } + + collector.AddToPod(ctx, pgadmin.Spec.Instrumentation, pgadmin.Spec.ImagePullPolicy, + configmap, &sts.Spec.Template.Spec, volumeMounts, "", []string{}, false) + } + return sts } diff --git a/internal/feature/features.go b/internal/feature/features.go index 04fd059c10..50169538b9 100644 --- a/internal/feature/features.go +++ b/internal/feature/features.go @@ -81,6 +81,12 @@ const ( // Support custom sidecars for PostgreSQL instance Pods InstanceSidecars = "InstanceSidecars" + // Export logs using OpenTelemetry + OpenTelemetryLogs = "OpenTelemetryLogs" + + // Export metrics using OpenTelemetry + OpenTelemetryMetrics = "OpenTelemetryMetrics" + // Support custom sidecars for pgBouncer Pods PGBouncerSidecars = "PGBouncerSidecars" @@ -104,8 +110,10 @@ func NewGate() MutableGate { AutoGrowVolumes: {Default: false, PreRelease: featuregate.Alpha}, BridgeIdentifiers: {Default: false, PreRelease: featuregate.Deprecated}, InstanceSidecars: {Default: false, PreRelease: featuregate.Alpha}, + OpenTelemetryLogs: {Default: false, PreRelease: featuregate.Alpha}, + OpenTelemetryMetrics: {Default: false, PreRelease: featuregate.Alpha}, PGBouncerSidecars: {Default: false, PreRelease: featuregate.Alpha}, - PGUpgradeCPUConcurrency: {Default: false, PreRelease: featuregate.Alpha}, + PGUpgradeCPUConcurrency: {Default: true, PreRelease: featuregate.Beta}, TablespaceVolumes: {Default: false, PreRelease: featuregate.Alpha}, VolumeSnapshots: {Default: false, PreRelease: featuregate.Alpha}, }); err != nil { diff --git a/internal/feature/features_test.go b/internal/feature/features_test.go index a70270e0b9..93683de4f0 100644 --- a/internal/feature/features_test.go +++ b/internal/feature/features_test.go @@ -21,8 +21,10 @@ func TestDefaults(t *testing.T) { assert.Assert(t, false == gate.Enabled(AutoGrowVolumes)) assert.Assert(t, false == gate.Enabled(BridgeIdentifiers)) assert.Assert(t, false == gate.Enabled(InstanceSidecars)) + assert.Assert(t, false == gate.Enabled(OpenTelemetryLogs)) + assert.Assert(t, false == gate.Enabled(OpenTelemetryMetrics)) assert.Assert(t, false == gate.Enabled(PGBouncerSidecars)) - assert.Assert(t, false == gate.Enabled(PGUpgradeCPUConcurrency)) + assert.Assert(t, true == gate.Enabled(PGUpgradeCPUConcurrency)) assert.Assert(t, false == gate.Enabled(TablespaceVolumes)) assert.Assert(t, false == gate.Enabled(VolumeSnapshots)) } diff --git a/internal/naming/limitations.md b/internal/naming/limitations.md index 1f25d1db3f..cc827cb0f2 100644 --- a/internal/naming/limitations.md +++ b/internal/naming/limitations.md @@ -83,6 +83,8 @@ name to 63 characters or less. The strategy for [generating Pod names](https://releases.k8s.io/v1.23.0/pkg/registry/core/pod/strategy.go#L62) truncates to 63 characters. The `.spec.hostname` field must be 63 characters or less. +[Container names are DNS labels](https://releases.k8s.io/v1.32.0/pkg/apis/core/validation/validation.go#L3563). + PodDisruptionBudget (PDB) [ReplicaSet names are DNS subdomains](https://releases.k8s.io/v1.23.0/pkg/apis/apps/validation/validation.go#L655). 
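// A minimal sketch of how the feature gates added above can be consumed; it
// assumes the feature package also provides a NewContext helper and that
// MutableGate exposes the upstream featuregate SetFromMap method, both of
// which are used elsewhere in the operator but do not appear in this diff.
package main

import (
	"context"
	"fmt"

	"github.com/crunchydata/postgres-operator/internal/feature"
)

func main() {
	gate := feature.NewGate()

	// The OpenTelemetry gates are Alpha and default to false, so opt in
	// explicitly. The string keys mirror the constants added above.
	if err := gate.SetFromMap(map[string]bool{
		"OpenTelemetryLogs":    true,
		"OpenTelemetryMetrics": true,
	}); err != nil {
		panic(err)
	}

	// Reconcilers read the gate from the context they receive.
	ctx := feature.NewContext(context.Background(), gate)

	fmt.Println(feature.Enabled(ctx, feature.OpenTelemetryLogs))       // true
	fmt.Println(feature.Enabled(ctx, feature.PGUpgradeCPUConcurrency)) // true: now Beta and enabled by default
}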
diff --git a/internal/naming/names.go b/internal/naming/names.go index fc310d837f..04923730fb 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -18,6 +18,8 @@ import ( ) const ( + ContainerCollector = "collector" + // ContainerDatabase is the name of the container running PostgreSQL and // supporting tools: Patroni, pgBackRest, etc. ContainerDatabase = "database" @@ -153,6 +155,12 @@ const ( // dedicated repo host, if configured. PGBackRestRepoLogPath = "/pgbackrest/%s/log" + // PGBouncerLogPath is the pgBouncer default log path configuration + PGBouncerLogPath = "/tmp" + + // PGBouncerFullLogPath is the full path to the pgBouncer log file + PGBouncerFullLogPath = PGBouncerLogPath + "/pgbouncer.log" + // suffix used with postgrescluster name for associated configmap. // for instance, if the cluster is named 'mycluster', the // configmap will be named 'mycluster-pgbackrest-config' diff --git a/internal/patroni/config.go b/internal/patroni/config.go index 16a638e262..2174607c63 100644 --- a/internal/patroni/config.go +++ b/internal/patroni/config.go @@ -13,8 +13,10 @@ import ( "sigs.k8s.io/yaml" "github.com/crunchydata/postgres-operator/internal/config" + "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/internal/shell" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -34,16 +36,10 @@ const ( "# Your changes will not be saved.\n" ) -// quoteShellWord ensures that s is interpreted by a shell as single word. -func quoteShellWord(s string) string { - // https://www.gnu.org/software/bash/manual/html_node/Quoting.html - return `'` + strings.ReplaceAll(s, `'`, `'"'"'`) + `'` -} - // clusterYAML returns Patroni settings that apply to the entire cluster. func clusterYAML( cluster *v1beta1.PostgresCluster, - pgHBAs postgres.HBAs, pgParameters postgres.Parameters, patroniLogStorageLimit int64, + pgHBAs postgres.HBAs, parameters *postgres.ParameterSet, patroniLogStorageLimit int64, ) (string, error) { root := map[string]any{ // The cluster identifier. This value cannot change during the cluster's @@ -155,9 +151,17 @@ func clusterYAML( }, } - // if a Patroni log file size is configured, configure volume file storage + // If a Patroni log file size is configured (the user set it in the + // spec or the OpenTelemetryLogs feature gate is enabled), we need to + // configure volume file storage if patroniLogStorageLimit != 0 { + logLevel := initialize.Pointer("INFO") + if cluster.Spec.Patroni != nil && cluster.Spec.Patroni.Logging != nil && + cluster.Spec.Patroni.Logging.Level != nil { + logLevel = cluster.Spec.Patroni.Logging.Level + } + // Configure the Patroni log settings // - https://patroni.readthedocs.io/en/latest/yaml_configuration.html#log root["log"] = map[string]any{ @@ -166,7 +170,12 @@ func clusterYAML( "type": "json", // defaults to "INFO" - "level": cluster.Spec.Patroni.Logging.Level, + "level": logLevel, + + // Setting group read permissions so that the OTel filelog receiver can + // read the log files. + // NOTE: This log configuration setting is only available in Patroni v4 + "mode": "0660", // There will only be two log files. Cannot set to 1 or the logs won't rotate. // - https://github.com/python/cpython/blob/3.11/Lib/logging/handlers.py#L134 @@ -183,7 +192,7 @@ func clusterYAML( // facilitate it.
When Patroni is already bootstrapped, this field is ignored. root["bootstrap"] = map[string]any{ - "dcs": DynamicConfiguration(&cluster.Spec, pgHBAs, pgParameters), + "dcs": DynamicConfiguration(&cluster.Spec, pgHBAs, parameters), // Missing here is "users" which runs *after* "post_bootstrap". It is // not possible to use roles created by the former in the latter. @@ -199,7 +208,7 @@ func clusterYAML( // and returns a value that can be marshaled to JSON. func DynamicConfiguration( spec *v1beta1.PostgresClusterSpec, - pgHBAs postgres.HBAs, pgParameters postgres.Parameters, + pgHBAs postgres.HBAs, parameters *postgres.ParameterSet, ) map[string]any { // Copy the entire configuration before making any changes. root := make(map[string]any) @@ -232,42 +241,9 @@ func DynamicConfiguration( } root["postgresql"] = postgresql - // Copy the "postgresql.parameters" section over any defaults. - parameters := make(map[string]any) - if pgParameters.Default != nil { - for k, v := range pgParameters.Default.AsMap() { - parameters[k] = v - } - } - if section, ok := postgresql["parameters"].(map[string]any); ok { - for k, v := range section { - parameters[k] = v - } - } - // Override the above with mandatory parameters. - if pgParameters.Mandatory != nil { - for k, v := range pgParameters.Mandatory.AsMap() { - - // This parameter is a comma-separated list. Rather than overwrite the - // user-defined value, we want to combine it with the mandatory one. - // Some libraries belong at specific positions in the list, so figure - // that out as well. - if k == "shared_preload_libraries" { - // Load mandatory libraries ahead of user-defined libraries. - if s, ok := parameters[k].(string); ok && len(s) > 0 { - v = v + "," + s - } - // Load "citus" ahead of any other libraries. - // - https://github.com/citusdata/citus/blob/v12.0.0/src/backend/distributed/shared_library_init.c#L417-L419 - if strings.Contains(v, "citus") { - v = "citus," + v - } - } - - parameters[k] = v - } + if m := parameters.AsMap(); m != nil { + postgresql["parameters"] = m } - postgresql["parameters"] = parameters // Copy the "postgresql.pg_hba" section after any mandatory values. hba := make([]string, 0, len(pgHBAs.Mandatory)) @@ -327,7 +303,7 @@ func DynamicConfiguration( // Populate the standby leader by shipping logs through pgBackRest. // This also overrides the "restore_command" used by standby replicas. // - https://www.postgresql.org/docs/current/warm-standby.html - standby["restore_command"] = pgParameters.Mandatory.Value("restore_command") + standby["restore_command"] = parameters.Value("restore_command") } standby["create_replica_methods"] = methods @@ -576,15 +552,11 @@ func instanceYAML( "-", }, command...) - quoted := make([]string, len(command)) - for i := range command { - quoted[i] = quoteShellWord(command[i]) - } postgresql[pgBackRestCreateReplicaMethod] = map[string]any{ - "command": strings.Join(quoted, " "), - "keep_data": true, - "no_leader": true, - "no_params": true, + "command": strings.Join(shell.QuoteWords(command...), " "), + "keep_data": true, // Use the data directory from a prior method. + "no_leader": true, // Works without a replication connection. + "no_params": true, // Patroni should not add "--scope", "--role", etc. } methods = append([]string{pgBackRestCreateReplicaMethod}, methods...) 
} diff --git a/internal/patroni/config_test.go b/internal/patroni/config_test.go index 4f30ec592d..d5ce0eb81d 100644 --- a/internal/patroni/config_test.go +++ b/internal/patroni/config_test.go @@ -32,7 +32,7 @@ func TestClusterYAML(t *testing.T) { cluster.Namespace = "some-namespace" cluster.Name = "cluster-name" - data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}, 0) + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.NewParameterSet(), 0) assert.NilError(t, err) assert.Equal(t, data, strings.TrimSpace(` # Generated by postgres-operator. DO NOT EDIT. @@ -91,7 +91,7 @@ watchdog: cluster.Name = "cluster-name" cluster.Spec.PostgresVersion = 14 - data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}, 0) + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.NewParameterSet(), 0) assert.NilError(t, err) assert.Equal(t, data, strings.TrimSpace(` # Generated by postgres-operator. DO NOT EDIT. @@ -159,7 +159,7 @@ watchdog: Level: &logLevel, } - data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}, 1000) + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.NewParameterSet(), 1000) assert.NilError(t, err) assert.Equal(t, data, strings.TrimSpace(` # Generated by postgres-operator. DO NOT EDIT. @@ -191,6 +191,7 @@ log: file_num: 1 file_size: 500 level: DEBUG + mode: "0660" type: json postgresql: authentication: @@ -233,7 +234,7 @@ func TestDynamicConfiguration(t *testing.T) { name string spec string hbas postgres.HBAs - params postgres.Parameters + params *postgres.ParameterSet expected map[string]any }{ { @@ -242,7 +243,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": false, @@ -261,9 +261,8 @@ func TestDynamicConfiguration(t *testing.T) { expected: map[string]any{ "loop_wait": int32(10), "ttl": int32(30), - "retry_timeout": float64(5), + "retry_timeout": int64(5), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": false, @@ -286,7 +285,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(8), "ttl": int32(99), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": false, @@ -306,7 +304,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": false, @@ -329,7 +326,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": "input", @@ -337,82 +333,30 @@ func TestDynamicConfiguration(t *testing.T) { }, }, { - name: "postgresql.parameters: wrong-type is ignored", - spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: true, - }, - }, - }, - }`, - expected: map[string]any{ - "loop_wait": int32(10), - "ttl": int32(30), - "postgresql": map[string]any{ - "parameters": map[string]any{}, - "pg_hba": []string{}, - "use_pg_rewind": true, - "use_slots": false, - }, - }, - }, - { - name: "postgresql.parameters: input passes through", - spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: { - something: str, - another: 5, - }, - }, - }, - }, - }`, - 
expected: map[string]any{ - "loop_wait": int32(10), - "ttl": int32(30), - "postgresql": map[string]any{ - "parameters": map[string]any{ - "something": "str", - "another": float64(5), - }, - "pg_hba": []string{}, - "use_pg_rewind": true, - "use_slots": false, - }, - }, - }, - { - name: "postgresql.parameters: input overrides default", + name: "Postgres parameters pass through", spec: `{ patroni: { dynamicConfiguration: { postgresql: { parameters: { - something: str, - another: 5, + calculated: elsewhere, }, }, }, }, }`, - params: postgres.Parameters{ - Default: parameters(map[string]string{ - "something": "overridden", - "unrelated": "default", - }), - }, + params: parameters(map[string]string{ + "something": "str", + "another": "5", + "unrelated": "default", + }), expected: map[string]any{ "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{ + "parameters": map[string]string{ "something": "str", - "another": float64(5), + "another": "5", "unrelated": "default", }, "pg_hba": []string{}, @@ -421,134 +365,6 @@ func TestDynamicConfiguration(t *testing.T) { }, }, }, - { - name: "postgresql.parameters: mandatory overrides input", - spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: { - something: str, - another: 5, - }, - }, - }, - }, - }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "something": "overrides", - "unrelated": "setting", - }), - }, - expected: map[string]any{ - "loop_wait": int32(10), - "ttl": int32(30), - "postgresql": map[string]any{ - "parameters": map[string]any{ - "something": "overrides", - "another": float64(5), - "unrelated": "setting", - }, - "pg_hba": []string{}, - "use_pg_rewind": true, - "use_slots": false, - }, - }, - }, - { - name: "postgresql.parameters: mandatory shared_preload_libraries", - spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: { - shared_preload_libraries: given, - }, - }, - }, - }, - }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "shared_preload_libraries": "mandatory", - }), - }, - expected: map[string]any{ - "loop_wait": int32(10), - "ttl": int32(30), - "postgresql": map[string]any{ - "parameters": map[string]any{ - "shared_preload_libraries": "mandatory,given", - }, - "pg_hba": []string{}, - "use_pg_rewind": true, - "use_slots": false, - }, - }, - }, - { - name: "postgresql.parameters: mandatory shared_preload_libraries wrong-type is ignored", - spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: { - shared_preload_libraries: 1, - }, - }, - }, - }, - }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "shared_preload_libraries": "mandatory", - }), - }, - expected: map[string]any{ - "loop_wait": int32(10), - "ttl": int32(30), - "postgresql": map[string]any{ - "parameters": map[string]any{ - "shared_preload_libraries": "mandatory", - }, - "pg_hba": []string{}, - "use_pg_rewind": true, - "use_slots": false, - }, - }, - }, - { - name: "postgresql.parameters: shared_preload_libraries order", - spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: { - shared_preload_libraries: "given, citus, more", - }, - }, - }, - }, - }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "shared_preload_libraries": "mandatory", - }), - }, - expected: map[string]any{ - "loop_wait": int32(10), - "ttl": int32(30), - "postgresql": map[string]any{ - "parameters": map[string]any{ - 
"shared_preload_libraries": "citus,mandatory,given, citus, more", - }, - "pg_hba": []string{}, - "use_pg_rewind": true, - "use_slots": false, - }, - }, - }, { name: "postgresql.pg_hba: wrong-type is ignored", spec: `{ @@ -564,7 +380,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": false, @@ -591,7 +406,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{ "local all all peer", }, @@ -620,7 +434,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{ "custom", }, @@ -649,7 +462,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{ "local all all peer", "custom", @@ -679,7 +491,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{ "local all all peer", "custom", @@ -704,7 +515,6 @@ func TestDynamicConfiguration(t *testing.T) { "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{}, "pg_hba": []string{}, "use_pg_rewind": true, "use_slots": false, @@ -730,16 +540,14 @@ func TestDynamicConfiguration(t *testing.T) { }, }, }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "restore_command": "mandatory", - }), - }, + params: parameters(map[string]string{ + "restore_command": "mandatory", + }), expected: map[string]any{ "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{ + "parameters": map[string]string{ "restore_command": "mandatory", }, "pg_hba": []string{}, @@ -772,16 +580,14 @@ func TestDynamicConfiguration(t *testing.T) { }, }, }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "restore_command": "mandatory", - }), - }, + params: parameters(map[string]string{ + "restore_command": "mandatory", + }), expected: map[string]any{ "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{ + "parameters": map[string]string{ "restore_command": "mandatory", }, "pg_hba": []string{}, @@ -816,16 +622,14 @@ func TestDynamicConfiguration(t *testing.T) { }, }, }`, - params: postgres.Parameters{ - Mandatory: parameters(map[string]string{ - "restore_command": "mandatory", - }), - }, + params: parameters(map[string]string{ + "restore_command": "mandatory", + }), expected: map[string]any{ "loop_wait": int32(10), "ttl": int32(30), "postgresql": map[string]any{ - "parameters": map[string]any{ + "parameters": map[string]string{ "restore_command": "mandatory", }, "pg_hba": []string{}, @@ -842,25 +646,24 @@ func TestDynamicConfiguration(t *testing.T) { }, }, { - name: "tde enabled", + name: "config.parameters: tde enabled", spec: `{ - patroni: { - dynamicConfiguration: { - postgresql: { - parameters: { - encryption_key_command: echo test, - }, - }, + config: { + parameters: { + encryption_key_command: echo one, }, }, }`, + params: parameters(map[string]string{ + "encryption_key_command": "echo one", + }), expected: map[string]any{ "loop_wait": int32(10), "ttl": int32(30), 
"postgresql": map[string]any{ "bin_name": map[string]any{"pg_rewind": string("/tmp/pg_rewind_tde.sh")}, - "parameters": map[string]any{ - "encryption_key_command": "echo test", + "parameters": map[string]string{ + "encryption_key_command": "echo one", }, "pg_hba": []string{}, "use_pg_rewind": bool(true), @@ -871,7 +674,7 @@ func TestDynamicConfiguration(t *testing.T) { } { t.Run(tt.name, func(t *testing.T) { cluster := new(v1beta1.PostgresCluster) - assert.NilError(t, yaml.Unmarshal([]byte(tt.spec), &cluster.Spec)) + require.UnmarshalInto(t, &cluster.Spec, tt.spec) if cluster.Spec.PostgresVersion == 0 { cluster.Spec.PostgresVersion = 14 } diff --git a/internal/patroni/postgres.go b/internal/patroni/postgres.go new file mode 100644 index 0000000000..cb686312fa --- /dev/null +++ b/internal/patroni/postgres.go @@ -0,0 +1,56 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package patroni + +import ( + "encoding/json" + "fmt" + + "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +// PostgresParameters returns the Postgres parameters in spec, if any. +func PostgresParameters(spec *v1beta1.PatroniSpec) *postgres.ParameterSet { + result := postgres.NewParameterSet() + + if spec != nil { + // DynamicConfiguration lacks an OpenAPI schema, so it may contain any type + // at any depth. Navigate the object and convert parameter values to string. + // + // Patroni accepts booleans, integers, and strings but also parses + // string values into the types it expects: + // https://github.com/patroni/patroni/blob/v4.0.0/patroni/postgresql/validator.py + // + // Patroni passes JSON arrays and objects through Python str() which looks + // similar to YAML in simple cases: + // https://github.com/patroni/patroni/blob/v4.0.0/patroni/postgresql/config.py#L254-L259 + // + // >>> str(list((1, 2.3, True, "asdf"))) + // "[1, 2.3, True, 'asdf']" + // + // >>> str(dict(a = 1, b = True)) + // "{'a': 1, 'b': True}" + // + if root := spec.DynamicConfiguration; root != nil { + if postgresql, ok := root["postgresql"].(map[string]any); ok { + if section, ok := postgresql["parameters"].(map[string]any); ok { + for k, v := range section { + switch v.(type) { + case []any, map[string]any: + if b, err := json.Marshal(v); err == nil { + result.Add(k, string(b)) + } + default: + result.Add(k, fmt.Sprint(v)) + } + } + } + } + } + } + + return result +} diff --git a/internal/patroni/postgres_test.go b/internal/patroni/postgres_test.go new file mode 100644 index 0000000000..16fdc30fdf --- /dev/null +++ b/internal/patroni/postgres_test.go @@ -0,0 +1,112 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package patroni + +import ( + "testing" + + "gotest.tools/v3/assert" + + "github.com/crunchydata/postgres-operator/internal/testing/require" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestPostgresParameters(t *testing.T) { + t.Run("Zero", func(t *testing.T) { + result := PostgresParameters(nil) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{}) + }) + + t.Run("NoDynamicConfig", func(t *testing.T) { + spec := new(v1beta1.PatroniSpec) + result := PostgresParameters(spec) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{}) + }) + + t.Run("NoPostgreSQL", func(t *testing.T) { + spec := new(v1beta1.PatroniSpec) + require.UnmarshalInto(t, spec, `{ + dynamicConfiguration: {}, + }`) + result := PostgresParameters(spec) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{}) + }) + + t.Run("WrongPostgreSQLType", func(t *testing.T) { + spec := new(v1beta1.PatroniSpec) + require.UnmarshalInto(t, spec, `{ + dynamicConfiguration: { + postgresql: asdf, + }, + }`) + result := PostgresParameters(spec) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{}) + }) + + t.Run("NoParameters", func(t *testing.T) { + spec := new(v1beta1.PatroniSpec) + require.UnmarshalInto(t, spec, `{ + dynamicConfiguration: { + postgresql: { + use_pg_rewind: true, + }, + }, + }`) + result := PostgresParameters(spec) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{}) + }) + + t.Run("WrongParametersType", func(t *testing.T) { + spec := new(v1beta1.PatroniSpec) + require.UnmarshalInto(t, spec, `{ + dynamicConfiguration: { + postgresql: { + parameters: [1,2], + }, + }, + }`) + result := PostgresParameters(spec) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{}) + }) + + t.Run("Parameters", func(t *testing.T) { + spec := new(v1beta1.PatroniSpec) + require.UnmarshalInto(t, spec, `{ + dynamicConfiguration: { + postgresql: { + parameters: { + log_statement_sample_rate: 0.98, + max_connections: 1000, + wal_log_hints: true, + wal_level: replica, + strange.though: [ 1, 2.3, yes ], + }, + }, + }, + }`) + result := PostgresParameters(spec) + + assert.Assert(t, result != nil) + assert.DeepEqual(t, result.AsMap(), map[string]string{ + "log_statement_sample_rate": "0.98", + "max_connections": "1000", + "wal_log_hints": "true", + "wal_level": "replica", + "strange.though": "[1,2.3,true]", + }) + }) +} diff --git a/internal/patroni/reconcile.go b/internal/patroni/reconcile.go index 19c1131d7d..394a33d6d5 100644 --- a/internal/patroni/reconcile.go +++ b/internal/patroni/reconcile.go @@ -30,7 +30,7 @@ func ClusterBootstrapped(postgresCluster *v1beta1.PostgresCluster) bool { func ClusterConfigMap(ctx context.Context, inCluster *v1beta1.PostgresCluster, inHBAs postgres.HBAs, - inParameters postgres.Parameters, + inParameters *postgres.ParameterSet, outClusterConfigMap *corev1.ConfigMap, patroniLogStorageLimit int64, ) error { diff --git a/internal/patroni/reconcile_test.go b/internal/patroni/reconcile_test.go index 61916db258..9a82dfde2d 100644 --- a/internal/patroni/reconcile_test.go +++ b/internal/patroni/reconcile_test.go @@ -25,7 +25,7 @@ func TestClusterConfigMap(t *testing.T) { cluster := new(v1beta1.PostgresCluster) pgHBAs := postgres.HBAs{} - pgParameters := postgres.Parameters{} + 
pgParameters := postgres.NewParameterSet() cluster.Default() config := new(corev1.ConfigMap) diff --git a/internal/pgadmin/config.go b/internal/pgadmin/config.go index 2dbe3a2e49..d6ba5ce228 100644 --- a/internal/pgadmin/config.go +++ b/internal/pgadmin/config.go @@ -9,6 +9,7 @@ import ( corev1 "k8s.io/api/core/v1" + "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -94,16 +95,9 @@ func podConfigFiles(configmap *corev1.ConfigMap, spec v1beta1.PGAdminPodSpec) [] // - https://www.pgadmin.org/docs/pgadmin4/development/enabling_ldap_authentication.html if spec.Config.LDAPBindPassword != nil { config = append(config, corev1.VolumeProjection{ - Secret: &corev1.SecretProjection{ - LocalObjectReference: spec.Config.LDAPBindPassword.LocalObjectReference, - Optional: spec.Config.LDAPBindPassword.Optional, - Items: []corev1.KeyToPath{ - { - Key: spec.Config.LDAPBindPassword.Key, - Path: ldapPasswordPath, - }, - }, - }, + Secret: initialize.Pointer( + spec.Config.LDAPBindPassword.AsProjection(ldapPasswordPath), + ), }) } diff --git a/internal/pgadmin/reconcile_test.go b/internal/pgadmin/reconcile_test.go index fcbdf589e3..6e4cccc73a 100644 --- a/internal/pgadmin/reconcile_test.go +++ b/internal/pgadmin/reconcile_test.go @@ -316,11 +316,11 @@ volumes: Name: "test", }}, }} - cluster.Spec.UserInterface.PGAdmin.Config.LDAPBindPassword = &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ + cluster.Spec.UserInterface.PGAdmin.Config.LDAPBindPassword = &v1beta1.OptionalSecretKeyRef{ + SecretKeyRef: v1beta1.SecretKeyRef{ Name: "podtest", + Key: "podtestpw", }, - Key: "podtestpw", } call() diff --git a/internal/pgadmin/users.go b/internal/pgadmin/users.go index 6c93fcd5d2..ef51978e8f 100644 --- a/internal/pgadmin/users.go +++ b/internal/pgadmin/users.go @@ -239,7 +239,7 @@ with create_app().app_context():`, if err == nil { err = encoder.Encode(map[string]interface{}{ "username": spec.Name, - "password": passwords[string(spec.Name)], + "password": passwords[spec.Name], }) } } diff --git a/internal/pgadmin/users_test.go b/internal/pgadmin/users_test.go index 17bec23204..4dba70f81a 100644 --- a/internal/pgadmin/users_test.go +++ b/internal/pgadmin/users_test.go @@ -235,7 +235,7 @@ with create_app().app_context(): []v1beta1.PostgresUserSpec{ { Name: "user-no-options", - Databases: []v1beta1.PostgresIdentifier{"db1"}, + Databases: []string{"db1"}, }, { Name: "user-no-databases", diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index 69a996d400..c14a264ce3 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -7,6 +7,7 @@ package pgbackrest import ( "context" "fmt" + "path" "strconv" "strings" "time" @@ -14,10 +15,13 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/internal/shell" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -66,9 +70,11 @@ const ( // pgbackrest_job.conf is used by certain jobs, such as stanza create and backup // 
pgbackrest_primary.conf is used by the primary database pod // pgbackrest_repo.conf is used by the pgBackRest repository pod -func CreatePGBackRestConfigMapIntent(postgresCluster *v1beta1.PostgresCluster, +func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repoHostName, configHash, serviceName, serviceNamespace string, - instanceNames []string) *corev1.ConfigMap { + instanceNames []string) (*corev1.ConfigMap, error) { + + var err error meta := naming.PGBackRestConfig(postgresCluster) meta.Annotations = naming.Merge( @@ -123,17 +129,44 @@ func CreatePGBackRestConfigMapIntent(postgresCluster *v1beta1.PostgresCluster, postgresCluster.Spec.Backups.PGBackRest.Repos, postgresCluster.Spec.Backups.PGBackRest.Global, ).String() + + if RepoHostVolumeDefined(postgresCluster) && + (feature.Enabled(ctx, feature.OpenTelemetryLogs) || + feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + err = collector.AddToConfigMap(ctx, collector.NewConfigForPgBackrestRepoHostPod( + ctx, + postgresCluster.Spec.Instrumentation, + postgresCluster.Spec.Backups.PGBackRest.Repos, + ), cm) + + // If OTel logging is enabled, add logrotate config for the RepoHost + if err == nil && + postgresCluster.Spec.Instrumentation != nil && + feature.Enabled(ctx, feature.OpenTelemetryLogs) { + var pgBackRestLogPath string + for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { + if repo.Volume != nil { + pgBackRestLogPath = fmt.Sprintf(naming.PGBackRestRepoLogPath, repo.Name) + break + } + } + + collector.AddLogrotateConfigs(ctx, postgresCluster.Spec.Instrumentation, cm, []collector.LogrotateConfig{{ + LogFiles: []string{pgBackRestLogPath + "/*.log"}, + }}) + } + } } cm.Data[ConfigHashKey] = configHash - return cm + return cm, err } // MakePGBackrestLogDir creates the pgBackRest default log path directory used when a // dedicated repo host is configured. func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, - cluster *v1beta1.PostgresCluster) { + cluster *v1beta1.PostgresCluster) string { var pgBackRestLogPath string for _, repo := range cluster.Spec.Backups.PGBackRest.Repos { @@ -144,7 +177,9 @@ func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, } container := corev1.Container{ - Command: []string{"bash", "-c", "mkdir -p " + pgBackRestLogPath}, + // TODO(log-rotation): The second argument here should be the path + // of the volume mount. Find a way to calculate that consistently. + Command: []string{"bash", "-c", shell.MakeDirectories(0o775, path.Dir(pgBackRestLogPath), pgBackRestLogPath)}, Image: config.PGBackRestContainerImage(cluster), ImagePullPolicy: cluster.Spec.ImagePullPolicy, Name: naming.ContainerPGBackRestLogDirInit, @@ -159,6 +194,8 @@ func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, } } template.Spec.InitContainers = append(template.Spec.InitContainers, container) + + return pgBackRestLogPath } // RestoreCommand returns the command for performing a pgBackRest restore. 
In addition to calling diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index f874eb10f6..08aaaf8d94 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -37,10 +37,11 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { cluster := cluster.DeepCopy() cluster.Spec.Backups.PGBackRest.Repos = nil - configmap := CreatePGBackRestConfigMapIntent(cluster, + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, "", "number", "pod-service-name", "test-ns", []string{"some-instance"}) + assert.NilError(t, err) assert.Equal(t, configmap.Data["config-hash"], "number") assert.Equal(t, configmap.Data["pgbackrest-server.conf"], "") }) @@ -71,10 +72,11 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { }, } - configmap := CreatePGBackRestConfigMapIntent(cluster, + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, "repo-hostname", "abcde12345", "pod-service-name", "test-ns", []string{"some-instance"}) + assert.NilError(t, err) assert.DeepEqual(t, configmap.Annotations, map[string]string{}) assert.DeepEqual(t, configmap.Labels, map[string]string{ "postgres-operator.crunchydata.com/cluster": "hippo-dance", @@ -176,9 +178,10 @@ pg1-socket-path = /tmp/postgres }, } - configmap := CreatePGBackRestConfigMapIntent(cluster, + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, "any", "any", "any", "any", nil) + assert.NilError(t, err) assert.DeepEqual(t, configmap.Annotations, map[string]string{ "ak1": "cluster-av1", "ak2": "backups-av2", @@ -207,10 +210,11 @@ pg1-socket-path = /tmp/postgres }, } - configmap := CreatePGBackRestConfigMapIntent(cluster, + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, "", "number", "pod-service-name", "test-ns", []string{"some-instance"}) + assert.NilError(t, err) assert.Assert(t, cmp.Contains(configmap.Data["pgbackrest_instance.conf"], "archive-header-check = n")) @@ -228,10 +232,11 @@ pg1-socket-path = /tmp/postgres }, } - configmap = CreatePGBackRestConfigMapIntent(cluster, + configmap, err = CreatePGBackRestConfigMapIntent(context.Background(), cluster, "repo1", "number", "pod-service-name", "test-ns", []string{"some-instance"}) + assert.NilError(t, err) assert.Assert(t, cmp.Contains(configmap.Data["pgbackrest_repo.conf"], "archive-header-check = n")) @@ -287,7 +292,7 @@ func TestMakePGBackrestLogDir(t *testing.T) { for _, c := range podTemplate.Spec.InitContainers { if c.Name == naming.ContainerPGBackRestLogDirInit { // ignore "bash -c", should skip repo with no volume - assert.Equal(t, "mkdir -p /pgbackrest/repo2/log", c.Command[2]) + assert.Equal(t, `mkdir -p '/pgbackrest/repo2/log' && chmod 0775 '/pgbackrest/repo2/log'`, c.Command[2]) assert.Equal(t, c.Image, "test-image") assert.Equal(t, c.ImagePullPolicy, corev1.PullAlways) assert.Assert(t, !cmp.DeepEqual(c.SecurityContext, diff --git a/internal/pgbackrest/reconcile.go b/internal/pgbackrest/reconcile.go index 378b526112..4e789d137e 100644 --- a/internal/pgbackrest/reconcile.go +++ b/internal/pgbackrest/reconcile.go @@ -213,8 +213,8 @@ func AddConfigToRestorePod( } // mount any provided configuration files to the restore Job Pod - if len(cluster.Spec.Config.Files) != 0 { - additionalConfigVolumeMount := postgres.AdditionalConfigVolumeMount() + if cluster.Spec.Config != nil && len(cluster.Spec.Config.Files) != 0 { + additionalConfigVolumeMount := postgres.ConfigVolumeMount() additionalConfigVolume := 
corev1.Volume{Name: additionalConfigVolumeMount.Name} additionalConfigVolume.Projected = &corev1.ProjectedVolumeSource{ Sources: append(sources, cluster.Spec.Config.Files...), diff --git a/internal/pgbackrest/reconcile_test.go b/internal/pgbackrest/reconcile_test.go index b3c50b1f8e..0c9aece2b1 100644 --- a/internal/pgbackrest/reconcile_test.go +++ b/internal/pgbackrest/reconcile_test.go @@ -522,8 +522,10 @@ func TestAddConfigToRestorePod(t *testing.T) { custom.Name = "custom-configmap-files" cluster := cluster.DeepCopy() - cluster.Spec.Config.Files = []corev1.VolumeProjection{ - {ConfigMap: &custom}, + cluster.Spec.Config = &v1beta1.PostgresConfig{ + Files: []corev1.VolumeProjection{ + {ConfigMap: &custom}, + }, } sourceCluster := cluster.DeepCopy() diff --git a/internal/pgbouncer/config.go b/internal/pgbouncer/config.go index c77ac793c3..257dc63dbd 100644 --- a/internal/pgbouncer/config.go +++ b/internal/pgbouncer/config.go @@ -5,12 +5,14 @@ package pgbouncer import ( + "context" "fmt" "sort" "strings" corev1 "k8s.io/api/core/v1" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -69,12 +71,12 @@ func authFileContents(password string) []byte { return `"` + strings.ReplaceAll(s, `"`, `""`) + `"` } - user1 := quote(postgresqlUser) + " " + quote(password) + "\n" + user1 := quote(PostgresqlUser) + " " + quote(password) + "\n" return []byte(user1) } -func clusterINI(cluster *v1beta1.PostgresCluster) string { +func clusterINI(ctx context.Context, cluster *v1beta1.PostgresCluster) string { var ( pgBouncerPort = *cluster.Spec.Proxy.PGBouncer.Port postgresPort = *cluster.Spec.Port @@ -97,7 +99,7 @@ func clusterINI(cluster *v1beta1.PostgresCluster) string { // "auth_user" requires a password, PgBouncer reads it from "auth_file". "auth_file": authFileAbsolutePath, "auth_query": "SELECT username, password from pgbouncer.get_auth($1)", - "auth_user": postgresqlUser, + "auth_user": PostgresqlUser, // TODO(cbandy): Use an HBA file to control authentication of PgBouncer // accounts; e.g. "admin_users" below. @@ -124,6 +126,17 @@ func clusterINI(cluster *v1beta1.PostgresCluster) string { "unix_socket_dir": "", } + // If OpenTelemetryLogs feature is enabled, enable logging to file + if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + global["logfile"] = naming.PGBouncerLogPath + "/pgbouncer.log" + } + + // When OTel metrics are enabled, allow pgBouncer's postgres user + // to run read-only console queries on pgBouncer's virtual db + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + global["stats_users"] = PostgresqlUser + } + // Override the above with any specified settings. 
for k, v := range cluster.Spec.Proxy.PGBouncer.Config.Global { global[k] = v diff --git a/internal/pgbouncer/config_test.go b/internal/pgbouncer/config_test.go index f5ddef6214..43c6b77a92 100644 --- a/internal/pgbouncer/config_test.go +++ b/internal/pgbouncer/config_test.go @@ -5,6 +5,7 @@ package pgbouncer import ( + "context" "os" "os/exec" "path/filepath" @@ -40,6 +41,7 @@ func TestAuthFileContents(t *testing.T) { } func TestClusterINI(t *testing.T) { + ctx := context.Background() t.Parallel() cluster := new(v1beta1.PostgresCluster) @@ -54,7 +56,7 @@ func TestClusterINI(t *testing.T) { *cluster.Spec.Proxy.PGBouncer.Port = 8888 t.Run("Default", func(t *testing.T) { - assert.Equal(t, clusterINI(cluster), strings.Trim(` + assert.Equal(t, clusterINI(ctx, cluster), strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. # Your changes will not be saved. @@ -94,7 +96,7 @@ unix_socket_dir = "app": "mode=rad", } - assert.Equal(t, clusterINI(cluster), strings.Trim(` + assert.Equal(t, clusterINI(ctx, cluster), strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. # Your changes will not be saved. @@ -127,7 +129,7 @@ app = mode=rad // The "conffile" setting cannot be changed. cluster.Spec.Proxy.PGBouncer.Config.Global["conffile"] = "too-far" - assert.Assert(t, !strings.Contains(clusterINI(cluster), "too-far")) + assert.Assert(t, !strings.Contains(clusterINI(ctx, cluster), "too-far")) }) } diff --git a/internal/pgbouncer/postgres.go b/internal/pgbouncer/postgres.go index d9a9d91539..d7d2bae5cf 100644 --- a/internal/pgbouncer/postgres.go +++ b/internal/pgbouncer/postgres.go @@ -23,7 +23,7 @@ const ( // to also be related to the "auth_user". // - https://github.com/pgbouncer/pgbouncer/issues/568 // - https://github.com/pgbouncer/pgbouncer/issues/302#issuecomment-815097248 - postgresqlUser = "_crunchypgbouncer" + PostgresqlUser = "_crunchypgbouncer" ) // sqlAuthenticationQuery returns the SECURITY DEFINER function that allows @@ -41,7 +41,7 @@ func sqlAuthenticationQuery(sqlFunctionName string) string { // No replicators. `NOT pg_authid.rolreplication`, // Not the PgBouncer role itself. - `pg_authid.rolname <> ` + postgres.QuoteLiteral(postgresqlUser), + `pg_authid.rolname <> ` + postgres.QuoteLiteral(PostgresqlUser), // Those without a password expiration or an expiration in the future. `(pg_authid.rolvaliduntil IS NULL OR pg_authid.rolvaliduntil >= CURRENT_TIMESTAMP)`, }, "\n AND ") @@ -93,7 +93,7 @@ SELECT pg_catalog.format('DROP OWNED BY %I CASCADE', :'username') `COMMIT;`, }, "\n"), map[string]string{ - "username": postgresqlUser, + "username": PostgresqlUser, "namespace": postgresqlSchema, "ON_ERROR_STOP": "on", // Abort when any one statement fails. @@ -108,7 +108,7 @@ SELECT pg_catalog.format('DROP OWNED BY %I CASCADE', :'username') `SELECT pg_catalog.current_database()`, `SET client_min_messages = WARNING; SET synchronous_commit = LOCAL; DROP ROLE IF EXISTS :"username";`, map[string]string{ - "username": postgresqlUser, + "username": PostgresqlUser, "ON_ERROR_STOP": "on", // Abort when any one statement fails. "QUIET": "on", // Do not print successful statements to stdout. 
@@ -190,7 +190,7 @@ REVOKE ALL PRIVILEGES `COMMIT;`, }, "\n"), map[string]string{ - "username": postgresqlUser, + "username": PostgresqlUser, "namespace": postgresqlSchema, "verifier": string(clusterSecret.Data[verifierSecretKey]), @@ -225,7 +225,7 @@ func postgresqlHBAs() []*postgres.HostBasedAuthentication { // - https://www.postgresql.org/docs/current/auth-password.html return []*postgres.HostBasedAuthentication{ - postgres.NewHBA().User(postgresqlUser).TLS().Method("scram-sha-256"), - postgres.NewHBA().User(postgresqlUser).TCP().Method("reject"), + postgres.NewHBA().User(PostgresqlUser).TLS().Method("scram-sha-256"), + postgres.NewHBA().User(PostgresqlUser).TCP().Method("reject"), } } diff --git a/internal/pgbouncer/reconcile.go b/internal/pgbouncer/reconcile.go index ad4f16bb08..4181cea478 100644 --- a/internal/pgbouncer/reconcile.go +++ b/internal/pgbouncer/reconcile.go @@ -11,6 +11,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" @@ -22,6 +23,7 @@ import ( // ConfigMap populates the PgBouncer ConfigMap. func ConfigMap( + ctx context.Context, inCluster *v1beta1.PostgresCluster, outConfigMap *corev1.ConfigMap, ) { @@ -33,7 +35,7 @@ func ConfigMap( initialize.Map(&outConfigMap.Data) outConfigMap.Data[emptyConfigMapKey] = "" - outConfigMap.Data[iniFileConfigMapKey] = clusterINI(inCluster) + outConfigMap.Data[iniFileConfigMapKey] = clusterINI(ctx, inCluster) } // Secret populates the PgBouncer Secret. @@ -187,6 +189,12 @@ func Pod( } outPod.Volumes = []corev1.Volume{configVolume} + + if feature.Enabled(ctx, feature.OpenTelemetryLogs) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + collector.AddToPod(ctx, inCluster.Spec.Instrumentation, inCluster.Spec.ImagePullPolicy, inConfigMap, + outPod, []corev1.VolumeMount{configVolumeMount}, string(inSecret.Data["pgbouncer-password"]), []string{naming.PGBouncerLogPath}, + true) + } } // PostgreSQL populates outHBAs with any records needed to run PgBouncer. diff --git a/internal/pgbouncer/reconcile_test.go b/internal/pgbouncer/reconcile_test.go index c5d31bc608..927f8a25fb 100644 --- a/internal/pgbouncer/reconcile_test.go +++ b/internal/pgbouncer/reconcile_test.go @@ -22,6 +22,7 @@ import ( func TestConfigMap(t *testing.T) { t.Parallel() + ctx := context.Background() cluster := new(v1beta1.PostgresCluster) config := new(corev1.ConfigMap) @@ -29,7 +30,7 @@ func TestConfigMap(t *testing.T) { t.Run("Disabled", func(t *testing.T) { // Nothing happens when PgBouncer is disabled. constant := config.DeepCopy() - ConfigMap(cluster, config) + ConfigMap(ctx, cluster, config) assert.DeepEqual(t, constant, config) }) @@ -37,15 +38,15 @@ func TestConfigMap(t *testing.T) { cluster.Spec.Proxy.PGBouncer = new(v1beta1.PGBouncerPodSpec) cluster.Default() - ConfigMap(cluster, config) + ConfigMap(ctx, cluster, config) // The output of clusterINI should go into config. - data := clusterINI(cluster) + data := clusterINI(ctx, cluster) assert.DeepEqual(t, config.Data["pgbouncer.ini"], data) // No change when called again. 
before := config.DeepCopy() - ConfigMap(cluster, config) + ConfigMap(ctx, cluster, config) assert.DeepEqual(t, before, config) } diff --git a/internal/pgmonitor/postgres.go b/internal/pgmonitor/postgres.go index 292d116e30..08a428d465 100644 --- a/internal/pgmonitor/postgres.go +++ b/internal/pgmonitor/postgres.go @@ -10,6 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -22,8 +23,8 @@ const ( // PostgreSQLHBAs provides the Postgres HBA rules for allowing the monitoring // exporter to be accessible -func PostgreSQLHBAs(inCluster *v1beta1.PostgresCluster, outHBAs *postgres.HBAs) { - if ExporterEnabled(inCluster) { +func PostgreSQLHBAs(ctx context.Context, inCluster *v1beta1.PostgresCluster, outHBAs *postgres.HBAs) { + if ExporterEnabled(ctx, inCluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { // Limit the monitoring user to local connections using SCRAM. outHBAs.Mandatory = append(outHBAs.Mandatory, postgres.NewHBA().TCP().User(MonitoringUser).Method("scram-sha-256").Network("127.0.0.0/8"), @@ -34,8 +35,8 @@ func PostgreSQLHBAs(inCluster *v1beta1.PostgresCluster, outHBAs *postgres.HBAs) // PostgreSQLParameters provides additional required configuration parameters // that Postgres needs to support monitoring -func PostgreSQLParameters(inCluster *v1beta1.PostgresCluster, outParameters *postgres.Parameters) { - if ExporterEnabled(inCluster) { +func PostgreSQLParameters(ctx context.Context, inCluster *v1beta1.PostgresCluster, outParameters *postgres.Parameters) { + if ExporterEnabled(ctx, inCluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { // Exporter expects that shared_preload_libraries are installed // pg_stat_statements: https://access.crunchydata.com/documentation/pgmonitor/latest/exporter/ // pgnodemx: https://github.com/CrunchyData/pgnodemx @@ -45,11 +46,11 @@ func PostgreSQLParameters(inCluster *v1beta1.PostgresCluster, outParameters *pos } } -// DisableExporterInPostgreSQL disables the exporter configuration in PostgreSQL. +// DisableMonitoringUserInPostgres disables the exporter configuration in PostgreSQL. // Currently the exporter is disabled by removing login permissions for the // monitoring user. 
// TODO: evaluate other uninstall/removal options -func DisableExporterInPostgreSQL(ctx context.Context, exec postgres.Executor) error { +func DisableMonitoringUserInPostgres(ctx context.Context, exec postgres.Executor) error { log := logging.FromContext(ctx) stdout, stderr, err := exec.Exec(ctx, strings.NewReader(` diff --git a/internal/pgmonitor/postgres_test.go b/internal/pgmonitor/postgres_test.go index b91e9ba125..3b6bff58de 100644 --- a/internal/pgmonitor/postgres_test.go +++ b/internal/pgmonitor/postgres_test.go @@ -5,6 +5,7 @@ package pgmonitor import ( + "context" "strings" "testing" @@ -15,10 +16,12 @@ import ( ) func TestPostgreSQLHBA(t *testing.T) { + ctx := context.Background() + t.Run("ExporterDisabled", func(t *testing.T) { inCluster := &v1beta1.PostgresCluster{} outHBAs := postgres.HBAs{} - PostgreSQLHBAs(inCluster, &outHBAs) + PostgreSQLHBAs(ctx, inCluster, &outHBAs) assert.Equal(t, len(outHBAs.Mandatory), 0) }) @@ -33,7 +36,7 @@ func TestPostgreSQLHBA(t *testing.T) { } outHBAs := postgres.HBAs{} - PostgreSQLHBAs(inCluster, &outHBAs) + PostgreSQLHBAs(ctx, inCluster, &outHBAs) assert.Equal(t, len(outHBAs.Mandatory), 3) assert.Equal(t, outHBAs.Mandatory[0].String(), `host all "ccp_monitoring" "127.0.0.0/8" scram-sha-256`) @@ -43,10 +46,12 @@ func TestPostgreSQLHBA(t *testing.T) { } func TestPostgreSQLParameters(t *testing.T) { + ctx := context.Background() + t.Run("ExporterDisabled", func(t *testing.T) { inCluster := &v1beta1.PostgresCluster{} outParameters := postgres.NewParameters() - PostgreSQLParameters(inCluster, &outParameters) + PostgreSQLParameters(ctx, inCluster, &outParameters) assert.Assert(t, !outParameters.Mandatory.Has("shared_preload_libraries")) }) @@ -61,7 +66,7 @@ func TestPostgreSQLParameters(t *testing.T) { } outParameters := postgres.NewParameters() - PostgreSQLParameters(inCluster, &outParameters) + PostgreSQLParameters(ctx, inCluster, &outParameters) libs, found := outParameters.Mandatory.Get("shared_preload_libraries") assert.Assert(t, found) assert.Assert(t, strings.Contains(libs, "pg_stat_statements")) @@ -80,7 +85,7 @@ func TestPostgreSQLParameters(t *testing.T) { outParameters := postgres.NewParameters() outParameters.Mandatory.Add("shared_preload_libraries", "daisy") - PostgreSQLParameters(inCluster, &outParameters) + PostgreSQLParameters(ctx, inCluster, &outParameters) libs, found := outParameters.Mandatory.Get("shared_preload_libraries") assert.Assert(t, found) assert.Assert(t, strings.Contains(libs, "pg_stat_statements")) diff --git a/internal/pgmonitor/util.go b/internal/pgmonitor/util.go index 8c89815829..32cf222448 100644 --- a/internal/pgmonitor/util.go +++ b/internal/pgmonitor/util.go @@ -8,6 +8,7 @@ import ( "context" "os" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -26,7 +27,7 @@ func GetQueriesConfigDir(ctx context.Context) string { } // ExporterEnabled returns true if the monitoring exporter is enabled -func ExporterEnabled(cluster *v1beta1.PostgresCluster) bool { +func ExporterEnabled(ctx context.Context, cluster *v1beta1.PostgresCluster) bool { if cluster.Spec.Monitoring == nil { return false } @@ -36,5 +37,8 @@ func ExporterEnabled(cluster *v1beta1.PostgresCluster) bool { if cluster.Spec.Monitoring.PGMonitor.Exporter == nil { return false } + if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + return false + } return true } diff --git 
a/internal/pgmonitor/util_test.go b/internal/pgmonitor/util_test.go index 30d28b45d7..e83bbb3730 100644 --- a/internal/pgmonitor/util_test.go +++ b/internal/pgmonitor/util_test.go @@ -5,24 +5,34 @@ package pgmonitor import ( + "context" "testing" "gotest.tools/v3/assert" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestExporterEnabled(t *testing.T) { cluster := &v1beta1.PostgresCluster{} - assert.Assert(t, !ExporterEnabled(cluster)) + ctx := context.Background() + assert.Assert(t, !ExporterEnabled(ctx, cluster)) cluster.Spec.Monitoring = &v1beta1.MonitoringSpec{} - assert.Assert(t, !ExporterEnabled(cluster)) + assert.Assert(t, !ExporterEnabled(ctx, cluster)) cluster.Spec.Monitoring.PGMonitor = &v1beta1.PGMonitorSpec{} - assert.Assert(t, !ExporterEnabled(cluster)) + assert.Assert(t, !ExporterEnabled(ctx, cluster)) cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{} - assert.Assert(t, ExporterEnabled(cluster)) + assert.Assert(t, ExporterEnabled(ctx, cluster)) + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx = feature.NewContext(ctx, gate) + cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{} + assert.Assert(t, !ExporterEnabled(ctx, cluster)) } diff --git a/internal/postgres/config.go b/internal/postgres/config.go index c14dbdc8cd..b3102b74dc 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -14,6 +14,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/shell" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -70,7 +71,7 @@ safelink() ( // for streaming replication and for `pg_rewind`. ReplicationUser = "_crunchyrepl" - // configMountPath is where to mount additional config files + // configMountPath is where to mount config files. configMountPath = "/etc/postgres" ) @@ -86,6 +87,12 @@ func DataDirectory(cluster *v1beta1.PostgresCluster) string { return fmt.Sprintf("%s/pg%d", dataMountPath, cluster.Spec.PostgresVersion) } +// LogDirectory returns the absolute path to the "log_directory" of cluster. +// - https://www.postgresql.org/docs/current/runtime-config-logging.html +func LogDirectory() string { + return fmt.Sprintf("%s/logs/postgres", dataMountPath) +} + // WALDirectory returns the absolute path to the directory where an instance // stores its WAL files. // - https://www.postgresql.org/docs/current/wal.html @@ -291,9 +298,9 @@ chmod +x /tmp/pg_rewind_tde.sh ` } - args := []string{version, walDir, naming.PGBackRestPGDataLogPath, naming.PatroniPGDataLogPath} + args := []string{version, walDir} script := strings.Join([]string{ - `declare -r expected_major_version="$1" pgwal_directory="$2" pgbrLog_directory="$3" patroniLog_directory="$4"`, + `declare -r expected_major_version="$1" pgwal_directory="$2"`, // Function to print the permissions of a file or directory and its parents. bashPermissions, @@ -364,15 +371,13 @@ chmod +x /tmp/pg_rewind_tde.sh `else (halt Permissions!); fi ||`, `halt "$(permissions "${postgres_data_directory}" ||:)"`, - // Create the pgBackRest log directory. 
- `results 'pgBackRest log directory' "${pgbrLog_directory}"`, - `install --directory --mode=0775 "${pgbrLog_directory}" ||`, - `halt "$(permissions "${pgbrLog_directory}" ||:)"`, - - // Create the Patroni log directory. - `results 'Patroni log directory' "${patroniLog_directory}"`, - `install --directory --mode=0775 "${patroniLog_directory}" ||`, - `halt "$(permissions "${patroniLog_directory}" ||:)"`, + // Create log directories. + `(` + shell.MakeDirectories(0o775, dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, + `halt "$(permissions ` + naming.PGBackRestPGDataLogPath + ` ||:)"`, + `(` + shell.MakeDirectories(0o775, dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, + `halt "$(permissions ` + naming.PatroniPGDataLogPath + ` ||:)"`, + `(` + shell.MakeDirectories(0o775, dataMountPath, LogDirectory()) + `) ||`, + `halt "$(permissions ` + LogDirectory() + ` ||:)"`, // Copy replication client certificate files // from the /pgconf/tls/replication directory to the /tmp/replication directory in order diff --git a/internal/postgres/parameters.go b/internal/postgres/parameters.go index 58b86131f8..469eef0bfb 100644 --- a/internal/postgres/parameters.go +++ b/internal/postgres/parameters.go @@ -6,6 +6,7 @@ package postgres import ( "fmt" + "maps" "slices" "strings" ) @@ -68,17 +69,21 @@ func NewParameterSet() *ParameterSet { // AsMap returns a copy of ps as a map. func (ps *ParameterSet) AsMap() map[string]string { - out := make(map[string]string, len(ps.values)) - for name, value := range ps.values { - out[name] = value + if ps == nil { + return nil } - return out + + return maps.Clone(ps.values) } // DeepCopy returns a copy of ps. -func (ps *ParameterSet) DeepCopy() (out *ParameterSet) { +func (ps *ParameterSet) DeepCopy() *ParameterSet { + if ps == nil { + return nil + } + return &ParameterSet{ - values: ps.AsMap(), + values: maps.Clone(ps.values), } } diff --git a/internal/postgres/parameters_test.go b/internal/postgres/parameters_test.go index dc08d7004a..5126899d90 100644 --- a/internal/postgres/parameters_test.go +++ b/internal/postgres/parameters_test.go @@ -31,6 +31,16 @@ func TestNewParameters(t *testing.T) { } func TestParameterSet(t *testing.T) { + t.Run("NilAsMap", func(t *testing.T) { + m := (*ParameterSet)(nil).AsMap() + assert.Assert(t, m == nil) + }) + + t.Run("NilDeepCopy", func(t *testing.T) { + ps := (*ParameterSet)(nil).DeepCopy() + assert.Assert(t, ps == nil) + }) + ps := NewParameterSet() ps.Add("x", "y") diff --git a/internal/postgres/reconcile.go b/internal/postgres/reconcile.go index 0fa792be91..fda5229792 100644 --- a/internal/postgres/reconcile.go +++ b/internal/postgres/reconcile.go @@ -46,8 +46,8 @@ func DownwardAPIVolumeMount() corev1.VolumeMount { } } -// AdditionalConfigVolumeMount returns the name and mount path of the additional config files. -func AdditionalConfigVolumeMount() corev1.VolumeMount { +// ConfigVolumeMount returns the name and mount path of PostgreSQL config files. 
+func ConfigVolumeMount() corev1.VolumeMount { return corev1.VolumeMount{ Name: "postgres-config", MountPath: configMountPath, @@ -232,8 +232,8 @@ func InstancePod(ctx context.Context, startup.VolumeMounts = append(startup.VolumeMounts, tablespaceVolumeMount) } - if len(inCluster.Spec.Config.Files) != 0 { - additionalConfigVolumeMount := AdditionalConfigVolumeMount() + if inCluster.Spec.Config != nil && len(inCluster.Spec.Config.Files) != 0 { + additionalConfigVolumeMount := ConfigVolumeMount() additionalConfigVolume := corev1.Volume{Name: additionalConfigVolumeMount.Name} additionalConfigVolume.Projected = &corev1.ProjectedVolumeSource{ Sources: append([]corev1.VolumeProjection{}, inCluster.Spec.Config.Files...), diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index 18bcb79135..73fabd3014 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -16,6 +16,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -230,7 +231,7 @@ initContainers: - -ceu - -- - |- - declare -r expected_major_version="$1" pgwal_directory="$2" pgbrLog_directory="$3" patroniLog_directory="$4" + declare -r expected_major_version="$1" pgwal_directory="$2" permissions() { while [[ -n "$1" ]]; do set "${1%/*}" "$@"; done; shift; stat -Lc '%A %4u %4g %n' "$@"; } halt() { local rc=$?; >&2 echo "$@"; exit "${rc/#0/1}"; } results() { printf '::postgres-operator: %s::%s\n' "$@"; } @@ -267,12 +268,12 @@ initContainers: recreate "${postgres_data_directory}" '0700' else (halt Permissions!); fi || halt "$(permissions "${postgres_data_directory}" ||:)" - results 'pgBackRest log directory' "${pgbrLog_directory}" - install --directory --mode=0775 "${pgbrLog_directory}" || - halt "$(permissions "${pgbrLog_directory}" ||:)" - results 'Patroni log directory' "${patroniLog_directory}" - install --directory --mode=0775 "${patroniLog_directory}" || - halt "$(permissions "${patroniLog_directory}" ||:)" + (mkdir -p '/pgdata/pgbackrest/log' && chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest') || + halt "$(permissions /pgdata/pgbackrest/log ||:)" + (mkdir -p '/pgdata/patroni/log' && chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni') || + halt "$(permissions /pgdata/patroni/log ||:)" + (mkdir -p '/pgdata/logs/postgres' && chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs') || + halt "$(permissions /pgdata/logs/postgres ||:)" install -D --mode=0600 -t "/tmp/replication" "/pgconf/tls/replication"/{tls.crt,tls.key,ca.crt} @@ -288,8 +289,6 @@ initContainers: - startup - "11" - /pgdata/pg11_wal - - /pgdata/pgbackrest/log - - /pgdata/patroni/log env: - name: PGDATA value: /pgdata/pg11 @@ -477,20 +476,14 @@ volumes: // Startup moves WAL files to data volume. 
assert.DeepEqual(t, pod.InitContainers[0].Command[4:], - []string{"startup", "11", "/pgdata/pg11_wal", "/pgdata/pgbackrest/log", "/pgdata/patroni/log"}) + []string{"startup", "11", "/pgdata/pg11_wal"}) }) t.Run("WithAdditionalConfigFiles", func(t *testing.T) { clusterWithConfig := cluster.DeepCopy() - clusterWithConfig.Spec.Config.Files = []corev1.VolumeProjection{ - { - Secret: &corev1.SecretProjection{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: "keytab", - }, - }, - }, - } + require.UnmarshalInto(t, &clusterWithConfig.Spec.Config, `{ + files: [{ secret: { name: keytab } }], + }`) pod := new(corev1.PodSpec) InstancePod(ctx, clusterWithConfig, instance, @@ -707,7 +700,7 @@ volumes: // Startup moves WAL files to WAL volume. assert.DeepEqual(t, pod.InitContainers[0].Command[4:], - []string{"startup", "11", "/pgwal/pg11_wal", "/pgdata/pgbackrest/log", "/pgdata/patroni/log"}) + []string{"startup", "11", "/pgwal/pg11_wal"}) }) } diff --git a/internal/postgres/users.go b/internal/postgres/users.go index b16be66152..0caa09cb42 100644 --- a/internal/postgres/users.go +++ b/internal/postgres/users.go @@ -106,7 +106,7 @@ CREATE TEMPORARY TABLE input (id serial, data json); "databases": databases, "options": options, "username": spec.Name, - "verifier": verifiers[string(spec.Name)], + "verifier": verifiers[spec.Name], }) } } @@ -194,9 +194,9 @@ func WriteUsersSchemasInPostgreSQL(ctx context.Context, exec Executor, spec := users[i] // We skip if the user has the name of a reserved schema - if RESERVED_SCHEMA_NAMES[string(spec.Name)] { + if RESERVED_SCHEMA_NAMES[spec.Name] { log.V(1).Info("Skipping schema creation for user with reserved name", - "name", string(spec.Name)) + "name", spec.Name) continue } @@ -239,7 +239,7 @@ func WriteUsersSchemasInPostgreSQL(ctx context.Context, exec Executor, }, "\n"), map[string]string{ "databases": string(databases), - "username": string(spec.Name), + "username": spec.Name, "ON_ERROR_STOP": "on", // Abort when any one statement fails. "QUIET": "on", // Do not print successful commands to stdout. diff --git a/internal/postgres/users_test.go b/internal/postgres/users_test.go index 57587a3b11..313a9f0134 100644 --- a/internal/postgres/users_test.go +++ b/internal/postgres/users_test.go @@ -131,7 +131,7 @@ COMMIT;`)) []v1beta1.PostgresUserSpec{ { Name: "user-no-options", - Databases: []v1beta1.PostgresIdentifier{"db1"}, + Databases: []string{"db1"}, }, { Name: "user-no-databases", @@ -175,7 +175,7 @@ COMMIT;`)) []v1beta1.PostgresUserSpec{ { Name: "postgres", - Databases: []v1beta1.PostgresIdentifier{"all", "ignored"}, + Databases: []string{"all", "ignored"}, Options: "NOLOGIN CONNECTION LIMIT 0", }, }, @@ -213,18 +213,18 @@ func TestWriteUsersSchemasInPostgreSQL(t *testing.T) { []v1beta1.PostgresUserSpec{ { Name: "user-single-db", - Databases: []v1beta1.PostgresIdentifier{"db1"}, + Databases: []string{"db1"}, }, { Name: "user-no-databases", }, { Name: "user-multi-dbs", - Databases: []v1beta1.PostgresIdentifier{"db1", "db2"}, + Databases: []string{"db1", "db2"}, }, { Name: "public", - Databases: []v1beta1.PostgresIdentifier{"db3"}, + Databases: []string{"db3"}, }, }, )) diff --git a/internal/shell/paths.go b/internal/shell/paths.go new file mode 100644 index 0000000000..3455ff8fe4 --- /dev/null +++ b/internal/shell/paths.go @@ -0,0 +1,57 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +// We want the [filepath] package to behave correctly for Linux containers. 
+//go:build unix + +package shell + +import ( + "fmt" + "io/fs" + "path/filepath" + "strings" +) + +// MakeDirectories returns a list of POSIX shell commands that ensure each path +// exists. It creates every directory leading to path from (but not including) +// base and sets their permissions to exactly perms, regardless of umask. +// +// See: +// - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/chmod.html +// - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/mkdir.html +// - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/test.html +// - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/umask.html +func MakeDirectories(perms fs.FileMode, base string, paths ...string) string { + // Without any paths, return a command that succeeds when the base path + // exists. + if len(paths) == 0 { + return `test -d ` + QuoteWord(base) + } + + allPaths := append([]string(nil), paths...) + for _, p := range paths { + if r, err := filepath.Rel(base, p); err == nil && filepath.IsLocal(r) { + // The result of [filepath.Rel] is a shorter representation + // of the full path; skip it. + r = filepath.Dir(r) + + for r != "." { + allPaths = append(allPaths, filepath.Join(base, r)) + r = filepath.Dir(r) + } + } + } + + return `` + + // Create all the paths and any missing parents. + `mkdir -p ` + strings.Join(QuoteWords(paths...), " ") + + + // Set the permissions of every path and each parent. + // NOTE: FileMode bits other than file permissions are ignored. + fmt.Sprintf(` && chmod %#o %s`, + perms&fs.ModePerm, + strings.Join(QuoteWords(allPaths...), " "), + ) +} diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go new file mode 100644 index 0000000000..273f672b79 --- /dev/null +++ b/internal/shell/paths_test.go @@ -0,0 +1,66 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package shell + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "gotest.tools/v3/assert" + "sigs.k8s.io/yaml" + + "github.com/crunchydata/postgres-operator/internal/testing/require" +) + +func TestMakeDirectories(t *testing.T) { + t.Parallel() + + t.Run("NoPaths", func(t *testing.T) { + assert.Equal(t, + MakeDirectories(0o755, "/asdf/jklm"), + `test -d '/asdf/jklm'`) + }) + + t.Run("Children", func(t *testing.T) { + assert.DeepEqual(t, + MakeDirectories(0o775, "/asdf", "/asdf/jklm", "/asdf/qwerty"), + `mkdir -p '/asdf/jklm' '/asdf/qwerty' && chmod 0775 '/asdf/jklm' '/asdf/qwerty'`) + }) + + t.Run("Grandchild", func(t *testing.T) { + script := MakeDirectories(0o775, "/asdf", "/asdf/qwerty/boots") + assert.DeepEqual(t, script, + `mkdir -p '/asdf/qwerty/boots' && chmod 0775 '/asdf/qwerty/boots' '/asdf/qwerty'`) + + t.Run("ShellCheckPOSIX", func(t *testing.T) { + shellcheck := require.ShellCheck(t) + + dir := t.TempDir() + file := filepath.Join(dir, "script.sh") + assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) + + // Expect ShellCheck for "sh" to be happy. 
+ // - https://www.shellcheck.net/wiki/SC2148 + cmd := exec.Command(shellcheck, "--enable=all", "--shell=sh", file) + output, err := cmd.CombinedOutput() + assert.NilError(t, err, "%q\n%s", cmd.Args, output) + }) + }) + + t.Run("Long", func(t *testing.T) { + script := MakeDirectories(0o700, "/", strings.Repeat("/asdf", 20)) + + t.Run("PrettyYAML", func(t *testing.T) { + b, err := yaml.Marshal(script) + s := string(b) + assert.NilError(t, err) + assert.Assert(t, !strings.HasPrefix(s, `"`) && !strings.HasPrefix(s, `'`), + "expected plain unquoted scalar, got:\n%s", b) + }) + }) +} diff --git a/internal/shell/quote.go b/internal/shell/quote.go new file mode 100644 index 0000000000..bac8d14f93 --- /dev/null +++ b/internal/shell/quote.go @@ -0,0 +1,34 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package shell + +import "strings" + +// escapeSingleQuoted is used by [QuoteWord]. +var escapeSingleQuoted = strings.NewReplacer( + // slightly shorter results for the unlikely pair of quotes. + `''`, `'"''"'`, + + // first, close the single-quote U+0027, + // add one between double-quotes U+0022, + // then reopen the single-quote U+0027. + `'`, `'"'"'`, +).Replace + +// QuoteWord ensures that v is interpreted by a shell as a single word. +func QuoteWord(v string) string { + // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html + // https://www.gnu.org/software/bash/manual/html_node/Quoting.html + return `'` + escapeSingleQuoted(v) + `'` +} + +// QuoteWords ensures that s is interpreted by a shell as individual words. +func QuoteWords(s ...string) []string { + quoted := make([]string, len(s)) + for i := range s { + quoted[i] = QuoteWord(s[i]) + } + return quoted +} diff --git a/internal/shell/quote_test.go b/internal/shell/quote_test.go new file mode 100644 index 0000000000..eaea72f673 --- /dev/null +++ b/internal/shell/quote_test.go @@ -0,0 +1,34 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package shell + +import ( + "testing" + + "gotest.tools/v3/assert" +) + +func TestQuoteWord(t *testing.T) { + assert.Equal(t, QuoteWord(""), `''`, + "expected empty and single-quoted") + + assert.Equal(t, QuoteWord("abc"), `'abc'`, + "expected single-quoted") + + assert.Equal(t, QuoteWord(`a" b"c`), `'a" b"c'`, + "expected easy double-quotes") + + assert.Equal(t, QuoteWord(`a' b'c`), + `'a'`+`"'"`+`' b'`+`"'"`+`'c'`, + "expected close-quote-open twice") + + assert.Equal(t, QuoteWord(`a''b`), + `'a'`+`"''"`+`'b'`, + "expected close-quotes-open once") + + assert.Equal(t, QuoteWord(`x''''y`), + `'x'`+`"''"`+`''`+`"''"`+`'y'`, + "expected close-quotes-open twice") +} diff --git a/internal/testing/cmp/cmp.go b/internal/testing/cmp/cmp.go index 6da0edecf4..d7b5764e41 100644 --- a/internal/testing/cmp/cmp.go +++ b/internal/testing/cmp/cmp.go @@ -5,6 +5,7 @@ package cmp import ( + "regexp" "strings" gocmp "github.com/google/go-cmp/cmp" @@ -46,10 +47,24 @@ func Contains(collection, item any) Comparison { // succeeds if the values are equal. The comparison can be customized using // comparison Options. See [github.com/google/go-cmp/cmp.Option] constructors // and [github.com/google/go-cmp/cmp/cmpopts]. -func DeepEqual(x, y any, opts ...gocmp.Option) Comparison { +func DeepEqual[T any](x, y T, opts ...gocmp.Option) Comparison { return gotest.DeepEqual(x, y, opts...) } +// Len succeeds if actual has the expected length. 
+func Len[Slice ~[]E, E any](actual Slice, expected int) Comparison { + return gotest.Len(actual, expected) +} + +// LenMap succeeds if actual has the expected length. +func LenMap[Map ~map[K]V, K comparable, V any](actual Map, expected int) Comparison { + // There doesn't seem to be a way to express "map or slice" in type constraints + // that [Go 1.22] compiler can nicely infer. Ideally, this function goes + // away when a better constraint can be expressed on [Len]. + + return gotest.Len(actual, expected) +} + // MarshalContains converts actual to YAML and succeeds if expected is in the result. func MarshalContains(actual any, expected string) Comparison { b, err := yaml.Marshal(actual) @@ -68,9 +83,9 @@ func MarshalMatches(actual any, expected string) Comparison { return gotest.DeepEqual(string(b), strings.Trim(expected, "\t\n")+"\n") } -// Regexp succeeds if value contains any match of the regular expression re. +// Regexp succeeds if value contains any match of the regular expression. // The regular expression may be a *regexp.Regexp or a string that is a valid // regexp pattern. -func Regexp(re any, value string) Comparison { - return gotest.Regexp(re, value) +func Regexp[RE *regexp.Regexp | ~string](regex RE, value string) Comparison { + return gotest.Regexp(regex, value) } diff --git a/internal/testing/require/encoding.go b/internal/testing/require/encoding.go new file mode 100644 index 0000000000..a99f7a42f1 --- /dev/null +++ b/internal/testing/require/encoding.go @@ -0,0 +1,39 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package require + +import ( + "errors" + "testing" + + "gotest.tools/v3/assert" + "sigs.k8s.io/json" + "sigs.k8s.io/yaml" +) + +// UnmarshalInto parses input as YAML (or JSON) the same way as the Kubernetes +// API Server writing into output. It calls t.Fatal when something fails. +func UnmarshalInto[Data ~string | ~[]byte, Destination *T, T any]( + t testing.TB, output Destination, input Data, +) { + t.Helper() + + // The REST API uses serializers: + // + // https://pkg.go.dev/k8s.io/apimachinery/pkg/runtime/serializer/json + // https://pkg.go.dev/k8s.io/apimachinery/pkg/runtime/serializer/yaml + // + // The util package follows similar paths (strict, preserve ints, etc.) + // + // https://pkg.go.dev/k8s.io/apimachinery/pkg/util/json + // https://pkg.go.dev/k8s.io/apimachinery/pkg/util/yaml + + data, err := yaml.YAMLToJSONStrict([]byte(input)) + assert.NilError(t, err) + + strict, err := json.UnmarshalStrict(data, output) + assert.NilError(t, err) + assert.NilError(t, errors.Join(strict...)) +} diff --git a/internal/testing/require/encoding_test.go b/internal/testing/require/encoding_test.go new file mode 100644 index 0000000000..e4f53611eb --- /dev/null +++ b/internal/testing/require/encoding_test.go @@ -0,0 +1,41 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package require_test + +import ( + "reflect" + "testing" + + "github.com/crunchydata/postgres-operator/internal/testing/require" +) + +func TestUnmarshalInto(t *testing.T) { + for _, tt := range []struct { + input string + expected any + }{ + // Any fraction that amounts to an integral number is converted to an integer. 
+ // See: https://go.dev/play/p/dvXRVhYO8UH + {input: `3`, expected: int64(3)}, + {input: `3.000`, expected: int64(3)}, + {input: `0.03e2`, expected: int64(3)}, + {input: `{a: 5}`, expected: map[string]any{"a": int64(5)}}, + {input: `{a: 5.000}`, expected: map[string]any{"a": int64(5)}}, + {input: `{a: 0.05e2}`, expected: map[string]any{"a": int64(5)}}, + + // YAML or JSON + {input: `asdf`, expected: "asdf"}, + {input: `"asdf"`, expected: "asdf"}, + {input: `[1, 2.3, true]`, expected: []any{int64(1), float64(2.3), true}}, + {input: `{a: b, c, d}`, expected: map[string]any{"a": "b", "c": nil, "d": nil}}, + } { + sink := reflect.Zero(reflect.TypeOf(tt.expected)).Interface() + require.UnmarshalInto(t, &sink, tt.input) + + if !reflect.DeepEqual(tt.expected, sink) { + t.Fatalf("expected %[1]T(%#[1]v), got %[2]T(%#[2]v)", tt.expected, sink) + } + } +} diff --git a/internal/testing/require/errors.go b/internal/testing/require/errors.go new file mode 100644 index 0000000000..128a0397b0 --- /dev/null +++ b/internal/testing/require/errors.go @@ -0,0 +1,33 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package require + +import ( + "errors" + "testing" + + "gotest.tools/v3/assert" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// StatusError returns the [metav1.Status] within err's tree. +// It calls t.Fatal when err is nil or there is no status. +func StatusError(t testing.TB, err error) metav1.Status { + status, ok := err.(apierrors.APIStatus) + + assert.Assert(t, ok || errors.As(err, &status), + "%T does not implement %T", err, status) + + return status.Status() +} + +// Value returns v or panics when err is not nil. +func Value[T any](v T, err error) T { + if err != nil { + panic(err) + } + return v +} diff --git a/internal/testing/validation/pgadmin_test.go b/internal/testing/validation/pgadmin_test.go new file mode 100644 index 0000000000..e8bd72705c --- /dev/null +++ b/internal/testing/validation/pgadmin_test.go @@ -0,0 +1,94 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package validation + +import ( + "context" + "testing" + + "gotest.tools/v3/assert" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/crunchydata/postgres-operator/internal/controller/runtime" + "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestPGAdminInstrumentation(t *testing.T) { + ctx := context.Background() + cc := require.Kubernetes(t) + t.Parallel() + + namespace := require.Namespace(t, cc) + base := v1beta1.NewPGAdmin() + base.Namespace = namespace.Name + base.Name = "pgadmin-instrumentation" + + assert.NilError(t, cc.Create(ctx, base.DeepCopy(), client.DryRunAll), + "expected this base to be valid") + + t.Run("LogsRetentionPeriod", func(t *testing.T) { + pgadmin := base.DeepCopy() + require.UnmarshalInto(t, &pgadmin.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5m }, + }, + }`) + + err := cc.Create(ctx, pgadmin, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + assert.ErrorContains(t, err, "retentionPeriod") + assert.ErrorContains(t, err, "hour|day|week") + assert.ErrorContains(t, err, "one hour") + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 2)) + + for _, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, "spec.instrumentation.logs.retentionPeriod") + } + + t.Run("Valid", func(t *testing.T) { + for _, tt := range []string{ + "28 weeks", + "90 DAY", + "1 hr", + "PT1D2H", + "1 week 2 days", + } { + u, err := runtime.ToUnstructuredObject(pgadmin) + assert.NilError(t, err) + assert.NilError(t, unstructured.SetNestedField(u.Object, + tt, "spec", "instrumentation", "logs", "retentionPeriod")) + + assert.NilError(t, cc.Create(ctx, u, client.DryRunAll), tt) + } + }) + + t.Run("Invalid", func(t *testing.T) { + for _, tt := range []string{ + // Amount too small + "0 days", + "0", + + // Text too long + "2 weeks 3 days 4 hours", + } { + u, err := runtime.ToUnstructuredObject(pgadmin) + assert.NilError(t, err) + assert.NilError(t, unstructured.SetNestedField(u.Object, + tt, "spec", "instrumentation", "logs", "retentionPeriod")) + + err = cc.Create(ctx, u, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err), tt) + assert.ErrorContains(t, err, "retentionPeriod") + } + }) + }) +} diff --git a/internal/testing/validation/postgrescluster_test.go b/internal/testing/validation/postgrescluster_test.go index fb79095ab6..5c8bd9f0e3 100644 --- a/internal/testing/validation/postgrescluster_test.go +++ b/internal/testing/validation/postgrescluster_test.go @@ -11,14 +11,199 @@ import ( "gotest.tools/v3/assert" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/yaml" + "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/testing/cmp" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +func TestPostgresConfigParameters(t *testing.T) { + ctx := context.Background() + cc := 
require.Kubernetes(t) + t.Parallel() + + namespace := require.Namespace(t, cc) + base := v1beta1.NewPostgresCluster() + + // Start with a bunch of required fields. + require.UnmarshalInto(t, &base.Spec, `{ + postgresVersion: 16, + backups: { + pgbackrest: { + repos: [{ name: repo1 }], + }, + }, + instances: [{ + dataVolumeClaimSpec: { + accessModes: [ReadWriteOnce], + resources: { requests: { storage: 1Mi } }, + }, + }], + }`) + + base.Namespace = namespace.Name + base.Name = "postgres-config-parameters" + + assert.NilError(t, cc.Create(ctx, base.DeepCopy(), client.DryRunAll), + "expected this base cluster to be valid") + + t.Run("Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {"archive_timeout", int64(100)}, + {"archive_timeout", "20s"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := require.Value(runtime.ToUnstructuredObject(base)) + assert.NilError(t, unstructured.SetNestedField(cluster.Object, + tt.value, "spec", "config", "parameters", tt.key)) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + + t.Run("Disallowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "cluster_name", value: "asdf"}, + {key: "config_file", value: "asdf"}, + {key: "data_directory", value: ""}, + {key: "external_pid_file", value: ""}, + {key: "hba_file", value: "one"}, + {key: "hot_standby", value: "off"}, + {key: "ident_file", value: "two"}, + {key: "listen_addresses", value: ""}, + {key: "log_file_mode", value: ""}, + {key: "logging_collector", value: "off"}, + {key: "port", value: int64(5)}, + {key: "wal_log_hints", value: "off"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := require.Value(runtime.ToUnstructuredObject(base)) + assert.NilError(t, unstructured.SetNestedField(cluster.Object, + tt.value, "spec", "config", "parameters", tt.key)) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 1)) + + // TODO(k8s-1.30) TODO(validation): Move the parameter name from the message to the field path. 
+ assert.Equal(t, status.Details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(status.Details.Causes[0].Message, tt.key)) + }) + } + }) + + t.Run("NoConnections", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl", value: "off"}, + {key: "ssl_ca_file", value: ""}, + {key: "unix_socket_directories", value: "one"}, + {key: "unix_socket_group", value: "two"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := require.Value(runtime.ToUnstructuredObject(base)) + assert.NilError(t, unstructured.SetNestedField(cluster.Object, + tt.value, "spec", "config", "parameters", tt.key)) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) + + t.Run("NoWriteAheadLog", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "archive_mode", value: "off"}, + {key: "archive_command", value: "true"}, + {key: "restore_command", value: "true"}, + {key: "recovery_target", value: "immediate"}, + {key: "recovery_target_name", value: "doot"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := require.Value(runtime.ToUnstructuredObject(base)) + assert.NilError(t, unstructured.SetNestedField(cluster.Object, + tt.value, "spec", "config", "parameters", tt.key)) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) + + t.Run("wal_level", func(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + cluster := base.DeepCopy() + + cluster.Spec.Config = &v1beta1.PostgresConfig{ + Parameters: map[string]intstr.IntOrString{ + "wal_level": intstr.FromString("logical"), + }, + } + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + + t.Run("Invalid", func(t *testing.T) { + cluster := base.DeepCopy() + + cluster.Spec.Config = &v1beta1.PostgresConfig{ + Parameters: map[string]intstr.IntOrString{ + "wal_level": intstr.FromString("minimal"), + }, + } + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + assert.ErrorContains(t, err, `"replica" or higher`) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 1)) + assert.Equal(t, status.Details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(status.Details.Causes[0].Message, "wal_level")) + }) + }) + + t.Run("NoReplication", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "synchronous_standby_names", value: ""}, + {key: "primary_conninfo", value: ""}, + {key: "primary_slot_name", value: ""}, + {key: "recovery_min_apply_delay", value: ""}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := require.Value(runtime.ToUnstructuredObject(base)) + assert.NilError(t, unstructured.SetNestedField(cluster.Object, + tt.value, "spec", "config", "parameters", tt.key)) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) +} + func TestPostgresUserOptions(t *testing.T) { ctx := context.Background() cc := require.Kubernetes(t) @@ -28,7 +213,7 @@ func TestPostgresUserOptions(t *testing.T) { base := v1beta1.NewPostgresCluster() // Start with a bunch of required fields. 
- assert.NilError(t, yaml.Unmarshal([]byte(`{ + require.UnmarshalInto(t, &base.Spec, `{ postgresVersion: 16, backups: { pgbackrest: { @@ -41,7 +226,7 @@ func TestPostgresUserOptions(t *testing.T) { resources: { requests: { storage: 1Mi } }, }, }], - }`), &base.Spec)) + }`) base.Namespace = namespace.Name base.Name = "postgres-user-options" @@ -63,10 +248,9 @@ func TestPostgresUserOptions(t *testing.T) { assert.Assert(t, apierrors.IsInvalid(err)) assert.ErrorContains(t, err, "cannot contain comments") - //nolint:errorlint // This is a test, and a panic is unlikely. - status := err.(apierrors.APIStatus).Status() + status := require.StatusError(t, err) assert.Assert(t, status.Details != nil) - assert.Equal(t, len(status.Details.Causes), 3) + assert.Assert(t, cmp.Len(status.Details.Causes, 3)) for i, cause := range status.Details.Causes { assert.Equal(t, cause.Field, fmt.Sprintf("spec.users[%d].options", i)) @@ -85,10 +269,9 @@ func TestPostgresUserOptions(t *testing.T) { assert.Assert(t, apierrors.IsInvalid(err)) assert.ErrorContains(t, err, "cannot assign password") - //nolint:errorlint // This is a test, and a panic is unlikely. - status := err.(apierrors.APIStatus).Status() + status := require.StatusError(t, err) assert.Assert(t, status.Details != nil) - assert.Equal(t, len(status.Details.Causes), 2) + assert.Assert(t, cmp.Len(status.Details.Causes, 2)) for i, cause := range status.Details.Causes { assert.Equal(t, cause.Field, fmt.Sprintf("spec.users[%d].options", i)) @@ -106,10 +289,9 @@ func TestPostgresUserOptions(t *testing.T) { assert.Assert(t, apierrors.IsInvalid(err)) assert.ErrorContains(t, err, "should match") - //nolint:errorlint // This is a test, and a panic is unlikely. - status := err.(apierrors.APIStatus).Status() + status := require.StatusError(t, err) assert.Assert(t, status.Details != nil) - assert.Equal(t, len(status.Details.Causes), 1) + assert.Assert(t, cmp.Len(status.Details.Causes, 1)) assert.Equal(t, status.Details.Causes[0].Field, "spec.users[0].options") }) diff --git a/internal/upgradecheck/http_test.go b/internal/upgradecheck/http_test.go index 6b6d419b4d..eb951f815f 100644 --- a/internal/upgradecheck/http_test.go +++ b/internal/upgradecheck/http_test.go @@ -67,7 +67,7 @@ func TestCheckForUpgrades(t *testing.T) { assert.Equal(t, data.RegistrationToken, "speakFriend") assert.Equal(t, data.BridgeClustersTotal, 2) assert.Equal(t, data.PGOClustersTotal, 2) - assert.Equal(t, data.FeatureGatesEnabled, "AutoCreateUserSchema=true,TablespaceVolumes=true") + assert.Equal(t, data.FeatureGatesEnabled, "AutoCreateUserSchema=true,PGUpgradeCPUConcurrency=true,TablespaceVolumes=true") } t.Run("success", func(t *testing.T) { diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/config_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/config_types.go new file mode 100644 index 0000000000..15eac92d55 --- /dev/null +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/config_types.go @@ -0,0 +1,52 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package v1beta1 + +import ( + corev1 "k8s.io/api/core/v1" +) + +// +structType=atomic +type OptionalSecretKeyRef struct { + SecretKeyRef `json:",inline"` + + // Whether or not the Secret or its data must be defined. Defaults to false. + // +optional + Optional *bool `json:"optional,omitempty"` +} + +// AsProjection returns a copy of this as a [corev1.SecretProjection]. 
+func (in *OptionalSecretKeyRef) AsProjection(path string) corev1.SecretProjection { + out := in.SecretKeyRef.AsProjection(path) + if in.Optional != nil { + v := *in.Optional + out.Optional = &v + } + return out +} + +// +structType=atomic +type SecretKeyRef struct { + // Name of the Secret. + // --- + // https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/core/validation#ValidateSecretName + // +required + Name DNS1123Subdomain `json:"name"` + + // Name of the data field within the Secret. + // --- + // https://releases.k8s.io/v1.32.0/pkg/apis/core/validation/validation.go#L2867 + // https://pkg.go.dev/k8s.io/apimachinery/pkg/util/validation#IsConfigMapKey + // +required + Key ConfigDataKey `json:"key"` +} + +// AsProjection returns a copy of this as a [corev1.SecretProjection]. +func (in *SecretKeyRef) AsProjection(path string) corev1.SecretProjection { + var out corev1.SecretProjection + out.Name = in.Name + out.Items = []corev1.KeyToPath{{Key: in.Key, Path: path}} + return out +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/config_types_test.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/config_types_test.go new file mode 100644 index 0000000000..ff74a7a1e7 --- /dev/null +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/config_types_test.go @@ -0,0 +1,80 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package v1beta1_test + +import ( + "strings" + "testing" + + "gotest.tools/v3/assert" + "sigs.k8s.io/yaml" + + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestOptionalSecretKeyRefAsProjection(t *testing.T) { + t.Run("Null", func(t *testing.T) { + in := v1beta1.OptionalSecretKeyRef{} + in.Name, in.Key = "one", "two" + + out := in.AsProjection("three") + b, err := yaml.Marshal(out) + assert.NilError(t, err) + assert.DeepEqual(t, string(b), strings.TrimSpace(` +items: +- key: two + path: three +name: one + `)+"\n") + }) + + t.Run("True", func(t *testing.T) { + True := true + in := v1beta1.OptionalSecretKeyRef{Optional: &True} + in.Name, in.Key = "one", "two" + + out := in.AsProjection("three") + b, err := yaml.Marshal(out) + assert.NilError(t, err) + assert.DeepEqual(t, string(b), strings.TrimSpace(` +items: +- key: two + path: three +name: one +optional: true + `)+"\n") + }) + + t.Run("False", func(t *testing.T) { + False := false + in := v1beta1.OptionalSecretKeyRef{Optional: &False} + in.Name, in.Key = "one", "two" + + out := in.AsProjection("three") + b, err := yaml.Marshal(out) + assert.NilError(t, err) + assert.DeepEqual(t, string(b), strings.TrimSpace(` +items: +- key: two + path: three +name: one +optional: false + `)+"\n") + }) +} + +func TestSecretKeyRefAsProjection(t *testing.T) { + in := v1beta1.SecretKeyRef{Name: "asdf", Key: "foobar"} + out := in.AsProjection("some-path") + + b, err := yaml.Marshal(out) + assert.NilError(t, err) + assert.DeepEqual(t, string(b), strings.TrimSpace(` +items: +- key: foobar + path: some-path +name: asdf + `)+"\n") +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go new file mode 100644 index 0000000000..93613bd1fc --- /dev/null +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go @@ -0,0 +1,73 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package v1beta1 + +import corev1 "k8s.io/api/core/v1" + +// InstrumentationSpec defines the configuration for collecting logs and metrics +// via OpenTelemetry. +type InstrumentationSpec struct { + // Image name to use for collector containers. When omitted, the value + // comes from an operator environment variable. + // +optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,order=1 + Image string `json:"image,omitempty"` + + // Resources holds the resource requirements for the collector container. + // +optional + Resources corev1.ResourceRequirements `json:"resources,omitempty"` + + // Config is the place for users to configure exporters and provide files. + // +optional + Config *InstrumentationConfigSpec `json:"config,omitempty"` + + // Logs is the place for users to configure the log collection. + // +optional + Logs *InstrumentationLogsSpec `json:"logs,omitempty"` +} + +// InstrumentationConfigSpec allows users to configure their own exporters, +// add files, etc. +type InstrumentationConfigSpec struct { + // Exporters allows users to configure OpenTelemetry exporters that exist + // in the collector image. + // +kubebuilder:pruning:PreserveUnknownFields + // +kubebuilder:validation:Schemaless + // +kubebuilder:validation:Type=object + // +optional + Exporters SchemalessObject `json:"exporters,omitempty"` + + // Files allows the user to mount projected volumes into the collector + // Pod so that files can be referenced by the collector as needed. + // +optional + Files []corev1.VolumeProjection `json:"files,omitempty"` +} + +// InstrumentationLogsSpec defines the configuration for collecting logs via +// OpenTelemetry. +type InstrumentationLogsSpec struct { + // Exporters allows users to specify which exporters they want to use in + // the logs pipeline. + // +optional + Exporters []string `json:"exporters,omitempty"` + + // How long to retain log files locally. An RFC 3339 duration or a number + // and unit: `12 hr`, `3d`, `4 weeks`, etc. + // --- + // Kubernetes ensures the value is in the "duration" format, but go ahead + // and loosely validate the format to show some acceptable units. + // NOTE: This rejects fractional numbers: https://github.com/kubernetes/kube-openapi/issues/523 + // +kubebuilder:validation:Pattern=`^(PT)?( *[0-9]+ *(?i:(h|hr|d|w|wk)|(hour|day|week)s?))+$` + // + // `controller-gen` needs to know "Type=string" to allow a "Pattern". + // +kubebuilder:validation:Type=string + // + // Set a max length to keep rule costs low. + // +kubebuilder:validation:MaxLength=20 + // +kubebuilder:validation:XValidation:rule=`duration("1h") <= self && self <= duration("8760h")`,message="must be at least one hour" + // + // +optional + RetentionPeriod *Duration `json:"retentionPeriod,omitempty"` +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgadmin_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgadmin_types.go index 728a96fab6..e9b538368a 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgadmin_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgadmin_types.go @@ -17,7 +17,7 @@ type PGAdminConfiguration struct { // A Secret containing the value for the LDAP_BIND_PASSWORD setting. 
// More info: https://www.pgadmin.org/docs/pgadmin4/latest/ldap.html // +optional - LDAPBindPassword *corev1.SecretKeySelector `json:"ldapBindPassword,omitempty"` + LDAPBindPassword *OptionalSecretKeyRef `json:"ldapBindPassword,omitempty"` // Settings for the pgAdmin server process. Keys should be uppercase and // values must be constants. diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go index 8b87a7b2c7..e0bfe86d5d 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go @@ -15,9 +15,10 @@ type PGUpgradeSpec struct { // +optional Metadata *Metadata `json:"metadata,omitempty"` - // The name of the cluster to be updated - // +required + // The name of the Postgres cluster to upgrade. + // --- // +kubebuilder:validation:MinLength=1 + // +required PostgresClusterName string `json:"postgresClusterName"` // The image name to use for major PostgreSQL upgrades. @@ -42,38 +43,10 @@ type PGUpgradeSpec struct { // The image pull secrets used to pull from a private registry. // Changing this value causes all running PGUpgrade pods to restart. - // https://k8s.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + // https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry // +optional ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` - // TODO(benjaminjb): define webhook validation to make sure - // `fromPostgresVersion` is below `toPostgresVersion` - // or leverage other validation rules, such as the Common Expression Language - // rules currently in alpha as of Kubernetes 1.23 - // - https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation-rules - - // The major version of PostgreSQL before the upgrade. - // +kubebuilder:validation:Required - // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 - FromPostgresVersion int `json:"fromPostgresVersion"` - - // TODO(benjaminjb): define webhook validation to make sure - // `fromPostgresVersion` is below `toPostgresVersion` - // or leverage other validation rules, such as the Common Expression Language - // rules currently in alpha as of Kubernetes 1.23 - - // The major version of PostgreSQL to be upgraded to. - // +kubebuilder:validation:Required - // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 - ToPostgresVersion int `json:"toPostgresVersion"` - - // The image name to use for PostgreSQL containers after upgrade. - // When omitted, the value comes from an operator environment variable. - // +optional - ToPostgresImage string `json:"toPostgresImage,omitempty"` - // Resource requirements for the PGUpgrade container. // +optional Resources corev1.ResourceRequirements `json:"resources,omitempty"` @@ -88,7 +61,7 @@ type PGUpgradeSpec struct { // Priority class name for the PGUpgrade pod. Changing this // value causes PGUpgrade pod to restart. 
- // More info: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/ + // More info: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption // +optional PriorityClassName *string `json:"priorityClassName,omitempty"` @@ -96,6 +69,54 @@ type PGUpgradeSpec struct { // More info: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration // +optional Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + + PGUpgradeSettings `json:",inline"` +} + +// Arguments and settings for the pg_upgrade tool. +// See: https://www.postgresql.org/docs/current/pgupgrade.html +// --- +// +kubebuilder:validation:XValidation:rule=`self.fromPostgresVersion < self.toPostgresVersion` +// +kubebuilder:validation:XValidation:rule=`!has(self.transferMethod) || (self.toPostgresVersion < 12 ? self.transferMethod in ["Copy","Link"] : true)`,message="Only Copy or Link before PostgreSQL 12" +// +kubebuilder:validation:XValidation:rule=`!has(self.transferMethod) || (self.toPostgresVersion < 17 ? self.transferMethod in ["Clone","Copy","Link"] : true)`,message="Only Clone, Copy, or Link before PostgreSQL 17" +type PGUpgradeSettings struct { + + // The major version of PostgreSQL before the upgrade. + // --- + // +kubebuilder:validation:Minimum=11 + // +kubebuilder:validation:Maximum=17 + // +required + FromPostgresVersion int32 `json:"fromPostgresVersion"` + + // The number of simultaneous processes pg_upgrade should use. + // More info: https://www.postgresql.org/docs/current/pgupgrade.html + // --- + // +kubebuilder:validation:Minimum=0 + // +optional + Jobs int32 `json:"jobs,omitempty"` + + // The major version of PostgreSQL to be upgraded to. + // --- + // +kubebuilder:validation:Minimum=11 + // +kubebuilder:validation:Maximum=17 + // +required + ToPostgresVersion int32 `json:"toPostgresVersion"` + + // The method pg_upgrade should use to transfer files to the new cluster. + // More info: https://www.postgresql.org/docs/current/pgupgrade.html + // --- + // Different versions of the tool have different methods. + // - Copy and Link forever: https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/bin/pg_upgrade/pg_upgrade.h;hb=REL_10_0#l232 + // - Clone since 12: https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/bin/pg_upgrade/pg_upgrade.h;hb=REL_12_0#l232 + // - CopyFileRange since 17: https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/bin/pg_upgrade/pg_upgrade.h;hb=REL_17_0#l251 + // + // Kubernetes assumes the evaluation cost of an enum value is very large. + // TODO(k8s-1.29): Drop MaxLength after Kubernetes 1.29; https://issue.k8s.io/119511 + // +kubebuilder:validation:MaxLength=15 + // + // +kubebuilder:validation:Enum={Clone,Copy,CopyFileRange,Link} + // +optional + TransferMethod string `json:"transferMethod,omitempty"` } // PGUpgradeStatus defines the observed state of PGUpgrade diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go index cb69481664..c2f5cc8d0b 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go @@ -4,12 +4,76 @@ package v1beta1 +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +type PostgresConfig struct { + // Files to mount under "/etc/postgres". 
+ // --- + // +optional + Files []corev1.VolumeProjection `json:"files,omitempty"` + + // Configuration parameters for the PostgreSQL server. Some values will + // be reloaded without validation and some cause PostgreSQL to restart. + // Some values cannot be changed at all. + // More info: https://www.postgresql.org/docs/current/runtime-config.html + // --- + // + // Postgres 17 has something like 350+ built-in parameters, but typically + // an administrator will change only a handful of these. + // +kubebuilder:validation:MaxProperties=50 + // + // # File Locations + // - https://www.postgresql.org/docs/current/runtime-config-file-locations.html + // + // +kubebuilder:validation:XValidation:rule=`!has(self.config_file) && !has(self.data_directory)`,message=`cannot change PGDATA path: config_file, data_directory` + // +kubebuilder:validation:XValidation:rule=`!has(self.external_pid_file)`,message=`cannot change external_pid_file` + // +kubebuilder:validation:XValidation:rule=`!has(self.hba_file) && !has(self.ident_file)`,message=`cannot change authentication path: hba_file, ident_file` + // + // # Connections + // - https://www.postgresql.org/docs/current/runtime-config-connection.html + // + // +kubebuilder:validation:XValidation:rule=`!has(self.listen_addresses)`,message=`network connectivity is always enabled: listen_addresses` + // +kubebuilder:validation:XValidation:rule=`!has(self.port)`,message=`change port using .spec.port instead` + // +kubebuilder:validation:XValidation:rule=`!has(self.ssl) && !self.exists(k, k.startsWith("ssl_"))`,message=`TLS is always enabled` + // +kubebuilder:validation:XValidation:rule=`!self.exists(k, k.startsWith("unix_socket_"))`,message=`domain socket paths cannot be changed` + // + // # Write Ahead Log + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + // + // +kubebuilder:validation:XValidation:rule=`!has(self.wal_level) || self.wal_level in ["logical"]`,message=`wal_level must be "replica" or higher` + // +kubebuilder:validation:XValidation:rule=`!has(self.wal_log_hints)`,message=`wal_log_hints are always enabled` + // +kubebuilder:validation:XValidation:rule=`!has(self.archive_mode) && !has(self.archive_command) && !has(self.restore_command)` + // +kubebuilder:validation:XValidation:rule=`!has(self.recovery_target) && !self.exists(k, k.startsWith("recovery_target_"))` + // + // # Replication + // - https://www.postgresql.org/docs/current/runtime-config-replication.html + // + // +kubebuilder:validation:XValidation:rule=`!has(self.hot_standby)`,message=`hot_standby is always enabled` + // +kubebuilder:validation:XValidation:rule=`!has(self.synchronous_standby_names)` + // +kubebuilder:validation:XValidation:rule=`!has(self.primary_conninfo) && !has(self.primary_slot_name)` + // +kubebuilder:validation:XValidation:rule=`!has(self.recovery_min_apply_delay)`,message=`delayed replication is not supported at this time` + // + // # Logging + // - https://www.postgresql.org/docs/current/runtime-config-logging.html + // + // +kubebuilder:validation:XValidation:rule=`!has(self.cluster_name)`,message=`cluster_name is derived from the PostgresCluster name` + // +kubebuilder:validation:XValidation:rule=`!has(self.logging_collector)`,message=`disabling logging_collector is unsafe` + // +kubebuilder:validation:XValidation:rule=`!has(self.log_file_mode)`,message=`log_file_mode cannot be changed` + // + // +mapType=granular + // +optional + Parameters map[string]intstr.IntOrString `json:"parameters,omitempty"` +} + +// --- // PostgreSQL 
identifiers are limited in length but may contain any character. -// More info: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS -// +// - https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS // +kubebuilder:validation:MinLength=1 // +kubebuilder:validation:MaxLength=63 -type PostgresIdentifier string +type PostgresIdentifier = string type PostgresPasswordSpec struct { // Type of password to generate. Defaults to ASCII. Valid options are ASCII @@ -23,6 +87,7 @@ type PostgresPasswordSpec struct { // // +kubebuilder:default=ASCII // +kubebuilder:validation:Enum={ASCII,AlphaNumeric} + // +required Type string `json:"type"` } @@ -33,20 +98,24 @@ const ( ) type PostgresUserSpec struct { - - // This value goes into the name of a corev1.Secret and a label value, so - // it must match both IsDNS1123Subdomain and IsValidLabelValue. The pattern - // below is IsDNS1123Subdomain without any dots, U+002E. - // The name of this PostgreSQL user. The value may contain only lowercase // letters, numbers, and hyphen so that it fits into Kubernetes metadata. + // --- + // This value goes into the name of a corev1.Secret and a label value, so + // it must match both IsDNS1123Subdomain and IsValidLabelValue. + // - https://pkg.go.dev/k8s.io/apimachinery/pkg/util/validation#IsDNS1123Subdomain + // - https://pkg.go.dev/k8s.io/apimachinery/pkg/util/validation#IsValidLabelValue + // + // This is IsDNS1123Subdomain without any dots, U+002E: // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` - // +kubebuilder:validation:Type=string + // + // +required Name PostgresIdentifier `json:"name"` // Databases to which this user can connect and create objects. Removing a // database from this list does NOT revoke access. This field is ignored for // the "postgres" user. + // --- // +listType=set // +optional Databases []PostgresIdentifier `json:"databases,omitempty"` @@ -54,6 +123,7 @@ type PostgresUserSpec struct { // ALTER ROLE options except for PASSWORD. This field is ignored for the // "postgres" user. // More info: https://www.postgresql.org/docs/current/role-attributes.html + // --- // +kubebuilder:validation:MaxLength=200 // +kubebuilder:validation:Pattern=`^[^;]*$` // +kubebuilder:validation:XValidation:rule=`!self.matches("(?i:PASSWORD)")`,message="cannot assign password" @@ -62,6 +132,7 @@ type PostgresUserSpec struct { Options string `json:"options,omitempty"` // Properties of the password generated for this user. 
+ // --- // +optional Password *PostgresPasswordSpec `json:"password,omitempty"` } diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 57ed32644f..9f661b0640 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -25,6 +25,9 @@ type PostgresClusterSpec struct { // +optional Backups Backups `json:"backups,omitempty"` + // +optional + Config *PostgresConfig `json:"config,omitempty"` + // The secret containing the Certificates and Keys to encrypt PostgreSQL // traffic will need to contain the server TLS certificate, TLS key and the // Certificate Authority certificate with the data keys set to tls.crt, @@ -95,6 +98,11 @@ type PostgresClusterSpec struct { // +operator-sdk:csv:customresourcedefinitions:type=spec,order=2 InstanceSets []PostgresInstanceSetSpec `json:"instances"` + // Configuration for the OpenTelemetry collector container used to collect + // logs and metrics. + // +optional + Instrumentation *InstrumentationSpec `json:"instrumentation,omitempty"` + // Whether or not the PostgreSQL cluster is being deployed to an OpenShift // environment. If the field is unset, the operator will automatically // detect the environment. @@ -183,8 +191,6 @@ type PostgresClusterSpec struct { // +kubebuilder:validation:MaxItems=64 // +optional Users []PostgresUserSpec `json:"users,omitempty"` - - Config PostgresAdditionalConfig `json:"config,omitempty"` } // DataSource defines data sources for a new PostgresCluster. @@ -410,10 +416,11 @@ type PostgresClusterStatus struct { // PostgresClusterStatus condition types. const ( - PersistentVolumeResizing = "PersistentVolumeResizing" - PostgresClusterProgressing = "Progressing" - ProxyAvailable = "ProxyAvailable" - Registered = "Registered" + PersistentVolumeResizing = "PersistentVolumeResizing" + PersistentVolumeResizeError = "PersistentVolumeResizeError" + PostgresClusterProgressing = "Progressing" + ProxyAvailable = "ProxyAvailable" + Registered = "Registered" ) type PostgresInstanceSetSpec struct { @@ -676,10 +683,6 @@ type PostgresUserInterfaceStatus struct { PGAdmin PGAdminPodStatus `json:"pgAdmin,omitempty"` } -type PostgresAdditionalConfig struct { - Files []corev1.VolumeProjection `json:"files,omitempty"` -} - // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +operator-sdk:csv:customresourcedefinitions:resources={{ConfigMap,v1},{Secret,v1},{Service,v1},{CronJob,v1beta1},{Deployment,v1},{Job,v1},{StatefulSet,v1},{PersistentVolumeClaim,v1}} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_test.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types_test.go similarity index 99% rename from pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_test.go rename to pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types_test.go index 099418b494..356e8665a6 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_test.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types_test.go @@ -42,7 +42,6 @@ spec: backups: pgbackrest: repos: null - config: {} instances: null patroni: leaderLeaseDurationSeconds: 30 @@ -75,7 +74,6 @@ spec: backups: pgbackrest: repos: null - config: {} instances: - dataVolumeClaimSpec: resources: {} diff --git 
a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types.go index 79de9ae5f3..9ee9009a27 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types.go @@ -5,10 +5,111 @@ package v1beta1 import ( + "encoding/json" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/kube-openapi/pkg/validation/strfmt" ) +// --- +// https://pkg.go.dev/k8s.io/apimachinery/pkg/util/validation#IsConfigMapKey +// +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=253 +// +kubebuilder:validation:Pattern=`^[-._a-zA-Z0-9]+$` +// +kubebuilder:validation:XValidation:rule=`self != "." && !self.startsWith("..")`,message=`cannot be "." or start with ".."` +type ConfigDataKey = string + +// --- +// https://docs.k8s.io/concepts/overview/working-with-objects/names/#dns-subdomain-names +// https://pkg.go.dev/k8s.io/apimachinery/pkg/util/validation#IsDNS1123Subdomain +// https://pkg.go.dev/k8s.io/apiserver/pkg/cel/library#Format +// +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=253 +// +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?([.][a-z0-9]([-a-z0-9]*[a-z0-9])?)*$` +type DNS1123Subdomain = string + +// --- +// Duration represents a string accepted by the Kubernetes API in the "duration" +// [format]. This format extends the "duration" [defined by OpenAPI] by allowing +// some whitespace and more units: +// +// - nanoseconds: ns, nano, nanos +// - microseconds: us, µs, micro, micros +// - milliseconds: ms, milli, millis +// - seconds: s, sec, secs +// - minutes: m, min, mins +// - hours: h, hr, hour, hours +// - days: d, day, days +// - weeks: w, wk, week, weeks +// +// An empty amount is represented as "0" with no unit. +// One day is always 24 hours and one week is always 7 days (168 hours). +// +// +kubebuilder:validation:Format=duration +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:Type=string +// +// During CEL validation, a value of this type is a "google.protobuf.Duration". +// It is safe to pass the value to `duration()` but not necessary. +// +// - https://docs.k8s.io/reference/using-api/cel/#type-system-integration +// - https://github.com/google/cel-spec/blob/-/doc/langdef.md#types-and-conversions +// +// NOTE: When using this type, reject fractional numbers using a Pattern to +// avoid an upstream bug: https://github.com/kubernetes/kube-openapi/issues/523 +// +// [defined by OpenAPI]: https://spec.openapis.org/registry/format/duration.html +// [format]: https://spec.openapis.org/oas/latest.html#data-type-format +type Duration struct { + parsed metav1.Duration + string +} + +// NewDuration creates a duration from the Kubernetes "duration" format in s. +func NewDuration(s string) (*Duration, error) { + td, err := strfmt.ParseDuration(s) + + // The unkeyed fields here helpfully raise warnings from the compiler + // if [metav1.Duration] changes shape in the future. + type unkeyed metav1.Duration + umd := unkeyed{td} + + return &Duration{metav1.Duration(umd), s}, err +} + +// AsDuration returns d as a [metav1.Duration]. +func (d *Duration) AsDuration() metav1.Duration { + return d.parsed +} + +// MarshalJSON implements [encoding/json.Marshaler]. 
+func (d Duration) MarshalJSON() ([]byte, error) { + if d.parsed.Duration == 0 { + return json.Marshal("0") + } + + return json.Marshal(d.string) +} + +// UnmarshalJSON implements [encoding/json.Unmarshaler]. +func (d *Duration) UnmarshalJSON(data []byte) error { + var next *Duration + var str string + + err := json.Unmarshal(data, &str) + if err == nil { + next, err = NewDuration(str) + } + if err == nil { + *d = *next + } + return err +} + // SchemalessObject is a map compatible with JSON object. // // Use with the following markers: @@ -64,7 +165,7 @@ type ServiceSpec struct { // // +optional // +kubebuilder:validation:Enum={Cluster,Local} - InternalTrafficPolicy *corev1.ServiceInternalTrafficPolicyType `json:"internalTrafficPolicy,omitempty"` + InternalTrafficPolicy *corev1.ServiceInternalTrafficPolicy `json:"internalTrafficPolicy,omitempty"` // More info: https://kubernetes.io/docs/concepts/services-networking/service/#traffic-policies // --- @@ -75,7 +176,7 @@ type ServiceSpec struct { // // +optional // +kubebuilder:validation:Enum={Cluster,Local} - ExternalTrafficPolicy *corev1.ServiceExternalTrafficPolicyType `json:"externalTrafficPolicy,omitempty"` + ExternalTrafficPolicy *corev1.ServiceExternalTrafficPolicy `json:"externalTrafficPolicy,omitempty"` } // Sidecar defines the configuration of a sidecar container diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types_test.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types_test.go index 781f9d8c2c..5f50e0cb50 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types_test.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/shared_types_test.go @@ -7,11 +7,152 @@ package v1beta1 import ( "reflect" "testing" + "time" "gotest.tools/v3/assert" + "k8s.io/kube-openapi/pkg/validation/strfmt" "sigs.k8s.io/yaml" ) +func TestDurationYAML(t *testing.T) { + t.Parallel() + + t.Run("Zero", func(t *testing.T) { + zero, err := yaml.Marshal(Duration{}) + assert.NilError(t, err) + assert.DeepEqual(t, zero, []byte(`"0"`+"\n")) + + var parsed Duration + assert.NilError(t, yaml.UnmarshalStrict(zero, &parsed)) + assert.Equal(t, parsed.AsDuration().Duration, 0*time.Second) + }) + + t.Run("Small", func(t *testing.T) { + var parsed Duration + assert.NilError(t, yaml.UnmarshalStrict([]byte(`3ns`), &parsed)) + assert.Equal(t, parsed.AsDuration().Duration, 3*time.Nanosecond) + + b, err := yaml.Marshal(parsed) + assert.NilError(t, err) + assert.DeepEqual(t, b, []byte(`3ns`+"\n")) + }) + + t.Run("Large", func(t *testing.T) { + var parsed Duration + assert.NilError(t, yaml.UnmarshalStrict([]byte(`52 weeks`), &parsed)) + assert.Equal(t, parsed.AsDuration().Duration, 364*24*time.Hour) + + b, err := yaml.Marshal(parsed) + assert.NilError(t, err) + assert.DeepEqual(t, b, []byte(`52 weeks`+"\n")) + }) + + t.Run("UnitsIn", func(t *testing.T) { + const Day = 24 * time.Hour + const Week = 7 * Day + + for _, tt := range []struct { + input string + result time.Duration + }{ + // These can be unmarshaled: + {"1 ns", time.Nanosecond}, + {"2 nano", 2 * time.Nanosecond}, + {"3 nanos", 3 * time.Nanosecond}, + {"4 nanosec", 4 * time.Nanosecond}, + {"5 nanosecs", 5 * time.Nanosecond}, + {"6 nanopants", 6 * time.Nanosecond}, + + {"1 us", time.Microsecond}, + {"2 µs", 2 * time.Microsecond}, + {"3 micro", 3 * time.Microsecond}, + {"4 micros", 4 * time.Microsecond}, + {"5 micrometer", 5 * time.Microsecond}, + + {"1 ms", time.Millisecond}, + {"2 milli", 2 * time.Millisecond}, + {"3 millis", 3 * time.Millisecond}, + {"4 
millisec", 4 * time.Millisecond}, + {"5 millisecs", 5 * time.Millisecond}, + {"6 millipede", 6 * time.Millisecond}, + + {"1s", time.Second}, + {"2 sec", 2 * time.Second}, + {"3 secs", 3 * time.Second}, + {"4 seconds", 4 * time.Second}, + {"5 security", 5 * time.Second}, + + {"1m", time.Minute}, + {"2 min", 2 * time.Minute}, + {"3 mins", 3 * time.Minute}, + {"4 minutia", 4 * time.Minute}, + {"5 mininture", 5 * time.Minute}, + + {"1h", time.Hour}, + {"2 hr", 2 * time.Hour}, + {"3 hour", 3 * time.Hour}, + {"4 hours", 4 * time.Hour}, + {"5 hourglass", 5 * time.Hour}, + + {"1d", Day}, + {"2 day", 2 * Day}, + {"3 days", 3 * Day}, + {"4 dayrock", 4 * Day}, + + {"1w", Week}, + {"2 wk", 2 * Week}, + {"3 week", 3 * Week}, + {"4 weeks", 4 * Week}, + {"5 weekpasta", 5 * Week}, + + // ISO 8601 / RFC 33339 + {"PT2D9H", (2 * Day) + 9*time.Hour}, + } { + var parsed Duration + assert.NilError(t, yaml.UnmarshalStrict([]byte(tt.input), &parsed)) + assert.Equal(t, parsed.AsDuration().Duration, tt.result) + + // This is what Kubernetes calls when validating the "duration" format. + // - https://releases.k8s.io/v1.32.0/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/validation/validation.go#L116 + assert.Assert(t, strfmt.IsDuration(tt.input)) + } + + for _, tt := range []string{ + // These cannot be unmarshaled: + "1 nss", + "2 uss", + "3 usec", + "4 usecs", + "5 µsec", + "6 mss", + "7 hs", + "8 hrs", + "9 ds", + "10 ws", + "11 wks", + } { + assert.ErrorContains(t, + yaml.UnmarshalStrict([]byte(tt), new(Duration)), "unable to parse") + + // This is what Kubernetes calls when validating the "duration" format. + // - https://releases.k8s.io/v1.32.0/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/validation/validation.go#L116 + assert.Assert(t, !strfmt.IsDuration(tt)) + } + }) + + t.Run("DoNotUsePartialAmounts", func(t *testing.T) { + var parsed Duration + assert.NilError(t, yaml.UnmarshalStrict([]byte(`1.5 hours`), &parsed)) + + expected, err := time.ParseDuration(`1.5h`) + assert.NilError(t, err) + + // The parsed value is *not* the expected amount. + assert.Assert(t, parsed.AsDuration().Duration != expected, + "expected https://github.com/kubernetes/kube-openapi/issues/523") + }) +} + func TestSchemalessObjectDeepCopy(t *testing.T) { t.Parallel() @@ -19,7 +160,7 @@ func TestSchemalessObjectDeepCopy(t *testing.T) { assert.DeepEqual(t, z, z.DeepCopy()) var one SchemalessObject - assert.NilError(t, yaml.Unmarshal( + assert.NilError(t, yaml.UnmarshalStrict( []byte(`{ str: value, num: 1, arr: [a, 2, true] }`), &one, )) diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go index 21a6c8fe2b..251c213d12 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go @@ -19,7 +19,7 @@ type StandalonePGAdminConfiguration struct { // A Secret containing the value for the CONFIG_DATABASE_URI setting. // More info: https://www.pgadmin.org/docs/pgadmin4/latest/external_database.html // +optional - ConfigDatabaseURI *corev1.SecretKeySelector `json:"configDatabaseURI,omitempty"` + ConfigDatabaseURI *OptionalSecretKeyRef `json:"configDatabaseURI,omitempty"` // Settings for the gunicorn server. 
// More info: https://docs.gunicorn.org/en/latest/settings.html @@ -32,7 +32,7 @@ type StandalonePGAdminConfiguration struct { // A Secret containing the value for the LDAP_BIND_PASSWORD setting. // More info: https://www.pgadmin.org/docs/pgadmin4/latest/ldap.html // +optional - LDAPBindPassword *corev1.SecretKeySelector `json:"ldapBindPassword,omitempty"` + LDAPBindPassword *OptionalSecretKeyRef `json:"ldapBindPassword,omitempty"` // Settings for the pgAdmin server process. Keys should be uppercase and // values must be constants. @@ -84,6 +84,11 @@ type PGAdminSpec struct { // +optional ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + // Configuration for the OpenTelemetry collector container used to collect + // logs and metrics. + // +optional + Instrumentation *InstrumentationSpec `json:"instrumentation,omitempty"` + // Resource requirements for the PGAdmin container. // +optional Resources corev1.ResourceRequirements `json:"resources,omitempty"` @@ -216,6 +221,12 @@ func (p *PGAdmin) Default() { } } +func NewPGAdmin() *PGAdmin { + p := &PGAdmin{} + p.Default() + return p +} + //+kubebuilder:object:root=true // PGAdminList contains a list of PGAdmin diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index a9c87a7abd..86f3fcb34f 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -363,6 +363,22 @@ func (in *DatabaseInitSQL) DeepCopy() *DatabaseInitSQL { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Duration) DeepCopyInto(out *Duration) { + *out = *in + out.parsed = in.parsed +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Duration. +func (in *Duration) DeepCopy() *Duration { + if in == nil { + return nil + } + out := new(Duration) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ExporterSpec) DeepCopyInto(out *ExporterSpec) { *out = *in @@ -411,6 +427,80 @@ func (in *InstanceSidecars) DeepCopy() *InstanceSidecars { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InstrumentationConfigSpec) DeepCopyInto(out *InstrumentationConfigSpec) { + *out = *in + out.Exporters = in.Exporters.DeepCopy() + if in.Files != nil { + in, out := &in.Files, &out.Files + *out = make([]corev1.VolumeProjection, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationConfigSpec. +func (in *InstrumentationConfigSpec) DeepCopy() *InstrumentationConfigSpec { + if in == nil { + return nil + } + out := new(InstrumentationConfigSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InstrumentationLogsSpec) DeepCopyInto(out *InstrumentationLogsSpec) { + *out = *in + if in.Exporters != nil { + in, out := &in.Exporters, &out.Exporters + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.RetentionPeriod != nil { + in, out := &in.RetentionPeriod, &out.RetentionPeriod + *out = new(Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationLogsSpec. +func (in *InstrumentationLogsSpec) DeepCopy() *InstrumentationLogsSpec { + if in == nil { + return nil + } + out := new(InstrumentationLogsSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InstrumentationSpec) DeepCopyInto(out *InstrumentationSpec) { + *out = *in + in.Resources.DeepCopyInto(&out.Resources) + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(InstrumentationConfigSpec) + (*in).DeepCopyInto(*out) + } + if in.Logs != nil { + in, out := &in.Logs, &out.Logs + *out = new(InstrumentationLogsSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationSpec. +func (in *InstrumentationSpec) DeepCopy() *InstrumentationSpec { + if in == nil { + return nil + } + out := new(InstrumentationSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Metadata) DeepCopyInto(out *Metadata) { *out = *in @@ -475,6 +565,27 @@ func (in *MonitoringStatus) DeepCopy() *MonitoringStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OptionalSecretKeyRef) DeepCopyInto(out *OptionalSecretKeyRef) { + *out = *in + in.SecretKeyRef.DeepCopyInto(&out.SecretKeyRef) + if in.Optional != nil { + in, out := &in.Optional, &out.Optional + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OptionalSecretKeyRef. +func (in *OptionalSecretKeyRef) DeepCopy() *OptionalSecretKeyRef { + if in == nil { + return nil + } + out := new(OptionalSecretKeyRef) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PGAdmin) DeepCopyInto(out *PGAdmin) { *out = *in @@ -514,7 +625,7 @@ func (in *PGAdminConfiguration) DeepCopyInto(out *PGAdminConfiguration) { } if in.LDAPBindPassword != nil { in, out := &in.LDAPBindPassword, &out.LDAPBindPassword - *out = new(corev1.SecretKeySelector) + *out = new(OptionalSecretKeyRef) (*in).DeepCopyInto(*out) } out.Settings = in.Settings.DeepCopy() @@ -654,6 +765,11 @@ func (in *PGAdminSpec) DeepCopyInto(out *PGAdminSpec) { *out = make([]corev1.LocalObjectReference, len(*in)) copy(*out, *in) } + if in.Instrumentation != nil { + in, out := &in.Instrumentation, &out.Instrumentation + *out = new(InstrumentationSpec) + (*in).DeepCopyInto(*out) + } in.Resources.DeepCopyInto(&out.Resources) if in.Affinity != nil { in, out := &in.Affinity, &out.Affinity @@ -1377,6 +1493,21 @@ func (in *PGUpgradeList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PGUpgradeSettings) DeepCopyInto(out *PGUpgradeSettings) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PGUpgradeSettings. +func (in *PGUpgradeSettings) DeepCopy() *PGUpgradeSettings { + if in == nil { + return nil + } + out := new(PGUpgradeSettings) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PGUpgradeSpec) DeepCopyInto(out *PGUpgradeSpec) { *out = *in @@ -1413,6 +1544,7 @@ func (in *PGUpgradeSpec) DeepCopyInto(out *PGUpgradeSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + out.PGUpgradeSettings = in.PGUpgradeSettings } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PGUpgradeSpec. @@ -1558,28 +1690,6 @@ func (in *PatroniSwitchover) DeepCopy() *PatroniSwitchover { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PostgresAdditionalConfig) DeepCopyInto(out *PostgresAdditionalConfig) { - *out = *in - if in.Files != nil { - in, out := &in.Files, &out.Files - *out = make([]corev1.VolumeProjection, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresAdditionalConfig. -func (in *PostgresAdditionalConfig) DeepCopy() *PostgresAdditionalConfig { - if in == nil { - return nil - } - out := new(PostgresAdditionalConfig) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PostgresCluster) DeepCopyInto(out *PostgresCluster) { *out = *in @@ -1691,6 +1801,11 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { (*in).DeepCopyInto(*out) } in.Backups.DeepCopyInto(&out.Backups) + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(PostgresConfig) + (*in).DeepCopyInto(*out) + } if in.CustomTLSSecret != nil { in, out := &in.CustomTLSSecret, &out.CustomTLSSecret *out = new(corev1.SecretProjection) @@ -1723,6 +1838,11 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Instrumentation != nil { + in, out := &in.Instrumentation, &out.Instrumentation + *out = new(InstrumentationSpec) + (*in).DeepCopyInto(*out) + } if in.OpenShift != nil { in, out := &in.OpenShift, &out.OpenShift *out = new(bool) @@ -1790,7 +1910,6 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - in.Config.DeepCopyInto(&out.Config) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterSpec. @@ -1855,6 +1974,35 @@ func (in *PostgresClusterStatus) DeepCopy() *PostgresClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PostgresConfig) DeepCopyInto(out *PostgresConfig) {
+	*out = *in
+	if in.Files != nil {
+		in, out := &in.Files, &out.Files
+		*out = make([]corev1.VolumeProjection, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.Parameters != nil {
+		in, out := &in.Parameters, &out.Parameters
+		*out = make(map[string]intstr.IntOrString, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresConfig.
+func (in *PostgresConfig) DeepCopy() *PostgresConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(PostgresConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *PostgresInstanceSetSpec) DeepCopyInto(out *PostgresInstanceSetSpec) {
 	*out = *in
@@ -2184,6 +2332,21 @@ func (in SchemalessObject) DeepCopyInto(out *SchemalessObject) {
 	}
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SecretKeyRef) DeepCopyInto(out *SecretKeyRef) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SecretKeyRef.
+func (in *SecretKeyRef) DeepCopy() *SecretKeyRef {
+	if in == nil {
+		return nil
+	}
+	out := new(SecretKeyRef)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ServerGroup) DeepCopyInto(out *ServerGroup) {
 	*out = *in
@@ -2277,13 +2440,13 @@ func (in *StandalonePGAdminConfiguration) DeepCopyInto(out *StandalonePGAdminCon
 	}
 	if in.ConfigDatabaseURI != nil {
 		in, out := &in.ConfigDatabaseURI, &out.ConfigDatabaseURI
-		*out = new(corev1.SecretKeySelector)
+		*out = new(OptionalSecretKeyRef)
 		(*in).DeepCopyInto(*out)
 	}
 	out.Gunicorn = in.Gunicorn.DeepCopy()
 	if in.LDAPBindPassword != nil {
 		in, out := &in.LDAPBindPassword, &out.LDAPBindPassword
-		*out = new(corev1.SecretKeySelector)
+		*out = new(OptionalSecretKeyRef)
 		(*in).DeepCopyInto(*out)
 	}
 	out.Settings = in.Settings.DeepCopy()
diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml
index f5ef8c029e..8a0e57bab6 100644
--- a/testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml
+++ b/testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml
@@ -8,11 +8,9 @@ metadata:
 spec:
   # postgres version that is no longer available
   postgresVersion: 11
-  patroni:
-    dynamicConfiguration:
-      postgresql:
-        parameters:
-          shared_preload_libraries: pgaudit, set_user, pg_stat_statements, pgnodemx, pg_cron
+  config:
+    parameters:
+      shared_preload_libraries: pgaudit, set_user, pg_stat_statements, pgnodemx, pg_cron
   instances:
   - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
   backups:
diff --git a/testing/kuttl/e2e/major-upgrade/01--invalid-pgupgrade.yaml b/testing/kuttl/e2e/major-upgrade/01--invalid-pgupgrade.yaml
deleted file mode 100644
index ea90f5718a..0000000000
--- a/testing/kuttl/e2e/major-upgrade/01--invalid-pgupgrade.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
----
-# This pgupgrade is invalid and should get that condition (even with no cluster)
-apiVersion: postgres-operator.crunchydata.com/v1beta1
-kind: PGUpgrade
-metadata:
-  name: major-upgrade-do-it
-spec:
-  fromPostgresVersion: ${KUTTL_PG_VERSION}
-  toPostgresVersion: ${KUTTL_PG_VERSION}
-  postgresClusterName: major-upgrade
diff --git a/testing/kuttl/e2e/major-upgrade/01-assert.yaml b/testing/kuttl/e2e/major-upgrade/01-assert.yaml
deleted file mode 100644
index f4cef66aa7..0000000000
--- a/testing/kuttl/e2e/major-upgrade/01-assert.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
----
-apiVersion: postgres-operator.crunchydata.com/v1beta1
-kind: PGUpgrade
-metadata:
-  name: major-upgrade-do-it
-status:
-  conditions:
-  - type: "Progressing"
-    status: "False"
-    reason: "PGUpgradeInvalid"
diff --git a/testing/kuttl/e2e/major-upgrade/20--cluster-with-invalid-version.yaml b/testing/kuttl/e2e/major-upgrade/20--cluster-with-invalid-version.yaml
deleted file mode 100644
index 8d73277292..0000000000
--- a/testing/kuttl/e2e/major-upgrade/20--cluster-with-invalid-version.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
----
-# Create a cluster where the version does not match the pgupgrade's `from`
-# TODO(benjaminjb): this isn't quite working out
-# apiVersion: postgres-operator.crunchydata.com/v1beta1
-# kind: PostgresCluster
-# metadata:
-#   name: major-upgrade
-# spec:
-#   shutdown: true
-#   postgresVersion: ${KUTTL_PG_UPGRADE_TOO_EARLY_FROM_VERSION}
-#   instances:
-#   - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
-#   backups:
-#     pgbackrest:
-#       repos:
-#       - name: repo1
-#         volume:
-#           volumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
diff --git a/testing/kuttl/e2e/major-upgrade/20-assert.yaml b/testing/kuttl/e2e/major-upgrade/20-assert.yaml
deleted file mode 100644
index 2ea1486284..0000000000
--- a/testing/kuttl/e2e/major-upgrade/20-assert.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
----
-# # pgupgrade should exit since the cluster is already at the requested version
-# apiVersion: postgres-operator.crunchydata.com/v1beta1
-# kind: PGUpgrade
-# metadata:
-#   name: major-upgrade-do-it
-# status:
-#   conditions:
-#   - type: "Progressing"
-#     status: "False"
-#     reason: "PGUpgradeInvalidForCluster"
diff --git a/testing/kuttl/e2e/major-upgrade/21-delete-cluster.yaml b/testing/kuttl/e2e/major-upgrade/21-delete-cluster.yaml
deleted file mode 100644
index 535c6311a4..0000000000
--- a/testing/kuttl/e2e/major-upgrade/21-delete-cluster.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-# # Delete the existing cluster.
-# apiVersion: kuttl.dev/v1beta1
-# kind: TestStep
-# delete:
-# - apiVersion: postgres-operator.crunchydata.com/v1beta1
-#   kind: PostgresCluster
-#   name: major-upgrade
diff --git a/testing/kuttl/e2e/major-upgrade/30--cluster.yaml b/testing/kuttl/e2e/major-upgrade/30--cluster.yaml
index 01e1ef6175..07546c384e 100644
--- a/testing/kuttl/e2e/major-upgrade/30--cluster.yaml
+++ b/testing/kuttl/e2e/major-upgrade/30--cluster.yaml
@@ -6,11 +6,9 @@ metadata:
   name: major-upgrade
 spec:
   postgresVersion: ${KUTTL_PG_UPGRADE_FROM_VERSION}
-  patroni:
-    dynamicConfiguration:
-      postgresql:
-        parameters:
-          shared_preload_libraries: pgaudit, set_user, pg_stat_statements, pgnodemx, pg_cron
+  config:
+    parameters:
+      shared_preload_libraries: pgaudit, set_user, pg_stat_statements, pgnodemx, pg_cron
   instances:
   - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
     replicas: 3
diff --git a/testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml
index c414806892..5c562189f4 100644
--- a/testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml
+++ b/testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml
@@ -8,11 +8,9 @@ metadata:
   labels: { postgres-operator-test: kuttl }
 spec:
   postgresVersion: ${KUTTL_PG_VERSION}
-  patroni:
-    dynamicConfiguration:
-      postgresql:
-        parameters:
-          max_connections: 200
+  config:
+    parameters:
+      max_connections: 200
   instances:
   - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
     replicas: 2
diff --git a/testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml
index f83a02c7c6..0c8cb99b98 100644
--- a/testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml
+++ b/testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml
@@ -7,11 +7,9 @@ metadata:
   labels: { postgres-operator-test: kuttl }
 spec:
   postgresVersion: ${KUTTL_PG_VERSION}
-  patroni:
-    dynamicConfiguration:
-      postgresql:
-        parameters:
-          max_connections: 1000
+  config:
+    parameters:
+      max_connections: 1000
   instances:
   - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
     replicas: 2