Skip to content

Commit

Permalink
Merge branch 'master' into PTX-19346
Browse files Browse the repository at this point in the history
  • Loading branch information
nikolaypopov committed Aug 4, 2023
2 parents b6f7b1e + 65e443a commit 502f386
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 16 deletions.
108 changes: 92 additions & 16 deletions drivers/storage/portworx/testspec/prometheusRule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,31 @@ spec:
- alert: PortworxVolumeUsageCritical
annotations:
description: Portworx volume {{$labels.volumeid}} on {{$labels.instance}} is over 80% used for
more than 10 minutes.
more than 5 minutes.
summary: Portworx volume capacity is at {{$value}}% used.
expr: 100 * (px_volume_usage_bytes / px_volume_capacity_bytes) > 80
for: 5m
labels:
issue: Portworx volume {{$labels.volumeid}} usage on {{$labels.instance}} is high.
issue: Portworx volume {{$labels.volumeid}} usage on {{$labels.instance}} is critical.
severity: critical
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxVolumeUsage
annotations:
description: Portworx volume {{$labels.volumeid}} on {{$labels.instance}} is over 70% used for
more than 10 minutes.
more than 5 minutes.
summary: Portworx volume {{$labels.volumeid}} on {{$labels.instance}} is at {{$value}}% used.
expr: 100 * (px_volume_usage_bytes / px_volume_capacity_bytes) > 70
for: 5m
labels:
issue: Portworx volume {{$labels.volumeid}} usage on {{$labels.instance}} is critical.
issue: Portworx volume {{$labels.volumeid}} usage on {{$labels.instance}} is high.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxVolumeNotInQuorum
annotations:
description: Portworx volume {{$labels.volumeid}} from cluster {{$labels.cluster}} is out of quorum. Please check all nodes with that volume replicas are online.
Expand All @@ -37,6 +45,10 @@ spec:
labels:
issue: Portworx volume out of quorum.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxVolumeInResync
annotations:
description: Portworx volume {{$labels.volumeid}} from cluster {{$labels.cluster}} is in resync state.
Expand All @@ -45,6 +57,10 @@ spec:
labels:
issue: Portworx volume in resync state.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxVolumeDegraded
annotations:
description: Portworx volume {{$labels.volumeid}} from cluster {{$labels.cluster}} is in degraded state. Please check all nodes with that volume replicas are online.
Expand All @@ -53,42 +69,58 @@ spec:
labels:
issue: Portworx volume in degraded state.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxStorageUsageCritical
annotations:
description: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is over 80% used
for more than 10 minutes.
summary: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is at {{$value}}% used.
description: Portworx storage on {{$labels.instance}} is over 80% used
for more than 5 minutes.
summary: Portworx storage on {{$labels.instance}} is at {{$value}}% used.
expr: 100 * (px_cluster_disk_utilized_bytes / px_cluster_disk_total_bytes)
> 80
for: 5m
labels:
issue: Portworx storage {{$labels.volumeid}} usage on {{$labels.instance}} is critical.
issue: Portworx storage usage on {{$labels.instance}} is critical.
severity: critical
resource_type: portworx-node
resource_name: "{{$labels.node}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxStorageUsage
annotations:
description: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is over 70% used
for more than 10 minutes.
summary: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is at {{$value}}% used.
description: Portworx storage on {{$labels.instance}} is over 70% used
for more than 5 minutes.
summary: Portworx storage on {{$labels.instance}} is at {{$value}}% used.
expr: 100 * (px_cluster_disk_utilized_bytes / px_cluster_disk_total_bytes)
> 70
for: 5m
labels:
issue: Portworx storage {{$labels.volumeid}} usage on {{$labels.instance}} is critical.
issue: Portworx storage usage on {{$labels.instance}} is high.
severity: warning
resource_type: portworx-node
resource_name: "{{$labels.node}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxStorageWillFill
annotations:
description: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is over 70% full
and has been predicted to fill within 2 weeks for more than 10 minutes.
summary: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is over 70% full and
description: Portworx storage on {{$labels.instance}} is over 70% full
and has been predicted to fill within 2 weeks.
summary: Portworx storage on {{$labels.instance}} is over 70% full and
is predicted to fill within 2 weeks.
expr: (100 * (1 - (px_cluster_disk_utilized_bytes / px_cluster_disk_total_bytes)))
< 30 and predict_linear(px_cluster_disk_available_bytes[1h], 14 * 86400) <
0
for: 10m
labels:
issue: Portworx storage {{$labels.volumeid}} on {{$labels.instance}} is predicted to fill within
issue: Portworx storage on {{$labels.instance}} is predicted to fill within
2 weeks.
severity: warning
resource_type: portworx-node
resource_name: "{{$labels.node}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxStorageNodeDown
annotations:
description: Portworx Storage Node has been offline for more than 5 minutes.
Expand All @@ -98,6 +130,10 @@ spec:
labels:
issue: Portworx Storage Node is Offline.
severity: critical
resource_type: portworx-cluster
resource_name: "{{$labels.cluster}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxQuorumUnhealthy
annotations:
description: Portworx cluster Quorum Unhealthy for more than 5 minutes.
Expand All @@ -107,6 +143,10 @@ spec:
labels:
issue: Portworx Quorum Unhealthy.
severity: critical
resource_type: portworx-cluster
resource_name: "{{$labels.cluster}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PortworxMemberDown
annotations:
description: Portworx cluster member(s) has(have) been down for more than
Expand All @@ -118,6 +158,10 @@ spec:
labels:
issue: Portworx cluster member(s) is(are) down.
severity: critical
resource_type: portworx-cluster
resource_name: "{{$labels.cluster}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PXBackupError
annotations:
description: Failed to take backup for volume {{$labels.volumename}} with error {{$labels.error_string}}.
Expand All @@ -126,6 +170,10 @@ spec:
labels:
issue: Cloudsnap backup error.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumename}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PXKvdbNodeViewUnhealthy
annotations:
description: Portworx node {{$labels.node_id}} from cluster {{$labels.cluster}} is unable to talk to kvdb. Please check the KVDB health and node's connection to KVDB.
Expand All @@ -135,6 +183,10 @@ spec:
labels:
issue: Portworx node {{$labels.node_id}} is unable to talk to kvdb.
severity: critical
resource_type: portworx-node
resource_name: "{{$labels.node}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PXKvdbClusterViewUnhealthy
annotations:
description: Portworx node {{$labels.node_id}} from cluster {{$labels.cluster}} is reporting that the cluster is unable to talk to kvdb. Please check KVDB health and the node's connection to KVDB and the other nodes in the cluster.
Expand All @@ -143,6 +195,10 @@ spec:
labels:
issue: Portworx cluster {{$labels.cluster}} is unable to talk to kvdb.
severity: critical
resource_type: portworx-cluster
resource_name: "{{$labels.cluster}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- name: Portworx PoolResize Alerts
rules:
- alert: PoolExpandSuccessful
Expand All @@ -153,6 +209,10 @@ spec:
labels:
issue: Portworx pool expand successful.
severity: warning
resource_type: portworx-node
resource_name: "{{$labels.node}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: PoolExpandFailure
annotations:
description: Pool expansion for pool {{$labels.POOL}} from node {{$labels.instance}}, from Portworx cluster {{$labels.clusterid}} failed. Please check Portworx alerts for more details.
Expand All @@ -161,6 +221,10 @@ spec:
labels:
issue: Portworx pool expand failure.
severity: critical
resource_type: portworx-node
resource_name: "{{$labels.node}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- name: Portworx VolumeResize Alerts
rules:
- alert: VolumeResizeSuccessful
Expand All @@ -171,6 +235,10 @@ spec:
labels:
issue: Portworx volume resize successful.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: VolumeResizeDeferred
annotations:
description: Volume resize for volume {{$labels.volumeid}} from Portworx cluster {{$labels.clusterid}} deferred. Please check Portworx alerts for more details.
Expand All @@ -179,6 +247,10 @@ spec:
labels:
issue: Portworx volume resize deferred.
severity: warning
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
- alert: VolumeResizeFailed
annotations:
description: Volume resize for volume {{$labels.volumeid}} from Portworx cluster {{$labels.clusterid}} failed. Please check Portworx alerts for more details.
Expand All @@ -187,3 +259,7 @@ spec:
labels:
issue: Portworx volume resize failure.
severity: critical
resource_type: portworx-volume
resource_name: "{{$labels.volumeid}}"
scrape_target_type: portworx-node
scrape_target_name: "{{$labels.node}}"
4 changes: 4 additions & 0 deletions test/integration_test/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ func setup() error {
"portworx-env-vars",
"",
"List of comma separated environment variables that will be added to StorageCluster spec")
// Custom annotations are supplied as a single comma separated string of
// key:value pairs and are stored in ci_utils.PxCustomAnnotations.
flag.StringVar(&ci_utils.PxCustomAnnotations,
"portworx-custom-annotations",
"",
"List of comma separated custom annotations that will be added to StorageCluster spec")
flag.StringVar(&ci_utils.PxDeviceSpecs,
"portworx-device-specs",
"",
Expand Down
1 change: 1 addition & 0 deletions test/integration_test/operator-test-pod-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ spec:
- -portworx-device-specs=PORTWORX_DEVICE_SPECS
- -portworx-kvdb-spec=PORTWORX_KVDB_SPEC
- -portworx-env-vars=PORTWORX_ENV_VARS
- -portworx-custom-annotations=PORTWORX_CUSTOM_ANNOTATIONS
- -is-ocp=IS_OCP
- -is-eks=IS_EKS
- -is-aks=IS_AKS
Expand Down
15 changes: 15 additions & 0 deletions test/integration_test/test-deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ is_oke=false
portworx_device_specs=""
portworx_kvdb_spec=""
portworx_env_vars=""
portworx_custom_annotations=""
log_level="debug"
for i in "$@"
do
Expand Down Expand Up @@ -148,6 +149,12 @@ case $i in
shift
shift
;;
# Custom annotations for the StorageCluster spec; the value is taken from the
# argument that follows the flag.
--portworx-custom-annotations)
echo "Flag for Portworx Custom Annotations: $2"
portworx_custom_annotations=$2
shift
shift
;;
--short-test)
echo "Skip tests that are long/not supported: $2"
short_test=$2
Expand Down Expand Up @@ -211,6 +218,14 @@ else
sed -i '/PORTWORX_ENV_VARS/d' $test_pod_spec
fi

# Portworx custom annotations
# Replace the PORTWORX_CUSTOM_ANNOTATIONS placeholder in the test pod spec with
# the supplied value, or drop the line carrying the placeholder when no custom
# annotations were given.
if [ "$portworx_custom_annotations" != "" ]; then
    echo "Portworx Custom Annotations: $portworx_custom_annotations"
    # Escape the characters that are special inside a sed replacement ('\', '&'
    # and the '|' delimiter used below) so the value is inserted literally.
    escaped_custom_annotations=$(printf '%s' "$portworx_custom_annotations" | sed -e 's/[\\&|]/\\&/g')
    sed -i 's|PORTWORX_CUSTOM_ANNOTATIONS|'"$escaped_custom_annotations"'|g' "$test_pod_spec"
else
    sed -i '/PORTWORX_CUSTOM_ANNOTATIONS/d' "$test_pod_spec"
fi

# Set OCP
if [ "$is_ocp" != "" ]; then
echo "This is OCP cluster: $is_ocp"
Expand Down
3 changes: 3 additions & 0 deletions test/integration_test/utils/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ var (
// PxEnvVars is a string of comma separated ENV vars
PxEnvVars string

// PxCustomAnnotations is a string of comma separated key:value annotations
// that will be added to the StorageCluster spec
PxCustomAnnotations string

// CloudProvider is a cloud provider name
CloudProvider string

Expand Down
12 changes: 12 additions & 0 deletions test/integration_test/utils/storagecluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,18 @@ func ConstructStorageCluster(cluster *corev1.StorageCluster, specGenURL string,
cluster.Annotations["portworx.io/is-oke"] = "true"
}

// Add custom annotations. PxCustomAnnotations is a comma separated list of
// key:value pairs. Each pair is split on its first ':' only, so values that
// themselves contain ':' are preserved in full.
if len(PxCustomAnnotations) != 0 {
	if cluster.Annotations == nil {
		cluster.Annotations = make(map[string]string)
	}
	for _, annotation := range strings.Split(PxCustomAnnotations, ",") {
		keyvalue := strings.SplitN(annotation, ":", 2)
		key := strings.TrimSpace(keyvalue[0])
		// Skip empty or malformed entries (for example a trailing comma or a
		// pair without ':') instead of panicking on a missing value.
		if len(keyvalue) != 2 || key == "" {
			continue
		}
		cluster.Annotations[key] = strings.TrimSpace(keyvalue[1])
	}
}

// Populate cloud storage
if len(PxDeviceSpecs) != 0 {
pxDeviceSpecs := strings.Split(PxDeviceSpecs, ";")
Expand Down

0 comments on commit 502f386

Please sign in to comment.