diff --git a/.github/workflows/stackhpc-container-image-build.yml b/.github/workflows/stackhpc-container-image-build.yml index d57c80bda..7cd09b585 100644 --- a/.github/workflows/stackhpc-container-image-build.yml +++ b/.github/workflows/stackhpc-container-image-build.yml @@ -92,6 +92,7 @@ jobs: timeout-minutes: 720 permissions: {} strategy: + fail-fast: false matrix: ${{ fromJson(needs.generate-tag.outputs.matrix) }} needs: - generate-tag diff --git a/doc/source/configuration/index.rst b/doc/source/configuration/index.rst index f19775700..8c283481d 100644 --- a/doc/source/configuration/index.rst +++ b/doc/source/configuration/index.rst @@ -18,3 +18,4 @@ the various features provided. wazuh vault magnum-capi + security-hardening diff --git a/doc/source/configuration/release-train.rst b/doc/source/configuration/release-train.rst index 318e60307..f7cdafc85 100644 --- a/doc/source/configuration/release-train.rst +++ b/doc/source/configuration/release-train.rst @@ -101,6 +101,23 @@ default apt repositories. This can be done on a host-by host basis by defining the variables as host or group vars under ``etc/kayobe/inventory/host_vars`` or ``etc/kayobe/inventory/group_vars``. +For Ubuntu-based deployments, Pulp currently `lacks support +`_ for certain types of content, +including i18n files and command-not-found indices. This breaks APT when the +``command-not-found`` package is installed: + +.. code:: console + + E: Failed to fetch https://pulp.example.com/pulp/content/ubuntu/jammy-security/development/dists/jammy-security/main/cnf/Commands-amd64 404 Not Found + +The ``purge-command-not-found.yml`` custom playbook can be used to uninstall +the package, prior to running any other APT commands. It may be installed as a +:kayobe-doc:`pre-hook ` to the ``host +configure`` commands. Note that if used as a hook, this playbook matches all +hosts, so will run against the seed, even when running ``overcloud host +configure``. 
Depending on the stage of deployment, some hosts may be +unreachable. + For Rocky Linux based systems, package manager configuration is provided by ``stackhpc_dnf_repos`` in ``etc/kayobe/dnf.yml``, which points to package repositories on the local Pulp server. To use this configuration, the diff --git a/doc/source/configuration/security-hardening.rst b/doc/source/configuration/security-hardening.rst new file mode 100644 index 000000000..f0cd77df3 --- /dev/null +++ b/doc/source/configuration/security-hardening.rst @@ -0,0 +1,42 @@ +================== +Security Hardening +================== + +CIS Benchmark Hardening +----------------------- + +The roles from the `Ansible-Lockdown `_ +project are used to harden hosts in accordance with the CIS benchmark criteria. +It won't get your benchmark score to 100%, but should provide a significant +improvement over an unhardened system. A typical score would be 70%. + +The following operating systems are supported: + +- Ubuntu 22.04 +- Rocky 9 + +Configuration +-------------- + +Some overrides to the role defaults are provided in +``$KAYOBE_CONFIG_PATH/inventory/group_vars/overcloud/cis``. These may not be +suitable for all deployments and so some fine tuning may be required. For +instance, you may want different rules on a network node compared to a +controller. It is best to consult the upstream role documentation for details +about what each variable does. The documentation can be found here: + +- `Ubuntu 22.04 `__ +- `Rocky 9 `__ + +Running the playbooks +--------------------- + +As there is potential for unintended side effects when applying the hardening +playbooks, the playbooks are not currently enabled by default. It is recommended +that they are first applied to a representative staging environment to determine +whether or not workloads or API requests are affected by any configuration changes. + +.. 
code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cis.yml + diff --git a/etc/kayobe/ansible/cis.yml b/etc/kayobe/ansible/cis.yml index ce6445359..3edb174cc 100644 --- a/etc/kayobe/ansible/cis.yml +++ b/etc/kayobe/ansible/cis.yml @@ -4,12 +4,18 @@ hosts: overcloud become: true tasks: - - name: Remove /etc/motd - # See remediation in: - # https://github.com/wazuh/wazuh/blob/bfa4efcf11e288c0a8809dc0b45fdce42fab8e0d/ruleset/sca/centos/8/cis_centos8_linux.yml#L777 - file: - path: /etc/motd - state: absent + - name: Ensure the cron package is installed on ubuntu + package: + name: cron + state: present + when: ansible_facts.distribution == 'Ubuntu' - include_role: - name: ansible-lockdown.rhel8_cis + name: ansible-lockdown.rhel9_cis + when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9' + tags: always + + - include_role: + name: ansible-lockdown.ubuntu22_cis + when: ansible_facts.distribution == 'Ubuntu' and ansible_facts.distribution_major_version == '22' + tags: always diff --git a/etc/kayobe/ansible/nova-compute-disable.yml b/etc/kayobe/ansible/nova-compute-disable.yml index 98ab58e28..32ea8ee09 100644 --- a/etc/kayobe/ansible/nova-compute-disable.yml +++ b/etc/kayobe/ansible/nova-compute-disable.yml @@ -11,6 +11,7 @@ - name: Set up openstack cli virtualenv pip: virtualenv: "{{ venv }}" + virtualenv_command: "/usr/bin/python3 -m venv" name: - python-openstackclient state: latest diff --git a/etc/kayobe/ansible/nova-compute-drain.yml b/etc/kayobe/ansible/nova-compute-drain.yml index a29ac3169..dddf84634 100644 --- a/etc/kayobe/ansible/nova-compute-drain.yml +++ b/etc/kayobe/ansible/nova-compute-drain.yml @@ -11,6 +11,7 @@ - name: Set up openstack cli virtualenv pip: virtualenv: "{{ venv }}" + virtualenv_command: "/usr/bin/python3 -m venv" name: - python-openstackclient state: latest diff --git a/etc/kayobe/ansible/nova-compute-enable.yml b/etc/kayobe/ansible/nova-compute-enable.yml index 
9d6d45720..f880a2aa4 100644 --- a/etc/kayobe/ansible/nova-compute-enable.yml +++ b/etc/kayobe/ansible/nova-compute-enable.yml @@ -11,6 +11,7 @@ - name: Set up openstack cli virtualenv pip: virtualenv: "{{ venv }}" + virtualenv_command: "/usr/bin/python3 -m venv" name: - python-openstackclient state: latest diff --git a/etc/kayobe/ansible/rabbitmq-reset.yml b/etc/kayobe/ansible/rabbitmq-reset.yml index 0aece9a05..82e7ee992 100644 --- a/etc/kayobe/ansible/rabbitmq-reset.yml +++ b/etc/kayobe/ansible/rabbitmq-reset.yml @@ -1,6 +1,6 @@ --- # Reset a broken RabbitMQ cluster. -# Also restarts OpenStack services which may be broken. +# Also restarts all OpenStack services using RabbitMQ. - name: Reset RabbitMQ hosts: controllers @@ -65,7 +65,7 @@ tags: - restart-openstack tasks: - # The following services can have problems if the cluster gets broken. + # The following services use RabbitMQ. - name: Restart OpenStack services shell: >- - systemctl -a | egrep '(cinder|heat|ironic|keystone|magnum|neutron|nova)' | awk '{ print $1 }' | xargs systemctl restart + systemctl -a | egrep '(barbican|blazar|cinder|cloudkitty|designate|heat|ironic|keystone|magnum|manila|neutron|nova|octavia)' | awk '{ print $1 }' | xargs systemctl restart diff --git a/etc/kayobe/ansible/rekey-hosts.yml b/etc/kayobe/ansible/rekey-hosts.yml new file mode 100644 index 000000000..a72da3ac7 --- /dev/null +++ b/etc/kayobe/ansible/rekey-hosts.yml @@ -0,0 +1,117 @@ +--- +# Playbook to rotate SSH keys across the cloud. By default it will rotate the +# standard keys used by kayobe/kolla-ansible, but it can be configured for any +# keys. 
+ +- name: Rekey hosts + hosts: overcloud,seed,seed-hypervisor,infra-vms + gather_facts: false + vars: + # The existing key is the key that is currently used to access overcloud hosts + existing_private_key_path: "{{ ssh_private_key_path }}" + existing_public_key_path: "{{ ssh_public_key_path }}" + # The new key is the key that will be generated by this playbook + new_private_key_path: "{{ ssh_private_key_path }}" + new_public_key_path: "{{ ssh_public_key_path }}" + new_key_type: "{{ ssh_key_type }}" + # The existing key will locally be moved to deprecated_key_path once it is replaced + deprecated_key_path: ~/old_ssh_key + rekey_users: + - stack + - kolla + rekey_remove_existing_key: false + tasks: + - name: Stat existing private key file + ansible.builtin.stat: + path: "{{ existing_private_key_path }}" + register: stat_result + delegate_to: localhost + run_once: true + + - name: Fail when existing private key does not exist + ansible.builtin.fail: + msg: "No existing private key file found. Check existing_private_key_path is set correctly." + when: + - not stat_result.stat.exists + delegate_to: localhost + run_once: true + + - name: Stat existing public key file + ansible.builtin.stat: + path: "{{ existing_public_key_path }}" + register: stat_result + delegate_to: localhost + run_once: true + + - name: Fail when existing public key does not exist + ansible.builtin.fail: + msg: "No existing public key file found. Check existing_public_key_path is set correctly." 
+ when: + - not stat_result.stat.exists + delegate_to: localhost + run_once: true + + - name: Generate a new SSH key + community.crypto.openssh_keypair: + path: "{{ existing_private_key_path }}_new" + type: "{{ new_key_type }}" + delegate_to: localhost + run_once: true + + - name: Set new authorized keys + vars: + lookup_path: "{{ existing_private_key_path }}_new.pub" + ansible.posix.authorized_key: + user: "{{ item }}" + state: present + key: "{{ lookup('file', lookup_path) }}" + loop: "{{ rekey_users }}" + become: true + + - name: Locally deprecate existing key (private) + command: "mv {{ existing_private_key_path }} {{ deprecated_key_path }}" + delegate_to: localhost + run_once: true + + - name: Locally deprecate existing key (public) + command: "mv {{ existing_public_key_path }} {{ deprecated_key_path }}.pub" + delegate_to: localhost + run_once: true + + - name: Locally promote new key (private) + command: "mv {{ existing_private_key_path }}_new {{ new_private_key_path }}" + delegate_to: localhost + run_once: true + + - name: Locally promote new key (public) + command: "mv {{ existing_private_key_path }}_new.pub {{ new_public_key_path }}" + delegate_to: localhost + run_once: true + + - block: + - name: Stat old key file + ansible.builtin.stat: + path: "{{ deprecated_key_path }}.pub" + register: stat_result + delegate_to: localhost + run_once: true + + - name: Fail when deprecated public key does not exist + ansible.builtin.fail: + msg: "No deprecated public key file found. Check deprecated_key_path is set correctly." 
+ when: + - not stat_result.stat.exists + delegate_to: localhost + run_once: true + + - name: Remove old key from hosts + vars: + lookup_path: "{{ deprecated_key_path }}.pub" + ansible.posix.authorized_key: + user: "{{ item }}" + state: absent + key: "{{ lookup('file', lookup_path) }}" + loop: "{{ rekey_users }}" + become: true + tags: remove-key + when: rekey_remove_existing_key | bool diff --git a/etc/kayobe/ansible/requirements.yml b/etc/kayobe/ansible/requirements.yml index 3154d461e..bfb2d51b9 100644 --- a/etc/kayobe/ansible/requirements.yml +++ b/etc/kayobe/ansible/requirements.yml @@ -12,9 +12,16 @@ collections: version: 2.4.0 roles: - src: stackhpc.vxlan - - name: ansible-lockdown.rhel8_cis - src: https://github.com/ansible-lockdown/RHEL8-CIS - version: 1.3.0 + - name: ansible-lockdown.ubuntu22_cis + src: https://github.com/stackhpc/UBUNTU22-CIS + #FIXME: Waiting for https://github.com/ansible-lockdown/UBUNTU22-CIS/pull/174 + # to be in a tagged release + version: bugfix/inject-facts + - name: ansible-lockdown.rhel9_cis + src: https://github.com/stackhpc/RHEL9-CIS + #FIXME: Waiting for https://github.com/ansible-lockdown/RHEL9-CIS/pull/115 + # to be in a tagged release. 
+ version: bugfix/inject-facts - name: wazuh-ansible src: https://github.com/stackhpc/wazuh-ansible version: stackhpc diff --git a/etc/kayobe/ansible/wazuh-agent.yml b/etc/kayobe/ansible/wazuh-agent.yml index cd5afce93..730828604 100644 --- a/etc/kayobe/ansible/wazuh-agent.yml +++ b/etc/kayobe/ansible/wazuh-agent.yml @@ -28,7 +28,9 @@ owner: wazuh group: wazuh block: sca.remote_commands=1 - when: custom_sca_policies.files | length > 0 + when: + - custom_sca_policies_folder.stat.exists + - custom_sca_policies.files | length > 0 notify: - Restart wazuh-agent diff --git a/etc/kayobe/inventory/group_vars/overcloud/cis b/etc/kayobe/inventory/group_vars/overcloud/cis index 81fb151e8..40cd0692b 100644 --- a/etc/kayobe/inventory/group_vars/overcloud/cis +++ b/etc/kayobe/inventory/group_vars/overcloud/cis @@ -1,24 +1,136 @@ --- +############################################################################## +# Common CIS Hardening Configuration -# NOTE: kayobe configures NTP. Do not clobber configuration. -rhel8cis_time_synchronization: skip -rhel8cis_rule_2_2_1_1: false -rhel8cis_rule_2_2_1_2: false +# Enable collecting auditd logs +update_audit_template: true -# NOTE: disable CIS rolefirewall configuration -rhel8cis_firewall: skip -rhel8cis_rule_3_4_1_1: false +############################################################################## +# Rocky 9 CIS Hardening Configuration -# NOTE: kayobe does not currently support selinux -rhel8cis_selinux_disable: true +# Allow IP forwarding +rhel9cis_is_router: true -# NOTE: This updates the system. Let's do this explicitly. 
-rhel8cis_rule_1_9: false +# Skip configuration of chrony +rhel9cis_rule_2_1_1: false +rhel9cis_rule_2_1_2: false + +# Skip configuration of the firewall +rhel9cis_firewall: None +rhel9cis_rule_3_4_1_2: false + +# Don't configure selinux +rhel9cis_selinux_disable: true # NOTE: FUTURE breaks wazuh agent repo metadata download -rhel8cis_crypto_policy: FIPS +rhel9cis_crypto_policy: FIPS + +# Skip package updates +rhel9cis_rule_1_9: false + +# Disable requirement for password when using sudo +rhel9cis_rule_5_3_4: false + +# Disable check for root password being set, we should be locking root passwords instead. +# Please double-check yourself with: sudo passwd -S root +rhel9cis_rule_5_6_6: false + +# Configure log rotation to prevent audit logs from filling the disk +rhel9cis_auditd: + space_left_action: syslog + action_mail_acct: root + admin_space_left_action: halt + max_log_file_action: rotate + +# Max size of audit logs (MB) +rhel9cis_max_log_file_size: 1024 + +# Disable setting of bootloader password. This requires setting the variable +# `rhel9cis_bootloader_password_hash` +rhel9cis_set_boot_pass: false + +############################################################################## +# Ubuntu Jammy CIS Hardening Configuration + +# Ubuntu 22 CIS configuration +# Disable changing routing rules +ubtu22cis_is_router: true + +# Set Chrony as the time sync tool +ubtu22cis_time_sync_tool: "chrony" + +# Disable CIS from configuring the firewall +ubtu22cis_firewall_package: "none" + +# Stop CIS from installing Network Manager +ubtu22cis_install_network_manager: false + +# Set syslog service to journald +ubtu22cis_syslog_service: journald + +# Squashfs is compiled into the kernel +ubtu22cis_rule_1_1_1_2: false + +# This updates the system. Let's do this explicitly. 
+ubtu22cis_rule_1_9: false + +# Do not change Chrony Time servers +ubtu22cis_rule_2_1_2_1: false + +# Disable CIS from touching sudoers +ubtu22cis_rule_5_3_4: false + +# Add stack and kolla to allowed ssh users +ubtu22cis_sshd: + log_level: "INFO" + max_auth_tries: 4 + ciphers: + - chacha20-poly1305@openssh.com + - aes256-gcm@openssh.com + - aes128-gcm@openssh.com + - aes256-ctr + - aes192-ctr + - aes128-ctr + macs: + - hmac-sha2-512-etm@openssh.com + - hmac-sha2-256-etm@openssh.com + - hmac-sha2-512 + - hmac-sha2-256 + kex_algorithms: + - curve25519-sha256 + - curve25519-sha256@libssh.org + - diffie-hellman-group14-sha256 + - diffie-hellman-group16-sha512 + - diffie-hellman-group18-sha512 + - ecdh-sha2-nistp521 + - ecdh-sha2-nistp384 + - ecdh-sha2-nistp256 + - diffie-hellman-group-exchange-sha256 + client_alive_interval: 300 + client_alive_count_max: 3 + login_grace_time: 60 + max_sessions: 8 + allow_users: "kolla stack ubuntu" + allow_groups: "kolla stack ubuntu" + deny_users: "" + deny_groups: "" + +# Do not change /var/lib/docker permissions +ubtu22cis_no_group_adjust: false +ubtu22cis_no_owner_adjust: false + +# Configure log rotation to prevent audit logs from filling the disk +ubtu22cis_auditd: + action_mail_acct: root + space_left_action: syslog + admin_space_left_action: halt + max_log_file_action: rotate + +# Max size of audit logs (MB) +ubtu22cis_max_log_file_size: 1024 -# NOTE: We will remove /etc/motd instead. This prevents a duplicate warning -# from being displayed. -rhel8cis_rule_1_8_1_1: false -rhel8cis_rule_1_8_1_4: false +# Disable grub bootloader password. 
Requires overriding +# ubtu22cis_bootloader_password_hash +ubtu22cis_rule_1_4_1: false +ubtu22cis_rule_1_4_3: false +############################################################################## diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json index 091c76760..40ed94f29 100644 --- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json +++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json @@ -148,7 +148,7 @@ "tableColumn": "", "targets": [ { - "expr": "topk(1, elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"red\"}==1 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"green\"}==1)+4 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"yellow\"}==1)+22)", + "expr": "topk(1, elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"red\"}==1 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"green\"}==1)+4 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"yellow\"}==1)+2)", "format": "time_series", "instant": true, "intervalFactor": 2, diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json index 926dd1a13..26f352071 100644 --- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json +++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json @@ -113,7 +113,7 @@ "steppedLine": false, "targets": [ { - "expr": "count(haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:[0-9]+\"} == 1)", + "expr": "count(haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"} == 1)", "hide": false, "interval": "$interval", "intervalFactor": 2, @@ -122,7 +122,7 @@ "step": 60 }, { - "expr": "count(haproxy_backend_up{backend=~\"$backend\",instance=~\"$host:[0-9]+\"} == 0)", + "expr": 
"count(haproxy_backend_up{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"} == 0)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backends Down", @@ -265,7 +265,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\",code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])) by (code)", + "expr": "sum(irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\",code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])) by (code)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Frontend {{ code }}", @@ -274,7 +274,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_http_responses_total{backend=~\"$backend\",code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])) by (code)", + "expr": "sum(irate(haproxy_backend_http_responses_total{backend=~\"$backend\",code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])) by (code)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Backend {{ code }}", @@ -380,7 +380,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN Front", @@ -389,7 +389,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT Front", @@ -397,14 +397,14 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": 
"sum(irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "intervalFactor": 2, "legendFormat": "IN Back", "refId": "C", "step": 240 }, { - "expr": "sum(irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "intervalFactor": 2, "legendFormat": "OUT Back", "refId": "D", @@ -507,7 +507,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Front", @@ -516,7 +516,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Back", @@ -525,7 +525,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Back errors", @@ -634,7 +634,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", 
"intervalFactor": 1, "legendFormat": "Requests", @@ -643,7 +643,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_response_errors_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_response_errors_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Response errors", @@ -651,7 +651,7 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Requests errors", @@ -660,7 +660,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backend redispatch", @@ -668,7 +668,7 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backend retry", @@ -676,7 +676,7 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Request denied", @@ -684,7 +684,7 @@ "step": 60 }, 
{ - "expr": "sum(haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backend Queued", @@ -788,7 +788,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Frontend current sessions", @@ -797,7 +797,7 @@ "step": 30 }, { - "expr": "sum(haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Frontend current session rate", @@ -806,7 +806,7 @@ "step": 30 }, { - "expr": "sum(haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Backend current sessions", @@ -815,7 +815,7 @@ "step": 30 }, { - "expr": "sum(haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Backend current session rate", @@ -940,7 +940,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": 
"irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN-{{ backend }}", @@ -949,7 +949,7 @@ "step": 30 }, { - "expr": "irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT-{{ backend }}", @@ -1056,7 +1056,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN-{{ frontend }}", @@ -1065,7 +1065,7 @@ "step": 30 }, { - "expr": "irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT-{{ frontend }}", @@ -1189,7 +1189,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_bytes_in_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_server_bytes_in_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN-{{ backend }} / {{ server }}", @@ -1198,7 +1198,7 @@ "step": 30 }, { - "expr": "irate(haproxy_server_bytes_out_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_server_bytes_out_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT-{{ 
backend }} / {{ server }}", @@ -1319,7 +1319,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -1328,7 +1328,7 @@ "step": 30 }, { - "expr": "irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "{{ backend }} Error", "refId": "A", @@ -1426,7 +1426,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ frontend }}", @@ -1544,7 +1544,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_max_queue{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_max_queue{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -1650,7 +1650,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -1775,7 +1775,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": 
"irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Redispatch {{ backend }}", @@ -1784,7 +1784,7 @@ "step": 30 }, { - "expr": "irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Retry {{ backend }}", @@ -1792,7 +1792,7 @@ "step": 60 }, { - "expr": "irate(haproxy_backend_response_errors_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_response_errors_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "Error {{ backend }}", "refId": "C", @@ -1899,7 +1899,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ frontend }}", @@ -1908,14 +1908,14 @@ "step": 30 }, { - "expr": "irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\", instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "{{ frontend }} Error", "refId": "B", "step": 240 }, { - "expr": "irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\", instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "{{ frontend }} Denied", "refId": "C", @@ -2024,7 +2024,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"irate(haproxy_backend_http_responses_total{backend=~\"$backend\", code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_http_responses_total{backend=~\"$backend\", code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ code }} {{ backend }}", @@ -2119,7 +2119,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\", code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\", code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ code }} {{ frontend }} ", @@ -2231,7 +2231,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2327,7 +2327,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2423,7 +2423,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -2518,7 +2518,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "interval": 
"$interval", "intervalFactor": 1, "legendFormat": "{{ frontend }}", @@ -2618,7 +2618,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_max_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_max_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2628,7 +2628,7 @@ "step": 30 }, { - "expr": "haproxy_backend_limit_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_limit_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "intervalFactor": 2, "legendFormat": "{{ backend }} limit", "refId": "B", @@ -2726,7 +2726,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_max_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_max_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2736,7 +2736,7 @@ "step": 30 }, { - "expr": "haproxy_frontend_limit_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_limit_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "intervalFactor": 2, "legendFormat": "{{ frontend }} limit", "refId": "B", @@ -2829,7 +2829,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_max_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_max_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2930,7 +2930,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_max_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_max_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2940,7 +2940,7 @@ "step": 30 }, { - "expr": 
"haproxy_frontend_limit_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_limit_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "intervalFactor": 2, "legendFormat": "{{ frontend }} limit", "refId": "B", @@ -3051,7 +3051,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -3146,7 +3146,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_weight{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_weight{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -3375,7 +3375,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_max_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_max_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -3470,7 +3470,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_current_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_current_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -3592,7 +3592,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])) by (server)", + "expr": 
"sum(irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])) by (server)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} {{ server }}", @@ -3700,7 +3700,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -3710,7 +3710,7 @@ "step": 30 }, { - "expr": "haproxy_server_redispatch_warnings_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_redispatch_warnings_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 2, @@ -3719,7 +3719,7 @@ "step": 60 }, { - "expr": "irate(haproxy_server_response_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_response_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "hide": false, "interval": "$interval", "intervalFactor": 2, @@ -3830,7 +3830,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} {{ server }} {{ code }}", @@ -3942,7 +3942,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_current_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_current_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": 
"$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4037,7 +4037,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_current_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_current_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4132,7 +4132,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_max_session_rate{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_max_session_rate{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4227,7 +4227,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_max_sessions{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_max_sessions{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4339,7 +4339,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_downtime_seconds_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_downtime_seconds_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4435,7 +4435,7 @@ "steppedLine": false, "targets": [ { - "expr": "increase(haproxy_server_check_failures_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "increase(haproxy_server_check_failures_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4530,7 +4530,7 @@ 
"steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_connection_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_connection_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4725,7 +4725,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_up{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_up{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4820,7 +4820,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_weight{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_weight{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -5169,7 +5169,7 @@ "name": "backend", "options": [], "query": { - "query": "label_values(haproxy_backend_bytes_in_total{instance=~\"$host:[0-9]+\"}, backend)", + "query": "label_values(haproxy_backend_bytes_in_total{instance=~\"$host:?[0-9]*\"}, backend)", "refId": "Prometheus-backend-Variable-Query" }, "refresh": 1, @@ -5201,7 +5201,7 @@ "name": "frontend", "options": [], "query": { - "query": "label_values(haproxy_frontend_bytes_in_total{instance=~\"$host:[0-9]+\"}, frontend)", + "query": "label_values(haproxy_frontend_bytes_in_total{instance=~\"$host:?[0-9]*\"}, frontend)", "refId": "Prometheus-frontend-Variable-Query" }, "refresh": 1, @@ -5232,7 +5232,7 @@ "name": "server", "options": [], "query": { - "query": "label_values(haproxy_server_bytes_in_total{instance=~\"$host:[0-9]+\", backend=~\"$backend\"}, server)", + "query": "label_values(haproxy_server_bytes_in_total{instance=~\"$host:?[0-9]*\", backend=~\"$backend\"}, server)", "refId": 
"Prometheus-server-Variable-Query" }, "refresh": 1, @@ -5264,7 +5264,7 @@ "name": "code", "options": [], "query": { - "query": "label_values(haproxy_server_http_responses_total{instance=~\"$host:[0-9]+\", backend=~\"$backend\", server=~\"$server\"}, code)", + "query": "label_values(haproxy_server_http_responses_total{instance=~\"$host:?[0-9]*\", backend=~\"$backend\", server=~\"$server\"}, code)", "refId": "Prometheus-code-Variable-Query" }, "refresh": 1, diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json index 60649ff28..12771a0f2 100644 --- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json +++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json @@ -637,8 +637,8 @@ "overrides": [] }, "gridPos": { - "h": 12, - "w": 20, + "h": 13, + "w": 9, "x": 0, "y": 17 }, @@ -674,6 +674,95 @@ ], "title": "Disk Temperatures", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The data written to the disk in the last 24h period divided by the physical capacity of the disk", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + 
"gridPos": { + "h": 13, + "w": 10, + "x": 9, + "y": 17 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "delta(nvme_data_units_written_total{instance=~\"$node\"}[24h])*512000 / nvme_physical_size_bytes{instance=~\"$node\"}", + "legendFormat": "{{instance}} - {{device}}", + "range": true, + "refId": "A" + } + ], + "title": "DWPD", + "type": "timeseries" } ], "refresh": false, diff --git a/etc/kayobe/kolla/config/prometheus/smart.rules b/etc/kayobe/kolla/config/prometheus/smart.rules index aea36bdf8..853d9268a 100644 --- a/etc/kayobe/kolla/config/prometheus/smart.rules +++ b/etc/kayobe/kolla/config/prometheus/smart.rules @@ -13,4 +13,20 @@ groups: summary: "SMART monitor reports bad disk on (instance {{ $labels.instance }})" description: "{{ $labels.instance }} is reporting unhealthy for the disk at {{ $labels.disk }}. 
Disk serial number is: {{ $labels.serial_number }}" -{% endraw %} \ No newline at end of file + - alert: DWPDTooHigh + expr: (delta(nvme_data_units_written_total[30d])*512000 / nvme_physical_size_bytes) / 30 > 1 + labels: + severity: alert + annotations: + summary: "High 30-Day Average DWPD for {{ $labels.instance }}" + description: "The 30-day average for Drive Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD" + + - alert: DWPDTooHighWarning + expr: (delta(nvme_data_units_written_total[7d])*512000 / nvme_physical_size_bytes) / 7 > 1 + labels: + severity: warning + annotations: + summary: "High 7-Day Average DWPD for {{ $labels.instance }}" + description: "The 7-day average for Drive Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD" + +{% endraw %} diff --git a/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml b/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml new file mode 100644 index 000000000..5e75a51ad --- /dev/null +++ b/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Added the ``rekey-hosts.yml`` playbook to automatically rotate the SSH + keys on all hosts. diff --git a/releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml b/releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml new file mode 100644 index 000000000..66de6e0e8 --- /dev/null +++ b/releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Adds support for Ubuntu Jammy and Rocky 9 to the CIS benchmark hardening playbook: + ``cis.yml``. This playbook will need to be manually applied.
diff --git a/releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml b/releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml new file mode 100644 index 000000000..62d918519 --- /dev/null +++ b/releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Adds a panel in the Hardware Overview dashboard to show DWPD (Drive Writes + Per Day) for NVMe drives. This is calculated by dividing the total bytes + written in the past 24 hours by the drive capacity. This is currently only + supported on NVMe drives. + - | + Adds a warning alert that fires when the 7-day average exceeds 1 DWPD, and + a higher-severity alert that fires when the 30-day average exceeds 1 DWPD. diff --git a/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml b/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml new file mode 100644 index 000000000..1a45bc9e0 --- /dev/null +++ b/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes display of the OpenSearch cluster health in Grafana + when the cluster is in the yellow state. diff --git a/releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml b/releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml new file mode 100644 index 000000000..5e2e00c68 --- /dev/null +++ b/releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes the Grafana HAProxy dashboard when non-default Prometheus instance labels + are used. diff --git a/requirements.txt b/requirements.txt index c8b7c43d6..631266a78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ kayobe@git+https://github.com/stackhpc/kayobe@stackhpc/2023.1 ansible-modules-hashivault +jmespath