From 30467e1f90136c505dd4a8aa0f8a252b716df676 Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:26:24 +0100 Subject: [PATCH 01/37] Add DWPD to Hardware Overview dashboard Adds the DWPD (Drive Writes Per Day) to the Hardware Overview dashboard in Grafana. This is useful for monitoring the wear on NVMEs. --- .../openstack/hardware_overview.json | 93 ++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json index 60649ff28..12771a0f2 100644 --- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json +++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json @@ -637,8 +637,8 @@ "overrides": [] }, "gridPos": { - "h": 12, - "w": 20, + "h": 13, + "w": 9, "x": 0, "y": 17 }, @@ -674,6 +674,95 @@ ], "title": "Disk Temperatures", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The data written to the disk in the last 24h period divided by the physical capacity of the disk", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 10, + "x": 9, + "y": 17 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "delta(nvme_data_units_written_total{instance=~\"$node\"}[24h])*512000 / nvme_physical_size_bytes{instance=~\"$node\"}", + "legendFormat": "{{instance}} - {{device}}", + "range": true, + "refId": "A" + } + ], + "title": "DWPD", + "type": "timeseries" } ], "refresh": false, From ab444c948cc0cfe60a5b476d7cb58a6a946836c4 Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Fri, 13 Oct 2023 18:35:02 +0100 Subject: [PATCH 02/37] Add DWPD alerts --- etc/kayobe/kolla/config/prometheus/smart.rules | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/etc/kayobe/kolla/config/prometheus/smart.rules b/etc/kayobe/kolla/config/prometheus/smart.rules index aea36bdf8..853d9268a 100644 --- a/etc/kayobe/kolla/config/prometheus/smart.rules +++ b/etc/kayobe/kolla/config/prometheus/smart.rules @@ -13,4 +13,20 @@ groups: summary: "SMART monitor reports bad disk on (instance {{ $labels.instance }})" description: "{{ $labels.instance }} is reporting unhealthy for the disk at {{ $labels.disk }}. 
Disk serial number is: {{ $labels.serial_number }}" -{% endraw %} \ No newline at end of file + - alert: DWPDTooHigh + expr: (delta(nvme_data_units_written_total[30d])*512000 / nvme_physical_size_bytes) / 30 > 1 + labels: + severity: alert + annotations: + summary: "High 30-Day Average DWPD for {{ $labels.instance }}" + description: "The 30-Day average for Disk Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD" + + - alert: DWPDTooHighWarning + expr: (delta(nvme_data_units_written_total[7d])*512000 / nvme_physical_size_bytes) / 7 > 1 + labels: + severity: warning + annotations: + summary: "High 7-Day Average DWPD for {{ $labels.instance }}" + description: "The 7-day average for Disk Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD" + +{% endraw %} From f1293b9ad7b246df1f04cd2804553f331c2a201b Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:39:02 +0100 Subject: [PATCH 03/37] Add release note --- releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml diff --git a/releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml b/releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml new file mode 100644 index 000000000..62d918519 --- /dev/null +++ b/releasenotes/notes/dwpd-6b9fb0c8d6d3a570.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Adds a panel in the Hardware Overview dashboard to show DWPD (Drive writes + per day) for NVMEs. This is calculated by dividing the total bytes written + in the past 24 hours by the drive capacity. This is currently only + supported on NVMEs. + - | + Adds alerts that will fire after 1 DWPD is sustained for 7 days, and a + critical alert if 1 DWPD is sustained for 30 days. From 0fc39a0e65ab6c58db72d51507abbfa2761db835 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 8 Nov 2023 17:13:52 +0000 Subject: [PATCH 04/37] docs: Add in-place upgrade to RL9 migration --- doc/source/operations/rocky-linux-9.rst | 83 ++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 9113f287a..8b873a6b8 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -599,7 +599,84 @@ Wazuh manager TODO -In-place migrations -=================== +In-place upgrades +================= -TODO +Sometimes it is necessary to upgrade a system in-place. +This may be the case for the seed hypervisor or Ansible control host which are often installed manually onto bare metal. +This procedure is not officially recommended, and can be risky, so be sure to back up all critical data and ensure serial console access is available (including password login) in case of getting locked out. + +The procedure is performed in two stages: + +1. Migrate from CentOS Stream 8 to Rocky Linux 8 +2. Upgrade from Rocky Linux 8 to Rocky Linux 9 + +Potential issues +---------------- + +Full procedure +-------------- + +- It's good to inspect existing DNF packages and determine whether they are + really required. + +- Use the `migrate2rocky.sh + `__ + script to migrate to Rocky Linux 8. + +- Disable all DNF modules - they're no longer used. + + .. code-block:: console + + sudo dnf module disable "*" + +- Migrate to NetworkManager: + + - Ensure that all network interfaces are managed by Network Manager: + + .. 
code:: console + + sudo sed -i -e 's/NM_CONTROLLED=no/NM_CONTROLLED=yes/g' /etc/sysconfig/network-scripts/* + + - Enable and start NetworkManager: + + .. code:: console + + sudo systemctl enable NetworkManager + sudo systemctl start NetworkManager + + - Migrate Ethernet connections to native NetworkManager configuration: + + .. code:: console + + sudo nmcli connection migrate + + - Manually migrate non-Ethernet (bonds, bridges & VLAN subinterfaces) network interfaces to native NetworkManager. + + - Look out for lost DNS configuration after migration to NetworkManager. This may be manually restored using something like this: + + .. code:: console + + nmcli con mod System\ brextmgmt.3003 ipv4.dns "10.41.4.4 10.41.4.5 10.41.4.6" + + - Make sure there are no funky udev rules left in + ``/etc/udev/rules.d/70-persistent-net.rules`` (e.g. from cloud-init run on + Rocky 9.1). + + - Inspect networking configuration at this point, ideally reboot to validate correctness. + +- Upgrade to Rocky Linux 9 + + .. https://forums.rockylinux.org/t/dnf-warning-message-after-upgrade-from-rocky-8-to-rocky-9/8319/2 + + .. code:: console + + sudo dnf install -y https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-gpg-keys-9.2-1.6.el9.noarch.rpm \ + https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-release-9.2-1.6.el9.noarch.rpm \ + https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-repos-9.2-1.6.el9.noarch.rpm + sudo rm -rf /usr/share/redhat-logos + sudo dnf --releasever=9 --allowerasing --setopt=deltarpm=false distro-sync -y + sudo rpm --rebuilddb + sudo rpm -qa | grep el8 | xargs dnf remove + +- You will need to re-create *all* virtualenvs afterwards due to system Python version upgrade. From 1e218c7ff5772e36ac8e75fb7bebf07daff00b76 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 9 Nov 2023 10:03:11 +0000 Subject: [PATCH 05/37] docs: RL9 migration in-place updates --- doc/source/operations/rocky-linux-9.rst | 55 ++++++++++++++++++++----- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 8b873a6b8..beec3d020 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -617,8 +617,7 @@ Potential issues Full procedure -------------- -- It's good to inspect existing DNF packages and determine whether they are - really required. +- Inspect existing DNF packages and determine whether they are really required. - Use the `migrate2rocky.sh `__ @@ -630,7 +629,9 @@ Full procedure sudo dnf module disable "*" -- Migrate to NetworkManager: +- Migrate to NetworkManager. This can be done using a manual process or with Kayobe. + + The manual process is as follows: - Ensure that all network interfaces are managed by Network Manager: @@ -659,6 +660,16 @@ Full procedure nmcli con mod System\ brextmgmt.3003 ipv4.dns "10.41.4.4 10.41.4.5 10.41.4.6" + The following Kayobe process for migrating to NetworkManager has not yet been tested. + + - Set ``interfaces_use_nmconnection: true`` as a host/group variable for the relevant hosts + + - Run the appropriate host configure command. For example, for the seed hypervisor: + + .. code:: console + + kayobe seed hypervisor host configure -t network -kt none + - Make sure there are no funky udev rules left in ``/etc/udev/rules.d/70-persistent-net.rules`` (e.g. from cloud-init run on Rocky 9.1). @@ -669,14 +680,36 @@ Full procedure .. 
https://forums.rockylinux.org/t/dnf-warning-message-after-upgrade-from-rocky-8-to-rocky-9/8319/2 - .. code:: console + - Install Rocky Linux 9 repositories and GPG keys: + + .. code:: console + + sudo dnf install -y https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-gpg-keys-9.2-1.6.el9.noarch.rpm \ + https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-release-9.2-1.6.el9.noarch.rpm \ + https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-repos-9.2-1.6.el9.noarch.rpm + + - Remove the RedHat logos package: + + .. code:: console + + sudo rm -rf /usr/share/redhat-logos + + - Synchronise all packages with current versions + + .. code:: console + + sudo dnf --releasever=9 --allowerasing --setopt=deltarpm=false distro-sync -y + + - Rebuild RPB database: + + .. code:: console + + sudo rpm --rebuilddb + + - Remove all EL8 packages: + + .. code:: console - sudo dnf install -y https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-gpg-keys-9.2-1.6.el9.noarch.rpm \ - https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-release-9.2-1.6.el9.noarch.rpm \ - https://download.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/Packages/r/rocky-repos-9.2-1.6.el9.noarch.rpm - sudo rm -rf /usr/share/redhat-logos - sudo dnf --releasever=9 --allowerasing --setopt=deltarpm=false distro-sync -y - sudo rpm --rebuilddb - sudo rpm -qa | grep el8 | xargs dnf remove + sudo rpm -qa | grep el8 | xargs dnf remove - You will need to re-create *all* virtualenvs afterwards due to system Python version upgrade. From 31b0c4016aecda7099c492f425ca63ee83b9cbf8 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 9 Nov 2023 13:07:21 +0000 Subject: [PATCH 06/37] docs: RL9 migration in place fix --- doc/source/operations/rocky-linux-9.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index beec3d020..13ce7fd90 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -706,10 +706,18 @@ Full procedure sudo rpm --rebuilddb - - Remove all EL8 packages: + - Make a list of EL8 packages to remove: .. code:: console - sudo rpm -qa | grep el8 | xargs dnf remove + sudo rpm -qa | grep el8 > el8-packages + + - Inspect the ``el8-packages`` list and ensure only expected packages are included. + + - Remove the EL8 packages: + + .. code:: console + + cat el8-packages | xargs sudo dnf remove -y - You will need to re-create *all* virtualenvs afterwards due to system Python version upgrade. From 94e88e1a2fe998cca88240fdff8c087936674e35 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 10 Nov 2023 12:52:43 +0000 Subject: [PATCH 07/37] Bump nova images Nova hasn't been rebuilt since: https://github.com/stackhpc/stackhpc-kayobe-config/commit/1cfefbd1a12f18c414546f957b9afb43fb1aa18d So this switches back to the upstream code. 
--- etc/kayobe/kolla/globals.yml | 1 + .../notes/fix-libvirt-mdev-issue-55b3f501a436c3be.yaml | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 releasenotes/notes/fix-libvirt-mdev-issue-55b3f501a436c3be.yaml diff --git a/etc/kayobe/kolla/globals.yml b/etc/kayobe/kolla/globals.yml index 59dfacfce..ab67604ef 100644 --- a/etc/kayobe/kolla/globals.yml +++ b/etc/kayobe/kolla/globals.yml @@ -28,6 +28,7 @@ prometheus_node_exporter_tag: xena-20230315T164024 glance_tls_proxy_tag: "{% raw %}{{ haproxy_tag | default(openstack_tag) }}{% endraw %}" neutron_tls_proxy_tag: "{% raw %}{{ haproxy_tag | default(openstack_tag) }}{% endraw %}" +nova_tag: xena-20231110T095551 om_enable_rabbitmq_high_availability: true diff --git a/releasenotes/notes/fix-libvirt-mdev-issue-55b3f501a436c3be.yaml b/releasenotes/notes/fix-libvirt-mdev-issue-55b3f501a436c3be.yaml new file mode 100644 index 000000000..cd7c16f41 --- /dev/null +++ b/releasenotes/notes/fix-libvirt-mdev-issue-55b3f501a436c3be.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes an `issue where Nova failed to parse the mdev device format + `__. From 5c4270f4fe7328327eb52df996a6246126ebfba7 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 14 Nov 2023 17:15:52 +0100 Subject: [PATCH 08/37] Fix Grafana HAProxy dashboard (again) The previous commit to this file [1] added port numbers to the instance label regex. This is compatible with the default Prometheus targets generated by Kolla Ansible, which look like this: 192.168.1.1:9101 However, when using a non-default instance label [2], the port number is absent, which breaks the dashboard (all panels are empty). Modify the regex to make the port number optional, which should support all possible instance labels. Note: I first tried using `$host:([0-9]+)?` but it does not appear to be supported by Prometheus, which uses RE2 [3]. 
[1] https://github.com/stackhpc/stackhpc-kayobe-config/commit/eceee825432748c48035ae1f3b3289a14c7373c0 [2] https://docs.openstack.org/kolla-ansible/latest/reference/logging-and-monitoring/prometheus-guide.html#metric-instance-labels [3] https://github.com/google/re2/wiki/Syntax --- .../grafana/dashboards/openstack/haproxy.json | 148 +++++++++--------- ...board-instance-label-836b93921e964680.yaml | 5 + 2 files changed, 79 insertions(+), 74 deletions(-) create mode 100644 releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json index 926dd1a13..26f352071 100644 --- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json +++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/haproxy.json @@ -113,7 +113,7 @@ "steppedLine": false, "targets": [ { - "expr": "count(haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:[0-9]+\"} == 1)", + "expr": "count(haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"} == 1)", "hide": false, "interval": "$interval", "intervalFactor": 2, @@ -122,7 +122,7 @@ "step": 60 }, { - "expr": "count(haproxy_backend_up{backend=~\"$backend\",instance=~\"$host:[0-9]+\"} == 0)", + "expr": "count(haproxy_backend_up{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"} == 0)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backends Down", @@ -265,7 +265,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\",code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])) by (code)", + "expr": "sum(irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\",code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])) by (code)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Frontend {{ code }}", @@ -274,7 +274,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_http_responses_total{backend=~\"$backend\",code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])) by (code)", + "expr": "sum(irate(haproxy_backend_http_responses_total{backend=~\"$backend\",code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])) by (code)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Backend {{ code }}", @@ -380,7 +380,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN Front", @@ -389,7 +389,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT Front", @@ -397,14 +397,14 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "intervalFactor": 2, "legendFormat": "IN Back", "refId": "C", "step": 240 }, { - "expr": 
"sum(irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8) by (instance)", + "expr": "sum(irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8) by (instance)", "intervalFactor": 2, "legendFormat": "OUT Back", "refId": "D", @@ -507,7 +507,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Front", @@ -516,7 +516,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Back", @@ -525,7 +525,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Back errors", @@ -634,7 +634,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Requests", @@ -643,7 +643,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_response_errors_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_response_errors_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Response errors", @@ -651,7 +651,7 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Requests errors", @@ -660,7 +660,7 @@ "step": 30 }, { - "expr": "sum(irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backend redispatch", @@ -668,7 +668,7 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": "sum(irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backend retry", @@ -676,7 +676,7 @@ "step": 60 }, { - "expr": "sum(irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])) by (instance)", + "expr": 
"sum(irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Request denied", @@ -684,7 +684,7 @@ "step": 60 }, { - "expr": "sum(haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Backend Queued", @@ -788,7 +788,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Frontend current sessions", @@ -797,7 +797,7 @@ "step": 30 }, { - "expr": "sum(haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Frontend current session rate", @@ -806,7 +806,7 @@ "step": 30 }, { - "expr": "sum(haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Backend current sessions", @@ -815,7 +815,7 @@ "step": 30 }, { - "expr": "sum(haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}) by (instance)", + "expr": "sum(haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}) by (instance)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Backend current session rate", @@ -940,7 +940,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_backend_bytes_in_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN-{{ backend }}", @@ -949,7 +949,7 @@ "step": 30 }, { - "expr": "irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_backend_bytes_out_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT-{{ backend }}", @@ -1056,7 +1056,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_frontend_bytes_in_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN-{{ frontend }}", @@ -1065,7 +1065,7 @@ "step": 30 }, { - "expr": "irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_frontend_bytes_out_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT-{{ frontend }}", @@ -1189,7 +1189,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"irate(haproxy_server_bytes_in_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_server_bytes_in_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 1, "legendFormat": "IN-{{ backend }} / {{ server }}", @@ -1198,7 +1198,7 @@ "step": 30 }, { - "expr": "irate(haproxy_server_bytes_out_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])*8", + "expr": "irate(haproxy_server_bytes_out_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])*8", "interval": "$interval", "intervalFactor": 2, "legendFormat": "OUT-{{ backend }} / {{ server }}", @@ -1319,7 +1319,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_connections_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -1328,7 +1328,7 @@ "step": 30 }, { - "expr": "irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_connection_errors_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "{{ backend }} Error", "refId": "A", @@ -1426,7 +1426,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_connections_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ frontend }}", @@ -1544,7 +1544,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_max_queue{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_max_queue{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -1650,7 +1650,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_current_queue{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -1775,7 +1775,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_redispatch_warnings_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "Redispatch {{ backend }}", @@ -1784,7 +1784,7 @@ "step": 30 }, { - "expr": "irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 2, "legendFormat": "Retry {{ backend }}", @@ -1792,7 +1792,7 @@ "step": 60 }, { - "expr": "irate(haproxy_backend_response_errors_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_response_errors_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "Error {{ backend }}", "refId": "C", @@ -1899,7 +1899,7 @@ 
"steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_http_requests_total{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ frontend }}", @@ -1908,14 +1908,14 @@ "step": 30 }, { - "expr": "irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_request_errors_total{frontend=~\"$frontend\", instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "{{ frontend }} Error", "refId": "B", "step": 240 }, { - "expr": "irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_requests_denied_total{frontend=~\"$frontend\", instance=~\"$host:?[0-9]*\"}[5m])", "intervalFactor": 2, "legendFormat": "{{ frontend }} Denied", "refId": "C", @@ -2024,7 +2024,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_backend_http_responses_total{backend=~\"$backend\", code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_backend_http_responses_total{backend=~\"$backend\", code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ code }} {{ backend }}", @@ -2119,7 +2119,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\", code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_frontend_http_responses_total{frontend=~\"$frontend\", code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ code }} {{ frontend }} ", @@ -2231,7 +2231,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_current_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2327,7 +2327,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_current_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2423,7 +2423,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_current_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -2518,7 +2518,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_current_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ frontend }}", @@ -2618,7 +2618,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_max_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_max_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2628,7 +2628,7 @@ "step": 30 }, { - "expr": 
"haproxy_backend_limit_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_limit_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "intervalFactor": 2, "legendFormat": "{{ backend }} limit", "refId": "B", @@ -2726,7 +2726,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_max_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_max_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2736,7 +2736,7 @@ "step": 30 }, { - "expr": "haproxy_frontend_limit_sessions{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_limit_sessions{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "intervalFactor": 2, "legendFormat": "{{ frontend }} limit", "refId": "B", @@ -2829,7 +2829,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_max_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_max_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2930,7 +2930,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_frontend_max_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_max_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -2940,7 +2940,7 @@ "step": 30 }, { - "expr": "haproxy_frontend_limit_session_rate{frontend=~\"$frontend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_frontend_limit_session_rate{frontend=~\"$frontend\",instance=~\"$host:?[0-9]*\"}", "intervalFactor": 2, "legendFormat": "{{ frontend }} limit", "refId": "B", @@ -3051,7 +3051,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_up{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -3146,7 +3146,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_backend_weight{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_backend_weight{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }}", @@ -3375,7 +3375,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_max_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_max_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -3470,7 +3470,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_current_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_current_queue{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -3592,7 +3592,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])) by (server)", + "expr": 
"sum(irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])) by (server)", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} {{ server }}", @@ -3700,7 +3700,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_retry_warnings_total{backend=~\"$backend\", instance=~\"$host:?[0-9]*\"}[5m])", "hide": false, "interval": "$interval", "intervalFactor": 1, @@ -3710,7 +3710,7 @@ "step": 30 }, { - "expr": "haproxy_server_redispatch_warnings_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_redispatch_warnings_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}", "hide": false, "interval": "$interval", "intervalFactor": 2, @@ -3719,7 +3719,7 @@ "step": 60 }, { - "expr": "irate(haproxy_server_response_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_response_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "hide": false, "interval": "$interval", "intervalFactor": 2, @@ -3830,7 +3830,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",code=~\"$code\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_http_responses_total{backend=~\"$backend\",server=~\"$server\",code=~\"$code\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} {{ server }} {{ code }}", @@ -3942,7 +3942,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_current_sessions{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_current_sessions{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4037,7 +4037,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_current_session_rate{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_current_session_rate{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4132,7 +4132,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_max_session_rate{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_max_session_rate{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4227,7 +4227,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_max_sessions{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_max_sessions{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4339,7 +4339,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_downtime_seconds_total{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_downtime_seconds_total{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ 
-4435,7 +4435,7 @@ "steppedLine": false, "targets": [ { - "expr": "increase(haproxy_server_check_failures_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "increase(haproxy_server_check_failures_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4530,7 +4530,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(haproxy_server_connection_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:[0-9]+\"}[5m])", + "expr": "irate(haproxy_server_connection_errors_total{backend=~\"$backend\",server=~\"$server\",instance=~\"$host:?[0-9]*\"}[5m])", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4725,7 +4725,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_up{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_up{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -4820,7 +4820,7 @@ "steppedLine": false, "targets": [ { - "expr": "haproxy_server_weight{backend=~\"$backend\",instance=~\"$host:[0-9]+\"}", + "expr": "haproxy_server_weight{backend=~\"$backend\",instance=~\"$host:?[0-9]*\"}", "interval": "$interval", "intervalFactor": 1, "legendFormat": "{{ backend }} / {{ server }}", @@ -5169,7 +5169,7 @@ "name": "backend", "options": [], "query": { - "query": "label_values(haproxy_backend_bytes_in_total{instance=~\"$host:[0-9]+\"}, backend)", + "query": "label_values(haproxy_backend_bytes_in_total{instance=~\"$host:?[0-9]*\"}, backend)", "refId": "Prometheus-backend-Variable-Query" }, "refresh": 1, @@ -5201,7 +5201,7 @@ "name": "frontend", "options": [], "query": { - "query": "label_values(haproxy_frontend_bytes_in_total{instance=~\"$host:[0-9]+\"}, frontend)", + "query": "label_values(haproxy_frontend_bytes_in_total{instance=~\"$host:?[0-9]*\"}, frontend)", "refId": "Prometheus-frontend-Variable-Query" }, "refresh": 1, @@ -5232,7 +5232,7 @@ "name": "server", "options": [], "query": { - "query": "label_values(haproxy_server_bytes_in_total{instance=~\"$host:[0-9]+\", backend=~\"$backend\"}, server)", + "query": "label_values(haproxy_server_bytes_in_total{instance=~\"$host:?[0-9]*\", backend=~\"$backend\"}, server)", "refId": "Prometheus-server-Variable-Query" }, "refresh": 1, @@ -5264,7 +5264,7 @@ "name": "code", "options": [], "query": { - "query": "label_values(haproxy_server_http_responses_total{instance=~\"$host:[0-9]+\", backend=~\"$backend\", server=~\"$server\"}, code)", + "query": "label_values(haproxy_server_http_responses_total{instance=~\"$host:?[0-9]*\", backend=~\"$backend\", server=~\"$server\"}, code)", "refId": "Prometheus-code-Variable-Query" }, "refresh": 1, diff --git a/releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml b/releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml new file mode 100644 index 000000000..5e2e00c68 --- /dev/null +++ b/releasenotes/notes/haproxy-dashboard-instance-label-836b93921e964680.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fix Grafana HAProxy dashboard when non-default Prometheus instance labels + are used. 
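As a quick sanity check of the relaxed matcher introduced in the patch above, the sketch below uses Go's regexp package (an RE2 implementation, the same engine Prometheus uses for label matchers, which are fully anchored) to confirm that the `:?[0-9]*` suffix accepts instance labels both with and without a port. It is not part of the patch series; the host value `192.168.1.1` and port `9101` are taken from the commit message purely for illustration.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Prometheus label matchers use RE2 syntax and are anchored at both ends,
	// so the check below anchors explicitly to mirror that behaviour.
	// "192.168.1.1" stands in for the interpolated $host template variable.
	pattern := regexp.MustCompile(`^192\.168\.1\.1:?[0-9]*$`)

	instances := []string{
		"192.168.1.1:9101", // default Kolla Ansible target format, with port
		"192.168.1.1",      // custom instance label without a port
	}
	for _, instance := range instances {
		fmt.Printf("%-18s matches: %v\n", instance, pattern.MatchString(instance))
	}
}
```

Both sample labels should report `matches: true`, which is the property the dashboard change relies on.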
From c8722800805ed41cb2766c53076b0ac69d93f807 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Wed, 15 Nov 2023 09:54:46 +0000 Subject: [PATCH 09/37] Adds Ubuntu Jammy & Rocky 9 CIS benchmark hardening playbooks (#685) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adds Ubuntu Jammy CIS benchmark hardening playbooks Co-authored-by: "Dawud " * Use fork of role to support inject_facts_as_vars=False * Add support for Rocky 9 CIS hardening Co-authored-by: Michał Nasiadka * Whitespace fix * Whitespace fix * Matt's code review Co-authored-by: Matt Crees * Add rhel9 auditd configuration * Move auditd config to new location * ... * Update cis * Apply suggestions from code review Co-authored-by: Alex-Welsh <112560678+Alex-Welsh@users.noreply.github.com> Co-authored-by: Mark Goddard Co-authored-by: Matt Crees * Apply suggestions from code review * Apply suggestions from code review * Rename section * Fix indentation * Apply suggestions from code review * Remove instructions to enable inject_facts_as_vars --------- Co-authored-by: Michał Nasiadka Co-authored-by: Matt Crees Co-authored-by: Alex-Welsh <112560678+Alex-Welsh@users.noreply.github.com> Co-authored-by: Mark Goddard --- doc/source/configuration/index.rst | 1 + .../configuration/security-hardening.rst | 44 ++++++ etc/kayobe/ansible/cis.yml | 19 +++ etc/kayobe/ansible/requirements.yml | 10 ++ etc/kayobe/inventory/group_vars/overcloud/cis | 126 ++++++++++++++++++ ...ing-for-ubuntu-jammy-d9bf23a34c08f5be.yaml | 5 + 6 files changed, 205 insertions(+) create mode 100644 doc/source/configuration/security-hardening.rst create mode 100644 releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml diff --git a/doc/source/configuration/index.rst b/doc/source/configuration/index.rst index f19775700..8c283481d 100644 --- a/doc/source/configuration/index.rst +++ b/doc/source/configuration/index.rst @@ -18,3 +18,4 @@ the various features provided. wazuh vault magnum-capi + security-hardening diff --git a/doc/source/configuration/security-hardening.rst b/doc/source/configuration/security-hardening.rst new file mode 100644 index 000000000..2d7c6a6fd --- /dev/null +++ b/doc/source/configuration/security-hardening.rst @@ -0,0 +1,44 @@ +================== +Security Hardening +================== + +CIS Benchmark Hardening +----------------------- + +The roles from the `Ansible-Lockdown `_ +project are used to harden hosts in accordance with the CIS benchmark criteria. +It won't get your benchmark score to 100%, but should provide a significant +improvement over an unhardened system. A typical score would be 70%. + +The following operating systems are supported: + +- Rocky 8, RHEL 8, CentOS Stream 8 +- Ubuntu 22.04 +- Rocky 9 + +Configuration +-------------- + +Some overrides to the role defaults are provided in +``$KAYOBE_CONFIG_PATH/inventory/group_vars/overcloud/cis``. These may not be +suitable for all deployments and so some fine tuning may be required. For +instance, you may want different rules on a network node compared to a +controller. It is best to consult the upstream role documentation for details +about what each variable does. The documentation can be found here: + +- `Rocky 8, RHEL 8, CentOS Stream 8 `__ +- `Ubuntu 22.04 `__ +- `Rocky 9 `__ + +Running the playbooks +--------------------- + +As there is potential for unintended side effects when applying the hardening +playbooks, the playbooks are not currently enabled by default. 
It is recommended +that they are first applied to a representative staging environment to determine +whether or not workloads or API requests are affected by any configuration changes. + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cis.yml + diff --git a/etc/kayobe/ansible/cis.yml b/etc/kayobe/ansible/cis.yml index ce6445359..ffb31c2fe 100644 --- a/etc/kayobe/ansible/cis.yml +++ b/etc/kayobe/ansible/cis.yml @@ -4,12 +4,31 @@ hosts: overcloud become: true tasks: + - name: Ensure the cron package is installed on ubuntu + package: + name: cron + state: present + when: ansible_facts.distribution == 'Ubuntu' + - name: Remove /etc/motd # See remediation in: # https://github.com/wazuh/wazuh/blob/bfa4efcf11e288c0a8809dc0b45fdce42fab8e0d/ruleset/sca/centos/8/cis_centos8_linux.yml#L777 file: path: /etc/motd state: absent + when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '8' - include_role: name: ansible-lockdown.rhel8_cis + when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '8' + tags: always + + - include_role: + name: ansible-lockdown.rhel9_cis + when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9' + tags: always + + - include_role: + name: ansible-lockdown.ubuntu22_cis + when: ansible_facts.distribution == 'Ubuntu' and ansible_facts.distribution_major_version == '22' + tags: always diff --git a/etc/kayobe/ansible/requirements.yml b/etc/kayobe/ansible/requirements.yml index 7e7990ceb..91408df6e 100644 --- a/etc/kayobe/ansible/requirements.yml +++ b/etc/kayobe/ansible/requirements.yml @@ -15,6 +15,16 @@ roles: - name: ansible-lockdown.rhel8_cis src: https://github.com/ansible-lockdown/RHEL8-CIS version: 1.3.0 + - name: ansible-lockdown.ubuntu22_cis + src: https://github.com/ansible-lockdown//UBUNTU22-CIS + #FIXME: Waiting for https://github.com/ansible-lockdown/UBUNTU22-CIS/pull/132 + # to be in a tagged release + version: c91a1038fd218f727075d21b2d0880751322b162 + - name: ansible-lockdown.rhel9_cis + src: https://github.com/ansible-lockdown/RHEL9-CIS + #FIXME: Waiting for https://github.com/ansible-lockdown/RHEL9-CIS/pull/54 + # to be in a tagged release. + version: 3525cb6aab12a3d1e34aa8432ed77dd76be6a44a - name: wazuh-ansible src: https://github.com/stackhpc/wazuh-ansible version: stackhpc diff --git a/etc/kayobe/inventory/group_vars/overcloud/cis b/etc/kayobe/inventory/group_vars/overcloud/cis index 81fb151e8..519aeab8b 100644 --- a/etc/kayobe/inventory/group_vars/overcloud/cis +++ b/etc/kayobe/inventory/group_vars/overcloud/cis @@ -1,4 +1,12 @@ --- +############################################################################## +# Common CIS Hardening Configuration + +# Enable collecting auditd logs +update_audit_template: true + +############################################################################## +# RHEL 8 / Centos Stream 8 CIS Hardening Configuration # NOTE: kayobe configures NTP. Do not clobber configuration. rhel8cis_time_synchronization: skip @@ -22,3 +30,121 @@ rhel8cis_crypto_policy: FIPS # from being displayed. 
rhel8cis_rule_1_8_1_1: false rhel8cis_rule_1_8_1_4: false + +############################################################################## +# Rocky 9 CIS Hardening Configuration + +# Allow IP forwarding +rhel9cis_is_router: true + +# Skip configuration of chrony +rhel9cis_rule_2_1_1: false +rhel9cis_rule_2_1_2: false + +# Skip configuration of the firewall +rhel9cis_firewall: None +rhel9cis_rule_3_4_1_2: false + +# Don't configure selinux +rhel9cis_selinux_disable: true + +# NOTE: FUTURE breaks wazuh agent repo metadata download +rhel9cis_crypto_policy: FIPS + +# Skip package updates +rhel9cis_rule_1_9: false + +# Disable requirement for password when using sudo +rhel9cis_rule_5_3_4: false + +# Disable check for root password being set, we should be locking root passwords instead. +# Please double-check yourself with: sudo passwd -S root +rhel9cis_rule_5_6_6: false + +# Configure log rotation to prevent audit logs from filling the disk +rhel9cis_auditd: + space_left_action: syslog + action_mail_acct: root + admin_space_left_action: halt + max_log_file_action: rotate + +# Max size of audit logs (MB) +rhel9cis_max_log_file_size: 1024 + +############################################################################## +# Ubuntu Jammy CIS Hardening Configuration + +# Ubuntu 22 CIS configuration +# Disable changing routing rules +ubtu22cis_is_router: true + +# Set Chrony as the time sync tool +ubtu22cis_time_sync_tool: "chrony" + +# Disable CIS from configuring the firewall +ubtu22cis_firewall_package: "none" + +# Stop CIS from installing Network Manager +ubtu22cis_install_network_manager: false + +# Set syslog service to journald +ubtu22cis_syslog_service: journald + +# Squashfs is compiled into the kernel +ubtu22cis_rule_1_1_1_2: false + +# This updates the system. Let's do this explicitly. +ubtu22cis_rule_1_9: false + +# Do not change Chrony Time servers +ubtu22cis_rule_2_1_2_1: false + +# Disable CIS from touching sudoers +ubtu22cis_rule_5_3_4: false + +# Add stack and kolla to allowed ssh users +ubtu22cis_sshd: + log_level: "INFO" + max_auth_tries: 4 + ciphers: "chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr" + macs: "hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,hmac-sha2-512,hmac-sha2-256" + kex_algorithms: "curve25519-sha256,curve25519-sha256@libssh.org,diffie-hellman-group14-sha256,diffie-hellman-group16-sha512,diffie-hellman-group18-sha512,ecdh-sha2-nistp521,ecdh-sha2-nistp384,ecdh-sha2-nistp256,diffie-hellman-group-exchange-sha256" + client_alive_interval: 300 + client_alive_count_max: 3 + login_grace_time: 60 + max_sessions: 10 + allow_users: "kolla stack ubuntu" + allow_groups: "kolla stack ubuntu" + # This variable, if specified, configures a list of USER name patterns, separated by spaces, to prevent SSH access + # for users whose user name matches one of the patterns. This is done + # by setting the value of `DenyUsers` option in `/etc/ssh/sshd_config` file. + # If an USER@HOST format will be used, the specified user will be restricted only on that particular host. + # The allow/deny directives process order: DenyUsers, AllowUsers, DenyGroups, AllowGroups. + # For more info, see https://linux.die.net/man/5/sshd_config + deny_users: "" + # This variable, if specified, configures a list of GROUP name patterns, separated by spaces, to prevent SSH access + # for users whose primary group or supplementary group list matches one of the patterns. 
This is done + # by setting the value of `DenyGroups` option in `/etc/ssh/sshd_config` file. + # The allow/deny directives process order: DenyUsers, AllowUsers, DenyGroups, AllowGroups. + # For more info, see https://linux.die.net/man/5/sshd_config + deny_groups: "" + +# Do not change /var/lib/docker permissions +ubtu22cis_no_group_adjust: false +ubtu22cis_no_owner_adjust: false + +# Configure log rotation to prevent audit logs from filling the disk +ubtu22cis_auditd: + action_mail_acct: root + space_left_action: syslog + admin_space_left_action: halt + max_log_file_action: rotate + +# Max size of audit logs (MB) +ubtu22cis_max_log_file_size: 1024 + +# Disable grub bootloader password. Requires overriding +# ubtu22cis_bootloader_password_hash +ubtu22cis_rule_1_4_1: false +ubtu22cis_rule_1_4_3: false +############################################################################## diff --git a/releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml b/releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml new file mode 100644 index 000000000..66de6e0e8 --- /dev/null +++ b/releasenotes/notes/adds-cis-hardening-for-ubuntu-jammy-d9bf23a34c08f5be.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Adds support for Ubuntu Jammy and Rocky 9 to the CIS benchmark hardening playbook: + ``cis.yml``. This playbook will need to be manually applied. From 09851d08481d8d6c36908308f4b75cdbd6e25c8a Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 15 Nov 2023 10:36:13 +0000 Subject: [PATCH 10/37] docs: Add info on purge-command-not-found.yml custom playbook --- doc/source/configuration/release-train.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/source/configuration/release-train.rst b/doc/source/configuration/release-train.rst index 7bceaf157..28cf6377b 100644 --- a/doc/source/configuration/release-train.rst +++ b/doc/source/configuration/release-train.rst @@ -107,6 +107,23 @@ apt repositories. This can be done on a host-by host basis by defining the variables as host or group vars under ``etc/kayobe/inventory/host_vars`` or ``etc/kayobe/inventory/group_vars``. +For Ubuntu-based deployments, Pulp currently `lacks support +`_ for certain types of content, +including i18n files and command-not-found indices. This breaks APT when the +``command-not-found`` package is installed: + +.. code:: console + + E: Failed to fetch https://pulp.example.com/pulp/content/ubuntu/jammy-security/development/dists/jammy-security/main/cnf/Commands-amd64 404 Not Found + +The ``purge-command-not-found.yml`` custom playbook can be used to uninstall +the package, prior to running any other APT commands. It may be installed as a +:kayobe-doc:`pre-hook ` to the ``host +configure`` commands. Note that if used as a hook, this playbook matches all +hosts, so will run against the seed, even when running ``overcloud host +configure``. Depending on the stage of deployment, some hosts may be +unreachable. + For CentOS and Rocky Linux based systems, package manager configuration is provided by ``stackhpc_dnf_repos`` in ``etc/kayobe/dnf.yml``, which points to package repositories on the local Pulp server. To use this configuration, the From 45ac9905b2b7dd0429275079f48397c8666f30bd Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 15 Nov 2023 14:48:36 +0000 Subject: [PATCH 11/37] CI: Don't fail fast on container image build job failure Previously if one of the container image build jobs (CS8, Ubuntu) failed, the other would be cancelled. 
This is not necessarily helpful, since the other job may complete successfully. This change disables this fail fast behaviour. --- .github/workflows/stackhpc-container-image-build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/stackhpc-container-image-build.yml b/.github/workflows/stackhpc-container-image-build.yml index 188ff19d1..a5992c7f2 100644 --- a/.github/workflows/stackhpc-container-image-build.yml +++ b/.github/workflows/stackhpc-container-image-build.yml @@ -91,6 +91,7 @@ jobs: runs-on: [self-hosted, stackhpc-kayobe-config-kolla-builder] permissions: {} strategy: + fail-fast: false matrix: ${{ fromJson(needs.generate-tag.outputs.matrix) }} needs: - generate-tag From ed5176dca0e10e0c19371ac8293fdaa92cd21dde Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 15 Nov 2023 15:46:59 +0000 Subject: [PATCH 12/37] docs: Add overcloud host image to RL9 migration guide --- doc/source/operations/rocky-linux-9.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index c508d795c..f5e931a01 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -67,6 +67,10 @@ Make the following changes to your Kayobe configuration: - Merge in the latest ``stackhpc-kayobe-config`` ``stackhpc/yoga`` branch. - Set ``os_distribution`` to ``rocky`` in ``etc/kayobe/globals.yml``. - Set ``os_release`` to ``"9"`` in ``etc/kayobe/globals.yml``. +- Consider using a `prebuilt overcloud host image + <../configuration/host-images.html#pulling-host-images>`_ or building an + overcloud host image using the `standard configuration + <../configuration/host-images.html#building-host-images>`_. - If you are using Kayobe multiple environments, add the following into ``kayobe-config/etc/kayobe/environments//kolla/config/nova.conf`` (as Kolla custom service config environment merging is not supported in From 0187d469cff9277d34b324463b2e3d27b9bc7182 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Wed, 15 Nov 2023 17:26:57 +0000 Subject: [PATCH 13/37] Rocky9: Add section on routing rules (#788) --- doc/source/operations/rocky-linux-9.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index d62b97716..464a4b179 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -86,6 +86,30 @@ Make the following changes to your Kayobe configuration: This change does not need to be applied before migrating to Rocky Linux 9, but it should cause no harm to do so. Note that this will not affect existing VMs, only newly created VMs. +Routing rules +------------- + +Routing rules referencing tables by name may need adapting to be compatible with NetworkManager +e.g: + + .. code-block:: yaml + + undercloud_prov_rules: + - from {{ internal_net_name | net_cidr }} table ironic-api + +will need to be updated to use numeric IDs: + + .. code-block:: yaml + + undercloud_prov_rules: + - from {{ internal_net_name | net_cidr }} table 1 + +The error from NetworkManager was: + + .. 
code-block:: shell + + [1697192659.9611] keyfile: ipv4.routing-rules: invalid value for "routing-rule1": invalid value for "table" + Prerequisites ============= From bfd9ebc791b1140961ee5ade219e3fbe49786f3f Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 16 Nov 2023 16:57:54 +0000 Subject: [PATCH 14/37] Further additions to RL9 migration docs --- doc/source/operations/rocky-linux-9.rst | 46 +++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 464a4b179..1e403a734 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -340,6 +340,15 @@ Full procedure for one host kayobe overcloud database recover +13. If you are using Wazuh, you will need to deploy the agent again. + Note that CIS benchmarks do not run on RL9 out-the-box. See + `our Wazuh docs `__ + for details. + + .. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml -l + After each controller has been migrated you may wish to perform some smoke testing, check for alerts and errors etc. Compute @@ -430,6 +439,15 @@ Full procedure for one batch of hosts kayobe overcloud service deploy -kl +8. If you are using Wazuh, you will need to deploy the agent again. + Note that CIS benchmarks do not run on RL9 out-the-box. See + `our Wazuh docs `__ + for details. + + .. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml -l + If any VMs were powered off, they may now be powered back on. Wait for Prometheus alerts and errors in OpenSearch Dashboard to resolve, or @@ -482,11 +500,11 @@ Full procedure for any storage host .. code-block:: console - kayobe overcloud host configure -l + kayobe overcloud host configure -l -kl 6. Make sure the cephadm public key is in ``authorized_keys`` for stack or root user - depends on your setup. For example, your SSH key may - already be defined in ``users.yml`` . If in doubt, run the cephadm + already be defined in ``users.yml``. If in doubt, run the cephadm deploy playbook to copy the SSH key and install the cephadm binary. .. code-block:: console @@ -507,6 +525,21 @@ Full procedure for any storage host ceph -s ceph -w +9. Deploy any services that are required, such as Prometheus exporters. + + .. code-block:: console + + kayobe overcloud service deploy -kl + +10. If you are using Wazuh, you will need to deploy the agent again. + Note that CIS benchmarks do not run on RL9 out-the-box. See + `our Wazuh docs `__ + for details. + + .. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml -l + Seed ==== @@ -615,6 +648,15 @@ Full procedure 14. Verify that Bifrost/Ironic is healthy. +15. If you are using Wazuh, you will need to deploy the agent again. + Note that CIS benchmarks do not run on RL9 out-the-box. See + `our Wazuh docs `__ + for details. + + .. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml -l + Seed hypervisor =============== From 2b7b00b87c274be0657e38da9440b0630ad2f280 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Mon, 20 Nov 2023 14:21:01 +0100 Subject: [PATCH 15/37] Add more services to the rabbitmq-reset playbook This change adds services that we often deploy: Barbican, Blazar, CloudKitty, Designate, Manila and Octavia. 
--- etc/kayobe/ansible/rabbitmq-reset.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/etc/kayobe/ansible/rabbitmq-reset.yml b/etc/kayobe/ansible/rabbitmq-reset.yml index d1a03eeb1..76e90fd94 100644 --- a/etc/kayobe/ansible/rabbitmq-reset.yml +++ b/etc/kayobe/ansible/rabbitmq-reset.yml @@ -1,6 +1,6 @@ --- # Reset a broken RabbitMQ cluster. -# Also restarts OpenStack services which may be broken. +# Also restarts all OpenStack services using RabbitMQ. - name: Reset RabbitMQ hosts: controllers @@ -66,7 +66,7 @@ tags: - restart-openstack tasks: - # The following services can have problems if the cluster gets broken. + # The following services use RabbitMQ. - name: Restart OpenStack services shell: >- - docker ps -a | egrep '(cinder|heat|ironic|keystone|magnum|neutron|nova)' | awk '{ print $NF }' | xargs docker restart + docker ps -a | egrep '(barbican|blazar|cinder|cloudkitty|designate|heat|ironic|keystone|magnum|manila|neutron|nova|octavia)' | awk '{ print $NF }' | xargs docker restart From f6ceb41f237e44ab12c36160068406c60b0a225d Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Fri, 24 Nov 2023 09:55:28 +0000 Subject: [PATCH 16/37] Configure SELinux in permissive mode on RL9 hosts This makes the host match the target configuration in Zed and avoids a later reboot to change the SELinux mode. RL9 hosts that have SELinux disabled are rebooted only if disable_selinux_do_reboot is changed to true. --- etc/kayobe/inventory/group_vars/all/selinux | 9 +++++++++ .../notes/selinux-permissive-bb953d2cdcd7a545.yaml | 7 +++++++ 2 files changed, 16 insertions(+) create mode 100644 etc/kayobe/inventory/group_vars/all/selinux create mode 100644 releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml diff --git a/etc/kayobe/inventory/group_vars/all/selinux b/etc/kayobe/inventory/group_vars/all/selinux new file mode 100644 index 000000000..c38702b3f --- /dev/null +++ b/etc/kayobe/inventory/group_vars/all/selinux @@ -0,0 +1,9 @@ +--- +# Configure SELinux in permissive mode when configuring a Rocky Linux 9 host. +selinux_state: "{{ 'permissive' if ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9' else 'disabled' }}" + +# Do NOT reboot Rocky Linux 9 hosts to apply SELinux config changes. Operators +# must opt-in by changing this variable when applying host configuration. This +# is to avoid automatically rebooting hosts originally deployed with SELinux +# disabled and which now need to be changed to permissive. +disable_selinux_do_reboot: "{{ not (ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9') }}" diff --git a/releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml b/releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml new file mode 100644 index 000000000..90b1364fa --- /dev/null +++ b/releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml @@ -0,0 +1,7 @@ +--- +upgrade: + - | + SELinux mode is now set to permissive when configuring Rocky Linux 9 hosts, + to match the default mode in the Zed release. If SELinux is disabled on + these hosts, a reboot is required and will only be performed by Ansible if + ``disable_selinux_do_reboot`` is changed to ``true``. 
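As a sketch of how an operator might opt in to the reboot described in the release note above when applying host configuration (the ``-e`` extra-variable usage and the host name here are illustrative assumptions, not part of the change itself):

.. code-block:: console

   # Assumed invocation: configure a single host and permit the reboot needed
   # to move SELinux from disabled to permissive.
   kayobe overcloud host configure --limit controller0 -e disable_selinux_do_reboot=true
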
From 0a127625019a71f96c19545830a6f1bc3980e3c8 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Fri, 24 Nov 2023 14:14:30 +0000 Subject: [PATCH 17/37] Document new issues seen with Storage hosts --- doc/source/operations/rocky-linux-9.rst | 52 ++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 1e403a734..41fbd8ce6 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -233,11 +233,14 @@ Potential issues .. code-block:: yaml mariabackup_image_full: "{{ docker_registry }}/stackhpc/rocky-source-mariadb-server:yoga-20230310T170929" -- When using Octavia load balancers, restarting Neutron causes load balancers - with floating IPs to stop processing traffic. See `LP#2042938 - `__ for details. The issue - may be worked around after Neutron has been restarted by detaching then - reattaching the floating IP to the load balancer's virtual IP. +- When using Octavia load balancers, restarting Neutron causes load balancers + with floating IPs to stop processing traffic. See `LP#2042938 + `__ for details. The issue + may be worked around after Neutron has been restarted by detaching then + reattaching the floating IP to the load balancer's virtual IP. + +- If you are using hyper-convered Ceph, please also note the potential issues + in the Storage section below. Full procedure for one host --------------------------- @@ -466,6 +469,45 @@ Potential issues be identical, now that the "maintenance mode approach" is being used. It is still recommended to do the bootstrap host last. +- Prior to reprovisioning the bootstrap host, it can be beneficial to backup + ``/etc/ceph`` and ``/var/lib/ceph``, as sometimes the keys, config, etc. + stored here will not be moved/recreated correctly. + +- When a host is taken out of maintenance, you may see errors relating to + permissions of /tmp/etc and /tmp/var. These issues should be resolved in + Ceph version 17.2.7. See issue: https://github.com/ceph/ceph/pull/50736. In + the meantime, you can work around this by running the command below. You may + need to omit one or the other of ``/tmp/etc`` and ``/tmp/var``. You will + likely need to run this multiple times. Run ``ceph -W cephadm`` to monitor + the logs and see when permissions issues are hit. + + .. code-block:: console + + kayobe overcloud host command run --command "chown -R stack:stack /tmp/etc /tmp/var" -b -l storage + + +- It has been seen that sometimes the Ceph containers do not come up after + reprovisioning. This seems to be related to having ``/var/lib/ceph + ``persisted through the reprovision (e.g. seen at a customer in a volume + with software RAID). (Note: further investigation is needed for the root + cause). When this occurs, you will need to redeploy the daemons: + + List the daemons on the host: + + .. code-block:: console + + ceph orch ps + + + Redeploy the daemons, one at a time. It is recommended that you start with + the crash daemon, as this will have the least impact if unexpected issues + occur. + + .. code-block:: console + + ceph orch daemon redeploy to redeploy a daemon. + + - Commands starting with ``ceph`` are all run on the cephadm bootstrap host in a cephadm shell unless stated otherwise. 
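A minimal sketch of the daemon redeploy workflow described above, run from a cephadm shell on the bootstrap host (the daemon and host names are examples only):

.. code-block:: console

   # List the daemons running on the affected host.
   ceph orch ps storage-01

   # Redeploy daemons one at a time, starting with the crash daemon.
   ceph orch daemon redeploy crash.storage-01
   ceph orch daemon redeploy osd.12
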
From 5e50967a96a207d30db6d729fed72cba682527f9 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Fri, 24 Nov 2023 17:30:25 +0000 Subject: [PATCH 18/37] Converge on the right spelling of converge --- doc/source/operations/rocky-linux-9.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 41fbd8ce6..ee643022a 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -239,7 +239,7 @@ Potential issues may be worked around after Neutron has been restarted by detaching then reattaching the floating IP to the load balancer's virtual IP. -- If you are using hyper-convered Ceph, please also note the potential issues +- If you are using hyper-converged Ceph, please also note the potential issues in the Storage section below. Full procedure for one host From 8e56c09f749e8e5bfe667da6e7880857b03df326 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Mon, 27 Nov 2023 09:46:18 +0000 Subject: [PATCH 19/37] Use python3 -m venv for nova playbooks Cotinues with our ongoing switch from virtualenv to python3 -m venv. --- etc/kayobe/ansible/nova-compute-disable.yml | 1 + etc/kayobe/ansible/nova-compute-drain.yml | 1 + etc/kayobe/ansible/nova-compute-enable.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/etc/kayobe/ansible/nova-compute-disable.yml b/etc/kayobe/ansible/nova-compute-disable.yml index 98ab58e28..32ea8ee09 100644 --- a/etc/kayobe/ansible/nova-compute-disable.yml +++ b/etc/kayobe/ansible/nova-compute-disable.yml @@ -11,6 +11,7 @@ - name: Set up openstack cli virtualenv pip: virtualenv: "{{ venv }}" + virtualenv_command: "/usr/bin/python3 -m venv" name: - python-openstackclient state: latest diff --git a/etc/kayobe/ansible/nova-compute-drain.yml b/etc/kayobe/ansible/nova-compute-drain.yml index a29ac3169..dddf84634 100644 --- a/etc/kayobe/ansible/nova-compute-drain.yml +++ b/etc/kayobe/ansible/nova-compute-drain.yml @@ -11,6 +11,7 @@ - name: Set up openstack cli virtualenv pip: virtualenv: "{{ venv }}" + virtualenv_command: "/usr/bin/python3 -m venv" name: - python-openstackclient state: latest diff --git a/etc/kayobe/ansible/nova-compute-enable.yml b/etc/kayobe/ansible/nova-compute-enable.yml index 9d6d45720..f880a2aa4 100644 --- a/etc/kayobe/ansible/nova-compute-enable.yml +++ b/etc/kayobe/ansible/nova-compute-enable.yml @@ -11,6 +11,7 @@ - name: Set up openstack cli virtualenv pip: virtualenv: "{{ venv }}" + virtualenv_command: "/usr/bin/python3 -m venv" name: - python-openstackclient state: latest From d28f1c072554d35d25b732b194fc208cf3d98e0e Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Mon, 27 Nov 2023 11:32:45 +0000 Subject: [PATCH 20/37] Fixes various issues with the cis.yml playbook (#791) * Fixes various issues with the cis.yml playbook See release note for details. * Apply suggestions from code review Co-authored-by: Mark Goddard --------- Co-authored-by: Mark Goddard --- .../configuration/security-hardening.rst | 10 +++++ etc/kayobe/ansible/requirements.yml | 12 +++--- etc/kayobe/inventory/group_vars/overcloud/cis | 42 ++++++++++++------- requirements.txt | 1 + 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/doc/source/configuration/security-hardening.rst b/doc/source/configuration/security-hardening.rst index 2d7c6a6fd..a65812513 100644 --- a/doc/source/configuration/security-hardening.rst +++ b/doc/source/configuration/security-hardening.rst @@ -33,6 +33,16 @@ about what each variable does. 
The documentation can be found here: Running the playbooks --------------------- +.. note: + + On CentOS 8, you must run with `INJECT_FACT_AS_VARS `__ + enabled. To do this for this playbook only, you can use: + + .. code-block: shell + + ANSIBLE_INJECT_FACT_VARS=true kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cis.yml + + As there is potential for unintended side effects when applying the hardening playbooks, the playbooks are not currently enabled by default. It is recommended that they are first applied to a representative staging environment to determine diff --git a/etc/kayobe/ansible/requirements.yml b/etc/kayobe/ansible/requirements.yml index 91408df6e..b66c45052 100644 --- a/etc/kayobe/ansible/requirements.yml +++ b/etc/kayobe/ansible/requirements.yml @@ -16,15 +16,15 @@ roles: src: https://github.com/ansible-lockdown/RHEL8-CIS version: 1.3.0 - name: ansible-lockdown.ubuntu22_cis - src: https://github.com/ansible-lockdown//UBUNTU22-CIS - #FIXME: Waiting for https://github.com/ansible-lockdown/UBUNTU22-CIS/pull/132 + src: https://github.com/stackhpc/UBUNTU22-CIS + #FIXME: Waiting for https://github.com/ansible-lockdown/UBUNTU22-CIS/pull/174 # to be in a tagged release - version: c91a1038fd218f727075d21b2d0880751322b162 + version: bugfix/inject-facts - name: ansible-lockdown.rhel9_cis - src: https://github.com/ansible-lockdown/RHEL9-CIS - #FIXME: Waiting for https://github.com/ansible-lockdown/RHEL9-CIS/pull/54 + src: https://github.com/stackhpc/RHEL9-CIS + #FIXME: Waiting for https://github.com/ansible-lockdown/RHEL9-CIS/pull/115 # to be in a tagged release. - version: 3525cb6aab12a3d1e34aa8432ed77dd76be6a44a + version: bugfix/inject-facts - name: wazuh-ansible src: https://github.com/stackhpc/wazuh-ansible version: stackhpc diff --git a/etc/kayobe/inventory/group_vars/overcloud/cis b/etc/kayobe/inventory/group_vars/overcloud/cis index 519aeab8b..b20843d37 100644 --- a/etc/kayobe/inventory/group_vars/overcloud/cis +++ b/etc/kayobe/inventory/group_vars/overcloud/cis @@ -71,6 +71,10 @@ rhel9cis_auditd: # Max size of audit logs (MB) rhel9cis_max_log_file_size: 1024 +# Disable setting of boatloader password. 
This requires setting the variable +# `rhel9cis_bootloader_password_hash` +rhel9cis_set_boot_pass: false + ############################################################################## # Ubuntu Jammy CIS Hardening Configuration @@ -106,27 +110,35 @@ ubtu22cis_rule_5_3_4: false ubtu22cis_sshd: log_level: "INFO" max_auth_tries: 4 - ciphers: "chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr" - macs: "hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,hmac-sha2-512,hmac-sha2-256" - kex_algorithms: "curve25519-sha256,curve25519-sha256@libssh.org,diffie-hellman-group14-sha256,diffie-hellman-group16-sha512,diffie-hellman-group18-sha512,ecdh-sha2-nistp521,ecdh-sha2-nistp384,ecdh-sha2-nistp256,diffie-hellman-group-exchange-sha256" + ciphers: + - chacha20-poly1305@openssh.com + - aes256-gcm@openssh.com + - aes128-gcm@openssh.com + - aes256-ctr + - aes192-ctr + - aes128-ctr + macs: + - hmac-sha2-512-etm@openssh.com + - hmac-sha2-256-etm@openssh.com + - hmac-sha2-512 + - hmac-sha2-256 + kex_algorithms: + - curve25519-sha256 + - curve25519-sha256@libssh.org + - diffie-hellman-group14-sha256 + - diffie-hellman-group16-sha512 + - diffie-hellman-group18-sha512 + - ecdh-sha2-nistp521 + - ecdh-sha2-nistp384 + - ecdh-sha2-nistp256 + - diffie-hellman-group-exchange-sha256 client_alive_interval: 300 client_alive_count_max: 3 login_grace_time: 60 - max_sessions: 10 + max_sessions: 8 allow_users: "kolla stack ubuntu" allow_groups: "kolla stack ubuntu" - # This variable, if specified, configures a list of USER name patterns, separated by spaces, to prevent SSH access - # for users whose user name matches one of the patterns. This is done - # by setting the value of `DenyUsers` option in `/etc/ssh/sshd_config` file. - # If an USER@HOST format will be used, the specified user will be restricted only on that particular host. - # The allow/deny directives process order: DenyUsers, AllowUsers, DenyGroups, AllowGroups. - # For more info, see https://linux.die.net/man/5/sshd_config deny_users: "" - # This variable, if specified, configures a list of GROUP name patterns, separated by spaces, to prevent SSH access - # for users whose primary group or supplementary group list matches one of the patterns. This is done - # by setting the value of `DenyGroups` option in `/etc/ssh/sshd_config` file. - # The allow/deny directives process order: DenyUsers, AllowUsers, DenyGroups, AllowGroups. - # For more info, see https://linux.die.net/man/5/sshd_config deny_groups: "" # Do not change /var/lib/docker permissions diff --git a/requirements.txt b/requirements.txt index da44bf018..0ca7aab63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ kayobe@git+https://github.com/stackhpc/kayobe@stackhpc/yoga ansible-modules-hashivault@git+https://github.com/stackhpc/ansible-modules-hashivault@stackhpc;python_version < "3.8" ansible-modules-hashivault;python_version >= "3.8" +jmespath From d9dbd7270e4f1b66f3c9055a872d4740dcf7229a Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Mon, 27 Nov 2023 15:19:45 +0000 Subject: [PATCH 21/37] Enable hypervisor after RL9 compute migration --- doc/source/operations/rocky-linux-9.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 1e403a734..73a84d347 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -456,6 +456,13 @@ address them. 
Once happy that the system has been restored to full health, move onto the next host or batch or hosts. +9. Enable the hypervisor in Nova again: + + .. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/nova-compute-enable.yml --limit + + Storage ======= From 5f00e91eea493ee1b13ae3da20c95acfa4e473f4 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Mon, 27 Nov 2023 16:55:42 +0000 Subject: [PATCH 22/37] Tox lint fixes --- doc/source/operations/rocky-linux-9.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index ee643022a..b8ecf2fb1 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -485,10 +485,9 @@ Potential issues kayobe overcloud host command run --command "chown -R stack:stack /tmp/etc /tmp/var" -b -l storage - - It has been seen that sometimes the Ceph containers do not come up after - reprovisioning. This seems to be related to having ``/var/lib/ceph - ``persisted through the reprovision (e.g. seen at a customer in a volume + reprovisioning. This seems to be related to having ``/var/lib/ceph`` + persisted through the reprovision (e.g. seen at a customer in a volume with software RAID). (Note: further investigation is needed for the root cause). When this occurs, you will need to redeploy the daemons: From 5a78d180f3fbbf071e97a412a0bf0719a2562e20 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Mon, 27 Nov 2023 17:02:01 +0000 Subject: [PATCH 23/37] Update doc/source/operations/rocky-linux-9.rst Co-authored-by: Alex-Welsh <112560678+Alex-Welsh@users.noreply.github.com> --- doc/source/operations/rocky-linux-9.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 73a84d347..15a129311 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -460,7 +460,7 @@ host or batch or hosts. .. code-block:: console - kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/nova-compute-enable.yml --limit + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/nova-compute-enable.yml --limit Storage From 1b44bac4d30e51050c5c86b14ba703b7e38e7823 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Mon, 27 Nov 2023 15:19:45 +0000 Subject: [PATCH 24/37] Enable hypervisor after RL9 compute migration --- doc/source/operations/rocky-linux-9.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 1e403a734..82cf52f54 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -448,13 +448,20 @@ Full procedure for one batch of hosts kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml -l -If any VMs were powered off, they may now be powered back on. +9. Restore the system to full health. -Wait for Prometheus alerts and errors in OpenSearch Dashboard to resolve, or -address them. + 1. If any VMs were powered off, they may now be powered back on. -Once happy that the system has been restored to full health, move onto the next -host or batch or hosts. + 2. Wait for Prometheus alerts and errors in OpenSearch Dashboard to resolve, + or address them. + + 3. Once happy that the system has been restored to full health, enable the + hypervisor in Nova if it is still disabled and then move onto the next + host or batch or hosts. + + .. 
code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/nova-compute-enable.yml --limit Storage ======= From 80ba6729ee2f5db72c97f585632361cc03c5de83 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 28 Nov 2023 22:35:38 +0100 Subject: [PATCH 25/37] Fix cluster health in Grafana Elasticsearch dashboard When Elasticsearch/OpenSearch was in yellow state, the number 23 was displayed instead of Yellow. --- .../config/grafana/dashboards/openstack/elasticsearch.json | 2 +- ...rafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json index 091c76760..40ed94f29 100644 --- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json +++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/elasticsearch.json @@ -148,7 +148,7 @@ "tableColumn": "", "targets": [ { - "expr": "topk(1, elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"red\"}==1 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"green\"}==1)+4 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"yellow\"}==1)+22)", + "expr": "topk(1, elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"red\"}==1 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"green\"}==1)+4 or (elasticsearch_cluster_health_status{cluster=\"$cluster\",color=\"yellow\"}==1)+2)", "format": "time_series", "instant": true, "intervalFactor": 2, diff --git a/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml b/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml new file mode 100644 index 000000000..e4ea7e537 --- /dev/null +++ b/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes display of the Elasticsearch or OpenSearch cluster health in Grafana + when in yellow state. 
From e9b04777369fe6f7c92880d806ba8a4420979a8e Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Fri, 17 Nov 2023 15:30:22 +0000 Subject: [PATCH 26/37] Add rekey-hosts.yml playbook --- etc/kayobe/ansible/rekey-hosts.yml | 72 +++++++++++++++++++ .../add-rekey-playbook-0065c5057b1639f8.yaml | 5 ++ 2 files changed, 77 insertions(+) create mode 100644 etc/kayobe/ansible/rekey-hosts.yml create mode 100644 releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml diff --git a/etc/kayobe/ansible/rekey-hosts.yml b/etc/kayobe/ansible/rekey-hosts.yml new file mode 100644 index 000000000..445e7224f --- /dev/null +++ b/etc/kayobe/ansible/rekey-hosts.yml @@ -0,0 +1,72 @@ +--- +- name: Rekey hosts + hosts: overcloud,seed,seed-hypervisor,infra-vms + gather_facts: false + vars: + ansible_user: stack + ansible_python_interpreter: /usr/bin/python3 + tasks: + - name: Generate a fresh SSH key + community.crypto.openssh_keypair: + path: ~/.ssh/id_rsa_new + delegate_to: localhost + + # - name: Copy new key to hosts + # ansible.builtin.copy: + # src: /tmp/id_rsa_new.pub + # dest: /tmp/id_rsa_new.pub + # mode: '0600' + # become: true + + - name: Copy old key to hosts + ansible.builtin.copy: + src: ~/.ssh/id_rsa.pub + dest: /tmp/id_rsa_old.pub + mode: '0777' + become: true + + - name: Set new stack authorized keys + ansible.posix.authorized_key: + user: "{{ item }}" + state: present + key: "{{ lookup('file', '~/.ssh/id_rsa_new.pub') }}" + loop: + - "stack" + - "kolla" + become: true + + - name: Set new stack authorized keys + ansible.posix.authorized_key: + user: "{{ item }}" + state: present + key: "{{ lookup('file', '~/.ssh/id_rsa_new.pub') }}" + loop: + - "stack" + - "kolla" + become: true + + - name: Locally deprecate old key (private) + command: "mv ~/.ssh/id_rsa ~/.ssh/id_rsa_old" + delegate_to: localhost + + - name: Locally deprecate old key (public) + command: "mv ~/.ssh/id_rsa.pub ~/.ssh/id_rsa_old.pub" + delegate_to: localhost + + - name: Locally promote new key (private) + command: "mv ~/.ssh/id_rsa_new ~/.ssh/id_rsa" + delegate_to: localhost + + - name: Locally promote new key (public) + command: " mv ~/.ssh/id_rsa_new.pub ~/.ssh/id_rsa.pub" + delegate_to: localhost + + - name: Remove old key from hosts + ansible.posix.authorized_key: + user: "{{ item }}" + state: absent + key: "{{ lookup('file', '/tmp/id_rsa_old.pub') }}" + loop: + - "stack" + - "kolla" + become: true diff --git a/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml b/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml new file mode 100644 index 000000000..a9d78cc12 --- /dev/null +++ b/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Added the ``rekey-hosts.yml`` playbook to automatically rotate the SSH + keys on all hosts in the cloud for the stack and kolla users. 
From c527579ef38381c196d1c1cb67a5146124a6a6da Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Fri, 17 Nov 2023 16:13:16 +0000 Subject: [PATCH 27/37] Rekey playbook misc improvements --- etc/kayobe/ansible/rekey-hosts.yml | 87 ++++++++++--------- .../add-rekey-playbook-0065c5057b1639f8.yaml | 2 +- 2 files changed, 46 insertions(+), 43 deletions(-) diff --git a/etc/kayobe/ansible/rekey-hosts.yml b/etc/kayobe/ansible/rekey-hosts.yml index 445e7224f..8641d5791 100644 --- a/etc/kayobe/ansible/rekey-hosts.yml +++ b/etc/kayobe/ansible/rekey-hosts.yml @@ -3,70 +3,73 @@ hosts: overcloud,seed,seed-hypervisor,infra-vms gather_facts: false vars: - ansible_user: stack - ansible_python_interpreter: /usr/bin/python3 + new_key_type: ed25519 + ansible_ssh_common_args: "-o StrictHostKeyChecking=no" + rekey_users: + - stack + - kolla + existing_key_path: "~/.ssh/id_rsa" + rekey_remove_existing_key: true tasks: - - name: Generate a fresh SSH key - community.crypto.openssh_keypair: - path: ~/.ssh/id_rsa_new + - name: Stat existing key file + ansible.builtin.stat: + path: "{{ existing_key_path }}" + register: stat_result delegate_to: localhost + run_once: true - # - name: Copy new key to hosts - # ansible.builtin.copy: - # src: /tmp/id_rsa_new.pub - # dest: /tmp/id_rsa_new.pub - # mode: '0600' - # become: true - - - name: Copy old key to hosts - ansible.builtin.copy: - src: ~/.ssh/id_rsa.pub - dest: /tmp/id_rsa_old.pub - mode: '0777' - become: true + - name: Fail when existing key does not exist + ansible.builtin.fail: + msg: "No existing key file found. Check existing_key_path is set correctly." + when: + - not stat_result.stat.exists + delegate_to: localhost + run_once: true - - name: Set new stack authorized keys - ansible.posix.authorized_key: - user: "{{ item }}" - state: present - key: "{{ lookup('file', '~/.ssh/id_rsa_new.pub') }}" - loop: - - "stack" - - "kolla" - become: true + - name: Generate a new SSH key + community.crypto.openssh_keypair: + path: "~/.ssh/id_{{ new_key_type }}_new" + type: "{{ new_key_type }}" + delegate_to: localhost + run_once: true - - name: Set new stack authorized keys + - name: Set new authorized keys + vars: + lookup_path: "~/.ssh/id_{{ new_key_type }}_new.pub" ansible.posix.authorized_key: user: "{{ item }}" state: present - key: "{{ lookup('file', '~/.ssh/id_rsa_new.pub') }}" - loop: - - "stack" - - "kolla" + key: "{{ lookup('file', lookup_path) }}" + loop: "{{ rekey_users }}" become: true - - name: Locally deprecate old key (private) - command: "mv ~/.ssh/id_rsa ~/.ssh/id_rsa_old" + - name: Locally deprecate existing key (private) + command: "mv {{ existing_key_path }} {{ existing_key_path }}_old" delegate_to: localhost + run_once: true - - name: Locally deprecate old key (public) - command: "mv ~/.ssh/id_rsa.pub ~/.ssh/id_rsa_old.pub" + - name: Locally deprecate existing key (public) + command: "mv {{ existing_key_path }}.pub {{ existing_key_path }}_old.pub" delegate_to: localhost + run_once: true - name: Locally promote new key (private) - command: "mv ~/.ssh/id_rsa_new ~/.ssh/id_rsa" + command: "mv ~/.ssh/id_{{ new_key_type }}_new ~/.ssh/id_{{ new_key_type }}" delegate_to: localhost + run_once: true - name: Locally promote new key (public) - command: " mv ~/.ssh/id_rsa_new.pub ~/.ssh/id_rsa.pub" + command: " mv ~/.ssh/id_{{ new_key_type }}_new.pub ~/.ssh/id_{{ new_key_type }}.pub" delegate_to: localhost + run_once: true - name: Remove old key from hosts + vars: + lookup_path: "{{ existing_key_path }}_old.pub" ansible.posix.authorized_key: user: "{{ item }}" state: 
absent - key: "{{ lookup('file', '/tmp/id_rsa_old.pub') }}" - loop: - - "stack" - - "kolla" + key: "{{ lookup('file', lookup_path) }}" + loop: "{{ rekey_users }}" become: true + when: rekey_remove_existing_key diff --git a/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml b/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml index a9d78cc12..5e75a51ad 100644 --- a/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml +++ b/releasenotes/notes/add-rekey-playbook-0065c5057b1639f8.yaml @@ -2,4 +2,4 @@ features: - | Added the ``rekey-hosts.yml`` playbook to automatically rotate the SSH - keys on all hosts in the cloud for the stack and kolla users. + keys on all hosts. From 0123c1f541b5553b3de788be169bb4ddaf6a9e10 Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Mon, 20 Nov 2023 16:54:46 +0000 Subject: [PATCH 28/37] Change rekey playbook to use existing ssh vars --- etc/kayobe/ansible/rekey-hosts.yml | 43 +++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/etc/kayobe/ansible/rekey-hosts.yml b/etc/kayobe/ansible/rekey-hosts.yml index 8641d5791..bd903d010 100644 --- a/etc/kayobe/ansible/rekey-hosts.yml +++ b/etc/kayobe/ansible/rekey-hosts.yml @@ -3,24 +3,37 @@ hosts: overcloud,seed,seed-hypervisor,infra-vms gather_facts: false vars: - new_key_type: ed25519 ansible_ssh_common_args: "-o StrictHostKeyChecking=no" rekey_users: - stack - kolla - existing_key_path: "~/.ssh/id_rsa" - rekey_remove_existing_key: true + rekey_remove_existing_key: false tasks: - - name: Stat existing key file + - name: Stat existing private key file ansible.builtin.stat: - path: "{{ existing_key_path }}" + path: "{{ ssh_private_key_path }}" register: stat_result delegate_to: localhost run_once: true - - name: Fail when existing key does not exist + - name: Fail when existing private key does not exist ansible.builtin.fail: - msg: "No existing key file found. Check existing_key_path is set correctly." + msg: "No existing private key file found. Check ssh_private_key_path and is set correctly." + when: + - not stat_result.stat.exists + delegate_to: localhost + run_once: true + + - name: Stat existing public key file + ansible.builtin.stat: + path: "{{ ssh_public_key_path }}" + register: stat_result + delegate_to: localhost + run_once: true + + - name: Fail when existing public key does not exist + ansible.builtin.fail: + msg: "No existing public key file found. Check ssh_public_key_path and is set correctly." 
when: - not stat_result.stat.exists delegate_to: localhost @@ -28,14 +41,14 @@ - name: Generate a new SSH key community.crypto.openssh_keypair: - path: "~/.ssh/id_{{ new_key_type }}_new" - type: "{{ new_key_type }}" + path: "{{ ssh_private_key_path }}_new" + type: "{{ ssh_key_type }}" delegate_to: localhost run_once: true - name: Set new authorized keys vars: - lookup_path: "~/.ssh/id_{{ new_key_type }}_new.pub" + lookup_path: "{{ ssh_private_key_path }}_new.pub" ansible.posix.authorized_key: user: "{{ item }}" state: present @@ -44,28 +57,28 @@ become: true - name: Locally deprecate existing key (private) - command: "mv {{ existing_key_path }} {{ existing_key_path }}_old" + command: "mv {{ ssh_private_key_path }} {{ ssh_private_key_path }}_old" delegate_to: localhost run_once: true - name: Locally deprecate existing key (public) - command: "mv {{ existing_key_path }}.pub {{ existing_key_path }}_old.pub" + command: "mv {{ ssh_public_key_path }} {{ ssh_public_key_path }}_old" delegate_to: localhost run_once: true - name: Locally promote new key (private) - command: "mv ~/.ssh/id_{{ new_key_type }}_new ~/.ssh/id_{{ new_key_type }}" + command: "mv {{ ssh_private_key_path }}_new {{ ssh_private_key_path }}" delegate_to: localhost run_once: true - name: Locally promote new key (public) - command: " mv ~/.ssh/id_{{ new_key_type }}_new.pub ~/.ssh/id_{{ new_key_type }}.pub" + command: "mv {{ ssh_private_key_path }}_new.pub {{ ssh_public_key_path }}" delegate_to: localhost run_once: true - name: Remove old key from hosts vars: - lookup_path: "{{ existing_key_path }}_old.pub" + lookup_path: "{{ ssh_public_key_path }}_old" ansible.posix.authorized_key: user: "{{ item }}" state: absent From 6931e1cb6e80523475c0a4af97cc56944f0db7bf Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Wed, 29 Nov 2023 17:19:30 +0000 Subject: [PATCH 29/37] Rework rekey-hosts.yml playbook --- etc/kayobe/ansible/rekey-hosts.yml | 35 +++++++++++++++++++----------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/etc/kayobe/ansible/rekey-hosts.yml b/etc/kayobe/ansible/rekey-hosts.yml index bd903d010..6d3b358a6 100644 --- a/etc/kayobe/ansible/rekey-hosts.yml +++ b/etc/kayobe/ansible/rekey-hosts.yml @@ -1,9 +1,18 @@ --- +# Playbook to rotate SSH keys across the cloud. By default it will rotate the +# standard keys used by kayobe/kolla-ansible, but it can be configured for any +# keys. + - name: Rekey hosts hosts: overcloud,seed,seed-hypervisor,infra-vms gather_facts: false vars: ansible_ssh_common_args: "-o StrictHostKeyChecking=no" + existing_private_key_path: "{{ ssh_private_key_path }}" + existing_public_key_path: "{{ ssh_public_key_path }}" + new_private_key_path: "{{ ssh_private_key_path }}" + new_public_key_path: "{{ ssh_public_key_path }}" + new_key_type: "{{ ssh_key_type }}" rekey_users: - stack - kolla @@ -11,14 +20,14 @@ tasks: - name: Stat existing private key file ansible.builtin.stat: - path: "{{ ssh_private_key_path }}" + path: "{{ existing_private_key_path }}" register: stat_result delegate_to: localhost run_once: true - name: Fail when existing private key does not exist ansible.builtin.fail: - msg: "No existing private key file found. Check ssh_private_key_path and is set correctly." + msg: "No existing private key file found. Check existing_private_key_path is set correctly." 
when: - not stat_result.stat.exists delegate_to: localhost @@ -26,14 +35,14 @@ - name: Stat existing public key file ansible.builtin.stat: - path: "{{ ssh_public_key_path }}" + path: "{{ existing_public_key_path }}" register: stat_result delegate_to: localhost run_once: true - name: Fail when existing public key does not exist ansible.builtin.fail: - msg: "No existing public key file found. Check ssh_public_key_path and is set correctly." + msg: "No existing public key file found. Check existing_public_key_path is set correctly." when: - not stat_result.stat.exists delegate_to: localhost @@ -41,14 +50,14 @@ - name: Generate a new SSH key community.crypto.openssh_keypair: - path: "{{ ssh_private_key_path }}_new" - type: "{{ ssh_key_type }}" + path: "{{ existing_private_key_path }}_new" + type: "{{ new_key_type }}" delegate_to: localhost run_once: true - name: Set new authorized keys vars: - lookup_path: "{{ ssh_private_key_path }}_new.pub" + lookup_path: "{{ existing_private_key_path }}_new.pub" ansible.posix.authorized_key: user: "{{ item }}" state: present @@ -57,32 +66,32 @@ become: true - name: Locally deprecate existing key (private) - command: "mv {{ ssh_private_key_path }} {{ ssh_private_key_path }}_old" + command: "mv {{ existing_private_key_path }} {{ existing_public_key_path }}_old" delegate_to: localhost run_once: true - name: Locally deprecate existing key (public) - command: "mv {{ ssh_public_key_path }} {{ ssh_public_key_path }}_old" + command: "mv {{ existing_public_key_path }} {{ existing_public_key_path }}_old" delegate_to: localhost run_once: true - name: Locally promote new key (private) - command: "mv {{ ssh_private_key_path }}_new {{ ssh_private_key_path }}" + command: "mv {{ existing_private_key_path }}_new {{ new_private_key_path }}" delegate_to: localhost run_once: true - name: Locally promote new key (public) - command: "mv {{ ssh_private_key_path }}_new.pub {{ ssh_public_key_path }}" + command: "mv {{ existing_private_key_path }}_new.pub {{ new_public_key_path }}" delegate_to: localhost run_once: true - name: Remove old key from hosts vars: - lookup_path: "{{ ssh_public_key_path }}_old" + lookup_path: "{{ existing_public_key_path }}_old" ansible.posix.authorized_key: user: "{{ item }}" state: absent key: "{{ lookup('file', lookup_path) }}" loop: "{{ rekey_users }}" become: true - when: rekey_remove_existing_key + when: rekey_remove_existing_key | bool From 14caeb48e796a15e0922e0950cb3f0c87d581dc9 Mon Sep 17 00:00:00 2001 From: Alex-Welsh Date: Thu, 30 Nov 2023 13:24:19 +0000 Subject: [PATCH 30/37] rekey-host.yml remove-key tag --- etc/kayobe/ansible/rekey-hosts.yml | 44 ++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/etc/kayobe/ansible/rekey-hosts.yml b/etc/kayobe/ansible/rekey-hosts.yml index 6d3b358a6..a72da3ac7 100644 --- a/etc/kayobe/ansible/rekey-hosts.yml +++ b/etc/kayobe/ansible/rekey-hosts.yml @@ -7,12 +7,15 @@ hosts: overcloud,seed,seed-hypervisor,infra-vms gather_facts: false vars: - ansible_ssh_common_args: "-o StrictHostKeyChecking=no" + # The existing key is the key that is currently used to access overcloud hosts existing_private_key_path: "{{ ssh_private_key_path }}" existing_public_key_path: "{{ ssh_public_key_path }}" + # The new key is the key that will be generated by this playbook new_private_key_path: "{{ ssh_private_key_path }}" new_public_key_path: "{{ ssh_public_key_path }}" new_key_type: "{{ ssh_key_type }}" + # The existing key will locally be moved to deprecated_key_path once it is replaced + 
deprecated_key_path: ~/old_ssh_key rekey_users: - stack - kolla @@ -66,12 +69,12 @@ become: true - name: Locally deprecate existing key (private) - command: "mv {{ existing_private_key_path }} {{ existing_public_key_path }}_old" + command: "mv {{ existing_private_key_path }} {{ deprecated_key_path }}" delegate_to: localhost run_once: true - name: Locally deprecate existing key (public) - command: "mv {{ existing_public_key_path }} {{ existing_public_key_path }}_old" + command: "mv {{ existing_public_key_path }} {{ deprecated_key_path }}.pub" delegate_to: localhost run_once: true @@ -85,13 +88,30 @@ delegate_to: localhost run_once: true - - name: Remove old key from hosts - vars: - lookup_path: "{{ existing_public_key_path }}_old" - ansible.posix.authorized_key: - user: "{{ item }}" - state: absent - key: "{{ lookup('file', lookup_path) }}" - loop: "{{ rekey_users }}" - become: true + - block: + - name: Stat old key file + ansible.builtin.stat: + path: "{{ deprecated_key_path }}.pub" + register: stat_result + delegate_to: localhost + run_once: true + + - name: Fail when deprecated public key does not exist + ansible.builtin.fail: + msg: "No deprecated public key file found. Check deprecated_key_path is set correctly." + when: + - not stat_result.stat.exists + delegate_to: localhost + run_once: true + + - name: Remove old key from hosts + vars: + lookup_path: "{{ deprecated_key_path }}.pub" + ansible.posix.authorized_key: + user: "{{ item }}" + state: absent + key: "{{ lookup('file', lookup_path) }}" + loop: "{{ rekey_users }}" + become: true + tags: remove-key when: rekey_remove_existing_key | bool From 6528e1d4b817cfee96029b246b52b97ee77c9a04 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Fri, 1 Dec 2023 13:48:06 +0000 Subject: [PATCH 31/37] Fix Wazuh agent playbook w/o using custom policies --- etc/kayobe/ansible/wazuh-agent.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etc/kayobe/ansible/wazuh-agent.yml b/etc/kayobe/ansible/wazuh-agent.yml index cd5afce93..730828604 100644 --- a/etc/kayobe/ansible/wazuh-agent.yml +++ b/etc/kayobe/ansible/wazuh-agent.yml @@ -28,7 +28,9 @@ owner: wazuh group: wazuh block: sca.remote_commands=1 - when: custom_sca_policies.files | length > 0 + when: + - custom_sca_policies_folder.stat.exists + - custom_sca_policies.files | length > 0 notify: - Restart wazuh-agent From 3c980beed0bfa2a74813ac4c90bcff205ec9aa5f Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 5 Dec 2023 10:07:53 +0100 Subject: [PATCH 32/37] Fix link to Release Train docs --- doc/source/operations/rocky-linux-9.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index bf65181f8..8a03f7c55 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -148,8 +148,8 @@ Elasticsearch/Kibana should be migrated to OpenSearch. Sync Release Train artifacts ---------------------------- -New `StackHPC Release Train <../configuration/release-train>` content should be -synced to the local Pulp server. This includes host packages (Deb/RPM) and +New `StackHPC Release Train <../configuration/release-train>`__ content should +be synced to the local Pulp server. This includes host packages (Deb/RPM) and container images. 
To sync host packages: From ab50878cad0ae5cb74d892d3aab43663844164c1 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 5 Dec 2023 15:34:17 +0100 Subject: [PATCH 33/37] Fix opensearch-migration command Co-Authored-By: Alex Welsh --- doc/source/operations/rocky-linux-9.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index bf65181f8..f4168cc2b 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -143,7 +143,7 @@ Elasticsearch/Kibana should be migrated to OpenSearch. - ``kayobe overcloud service configuration generate --node-config-dir '/tmp/ignore' --kolla-tags none`` - ``kayobe overcloud container image pull -kt opensearch`` - ``kayobe kolla ansible run opensearch-migration`` -- If old indices are detected, they may be removed by running ``kayobe kolla ansible run opensearch-migration -e prune_kibana_indices=true`` +- If old indices are detected, they may be removed by running ``kayobe kolla ansible run opensearch-migration -ke prune_kibana_indices=true`` Sync Release Train artifacts ---------------------------- From 1b4594a6bcb618b71d92dde262cbcfd934311eb7 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 5 Dec 2023 15:38:47 +0100 Subject: [PATCH 34/37] Fix link to Release Train docs (really) --- doc/source/operations/rocky-linux-9.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/operations/rocky-linux-9.rst b/doc/source/operations/rocky-linux-9.rst index 8a03f7c55..98b15c2ef 100644 --- a/doc/source/operations/rocky-linux-9.rst +++ b/doc/source/operations/rocky-linux-9.rst @@ -148,9 +148,9 @@ Elasticsearch/Kibana should be migrated to OpenSearch. Sync Release Train artifacts ---------------------------- -New `StackHPC Release Train <../configuration/release-train>`__ content should -be synced to the local Pulp server. This includes host packages (Deb/RPM) and -container images. +New `StackHPC Release Train <../configuration/release-train.html>`__ content +should be synced to the local Pulp server. This includes host packages +(Deb/RPM) and container images. To sync host packages: From 2acf52aa4653faa956e3cad5a3447aa87c846c28 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 7 Dec 2023 10:50:04 +0000 Subject: [PATCH 35/37] Drop CentOS/Rocky 8 from CIS security hardening These OS versions are no longer supported. --- .../configuration/security-hardening.rst | 12 --------- etc/kayobe/ansible/cis.yml | 13 ---------- etc/kayobe/ansible/requirements.yml | 3 --- etc/kayobe/inventory/group_vars/overcloud/cis | 26 ------------------- 4 files changed, 54 deletions(-) diff --git a/doc/source/configuration/security-hardening.rst b/doc/source/configuration/security-hardening.rst index a65812513..f0cd77df3 100644 --- a/doc/source/configuration/security-hardening.rst +++ b/doc/source/configuration/security-hardening.rst @@ -12,7 +12,6 @@ improvement over an unhardened system. A typical score would be 70%. The following operating systems are supported: -- Rocky 8, RHEL 8, CentOS Stream 8 - Ubuntu 22.04 - Rocky 9 @@ -26,23 +25,12 @@ instance, you may want different rules on a network node compared to a controller. It is best to consult the upstream role documentation for details about what each variable does. The documentation can be found here: -- `Rocky 8, RHEL 8, CentOS Stream 8 `__ - `Ubuntu 22.04 `__ - `Rocky 9 `__ Running the playbooks --------------------- -.. 
note: - - On CentOS 8, you must run with `INJECT_FACT_AS_VARS `__ - enabled. To do this for this playbook only, you can use: - - .. code-block: shell - - ANSIBLE_INJECT_FACT_VARS=true kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cis.yml - - As there is potential for unintended side effects when applying the hardening playbooks, the playbooks are not currently enabled by default. It is recommended that they are first applied to a representative staging environment to determine diff --git a/etc/kayobe/ansible/cis.yml b/etc/kayobe/ansible/cis.yml index ffb31c2fe..3edb174cc 100644 --- a/etc/kayobe/ansible/cis.yml +++ b/etc/kayobe/ansible/cis.yml @@ -10,19 +10,6 @@ state: present when: ansible_facts.distribution == 'Ubuntu' - - name: Remove /etc/motd - # See remediation in: - # https://github.com/wazuh/wazuh/blob/bfa4efcf11e288c0a8809dc0b45fdce42fab8e0d/ruleset/sca/centos/8/cis_centos8_linux.yml#L777 - file: - path: /etc/motd - state: absent - when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '8' - - - include_role: - name: ansible-lockdown.rhel8_cis - when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '8' - tags: always - - include_role: name: ansible-lockdown.rhel9_cis when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9' diff --git a/etc/kayobe/ansible/requirements.yml b/etc/kayobe/ansible/requirements.yml index 92276159b..bfb2d51b9 100644 --- a/etc/kayobe/ansible/requirements.yml +++ b/etc/kayobe/ansible/requirements.yml @@ -12,9 +12,6 @@ collections: version: 2.4.0 roles: - src: stackhpc.vxlan - - name: ansible-lockdown.rhel8_cis - src: https://github.com/ansible-lockdown/RHEL8-CIS - version: 1.3.0 - name: ansible-lockdown.ubuntu22_cis src: https://github.com/stackhpc/UBUNTU22-CIS #FIXME: Waiting for https://github.com/ansible-lockdown/UBUNTU22-CIS/pull/174 diff --git a/etc/kayobe/inventory/group_vars/overcloud/cis b/etc/kayobe/inventory/group_vars/overcloud/cis index b20843d37..40cd0692b 100644 --- a/etc/kayobe/inventory/group_vars/overcloud/cis +++ b/etc/kayobe/inventory/group_vars/overcloud/cis @@ -5,32 +5,6 @@ # Enable collecting auditd logs update_audit_template: true -############################################################################## -# RHEL 8 / Centos Stream 8 CIS Hardening Configuration - -# NOTE: kayobe configures NTP. Do not clobber configuration. -rhel8cis_time_synchronization: skip -rhel8cis_rule_2_2_1_1: false -rhel8cis_rule_2_2_1_2: false - -# NOTE: disable CIS rolefirewall configuration -rhel8cis_firewall: skip -rhel8cis_rule_3_4_1_1: false - -# NOTE: kayobe does not currently support selinux -rhel8cis_selinux_disable: true - -# NOTE: This updates the system. Let's do this explicitly. -rhel8cis_rule_1_9: false - -# NOTE: FUTURE breaks wazuh agent repo metadata download -rhel8cis_crypto_policy: FIPS - -# NOTE: We will remove /etc/motd instead. This prevents a duplicate warning -# from being displayed. -rhel8cis_rule_1_8_1_1: false -rhel8cis_rule_1_8_1_4: false - ############################################################################## # Rocky 9 CIS Hardening Configuration From c4f5173e2c1e0a293a34dc4f1dcad9a3799f5715 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 7 Dec 2023 10:51:18 +0000 Subject: [PATCH 36/37] Remove SELinux overrides These were added for transitional support for Rocky 9 in Yoga, and are not required in Zed because the default changed in Kayobe. 
--- etc/kayobe/inventory/group_vars/all/selinux | 9 --------- .../notes/selinux-permissive-bb953d2cdcd7a545.yaml | 7 ------- 2 files changed, 16 deletions(-) delete mode 100644 etc/kayobe/inventory/group_vars/all/selinux delete mode 100644 releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml diff --git a/etc/kayobe/inventory/group_vars/all/selinux b/etc/kayobe/inventory/group_vars/all/selinux deleted file mode 100644 index c38702b3f..000000000 --- a/etc/kayobe/inventory/group_vars/all/selinux +++ /dev/null @@ -1,9 +0,0 @@ ---- -# Configure SELinux in permissive mode when configuring a Rocky Linux 9 host. -selinux_state: "{{ 'permissive' if ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9' else 'disabled' }}" - -# Do NOT reboot Rocky Linux 9 hosts to apply SELinux config changes. Operators -# must opt-in by changing this variable when applying host configuration. This -# is to avoid automatically rebooting hosts originally deployed with SELinux -# disabled and which now need to be changed to permissive. -disable_selinux_do_reboot: "{{ not (ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9') }}" diff --git a/releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml b/releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml deleted file mode 100644 index 90b1364fa..000000000 --- a/releasenotes/notes/selinux-permissive-bb953d2cdcd7a545.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- -upgrade: - - | - SELinux mode is now set to permissive when configuring Rocky Linux 9 hosts, - to match the default mode in the Zed release. If SELinux is disabled on - these hosts, a reboot is required and will only be performed by Ansible if - ``disable_selinux_do_reboot`` is changed to ``true``. From 08fd1e8332c3fe328ffcb86965b484cc59bb7337 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 7 Dec 2023 10:51:48 +0000 Subject: [PATCH 37/37] Fix OpenSearch reno --- .../grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml b/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml index e4ea7e537..1a45bc9e0 100644 --- a/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml +++ b/releasenotes/notes/grafana-elasticsearch-cluster-health-154275e8d39dd89f.yaml @@ -1,5 +1,5 @@ --- fixes: - | - Fixes display of the Elasticsearch or OpenSearch cluster health in Grafana + Fixes display of the OpenSearch cluster health in Grafana when in yellow state.