From bf3586e64251bdef017416c99f8e8d511a45ae1c Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Tue, 28 Nov 2023 16:43:33 +0000
Subject: [PATCH 1/3] Remove bogus alert

This was removed upstream, see:

https://github.com/samber/awesome-prometheus-alerts/commit/10b810fd6e0fe92500279366dd3b6d2309be8def

(cherry picked from commit 80f7567f60cea94d440f5700541e6944cb7456bd)
---
Review notes (text between the "---" line and the first "diff --git" is not
part of the commit message):
- The release note now names the alert that is actually removed by this
  patch (ContainerVolumeIoUsage); the ContainerVolumeUsage rule is kept.
- The release note file name is left unchanged.
- The blob ID on the release note "index" line was not recomputed for the
  corrected text.

 etc/kayobe/kolla/config/prometheus/cadvisor.rules        | 9 ---------
 ...ogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml | 5 +++++
 2 files changed, 5 insertions(+), 9 deletions(-)
 create mode 100644 releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml

diff --git a/etc/kayobe/kolla/config/prometheus/cadvisor.rules b/etc/kayobe/kolla/config/prometheus/cadvisor.rules
index d97a3a545..726517452 100644
--- a/etc/kayobe/kolla/config/prometheus/cadvisor.rules
+++ b/etc/kayobe/kolla/config/prometheus/cadvisor.rules
@@ -33,15 +33,6 @@ groups:
       summary: "Container Volume usage (instance {{ $labels.instance }})"
       description: "Container Volume usage is above 80%"
 
-  - alert: ContainerVolumeIoUsage
-    expr: (sum(container_fs_io_current{name!=""}) BY (instance, name) * 100) > 80
-    for: 5m
-    labels:
-      severity: warning
-    annotations:
-      summary: "Container Volume IO usage (instance {{ $labels.instance }})"
-      description: "Container Volume IO usage is above 80%"
-
   - alert: ContainerHighThrottleRate
     expr: rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1
     for: 1m
diff --git a/releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml b/releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml
new file mode 100644
index 000000000..f1c76c545
--- /dev/null
+++ b/releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Removes bogus ContainerVolumeIoUsage alert. This rule wasn't correctly
+    measuring container volume IO and could cause spurious alerts.
From ca192e22f59bc8640c5eb331d30ff1f43bed9371 Mon Sep 17 00:00:00 2001
From: Pierre Riteau
Date: Wed, 17 Jan 2024 10:23:41 +0100
Subject: [PATCH 2/3] Fix issue with GRUB defaulting to an old kernel

On some Rocky Linux 9 deployments, we are seeing GRUB defaulting to the
old kernel included in the DIB image, even after newer kernels have been
installed.

This appears to be related to the presence of Boot Loader Specification
(BLS) entries with a machine ID lower in alphabetical order than the
current one:

[stack@host ~]$ sudo cat /etc/machine-id
cd3361a338fe47348de9937e51a7a4aa
[stack@host ~]$ sudo ls -l /boot/loader/entries/
total 20
-rw-r--r--. 1 root root 449 Mar 31  2023 104a42359fae41b687caac066397aec2-0-rescue.conf
-rw-r--r--. 1 root root 397 Mar 31  2023 104a42359fae41b687caac066397aec2-5.14.0-162.22.2.el9_1.x86_64.conf
-rw-r--r--  1 root root 446 Jun  9  2023 cd3361a338fe47348de9937e51a7a4aa-0-rescue.conf
-rw-r--r--  1 root root 422 Jun  9  2023 cd3361a338fe47348de9937e51a7a4aa-5.14.0-284.11.1.el9_2.x86_64.conf
-rw-r--r--  1 root root 422 Jan  9 09:40 cd3361a338fe47348de9937e51a7a4aa-5.14.0-284.30.1.el9_2.x86_64.conf

Add a new `reset-bls-entries.yml` playbook which will rename existing
BLS entries using the current machine ID. This should prompt GRUB to
pick the most recent kernel on next reboot.
---
Review notes (text between the "---" line and the first "diff --git" is not
part of the commit message):
- "Get machine ID" only reads a file, so it now sets changed_when: false
  instead of reporting a change on every run.
- All modules use their ansible.builtin.* names, matching the find task.
- Comments were added to the playbook; the hunk header and diffstat reflect
  the new line count (45).
- The blob IDs on the "index" lines were not recomputed for the edited
  content.

 etc/kayobe/ansible/reset-bls-entries.yml      | 45 +++++++++++++++++++
 .../reset-bls-entries-b2bded62c5887937.yaml   |  7 +++
 2 files changed, 52 insertions(+)
 create mode 100755 etc/kayobe/ansible/reset-bls-entries.yml
 create mode 100644 releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml

diff --git a/etc/kayobe/ansible/reset-bls-entries.yml b/etc/kayobe/ansible/reset-bls-entries.yml
new file mode 100755
index 000000000..59e968cba
--- /dev/null
+++ b/etc/kayobe/ansible/reset-bls-entries.yml
@@ -0,0 +1,45 @@
+---
+# Custom playbook to reset Boot Loader Specification (BLS) entries to resolve
+# an issue with GRUB defaulting to an old kernel. This is adapted from a Bash
+# script in diskimage-builder:
+# https://opendev.org/openstack/diskimage-builder/src/branch/master/diskimage_builder/elements/rhel/post-install.d/03-reset-bls-entries
+
+- name: Reset BLS entries
+  hosts: overcloud
+  become: true
+  tags:
+    - reset-bls-entries
+  tasks:
+    # Read-only query: run it in check mode too and never report a change.
+    - name: Get machine ID
+      ansible.builtin.command: cat /etc/machine-id
+      register: machine_id
+      changed_when: false
+      check_mode: false
+
+    # Lists every BLS entry; entries already carrying the current machine ID
+    # are filtered out in the loops below.
+    - name: Find entries with wrong machine ID
+      ansible.builtin.find:
+        paths: /boot/loader/entries
+        patterns: "*.conf"
+      register: bls_entries
+      check_mode: false
+
+    # We set force to false to avoid replacing an existing BLS entry with the
+    # correct machine ID.
+    - name: Rename entries with wrong machine ID
+      ansible.builtin.copy:
+        src: "/boot/loader/entries/{{ item }}"
+        dest: "/boot/loader/entries/{{ item | ansible.builtin.regex_replace('^[a-f0-9]*', machine_id.stdout) }}"
+        force: false
+        remote_src: true
+      with_items: "{{ bls_entries.files | map(attribute='path') | reject('search', machine_id.stdout) | map('basename') }}"
+
+    # The copy above leaves the original file in place; delete it to complete
+    # the rename.
+    - name: Remove entries with wrong machine ID
+      ansible.builtin.file:
+        path: "/boot/loader/entries/{{ item }}"
+        state: absent
+      with_items: "{{ bls_entries.files | map(attribute='path') | reject('search', machine_id.stdout) | map('basename') }}"
diff --git a/releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml b/releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml
new file mode 100644
index 000000000..6279b6a00
--- /dev/null
+++ b/releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    Add a new ``reset-bls-entries.yml`` custom playbook which will rename
+    existing Boot Loader Specification (BLS) entries using the current machine
+    ID for each host. This should fix an issue with GRUB not selecting the most
+    recent kernel during boot.
From 00ff42d7cea8852e4e2b709a14e456b1602800c9 Mon Sep 17 00:00:00 2001
From: Seunghun Lee <45145778+seunghun1ee@users.noreply.github.com>
Date: Fri, 26 Jan 2024 11:41:13 +0000
Subject: [PATCH 3/3] Add pvresize before lvextend (#890)

Running pvresize before lvextend ensures that there is enough space
for the extension.
---
Review notes (text between the "---" line and the first "diff --git" is not
part of the commit message):
- The command substitution passed to pvresize is now quoted so that its
  output is not subject to word splitting or globbing.
- The blob ID on the "index" line was not recomputed for the edited content.

 etc/kayobe/environments/ci-aio/automated-setup.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/etc/kayobe/environments/ci-aio/automated-setup.sh b/etc/kayobe/environments/ci-aio/automated-setup.sh
index b5b0795a5..9252bf35a 100644
--- a/etc/kayobe/environments/ci-aio/automated-setup.sh
+++ b/etc/kayobe/environments/ci-aio/automated-setup.sh
@@ -7,6 +7,7 @@ cat << EOF | sudo tee -a /etc/hosts
 EOF
 
 if sudo vgdisplay | grep -q lvm2; then
+    sudo pvresize "$(sudo pvs --noheadings | head -n 1 | awk '{print $1}')"
     sudo lvextend -L 4G /dev/rootvg/lv_home -r || true
     sudo lvextend -L 4G /dev/rootvg/lv_tmp -r || true
 fi