NOTE(review): this patch was re-flowed from a whitespace-collapsed copy, so
the indentation of added and context lines is reconstructed -- confirm the
context lines against the target tree before applying. The "index" blob IDs
predate the edits made to the added files and no longer match their contents.

diff --git a/etc/kayobe/ansible/reset-bls-entries.yml b/etc/kayobe/ansible/reset-bls-entries.yml
new file mode 100755
index 000000000..59e968cba
--- /dev/null
+++ b/etc/kayobe/ansible/reset-bls-entries.yml
@@ -0,0 +1,49 @@
+---
+# Custom playbook to reset Boot Loader Specification (BLS) entries to resolve
+# an issue with GRUB defaulting to an old kernel. This is adapted from a Bash
+# script in diskimage-builder:
+# https://opendev.org/openstack/diskimage-builder/src/branch/master/diskimage_builder/elements/rhel/post-install.d/03-reset-bls-entries
+
+- name: Reset BLS entries
+  hosts: overcloud
+  become: true
+  tags:
+    - reset-bls-entries
+  vars:
+    # Basenames of the BLS entries whose path does not contain this host's
+    # machine ID. Templated lazily, so only use it after the machine_id and
+    # bls_entries results have been registered by the tasks below.
+    stale_bls_entries: >-
+      {{ bls_entries.files | map(attribute='path')
+      | reject('search', machine_id.stdout) | map('basename') | list }}
+  tasks:
+    # Read-only lookup: runs in check mode too and never reports "changed".
+    - name: Get machine ID
+      ansible.builtin.command: cat /etc/machine-id
+      register: machine_id
+      changed_when: false
+      check_mode: false
+
+    # Lists every entry; the ones to rename are selected by stale_bls_entries.
+    - name: Find BLS entries
+      ansible.builtin.find:
+        paths: /boot/loader/entries
+        patterns: "*.conf"
+      register: bls_entries
+      check_mode: false
+
+    # A rename is done as copy + delete. We set force to false to avoid
+    # replacing an existing BLS entry with the correct machine ID.
+    - name: Rename entries with wrong machine ID
+      ansible.builtin.copy:
+        src: "/boot/loader/entries/{{ item }}"
+        dest: "/boot/loader/entries/{{ item | ansible.builtin.regex_replace('^[a-f0-9]*', machine_id.stdout) }}"
+        force: false
+        remote_src: true
+      with_items: "{{ stale_bls_entries }}"
+
+    - name: Remove entries with wrong machine ID
+      ansible.builtin.file:
+        path: "/boot/loader/entries/{{ item }}"
+        state: absent
+      with_items: "{{ stale_bls_entries }}"
diff --git a/etc/kayobe/environments/ci-aio/automated-setup.sh b/etc/kayobe/environments/ci-aio/automated-setup.sh
index 59271ccdf..794e9cbef 100644
--- a/etc/kayobe/environments/ci-aio/automated-setup.sh
+++ b/etc/kayobe/environments/ci-aio/automated-setup.sh
@@ -7,6 +7,7 @@ cat << EOF | sudo tee -a /etc/hosts
 EOF
 
 if sudo vgdisplay | grep -q lvm2; then
+    sudo pvresize $(sudo pvs --noheadings | head -n 1 | awk '{print $1}')
     sudo lvextend -L 4G /dev/rootvg/lv_home -r || true
     sudo lvextend -L 4G /dev/rootvg/lv_tmp -r || true
 fi
diff --git a/etc/kayobe/kolla/config/prometheus/cadvisor.rules b/etc/kayobe/kolla/config/prometheus/cadvisor.rules
index d97a3a545..726517452 100644
--- a/etc/kayobe/kolla/config/prometheus/cadvisor.rules
+++ b/etc/kayobe/kolla/config/prometheus/cadvisor.rules
@@ -33,15 +33,6 @@ groups:
       summary: "Container Volume usage (instance {{ $labels.instance }})"
       description: "Container Volume usage is above 80%"
 
-  - alert: ContainerVolumeIoUsage
-    expr: (sum(container_fs_io_current{name!=""}) BY (instance, name) * 100) > 80
-    for: 5m
-    labels:
-      severity: warning
-    annotations:
-      summary: "Container Volume IO usage (instance {{ $labels.instance }})"
-      description: "Container Volume IO usage is above 80%"
-
   - alert: ContainerHighThrottleRate
     expr: rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1
     for: 1m
diff --git a/releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml b/releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml
new file mode 100644
index 000000000..f1c76c545
--- /dev/null
+++ b/releasenotes/notes/removes-bogus-ContainerVolumeUsage-alert-c973b61f598d85e5.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Removes bogus ContainerVolumeIoUsage alert. This rule wasn't correctly measuring
+    container volume IO and could cause spurious alerts.
diff --git a/releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml b/releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml
new file mode 100644
index 000000000..6279b6a00
--- /dev/null
+++ b/releasenotes/notes/reset-bls-entries-b2bded62c5887937.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    Add a new ``reset-bls-entries.yml`` custom playbook which will rename
+    existing Boot Loader Specification (BLS) entries using the current machine
+    ID for each host. This should fix an issue with GRUB not selecting the most
+    recent kernel during boot.