From 3933e4520ba512b5bf095a28b791c0bac12c5dd0 Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 6 Dec 2022 14:41:46 +0000
Subject: [PATCH] Hammer playbook for rabbitmq

---
Review note: the playbook below was revised after this patch was generated,
so the blob id on the "index" line refers to the pre-revision content.

 etc/kayobe/ansible/rabbitmq-reset.yml | 86 +++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 etc/kayobe/ansible/rabbitmq-reset.yml

diff --git a/etc/kayobe/ansible/rabbitmq-reset.yml b/etc/kayobe/ansible/rabbitmq-reset.yml
new file mode 100644
index 000000000..df6d0c4ca
--- /dev/null
+++ b/etc/kayobe/ansible/rabbitmq-reset.yml
@@ -0,0 +1,86 @@
+---
+# Reset a broken RabbitMQ cluster.
+# Also restarts OpenStack services which may be broken.
+#
+# The first play runs "rabbitmqctl force_reset" on the targeted controllers,
+# which discards each node's existing state (see rabbitmqctl(8)). Only use it
+# when the cluster cannot be recovered any other way.
+#
+# Each play has its own tag (rabbitmq-reset, restart-openstack), so either
+# half can be selected on its own.
+
+- name: Reset RabbitMQ
+  hosts: controllers
+  become: true
+  gather_facts: false
+  tags:
+    - rabbitmq-reset
+  vars:
+    # Name of the RabbitMQ container on each controller.
+    container_name: rabbitmq
+  tasks:
+    # Expected to print "true" or "false". The Go template braces are emitted
+    # through Jinja string literals so that Ansible does not evaluate them.
+    - name: Inspect the {{ container_name }} container
+      command:
+        cmd: >-
+          docker container inspect
+          --format '{{ '{{' }} .State.Running {{ '}}' }}'
+          {{ container_name }}
+      register: inspection
+      changed_when: false
+
+    - name: Ensure the {{ container_name }} container is running
+      command: "docker start {{ container_name }}"
+      when: inspection.stdout == 'false'
+
+    # Retries up to 10 times, 6 seconds apart, until docker reports "true".
+    - name: Wait for the {{ container_name }} container to reach state 'Running'
+      command:
+        cmd: >-
+          docker container inspect
+          --format '{{ '{{' }} .State.Running {{ '}}' }}'
+          {{ container_name }}
+      register: result
+      until: result.stdout == 'true'
+      retries: 10
+      delay: 6
+      changed_when: false
+
+    # Only needed when this play had to start the container itself.
+    # "docker exec" is run without "-it" throughout: "-t" makes docker abort
+    # when stdin is not a terminal, which Ansible does not guarantee.
+    - name: Wait for the rabbitmq node to automatically start on container start
+      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl wait /var/lib/rabbitmq/mnesia/rabbitmq.pid --timeout 60'"
+      when: inspection.stdout == 'false'
+
+    - name: Stop app
+      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl stop_app'"
+
+    - name: Force reset app
+      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl force_reset'"
+
+    - name: Start app
+      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl start_app'"
+
+    # The expected node count is the size of the "controllers" inventory group.
+    - name: Wait for all nodes to join the cluster
+      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl await_online_nodes {{ groups['controllers'] | length }}'"
+
+- name: Restart OpenStack services
+  hosts: controllers:compute
+  become: true
+  gather_facts: false
+  tags:
+    - restart-openstack
+  tasks:
+    # The following services can have problems if the cluster gets broken.
+    # grep sees whole "docker ps -a" lines and awk keeps the last column of
+    # each match. "xargs -r" skips "docker restart" when nothing matched, so
+    # hosts without any of these containers do not fail the task.
+    - name: Restart OpenStack services
+      shell: >-
+        docker ps -a
+        | grep -E '(cinder|heat|ironic|keystone|magnum|neutron|nova)'
+        | awk '{ print $NF }'
+        | xargs -r docker restart