diff --git a/doc/source/configuration/release-train.rst b/doc/source/configuration/release-train.rst index 39bb846c3..04b583410 100644 --- a/doc/source/configuration/release-train.rst +++ b/doc/source/configuration/release-train.rst @@ -156,6 +156,7 @@ If your login credentials are incorrect, or lack the required permissions, you will see a 404 error during ``pulp-repo-sync.yml``: .. code-block:: console + TASK [stackhpc.pulp.pulp_repository : Sync RPM remotes into repositories] **************************************************************************************************************************************** An exception occurred during task execution. To see the full traceback, use -vvv. The error was: Exception: Task failed to complete. (failed; 404, message='Not Found', url=URL('https://ark.stackhpc.com/pulp/content/centos/8-stream/BaseOS/x86_64/os/20211122T102435')) failed: [localhost] (item=centos-stream-8-baseos-development) => changed=false diff --git a/etc/kayobe/ansible/rabbitmq-reset.yml b/etc/kayobe/ansible/rabbitmq-reset.yml new file mode 100644 index 000000000..df6d0c4ca --- /dev/null +++ b/etc/kayobe/ansible/rabbitmq-reset.yml @@ -0,0 +1,57 @@ +--- +# Reset a broken RabbitMQ cluster. +# Also restarts OpenStack services which may be broken. + +- name: Reset RabbitMQ + hosts: controllers + become: True + gather_facts: no + tags: + - rabbitmq-reset + vars: + - container_name: rabbitmq + tasks: + - name: Inspect the {{ container_name }} container + shell: + cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ container_name }}" + register: inspection + + - name: Ensure the {{ container_name }} container is running + command: "docker start {{ container_name }}" + when: inspection.stdout == 'false' + + - name: Wait for the {{ container_name }} container to reach state 'Running' + shell: + cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ container_name }}" + register: result + until: result.stdout == 'true' + retries: 10 + delay: 6 + + - name: Wait for the rabbitmq node to automatically start on container start + command: "docker exec -it {{ container_name }} /bin/bash -c 'rabbitmqctl wait /var/lib/rabbitmq/mnesia/rabbitmq.pid --timeout 60'" + when: inspection.stdout == 'false' + + - name: Stop app + command: "docker exec -it {{ container_name }} /bin/bash -c 'rabbitmqctl stop_app'" + + - name: Force reset app + command: "docker exec -it {{ container_name }} /bin/bash -c 'rabbitmqctl force_reset'" + + - name: Start app + command: "docker exec -it {{ container_name }} /bin/bash -c 'rabbitmqctl start_app'" + + - name: Wait for all nodes to join the cluster + command: "docker exec -it {{ container_name }} /bin/bash -c 'rabbitmqctl await_online_nodes {{ groups['controllers'] | length }}'" + +- name: Restart OpenStack services + hosts: controllers:compute + become: true + gather_facts: no + tags: + - restart-openstack + tasks: + # The following services can have problems if the cluster gets broken. + - name: Restart OpenStack services + shell: >- + docker ps -a | egrep '(cinder|heat|ironic|keystone|magnum|neutron|nova)' | awk '{ print $NF }' | xargs docker restart diff --git a/etc/kayobe/environments/ci-aio/stackhpc-ci.yml b/etc/kayobe/environments/ci-aio/stackhpc-ci.yml index 45d107d3e..a9827d82e 100644 --- a/etc/kayobe/environments/ci-aio/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-aio/stackhpc-ci.yml @@ -48,9 +48,16 @@ stackhpc_repo_ubuntu_cloud_archive_version: "{{ stackhpc_pulp_repo_ubuntu_cloud_ stackhpc_repo_ubuntu_focal_version: "{{ stackhpc_pulp_repo_ubuntu_focal_version }}" stackhpc_repo_ubuntu_focal_security_version: "{{ stackhpc_pulp_repo_ubuntu_focal_security_version }}" stackhpc_repo_docker_ce_ubuntu_version: "{{ stackhpc_pulp_repo_docker_ce_ubuntu_version }}" -stackhpc_repo_rocky_baseos_version: "{{ stackhpc_pulp_repo_rocky_8_6_baseos_version }}" -stackhpc_repo_rocky_appstream_version: "{{ stackhpc_pulp_repo_rocky_8_6_appstream_version }}" -stackhpc_repo_rocky_extras_version: "{{ stackhpc_pulp_repo_rocky_8_6_extras_version }}" +## Use derived vars from etc/kayobe/pulp.yml to switch between +## minor Rocky versions using stackhpc_pulp_repo_rocky_8_minor_version +stackhpc_repo_rocky_baseos_version: "{{ stackhpc_pulp_repo_rocky_8_baseos_version }}" +stackhpc_repo_rocky_appstream_version: "{{ stackhpc_pulp_repo_rocky_8_appstream_version }}" +stackhpc_repo_rocky_extras_version: "{{ stackhpc_pulp_repo_rocky_8_extras_version }}" + +# Rocky-and-CI-specific Pulp urls +stackhpc_repo_rocky_baseos_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/BaseOS/x86_64/os/{{ stackhpc_repo_rocky_baseos_version }}/" +stackhpc_repo_rocky_appstream_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/AppStream/x86_64/os/{{ stackhpc_repo_rocky_appstream_version }}/" +stackhpc_repo_rocky_extras_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/extras/x86_64/os/{{ stackhpc_repo_rocky_extras_version }}/" # Host and port of container registry. # Push built images to the development Pulp service registry.