From cc4150292c66b99aaf63a63e7438fa1cee3be129 Mon Sep 17 00:00:00 2001 From: Ken Wronkiewicz Date: Fri, 1 Jul 2016 17:56:30 -0700 Subject: [PATCH] Fix intf address for RabbitMQ and disable cluster for Kube enable_rabbitmq_cluster is now a "yes" by default but you can set it to "no" if you want to disable clustering under any circumstances. The agreement made at OpenStack in Austin was that Kolla-Kubernetes would concentrate on RabbitMQ and MariaDB without clustering but with persistent storage and workload migration, then examine how to do proper distributed functionality as the project progresses, so I am just following what we'd already agreed upon. First, it helps us deal with issues of version upgrades without dealing with clustered version upgrades and the synchronization thereof. Second, it provides an alternative model for durability when used in Kubernetes. Understand that, if we disable RabbitMQ's clustering, Kubernetes is still able to re-schedule the queue off of a failed node in ways that Kolla-Ansible is not. There are known issues with RabbitMQ clustering, especially with auto-heal turned on. For many small-to-mid-sized clusters, it's going to provide for a better operator experience to have the known potential for a 30 second blip after RabbitMQ node failure than it is to have the known potential for partition and data loss and/or manual operations after you've turned off auto-heal. Kolla-kubernetes has already turned off host networking for the RabbitMQ pod; it's safe to set the interface address in the Kubernetes context. The question was asked why don't I just set the RabbitMQ cluster to be a single instance. It's unlikely that Kubernetes RabbitMQ with a PetSet will be clustered in the same declarative fashion as the rabbitmq-clusterer plugin. Easier to just disable it and worry about how to configure the kube-friendly clustered RabbitMQ at a later point in time. 
Furthermore, it's an entirely valid case for many OpenStack control planes hosted atop Kolla-Kubernetes to accept the possibility of a 30-60 second blip in lieu of the long and questionable history of RabbitMQ clustering in production. Co-authored-by: Ryan Hallisey Change-Id: I7f0cb22d29a418fce4af8d69f63739859173d746 Partially-implements: blueprint api-interface-bind-address-override --- ansible/group_vars/all.yml | 2 +- .../roles/rabbitmq/templates/rabbitmq-env.conf.j2 | 4 +++- ansible/roles/rabbitmq/templates/rabbitmq.config.j2 | 12 +++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index 465bf6e1fe..671e8a1971 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -257,7 +257,7 @@ enable_kibana: "{{ 'yes' if enable_central_logging | bool else 'no' }}" # RabbitMQ options #################### rabbitmq_user: "openstack" - +rabbitmq_version: "rabbitmq_server-3.5.7/plugins/rabbitmq_clusterer-3.5.x-189b3a81.ez/rabbitmq_clusterer-3.5.x-189b3a81/ebin" #################### # HAProxy options diff --git a/ansible/roles/rabbitmq/templates/rabbitmq-env.conf.j2 b/ansible/roles/rabbitmq/templates/rabbitmq-env.conf.j2 index 47ef8fe06c..41add3fa53 100644 --- a/ansible/roles/rabbitmq/templates/rabbitmq-env.conf.j2 +++ b/ansible/roles/rabbitmq/templates/rabbitmq-env.conf.j2 @@ -1,8 +1,10 @@ RABBITMQ_NODENAME=rabbit +{% if orchestration_engine == 'ANSIBLE' %} RABBITMQ_BOOT_MODULE=rabbit_clusterer +RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS="-pa /usr/lib/rabbitmq/lib/{{ rabbitmq_version }}" +{%- endif %} RABBITMQ_LOG_BASE=/var/log/kolla/rabbitmq -RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS="-pa /usr/lib/rabbitmq/lib/rabbitmq_server-3.5.7/plugins/rabbitmq_clusterer-3.5.x-189b3a81.ez/rabbitmq_clusterer-3.5.x-189b3a81/ebin" # TODO(sdake, vhosakot) # erlang by default binds to wildcard (all interfaces) and can potentially diff --git a/ansible/roles/rabbitmq/templates/rabbitmq.config.j2 
b/ansible/roles/rabbitmq/templates/rabbitmq.config.j2 index 7db5f020d7..f0d85b0f8e 100644 --- a/ansible/roles/rabbitmq/templates/rabbitmq.config.j2 +++ b/ansible/roles/rabbitmq/templates/rabbitmq.config.j2 @@ -1,22 +1,24 @@ [ {kernel, [ - {inet_dist_use_interface, {% raw %}{{% endraw %}{{ hostvars[inventory_hostname]['ansible_' + api_interface]['ipv4']['address'] | regex_replace('\.', ',') }}}}, + {inet_dist_use_interface, {% raw %}{{% endraw %}{{ api_interface_address | regex_replace('\.', ',') }}}}, {inet_dist_listen_min, {{ rabbitmq_cluster_port }}}, {inet_dist_listen_max, {{ rabbitmq_cluster_port }}} ]}, {rabbit, [ {tcp_listeners, [ - {"{{ hostvars[inventory_hostname]['ansible_' + api_interface]['ipv4']['address'] }}", {{ rabbitmq_port }}} - ]}, + {"{{ api_interface_address }}", {{ rabbitmq_port }}} + ]}{% if orchestration_engine == 'ANSIBLE' %}, {cluster_partition_handling, autoheal} + {%- endif %} ]}, {rabbitmq_management, [ {listener, [ - {ip, "{{ hostvars[inventory_hostname]['ansible_' + api_interface]['ipv4']['address'] }}"}, + {ip, "{{ api_interface_address }}"}, {port, {{ rabbitmq_management_port }}} ]}, {load_definitions, "/etc/rabbitmq/definitions.json"} - ]}, + ]}{% if orchestration_engine == 'ANSIBLE' %}, {rabbitmq_clusterer, [{config, "/etc/rabbitmq/rabbitmq-clusterer.config"}]} +{%- endif %} ]. % EOF