diff --git a/defaults/main.yml b/defaults/main.yml
index c6c71461..cbfded02 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -69,6 +69,17 @@ galera_running_and_bootstrapped: false
 
 galera_monitoring_user: monitoring
 galera_monitoring_user_password: ""
+# NOTE(cloudnull): Limit the traffic source allowed to query the galera
+#                  cluster status check. By default the first interface in
+#                  "ansible_interfaces" other than "lo" is used and its
+#                  network and netmask facts are combined into a CIDR. To
+#                  override this, set "galera_monitoring_default_network" to
+#                  the fact name of the management interface, IE:
+#                  "ansible_eth1", or set "galera_monitoring_allowed_source".
+galera_monitoring_default_network: "{{ 'ansible_' + ((ansible_interfaces | difference(['lo']))[0] | replace('-', '_')) }}"
+galera_monitoring_network: "{{ hostvars[inventory_hostname][galera_monitoring_default_network]['ipv4']['network'] }}"
+galera_monitoring_netmask: "{{ (galera_monitoring_network + '/' + hostvars[inventory_hostname][galera_monitoring_default_network]['ipv4']['netmask']) | ipaddr('prefix') }}"
+galera_monitoring_allowed_source: "{{ galera_monitoring_network }}/{{ galera_monitoring_netmask }}"
 galera_root_user: root
 
 # WARNING: This option is deprecated and will be removed in v12.0
diff --git a/handlers/main.yml b/handlers/main.yml
index 5e47e9ce..b0a1b06d 100644
--- a/handlers/main.yml
+++ b/handlers/main.yml
@@ -103,3 +103,8 @@
 - meta: noop
   listen: Manage LB
   when: false
+
+- name: Restart xinetd
+  service:
+    name: xinetd
+    state: restarted
diff --git a/releasenotes/notes/clustecheck-9311d05fb32f13b3.yaml b/releasenotes/notes/clustecheck-9311d05fb32f13b3.yaml
new file mode 100644
index 00000000..f40f9799
--- /dev/null
+++ b/releasenotes/notes/clustecheck-9311d05fb32f13b3.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - The galera cluster now supports cluster health checks over HTTP using port
+    9200. The new cluster check ensures a node is healthy by running a simple
+    query against the wsrep sync status using the monitoring user. This change
+    will provide for a more robust cluster check ensuring we have the most
+    fault tolerant galera cluster possible.
diff --git a/tasks/galera_post_install.yml b/tasks/galera_post_install.yml
index d86a0f03..3e1cf496 100644
--- a/tasks/galera_post_install.yml
+++ b/tasks/galera_post_install.yml
@@ -160,3 +160,31 @@
   command: "systemctl daemon-reload"
   when:
     - ansible_service_mgr == 'systemd'
+
+- name: Create clustercheck script
+  template:
+    src: "clustercheck.j2"
+    dest: "/usr/local/bin/clustercheck"
+    mode: "0755"
+  tags:
+    - galera-config
+
+- name: Create mysqlchk config
+  template:
+    src: "mysqlchk.j2"
+    dest: "/etc/xinetd.d/mysqlchk"
+    mode: "0644"
+  notify:
+    - Restart xinetd
+  tags:
+    - galera-config
+
+- name: Add galera service check to services
+  lineinfile:
+    dest: /etc/services
+    state: present
+    regexp: '^mysqlchk'
+    line: 'mysqlchk 9200/tcp # MySQL check'
+    backup: true
+  tags:
+    - galera-config
diff --git a/templates/clustercheck.j2 b/templates/clustercheck.j2
new file mode 100644
index 00000000..2c77e779
--- /dev/null
+++ b/templates/clustercheck.j2
@@ -0,0 +1,110 @@
+#!/bin/bash
+#
+# Script to make a proxy (ie HAProxy) capable of monitoring Percona XtraDB Cluster nodes properly
+#
+# Author: Olaf van Zandwijk
+# Author: Raghavendra Prabhu
+#
+# Documentation and download: https://github.com/olafz/percona-clustercheck
+#
+# Based on the original script from Unai Rodriguez
+#
+
+# {{ ansible_managed }}
+
+if [[ $1 == '-h' || $1 == '--help' ]];then
+    echo "Usage: $0 <user> <pass> <available_when_donor=0|1> <log_file> <available_when_readonly=0|1> <defaults_extra_file>"
+    exit
+fi
+
+# if the disabled file is present, return 503. This allows
+# admins to manually remove a node from a cluster easily.
+if [ -e "/var/tmp/clustercheck.disabled" ]; then
+    # Shell return-code is 1
+    echo -en "HTTP/1.1 503 Service Unavailable\r\n"
+    echo -en "Content-Type: text/plain\r\n"
+    echo -en "Connection: close\r\n"
+    echo -en "Content-Length: 51\r\n"
+    echo -en "\r\n"
+    echo -en "Percona XtraDB Cluster Node is manually disabled.\r\n"
+    sleep 0.1
+    exit 1
+fi
+
+MYSQL_USERNAME="${1-{{ galera_monitoring_user }}}"
+MYSQL_PASSWORD="${2-{{ galera_monitoring_user_password }}}"
+AVAILABLE_WHEN_DONOR=${3:-0}
+ERR_FILE="${4:-/dev/null}"
+AVAILABLE_WHEN_READONLY=${5:-1}
+DEFAULTS_EXTRA_FILE=${6:-/etc/my.cnf}
+
+#Timeout exists for instances where mysqld may be hung
+TIMEOUT=10
+
+EXTRA_ARGS=""
+if [[ -n "$MYSQL_USERNAME" ]]; then
+    EXTRA_ARGS="$EXTRA_ARGS --user=${MYSQL_USERNAME}"
+fi
+
+if [[ -n "$MYSQL_PASSWORD" ]]; then
+    EXTRA_ARGS="$EXTRA_ARGS --password=${MYSQL_PASSWORD}"
+else
+    EXTRA_ARGS="$EXTRA_ARGS --password="
+fi
+
+if [[ -r $DEFAULTS_EXTRA_FILE ]]; then
+    MYSQL_CMDLINE="mysql --defaults-extra-file=$DEFAULTS_EXTRA_FILE -nNE --connect-timeout=$TIMEOUT \
+                    ${EXTRA_ARGS}"
+else
+    MYSQL_CMDLINE="mysql -nNE --connect-timeout=$TIMEOUT ${EXTRA_ARGS}"
+fi
+
+#
+# Perform the query to check the wsrep_local_state
+#
+WSREP_STATUS=$($MYSQL_CMDLINE -e "SHOW STATUS LIKE 'wsrep_local_state';" \
+    2>${ERR_FILE} | tail -1 2>>${ERR_FILE})
+
+if [[ "${WSREP_STATUS}" == "4" ]] || [[ "${WSREP_STATUS}" == "2" && ${AVAILABLE_WHEN_DONOR} == 1 ]]; then
+    # Check only when set to 0 to avoid latency in response.
+    if [[ $AVAILABLE_WHEN_READONLY -eq 0 ]];then
+        READ_ONLY=$($MYSQL_CMDLINE -e "SHOW GLOBAL VARIABLES LIKE 'read_only';" \
+                    2>${ERR_FILE} | tail -1 2>>${ERR_FILE})
+
+        if [[ "${READ_ONLY}" == "ON" ]];then
+            # Percona XtraDB Cluster node local state is 'Synced', but it is in
+            # read-only mode. The variable AVAILABLE_WHEN_READONLY is set to 0.
+            # => return HTTP 503
+            # Shell return-code is 1
+            echo -en "HTTP/1.1 503 Service Unavailable\r\n"
+            echo -en "Content-Type: text/plain\r\n"
+            echo -en "Connection: close\r\n"
+            echo -en "Content-Length: 43\r\n"
+            echo -en "\r\n"
+            echo -en "Percona XtraDB Cluster Node is read-only.\r\n"
+            sleep 0.1
+            exit 1
+        fi
+    fi
+    # Percona XtraDB Cluster node local state is 'Synced' => return HTTP 200
+    # Shell return-code is 0
+    echo -en "HTTP/1.1 200 OK\r\n"
+    echo -en "Content-Type: text/plain\r\n"
+    echo -en "Connection: close\r\n"
+    echo -en "Content-Length: 40\r\n"
+    echo -en "\r\n"
+    echo -en "Percona XtraDB Cluster Node is synced.\r\n"
+    sleep 0.1
+    exit 0
+else
+    # Percona XtraDB Cluster node local state is not 'Synced' => return HTTP 503
+    # Shell return-code is 1
+    echo -en "HTTP/1.1 503 Service Unavailable\r\n"
+    echo -en "Content-Type: text/plain\r\n"
+    echo -en "Connection: close\r\n"
+    echo -en "Content-Length: 44\r\n"
+    echo -en "\r\n"
+    echo -en "Percona XtraDB Cluster Node is not synced.\r\n"
+    sleep 0.1
+    exit 1
+fi
diff --git a/templates/mysqlchk.j2 b/templates/mysqlchk.j2
new file mode 100644
index 00000000..25a19bcd
--- /dev/null
+++ b/templates/mysqlchk.j2
@@ -0,0 +1,16 @@
+# default: on
+# description: mysqlchk
+# {{ ansible_managed }}
+service mysqlchk
+{
+    disable         = no
+    flags           = REUSE
+    socket_type     = stream
+    port            = 9200
+    wait            = no
+    user            = nobody
+    server          = /usr/local/bin/clustercheck
+    log_on_failure  += USERID
+    only_from       = {{ galera_monitoring_allowed_source }}
+    per_source      = UNLIMITED
+}
diff --git a/tests/test-galera-server-functional.yml b/tests/test-galera-server-functional.yml
index 8eea4660..bb1fe1bf 100644
--- a/tests/test-galera-server-functional.yml
+++ b/tests/test-galera-server-functional.yml
@@ -27,6 +27,8 @@
               --skip-column-names
       register: wsrep_incoming_addresses
       changed_when: false
+      tags:
+        - skip_ansible_lint
     - name: Check cluster local state
       command: |
         mysql -h {{ ansible_host }} \
@@ -36,6 +38,8 @@
               --skip-column-names
       register: wsrep_local_state_comment
       changed_when: false
+      tags:
+        - skip_ansible_lint
     - name: Check cluster evs state
       command: |
         mysql -h {{ ansible_host }} \
@@ -45,6 +49,8 @@
               --skip-column-names
       register: wsrep_evs_state
       changed_when: false
+      tags:
+        - skip_ansible_lint
     - name: Check contents
       assert:
         that:
diff --git a/vars/redhat-7.yml b/vars/redhat-7.yml
index db4d4a51..218c37a2 100644
--- a/vars/redhat-7.yml
+++ b/vars/redhat-7.yml
@@ -27,6 +27,7 @@ galera_server_required_distro_packages:
   - libgcrypt
   - MariaDB-client
   - MariaDB-devel
+  - xinetd
 
 galera_etc_conf_file: "/etc/mysql/my.cnf"
 galera_etc_include_dir: "/etc/mysql/conf.d"
diff --git a/vars/suse-42.yml b/vars/suse-42.yml
index eb62f15f..7d7a8402 100644
--- a/vars/suse-42.yml
+++ b/vars/suse-42.yml
@@ -26,6 +26,7 @@ galera_server_required_distro_packages:
   - libmysqlclient-devel
   - mariadb-client
   - qpress
+  - xinetd
 
 galera_etc_conf_file: "/etc/my.cnf"
 galera_etc_include_dir: "/etc/my.cnf.d"
diff --git a/vars/ubuntu-16.04.yml b/vars/ubuntu-16.04.yml
index 83c256c0..de08c7ef 100644
--- a/vars/ubuntu-16.04.yml
+++ b/vars/ubuntu-16.04.yml
@@ -38,6 +38,7 @@ galera_server_required_distro_packages:
   - libstdc++6
   - python-software-properties
   - software-properties-common
+  - xinetd
 
 galera_etc_conf_file: "/etc/mysql/my.cnf"
 galera_etc_include_dir: "/etc/mysql/conf.d"