Skip to content

Commit

Permalink
Implement a proper WSREP check for galera
Browse files Browse the repository at this point in the history
The galera cluster relies on WSREP for cluster consistency. While the
default MySQL monitor lets us know when a database node is minimally
functional, it does not provide the ability to query the node state,
which is what allows load balancers, operators, and deployers to know a
node is healthy before it is allowed to accept connections. This change
implements the clustercheck script as provided by the fine folks at
Percona. The implementation of this check follows the guidelines noted
here [0]. With this in place, we'll be able to convert our haproxy check
for the galera cluster nodes to use an HTTP check on port 9200 instead
of the default MySQL login, which will provide for a more robust and
fault-tolerant cluster.

[0] https://www.percona.com/doc/percona-xtradb-cluster/LATEST/howtos/virt_sandbox.html
Closes-Bug: #1665667

Change-Id: Ie1b3b9724dd33de1d90634166e585ecceb1f4c96
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
  • Loading branch information
cloudnull committed Nov 29, 2017
1 parent 546c402 commit 94821f8
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 0 deletions.
11 changes: 11 additions & 0 deletions defaults/main.yml
Expand Up @@ -69,6 +69,17 @@ galera_running_and_bootstrapped: false

galera_monitoring_user: monitoring
galera_monitoring_user_password: ""
# NOTE(cloudnull): Set an interface or CIDR to limit the traffic source when
#                  monitoring the galera cluster status. By default the
#                  first entry of the ansible_interfaces list other than
#                  "lo" is used, and the network and netmask of that
#                  interface are combined into a CIDR.
#                  "galera_monitoring_default_network" is used directly as a
#                  hostvars key, so it must hold the name of the interface
#                  fact rather than the bare interface name. To use eth1 for
#                  management traffic set it to "ansible_eth1". To supply a
#                  CIDR directly, override "galera_monitoring_allowed_source".
# Interface facts are published with '-' replaced by '_' (the facts for br-mgmt
# live under ansible_br_mgmt), so the interface name is normalised the same way
# before the fact name is built.
galera_monitoring_default_network: "{{ 'ansible_' + ((ansible_interfaces | difference(['lo']))[0] | replace('-', '_')) }}"
# Network address of the selected interface, e.g. 10.0.0.0.
galera_monitoring_network: "{{ hostvars[inventory_hostname][galera_monitoring_default_network]['ipv4']['network'] }}"
# Despite the name this is the prefix length: ipaddr('prefix') is applied to
# "<network>/<dotted netmask>".
galera_monitoring_netmask: "{{ (galera_monitoring_network + '/' + hostvars[inventory_hostname][galera_monitoring_default_network]['ipv4']['netmask']) | ipaddr('prefix') }}"
# CIDR rendered into the xinetd "only_from" setting of the mysqlchk service.
galera_monitoring_allowed_source: "{{ galera_monitoring_network }}/{{ galera_monitoring_netmask }}"
galera_root_user: root

# WARNING: This option is deprecated and will be removed in v12.0
Expand Down
5 changes: 5 additions & 0 deletions handlers/main.yml
Expand Up @@ -103,3 +103,8 @@
# NOTE(review): this handler performs no action (meta: noop, when: false) and
# only subscribes to the "Manage LB" topic; presumably a placeholder so that
# notifying "Manage LB" always has a listener -- confirm against the rest of
# the handlers file.
- meta: noop
listen: Manage LB
when: false

# Restart xinetd; notified by the task that writes /etc/xinetd.d/mysqlchk.
- name: Restart xinetd
  service:
    state: "restarted"
    name: "xinetd"
7 changes: 7 additions & 0 deletions releasenotes/notes/clustecheck-9311d05fb32f13b3.yaml
@@ -0,0 +1,7 @@
---
features:
- The galera cluster now supports cluster health checks over HTTP using port
9200. The new cluster check ensures a node is healthy by running a simple
query against the wsrep sync status using the monitoring user. This change
provides a more robust cluster check, ensuring we have the most fault
tolerant galera cluster possible.
28 changes: 28 additions & 0 deletions tasks/galera_post_install.yml
Expand Up @@ -160,3 +160,31 @@
command: "systemctl daemon-reload"
when:
- ansible_service_mgr == 'systemd'

# Render the WSREP health-check script; the mysqlchk xinetd service points at
# this path as its server program.
- name: Create clustercheck script
  tags:
    - galera-config
  template:
    dest: "/usr/local/bin/clustercheck"
    src: "clustercheck.j2"
    mode: "0755"

# Install the xinetd service definition for the health check and restart
# xinetd whenever the rendered file changes.
- name: Create mysqlchk config
  tags:
    - galera-config
  notify: Restart xinetd
  template:
    dest: "/etc/xinetd.d/mysqlchk"
    src: "mysqlchk.j2"
    mode: "0644"

# Register the mysqlchk service name for TCP port 9200 in /etc/services. Any
# existing line starting with "mysqlchk" is replaced, and a backup copy of the
# file is kept before it is modified.
- name: Add galera service check to services
  lineinfile:
    dest: /etc/services
    state: present
    regexp: '^mysqlchk'
    line: 'mysqlchk 9200/tcp # MySQL check'
    backup: true
  # Restart xinetd when the entry changes, so a fix to /etc/services alone
  # still reaches the running daemon.
  notify:
    - Restart xinetd
  tags:
    - galera-config
110 changes: 110 additions & 0 deletions templates/clustercheck.j2
@@ -0,0 +1,110 @@
#!/bin/bash
#
# Script to make a proxy (ie HAProxy) capable of monitoring Percona XtraDB Cluster nodes properly
#
# Author: Olaf van Zandwijk <olaf.vanzandwijk@nedap.com>
# Author: Raghavendra Prabhu <raghavendra.prabhu@percona.com>
#
# Documentation and download: https://github.com/olafz/percona-clustercheck
#
# Based on the original script from Unai Rodriguez
#
# Writes a complete HTTP/1.1 response to stdout and exits:
#   200 / exit 0 - wsrep_local_state is 4 ('Synced'), or 2 while
#                  available_when_donor is 1
#   503 / exit 1 - any other state, the node is read-only while
#                  available_when_readonly is 0, or the file
#                  /var/tmp/clustercheck.disabled exists
#
# Positional arguments (all optional):
#   $1 user                     MySQL user (default rendered from the template)
#   $2 pass                     MySQL password (default rendered from the template)
#   $3 available_when_donor     0|1, default 0
#   $4 log_file                 where mysql stderr is written, default /dev/null
#   $5 available_when_readonly  0|1, default 1
#   $6 defaults_extra_file      default /etc/my.cnf, used only when readable
#

# {{ ansible_managed }}

if [[ $1 == '-h' || $1 == '--help' ]]; then
    echo "Usage: $0 <user> <pass> <available_when_donor=0|1> <log_file> <available_when_readonly=0|1> <defaults_extra_file>"
    exit
fi

# Write one HTTP/1.1 text/plain response to stdout and terminate the script.
#   $1 - status code and reason phrase, e.g. "200 OK"
#   $2 - Content-Length: byte length of $3 plus 2 for the trailing CRLF
#   $3 - response body, without the trailing CRLF
#   $4 - exit code of the script
http_reply() {
    printf 'HTTP/1.1 %s\r\n' "$1"
    printf 'Content-Type: text/plain\r\n'
    printf 'Connection: close\r\n'
    printf 'Content-Length: %s\r\n' "$2"
    printf '\r\n'
    printf '%s\r\n' "$3"
    # Short pause before exiting, kept from the original script.
    sleep 0.1
    exit "$4"
}

# if the disabled file is present, return 503. This allows
# admins to manually remove a node from a cluster easily.
if [ -e "/var/tmp/clustercheck.disabled" ]; then
    http_reply "503 Service Unavailable" 51 "Percona XtraDB Cluster Node is manually disabled." 1
fi

MYSQL_USERNAME="${1-{{ galera_monitoring_user }}}"
MYSQL_PASSWORD="${2-{{ galera_monitoring_user_password }}}"
AVAILABLE_WHEN_DONOR=${3:-0}
ERR_FILE="${4:-/dev/null}"
AVAILABLE_WHEN_READONLY=${5:-1}
DEFAULTS_EXTRA_FILE=${6:-/etc/my.cnf}

# Timeout exists for instances where mysqld may be hung
TIMEOUT=10

# The client command is assembled as an array so that every option stays a
# single argument even when a user name, password or path contains whitespace
# or glob characters.
MYSQL_CMD=(mysql)
if [[ -r $DEFAULTS_EXTRA_FILE ]]; then
    # mysql requires --defaults-extra-file to be the first option.
    MYSQL_CMD+=("--defaults-extra-file=${DEFAULTS_EXTRA_FILE}")
fi
MYSQL_CMD+=(-nNE "--connect-timeout=${TIMEOUT}")

if [[ -n "$MYSQL_USERNAME" ]]; then
    MYSQL_CMD+=("--user=${MYSQL_USERNAME}")
fi

# An explicit (possibly empty) --password= is always passed, so the client
# never stops to prompt for one.
# NOTE(review): the password is visible in the process list while mysql runs;
# consider moving the credentials into the defaults-extra-file.
MYSQL_CMD+=("--password=${MYSQL_PASSWORD}")

#
# Perform the query to check the wsrep_local_state
#
WSREP_STATUS=$("${MYSQL_CMD[@]}" -e "SHOW STATUS LIKE 'wsrep_local_state';" \
    2>"${ERR_FILE}" | tail -1 2>>"${ERR_FILE}")

if [[ "${WSREP_STATUS}" == "4" ]] || [[ "${WSREP_STATUS}" == "2" && ${AVAILABLE_WHEN_DONOR} == 1 ]]; then
    # Check only when set to 0 to avoid latency in response.
    if [[ $AVAILABLE_WHEN_READONLY -eq 0 ]]; then
        READ_ONLY=$("${MYSQL_CMD[@]}" -e "SHOW GLOBAL VARIABLES LIKE 'read_only';" \
            2>"${ERR_FILE}" | tail -1 2>>"${ERR_FILE}")

        if [[ "${READ_ONLY}" == "ON" ]]; then
            # Percona XtraDB Cluster node local state is 'Synced', but it is in
            # read-only mode. The variable AVAILABLE_WHEN_READONLY is set to 0.
            # => return HTTP 503
            # Shell return-code is 1
            http_reply "503 Service Unavailable" 43 "Percona XtraDB Cluster Node is read-only." 1
        fi
    fi
    # Percona XtraDB Cluster node local state is 'Synced' => return HTTP 200
    # Shell return-code is 0
    http_reply "200 OK" 40 "Percona XtraDB Cluster Node is synced." 0
else
    # Percona XtraDB Cluster node local state is not 'Synced' => return HTTP 503
    # Shell return-code is 1
    http_reply "503 Service Unavailable" 44 "Percona XtraDB Cluster Node is not synced." 1
fi
16 changes: 16 additions & 0 deletions templates/mysqlchk.j2
@@ -0,0 +1,16 @@
# default: on
# description: mysqlchk
# {{ ansible_managed }}
#
# xinetd service definition that runs /usr/local/bin/clustercheck for every
# TCP connection made to port 9200.
service mysqlchk
{
# The service is enabled.
disable = no
flags = REUSE
socket_type = stream
port = 9200
# wait = no: xinetd keeps listening and starts a new server process for each
# connection instead of handing the socket to a single server.
wait = no
# The check script runs as the unprivileged "nobody" account.
user = nobody
server = /usr/local/bin/clustercheck
log_on_failure += USERID
# Connections are only accepted from the rendered monitoring source
# (the galera_monitoring_allowed_source variable).
only_from = {{ galera_monitoring_allowed_source }}
# No limit on the number of concurrent connections from one source address.
per_source = UNLIMITED
}
6 changes: 6 additions & 0 deletions tests/test-galera-server-functional.yml
Expand Up @@ -27,6 +27,8 @@
--skip-column-names
register: wsrep_incoming_addresses
changed_when: false
tags:
- skip_ansible_lint
- name: Check cluster local state
command: |
mysql -h {{ ansible_host }} \
Expand All @@ -36,6 +38,8 @@
--skip-column-names
register: wsrep_local_state_comment
changed_when: false
tags:
- skip_ansible_lint
- name: Check cluster evs state
command: |
mysql -h {{ ansible_host }} \
Expand All @@ -45,6 +49,8 @@
--skip-column-names
register: wsrep_evs_state
changed_when: false
tags:
- skip_ansible_lint
- name: Check contents
assert:
that:
Expand Down
1 change: 1 addition & 0 deletions vars/redhat-7.yml
Expand Up @@ -27,6 +27,7 @@ galera_server_required_distro_packages:
- libgcrypt
- MariaDB-client
- MariaDB-devel
- xinetd

galera_etc_conf_file: "/etc/mysql/my.cnf"
galera_etc_include_dir: "/etc/mysql/conf.d"
Expand Down
1 change: 1 addition & 0 deletions vars/suse-42.yml
Expand Up @@ -26,6 +26,7 @@ galera_server_required_distro_packages:
- libmysqlclient-devel
- mariadb-client
- qpress
- xinetd

galera_etc_conf_file: "/etc/my.cnf"
galera_etc_include_dir: "/etc/my.cnf.d"
Expand Down
1 change: 1 addition & 0 deletions vars/ubuntu-16.04.yml
Expand Up @@ -38,6 +38,7 @@ galera_server_required_distro_packages:
- libstdc++6
- python-software-properties
- software-properties-common
- xinetd

galera_etc_conf_file: "/etc/mysql/my.cnf"
galera_etc_include_dir: "/etc/mysql/conf.d"
Expand Down

0 comments on commit 94821f8

Please sign in to comment.