Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 856 lines (753 sloc) 28.2 KB
#!/bin/bash
# Authors: Bartosz Kupidura (Mirantis): Rewrite RA to support mysql/galera
# Sergii Golovatiuk (Mirantis): Rewrite RA to support mysql/galera
# Alan Robertson: DB2 Script
# Jakub Janczak: rewrite as MySQL
# Andrew Beekhof: cleanup and import
# Sebastian Reitenbach: add OpenBSD defaults, more cleanup
# Narayan Newton: add Gentoo/Debian defaults
# Marian Marinov, Florian Haas: add replication capability
# Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
# Support: openstack@lists.launchpad.net
# License: GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
# 2005-2010 Linux-HA contributors
# 2014 Mirantis Inc.
#######################################################################
# Initialization:
# Load the shared OCF helper library and the Fuel-specific helper library.
# Each directory can be overridden from the environment; otherwise it is
# derived from OCF_ROOT.
# NOTE(review): the second default is evaluated only after ocf-shellfuncs has
# been sourced, so it sees whatever OCF_ROOT value is in effect at that point.
# Confirm before reordering these lines.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
: ${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}
. ${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs
#######################################################################
# Fill in some defaults if no values are specified
# Logging identity, used only when the environment does not provide one.
: "${HA_LOGTAG="ocf-mysql-wss"}"
: "${HA_LOGFACILITY="daemon"}"

# Each resource parameter is declared as a <name>_default value followed by
# the fallback that applies it. The "=" expansion (not ":=") only fills in
# parameters that are unset, so an explicitly empty parameter stays empty.
OCF_RESKEY_binary_default="/usr/bin/mysqld_safe"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
MYSQL_BINDIR="$(dirname ${OCF_RESKEY_binary})"

OCF_RESKEY_client_binary_default="/usr/bin/mysql"
: "${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}"

OCF_RESKEY_config_default="/etc/mysql/my.cnf"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"

OCF_RESKEY_datadir_default="/var/lib/mysql"
: "${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}"

OCF_RESKEY_user_default="mysql"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"

OCF_RESKEY_group_default="mysql"
: "${OCF_RESKEY_group=${OCF_RESKEY_group_default}}"

OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"

OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
: "${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}"

OCF_RESKEY_test_user_default="root"
: "${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}"

OCF_RESKEY_test_passwd_default=""
: "${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}"

OCF_RESKEY_test_conf_default=""
: "${OCF_RESKEY_test_conf=${OCF_RESKEY_test_conf_default}}"

OCF_RESKEY_additional_parameters_default=""
: "${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}"

OCF_RESKEY_master_timeout_default="300"
: "${OCF_RESKEY_master_timeout=${OCF_RESKEY_master_timeout_default}}"
#######################################################################
# Convenience variables
# Short names used by the functions below.
HOSTNAME=$(uname -n)
MYSQL=$OCF_RESKEY_client_binary

# Options for every local client call: connect through the configured socket.
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"

# Credentials for status queries: a defaults file when test_conf is set,
# otherwise the test user and password on the command line.
if [ -z "${OCF_RESKEY_test_conf}" ]; then
    MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
else
    MYSQL_OPTIONS_TEST="--defaults-extra-file=${OCF_RESKEY_test_conf} ${MYSQL_OPTIONS_LOCAL}"
fi
#######################################################################
# Print a short help text to stdout: the supported actions and one line
# describing each of them.
usage() {
    printf '%s\n' \
        "usage: $0 (start|stop|meta-data|validate-all|monitor)" \
        "$0 manages a MySQL Database as an HA resource." \
        "The 'start' operation starts the database." \
        "The 'stop' operation stops the database." \
        "The 'monitor' operation reports whether the database seems to be working" \
        "The 'validate-all' operation reports whether the parameters are valid"
}
# Print the resource agent's metadata XML to stdout.
# The heredoc delimiter is unquoted, so every ${OCF_RESKEY_*_default}
# reference below is expanded to the default value assigned earlier in this
# file. The document lists one <parameter> element per supported resource
# parameter, followed by the supported actions with their timeouts.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Resource script for MySQL
</longdesc>
<shortdesc lang="en">Resource script for MySQL</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">Data directory</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user, must have select privilege on 'show status'
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="test_conf" unique="0" required="0">
<longdesc lang="en">
MySQL test user conf file to override user/pass
</longdesc>
<shortdesc lang="en">MySQL test user conf file</shortdesc>
<content type="string" default="${OCF_RESKEY_test_conf_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="master_timeout" unique="0" required="0">
<longdesc lang="en">
How long we should wait for galera master. If master not come up before timeout,
RA will choose new master from already running nodes. This value can by changed by crm_attribute:
# crm_attribute --name galera_master_timeout --update 500
Remember to remove this after maintenance. USE WITH CAUTION!
Remember to change timeout for start operation. Start timeout should be bigger than master_timeout
</longdesc>
<shortdesc lang="en">Galera master timeout</shortdesc>
<content type="integer" default="${OCF_RESKEY_master_timeout_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="330" />
<action name="stop" timeout="120" />
<action name="monitor" timeout="30" interval="20" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
# Convenience functions
#######################################################################
# Print the nodes of the local cluster partition, as reported by
# "crm_node --partition", as one space-separated line on stdout.
# Lines containing "(null)" are dropped. An empty line is printed when no
# node is reported.
# Globals: LL (read, log prefix)
nodes_in_cluster_online() {
    local LH="${LL} nodes_in_cluster_online():"
    local NODES

    NODES=$(crm_node --partition | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        # ocf_log takes the priority as its first argument; without it the
        # message itself is consumed as the priority.
        ocf_log info "${LH} Online Nodes in cluster: ${NODES}"
        # Deliberately unquoted: folds a multi-line node list into a single
        # space-separated line for the callers.
        echo $NODES
    else
        ocf_log info "${LH} No online nodes in cluster"
        echo
    fi
}
# Print the names of all nodes listed by "crm_node --list" as one
# space-separated line on stdout. The name is taken from the second column.
# Lines containing "(null)" are dropped. An empty line is printed when no
# node is listed.
# Globals: LL (read, log prefix)
nodes_in_cluster() {
    local LH="${LL} nodes_in_cluster():"
    local NODES

    # Ubuntu's awk doesn't like \w, hence the explicit character class
    NODES=$(crm_node --list | awk '/^[a-zA-Z0-9]/ {print $2}' | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        # ocf_log takes the priority as its first argument
        ocf_log info "${LH} Nodes in cluster: ${NODES}"
        # Deliberately unquoted: folds a multi-line node list into a single
        # space-separated line for the callers.
        echo $NODES
    else
        ocf_log info "${LH} No nodes in cluster"
        echo
    fi
}
# Check that $1 is a well-formed Galera GTID.
# Valid values are:
#   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:123 - standard cluster-id:commit-id
#   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:-1  - non-initialized cluster,
#                                              e.g. 00000000-0000-0000-0000-000000000000:-1
# Globals:   LL (read, log prefix), UUID_REGEX (read, extended regex)
# Arguments: $1 - GTID candidate
# Returns:   0 when the whole value matches UUID_REGEX, 1 otherwise
validate_gtid() {
    local LH="${LL} validate_gtid():"
    local status_loglevel="err"

    if [ -z "$1" ]; then
        ocf_log $status_loglevel "${LH} No GTID provided"
        return 1
    fi

    # -x anchors the pattern to the whole value, so trailing garbage or a
    # seqno such as "-12" (which merely starts with "-1") is rejected.
    if printf '%s\n' "$1" | grep -q -x -E -- "${UUID_REGEX}"; then
        ocf_log info "${LH} GTID OK: $1"
        return 0
    fi

    ocf_log $status_loglevel "${LH} GTID have wrong format: $1"
    return 1
}
# Determine this node's current Galera GTID and publish it in the CIB.
# With a running server the GTID is built from the wsrep_local_state_uuid
# and wsrep_last_committed status values. Otherwise it is taken from the
# output of "<binary> --wsrep-recover" and, failing that, from grastate.dat
# in the data directory.
# If the GTID changed and is valid, update the "gtid" node attribute, print
# the new GTID and return 0.
# If it is unchanged or has a bad value, print the current GTID from the CIB
# and return 1.
update_node_gtid() {
    local LH="${LL} update_node_gtid():"
    local status_loglevel="err"
    local local_gtid
    local cib_gtid
    local state_uuid
    local last_committed

    # Set loglevel to info during probe
    ocf_is_probe && status_loglevel="info"

    if ! mysql_status $status_loglevel 1; then
        # Server is down: ask the binary to recover the position
        local_gtid=$(${OCF_RESKEY_binary} --wsrep-recover 2>&1 | \
            grep -e 'Recovered position' -e 'wsrep_start_position' | grep -Eo "${UUID_REGEX}")
        if [ -z "${local_gtid}" ]; then
            # Last resort: the saved state file
            local_gtid=$(cat ${OCF_RESKEY_datadir}/grastate.dat \
                | awk '/uuid/ { uuid = $NF} /seqno/ { seqno = $NF} END {print uuid":"seqno}')
        fi
    else
        state_uuid=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_local_state_uuid'" | awk '{print $NF}')
        last_committed=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_last_committed'" | awk '{print $NF}')
        local_gtid="$state_uuid:$last_committed"
    fi

    cib_gtid=$(get_node_gtid $HOSTNAME)

    if [ "${local_gtid}" != "${cib_gtid}" ] && validate_gtid "${local_gtid}"; then
        ocf_log info "${LH} Galera GTID: ${local_gtid}"
        crm_attribute --quiet --node $HOSTNAME --lifetime reboot --name gtid \
            --update $local_gtid
        echo "${local_gtid}"
        return 0
    fi

    echo "${cib_gtid}"
    return 1
}
# Set the transient (reboot lifetime) node attribute "is_pc" to 'true' for
# this node.
# Globals: LL (read, log prefix), HOSTNAME (read)
update_node_pc() {
    local LH="${LL} update_node_pc():"
    local -a attr_args

    attr_args=(--quiet --node $HOSTNAME --lifetime reboot --name is_pc --update 'true')
    ocf_log info "${LH} Setting node PC flag to true"
    crm_attribute "${attr_args[@]}"
}
# Delete the transient (reboot lifetime) node attribute "is_pc" for this
# node.
# Globals: LL (read, log prefix), HOSTNAME (read)
clear_node_pc() {
    # Own log prefix: without a local LH the message would be tagged with
    # whatever LH the calling function happened to define.
    local LH="${LL} clear_node_pc():"

    ocf_log info "${LH} Cleaning up is_pc attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name is_pc \
        --delete
}
# Print the number of seconds to wait for a Galera master.
# The value comes from the "galera_master_timeout" cluster attribute. The
# master_timeout resource parameter is used when the attribute is not set,
# or when the query returns nothing usable.
# Globals: LL (read, log prefix), OCF_RESKEY_master_timeout (read)
# Outputs: the timeout on stdout
get_master_timeout() {
    local LH="${LL} get_master_timeout():"
    local timeout

    timeout=$(crm_attribute --quiet --name galera_master_timeout \
        --query --default="$OCF_RESKEY_master_timeout" | sed -e '/(null)/d')

    # Callers compare the result numerically, so never hand back an empty
    # or non-numeric answer (e.g. when the query itself failed).
    case "$timeout" in
        ''|*[!0-9]*)
            ocf_log warn "${LH} No usable galera_master_timeout value, using ${OCF_RESKEY_master_timeout}"
            timeout=$OCF_RESKEY_master_timeout
            ;;
    esac

    ocf_log info "${LH} Setting timeout $timeout"
    echo "$timeout"
}
# Print the "gtid" node attribute stored for node $1.
# "0" is printed when no GTID is set or the stored value has a wrong format.
# Globals:   LL (read, log prefix)
# Arguments: $1 - node name
get_node_gtid() {
    local LH="${LL} get_node_gtid():"
    local stored

    stored=$(crm_attribute --quiet --node $1 --lifetime reboot --query \
        --name gtid 2> /dev/null | sed -e '/(null)/d')

    if [ -n "$stored" ] && validate_gtid "$stored"; then
        ocf_log info "${LH} Galera GTID: ${stored}"
        echo $stored
        return
    fi

    # Nothing stored, or the stored value did not validate
    ocf_log info "${LH} No GTID for $1"
    echo 0
}
# Print the GTID of node $1. If the first lookup yields "0" (no GTID),
# retry exactly once after a random sleep of 1-10 seconds.
# Arguments: $1 - node name
get_node_gtid_with_retry() {
    local target="$1"
    local found

    found=$(get_node_gtid $target)
    if [ "$found" = "0" ]; then
        sleep $(( ( $RANDOM % 10 ) + 1 ))
        found=$(get_node_gtid $target)
    fi
    echo $found
}
# Decide whether a master election is needed.
# An election is needed when this partition has quorum ("crm_node -q"
# prints 1) or the cluster has exactly one node, and crm_resource locates
# no running instance of the resource.
# Globals: LL (read, log prefix), OCF_RESOURCE_INSTANCE (read)
# Returns: 0 when an election is needed, 1 otherwise
check_if_reelection_needed() {
    local LH="${LL} check_if_reelection_needed()"
    local PARTITION_WITH_QUORUM
    local RESOURCE_NAME
    local NODE_COUNT
    local RUNNING_INSTANCES

    PARTITION_WITH_QUORUM=$(crm_node -q | sed -e '/(null)/d')
    # Strip the clone instance suffix (everything from the first ":")
    RESOURCE_NAME=${OCF_RESOURCE_INSTANCE%%:*}
    NODE_COUNT=$(( $(nodes_in_cluster | wc -w) ))

    # An empty or non-numeric quorum answer means "no quorum". It must not
    # break the numeric test and thereby hide the single-node case.
    case "$PARTITION_WITH_QUORUM" in
        ''|*[!0-9]*) PARTITION_WITH_QUORUM=0 ;;
    esac

    if [ "$PARTITION_WITH_QUORUM" -eq 1 ] || [ "$NODE_COUNT" -eq 1 ]; then
        # crm_resource's own diagnostics are discarded; only located
        # instances are counted.
        RUNNING_INSTANCES=$(crm_resource --quiet --locate \
            --resource "$RESOURCE_NAME" 2> /dev/null | sed -e '/(null)/d' | wc -l)
        if [ "$RUNNING_INSTANCES" -lt 1 ]; then
            ocf_log info "${LH} Election is needed"
            return 0
        fi
    fi

    ocf_log info "${LH} Election was done"
    return 1
}
# Detect whether this node runs mysqld as the seed of a new cluster.
# Looks for a mysqld process whose command line mentions both the data
# directory and "wsrep-new-cluster", ignoring position-recovery and defunct
# processes. When one is found the node's PC flag is set.
# Returns: 0 when such a process exists, 1 otherwise
check_if_new_cluster() {
    local LH="${LL} check_if_new_cluster()"
    local seed_pid

    # Match a mysqld pid by the datadir and a new cluster sign, exclude position recovery
    seed_pid=$(ps -C mysqld -o pid= -o command= -o args= | \
        grep -e "${OCF_RESKEY_datadir}.*wsrep-new-cluster" -e "wsrep-new-cluster.*${OCF_RESKEY_datadir}" | \
        awk '!/wsrep.recover|defunct/ {print $1}')

    if [ -z "${seed_pid}" ]; then
        ocf_log info "${LH} Running cluster"
        return 1
    fi

    update_node_pc
    ocf_log info "${LH} New cluster"
    return 0
}
# Choose the master among the given nodes and print its name on stdout.
# An empty line is printed when no node qualifies.
#
# Selection:
#   1. Collect each node's GTID (cluster-id:seqno) from the CIB.
#   2. Find the cluster-id seen on the most nodes, ignoring nodes without a
#      GTID and the all-zero id. Ties are broken by byte-wise order so that
#      every node reaches the same answer.
#   3. Among nodes carrying that cluster-id and having a positive allocation
#      score for the resource, pick the first with the highest seqno.
#
# Globals:   LL, ZEROID, OCF_RESOURCE_INSTANCE (read)
# Arguments: node names (whitespace separated, in one or several arguments)
get_master() {
    local LH="${LL} get_master()"
    local NODES=$*
    local POSSIBLE_MASTERS
    local -A TMP
    local MASTER_GTID
    local GTID
    local NODE
    local NODE_SCORE
    local LATEST_SEQNO=-1
    local SEQNO
    local MASTER
    local SCORES

    # Ensure the same nodes list to reach a consensus for the chosen master across all of the nodes
    NODES=$(printf -- '%s\n' ${NODES} | sort -u)

    # Form a hash of keys as node names, values as GTID:SEQNO
    for NODE in $NODES; do
        # Try and get a gtid with a retry when the GTID=0 to make sure there
        # is plenty of time if multiple nodes are starting at the same time.
        GTID=$(get_node_gtid_with_retry $NODE)
        TMP[$NODE]=$GTID
    done

    # Find possible masters.
    # Cut the seqnums off the stored GTID:SEQNO pairs, count the occurrences
    # of each cluster-id and keep the one with the highest count. "uniq -c"
    # does not order by count, hence the explicit numeric sort.
    MASTER_GTID=$(printf -- '%s\n' "${TMP[@]%:*}" \
        | grep -vE -e "^0$" -e "$ZEROID" \
        | LC_ALL=C sort | uniq -c \
        | LC_ALL=C sort -k1,1nr -k2,2 \
        | awk 'NR == 1 {print $2}')
    [ "${MASTER_GTID}" ] || MASTER_GTID=$ZEROID
    ocf_log info "${LH} The most seen GTID is: ${MASTER_GTID}"

    # One snapshot of the allocation scores serves all nodes
    SCORES=$(crm_simulate -Ls)

    for NODE in $NODES; do
        # NOTE(review): the node name is matched as a regex anywhere in the
        # line, so a name that is a prefix of another may match both.
        NODE_SCORE=$(printf '%s\n' "$SCORES" \
            | awk -v res="${OCF_RESOURCE_INSTANCE}" -v node="${NODE}" \
                '$0 ~ res && /clone_color/ && $0 !~ (res ":") && $0 ~ node {print $NF}')
        if [[ $NODE_SCORE =~ ^-?[0-9]+$ && $NODE_SCORE -le 0 || $NODE_SCORE = "-INFINITY" || -z $NODE_SCORE ]]; then
            ocf_log info "${LH} Skipping node $NODE as it is not eligible for running the resource. Its score is ${NODE_SCORE:-NULL}"
            continue
        fi
        ocf_log info "${LH} Node's ${NODE} score: ${NODE_SCORE}, GTID/SEQNUM: ${TMP[$NODE]}"
        # Filter node names with the most seen GTID as possible masters and find the latest SEQNO
        if [ "${MASTER_GTID}" = "${TMP[$NODE]%:*}" ]; then
            POSSIBLE_MASTERS="$POSSIBLE_MASTERS $NODE"
            SEQNO=${TMP[$NODE]#*:}
            if [ "$SEQNO" -gt "$LATEST_SEQNO" ]; then
                LATEST_SEQNO=$SEQNO
            fi
        fi
    done
    ocf_log info "${LH} Possible masters: $POSSIBLE_MASTERS"

    # The master is the first possible master holding the latest SEQNO
    for NODE in $POSSIBLE_MASTERS; do
        if [ "${LATEST_SEQNO}" = "${TMP[$NODE]#*:}" ]; then
            MASTER=$NODE
            break
        fi
    done

    # Only index TMP when a master was found; an empty subscript is an error
    ocf_log info "${LH} Choosed master: ${MASTER} with GTID: ${MASTER:+${TMP[$MASTER]}}"
    echo "$MASTER"
}
# Find the best master and print its GTID on stdout.
# If the best master is this node, propose it as the primary component and
# return 0.
# If another node is the master and no re-election is needed, print that
# node's GTID and return 1. Before that, if *this* node is also running a
# new cluster and more than one online node carries the is_pc flag, treat
# it as a split-brain: clear the flag and exit with OCF_ERR_GENERIC.
# Otherwise keep polling every 10 seconds until the master timeout expires,
# then return 1 without printing a GTID.
# Globals: LL, HOSTNAME, OCF_ERR_GENERIC (read)
check_if_galera_pc() {
    local LH="${LL} check_if_galera_pc():"
    local NODES
    local MASTER
    local timeout
    local GTID
    local node
    local is_pc
    local pcnum=0

    timeout=$(get_master_timeout)
    ocf_log info "${LH} Checking if Primary Component"

    while [ "${timeout:-0}" -gt 0 ]; do
        NODES=$(nodes_in_cluster_online)
        MASTER=$(get_master "$NODES")
        if [ -n "$MASTER" ]; then
            GTID=$(get_node_gtid "$MASTER")
        else
            # No master could be chosen, so there is no GTID to look up
            GTID=0
        fi

        if [ "$MASTER" = "$HOSTNAME" ]; then
            ocf_log info "${LH} I'm Primary Component. Join me! My GTID: ${GTID}"
            echo "${GTID}"
            return 0
        fi

        if ! check_if_reelection_needed; then
            ocf_log info "${LH} My neighbour is Primary Component with GTID: ${GTID}"
            if check_if_new_cluster; then
                # Count the online nodes that claim to be a primary component
                pcnum=0
                for node in ${NODES}; do
                    is_pc=$(crm_attribute --quiet --node ${node} --lifetime reboot --query --name is_pc | sed -e '/(null)/d')
                    if [ "${is_pc}" = "true" ]; then
                        pcnum=$((pcnum + 1))
                    fi
                    if [ ${pcnum} -gt 1 ]; then
                        ocf_log err "${LH} But I'm running a new cluster, this is a split-brain!"
                        clear_node_pc
                        exit $OCF_ERR_GENERIC
                    fi
                done
            fi
            echo "${GTID}"
            return 1
        fi

        sleep 10
        timeout=$((timeout - 10))
        ocf_log info "${LH} Waiting for master. ${timeout} seconds left"
    done

    ocf_log info "${LH} ${HOSTNAME} is not Primary Component"
    return 1
}
# Functions invoked by resource manager actions
# Validate the resource configuration: both binaries are checked with
# check_binary, the config file and data directory must exist, and the
# configured user and group must be known to getent.
# Globals: LL, OCF_RESKEY_binary, OCF_RESKEY_client_binary,
#          OCF_RESKEY_config, OCF_RESKEY_datadir, OCF_RESKEY_user,
#          OCF_RESKEY_group (read)
# Returns: OCF_SUCCESS, or OCF_ERR_INSTALLED on the first failed check
mysql_validate() {
    local LH="${LL} mysql_validate()"

    check_binary "$OCF_RESKEY_binary"
    check_binary "$OCF_RESKEY_client_binary"

    # The expansions are quoted on purpose: an empty value must fail the
    # check, whereas an unquoted empty operand makes "[ ! -f ]" pass.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        ocf_log err "${LH} Config $OCF_RESKEY_config doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if [ ! -d "$OCF_RESKEY_datadir" ]; then
        ocf_log err "${LH} Datadir $OCF_RESKEY_datadir doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "${LH} User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent group "$OCF_RESKEY_group" >/dev/null 2>&1; then
        ocf_log err "${LH} Group $OCF_RESKEY_group doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}
# Report whether a state snapshot transfer (SST) appears to be in progress.
# That is the case when a mysqld process using the configured data
# directory exists and a "wsrep_sst_<method>" helper process is running,
# where <method> is read from the config file.
# Globals:   LL, OCF_RESKEY_datadir, OCF_RESKEY_config (read)
# Arguments: $1 - log level for the verdict message (default: info)
# Returns:   OCF_SUCCESS when SST is in progress, OCF_ERR_GENERIC otherwise
check_if_sst() {
    local LH="${LL} check_if_sst():"
    local loglevel=${1:-'info'}
    local pid
    local wsrep_sst_method
    local wsrep_sst_command
    local wsrep_sst_pid

    # Match a MySQLd pid by the datadir, exclude position recovery
    pid=$(ps -C mysqld -o pid= -o command= -o args= | grep -- "${OCF_RESKEY_datadir}" | \
        awk '!/wsrep.recover|defunct/ {print $1}')

    if [ -n "${pid}" ]; then
        ocf_log info "${LH} MySQL process ${pid} found"
        # MySQLd's running and may be blocked, check for signs of SST.
        # Accept "key = value" as well as "key=value", either "_" or "-" in
        # the option name, optional double quotes and a trailing comment.
        # When the option occurs more than once the last occurrence is used.
        wsrep_sst_method=$(awk -F= '
            /^[ \t]*wsrep[_-]sst[_-]method[ \t]*=/ {
                value = $2
                sub(/^[ \t]*/, "", value)
                sub(/[ \t#].*$/, "", value)
                gsub(/"/, "", value)
                method = value
            }
            END { if (method != "") print method }
        ' "${OCF_RESKEY_config}")
        wsrep_sst_command="wsrep_sst_${wsrep_sst_method}"
        wsrep_sst_pid=$(ps -C "${wsrep_sst_command}" -o pid= -o command= | \
            awk '!/defunct/ {print $1}' | head -1)
        if [ -n "${wsrep_sst_pid}" ]; then
            ocf_log $loglevel "${LH} SST is in progress"
            return $OCF_SUCCESS
        fi
    fi

    ocf_log $loglevel "${LH} No signs of SST found"
    return $OCF_ERR_GENERIC
}
# Check whether the MySQL server is running.
# Waits for the pid file to appear, checking up to $2 times with $3 seconds
# between checks, then verifies that the recorded pid has a /proc entry.
# Globals:   LL, OCF_RESKEY_pid, OCF_SUCCESS, OCF_NOT_RUNNING (read)
# Arguments: $1 - log level for "not running"/retry messages (default: info)
#            $2 - number of checks for the pid file (default: 3)
#            $3 - seconds to sleep between checks (default: 2)
# Returns:   OCF_SUCCESS when running, OCF_NOT_RUNNING otherwise
mysql_status() {
    local LH="${LL} mysql_status():"
    local loglevel=${1:-'info'}
    local count=${2:-3}
    local sleeptime=${3:-2}
    local pid

    while [ "$count" -gt 0 ]; do
        if [ -f "$OCF_RESKEY_pid" ]; then
            ocf_log info "${LH} MySQL PID found"
            break
        fi
        count=$(( count - 1 ))
        if [ "$count" -le 0 ]; then
            # Out of retries: do not waste another sleep before giving up
            ocf_log $loglevel "${LH} PIDFile ${OCF_RESKEY_pid} of MySQL server not found. No retries left"
            break
        fi
        ocf_log $loglevel "${LH} PIDFile ${OCF_RESKEY_pid} of MySQL server not found. Sleeping for $sleeptime seconds. ${count} retries left"
        sleep $sleeptime
    done

    if [ "$count" -le 0 ]; then
        ocf_log $loglevel "${LH} MySQL is not running"
        return $OCF_NOT_RUNNING
    fi

    # The file may vanish between the check above and this read
    pid=$(cat "$OCF_RESKEY_pid" 2>/dev/null)
    pid=${pid//[[:space:]]/}
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        # Success is always logged at info. $loglevel is meant for failures
        # and is "err" during a regular monitor.
        ocf_log info "${LH} MySQL is running"
        return $OCF_SUCCESS
    fi

    ocf_log $loglevel "${LH} MySQL is not running"
    return $OCF_NOT_RUNNING
}
# Print the value column of "SHOW STATUS LIKE '$1'" from the local server.
_mysql_monitor_wsrep_value() {
    $MYSQL $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE '$1'" | awk '{print $NF}'
}

# Monitor action.
# Succeeds immediately while an SST is in progress. Otherwise the server
# must be running, wsrep_connected must be ON, a Synced/Donor/Desync node
# must report wsrep_ready ON, and the state must not be "Initialized".
# Finally, if this node turns out to be the primary component, its GTID
# must equal the GTID reported by the master election.
# Returns: OCF_SUCCESS, the mysql_status code when the server is not
#          running, or OCF_ERR_GENERIC on any failed check
mysql_monitor() {
    local LH="${LL} mysql_monitor():"
    local rc
    local status_loglevel="err"
    local node_state
    local master_gtid
    local own_gtid

    # Set loglevel to info during probe
    ocf_is_probe && status_loglevel="info"

    check_if_sst
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        return $rc
    fi

    mysql_status $status_loglevel
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    own_gtid=$(update_node_gtid)

    if [ "$(_mysql_monitor_wsrep_value wsrep_connected)" != "ON" ]; then
        return $OCF_ERR_GENERIC
    fi

    node_state=$(_mysql_monitor_wsrep_value wsrep_local_state_comment)
    case "$node_state" in
        *Synced*|*Donor*|*Desync*)
            if [ "$(_mysql_monitor_wsrep_value wsrep_ready)" != "ON" ]; then
                ocf_log err "${LH} MySQL synced but not ready"
                return $OCF_ERR_GENERIC
            fi
            ;;
        Initialized)
            ocf_log err "${LH} MySQL lost quorum or uninitialized"
            return $OCF_ERR_GENERIC
            ;;
    esac

    # Check if this node is the master and is running the most recent GTID
    check_if_new_cluster
    master_gtid=$(check_if_galera_pc)
    rc=$?
    if [ $rc -eq 0 ] && [ "${master_gtid}" != "${own_gtid}" ]; then
        ocf_log err "${LH} I'm a master, and my GTID: ${own_gtid}, which was not expected"
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "${LH} MySQL monitor succeeded"
    return $OCF_SUCCESS
}
# Start action.
# Returns OCF_SUCCESS right away when mysql_status reports a running server.
# Otherwise it prepares the socket and pid directories, fixes data directory
# ownership, decides whether this node must bootstrap a new cluster, launches
# the server binary in the background, and waits until either an SST is
# detected or the server reports as running.
# Exits with OCF_ERR_PERM when the pid or socket directory is not writable by
# the configured user.
mysql_start() {
local LH="${LL} mysql_start():"
local NODES
local socket_dir
local pid_dir
local rc
local dir
local mysql_extra_params
if mysql_status info 1; then
ocf_log info "${LH} MySQL already running"
return $OCF_SUCCESS
fi
# check and make socket dir
socket_dir="$( dirname ${OCF_RESKEY_socket} )"
if [ ! -d "${socket_dir}" ] ; then
ocf_log info "${LH} Create socket dir: ${socket_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
mkdir -p "${socket_dir}"
chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} "${socket_dir}"
chmod 755 "${socket_dir}"
fi
# check and make PID file dir
pid_dir="$( dirname ${OCF_RESKEY_pid} )"
if [ ! -d "${pid_dir}" ] ; then
ocf_log info "${LH} Create PID dir: ${pid_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
mkdir -p "${pid_dir}"
chown -R ${OCF_RESKEY_user}:${OCF_RESKEY_group} "${pid_dir}"
chmod 755 "${pid_dir}"
fi
# set user/group for datadir
chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_datadir
# Regardless of whether we just created the directory or it
# already existed, check whether it is writable by the configured
# user
for dir in $pid_dir $socket_dir; do
if ! /usr/bin/sudo -n -u $OCF_RESKEY_user /usr/bin/test -w $dir; then
ocf_log err "${LH} Directory $dir is not writable by $OCF_RESKEY_user"
exit $OCF_ERR_PERM
fi
done
# Pass an init file to the server when one has been dropped in /tmp
if [ -f /tmp/wsrep-init-file ]; then
mysql_extra_params="--init-file=/tmp/wsrep-init-file"
else
mysql_extra_params=""
fi
# Publish this node's GTID before any election looks at it.
# The GTID these helpers print goes to stdout uncaptured here.
update_node_gtid
check_if_reelection_needed
rc=$?
if [ $rc -eq 0 ]; then
# An election is needed: bootstrap a new cluster only when this node wins
# it, otherwise drop any stale primary-component flag.
check_if_galera_pc
rc=$?
if [ $rc -eq 0 ]; then
mysql_extra_params="$mysql_extra_params --wsrep-new-cluster"
update_node_pc
else
clear_node_pc
fi
fi
ocf_log info "${LH} Starting MySQL"
# The server is launched in the background with all of its output discarded
${OCF_RESKEY_binary} \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
--datadir=$OCF_RESKEY_datadir \
--user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
$mysql_extra_params >/dev/null 2>&1 &
# NOTE(review): the command above ends with "&", so $? here is the status of
# starting the background job, not of the server itself. This failure branch
# is presumably never taken. Confirm.
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "${LH} MySQL start command failed: $rc"
clear_node_pc
return $rc
fi
# Spin waiting for the server to come up or exit, if SST's in progress
# Let the CRM/LRM time us out if required.
while :; do
check_if_sst
rc=$?
[ $rc -eq $OCF_SUCCESS ] && break
if mysql_status info 1; then
break
fi
sleep 3
done
ocf_log info "${LH} MySQL started"
return $OCF_SUCCESS
}
# Remove everything a stopped server leaves behind: the node's "gtid" and
# "is_pc" attributes, the subsys lock file, the socket and the pid file.
# Globals: LL, HOSTNAME, OCF_RESKEY_socket, OCF_RESKEY_pid (read)
mysql_cleanup() {
    local LH="${LL} mysql_cleanup():"
    local pid_content=""

    ocf_log info "${LH} Cleaning up gtid attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name gtid \
        --delete
    clear_node_pc

    ocf_log debug "${LH} Delete lock file: /var/lock/subsys/mysqld"
    rm -f /var/lock/subsys/mysqld

    ocf_log debug "${LH} Delete sock file: ${OCF_RESKEY_socket}"
    rm -f -- "$OCF_RESKEY_socket"

    # The pid file may already be gone; read it only when it is there so
    # the log line does not trigger a spurious cat error.
    if [ -r "$OCF_RESKEY_pid" ]; then
        pid_content=$(cat "$OCF_RESKEY_pid")
    fi
    ocf_log debug "${LH} Delete pid file: ${OCF_RESKEY_pid} with content ${pid_content}"
    rm -f -- "$OCF_RESKEY_pid"
}
# Stop action.
# Sends SIGTERM through proc_stop, sized to the operation timeout, then
# cleans up attributes and leftover files. The time budget is the operation
# timeout (given in milliseconds) minus 5 seconds, or 15 seconds when no
# timeout is provided. It is passed to proc_stop as a count of 5-second
# units.
# Returns: OCF_SUCCESS
mysql_stop() {
    local LH="${LL} mysql_stop():"
    local shutdown_timeout=15
    local rc
    local stop_rounds

    ocf_log info "${LH}"

    [ -z "$OCF_RESKEY_CRM_meta_timeout" ] ||
        shutdown_timeout=$(( ($OCF_RESKEY_CRM_meta_timeout/1000)-5 ))
    stop_rounds=$(( $shutdown_timeout/5 ))

    proc_stop "${OCF_RESKEY_pid}" "mysqld.*${OCF_RESKEY_datadir}" SIGTERM 5 $stop_rounds
    mysql_cleanup
    return $OCF_SUCCESS
}
##########################################################################
##########################################################################
# Constants used by the GTID helpers, and optional debug tracing.
#
# Tracing is switched on only when the DEBUG_LOG file already exists, is
# writable and is not a symlink. In that case stdout and stderr are appended
# to it, a timestamp, the arguments and the OCF_* environment are recorded,
# and "set -x" is enabled. When the file does not qualify, nothing is
# redirected.
# This code does not check ownership or permissions of the log directory;
# keeping it private (root-owned, mode 0700) is up to the operator.
##########################################################################
UUID_REGEX="\w{8}-\w{4}-\w{4}-\w{4}-\w{12}:([[:digit:]]+|-1)"
ZEROID="00000000-0000-0000-0000-000000000000"
DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"

if [[ -n "${DEBUG_LOG}" && -w "${DEBUG_LOG}" && ! -L "${DEBUG_LOG}" ]]; then
    DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
    if [[ ! -d "${DEBUG_LOG_DIR}" ]]; then
        exec 9>/dev/null
    else
        exec 9>>"$DEBUG_LOG"
        exec 1>&9 2>&9
        date '+%Y%m%d %H:%M:%S' >&9
        echo "$*" >&9
        env | grep OCF_ | sort >&9
        set -x
    fi
fi
# Actions that need no validated configuration are answered first.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Log prefix shared by all functions
export LL="${OCF_RESOURCE_INSTANCE}:"

# With an invalid configuration, "stop" still reports success and "monitor"
# reports "not running". Every other action fails with the validation code.
rc=0
mysql_validate || rc=$?
if [ $rc -ne 0 ]; then
    if [ "$1" = "stop" ]; then
        exit $OCF_SUCCESS
    elif [ "$1" = "monitor" ]; then
        exit $OCF_NOT_RUNNING
    else
        exit $rc
    fi
fi

# What kind of method was invoked?
case "$1" in
    start)
        mysql_start
        ;;
    stop)
        mysql_stop
        ;;
    monitor)
        mysql_monitor
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
# vim: set ts=4 sw=4 tw=0 et :