Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

147 lines (122 sloc) 5.429 kb
(import org.apache.commons.mail.SimpleEmail)
(import com.twilio.sdk.TwilioRestClient)
;; Alert handler `email`: mails the alert message and the command output for
;; the given server through smtp.gmail.com (SSL, port 465).
;; NOTE(review): the addresses and password below are inline sample values --
;; confirm real credentials are never committed alongside this file.
(add-alert-handler email [alert-msg server-name cmd-output]
  (let [mail    (SimpleEmail.)
        subject (str "ALERT - " server-name " - " alert-msg)
        body    (str "Alert triggered on " server-name "\n" cmd-output)]
    ;; Transport settings.
    (.setHostName mail "smtp.gmail.com")
    (.setSslSmtpPort mail "465")
    (.setSSL mail true)
    ;; Envelope and content.
    (.addTo mail "sample@example.com")
    (.setFrom mail "sender@example.com" "Server Stats")
    (.setSubject mail subject)
    (.setMsg mail body)
    (.setAuthentication mail "sender@example.com" "password123")
    (.send mail)
    ;; Hand back the message object, as the `doto` form did.
    mail))
;; Command-failure handler: sends an SMS through the Twilio REST client
;; stating that `server-name` is not responding to ssh.
;; "ACCOUNT_SID" / "AUTH_TOKEN" and both phone numbers are placeholders.
(set-command-failure-handler [server-name]
  (let [sms-params {"To"   "5554155484"
                    "From" "2675551234"
                    "Body" (str server-name " is not responding to ssh")}]
    (-> (TwilioRestClient. "ACCOUNT_SID" "AUTH_TOKEN")
        (.getAccount)
        (.getSmsFactory)
        (.create sms-params))))
;; Account name passed to set-ssh-username.
(set-ssh-username "statsuser")

;; Named host groups; the add-cmd entries in this file refer to them by name
;; in their :servers vectors.
(add-server-group web-servers      ["host1.example.com" "host2.example.com"])
(add-server-group backup-servers   ["backup.example.com"])
(add-server-group database-servers ["db1.example.com" "db2.example.com"])
(add-server-group app-servers      ["app1.example.com"
                                    "app2.example.com"
                                    "app3.example.com"])
;; `top`: a single batch-mode run of top, keeping 11 lines of its output
;; starting at line 7.
(add-cmd top
  {:doc     "Get the top 10 processes by CPU"
   :cmd     "top -b -n 1| tail -n+7 | head -11"
   :servers [web-servers backup-servers database-servers app-servers]})
;; `disk`: `df -h` on every server group. One alert is attached to the
;; "Use%" column: percent-typed, trigger (> 85), delivered via `email`.
(add-cmd disk
  {:doc     "Get the disk usage using df"
   :servers [web-servers backup-servers database-servers app-servers]
   :cmd     "df -h"
   :alerts  [{:column     "Use%"
              :value-type percent
              :trigger    (> 85)
              :msg        "Disk space over 85% full"
              :handlers   [email]}]})
;; `mem`: runs `free -m` on every server group; no alerts attached.
(add-cmd mem
  {:doc     "Get the memory usage in megabytes"
   :cmd     "free -m"
   :servers [web-servers backup-servers database-servers app-servers]})
;; `io`: extended iostat report in megabytes, printed from line 6 of the
;; output onward.
(add-cmd io
  {:doc     "Get the I/O utilization"
   :cmd     "iostat -x -m | tail -n+6"
   :servers [web-servers backup-servers database-servers app-servers]})
;; `mysql-error`: looks up the error-log path in `SHOW VARIABLES` and tails
;; its last 20 lines as root.
;; `grep -w` restricts the match to the variable named exactly `log_error`;
;; a plain substring match also selects names such as `log_error_verbosity`
;; or `binlog_error_action`, whose values would then be handed to `tail` as
;; if they were file names.
(add-cmd mysql-error
  {:doc     "Get the last 20 mysql errors"
   :servers [web-servers database-servers app-servers]
   :cmd     "sudo su -c \"tail -20 `sudo su -c \"mysql -e 'SHOW VARIABLES;'\" | grep -w log_error | awk '{print $2}'`\""})
;; `mysql-slow`: reads the `slow_query_log_file` row from `SHOW VARIABLES`
;; and tails the last 20 lines of the file named in its second column.
(add-cmd mysql-slow
  {:doc     "Get the last 20 entries in the slow query log"
   :cmd     "sudo su -c \"tail -20 `sudo su -c \"mysql -e 'SHOW VARIABLES;'\" | grep slow_query_log_file | awk '{print $2}'`\""
   :servers [web-servers database-servers app-servers]})
;; `mysql-log`: reads the `general_log_file` row from `SHOW VARIABLES` and
;; tails the last 20 lines of the file named in its second column.
(add-cmd mysql-log
  {:doc     "Get the last 20 entries in the mysql log"
   :cmd     "sudo su -c \"tail -20 `sudo su -c \"mysql -e 'SHOW VARIABLES;'\" | grep general_log_file | awk '{print $2}'`\""
   :servers [web-servers database-servers app-servers]})
;; `num-queries`: counts the `SHOW PROCESSLIST` rows that contain "Query",
;; leaving out any row that mentions PROCESSLIST (the listing statement
;; itself).
(add-cmd num-queries
  {:doc     "Get the number of running MySql Queries"
   :cmd     "sudo su -c \"mysql -e 'SHOW PROCESSLIST;'\" | grep Query | grep -v 'PROCESSLIST' | wc -l"
   :servers [web-servers database-servers app-servers]})
;; `is-mysql`: echoes 'false' when `ps aux` shows no line matching mysqld and
;; 'true' otherwise; the bool-typed alert mails when the value equals false.
;; The bracketed pattern keeps grep from matching its own `ps` entry. It is
;; single-quoted so the remote shell hands it to grep verbatim instead of
;; treating `mysql[d]` as a filename glob (which fails outright under zsh's
;; default nomatch / bash's failglob, or expands if a file named `mysqld`
;; exists in the working directory).
(add-cmd is-mysql
  {:doc     "Is MySql Running?"
   :servers [web-servers database-servers app-servers]
   :cmd     "[[ -z `ps aux | grep 'mysql[d]'` ]] && echo 'false' || echo 'true'"
   :alerts  [{:value-type bool
              :msg        "MySql is not running"
              :handlers   [email]
              :trigger    (= false)
              ;; 5 minutes in milliseconds: no further mail is sent until
              ;; that long has passed since the last one.
              :mute-for   300000}]})
;; `is-apache`: echoes 'false' when `ps aux` shows no line matching httpd and
;; 'true' otherwise; the bool-typed alert mails when the value equals false.
;; The bracketed pattern keeps grep from matching its own `ps` entry. It is
;; single-quoted so the remote shell hands it to grep verbatim instead of
;; treating `http[d]` as a filename glob.
(add-cmd is-apache
  {:doc     "Is Apache Running?"
   :servers [web-servers]
   :cmd     "[[ -z `ps aux | grep 'http[d]'` ]] && echo 'false' || echo 'true'"
   :alerts  [{:value-type bool
              :msg        "Apache is not running"
              :handlers   [email]
              :trigger    (= false)
              ;; 30 minutes in milliseconds (30 * 60 * 1000): no further mail
              ;; is sent until that long has passed since the last one.
              ;; The earlier value 1860000 was 31 minutes.
              :mute-for   1800000}]})
;; `show-queries`: from `SHOW Full PROCESSLIST`, keeps rows containing
;; "Query" (minus any mentioning PROCESSLIST) and prints fields 8 onward.
(add-cmd show-queries
  {:doc     "Show a list of currently running queries"
   :cmd     "sudo su -c \"mysql -e 'SHOW Full PROCESSLIST;'\" | grep Query | grep -v PROCESSLIST | cut -f8-"
   :servers [web-servers database-servers app-servers]})
;; `invalid-widgets`: runs `select count(*) from widget_errors` and keeps the
;; last output line; the number-typed alert uses trigger (> 0) and is
;; delivered via `email`.
(add-cmd invalid-widgets
  {:doc     "Return the number of widgets that are invalid"
   :servers [web-servers]
   :cmd     "sudo su -c \"mysql -e \\\"select count(*) from widget_errors\\\"\" | tail -1"
   :alerts  [{:value-type number
              :trigger    (> 0)
              :msg        "Invalid widgets detected"
              :handlers   [email]}]})
;; `locked-queries`: vertical-format (\G) full process list, showing every
;; line containing "Locked" together with the 6 lines before it and the
;; 1 line after it.
(add-cmd locked-queries
  {:doc     "Show a list of locked queries"
   :cmd     "sudo su -c \"mysql -e 'SHOW Full PROCESSLIST\\G'\" | grep -B 6 -A 1 Locked"
   :servers [web-servers database-servers app-servers]})
;; `longest-query`: full process list with table-border lines, 'system user'
;; rows and Sleep rows removed, sorted numerically on field 6; only the last
;; line of the sorted output is kept.
(add-cmd longest-query
  {:doc     "Show the longest currently running query"
   :cmd     "sudo su -c \"mysql -e 'show full processlist' | grep -v '^\\+\\-\\-' | grep -v 'system user' | grep -v Sleep | sort -n -k6 | tail -1\""
   :servers [web-servers database-servers app-servers]})
;; `php-procs`: lists the `ps aux` lines that match "php".
;; The bracketed pattern keeps grep from matching its own `ps` entry. It is
;; single-quoted so the remote shell hands it to grep verbatim instead of
;; treating `ph[p]` as a filename glob (which fails outright under zsh's
;; default nomatch / bash's failglob, or expands if a file named `php`
;; exists in the working directory).
(add-cmd php-procs
  {:doc     "Show a list of all php processes"
   :servers [web-servers database-servers app-servers]
   :cmd     "ps aux | grep 'ph[p]'"})
;; `app-log`: tails the last 20 lines of the most recently modified file
;; matching /logs/app* (first entry of `ls -t`).
(add-cmd app-log
  {:doc     "Get last 20 entries in todays app-servers log"
   :cmd     "tail -20 `ls -t /logs/app* | head -1`"
   :servers [app-servers]})
;; `http-errors`: takes the last 200 lines of the httpd log, sorts them
;; numerically on field 9 and drops the lines whose field 9 is exactly 200.
;; The filter is anchored at line start and skips eight whitespace-separated
;; fields, so it tests the same field that `sort -k9` uses. The previous
;; pattern `[\S+\s+]{8}200` was a character class, i.e. "any 8 characters
;; followed by 200", which also discarded non-200 lines that merely
;; contained "200" somewhere (in a URL, a byte count, a timestamp).
(add-cmd http-errors
  {:doc     "Show recent non-200s requests"
   :servers [web-servers]
   :cmd     "tail -200 /etc/httpd/httpd.log | sort -k9 -n | grep -P -v '^(\\S+\\s+){8}200\\b'"})
Jump to Line
Something went wrong with that request. Please try again.