Skip to content

Commit

Permalink
ircslash watches for DB malperformance and notifies the IRC channel.
Browse files Browse the repository at this point in the history
  • Loading branch information
jamiemccarthy committed Dec 9, 2004
1 parent 912be4a commit 7c3809e
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 33 deletions.
3 changes: 2 additions & 1 deletion Slash/DB/Static/MySQL/MySQL.pm
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ sub getArchiveList {
return $returnable;
}


########################################################
# For balance_readers.pl
sub deleteOldDBReaderStatus {
Expand All @@ -241,6 +240,8 @@ sub getDBsReaderStatus {
"dbid",
"dbid,
MIN(IF(was_alive='yes',1,0)) AS was_alive,
MIN(IF(was_reachable='yes',1,0)) AS was_reachable,
MIN(IF(was_running='yes',1,0)) AS was_running,
AVG(slave_lag_secs) AS lag,
AVG(query_bog_secs) AS bog",
"dbs_readerstatus",
Expand Down
2 changes: 2 additions & 0 deletions sql/mysql/defaults.sql
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,8 @@ INSERT INTO vars (name, value, description) VALUES ('index_noanon','0','Redirect
INSERT INTO vars (name, value, description) VALUES ('ircslash','0','Enable the ircslash task and connect to an IRC channel whenever slashd starts');
INSERT INTO vars (name, value, description) VALUES ('ircslash_channel','#ircslash','Which channel to join');
INSERT INTO vars (name, value, description) VALUES ('ircslash_channel_password','','Password for ircslash_channel');
INSERT INTO vars (name, value, description) VALUES ('ircslash_dbalert_bogthresh','30','Alert the IRC channel when DB query bog exceeds this value, in seconds, for the last minute average');
INSERT INTO vars (name, value, description) VALUES ('ircslash_dbalert_lagthresh','30','Alert the IRC channel when DB replication lag exceeds this value, in seconds, for the last minute average');
INSERT INTO vars (name, value, description) VALUES ('ircslash_ircname','','Name to use on IRC server (defaults to "(slashsite) slashd")');
INSERT INTO vars (name, value, description) VALUES ('ircslash_lastremarkid','','Id of the last remark seen');
INSERT INTO vars (name, value, description) VALUES ('ircslash_nick','','Nick to use on IRC server (has a reasonable default)');
Expand Down
3 changes: 3 additions & 0 deletions sql/mysql/upgrades
Original file line number Diff line number Diff line change
Expand Up @@ -2813,3 +2813,6 @@ DELETE FROM vars where name in

# End of T_2_5_0_41, Start of T_2_5_0_42 - 2004/12/07

INSERT INTO vars (name, value, description) VALUES ('ircslash_dbalert_bogthresh','30','Alert the IRC channel when DB query bog exceeds this value, in seconds, for the last minute average');
INSERT INTO vars (name, value, description) VALUES ('ircslash_dbalert_lagthresh','30','Alert the IRC channel when DB replication lag exceeds this value, in seconds, for the last minute average');

4 changes: 4 additions & 0 deletions themes/slashcode/tasks/balance_readers.pl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ sub sleep_until {
}
}

# This should be in Static/MySQL.pm
{ # cheap closure cache
my $reader_dbid;
sub get_reader_dbid {
Expand All @@ -78,6 +79,7 @@ sub get_reader_dbid {
}
} # end closure

# This should be in Static/MySQL.pm
sub get_readers {
my $slashdb = getCurrentDB();
my $readers = { };
Expand All @@ -95,6 +97,7 @@ sub check_readers {
my $reader_info = { };

# Weed out readers that are isalive='no'
# This should be in Static/MySQL.pm
my $vu_hr = $slashdb->sqlSelectAllHashref(
"virtual_user",
"virtual_user, IF(isalive='yes',1,0) AS isalive, weight, weight_adjust",
Expand Down Expand Up @@ -428,6 +431,7 @@ sub set_reader_weight_adjust {
my $reduce_max = $constants->{dbs_reader_weight_reduce_max}*$delay/60;
my $increase_max = $constants->{dbs_reader_weight_increase_max}*$delay/60;

# This should be in Static/MySQL.pm
$slashdb->sqlUpdate("dbs",
{ -weight_adjust => "GREATEST(0, weight_adjust-$reduce_max,
LEAST(1, weight_adjust+$increase_max,
Expand Down
151 changes: 124 additions & 27 deletions themes/slashcode/tasks/ircslash.pl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
$has_proc_processtable
$irc $conn $nick $channel
$remarks_active $next_remark_id $next_handle_remarks $hushed
$next_check_slashd
%stoid $clean_exit_flag
$parent_pid
);
Expand Down Expand Up @@ -59,29 +58,8 @@
$next_handle_remarks = time() + $remark_delay;
handle_remarks();
}
if (!$clean_exit_flag && time() >= $next_check_slashd) {
$next_check_slashd = time() + 20;
my($not_ok, $response) = check_slashd();
if ($not_ok) {
# Parent slashd process seems to be gone. Maybe
# it just got killed and sent us the SIGUSR1 and
# our $task_exit_flag is already set. Pause a
# moment and check that.
sleep 1;
if ($task_exit_flag) {
# OK, forget this warning, just exit
# normally.
$not_ok = 0;
}
}
if ($not_ok) {
# Parent slashd process is gone, that's not good,
# but the channel doesn't need to hear about it
# every 20 seconds.
$next_check_slashd = time() + 30 * 60;
$conn->privmsg($channel, getIRCData('slashd_parent_gone'));
}
}
possible_check_slashd();
possible_check_dbs();
}

ircshutdown();
Expand Down Expand Up @@ -433,6 +411,37 @@ sub cmd_slashd {
slashdLog("slashd: $result, cmd from $info->{event}{nick}");
}

{ # closure
    # Earliest time at which the parent slashd will next be checked.
    # The first check is held back until one minute after startup so
    # that we have certainly joined the IRC channel by then.
    my $next_check_slashd = $^T + 60;

    # Called on every pass of the main loop.  At most once every 20
    # seconds, ask check_slashd() whether the parent slashd is still
    # there; if it is not, tell the channel, then stay quiet about it
    # for the next 30 minutes.  Does nothing once $task_exit_flag is set.
    sub possible_check_slashd {
        return if $task_exit_flag;
        return if time() < $next_check_slashd;

        $next_check_slashd = time() + 20;
        my($parent_missing) = check_slashd();
        return unless $parent_missing;

        # The parent slashd process seems to be gone.  It may simply
        # have been killed and sent us SIGUSR1, in which case our
        # $task_exit_flag is about to be (or already is) set.  Give
        # that a moment to happen; if it did, skip the warning and
        # let the task exit normally.
        sleep 1;
        return if $task_exit_flag;

        # The parent really is gone.  That's not good, but the channel
        # doesn't need to hear about it every 20 seconds.
        $next_check_slashd = time() + 30 * 60;
        $conn->privmsg($channel, getIRCData('slashd_parent_gone'));
        return;
    }
} # end closure

sub check_slashd {
my $parent_pid_str = "";
if (!$has_proc_processtable) {
Expand Down Expand Up @@ -462,8 +471,10 @@ sub cmd_dbs {
if (%$dbs_data) {
for my $dbid (keys %$dbs_data) {
$dbs_data->{$dbid}{virtual_user} = $dbs->{$dbid}{virtual_user};
$dbs_data->{$dbid}{lag} = sprintf("%.1f", $dbs_data->{$dbid}{lag} || 0);
$dbs_data->{$dbid}{bog} = sprintf("%.1f", $dbs_data->{$dbid}{bog} || 0);
$dbs_data->{$dbid}{lag} = defined($dbs_data->{$dbid}{lag})
? sprintf("%4.1f", $dbs_data->{$dbid}{lag} || 0)
: "?";
$dbs_data->{$dbid}{bog} = sprintf("%4.1f", $dbs_data->{$dbid}{bog} || 0);
}
my @dbids =
sort { $dbs->{$a}{virtual_user} cmp $dbs->{$b}{virtual_user} }
Expand All @@ -472,8 +483,94 @@ sub cmd_dbs {
} else {
$response = getIRCData('dbs_nodata');
}
$self->privmsg($channel, $response);
chomp $response;
my @responses = split /\n/, $response;
for my $r (@responses) {
sleep 1;
$conn->privmsg($channel, $r);
}
}

{ # closure
    # The first checks come one minute after startup, so we are sure we
    # joined the IRC channel OK.
    my $next_check_dbs = $^T + 60;
    # Earliest time at which another "DBs are in trouble" alert may be
    # sent (alerts are rate-limited to one per 10 minutes).
    my $next_report_bad_dbs = $^T + 60;
    # True only between sending an alert and sending its all-clear.
    # Kept separate from $next_report_bad_dbs: that variable starts out
    # non-zero, so using it as the "was an alert sent?" flag produced a
    # bogus all-clear on the very first healthy check after startup.
    my $alert_outstanding = 0;

    # Return 1 if every DB in $dbs_data (hashref keyed by dbid) was
    # alive, reachable and running, and its average lag and bog are no
    # greater than $lag_limit and $bog_limit respectively; otherwise 0.
    # An empty $dbs_data yields 1.
    sub _dbs_within_limits {
        my($dbs_data, $lag_limit, $bog_limit) = @_;
        for my $dbid (keys %$dbs_data) {
            my $db = $dbs_data->{$dbid};
            return 0 if !$db->{was_alive}
                || !$db->{was_reachable}
                || !$db->{was_running};
            # The report formatting below allows for an undefined lag,
            # so treat a missing value as 0 here instead of comparing
            # undef numerically.
            return 0 if ($db->{lag} || 0) > $lag_limit;
            return 0 if ($db->{bog} || 0) > $bog_limit;
        }
        return 1;
    }

    # Called on every pass of the main loop.  At most once every 20
    # seconds, look at the last minute of reader status for the DBs:
    #  - if any DB is down or over the ircslash_dbalert_lagthresh /
    #    ircslash_dbalert_bogthresh vars (default 30 each), send the
    #    channel an alert followed by the per-DB status lines, no more
    #    often than every 10 minutes;
    #  - if an alert is outstanding and every DB is now up and under
    #    half of both thresholds, send the all-clear.
    # Does nothing once $task_exit_flag is set.
    sub possible_check_dbs {
        return if $task_exit_flag || time() < $next_check_dbs;
        $next_check_dbs = time() + 20;

        # Only fetch the handles once we know a check is actually due.
        my $slashdb = getCurrentDB();
        my $constants = getCurrentStatic();
        my $dbs = $slashdb->getDBs();
        my $dbs_data = $slashdb->getDBsReaderStatus(60) || { };

        my $lag_thresh = $constants->{ircslash_dbalert_lagthresh} || 30;
        my $bog_thresh = $constants->{ircslash_dbalert_bogthresh} || 30;

        my $ok = _dbs_within_limits($dbs_data, $lag_thresh, $bog_thresh);
        # "Great" means good enough to clear out a previously reported
        # alert.  Having no data at all is not evidence of recovery,
        # so it never counts as great.
        my $have_data = %$dbs_data ? 1 : 0;
        my $great = $have_data
            && _dbs_within_limits($dbs_data, $lag_thresh/2, $bog_thresh/2);

        if ($great) {
            if ($alert_outstanding) {
                # They were previously reported as bad, so now give
                # an all-clear.
                my $all_clear = getIRCData('dbalert_allclear');
                $conn->privmsg($channel, $all_clear);
                $alert_outstanding = 0;
            }
            # The DBs are fine, so the next problem may be reported
            # immediately.
            $next_report_bad_dbs = 0;
            return;
        }

        return if $ok;

        # One or more DBs are wonky, that's not good, but the channel
        # doesn't need to hear about it every 20 seconds.
        return if time() < $next_report_bad_dbs;

        # We may be in the middle of shutting down; pause a moment and,
        # if so, forget this alert and just exit normally.
        sleep 1;
        return if $task_exit_flag;

        $next_report_bad_dbs = time() + 10 * 60;
        $alert_outstanding = 1;

        # Prepare the per-DB fields for the dbs_response template.
        for my $dbid (keys %$dbs_data) {
            my $db = $dbs_data->{$dbid};
            $db->{virtual_user} = $dbs->{$dbid}{virtual_user};
            $db->{lag} = defined($db->{lag})
                ? sprintf("%4.1f", $db->{lag} || 0)
                : "?";
            $db->{bog} = sprintf("%4.1f", $db->{bog} || 0);
        }
        my @dbids =
            sort { $dbs->{$a}{virtual_user} cmp $dbs->{$b}{virtual_user} }
            keys %$dbs_data;
        my $response = getIRCData('dbs_response', { dbids => \@dbids, dbs => $dbs_data });
        chomp $response;

        # Send the report one line per message, pausing a second
        # before each line.
        $conn->privmsg($channel, getIRCData('dbalert_prefix'));
        for my $r (split /\n/, $response) {
            sleep 1;
            $conn->privmsg($channel, $r);
        }
        return;
    }
} # end closure

############################################################

Expand Down
18 changes: 13 additions & 5 deletions themes/slashcode/templates/data;ircslash;default
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,21 @@ __template__

[% CASE 'dbs_response' %]
[% FOREACH dbid = dbids;
dbs.$dbid.virtual_user;
" alive="; dbs.$dbid.was_alive;
" lag="; dbs.$dbid.lag;
" bog="; dbs.$dbid.bog;
UNLESS loop.last(); " -- "; END;
"lag "; dbs.$dbid.lag;
" bog "; dbs.$dbid.bog;
IF !dbs.$dbid.was_alive; " DEAD!";
ELSIF !dbs.$dbid.was_reachable; " UNREACHABLE!";
ELSIF !dbs.$dbid.was_running; " STOPPED!"; END;
" - "; dbs.$dbid.virtual_user;
"\n";
END %]

[% CASE 'dbalert_prefix' %]
Alert - one or more reader DBs are in trouble:

[% CASE 'dbalert_allclear' %]
The DBs are much better now. Yay!

[% CASE 'dbs_nodata' %]
Error: no dbs_readerstatus data found within the last minute.

Expand Down

0 comments on commit 7c3809e

Please sign in to comment.