Skip to content
Newer
Older
100755 186 lines (169 sloc) 6.84 KB
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
1 #!/usr/bin/perl -w
2 #
d2a3668 Comments on what this does
francis authored
3 # reduce-repeated-logs:
4 # Finds messages with lots of duplicated log lines. Merges the adjacent
5 # duplicates into one log line, with number of repetitions after it.
6 #
7 # e.g.
8 # unexpected error (type RABX::Error::User) while processing message: Representative ID '13531' not found (repeated 16138 times)
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
9 #
ddbf997 Found this on dademcron@bitter
dademcron authored
10 # By default, scans all messages with more log file entries than $low below.
11 # Or specify a message id to do just that message.
12 #
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
13 # Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved.
14 # Email: francis@mysociety.org; WWW: http://www.mysociety.org/
15 #
16
d2a3668 Comments on what this does
francis authored
17 # TODO:
18 # Cope with messages which have alternating lines repeated many times.
19
ddbf997 Found this on dademcron@bitter
dademcron authored
20 my $rcsid = ''; $rcsid .= '$Id: reduce-repeated-logs,v 1.5 2007-02-02 12:16:35 dademcron Exp $';
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
21
22 use strict;
23 require 5.8.0;
24
d2a3668 Comments on what this does
francis authored
25 # Number of lines which must exactly repeat within a message for this script to
26 # look for adjacent duplicates within that message.
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
27 my $low = 1000;
28
29 # Horrible boilerplate to set up appropriate library paths.
30 use FindBin;
31 use lib "$FindBin::Bin/../perllib";
461700f @crowbot Update library paths
crowbot authored
32 use lib "$FindBin::Bin/../commonlib/perllib";
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
33
34 use Data::Dumper;
35 use POSIX qw(strftime);
36
37 use mySociety::Config;
38 BEGIN {
39 mySociety::Config::set_file("$FindBin::Bin/../conf/general");
40 }
41 use mySociety::DBHandle qw(dbh);
42 #DBI->trace(1);
43 use FYR;
44
8a06171 Remove alternating pairs
francis authored
# Takes a reference to an array of order_ids of duplicate message_log rows,
# and an extra count to add to the number of ids (repetitions which earlier
# runs have already folded into those rows). Deletes all of the rows except
# the last one, and rewrites the last row's message so that it ends with
# " (repeated N times)", where N is the number of ids plus the extra count.
# Commits the transaction when done. Does nothing if the array is empty.
sub merge_messages {
    my ($dups, $c_extra) = @_;
    my @dups = @$dups;
    return if !@dups;

    my $c = scalar(@dups) + $c_extra;
    my $last = pop @dups;
    #print Dumper(\@dups);
    #print "count: " . $c . "\n";
    #print "last: " . $last . "\n";

    # Delete in batches of at most 5000 ids per statement. splice() drains
    # the array, so the loop ends exactly when nothing is left and we never
    # issue a delete with an empty "in ()" list, which is not valid SQL.
    while (@dups) {
        my @batch = splice(@dups, 0, 5000);
        dbh()->do("delete from message_log where order_id in (". join(",", @batch) . ")");
    }

    # Strip any existing " (repeated ... times)" suffix from the surviving
    # row before appending the new total.
    # NOTE(review): the doubled backslashes reach the server as \\( and \\);
    # presumably this relies on backslash escapes being processed inside SQL
    # string literals -- confirm against the server's settings.
    dbh()->do("update message_log set message =
        (replace(message, coalesce(substring(message from ' \\\\(repeated .+ times\\\\)\$'), ''),'')
        || ' (repeated $c times)') where order_id = ?", {}, $last);
    dbh()->commit();
}
8a06171 Remove alternating pairs
francis authored
67
# Takes a reference to an array of order_ids of alternating duplicate
# message_log rows (A B A B ...), and an extra count to add to the number of
# ids. Merges them together, much like merge_messages for non-alternating
# repeats: deletes all of the rows except the last two, and rewrites the
# messages of those two so that they end with
# " (repeated in pair about N times)", where N is half of (number of ids plus
# extra count), rounded down. Commits the transaction when done. Does nothing
# if fewer than two ids are given, since there is then no pair to keep.
sub merge_messages_pairs {
    my ($dups, $c_extra) = @_;
    my @dups = @$dups;
    return if @dups < 2;

    # Truncate to a whole number: an odd total would otherwise give e.g.
    # "4.5", which the "(\d+) times" pattern in reduce_log does not match, so
    # a later run would no longer recognise the row as already merged.
    my $c = int((scalar(@dups) + $c_extra) / 2);
    my $last1 = pop @dups;
    my $last2 = pop @dups;
    # print Dumper(\@dups);
    # print "count: $c\n";
    # print "last1: $last1 last2: $last2\n";

    # Delete in batches of at most 5000 ids per statement. splice() drains
    # the array, so we never issue a delete with an empty "in ()" list.
    while (@dups) {
        my @batch = splice(@dups, 0, 5000);
        dbh()->do("delete from message_log where order_id in (". join(",", @batch) . ")");
    }

    # Both surviving rows of the pair get the same suffix, replacing any
    # " (repeated ... times)" suffix they already carry.
    foreach my $keep ($last1, $last2) {
        dbh()->do("update message_log set message =
            (replace(message, coalesce(substring(message from ' \\\\(repeated .+ times\\\\)\$'), ''),'')
            || ' (repeated in pair about $c times)') where order_id = ?", {}, $keep);
    }
    dbh()->commit();
}
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
94
# Takes a message id, and merges adjacent duplicate message_log items for that
# message. Rows are read in order_id order and grouped into "runs". A row
# continues the current run when its exceptional, state and editor values
# equal the previous row's, its whenlogged is not earlier, and either
#   - its message equals the previous row's message (plain repeat), or
#   - the run is in pair mode and its message equals the one two rows back
#     (alternating A B A B ... repeat).
# A finished run is merged with merge_messages if it is a plain run of at
# least 2 rows, or with merge_messages_pairs if it is a pair run of at least
# 8 rows. Rows already carrying a " (repeated N times)" suffix from an earlier
# run are compared without the suffix, and count as N rows.
sub reduce_log($) {
    my ($message_id) = @_;
    my $stm = dbh()->prepare("
        select order_id, exceptional, whenlogged, state, message, editor
        from message_log where message_id = ? order by order_id");
    $stm->execute($message_id);

    # Values from the previous row ($p_...) and the message two rows back.
    my ($p_exceptional, $p_whenlogged, $p_state, $p_message, $p_editor);
    my $p_p_message;
    my @dups;                   # order_ids in the current run
    my $c_extra = 0;            # repetitions already folded into rows of the run
    my $matching_pairs = 0;     # true while the current run is an A B A B run

    # Merge the current run, if it is long enough to be worth it.
    my $flush = sub {
        if (!$matching_pairs && scalar(@dups) >= 2) {
            merge_messages(\@dups, $c_extra);
        } elsif ($matching_pairs && scalar(@dups) >= 8) { # higher tolerance for pairs
            merge_messages_pairs(\@dups, $c_extra);
        }
    };

    while (my ($order_id, $exceptional, $whenlogged, $state, $message, $editor) = $stm->fetchrow_array()) {
        $editor = '' if (!$editor);

        # A row marked "(repeated N times)" stands for N original rows, i.e.
        # N - 1 more than the single row we see. Keep that per-row figure
        # separate until we know which run the row belongs to.
        my $row_extra = 0;
        if ($message =~ m/^(.+) \(repeated (?:in pair about )?(\d+) times\)$/) {
            my ($text, $times) = ($1, $2);
            $message = $text;
            $row_extra = $times - 1;
        }
        #print scalar(@dups) . " $order_id $exceptional $whenlogged $state $message $editor\n";

        # See if meta data same as last message
        my $got = 0;
        if (!$p_state ||
            ( $p_exceptional eq $exceptional &&
              $p_whenlogged <= $whenlogged &&
              $p_state eq $state &&
              $p_editor eq $editor)
           ) {
            if (!$p_state || (!$matching_pairs && $p_message eq $message)) {
                # Either at beginning, or message same as last one
                push @dups, $order_id;
                $got = 1;
            } elsif (!$matching_pairs && scalar(@dups) == 1) {
                # Start looking for pair matches
                $matching_pairs = 1;
                push @dups, $order_id;
                $got = 1;
            } elsif ($matching_pairs && $p_p_message eq $message) {
                # Message same as one two ago
                push @dups, $order_id;
                $got = 1;
            }
        }

        if ($got) {
            # Row joined the current run, so its folded repetitions do too.
            $c_extra += $row_extra;
        } else {
            # If we got more than one row, then merge in
            $flush->();
            # Reset: this row starts the next run, and carries its own
            # folded repetitions into that run rather than the finished one.
            @dups = ($order_id);
            $c_extra = $row_extra;
            $matching_pairs = 0;
        }

        $p_p_message = $p_message;
        $p_exceptional = $exceptional;
        $p_whenlogged = $whenlogged;
        $p_state = $state;
        $p_message = $message;
        $p_editor = $editor;
    }

    # Merge whatever run was still open when the rows ran out.
    $flush->();
}
164
12a4768 Cope with already deleted reps
dademcron authored
165 #reduce_log('7f715e54e05dd8428f8a');
166 #exit;
36c9165 Script to remove massively repeated log file entries from WTT's messa…
francis authored
167
ddbf997 Found this on dademcron@bitter
dademcron authored
# Optional first command line argument: restrict the scan to one message id.
# The id is passed as a bound parameter rather than pasted into the SQL text,
# so an id containing a quote can neither break nor alter the statement.
my $one_id = "";
my @one_id_params;
if (defined($ARGV[0])) {
    $one_id = " and message_id = ?";
    push @one_id_params, $ARGV[0];
}

# Read everything with lots of duplicates, using heuristic here: any
# (message_id, message) combination which occurs more than $low times.
my $st = dbh()->prepare("select count(*) as c, message_id, message
    from message_log group by message_id, message having count(*) > $low $one_id order by c desc");
$st->execute(@one_id_params);
while (my ($count, $message_id, $message) = $st->fetchrow_array()) {
    # Report the size of the whole table before each reduction.
    my ($total_rows) = dbh()->selectrow_array("select count(*) from message_log");
    print "Total rows: $total_rows\n";

    # Do proper merging of adjacent items
    print "Reducing $message_id $message...\n";
    reduce_log($message_id);
}
185
Something went wrong with that request. Please try again.