This repository has been archived by the owner on Nov 12, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
indexbloat.py
executable file
·127 lines (105 loc) · 4.42 KB
/
indexbloat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/python
"""
Simple Python script to calculate index bloat size based on CSV files
containing information about index sizes.
Useful if you do a periodic database restore (to e.g. check database
backup consistency) and have somewhere to get fresh index sizes from.
CSV files can be generated from PSQL using the following query:
echo "COPY (SELECT nspname || '.' || relname AS \"relation\",
pg_relation_size(C.oid) AS \"size\" FROM pg_class C LEFT JOIN
pg_namespace N ON (N.oid = C.relnamespace) WHERE nspname NOT IN
('pg_catalog', 'information_schema') AND relkind = 'i' ORDER BY
pg_relation_size(C.oid) DESC) TO STDOUT with CSV HEADER" | psql
database_name
"""
# NOTE: docstring moved above the imports so it becomes the real module
# docstring (a string literal after other statements does not set __doc__).
import csv
import re
import sys
import time

from optparse import OptionParser
def readCSV(filename):
    """Read index sizes from a CSV file of "schema.index,size" rows.

    Returns a dict mapping the index name (with the leading "schema."
    prefix stripped) to its size string as read from the file.
    PostgreSQL-internal indexes (pg_*) and rows whose first column has
    no dotted prefix (e.g. the CSV header line) are skipped.
    """
    result = {}
    # with-block closes the file; text mode works for csv on both
    # Python 2 and 3 (the original leaked the handle and used 'rb')
    with open(filename) as csvfile:
        for relation, size in csv.reader(csvfile, delimiter=','):
            # strip the leading "schema." part; dot escaped (the original
            # unescaped '.' matched any character)
            match = re.search(r'\w+\.(.*)', relation)
            if match is None:
                # header line or a name without a schema prefix; the
                # original crashed here calling .group() on None
                continue
            index = match.group(1)
            # not interested in pg internal indexes; '!=' is the correct
            # string comparison (the original's "is not ''" relied on
            # CPython string interning)
            if index != '' and not index.startswith('pg_'):
                result[index] = size
    return result
def convert_bytes(bytes):
    """Format a byte count as a human-readable string.

    Accepts anything float() can parse (int, long or numeric string) and
    returns e.g. '2.00G', '3.50M', '1.25K' or '512.00b'.
    """
    amount = float(bytes)
    # largest unit first; fall through to plain bytes below 1 KiB
    for threshold, suffix in ((1073741824, 'G'),
                              (1048576, 'M'),
                              (1024, 'K')):
        if amount >= threshold:
            return '%.2f%s' % (amount / threshold, suffix)
    return '%.2fb' % amount
def main():
# process options and arguments
usage = "usage: %prog [options] csvA csvB"
parser = OptionParser(usage)
parser.add_option("-i", "--ignoremissing", dest="ignmissidxs",
help="ignore missing indexes",
action="store_false", default=True)
parser.add_option("-p", "--pretty-mode", dest="pretty",
help="pretty mode",
action="store_true", default=False)
parser.add_option("-s", "--sum", dest="sum",
help="print total bloat size at the end",
action="store_true")
parser.add_option("-t", "--percent-threshold", dest="pctthrs",
help="pct threshold when to treat idx as bloated; default 102",
action="store", default=102, metavar="number")
parser.add_option("-b", "--bloat-bytes", dest="bloatbytes",
help="minimal bloat size to display; default 0",
action="store", default=0, metavar="bytes")
parser.add_option("-c", "--size-threshold", dest="bytesthrs",
help="bytes threshold when to compare idx; default 100M",
action="store", default=100000000, metavar="bytes")
(options, args) = parser.parse_args()
if len(args) != 2:
parser.error("incorrect number of arguments; you must specify both "
"csvA and csvB file locations")
# load data from CSV files
csvA = readCSV(args[0]) # production data
csvB = readCSV(args[1]) # clean import data
sum = 0 # we are going to use it to track total bloat size
indexes = []
for name, size in csvA.iteritems():
if name in csvB:
# difference in %
diff = long(size) * 100 / long(csvB[name])
# difference in bytes
diff_bytes = long(size) - long(csvB[name])
if (diff > options.pctthrs and long(size) > options.bytesthrs and
long(diff_bytes) > long(options.bloatbytes)):
indexes.append((name, diff, convert_bytes(size), convert_bytes(csvB[name])))
# total it up to the total idx bloat size
sum = sum + diff_bytes
else:
if options.ignmissidxs:
if options.pretty:
print ("Ough! %s index is missing in the %s file. "
"Likely a problem with backup!" % (name, args[1]))
else:
print "index %s missing in %s file!" % (name, args[1])
if options.pretty:
template = "Index %s size compare to clean import: %s %% (%s vs. %s)"
else:
template = "index %s, %s %%, %s/%s"
for indexdata in sorted(indexes, key=lambda x: x[1], reverse=True):
print template % indexdata
# print total bloat size
if options.sum:
if options.pretty:
print "Total index bloat: %s" % convert_bytes(sum)
else:
print "total bloat: %s" % sum
# Script entry point: run only when executed directly, not on import.
if __name__ == '__main__':
    main()