From f2d29c72016aa3d204d193b31672b273c9976d40 Mon Sep 17 00:00:00 2001 From: Julien Vehent Date: Sun, 15 Jan 2012 16:00:15 -0500 Subject: [PATCH] added support for pygeoip, added google map generation --- README.md | 147 +++++++++++++++++++----------- postscreen_stats.py | 211 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 270 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 3bd6375..f03bba6 100644 --- a/README.md +++ b/README.md @@ -6,41 +6,75 @@ Run it against your postfix syslogs Published under GPL v2 +Usage +------- + postscreen_stats.py + parses postfix logs to compute statistics on postscreen activity -Examples: + usage: postscreen_stats.py <-y|--year> <-r|--report|-f|--full> + + <-a|--action> action filter with operators | and & + ex. 'PREGREET&DNSBL|HANGUP' => ((PREGREET and DNSBL) or HANGUP) + ex. 'HANGUP&DNSBL|PREGREET&DNSBL' + => ((HANGUP and DNSBL) or (PREGREET and DNSBL) -Short report on full log file ---------------------------------- + <-f|--file> log file to parse (default to /var/log/maillog) + + <-g|--geoloc> /!\ slow ! ip geoloc against hostip.info (default disabled) + + <--geofile> path to a maxmind geolitecity.dat. if specified, with the -g switch + the script uses the maxmind data instead of hostip.info (faster) + + <-G> when using --geofile, use the pygeoip module instead of the GeoIP module + + <-i|--ip> filters the results on a specific IP + + <--mapdest> path to a destination HTML file that will display a Google Map of the result + /!\ Require the geolocation, preferably with --geofile + + <-r|--report> report mode {short|full|ip|none} (default to short) + + <-y|--year> select the year of the logs (default to current year) + + Julien Vehent (http://1nw.eu/!j) - https://github.com/jvehent/Postscreen-Stats + + +Basic usage +-------------- + +Generate a report from a syslog postfix log file. +If you are parsing logs from a year that is not the current year, use the -y option to specify the year of the logs. 
$ python postscreen_stats.py -f maillog.1 -r short -y 2011 - === Postscreen statistics === - 0 BLACKLISTED - 33 COMMAND COUNT LIMIT - 0 COMMAND LENGTH LIMIT - 16 COMMAND PIPELINING - 6 COMMAND TIME LIMIT - 11010 CONNECT - 536 DNSBL - 503 HANGUP - 42 NOQUEUE MAXCONN - 0 NOQUEUE PORT BUSY - 2258 NOQUEUE REJECT 450 (graylist) - 1600 PASS NEW - 8391 PASS OLD - 239 PREGREET - 84 WHITELISTED - === Clients statistics === + === unique clients/total postscreen actions === + 2131/11010 CONNECT + 1/1 BARE NEWLINE + 30/33 COMMAND COUNT LIMIT + 13/16 COMMAND PIPELINING + 6/6 COMMAND TIME LIMIT + 463/536 DNSBL + 305/503 HANGUP + 12/15 NON-SMTP COMMAND + 1884/2258 NOQUEUE 450 deep protocol test reconnection + 1/42 NOQUEUE too many connections + 1577/1600 PASS NEW + 866/8391 PASS OLD + 181/239 PREGREET + 5/84 WHITELISTED + + === clients statistics === 4 avg. dnsbl rank - 840 came back count + 505 blocked clients 2131 clients - 32245 seconds avg. reco. delay + 840 reconnections + 32245.4285714 seconds avg. reco. delay + === First reconnection delay (graylist) === delay| <10s |>10to30s| >30to1m| >1to5m | >5to30m|>30mto2h| >2hto5h|>5hto12h|>12to24h| >24h | count|12 |21 |21 |196 |261 |88 |40 |29 |53 |119 | % |1.4 |2.5 |2.5 |23 |31 |10 |4.8 |3.5 |6.3 |14 | - - + Get the statistics for a specific IP only -------------------------------------------- @@ -59,34 +93,39 @@ Get the statistics for a specific IP only Geo Localisation of blocked IPs ----------------------------------- +There are 3 GeoIP modes: + 1. Use hostip.info online geoip service. This is free but slow and not very accurate + 2. Use Maxmind's GeoIP database. You can use either the free version of the DB from their website, or get a paid version. -Use the '-g' switch to activate geolocalisation against hostip.info. At the moment, there are two big limitations to geolocalisation: -1. It's slow ! Don't expect to get more than 2/3 IPs per second. So if you have 2000 IPs to geolocalise, it will take a while to run -2. 
It only gives the Country of the IP. I didn't find the need to query the whole GPS data, but that's easy enough to change. - - $ python postscreen_stats.py -f maillog.1 -r short -g - - === Postscreen statistics === - 1 BARE NEWLINE - 33 COMMAND COUNT LIMIT - 16 COMMAND PIPELINING - 6 COMMAND TIME LIMIT - 11010 CONNECT - 536 DNSBL - 503 HANGUP - 2258 NOQUEUE 450 deep protocol test reconnection - 42 NOQUEUE too many connections - 1600 PASS NEW - 8391 PASS OLD - 239 PREGREET - 84 WHITELISTED +To use hostip.info, just set the -g option. +To use maxmind, set the --geofile option to point to your Maxmind DB (e.g. --geofile=/path/to/GeoIPCity.dat) +By default, --geofile uses the GeoIP python module, but if you prefer to use pygeoip instead, set the -G option as well. - === Clients statistics === - 4 avg. dnsbl rank - 2131 clients - - === Blocked IPs per country === - [('XX', 238), ('US', 162), ('IN', 21), ('ID', 10), ('RU', 9), ('EU', 5), ('VN', 4), ('BR', 3), ('DE', 3), ('CO', 3), ('CA', 3), ('KR', 3), ('UK', 3), ('JP', 2), ('RO', 2), ('CN', 2), ('IT', 2), ('AR', 2), ('AU', 2), ('KZ', 2), ('MX', 2), ('FR', 1), ('BG', 1), ('BO', 1), ('NL', 1), ('PT', 1), ('TW', 1), ('TR', 1), ('TN', 1), ('LT', 1), ('PA', 1), ('PK', 1), ('PH', 1), ('PL', 1), ('CM', 1), ('IQ', 1), ('CZ', 1), ('ES', 1), ('SZ', 1), ('KE', 1), ('MW', 1), ('SA', 1), ('UA', 1)] + $ ./postscreen_stats.py -r short --geofile=../geoip/GeoIPCity.dat -G -f maillog.3 -y 2011 + + [....] 
+ + === Top 20 Countries of Blocked Clients === + 167 (33.00%) United States + 59 (12.00%) India + 33 ( 6.50%) Russian Federation + 26 ( 5.10%) Indonesia + 23 ( 4.60%) Pakistan + 21 ( 4.20%) Vietnam + 20 ( 4.00%) China + 13 ( 2.60%) Brazil + 11 ( 2.20%) Korea, Republic of + 9 ( 1.80%) Belarus + 8 ( 1.60%) Turkey + 7 ( 1.40%) Iran, Islamic Republic of + 7 ( 1.40%) Ukraine + 6 ( 1.20%) Kazakstan + 6 ( 1.20%) Chile + 5 ( 0.99%) Italy + 5 ( 0.99%) Romania + 4 ( 0.79%) Poland + 4 ( 0.79%) Spain + 3 ( 0.59%) Afghanistan Geo IP database installation ----------------------------------- @@ -96,3 +135,11 @@ Using the MaxMind free database at http://www.maxmind.com/app/geolitecity # aptitude install python-geoip 3. launch postscreen_stats with --geofile="/path/to/geolistcity.dat" +Google Map of the blocked IPs +----------------------------------- +You can use the --mapdest option to create an HTML file with a map of the blocked IPs. + $ ./postscreen_stats.py -f maillog.3 -r none -y 2011 --geofile=../geoip/GeoIPCity.dat -G --mapdest=postscreen_report_2012-01-15.html + + Google map will be generated at postscreen_report_2012-01-15.html + using MaxMind GeoIP database from ../geoip/GeoIPCity.dat + Creating HTML map at postscreen_report_2012-01-15.html diff --git a/postscreen_stats.py b/postscreen_stats.py index 2e996b4..4383b08 100755 --- a/postscreen_stats.py +++ b/postscreen_stats.py @@ -12,27 +12,40 @@ import urllib from collections import defaultdict from decimal import * +from types import * def usage(): - print - print " postscreen_stats.py" - print " parses postfix logs to compute statistics on postscreen activity" - print - print "usage: postscreen_stats.py <-y|--year> <-r|--report|-f|--full>" - print - print " <-a|--action> action filter with operators | and &" - print " ex. 'PREGREET&DNSBL|HANGUP' => ((PREGREET and DNSBL) or HANGUP)" - print " ex. 
'HANGUP&DNSBL|PREGREET&DNSBL' " - print " => ((HANGUP and DNSBL) or (PREGREET and DNSBL)" - print " <-f|--file> log file to parse (default to /var/log/maillog)" - print " <-g|--geoloc> /!\ slow ! ip geoloc against hostip.info (default disabled)" - print " <--geofile> path to a maxmind geolitecity.dat. if specified, with the -g switch" - print " the script uses the maxmind data instead of hostip.info (faster)" - print " <-i|--ip> filters the results on a specific IP" - print " <-r|--report> report mode {short|full|ip} (default to short)" - print " <-y|--year> select the year of the logs (default to current year)" - print + print ''' + postscreen_stats.py + parses postfix logs to compute statistics on postscreen activity + usage: postscreen_stats.py <-y|--year> <-r|--report|-f|--full> + + <-a|--action> action filter with operators | and & + ex. 'PREGREET&DNSBL|HANGUP' => ((PREGREET and DNSBL) or HANGUP) + ex. 'HANGUP&DNSBL|PREGREET&DNSBL' + => ((HANGUP and DNSBL) or (PREGREET and DNSBL) + + <-f|--file> log file to parse (default to /var/log/maillog) + + <-g|--geoloc> /!\ slow ! ip geoloc against hostip.info (default disabled) + + <--geofile> path to a maxmind geolitecity.dat. 
if specified, with the -g switch + the script uses the maxmind data instead of hostip.info (faster) + + <-G> when using --geofile, use the pygeoip module instead of the GeoIP module + + <-i|--ip> filters the results on a specific IP + + <--mapdest> path to a destination HTML file that will display a Google Map of the result + /!\ Require the geolocation, preferably with --geofile + + <-r|--report> report mode {short|full|ip|none} (default to short) + + <-y|--year> select the year of the logs (default to current year) + +Julien Vehent (http://1nw.eu/!j) - https://github.com/jvehent/Postscreen-Stats +''' # convert the syslog time stamp in unix format and store it def gen_unix_ts(syslog_date): @@ -93,6 +106,8 @@ def action_filter(self,filter): REPORT_MODE = "short" LOG_FILE = "/var/log/maillog" GEOLOC = 0 +GEOFILE = "" +MAPDEST = "" # the list of clients ips and pointer to instance of class ip_list = {} @@ -100,7 +115,7 @@ def action_filter(self,filter): # command line arguments args_list, remainder = getopt.getopt(sys.argv[1:], - 'a:gi:f:y:r:h', ['action=','geoloc','geofile=','ip=','year=','report=','help', 'file=']) + 'a:gGi:f:y:r:h', ['action=','geoloc','geofile=','mapdest=','ip=','year=','report=','help', 'file=']) for argument, value in args_list: if argument in ('-a', '--action'): @@ -108,10 +123,11 @@ def action_filter(self,filter): elif argument in ('-g', '--geoloc'): GEOLOC = 1 elif argument in ('--geofile'): - import GeoIP - gi = GeoIP.open(value,GeoIP.GEOIP_MEMORY_CACHE) - print "using MaxMind GeoIP database from",value - GEOLOC = 2 + GEOFILE = value + if GEOLOC < 2: + GEOLOC = 2 + elif argument in ('-G'): + GEOLOC = 3 elif argument in ('-f', '--file'): LOG_FILE = value elif argument in ('-y', '--year'): @@ -120,20 +136,28 @@ def action_filter(self,filter): IP_FILTER = value print "Filtering results to match:",IP_FILTER elif argument in ('-r', '--report'): - if value in ('short'): - REPORT_MODE = "short" - elif value in ('full'): - REPORT_MODE = "full" - 
elif value in ('ip'): - REPORT_MODE = "ip" + if value in ('short','full','ip','none'): + REPORT_MODE = value else: print "unknown report type" usage() sys.exit() + elif argument in ('--mapdest'): + MAPDEST = value + print "Google map will be generated at",MAPDEST elif argument in ('-h', '--help'): usage() sys.exit() +if GEOLOC > 0 and GEOFILE not in "": + if GEOLOC == 2: + import GeoIP + gi = GeoIP.open(GEOFILE,GeoIP.GEOIP_MEMORY_CACHE) + elif GEOLOC == 3: + import pygeoip + gi = pygeoip.GeoIP(GEOFILE,pygeoip.MEMORY_CACHE) + print "using MaxMind GeoIP database from",GEOFILE + maillog = open(LOG_FILE) for line in maillog: @@ -171,7 +195,7 @@ def action_filter(self,filter): + current_ip ip_list[current_ip].geoloc["country_code"] = \ urllib.urlopen(geoloc_url).read() - elif GEOLOC == 2: + elif GEOLOC > 1: ip_list[current_ip].geoloc = gi.record_by_addr(current_ip) # ip is already known, update the last_seen timestamp @@ -274,15 +298,23 @@ def action_filter(self,filter): print "\tDNSBL ranks:",ip_list[client].dnsbl_ranks if GEOLOC > 0: print "\tGeoLoc:",ip_list[client].geoloc + print + + +# store the list of blocked clients for map generation +if MAPDEST not in "" and GEOLOC > 1: + blocked_clients = defaultdict(int) + +postscreen_stats = defaultdict(int) +clients = defaultdict(int) +comeback = {'<10s':0, '>10s to 30s':0, '>30s to 1min':0, '>1min to 5min':0, + '>5 min to 30min':0, '>30min to 2h':0, '>2h to 5h':0, + '>5h to 12h':0, '>12h to 24h':0, '>24h':0} +blocked_countries = defaultdict(int) + # normal report mode -if REPORT_MODE in ('short','full'): - postscreen_stats = defaultdict(int) - clients = defaultdict(int) - comeback = {'<10s':0, '>10s to 30s':0, '>30s to 1min':0, '>1min to 5min':0, - '>5 min to 30min':0, '>30min to 2h':0, '>2h to 5h':0, - '>5h to 12h':0, '>12h to 24h':0, '>24h':0} - blocked_countries = defaultdict(int) +if REPORT_MODE in ('short','full','none'): # basic accounting, browse through the list of objects and count # the occurences @@ -328,7 
+360,7 @@ def action_filter(self,filter): # if client was blocked at any point, add its country to the count if ( GEOLOC > 0 and - ip_list[client].geoloc > 0 and + ip_list[client].geoloc > 0 and (ip_list[client].actions["BLACKLISTED"] > 0 or ip_list[client].actions["DNSBL"] > 0 or ip_list[client].actions["PREGREET"] > 0 @@ -338,8 +370,11 @@ def action_filter(self,filter): or ip_list[client].actions["COMMAND LENGTH LIMIT"] > 0 or ip_list[client].actions["BARE NEWLINE"] > 0 or ip_list[client].actions["NON-SMTP COMMAND"] > 0)): + blocked_countries[ip_list[client].geoloc["country_name"]] += 1 clients["blocked clients"] += 1 + if MAPDEST not in "" and GEOLOC > 1: + blocked_clients[client] = 1 # calculate the average reconnection delay if clients["reconnections"] > 0: @@ -349,6 +384,7 @@ def action_filter(self,filter): if (postscreen_stats["DNSBL"] > 0 and clients["avg. dnsbl rank"] > 0): clients["avg. dnsbl rank"] /= postscreen_stats["DNSBL"] +if REPORT_MODE in ('short','full'): # display unique clients and total postscreen actions print "\n=== unique clients/total postscreen actions ===" # print the count of CONNECT first (apply the ACTION_FILTER) @@ -411,3 +447,102 @@ def action_filter(self,filter): country,clients = sorted_countries[i] cpercent = "(%5.2f%%)" % float(Decimal(clients)/total_blocked*100) print "%4d" % clients, cpercent, country + +# generate the HTML for the google map and store it in a file +if MAPDEST not in "" and GEOLOC > 1: + mapcode = ''' + + + + Postscreen GeoMap of Blocked IPs + + + + + + +

Postscreen Map of Blocked IPs

+
+
+

generated using Postscreen-Stats by Julien Vehent

+ + +''' + fd = open(MAPDEST,"w") + fd.write(mapcode) + fd.close() + print "Creating HTML map at",MAPDEST