In [1]:
import re
import time
import collections


# Regex fragments that are concatenated, in order, into the access-log line
# pattern used by parse_log_line().
# Host field: lazily matches as few characters as possible.
IP = r'(?P<HOST>.*?)'
# Separator between fields: one or more whitespace characters.
SPACE = r'\s+'
# Any run of non-whitespace; used for the two fields between host and timestamp.
DASH = r'\S+'
# Timestamp, captured together with its surrounding square brackets.
TIMESTAMP = r'(?P<TIMESTAMP>\[.*?\])'
# Request, captured together with its surrounding double quotes.
REQUEST = r'(?P<REQUEST>\".*?\")'
# Three-digit status code.
STATUS = r'(?P<STATUS>\d{3})'
# Response size: either digits or a single '-'.
SIZE = r'(?P<SIZE>-|\d+)'

# NOTE(review): absolute, machine-specific path -- every cell that opens the
# log depends on it; adjust before running elsewhere.
FILE_LOCATION = '/Users/fujikomalan/Documents/access_log'

# Compiled once so the pattern is not rebuilt from its fragments on every line.
# The trailing SPACE means a line must have whitespace (e.g. its newline)
# after the size field in order to match.
_LOG_LINE_PATTERN = re.compile(
    IP + SPACE + DASH + SPACE + DASH + SPACE + TIMESTAMP + SPACE
    + REQUEST + SPACE + STATUS + SPACE + SIZE + SPACE
)


def parse_log_line(LOG_LINE):
    """Split one access-log line into its named fields.

    Args:
        LOG_LINE: a single line of the log, as text.

    Returns:
        A dict with the keys 'host', 'time', 'request', 'status' and 'size'.
        Every value is the matched text, unconverted: 'time' keeps its
        square brackets, 'request' keeps its double quotes, and 'size' may
        be '-'.

    Raises:
        ValueError: if LOG_LINE does not match the expected layout.
    """
    match = _LOG_LINE_PATTERN.search(LOG_LINE)
    if match is None:
        # Without this check the failure would surface as an AttributeError
        # on None, giving no hint about which line was at fault.
        raise ValueError('unrecognised access log line: {!r}'.format(LOG_LINE))
    return {
        'host': match.group('HOST'),
        'time': match.group('TIMESTAMP'),
        'request': match.group('REQUEST'),
        'status': match.group('STATUS'),
        'size': match.group('SIZE'),
    }


### TOP 10 IP ADDRESSES BY HITS RECEIVED

In [34]:
# Tally hits per client host, leaving out requests made from the machine itself.
ipCounter = collections.Counter()

with open(FILE_LOCATION) as fh:
    for line in fh:
        parse_items = parse_log_line(line)
        host = parse_items['host']
        if host in ('::1', '127.0.0.1'):
            continue  # loopback traffic is not counted
        ipCounter[host] += 1

# Banner: blank line, upper-cased title, underline of the same width, blank line.
msg = 'Top 10 IP-Address from most hits received'
print('', msg.upper(), '-' * len(msg), '', sep='\n')

# One row per host: address padded to 16 columns, then ' : ', then the hit count.
for ip, hit in ipCounter.most_common(10):
    print('{:<16}{:^3}{}'.format(ip, ':', hit))


TOP 10 IP-ADDRESS FROM MOST HITS RECEIVED
-----------------------------------------

149.202.89.239   : 2104
81.138.141.38    : 167
79.137.67.130    : 110
157.55.39.62     : 108
82.36.170.105    : 93
124.205.209.4    : 85
52.74.21.59      : 85
155.133.45.237   : 85
124.42.118.111   : 85
185.165.41.171   : 85


### DAY-WISE HIT COUNTS (FIVE BUSIEST DAYS, IN DATE ORDER)

In [39]:
import datetime

# Tally hits per calendar day, leaving out requests made from the machine itself.
day_hit_counter = collections.Counter()
with open(FILE_LOCATION) as fh:
    for line in fh:
        parse_items = parse_log_line(line)
        if parse_items['host'] in ('::1', '127.0.0.1'):
            continue  # loopback traffic is not counted
        timestamp = parse_items['time']  # e.g. [24/Mar/2017:19:40:06 +0000]
        date = timestamp[1:12]  # the dd/Mon/yyyy part, right after the '['
        day_hit_counter[date] += 1

# Banner: blank line, upper-cased title, underline of the same width, blank line.
msg = 'Day wise hit counts'
print('', msg.upper(), '-' * len(msg), '', sep='\n')


def convert_date_format(t):
    """Sort key for ``(date, hits)`` pairs.

    Args:
        t: a sequence whose first item is a date written as dd/Mon/yyyy,
            for example ``('24/Mar/2017', 1165)``.

    Returns:
        The ``datetime.datetime`` parsed from ``t[0]``.
    """
    day_label = t[0]
    return datetime.datetime.strptime(day_label, "%d/%b/%Y")

# Take the five days with the most hits, then list them in calendar order
# rather than by hit count.
busiest_days = day_hit_counter.most_common(5)
busiest_days.sort(key=convert_date_format)
for date, hit in busiest_days:
    print('{:<11}{:^3}{}'.format(date, ':', hit))

print()


DAY WISE HIT COUNTS
-------------------

24/Mar/2017 : 1165
28/Mar/2017 : 807
30/Mar/2017 : 887
31/Mar/2017 : 944
04/Apr/2017 : 729



In [47]:
import collections
import pprint

# Hits per hour for the day most recently passed to day_details().
hourly_counter = collections.Counter()

def day_details(DAY):
    """Print the number of hits in each hour of one calendar day.

    Reads the log at FILE_LOCATION, ignores requests whose host is '::1' or
    '127.0.0.1', and counts the remaining lines whose date equals DAY,
    grouped by the hour taken from the timestamp. The totals are stored in
    the module-level ``hourly_counter`` and printed in ascending hour order.

    Args:
        DAY: the date to report on, written as it appears in the log,
            e.g. '24/Mar/2017'.
    """
    # Start from empty totals: the counter lives at module level, so without
    # this a second call would add its hits on top of the previous day's.
    hourly_counter.clear()

    with open(FILE_LOCATION) as fh:
        for line in fh:
            parse_items = parse_log_line(line)
            if parse_items['host'] in ('::1', '127.0.0.1'):
                continue  # loopback traffic is not counted
            time_stamp = parse_items['time']  # e.g. [24/Mar/2017:19:40:06 +0000]
            date = time_stamp[1:12]   # dd/Mon/yyyy, right after the '['
            hour = time_stamp[13:15]  # two-digit hour, right after the date's ':'
            if date == DAY:
                hourly_counter[int(hour)] += 1

    # Keys are distinct integers, so plain tuple ordering sorts by hour.
    for hour, hit in sorted(hourly_counter.items()):
        print('{:<3}{:^3}{}'.format(hour, ':', hit))
    
    
# Banner: blank line, upper-cased title, underline of the same width, blank line.
msg = 'hourly hit counter'
print('', msg.upper(), '-' * len(msg), '', sep='\n')

# Hour-by-hour breakdown for a single day of the log.
day_details('24/Mar/2017')



HOURLY HIT COUNTER
------------------

19  : 135
20  : 504
21  : 266
22  : 152
23  : 108


In [50]:
import collections
import requests


# Start again from an empty counter; the loop further down this cell refills it from the log file.
ipCounter = collections.Counter()

def get_country_name(IP, timeout=5):
    """Look up the country code for an IP address via the freegeoip JSON API.

    Despite the name, the value returned is the ``country_code`` field of
    the service's reply, not a full country name.

    Args:
        IP: the address to look up, as text.
        timeout: seconds to wait for the service before giving up.

    Returns:
        The ``country_code`` value from the reply, or '--' when the lookup
        fails for any reason (network error, timeout, non-200 status, a body
        that is not JSON, or a reply without a ``country_code`` field).
    """
    # NOTE(review): confirm that this endpoint is still in service.
    search_string = 'https://freegeoip.net/json/' + IP
    try:
        # Without a timeout a stalled connection would block the report forever.
        reply = requests.get(search_string, timeout=timeout)
    except requests.RequestException:
        return '--'
    if reply.status_code != 200:
        return '--'
    try:
        return reply.json().get('country_code', '--')
    except ValueError:
        # The body was not valid JSON.
        return '--'
     

# Tally hits per client host, leaving out requests made from the machine itself.
with open(FILE_LOCATION) as fh:
    for line in fh:
        parse_items = parse_log_line(line)
        host = parse_items['host']
        if host in ('::1', '127.0.0.1'):
            continue  # loopback traffic is not counted
        ipCounter[host] += 1

# Banner: blank line, upper-cased title, underline of the same width, blank line.
msg = 'Top 10 IP-Address from most hits received'
print('', msg.upper(), '-' * len(msg), '', sep='\n')

# One row per host: address, country code, ' : ', hit count.
# Each row triggers one lookup through get_country_name().
for ip, hit in ipCounter.most_common(10):
    cc = get_country_name(ip)
    print('{:<16}{:<2}{:^3}{}'.format(ip, cc, ':', hit))

print()


TOP 10 IP-ADDRESS FROM MOST HITS RECEIVED
-----------------------------------------

149.202.89.239  FR : 2104
81.138.141.38   GB : 167
79.137.67.130   FR : 110
157.55.39.62    US : 108
82.36.170.105   GB : 93
124.205.209.4   CN : 85
52.74.21.59     SG : 85
155.133.45.237  PL : 85
124.42.118.111  CN : 85
185.165.41.171  IR : 85
