-
Notifications
You must be signed in to change notification settings - Fork 28
/
main.py
54 lines (45 loc) · 1.27 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Module metadata: author name, contact address and license marker.
# Note that __license__ holds the string "None", not the None object.
__author__ = "Richard O'Dwyer"
__email__ = "richard@richard.do"
__license__ = "None"
import re
def process_log(log):
    """Count how often each file is requested in an access log.

    The log is run through three steps: ``get_requests`` extracts the
    request matches, ``get_files`` picks the requested file out of each
    match, and ``file_occur`` tallies them.

    Args:
        log: Readable file object holding the access log.

    Returns:
        The dict produced by ``file_occur``: requested file -> number of
        requests.
    """
    return file_occur(get_files(get_requests(log)))
def get_requests(f):
    """Read an access log and extract the fields of every GET request.

    Args:
        f: Readable text file object; it is consumed in full via ``f.read()``.

    Returns:
        Whatever ``find`` returns for the request pattern: a list of
        5-tuples ``(ip, datetime, requested_file, referrer, user_agent)``,
        one per matching request, or ``False`` when nothing matches.
    """
    log_text = f.read()
    # Every fragment must be a raw string: in a plain literal, escapes such
    # as \d, \s and \[ are invalid string escapes (SyntaxWarning on recent
    # Python versions). Adjacent literals are concatenated into one pattern.
    pat = (
        r'(\d+\.\d+\.\d+\.\d+)\s-\s-\s'  # IP address (dots matched literally)
        r'\[(.+)\]\s'  # datetime
        r'"GET\s(.+)\s\w+/.+"\s\d+\s'  # requested file
        r'\d+\s"(.+)"\s'  # referrer
        r'"(.+)"'  # user agent
    )
    # "." does not match newlines here, so each match stays within one line.
    requests = find(pat, log_text, None)
    return requests
def find(pat, text, match_item):
    """Return every match of ``pat`` in ``text``, or ``False`` if none.

    Args:
        pat: Regular expression pattern passed to ``re.findall``.
        text: String to search.
        match_item: Accepted but not used by this function.

    Returns:
        The non-empty list produced by ``re.findall``, or ``False`` when
        that list is empty.
    """
    # An empty result list is falsy, so "or" swaps it for False.
    return re.findall(pat, text) or False
def get_files(requests):
    """Collect the requested file from each request match.

    Args:
        requests: Iterable of match tuples in which index 2 is the
            requested file, or a falsy value (``find`` returns ``False``
            when the log contains no matching request).

    Returns:
        List of requested files, one entry per request; empty when there
        are no requests.
    """
    # A falsy input means "no matches"; iterating over False would raise
    # TypeError, so treat it as an empty result instead.
    if not requests:
        return []
    # req[2] is the requested-file group; change the index to count a
    # different captured field.
    return [req[2] for req in requests]
def file_occur(files):
    """Tally how many times each entry appears in ``files``.

    Args:
        files: Iterable of hashable items (the requested files).

    Returns:
        Dict mapping each distinct item to its number of occurrences.
    """
    counts = {}
    for name in files:
        if name in counts:
            counts[name] += 1
        else:
            # first time this item is seen
            counts[name] = 1
    return counts
if __name__ == '__main__':
    # nginx access log, standard format. The with-block closes the file
    # once processing is done, even if parsing raises.
    with open('example.log', 'r') as log_file:
        # print the dict of files and total requests
        print(process_log(log_file))