In [1]:
import re
from datetime import datetime
from typing import List, Optional


In [3]:
class ApacheLogEntry:
    """Plain container for the fields of a single Apache access-log record.

    Every constructor argument is stored unchanged on an attribute of the
    same name; no validation or conversion happens here.

    Note: the ``datetime`` parameter shadows the imported ``datetime`` class
    inside ``__init__``, so the class itself is not reachable by that name
    within the method body.
    """

    def __init__(self, ip: str, datetime: datetime, method: str, url: str, protocol: str, status: int, size: int, referer: str, user_agent: str):
        # Client address and request timestamp.
        self.ip, self.datetime = ip, datetime
        # The three components of the request line.
        self.method, self.url, self.protocol = method, url, protocol
        # Response status code and response size.
        self.status, self.size = status, size
        # Referer and User-Agent values.
        self.referer, self.user_agent = referer, user_agent

    def __repr__(self) -> str:
        """Return a short summary; protocol, size, referer and user agent are omitted."""
        shown = ("ip", "datetime", "method", "url", "status")
        summary = ", ".join(f"{name}={getattr(self, name)}" for name in shown)
        return f"<ApacheLogEntry({summary})>"


In [4]:
# Regular expression for one access-log line of the shape
#   host ident user [timestamp] "METHOD URL PROTOCOL" status size "referer" "user-agent"
# - The host group keeps the name ``ip`` for compatibility but accepts any
#   non-whitespace token, so IPv6 addresses and hostnames also match.
# - The ident and user fields are matched but not captured (usually "-").
# - The size group accepts "-" as well as digits, because Apache writes "-"
#   when no response body was sent (e.g. 304 responses).
log_pattern = re.compile(
    r'(?P<ip>\S+) \S+ \S+ '
    r'\[(?P<datetime>[^\]]+)\] '
    r'"(?P<method>\S+) (?P<url>\S+) (?P<protocol>\S+)" '
    r'(?P<status>\d+) (?P<size>\d+|-) '
    r'"(?P<referer>[^"]*)" "(?P<user_agent>[^"]*)"'
)


def parse_log_line(line: str) -> Optional["ApacheLogEntry"]:
    """Parse a single access-log line into an ``ApacheLogEntry``.

    Args:
        line: One raw log line. Matching is anchored at the start of the
            string; trailing text after the user-agent field is ignored.

    Returns:
        The parsed entry, or ``None`` when the line does not match
        ``log_pattern`` or its bracketed timestamp is not in
        ``%d/%b/%Y:%H:%M:%S %z`` form. A size logged as ``-`` is reported
        as ``0``.
    """
    match = log_pattern.match(line)
    if match is None:
        return None

    data = match.groupdict()

    # The regex only checks that the timestamp is bracketed; strptime does the
    # real validation. Treat a malformed timestamp like any other unparseable
    # line instead of letting ValueError escape.
    try:
        timestamp = datetime.strptime(data['datetime'], '%d/%b/%Y:%H:%M:%S %z')
    except ValueError:
        return None

    # "-" means no bytes were sent; keep ``size`` an int as the entry expects.
    size = 0 if data['size'] == '-' else int(data['size'])

    return ApacheLogEntry(
        ip=data['ip'],
        datetime=timestamp,
        method=data['method'],
        url=data['url'],
        protocol=data['protocol'],
        status=int(data['status']),
        size=size,
        referer=data['referer'],
        user_agent=data['user_agent'],
    )

In [5]:
# Sample input: three access-log lines from the same client. Each literal is
# split across adjacent string pieces (timestamp prefix, request + status +
# size, referer, user agent); Python joins them into one line per entry.
log_lines: List[str] = [
    (
        '83.149.9.216 - - [17/May/2015:10:05:03 +0000] '
        '"GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 '
        '"http://semicomplete.com/presentations/logstash-monitorama-2013/" '
        '"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'
    ),
    (
        '83.149.9.216 - - [17/May/2015:10:05:43 +0000] '
        '"GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1" 200 171717 '
        '"http://semicomplete.com/presentations/logstash-monitorama-2013/" '
        '"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'
    ),
    (
        '83.149.9.216 - - [17/May/2015:10:05:47 +0000] '
        '"GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1" 200 26185 '
        '"http://semicomplete.com/presentations/logstash-monitorama-2013/" '
        '"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'
    ),
]

# Run every sample through the parser, keeping input order. The element type
# is Optional because parse_log_line returns None for lines it cannot parse.
log_entries: List[Optional[ApacheLogEntry]] = list(map(parse_log_line, log_lines))

# Show each result on its own line (uses ApacheLogEntry.__repr__).
for entry in log_entries:
    print(entry)


<ApacheLogEntry(ip=83.149.9.216, datetime=2015-05-17 10:05:03+00:00, method=GET, url=/presentations/logstash-monitorama-2013/images/kibana-search.png, status=200)>
<ApacheLogEntry(ip=83.149.9.216, datetime=2015-05-17 10:05:43+00:00, method=GET, url=/presentations/logstash-monitorama-2013/images/kibana-dashboard3.png, status=200)>
<ApacheLogEntry(ip=83.149.9.216, datetime=2015-05-17 10:05:47+00:00, method=GET, url=/presentations/logstash-monitorama-2013/plugin/highlight/highlight.js, status=200)>
