In [15]:
import re
from urllib.parse import urlparse
from datetime import datetime

class LogLine:
    """A single parsed access-log line.

    The expected line layout is::

        [DD/Mon/YYYY HH:MM:SS] "<METHOD> <URL> <PROTOCOL>" <code> <time>

    Parsed fields are available through item access (``log_line['request']``)
    under the keys ``request_date`` (``datetime``), ``request_type``,
    ``request``, ``protocol``, ``response_code`` and ``response_time``
    (the last two are kept as the digit strings captured from the line).

    Instances hash and compare by the stripped source line, so two objects
    built from the same line act as the same dictionary key.
    """

    compile_parser = re.compile(r'\[(.+)\] \"(.+) (.+) (.+)\" (\d+) (\d+)')

    def __init__(
        self,
        line,
        ignore_www=False
    ):
        """Parse ``line`` into its components.

        Args:
            line: Raw log line; surrounding whitespace is stripped.
            ignore_www: When true, a leading ``www.`` is removed from the
                host part of the stored request URL.

        Raises:
            LogLineParseException: If the line does not match the expected
                layout.
            ValueError: If the timestamp does not match ``DD/Mon/YYYY HH:MM:SS``.
        """
        self._line = line.strip()
        match_obj = LogLine.compile_parser.match(self._line)
        if not match_obj:
            # NOTE(review): LogLineParseException is not defined in this cell;
            # presumably it comes from an earlier cell -- confirm.
            raise LogLineParseException(self._line)
        self._line_components = {
            # Explicit %H:%M:%S instead of %X: %X depends on the active locale.
            'request_date': datetime.strptime(
                match_obj.group(1), '%d/%b/%Y %H:%M:%S'
            ),
            'request_type': match_obj.group(2),
            'request': match_obj.group(3),
            'protocol': match_obj.group(4),
            'response_code': match_obj.group(5),
            'response_time': match_obj.group(6)
        }
        if ignore_www:
            self._line_components['request'] = self._strip_www(
                self._line_components['request']
            )

    @staticmethod
    def _strip_www(request):
        """Return ``request`` with a leading ``www.`` removed from its host."""
        try:
            parts = urlparse(request)
        except ValueError:
            # Not parseable as a URL: leave the request untouched.
            return request
        if parts.netloc:
            if parts.netloc.lower().startswith('www.'):
                return parts._replace(netloc=parts.netloc[4:]).geturl()
            return request
        # Scheme-less request such as "www.example.com/path".
        if request.lower().startswith('www.'):
            return request[4:]
        return request

    def __getitem__(self, key):
        """Return the parsed component stored under ``key``.

        Raises:
            KeyError: If ``key`` is not one of the parsed component names.
        """
        return self._line_components[key]

    def get_url(self):
        """Return the request URL as host + path (no scheme, query or fragment)."""
        url = urlparse(self['request'])
        return url.netloc + url.path

    def __str__(self):
        """Return the stripped source line."""
        return self._line

    def __eq__(self, other):
        """Compare by stripped source line, consistent with ``__hash__``."""
        if not isinstance(other, LogLine):
            return NotImplemented
        return self._line == other._line

    def __hash__(self):
        """Hash of the stripped source line."""
        return hash(self._line)

In [16]:
# Empty mapping that will be keyed by LogLine objects, plus one sample line.
d = {}
log_line = LogLine(
    '[21/Mar/2018 21:32:09] "GET https://sys.mail.ru/static/css/reset.css HTTPS/1.1" 200 1090'
)

In [17]:
# Use the LogLine instance itself as a dictionary key (relies on LogLine.__hash__).
d[log_line] = 0

In [18]:
# Display the dictionary to confirm the LogLine object was stored as a key.
d

{<__main__.LogLine at 0x7f95f27d5e80>: 0}