Skip to content

Commit

Permalink
Merge pull request #92 from piwik/date-format-option2
Browse files Browse the repository at this point in the history
Add --log-date-format option for date format in logs (second PR)
  • Loading branch information
diosmosis committed Aug 25, 2015
2 parents 2a6f298 + 1d8b954 commit 17f6c14
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
11 changes: 8 additions & 3 deletions import_logs.py
Expand Up @@ -562,6 +562,11 @@ def _create_parser(self):
"Recognized fields include: %s. For an example of a supported Regex, see the source code of this file. "
"Overrides --log-format-name." % (', '.join(available_regex_groups))
)
option_parser.add_option(
'--log-date-format', dest='log_date_format', default=None,
help="Format string used to parse dates. You can specify any format that can also be specified to "
"the strptime python function."
)
option_parser.add_option(
'--log-hostname', dest='log_hostname', default=None,
help="Force this hostname for a log format that doesn't include it. All hits "
Expand Down Expand Up @@ -762,7 +767,7 @@ def _parse_args(self, option_parser):
logging.debug('Accepted hostnames: all')

if self.options.log_format_regex:
self.format = RegexFormat('custom', self.options.log_format_regex)
self.format = RegexFormat('custom', self.options.log_format_regex, self.options.log_date_format)
elif self.options.log_format_name:
try:
self.format = FORMATS[self.options.log_format_name]
Expand Down Expand Up @@ -2086,8 +2091,8 @@ def invalid_line(line, reason):
date_string = format.get('date')
try:
hit.date = datetime.datetime.strptime(date_string, format.date_format)
except ValueError:
invalid_line(line, 'invalid date')
except ValueError, e:
invalid_line(line, 'invalid date or invalid format: %s' % str(e))
continue

# Parse timezone and subtract its value from the date
Expand Down
1 change: 1 addition & 0 deletions tests/logs/custom_regex_custom_date.log
@@ -0,0 +1 @@
1.2.3.4 - - [February - 10, 2012:16:42:07] "GET / HTTP/1.0" 301 368
25 changes: 25 additions & 0 deletions tests/tests.py
Expand Up @@ -692,3 +692,28 @@ def test_w3c_custom_field_regex_option():
assert match is not None
assert format.get('substatus') == '654'
assert format.get('win32_status') == '456'

def test_custom_log_date_format_option():
    """Test that --log-date-format will change how dates are parsed in a custom log format.

    A custom regex format is combined with the strptime pattern
    ``%B - %d, %Y:%H:%M:%S``; the first recorded hit must carry the date
    2012-02-10 16:42:07 parsed with that pattern.
    """
    file_ = 'logs/custom_regex_custom_date.log'

    # have to override previous globals override for this test
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()

    options = import_logs.config.options
    options.w3c_field_regexes = None
    options.regex_group_to_visit_cvars_map = None
    options.regex_group_to_page_cvars_map = None

    # Raw strings keep the regex escapes (\S, \s, \[) literal instead of
    # relying on Python passing unrecognised string escapes through unchanged.
    options.log_format_regex = (
        r'(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<date>.*?)\]\s+'
        r'"\S+\s+(?P<path>.*?)\s+\S+"\s+(?P<status>\S+)\s+(?P<length>\S+)'
    )
    options.log_date_format = '%B - %d, %Y:%H:%M:%S'

    # The third argument hands the custom date format to the regex format.
    import_logs.config.format = import_logs.RegexFormat(
        'custom', options.log_format_regex, options.log_date_format
    )

    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    # Fail with a readable message rather than an IndexError when nothing was
    # recorded (e.g. the line was rejected as having an invalid date).
    assert hits, 'no hits were recorded for %s' % file_
    assert hits[0]['date'] == datetime.datetime(2012, 2, 10, 16, 42, 7)

0 comments on commit 17f6c14

Please sign in to comment.