From 92eb866f54f0918b918446802c46e894b0801752 Mon Sep 17 00:00:00 2001 From: Marcello Nuccio Date: Wed, 24 Jun 2015 16:11:25 +0200 Subject: [PATCH 1/3] Add option for date format in logs. Custom log format can be specified with --log-format-regex option. But there's no way to also specify a format string for the date. --- import_logs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/import_logs.py b/import_logs.py index cb3a614..dff3111 100755 --- a/import_logs.py +++ b/import_logs.py @@ -562,6 +562,10 @@ def _create_parser(self): "Recognized fields include: %s. For an example of a supported Regex, see the source code of this file. " "Overrides --log-format-name." % (', '.join(available_regex_groups)) ) + option_parser.add_option( + '--log-date-format', dest='log_date_format', default=None, + help="Format string used to parse dates." + ) option_parser.add_option( '--log-hostname', dest='log_hostname', default=None, help="Force this hostname for a log format that doesn't include it. All hits " @@ -762,7 +766,7 @@ def _parse_args(self, option_parser): logging.debug('Accepted hostnames: all') if self.options.log_format_regex: - self.format = RegexFormat('custom', self.options.log_format_regex) + self.format = RegexFormat('custom', self.options.log_format_regex, self.options.log_date_format) elif self.options.log_format_name: try: self.format = FORMATS[self.options.log_format_name] From 32052a3aa64415c569e59868f20d788e051a5a16 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Tue, 25 Aug 2015 08:17:54 -0700 Subject: [PATCH 2/3] Add some more documentation to --log-date-format option and print out exception reason when an invalid date is found for easier debugging. 
--- import_logs.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/import_logs.py b/import_logs.py index dff3111..9847fdf 100755 --- a/import_logs.py +++ b/import_logs.py @@ -564,7 +564,8 @@ def _create_parser(self): ) option_parser.add_option( '--log-date-format', dest='log_date_format', default=None, - help="Format string used to parse dates." + help="Format string used to parse dates. You can specify any format that can also be specified to " + "the strptime python function." ) option_parser.add_option( '--log-hostname', dest='log_hostname', default=None, @@ -2090,8 +2091,8 @@ def invalid_line(line, reason): date_string = format.get('date') try: hit.date = datetime.datetime.strptime(date_string, format.date_format) - except ValueError: - invalid_line(line, 'invalid date') + except ValueError, e: + invalid_line(line, 'invalid date or invalid format: %s' % str(e)) continue # Parse timezone and substract its value from the date From 1d8b954d4c8d456a49790441f93b5ece54c956c0 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Tue, 25 Aug 2015 08:18:27 -0700 Subject: [PATCH 3/3] Adding python test for --log-date-format option. 
--- tests/logs/custom_regex_custom_date.log | 1 + tests/tests.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/logs/custom_regex_custom_date.log diff --git a/tests/logs/custom_regex_custom_date.log b/tests/logs/custom_regex_custom_date.log new file mode 100644 index 0000000..9cc1735 --- /dev/null +++ b/tests/logs/custom_regex_custom_date.log @@ -0,0 +1 @@ +1.2.3.4 - - [February - 10, 2012:16:42:07] "GET / HTTP/1.0" 301 368 diff --git a/tests/tests.py b/tests/tests.py index 519699c..f913dca 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -692,3 +692,28 @@ def test_w3c_custom_field_regex_option(): assert match is not None assert format.get('substatus') == '654' assert format.get('win32_status') == '456' + +def test_custom_log_date_format_option(): + """Test that --log-date-format will change how dates are parsed in a custom log format.""" + + file_ = 'logs/custom_regex_custom_date.log' + + # have to override previous globals override for this test + Recorder.recorders = [] + import_logs.parser = import_logs.Parser() + import_logs.config.options.w3c_field_regexes = None + import_logs.config.options.regex_group_to_visit_cvars_map = None + import_logs.config.options.regex_group_to_page_cvars_map = None + import_logs.config.options.log_format_regex = ( + '(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<date>.*?)\]\s+' + '"\S+\s+(?P<path>.*?)\s+\S+"\s+(?P<status>\S+)\s+(?P<length>\S+)' + ) + import_logs.config.options.log_date_format = '%B - %d, %Y:%H:%M:%S' + import_logs.config.format = import_logs.RegexFormat('custom', import_logs.config.options.log_format_regex, + import_logs.config.options.log_date_format) + + import_logs.parser.parse(file_) + + hits = [hit.__dict__ for hit in Recorder.recorders] + + assert hits[0]['date'] == datetime.datetime(2012, 2, 10, 16, 42, 7)