From a2600380daf060660bfda9601a24fefd981cd7ec Mon Sep 17 00:00:00 2001 From: Mark Reid Date: Wed, 28 Jun 2017 10:33:17 -0700 Subject: [PATCH 1/3] Bug 1376028 - Stop using lazy json parsing --- moztelemetry/heka/message_parser.py | 39 +---------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/moztelemetry/heka/message_parser.py b/moztelemetry/heka/message_parser.py index df7ec93..af23a59 100644 --- a/moztelemetry/heka/message_parser.py +++ b/moztelemetry/heka/message_parser.py @@ -82,7 +82,7 @@ def _parse_heka_record(record): def _add_field(container, keys, value): if len(keys) == 1: blob = value[0] if len(value) else "" - container[keys[0]] = _lazyjson(blob) + container[keys[0]] = _parse_json(blob) return key = keys.pop(0) @@ -99,43 +99,6 @@ def _parse_json(string): return result -def _lazyjson(content): - if not isinstance(content, basestring): - raise ValueError("Argument must be a string.") - - if content.startswith("{"): - default = {} - elif content.startswith("["): - default = [] - else: - try: - return float(content) if '.' in content or 'e' in content.lower() else int(content) - except: - return content - - class WrapperType(type(default)): - pass - - def wrap(method_name): - def _wrap(*args, **kwargs): - if not hasattr(WrapperType, '__cache__'): - setattr(WrapperType, '__cache__', _parse_json(content)) - - cached = WrapperType.__cache__ - method = getattr(cached, method_name) - return method(*args[1:], **kwargs) - - return _wrap - - wrapper = WrapperType(default) - for k, v in type(default).__dict__.iteritems(): - if k == "__doc__": - continue - else: - setattr(WrapperType, k, wrap(k)) - return wrapper - - _record_separator = 0x1e From 8761b6c68ab4db7d5a8e9797933460bee0245ded Mon Sep 17 00:00:00 2001 From: Mark Reid Date: Wed, 28 Jun 2017 11:04:57 -0700 Subject: [PATCH 2/3] Bug 1376028 - Add test coverage Add a test that exposes the problem with lazy json evaluation. --- tests/heka/test_message_parser.py | 47 ++++++++++++++++++------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tests/heka/test_message_parser.py b/tests/heka/test_message_parser.py index adf6113..14f566a 100644 --- a/tests/heka/test_message_parser.py +++ b/tests/heka/test_message_parser.py @@ -97,22 +97,31 @@ def test_json_fallback(): assert TOO_BIG == message_parser._parse_json(str(TOO_BIG)) -def test_lazy_parsing(data_dir, monkeypatch): - mock_parse_json = MagicMock(name='_parse_json', - wraps=message_parser._parse_json) - monkeypatch.setattr(message_parser, '_parse_json', mock_parse_json) - - # this heka message has 20 json fields, only one of which should be parsed - # on first load - filename = "{}/test_telemetry_gzip.heka".format(data_dir) - with open(filename, "rb") as o: - heka_message = message_parser.parse_heka_message(streaming_gzip_wrapper(o)).next() - - # should only have parsed json *once* to get the payload field (other - # json/dictionary fields of the message should be parsed lazily) - assert mock_parse_json.call_count == 1 - - # deep copying the heka message should cause the lazily evaluated fields - # to be evaluated - copy.deepcopy(heka_message) - assert mock_parse_json.call_count == 20 # 19 lazily instantiated fields + original call +def test_json_keys(): + record = lambda: None + record.message = lambda: None + record.message.timestamp = 1 + record.message.type = "t" + record.message.hostname = "h" + record.message.payload = '{"a": 1}' + + f1 = lambda: None + f1.name = "f1.test" + f1.value_string = ['{"b": "bee"}'] + f1.value_type = 0 + + record.message.fields = [f1] + + parsed = message_parser._parse_heka_record(record) + + expected = {"a": 1, "f1": {"test": {"b": "bee"}}} + expected["meta"] = { + "Timestamp": 1, + "Type": "t", + "Hostname": "h", + } + + serialized = json.dumps(parsed) + e_serialized = json.dumps(expected) + + assert serialized == e_serialized From 79f5197592f015adf3606e0b1d14cda256e98234 Mon Sep 17 00:00:00 2001 From: Mark Reid Date: Wed, 28 Jun 2017 13:15:52 -0700 Subject: [PATCH 3/3] Fix flake8 complaints --- tests/heka/test_message_parser.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/heka/test_message_parser.py b/tests/heka/test_message_parser.py index 14f566a..1ce3c5a 100644 --- a/tests/heka/test_message_parser.py +++ b/tests/heka/test_message_parser.py @@ -6,7 +6,6 @@ import pytest import ujson from google.protobuf.message import DecodeError -from mock import MagicMock from moztelemetry.heka import message_parser from moztelemetry.util.streaming_gzip import streaming_gzip_wrapper @@ -98,14 +97,23 @@ def test_json_fallback(): def test_json_keys(): - record = lambda: None - record.message = lambda: None + class Message(): + pass + + class Field(): + pass + + class Record(): + def __init__(self): + self.message = Message() + + record = Record() record.message.timestamp = 1 record.message.type = "t" record.message.hostname = "h" record.message.payload = '{"a": 1}' - f1 = lambda: None + f1 = Field() f1.name = "f1.test" f1.value_string = ['{"b": "bee"}'] f1.value_type = 0