Skip to content
This repository has been archived by the owner on Jul 13, 2023. It is now read-only.

Commit

Permalink
feat: normalize user-agent for datadog and parse for raw logging
Browse files Browse the repository at this point in the history
The user-agent string is now parsed and normalized to fewer values for DataDog. The
raw parsed version is also included in all websocket.py logging statements for easier
filtering in our logging. Existing raw user_agent string will remain so that existing log
parsing systems using it can function as normal.

Closes #487
  • Loading branch information
bbangert committed Jul 26, 2016
1 parent eee0733 commit 8c4c636
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 17 deletions.
31 changes: 31 additions & 0 deletions autopush/tests/test_utils.py
@@ -0,0 +1,31 @@
import unittest

from nose.tools import eq_


class TestUserAgentParser(unittest.TestCase):
def _makeFUT(self, *args):
from autopush.utils import parse_user_agent
return parse_user_agent(*args)

def test_linux_extraction(self):
dd, raw = self._makeFUT('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.2) Gecko/20090807 Mandriva Linux/1.9.1.2-1.1mud2009.1 (2009.1) Firefox/3.5.2 FirePHP/0.3,gzip(gfe),gzip(gfe)') # NOQA
eq_(dd["ua_os_family"], "Linux")
eq_(raw["ua_os_family"], "Mandriva")

def test_windows_extraction(self):
dd, raw = self._makeFUT('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (.NET CLR 3.5.30729)') # NOQA
eq_(dd["ua_os_family"], "Windows")
eq_(raw["ua_os_family"], "Windows 7")

def test_valid_os(self):
dd, raw = self._makeFUT('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; rv:2.1.1) Gecko/ Firefox/5.0.1') # NOQA
eq_(dd["ua_os_family"], "Mac OS X")
eq_(raw["ua_os_family"], "Mac OS X")

def test_other_os_and_browser(self):
dd, raw = self._makeFUT('BlackBerry9000/4.6.0.167 Profile/MIDP-2.0 Configuration/CLDC-1.1 VendorID/102') # NOQA
eq_(dd["ua_os_family"], "Other")
eq_(raw["ua_os_family"], "BlackBerry OS")
eq_(dd["ua_browser_family"], "Other")
eq_(raw["ua_browser_family"], "BlackBerry")
16 changes: 10 additions & 6 deletions autopush/tests/test_websocket.py
Expand Up @@ -74,7 +74,9 @@ def setUp(self):
self.proto.ap_settings = settings
self.proto.sendMessage = self.send_mock = Mock()
self.orig_close = self.proto.sendClose
self.proto.ps = PushState(settings=settings, request=Mock())
request_mock = Mock()
request_mock.headers = {}
self.proto.ps = PushState(settings=settings, request=request_mock)
self.proto.sendClose = self.close_mock = Mock()
self.proto.transport = self.transport_mock = Mock()
self.proto.closeHandshakeTimeout = 0
Expand Down Expand Up @@ -197,11 +199,13 @@ def test_headers_locate(self):

def test_base_tags(self):
req = Mock()
req.headers = {'user-agent': "tester"}
req.headers = {'user-agent': "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (.NET CLR 3.5.30729)"} # NOQA
req.host = "example.com:8080"
ps = PushState(settings=self.proto.ap_settings, request=req)
eq_(ps._base_tags, ['user_agent:tester',
'host:example.com:8080'])
eq_(sorted(ps._base_tags),
sorted(['ua_os_family:Windows',
'ua_browser_family:Firefox',
'host:example.com:8080']))

def test_reporter(self):
from autopush.websocket import periodic_reporter
Expand Down Expand Up @@ -1205,7 +1209,7 @@ def test_ws_unregister(self):
def check_unregister_result(msg):
eq_(msg["status"], 200)
eq_(msg["channelID"], chid)
eq_(len(self.proto.log.mock_calls), 1)
eq_(len(self.proto.log.mock_calls), 2)
assert_called_included(self.proto.log.info, format="Unregister")
d.callback(True)

Expand Down Expand Up @@ -1365,7 +1369,7 @@ def check_hello_result(msg):

# Verify it was cleared out
eq_(len(self.proto.ps.direct_updates), 0)
eq_(len(self.proto.log.info.mock_calls), 1)
eq_(len(self.proto.log.info.mock_calls), 2)
assert_called_included(self.proto.log.info,
format="Ack",
router_key="simplepush",
Expand Down
62 changes: 61 additions & 1 deletion autopush/utils.py
Expand Up @@ -9,10 +9,19 @@
import ecdsa
import requests
from jose import jws

from twisted.logger import Logger
from twisted.python import failure
from ua_parser import user_agent_parser


# List of valid user-agent attributes to keep, anything not in this list is
# considered 'Other'. We log the user-agent on connect always to retain the
# full string, but for DD more tags are expensive so we limit to these.
VALID_UA_BROWSER = ["Chrome", "Firefox", "Safari", "Opera"]
# See test_os.yaml in github.com/ua-parser/uap-core for full list
# We special case Windows since it has 8 values, and we only care that its
# Windows
VALID_UA_OS = ["Firefox OS", "Linux", "Mac OS X"]

default_ports = {
"ws": 80,
Expand Down Expand Up @@ -138,6 +147,57 @@ def extract_jwt(token, crypto_key):
return json.loads(jws.verify(token, vk, algorithms=["ES256"]))


def parse_user_agent(agent_string):
"""Extracts user-agent data from a UA string
Parses the user-agent into two forms. A limited one suitable for Datadog
logging with limited tags, and a full string suitable for complete logging.
:returns: A tuple of dicts, the first being the Datadog limited and the
second being the complete info.
:rtype: (dict, dict)
"""
parsed = user_agent_parser.Parse(agent_string)
dd_info = {}
raw_info = {}

# Parse out the OS family
ua_os = parsed["os"]
ua_os_family = raw_info["ua_os_family"] = ua_os["family"]
if ua_os_family.startswith("Windows"):
# Windows has a bunch of additional version bits in the family string
dd_info["ua_os_family"] = "Windows"
elif ua_os_family in VALID_UA_OS:
dd_info["ua_os_family"] = ua_os_family
elif "Linux" in agent_string:
# Incredibly annoying, but the user agent parser returns things like
# 'Mandriva' and 'Unbuntu' sometimes instead of just saying Linux
dd_info["ua_os_family"] = "Linux"
else:
dd_info["ua_os_family"] = "Other"

# Parse out the full version for raw info, too many combos for DataDog
bits = ["major", "minor", "patch", "patch_minor"]
os_bits = [ua_os[x] for x in bits]
raw_info["ua_os_ver"] = ".".join(filter(None, os_bits))

# Parse out the browser family
ua_browser = parsed["user_agent"]
ua_browser_family = raw_info["ua_browser_family"] = ua_browser["family"]
if ua_browser_family in VALID_UA_BROWSER:
dd_info["ua_browser_family"] = ua_browser_family
else:
dd_info["ua_browser_family"] = "Other"

# Parse out the full browser version
bits = ["major", "minor", "patch"]
browser_bits = [ua_browser[x] for x in bits]
raw_info["ua_browser_ver"] = ".".join(filter(None, browser_bits))

return dd_info, raw_info


class ErrorLogger(object):
log = Logger()

Expand Down
36 changes: 26 additions & 10 deletions autopush/websocket.py
Expand Up @@ -58,8 +58,8 @@
from twisted.logger import Logger
from twisted.protocols import policies
from twisted.python import failure
from zope.interface import implements
from twisted.web.resource import Resource
from zope.interface import implements

from autopush import __version__
from autopush.db import (
Expand All @@ -68,7 +68,11 @@
generate_last_connect
)
from autopush.protocol import IgnoreBody
from autopush.utils import validate_uaid, ErrorLogger
from autopush.utils import (
ErrorLogger,
parse_user_agent,
validate_uaid,
)
from autopush.noseplugin import track_object


Expand Down Expand Up @@ -134,6 +138,7 @@ class PushState(object):
'metrics',
'uaid',
'uaid_hash',
'raw_agent',
'last_ping',
'check_storage',
'use_webpush',
Expand Down Expand Up @@ -173,8 +178,11 @@ def __init__(self, settings, request):
else:
self._user_agent = None
self._base_tags = []
self.raw_agent = {}
if self._user_agent:
self._base_tags.append("user_agent:%s" % self._user_agent)
dd_tags, self.raw_agent = parse_user_agent(self._user_agent)
for tag_name, tag_value in dd_tags.items():
self._base_tags.append("%s:%s" % (tag_name, tag_value))
if host:
self._base_tags.append("host:%s" % host)

Expand Down Expand Up @@ -791,6 +799,8 @@ def finish_hello(self, previous):
msg['env'] = self.ap_settings.env
self.ap_settings.clients[self.ps.uaid] = self
self.sendJSON(msg)
self.log.info(format="hello", uaid_hash=self.ps.uaid_hash,
**self.ps.raw_agent)
self.ps.metrics.increment("updates.client.hello", tags=self.base_tags)
self.process_notifications()

Expand Down Expand Up @@ -1046,7 +1056,8 @@ def send_register_finish(self, result, endpoint, chid):
self.log.info(format="Register", channelID=chid,
endpoint=endpoint,
uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent)
user_agent=self.ps.user_agent,
**self.ps.raw_agent)

def process_unregister(self, data):
"""Process an unregister message"""
Expand All @@ -1063,7 +1074,8 @@ def process_unregister(self, data):

event = dict(format="Unregister", channelID=chid,
uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent)
user_agent=self.ps.user_agent,
**self.ps.raw_agent)
if "code" in data:
event["code"] = extract_code(data)
self.log.info(**event)
Expand Down Expand Up @@ -1128,7 +1140,8 @@ def ver_filter(update):
self.log.info(format="Ack", router_key="webpush", channelID=chid,
message_id=version, message_source="direct",
message_size=size, uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent, code=code)
user_agent=self.ps.user_agent, code=code,
**self.ps.raw_agent)
self.ps.direct_updates[chid].remove(msg)
return

Expand All @@ -1139,7 +1152,8 @@ def ver_filter(update):
self.log.info(format="Ack", router_key="webpush", channelID=chid,
message_id=version, message_source="stored",
message_size=size, uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent, code=code)
user_agent=self.ps.user_agent, code=code,
**self.ps.raw_agent)
d = self.force_retry(self.ps.message.delete_message,
uaid=self.ps.uaid,
channel_id=chid,
Expand Down Expand Up @@ -1173,12 +1187,14 @@ def _handle_simple_ack(self, chid, version, code):
channelID=chid, message_id=version,
message_source="direct",
uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent, code=code)
user_agent=self.ps.user_agent, code=code,
**self.ps.raw_agent)
return
self.log.info(format="Ack", router_key="simplepush", channelID=chid,
message_id=version, message_source="stored",
uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent, code=code)
user_agent=self.ps.user_agent, code=code,
**self.ps.raw_agent)
if chid in self.ps.updates_sent and \
self.ps.updates_sent[chid] <= version:
del self.ps.updates_sent[chid]
Expand Down Expand Up @@ -1215,7 +1231,7 @@ def process_nack(self, data):

self.log.info(format="Nack", uaid_hash=self.ps.uaid_hash,
user_agent=self.ps.user_agent, message_id=version,
code=code)
code=code, **self.ps.raw_agent)

def check_missed_notifications(self, results, resume=False):
"""Check to see if notifications were missed"""
Expand Down
1 change: 1 addition & 0 deletions pypy-requirements.txt
Expand Up @@ -53,6 +53,7 @@ six==1.10.0
translationstring==1.3
-e git+https://github.com/habnabit/txstatsd.git@157ef85fbdeafe23865c7c4e176237ffcb3c3f1f#egg=txStatsD-master
txaio==2.5.1
ua_parser==0.7.1
virtualenv==15.0.1
wsaccel==0.6.2
xmltodict==0.10.1
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -53,6 +53,7 @@ six==1.10.0
translationstring==1.3
-e git+https://github.com/habnabit/txstatsd.git@157ef85fbdeafe23865c7c4e176237ffcb3c3f1f#egg=txStatsD-master
txaio==2.5.1
ua_parser==0.7.1
virtualenv==15.0.1
wsaccel==0.6.2
xmltodict==0.10.1
Expand Down
1 change: 1 addition & 0 deletions test-requirements.txt
Expand Up @@ -61,6 +61,7 @@ six==1.10.0
translationstring==1.3
-e git+https://github.com/habnabit/txstatsd.git@157ef85fbdeafe23865c7c4e176237ffcb3c3f1f#egg=txStatsD-master
txaio==2.5.1
ua_parser==0.7.1
virtualenv==15.0.1
wsaccel==0.6.2
xmltodict==0.10.1
Expand Down

0 comments on commit 8c4c636

Please sign in to comment.