Skip to content

Commit

Permalink
First pass at HTTP POST handling
Browse files Browse the repository at this point in the history
This implements part of what we need with HTTP POST handling, but
doesn't include tests and doesn't have fleshed out throttling or
storage.
  • Loading branch information
willkg committed May 27, 2016
1 parent 8f48b4d commit 8d52df0
Show file tree
Hide file tree
Showing 9 changed files with 263 additions and 3 deletions.
200 changes: 197 additions & 3 deletions antenna/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,21 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import base64
import cgi
import cStringIO
import json
import logging
import time
import zlib
from functools import wraps

import falcon

from antenna.configlib import ConfigManager
from antenna.configlib import ConfigManager, parse_class
from antenna.lib.datetimeutil import utc_now
from antenna.lib.ooid import create_new_ooid
from antenna.lib.storage import Storage
from antenna.throttler import ACCEPT, DEFER, DISCARD, IGNORE

logger = logging.getLogger('gunicorn.error')

Expand Down Expand Up @@ -37,7 +45,6 @@ def auth_error():
'Authentication required',
['Basic']
)

@wraps(fun)
def view_fun(resource, req, resp, *args, **kwargs):
auth = req.auth
Expand All @@ -57,13 +64,199 @@ def view_fun(resource, req, resp, *args, **kwargs):
return view_fun


# FIXME: Figure out whether we need to implement this or nix it. For now, we're
# just going to define it as a synonym.
MemoryDumpsMapping = dict


class BreakpadSubmitterResource(object):
    """Falcon resource that accepts Breakpad crash reports via HTTP POST.

    A submission is parsed from the posted form, stamped with timestamps and
    a crash id, run through the throttler and, unless the throttle result is
    DISCARD or IGNORE, handed to the crash storage.
    """

    def __init__(self, config):
        """Read the collector settings and build the throttler and storage.

        :arg config: configuration callable, called as
            ``config(key, default=..., parser=...)``
        """
        # the default name for the main dump
        self.dump_field = config(
            'dump_field', default='upload_file_minidump'
        )

        # the prefix to return to the client in front of the OOID
        self.dump_id_prefix = config(
            'dump_id_prefix', default='bp-'
        )

        # NOTE(review): the two flags below pass ``parser=bool`` with a string
        # default. Plain ``bool('False')`` is True, so this is only correct if
        # the config layer maps ``bool`` to a real string-to-boolean parser --
        # confirm against antenna.configlib.

        # a boolean telling the collector to use a legacy_processing flag
        # submitted with the crash
        self.accept_submitted_legacy_processing = config(
            'accept_submitted_legacy_processing', default='False', parser=bool
        )

        # a boolean telling the collector to use a crash_id provided in the
        # crash submission
        self.accept_submitted_crash_id = config(
            'accept_submitted_crash_id', default='False', parser=bool
        )

        # a reference to method that accepts a string and calculates a hash
        # value
        self.checksum_method = config(
            'checksum_method', default='hashlib.md5', parser=parse_class
        )

        # class that implements the throttling action
        throttler_class = config(
            'throttler_class', default='antenna.throttler.Throttler',
            parser=parse_class
        )
        self.throttler = throttler_class(config)

        # source storage class
        crash_storage_class = config(
            'crashstorage_class',
            default='antenna.external.boto.crashstorage.BotoS3CrashStorage',
            parser=parse_class
        )
        self.crash_storage = crash_storage_class(config)

    def _process_fieldstorage(self, fs):
        """Reduce form fields to plain values where possible

        Note: Copied from web.py.

        :arg fs: a form field, or a list of form fields

        :returns: the field's ``value`` when it has no filename, the field
            object itself when it does, or a list of those for a list
        """
        if isinstance(fs, list):
            return [self._process_fieldstorage(x) for x in fs]
        if fs.filename is None:
            return fs.value
        return fs

    def _read_gzipped_body(self, req):
        """Read and decompress a gzip-encoded request body

        :arg req: the WSGI request

        :returns: the decompressed body as a byte string

        :raises zlib.error: if the body is not a valid gzip stream
        """
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        gzip_header = 16 + zlib.MAX_WBITS
        # CONTENT_LENGTH is a string in the WSGI environ and may be missing or
        # empty; treat both as 0.
        content_length = int(req.env.get('CONTENT_LENGTH') or 0)
        return zlib.decompress(req.stream.read(content_length), gzip_header)

    def _form_as_mapping(self, req):
        """Extracts POST form data from request

        This handles gzip compressed form post data, too.

        :arg req: the WSGI request

        :returns: Storage instance with the data
        """
        env = req.env.copy()

        if req.env.get('HTTP_CONTENT_ENCODING') == 'gzip':
            # If the content is gzipped, we pull it out and decompress it. We
            # have to do that here because nginx doesn't have a good way to do
            # that in nginx-land.
            body = self._read_gzipped_body(req)
            # The environ still carries the compressed length; hand the form
            # parser the length of what it is actually going to read.
            env['CONTENT_LENGTH'] = str(len(body))
            data = cStringIO.StringIO(body)
        else:
            data = req.stream

        fs = cgi.FieldStorage(fp=data, environ=env, keep_blank_values=1)
        form = Storage(
            [(k, self._process_fieldstorage(fs[k])) for k in fs.keys()]
        )
        # FIXME: In the original collector, this returned request querystring
        # data as well as request body data.
        return form

    @staticmethod
    def _no_x00_character(value):
        """Remove x00 characters

        Note: We remove null characters because they are a hassle to deal with
        during reporting and cause problems when sending to Postgres.

        :arg value: a basestring with null characters in it

        :returns: same type basestring with null characters removed; any
            other value is returned unchanged
        """
        if isinstance(value, unicode):
            return value.replace(u'\u0000', u'')
        if isinstance(value, str):
            return value.replace('\x00', '')
        return value

    def _get_raw_crash_from_form(self, req):
        """Retrieves crash/dump data and fixes it

        :arg req: the WSGI request

        :returns: (raw_crash, dumps)
        """
        dumps = MemoryDumpsMapping()
        raw_crash = {}
        checksums = {}

        for name, value in self._form_as_mapping(req).iteritems():
            name = self._no_x00_character(name)
            if isinstance(value, basestring):
                # A client-supplied "dump_checksums" field is dropped; the
                # key is always set from the checksums computed below.
                if name != "dump_checksums":
                    raw_crash[name] = self._no_x00_character(value)
            elif hasattr(value, 'file') and hasattr(value, 'value'):
                # File upload: read the payload once and reuse it for both
                # the stored dump and its checksum.
                dump = value.value
                dumps[name] = dump
                checksums[name] = self.checksum_method(dump).hexdigest()
            elif isinstance(value, int):
                raw_crash[name] = value
            else:
                raw_crash[name] = value.value
        raw_crash['dump_checksums'] = checksums
        return raw_crash, dumps

    def _apply_throttling(self, raw_crash):
        """Set ``legacy_processing`` (and maybe ``throttle_rate``) in place

        A ``legacy_processing`` value submitted with the crash is used only
        when ``accept_submitted_legacy_processing`` is enabled and the value
        parses as an integer; it is stored as an int so it can be compared
        with the throttle result constants. In every other case the throttler
        decides and ``throttle_rate`` is set as well.

        :arg raw_crash: the raw crash dict; modified in place
        """
        if (self.accept_submitted_legacy_processing
                and 'legacy_processing' in raw_crash):
            try:
                submitted = int(raw_crash['legacy_processing'])
            except (TypeError, ValueError):
                # Unusable client-supplied value: fall back to the throttler
                # rather than failing the whole submission.
                submitted = None
            if submitted is not None:
                raw_crash['legacy_processing'] = submitted
                return

        raw_crash['legacy_processing'], raw_crash['throttle_rate'] = (
            self.throttler.throttle(raw_crash)
        )

    def on_post(self, req, resp):
        """Handle a crash report submission

        Responds with ``Discarded=1`` or ``Unsupported=1`` when the throttle
        result is DISCARD or IGNORE; otherwise saves the crash and responds
        with ``CrashID=<prefix><crash id>``.

        :arg req: the falcon request
        :arg resp: the falcon response; modified in place
        """
        raw_crash, dumps = self._get_raw_crash_from_form(req)

        # Set the content-type now. That way we can drop out of this method
        # whenever.
        resp.content_type = 'text/plain'

        current_timestamp = utc_now()
        raw_crash['submitted_timestamp'] = current_timestamp.isoformat()
        # legacy - ought to be removed someday
        raw_crash['timestamp'] = time.time()

        if not self.accept_submitted_crash_id or 'uuid' not in raw_crash:
            crash_id = create_new_ooid(current_timestamp)
            raw_crash['uuid'] = crash_id
            logger.info('%s received', crash_id)
        else:
            crash_id = raw_crash['uuid']
            logger.info('%s received with existing crash_id:', crash_id)

        self._apply_throttling(raw_crash)

        if raw_crash['legacy_processing'] == DISCARD:
            logger.info('%s discarded', crash_id)
            resp.data = 'Discarded=1\n'
            return
        if raw_crash['legacy_processing'] == IGNORE:
            logger.info('%s ignored', crash_id)
            resp.data = 'Unsupported=1\n'
            return

        raw_crash['type_tag'] = self.dump_id_prefix.strip('-')

        self.crash_storage.save_raw_crash(
            raw_crash,
            dumps,
            crash_id
        )
        logger.info('%s accepted', crash_id)

        resp.status = falcon.HTTP_200
        resp.data = 'CrashID=%s%s\n' % (self.dump_id_prefix, crash_id)


class HealthCheckResource(object):
def __init__(self, config):
self.username = config('USERNAME', namespace='healthcheck')
self.password = config('PASSWORD', namespace='healthcheck')

def is_valid_auth(self, username, password):
    """Check submitted credentials against the configured ones.

    :arg username: the username supplied with the request
    :arg password: the password supplied with the request

    :returns: True if both match ``self.username`` and ``self.password``,
        False otherwise
    """
    return (self.username, self.password) == (username, password)

@require_basic_auth
def on_get(self, req, resp):
Expand All @@ -85,4 +278,5 @@ def get_app():
config = ConfigManager()
app = falcon.API()
app.add_route('/api/v1/health', HealthCheckResource(config))
app.add_route('/submit', BreakpadSubmitterResource(config))
return app
3 changes: 3 additions & 0 deletions antenna/external/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
3 changes: 3 additions & 0 deletions antenna/external/boto/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 changes: 10 additions & 0 deletions antenna/external/boto/crashstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

class BotoS3CrashStorage:
    """Placeholder crash storage; nothing is stored yet."""

    def __init__(self, config):
        """Accept the configuration; nothing is read from it yet.

        :arg config: the configuration object
        """

    def save_raw_crash(self, raw_crash, dumps, crash_id):
        """Accept a crash for saving; currently does nothing.

        :arg raw_crash: the raw crash data
        :arg dumps: the dumps submitted with the crash
        :arg crash_id: the id of the crash

        :returns: None
        """
3 changes: 3 additions & 0 deletions antenna/lib/storage.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# web.py is in the public domain; it can be used for whatever purpose with
# absolutely no restrictions.

# From web.py with minor fixes for Python 2.7 betterness.

class Storage(dict):
Expand Down
17 changes: 17 additions & 0 deletions antenna/throttler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Throttle results: the first element of the tuple returned by
# Throttler.throttle(), which currently always returns ACCEPT.
ACCEPT = 0 # save and process
DEFER = 1 # save but don't process
DISCARD = 2 # tell client to go away and not come back
IGNORE = 3 # ignore submission entirely


class Throttler:
    """Placeholder throttler that accepts every crash."""

    def __init__(self, config):
        """Accept the configuration; nothing is read from it yet.

        :arg config: the configuration object
        """

    def throttle(self, raw_crash):
        """Decide what to do with a crash.

        :arg raw_crash: the raw crash data; currently not inspected

        :returns: ``(ACCEPT, 100)`` for every crash
        """
        # FIXME: Implement
        result = ACCEPT
        rate = 100
        return result, rate
Binary file not shown.
26 changes: 26 additions & 0 deletions tests/data/raw/7d381dc5-51e2-4887-956b-1ae9c2130109.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"InstallTime": "1357622062",
"Theme": "classic/1.0",
"Version": "4.0a1",
"id": "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}",
"Vendor": "Mozilla",
"EMCheckCompatibility": "true",
"Throttleable": "0",
"URL": "http://code.google.com/p/crashme/",
"version": "20.0a1",
"CrashTime": "1357770042",
"ReleaseChannel": "nightly",
"submitted_timestamp": "2013-01-09T22:21:18.646733+00:00",
"buildid": "20130107030932",
"timestamp": 1357770078.646789,
"Notes": "OpenGL: NVIDIA Corporation -- GeForce 8600M GT/PCIe/SSE2 -- 3.3.0 NVIDIA 313.09 -- texture_from_pixmap\r\n",
"StartupTime": "1357769913",
"FramePoisonSize": "4096",
"FramePoisonBase": "7ffffffff0dea000",
"Add-ons": "%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:20.0a1,crashme%40ted.mielczarek.org:0.4",
"BuildID": "20130107030932",
"SecondsSinceLastCrash": "1831736",
"ProductName": "WaterWolf",
"legacy_processing": 0,
"ProductID": "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"
}
4 changes: 4 additions & 0 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import pytest

from antenna.lib.storage import Storage
Expand Down

0 comments on commit 8d52df0

Please sign in to comment.