Skip to content

Commit

Permalink
fixes bug 1212088 - Expose graphics related CSV report via webapp
Browse files Browse the repository at this point in the history
  • Loading branch information
peterbe committed Oct 12, 2015
1 parent aa3ab06 commit 7c9133f
Show file tree
Hide file tree
Showing 9 changed files with 413 additions and 1 deletion.
165 changes: 165 additions & 0 deletions socorro/external/postgresql/graphics_report.py
@@ -0,0 +1,165 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import datetime
import logging

from socorro.external.postgresql.base import PostgreSQLBase
from socorro.lib import external_common


logger = logging.getLogger("webapi")

"""
This was the original SQL used in the old cron job:
select
r.signature, -- 0
r.url, -- 1
'http://crash-stats.mozilla.com/report/index/' || r.uuid as uuid_url, -- 2
to_char(r.client_crash_date,'YYYYMMDDHH24MI') as client_crash_date, -- 3
to_char(r.date_processed,'YYYYMMDDHH24MI') as date_processed, -- 4
r.last_crash, -- 5
r.product, -- 6
r.version, -- 7
r.build, -- 8
'' as branch, -- 9
r.os_name, --10
r.os_version, --11
r.cpu_name || ' | ' || r.cpu_info as cpu_info, --12
r.address, --13
array(select ba.bug_id from bug_associations ba where ba.signature = r.signature) as bug_list, --14
r.user_comments, --15
r.uptime as uptime_seconds, --16
case when (r.email is NULL OR r.email='') then '' else r.email end as email, --17
(select sum(adi_count) from raw_adi adi
where adi.date = '%(now_str)s'
and r.product = adi.product_name and r.version = adi.product_version
and substring(r.os_name from 1 for 3) = substring(adi.product_os_platform from 1 for 3)
and r.os_version LIKE '%%'||adi.product_os_version||'%%') as adu_count, --18
r.topmost_filenames, --19
case when (r.addons_checked is NULL) then '[unknown]'when (r.addons_checked) then 'checked' else 'not' end as addons_checked, --20
r.flash_version, --21
r.hangid, --22
r.reason, --23
r.process_type, --24
r.app_notes, --25
r.install_age, --26
rd.duplicate_of, --27
r.release_channel, --28
r.productid --29
from
reports r left join reports_duplicates rd on r.uuid = rd.uuid
where
'%(yesterday_str)s' <= r.date_processed and r.date_processed < '%(now_str)s'
%(prod_phrase)s %(ver_phrase)s
order by 5 -- r.date_processed, munged
"""

# Query executed by GraphicsReport.get(). It takes three named parameters:
# %(date)s, %(yesterday)s and %(product)s.
# The selected columns follow the same order as the legacy cron-job query
# quoted above (and as the header list built in GraphicsReport.get()), but
# `url` and `email` are replaced with constant placeholders instead of the
# stored values.
# NOTE(review): BETWEEN includes both endpoints, whereas the legacy query used
# a half-open range (yesterday <= date_processed < now), so a report processed
# exactly at the upper bound can appear in two consecutive daily reports.
# NOTE(review): the doubled %% is presumably the DB driver's escape for a
# literal percent sign in the LIKE pattern -- confirm against the driver used.
SQL = """
SELECT
r.signature,
'URL (removed)' as url, -- 1
'https://crash-stats.mozilla.com/report/index/' || r.uuid as uuid_url, -- 2
to_char(r.client_crash_date,'YYYYMMDDHH24MI') as client_crash_date, -- 3
to_char(r.date_processed,'YYYYMMDDHH24MI') as date_processed, -- 4
r.last_crash, -- 5
r.product, -- 6
r.version, -- 7
r.build, -- 8
'' as branch, -- 9
r.os_name, --10
r.os_version, --11
r.cpu_name || ' | ' || r.cpu_info as cpu_info, --12
r.address, --13
array(select ba.bug_id from bug_associations ba where ba.signature = r.signature) as bug_list, --14
r.user_comments, --15
r.uptime as uptime_seconds, --16
'' as email, --17
(select sum(adi_count) from raw_adi adi
where adi.date = %(date)s
and r.product = adi.product_name and r.version = adi.product_version
and substring(r.os_name from 1 for 3) = substring(adi.product_os_platform from 1 for 3)
and r.os_version LIKE '%%'||adi.product_os_version||'%%') as adu_count, --18
r.topmost_filenames, --19
case when (r.addons_checked is NULL) then '[unknown]'when (r.addons_checked) then 'checked' else 'not' end as addons_checked, --20
r.flash_version, --21
r.hangid, --22
r.reason, --23
r.process_type, --24
r.app_notes, --25
r.install_age, --26
rd.duplicate_of, --27
r.release_channel, --28
r.productid --29
FROM
reports r left join reports_duplicates rd on r.uuid = rd.uuid
WHERE
r.date_processed BETWEEN %(yesterday)s AND %(date)s
AND r.product = %(product)s
ORDER BY 5 -- r.date_processed, munged
""".strip()


class GraphicsReport(PostgreSQLBase):
    """Serve the crash data behind the Mozilla Graphics team's CSV report.

    A cron job used to dump the result of this query to disk as one big
    CSV file, which the Graphics team analyzes to produce graphs giving
    them historic oversight of their efforts.
    See http://people.mozilla.org/~bgirard/gfx_features_stats/
    This report might not be perfect, but running the query through
    PostgreSQL on demand satisfies their need and lets the Socorro team
    avoid a complicated cron job that relies on dumping files to disk.
    """

    def get(self, **kwargs):
        """Return the report's column names together with its rows.

        The keyword arguments are handed to
        ``external_common.parse_arguments`` along with two filter
        tuples: ``date`` (paired with the current UTC date) and
        ``product`` (paired with 'Firefox').  A ``yesterday`` parameter,
        one day before ``date``, is derived for the query.

        The result is a dict whose 'header' entry lists the column names
        in the order the columns are selected by ``SQL`` and whose 'hits'
        entry is whatever ``self.query`` returns for that statement.
        """
        column_names = [
            'signature',
            'url',
            'uuid_url',
            'client_crash_date',
            'date_processed',
            'last_crash',
            'product',
            'version',
            'build',
            'branch',
            'os_name',
            'os_version',
            'cpu_info',
            'address',
            'bug_list',
            'user_comments',
            'uptime_seconds',
            'email',
            'adu_count',
            'topmost_filenames',
            'addons_checked',
            'flash_version',
            'hangid',
            'reason',
            'process_type',
            'app_notes',
            'install_age',
            'duplicate_of',
            'release_channel',
            'productid',
        ]
        arguments = external_common.parse_arguments(
            [
                ('date', datetime.datetime.utcnow().date(), 'date'),
                ('product', 'Firefox', 'str'),
            ],
            kwargs,
        )
        one_day = datetime.timedelta(days=1)
        arguments['yesterday'] = arguments['date'] - one_day
        return {
            'header': column_names,
            'hits': self.query(SQL, arguments),
        }
2 changes: 2 additions & 0 deletions socorro/middleware/middleware_app.py
Expand Up @@ -76,6 +76,8 @@
(r'/suspicious/(.*)', 'suspicious.SuspiciousCrashSignatures'),
(r'/util/(versions_info)/(.*)', 'util.Util'),
(r'/adi/(.*)', 'adi.ADI'),
# the legacy one
(r'/graphics_report/', 'graphics_report.GraphicsReport'),
)

# certain items in a URL path should NOT be split by `+`
Expand Down
105 changes: 105 additions & 0 deletions socorro/unittest/external/postgresql/test_graphics_report.py
@@ -0,0 +1,105 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import datetime

from nose.plugins.attrib import attr
from nose.tools import eq_, ok_

from socorro.external.postgresql.graphics_report import GraphicsReport

from .unittestbase import PostgreSQLTestCase


#==============================================================================
@attr(integration='postgres')  # for nosetests
class IntegrationTestGraphicsReport(PostgreSQLTestCase):
    """Integration tests for
    socorro.external.postgresql.graphics_report.GraphicsReport."""

    @classmethod
    def setUpClass(cls):
        """Insert three products and two Firefox reports dated today (UTC)."""
        super(IntegrationTestGraphicsReport, cls).setUpClass()

        # The SQL text is kept flush-left so the literals stay exactly as
        # they were.
        insert_products = """
INSERT INTO products
(product_name, sort, rapid_release_version, release_name)
VALUES
(
'Firefox',
1,
'8.0',
'firefox'
),
(
'Fennec',
3,
'11.0',
'mobile'
),
(
'Thunderbird',
2,
'10.0',
'thunderbird'
);
"""
        insert_reports = """
INSERT INTO reports
(id, signature, date_processed, uuid, product,
url, email, success, addons_checked)
VALUES
(
1,
'signature',
%s,
'1',
'Firefox',
'http://mywebsite.com',
'test@something.com',
TRUE,
TRUE
),
(
2,
'my signature',
%s,
'2',
'Firefox',
'http://myotherwebsite.com',
'admin@example.com',
NULL,
FALSE
);
"""

        cursor = cls.connection.cursor()
        cursor.execute(insert_products)
        today = datetime.datetime.utcnow().date()
        # both reports share the same processing date
        cursor.execute(insert_reports, (today, today))
        cls.connection.commit()

    @classmethod
    def tearDownClass(cls):
        """Empty the products and reports tables (cascading)."""
        truncate = """
TRUNCATE products, reports
CASCADE
"""
        cursor = cls.connection.cursor()
        cursor.execute(truncate)
        cls.connection.commit()
        super(IntegrationTestGraphicsReport, cls).tearDownClass()

    def test_get(self):
        """A Firefox query returns the full header and both reports."""
        report = GraphicsReport(config=self.config).get(product='Firefox')

        column_names = report['header']
        ok_(column_names)
        eq_(column_names[0], 'signature')
        eq_(column_names[-1], 'productid')

        rows = report['hits']
        ok_(rows)
        ok_(isinstance(rows, list))
        # the signature is the first column of every row
        eq_([row[0] for row in rows], ['my signature', 'signature'])
1 change: 1 addition & 0 deletions webapp-django/crashstats/api/views.py
Expand Up @@ -160,6 +160,7 @@ class FormWrapper(forms.Form):
'Query',
# because it's an internal thing only
'SuperSearchFields',
'GraphicsReport',
)


Expand Down
6 changes: 6 additions & 0 deletions webapp-django/crashstats/crashstats/forms.py
Expand Up @@ -513,3 +513,9 @@ def clean_end_date(self):
)

return cleaned_end_date


class GraphicsReportForm(BaseForm):
    """Form declaring the `date` and `product` parameters of the graphics
    report.

    NOTE(review): presumably used by the `graphics_report` view to validate
    its query string -- the view itself is not visible here; confirm.
    """

    # No `required` argument is given, so Django's default (required) applies.
    date = forms.DateField()
    # Explicitly optional.
    product = forms.CharField(required=False)
20 changes: 20 additions & 0 deletions webapp-django/crashstats/crashstats/models.py
Expand Up @@ -1758,3 +1758,23 @@ class ProductBuildTypes(SocorroMiddleware):
API_WHITELIST = (
'hits',
)


class GraphicsReport(SocorroMiddleware):
    """The legacy solution to supply the CSV reports that the Mozilla
    Graphics Team needs."""

    # This endpoint is protected in a Django view with permission
    # requirements. That means we don't have to worry about it being
    # overly requested by rogue clients.
    # Also, the response payload is usually very large, so storing it
    # in the caching server would cause strain for little benefit when
    # it doesn't get re-used much by repeated queries.
    cache_seconds = 0

    # Same path as the '/graphics_report/' route registered for
    # graphics_report.GraphicsReport in middleware_app.py.
    URL_PREFIX = '/graphics_report/'

    # NOTE(review): 'product' is listed as required here although
    # GraphicsReportForm declares it optional -- presumably the view fills
    # in a default before calling this model; confirm.
    required_params = (
        'product',
        ('date', datetime.date),
    )
59 changes: 59 additions & 0 deletions webapp-django/crashstats/crashstats/tests/test_views.py
Expand Up @@ -5117,3 +5117,62 @@ def test_your_permissions_page(self):
eq_(cells[1], 'No')
elif cells[0] == PERMISSIONS['view_exploitability']:
eq_(cells[1], 'Yes!')

@mock.patch('requests.get')
def test_graphics_report(self, rget):
    """Exercise the graphics report view end to end.

    Checks the permission gate (403), the missing-date case (400), the
    tab-separated CSV response, its gzipped variant, and that the
    GraphicsReport model is not listed in the API documentation.
    """

    def mocked_get(url, **options):
        payload = {
            'header': ['signature', 'date_processed'],
            'hits': [
                ['my signature', '2015-10-08 23:22:21'],
                ['other signature', '2015-10-08 13:12:11'],
            ],
        }
        return Response(payload)

    rget.side_effect = mocked_get

    url = reverse('crashstats:graphics_report')

    # not signed in
    eq_(self.client.get(url).status_code, 403)

    # signed in, but without the permission
    user = self._login()
    eq_(self.client.get(url).status_code, 403)

    # grant the permission by way of a group
    hackers = Group.objects.create(name='Hackers')
    hackers.permissions.add(
        Permission.objects.get(codename='run_long_queries')
    )
    user.groups.add(hackers)

    # allowed in now, but no date was supplied
    eq_(self.client.get(url).status_code, 400)

    params = {'date': datetime.datetime.utcnow().date()}
    response = self.client.get(url, params)
    eq_(response.status_code, 200)
    eq_(response['Content-Type'], 'text/csv')
    eq_(response['Content-Length'], str(len(response.content)))

    # the content should be parseable
    plain_size = len(response.content)
    rows = list(csv.reader(StringIO(response.content), delimiter='\t'))
    eq_(len(rows), 3)
    eq_(rows[0], ['signature', 'date_processed'])
    eq_(rows[1], ['my signature', '2015-10-08 23:22:21'])

    # now fetch it with gzip
    response = self.client.get(url, params, HTTP_ACCEPT_ENCODING='gzip')
    eq_(response.status_code, 200)
    eq_(response['Content-Type'], 'text/csv')
    eq_(response['Content-Length'], str(len(response.content)))
    eq_(response['Content-Encoding'], 'gzip')
    gzipped_size = len(response.content)
    ok_(gzipped_size < plain_size)

    # check that the model isn't available in the API documentation
    api_url = reverse('api:model_wrapper', args=('GraphicsReport',))
    documentation = self.client.get(reverse('api:documentation'))
    eq_(documentation.status_code, 200)
    ok_(api_url not in documentation.content)
3 changes: 3 additions & 0 deletions webapp-django/crashstats/crashstats/urls.py
Expand Up @@ -186,6 +186,9 @@
url(r'^permissions/$',
views.permissions,
name='permissions'),
url(r'^graphics_report/$',
views.graphics_report,
name='graphics_report'),
# if we do a permanent redirect, the browser will "cache" the redirect and
# it will make it very hard to ever change the DEFAULT_PRODUCT
url(r'^$',
Expand Down

0 comments on commit 7c9133f

Please sign in to comment.