Skip to content

Commit

Permalink
bug 803209 Support for garbage collection reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
selenamarie committed Jun 11, 2013
1 parent 9901bd0 commit 459939e
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 43 deletions.
3 changes: 3 additions & 0 deletions Makefile
Expand Up @@ -101,3 +101,6 @@ analysis:
rsync socorro-toolbox/target/*.jar analysis/
rsync akela/target/*.jar analysis/
rsync -a socorro-toolbox/src/main/pig/ analysis/

# Install the json_enhancements PostgreSQL extension through the virtualenv's
# pgxn client, but only when json_enhancements.so is not already present in
# the directory reported by `pg_config --pkglibdir`.
json: virtualenv
if [ ! -f `pg_config --pkglibdir`/json_enhancements.so ]; then sudo ./socorro-virtualenv/bin/pgxn install json_enhancements ; fi
73 changes: 73 additions & 0 deletions alembic/versions/2b285e76f71d_bug_803209_add_garag.py
@@ -0,0 +1,73 @@
"""bug 803209 -- add garbage collection count to TCBS
Revision ID: 2b285e76f71d
Revises: 8894c185715
Create Date: 2013-06-11 12:46:09.637058
"""

# revision identifiers, used by Alembic.
revision = '2b285e76f71d'
down_revision = '8894c185715'

import os
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from sqlalchemy import types
from sqlalchemy.sql import table, column
from sqlalchemy.dialects import postgresql

class CITEXT(types.UserDefinedType):
    """User-defined SQLAlchemy type rendered as ``CITEXT`` in column DDL.

    Values are handed through untouched in both directions: neither the
    bind processor nor the result processor converts anything.
    """

    name = 'citext'

    def __repr__(self):
        """Return the lowercase type name."""
        return "citext"

    def get_col_spec(self):
        """Return the type name used when a column of this type is rendered."""
        return 'CITEXT'

    def bind_processor(self, dialect):
        """Return a converter that passes bind values through unchanged."""
        return lambda value: value

    def result_processor(self, dialect, coltype):
        """Return a converter that passes result values through unchanged."""
        return lambda value: value

class JSON(types.UserDefinedType):
    """User-defined SQLAlchemy type rendered as ``JSON`` in column DDL.

    Values are handed through untouched in both directions: neither the
    bind processor nor the result processor converts anything.
    """

    name = 'json'

    def __repr__(self):
        """Return the lowercase type name."""
        return "json"

    def get_col_spec(self):
        """Return the type name used when a column of this type is rendered."""
        return 'JSON'

    def bind_processor(self, dialect):
        """Return a converter that passes bind values through unchanged."""
        return lambda value: value

    def result_processor(self, dialect, coltype):
        """Return a converter that passes result values through unchanged."""
        return lambda value: value

def upgrade():
    """Add ``is_gc_count`` to ``tcbs`` and ``tcbs_build`` and reload procedures.

    Both tables receive a NOT NULL integer column with a server default of
    ``0``. Afterwards the ``backfill_matviews.sql`` and ``update_tcbs.sql``
    scripts are read from ``socorro/external/postgresql/raw_sql/procs``
    and executed, in that order.

    The scripts are located relative to the current working directory, so
    the migration has to be run from the root of the Socorro checkout.
    """
    for table_name in (u'tcbs', u'tcbs_build'):
        # A fresh Column object is built for every table.
        op.add_column(
            table_name,
            sa.Column(u'is_gc_count', sa.INTEGER(), server_default='0',
                      nullable=False)
        )

    procs_dir = os.path.join(
        os.getcwd(), 'socorro', 'external', 'postgresql', 'raw_sql', 'procs'
    )
    for proc_name in ('backfill_matviews.sql', 'update_tcbs.sql'):
        # Close each script deterministically instead of leaking the handle.
        with open(os.path.join(procs_dir, proc_name), 'r') as proc_file:
            proc_sql = proc_file.read()
        op.execute(proc_sql)

def downgrade():
    """Drop the ``is_gc_count`` column from ``tcbs_build`` and then ``tcbs``."""
    for table_name in (u'tcbs_build', u'tcbs'):
        op.drop_column(table_name, u'is_gc_count')
6 changes: 6 additions & 0 deletions docs/installation.rst
Expand Up @@ -176,6 +176,12 @@ like to use a stable release, determine latest release tag from our release trac
::
git checkout $LATEST_RELEASE_TAG

Install json_enhancements for use with PostgreSQL
``````````````````````````````````````````````````
From inside the Socorro checkout
::
make json

Run unit/functional tests
`````````````````````````

Expand Down
1 change: 1 addition & 0 deletions requirements/prod.txt
Expand Up @@ -12,3 +12,4 @@ suds==0.4
thrift==0.8.0
web.py==0.36
requests==1.2.3
pgxnclient==1.2.1
18 changes: 16 additions & 2 deletions socorro/external/postgresql/fakedata.py
Expand Up @@ -12,6 +12,7 @@
import csv
import os

CRASHIDS = []

def date_range(start_date, end_date, delta=None):
if delta is None:
Expand Down Expand Up @@ -332,8 +333,10 @@ def generate_rows(self):
def generate_crashid(self, timestamp):
    """Build a random crash id for ``timestamp`` and record it in CRASHIDS.

    The id is the string form of a random 128-bit UUID whose last seven
    characters are replaced by the depth digit (always ``0`` here) followed
    by the two-digit year, month and day of ``timestamp``.

    Each generated ``(crash id, timestamp)`` pair is appended to the
    module-level ``CRASHIDS`` list before the id is returned.
    """
    crashid = str(uuid.UUID(int=random.getrandbits(128)))
    depth = 0
    final_crashid = "%s%d%02d%02d%02d" % (
        crashid[:-7], depth, timestamp.year % 100,
        timestamp.month, timestamp.day)
    CRASHIDS.append((final_crashid, timestamp))
    return final_crashid

def buildid(self, fragment, format='%Y%m%d', days=None):
days = days or self.days
Expand Down Expand Up @@ -676,10 +679,21 @@ class Skiplist(BaseTable):
rows = [['ignore','everything'],
['prefix','SocketShutdown']]

class RawCrashes(BaseTable):
    """Fake rows for ``raw_crashes``, one per crash id recorded in CRASHIDS."""

    table = 'raw_crashes'
    columns = ['uuid', 'raw_crash', 'date_processed']

    def generate_rows(self):
        """Yield ``[uuid, raw_crash, date_processed]`` for every recorded id.

        ``raw_crash`` is a small JSON document that carries the crash id
        and sets ``IsGarbageCollecting`` to ``"1"``.
        """
        for crashid, date_processed in CRASHIDS:
            # Fill in the crash id; without the interpolation every document
            # would contain the literal placeholder "%s" as its uuid.
            raw_crash = (
                '{ "uuid": "%s", "IsGarbageCollecting": "1" }' % crashid
            )
            yield [crashid, raw_crash, date_processed]


# The order in which tables are loaded is important.
tables = [
    OSNames, OSNameMatches, ProcessTypes, Products, ReleaseChannels,
    ProductReleaseChannels, RawADU, ReleaseChannelMatches,
    ReleasesRaw, UptimeLevels, WindowsVersions, Reports, RawCrashes,
    OSVersions, ProductProductidMap, ReleaseRepositories, CrontabberState,
    CrashTypes, ReportPartitionInfo, Skiplist,
]

Expand Down
5 changes: 5 additions & 0 deletions socorro/external/postgresql/models.py
Expand Up @@ -194,6 +194,7 @@ class Tcbs(DeclarativeBase):
lin_count = Column(u'lin_count', INTEGER(), nullable=False, server_default=text('0'))
hang_count = Column(u'hang_count', INTEGER(), nullable=False, server_default=text('0'))
startup_count = Column(u'startup_count', INTEGER())
is_gc_count = Column(u'is_gc_count', INTEGER(), nullable=False, server_default=text('0'))

idx_tcbs_product_version = Index('idx_tcbs_product_version', product_version_id, report_date)
tcbs_report_date = Index('tcbs_report_date', report_date)
Expand Down Expand Up @@ -1231,6 +1232,7 @@ class SignatureProduct(DeclarativeBase):
#relationship definitions
signatures = relationship('Signature', primaryjoin='SignatureProduct.signature_id==Signature.signature_id')


class SignatureProductsRollup(DeclarativeBase):
__tablename__ = 'signature_products_rollup'

Expand All @@ -1243,12 +1245,14 @@ class SignatureProductsRollup(DeclarativeBase):
products = relationship('Product', primaryjoin='SignatureProductsRollup.product_name==Product.product_name')
signatures = relationship('Signature', primaryjoin='SignatureProductsRollup.signature_id==Signature.signature_id')


class Skiplist(DeclarativeBase):
    """Declarative mapping for the ``skiplist`` table.

    ``category`` and ``rule`` are both flagged as primary-key columns, so
    together they form the table's composite key.
    """

    __tablename__ = 'skiplist'

    category = Column(
        u'category',
        TEXT(),
        nullable=False,
        primary_key=True,
    )
    rule = Column(
        u'rule',
        TEXT(),
        nullable=False,
        primary_key=True,
    )


class SocorroDbVersion(DeclarativeBase):
__tablename__ = 'socorro_db_version'

Expand Down Expand Up @@ -1308,6 +1312,7 @@ class TcbsBuild(DeclarativeBase):
signature_id = Column(u'signature_id', INTEGER(), primary_key=True, nullable=False)
startup_count = Column(u'startup_count', INTEGER())
win_count = Column(u'win_count', INTEGER(), nullable=False, server_default=text('0'))
is_gc_count = Column(u'is_gc_count', INTEGER(), nullable=False, server_default=text('0'))

#relationship definitions

Expand Down
@@ -1,4 +1,4 @@
CREATE OR REPLACE FUNCTION backfill_matviews(firstday date, lastday date DEFAULT NULL::date, reportsclean boolean DEFAULT true) RETURNS boolean
CREATE OR REPLACE FUNCTION backfill_matviews(firstday date, lastday date DEFAULT NULL::date, reportsclean boolean DEFAULT true, check_period interval DEFAULT '01:00:00'::interval) RETURNS boolean
LANGUAGE plpgsql
SET "TimeZone" TO 'UTC'
AS $$
Expand Down Expand Up @@ -65,7 +65,7 @@ WHILE thisday <= lastday LOOP
RAISE INFO 'signatures';
PERFORM update_signatures(thisday, FALSE);
RAISE INFO 'tcbs';
PERFORM backfill_tcbs(thisday);
PERFORM backfill_tcbs(thisday, check_period);
PERFORM backfill_tcbs_build(thisday);
DROP TABLE IF EXISTS new_tcbs;
RAISE INFO 'crashes by user';
Expand Down
60 changes: 43 additions & 17 deletions socorro/external/postgresql/raw_sql/procs/update_tcbs.sql
Expand Up @@ -35,35 +35,61 @@ INSERT INTO tcbs (
signature_id, report_date, product_version_id,
process_type, release_channel,
report_count, win_count, mac_count, lin_count, hang_count,
startup_count
startup_count, is_gc_count
)
SELECT signature_id, updateday,
product_version_id,
process_type, release_channel,
count(*),
sum(case when os_name = 'Windows' THEN 1 else 0 END),
sum(case when os_name = 'Mac OS X' THEN 1 else 0 END),
sum(case when os_name = 'Linux' THEN 1 else 0 END),
count(hang_id),
sum(case when uptime < INTERVAL '1 minute' THEN 1 else 0 END)
WITH raw_crash_filtered AS (
SELECT
uuid
, json_object_field_text(r.raw_crash, 'IsGarbageCollecting') as is_garbage_collecting
FROM
raw_crashes r
WHERE
date_processed::date = updateday
)
SELECT signature_id
, updateday
, product_version_id
, process_type
, release_channel
, count(*)
, sum(case when os_name = 'Windows' THEN 1 else 0 END)
, sum(case when os_name = 'Mac OS X' THEN 1 else 0 END)
, sum(case when os_name = 'Linux' THEN 1 else 0 END)
, count(hang_id)
, sum(case when uptime < INTERVAL '1 minute' THEN 1 else 0 END)
, sum(CASE WHEN r.is_garbage_collecting = '1' THEN 1 ELSE 0 END) as gc_count
FROM reports_clean
JOIN product_versions USING (product_version_id)
WHERE utc_day_is(date_processed, updateday)
JOIN signatures USING (signature_id)
JOIN raw_crash_filtered r ON r.uuid::text = reports_clean.uuid
WHERE utc_day_is(date_processed, updateday)
AND tstz_between(date_processed, build_date, sunset_date)
GROUP BY signature_id, updateday, product_version_id,
process_type, release_channel;


RAISE WARNING 'got here';
RETURN TRUE;

-- populate summary statistics for rapid beta parent records

INSERT INTO tcbs (
signature_id, report_date, product_version_id,
process_type, release_channel,
report_count, win_count, mac_count, lin_count, hang_count,
startup_count )
SELECT signature_id, updateday, rapid_beta_id,
process_type, release_channel,
sum(report_count), sum(win_count), sum(mac_count), sum(lin_count),
sum(hang_count), sum(startup_count)
startup_count, is_gc_count )
SELECT signature_id
, updateday
, rapid_beta_id
, process_type
, release_channel
, sum(report_count)
, sum(win_count)
, sum(mac_count)
, sum(lin_count)
, sum(hang_count)
, sum(startup_count)
, sum(is_gc_count)
FROM tcbs
JOIN product_versions USING (product_version_id)
WHERE report_date = updateday
Expand All @@ -74,7 +100,7 @@ GROUP BY signature_id, updateday, rapid_beta_id,

-- tcbs_ranking removed until it's being used

-- done

RETURN TRUE;
END;
$$;
Expand Down
4 changes: 2 additions & 2 deletions socorro/external/postgresql/setupdb_app.py
Expand Up @@ -44,6 +44,8 @@ def __init__(self, sa_url, logger, autocommit=False):
def setup_admin(self):
self.session.execute('SET check_function_bodies = false')
self.session.execute('CREATE EXTENSION IF NOT EXISTS citext')
self.session.execute('CREATE EXTENSION IF NOT EXISTS hstore')
self.session.execute('CREATE EXTENSION IF NOT EXISTS json_enhancements')
self.session.execute('CREATE SCHEMA bixie')
self.session.execute('GRANT ALL ON SCHEMA bixie, public TO breakpad_rw')

Expand Down Expand Up @@ -459,7 +461,6 @@ def connection_url():
url_template = connection_url()
sa_url = url_template + '/%s' % 'postgres'

# Using the old connection manager style
with PostgreSQLAlchemyManager(sa_url, self.config.logger,
autocommit=False) as db:
db_version = db.version()
Expand Down Expand Up @@ -501,7 +502,6 @@ def connection_url():
return 0
raise

connection.execute('CREATE EXTENSION IF NOT EXISTS citext')
connection.close()

if self.no_schema:
Expand Down
42 changes: 22 additions & 20 deletions socorro/external/postgresql/tcbs.py
Expand Up @@ -10,26 +10,28 @@

import datetime

# theoretical sample output
# [ [ (key, rank, rankDelta, ...), ... ], ... ]
#{
#"resource": "http://socorro.mozilla.org/trends/topcrashes/bysig/"
# "Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0",
#"page": "0",
#"previous": "null",
#"next": "http://socorro.mozilla.org/trends/topcrashes/bysig/"
# "Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0",
#"ranks":[
#{"signature": "LdrAlternateResourcesEnabled",
#"previousRank": 3,
#"currentRank": 8,
#"change": -5},
#{"signature": "OtherSignature",
#"previousRank": "null",
#"currentRank": 10,
#"change": 10}
#],
#}
"""
theoretical sample output
[ [ (key, rank, rankDelta, ...), ... ], ... ]
{
"resource": "http://socorro.mozilla.org/trends/topcrashes/bysig/"
"Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0",
"page": "0",
"previous": "null",
"next": "http://socorro.mozilla.org/trends/topcrashes/bysig/"
"Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0",
"ranks":[
{"signature": "LdrAlternateResourcesEnabled",
"previousRank": 3,
"currentRank": 8,
"change": -5},
{"signature": "OtherSignature",
"previousRank": "null",
"currentRank": 10,
"change": 10}
],
}
"""


def getListOfTopCrashersBySignature(aCursor, dbParams):
Expand Down

0 comments on commit 459939e

Please sign in to comment.