From 459939e3578afcb9cc370178b0b0bdd440a91671 Mon Sep 17 00:00:00 2001 From: Selena Deckelmann Date: Thu, 6 Jun 2013 11:14:34 -0700 Subject: [PATCH] bug 803209 Support for garbage collection reporting --- Makefile | 3 + .../2b285e76f71d_bug_803209_add_garag.py | 73 +++++++++++++++++++ docs/installation.rst | 6 ++ requirements/prod.txt | 1 + socorro/external/postgresql/fakedata.py | 18 ++++- socorro/external/postgresql/models.py | 5 ++ .../raw_sql/procs/backfill_matviews.sql | 4 +- .../postgresql/raw_sql/procs/update_tcbs.sql | 60 ++++++++++----- socorro/external/postgresql/setupdb_app.py | 4 +- socorro/external/postgresql/tcbs.py | 42 ++++++----- 10 files changed, 173 insertions(+), 43 deletions(-) create mode 100644 alembic/versions/2b285e76f71d_bug_803209_add_garag.py diff --git a/Makefile b/Makefile index 5be1ded088..9df69f7f1b 100644 --- a/Makefile +++ b/Makefile @@ -101,3 +101,6 @@ analysis: rsync socorro-toolbox/target/*.jar analysis/ rsync akela/target/*.jar analysis/ rsync -a socorro-toolbox/src/main/pig/ analysis/ + +json: virtualenv + if [ ! -f `pg_config --pkglibdir`/json_enhancements.so ]; then sudo ./socorro-virtualenv/bin/pgxn install json_enhancements ; fi diff --git a/alembic/versions/2b285e76f71d_bug_803209_add_garag.py b/alembic/versions/2b285e76f71d_bug_803209_add_garag.py new file mode 100644 index 0000000000..6be82b3098 --- /dev/null +++ b/alembic/versions/2b285e76f71d_bug_803209_add_garag.py @@ -0,0 +1,73 @@ +"""bug 803209 -- add garbage collection count to TCBS + +Revision ID: 2b285e76f71d +Revises: 8894c185715 +Create Date: 2013-06-11 12:46:09.637058 + +""" + +# revision identifiers, used by Alembic. +revision = '2b285e76f71d' +down_revision = '8894c185715' + +import os +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql +from sqlalchemy import types +from sqlalchemy.sql import table, column +from sqlalchemy.dialects import postgresql + +class CITEXT(types.UserDefinedType): + name = 'citext' + + def get_col_spec(self): + return 'CITEXT' + + def bind_processor(self, dialect): + def process(value): + return value + return process + + def result_processor(self, dialect, coltype): + def process(value): + return value + return process + + def __repr__(self): + return "citext" + +class JSON(types.UserDefinedType): + name = 'json' + + def get_col_spec(self): + return 'JSON' + + def bind_processor(self, dialect): + def process(value): + return value + return process + + def result_processor(self, dialect, coltype): + def process(value): + return value + return process + + def __repr__(self): + return "json" + +def upgrade(): + op.add_column(u'tcbs', sa.Column(u'is_gc_count', sa.INTEGER(), server_default='0', nullable=False)) + op.add_column(u'tcbs_build', sa.Column(u'is_gc_count', sa.INTEGER(), server_default='0', nullable=False)) + app_path=os.getcwd() + procs = [ + 'backfill_matviews.sql', + 'update_tcbs.sql' + ] + for myfile in [app_path + '/socorro/external/postgresql/raw_sql/procs/' + line for line in procs]: + proc = open(myfile, 'r').read() + op.execute(proc) + +def downgrade(): + op.drop_column(u'tcbs_build', u'is_gc_count') + op.drop_column(u'tcbs', u'is_gc_count') diff --git a/docs/installation.rst b/docs/installation.rst index 67b95f7fe0..81028d3fb3 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -176,6 +176,12 @@ like to use a stable release, determine latest release tag from our release trac :: git checkout $LATEST_RELEASE_TAG +Install json_extensions for use with PostgreSQL +``````````````````````````````````````````````` +From inside the Socorro checkout +:: + make json + Run unit/functional tests ```````````` diff --git a/requirements/prod.txt b/requirements/prod.txt index 71530ed5ac..d313f16ac2 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -12,3 +12,4 @@ suds==0.4 thrift==0.8.0 web.py==0.36 requests==1.2.3 +pgxnclient==1.2.1 diff --git a/socorro/external/postgresql/fakedata.py b/socorro/external/postgresql/fakedata.py index ec41608f1e..d4db85cfe1 100644 --- a/socorro/external/postgresql/fakedata.py +++ b/socorro/external/postgresql/fakedata.py @@ -12,6 +12,7 @@ import csv import os +CRASHIDS = [] def date_range(start_date, end_date, delta=None): if delta is None: @@ -332,8 +333,10 @@ def generate_rows(self): def generate_crashid(self, timestamp): crashid = str(uuid.UUID(int=random.getrandbits(128))) depth = 0 - return "%s%d%02d%02d%02d" % (crashid[:-7], depth, timestamp.year % 100, + final_crashid = "%s%d%02d%02d%02d" % (crashid[:-7], depth, timestamp.year % 100, timestamp.month, timestamp.day) + CRASHIDS.append( (final_crashid, timestamp) ) + return final_crashid def buildid(self, fragment, format='%Y%m%d', days=None): days = days or self.days @@ -676,10 +679,21 @@ class Skiplist(BaseTable): rows = [['ignore','everything'], ['prefix','SocketShutdown']] +class RawCrashes(BaseTable): + table = 'raw_crashes' + columns = ['uuid', 'raw_crash', 'date_processed'] + + def generate_rows(self): + for crashid, date_processed, in CRASHIDS: + raw_crash = '{ "uuid": "%s", "IsGarbageCollecting": "1" }' + row = [crashid, raw_crash, date_processed] + yield row + + # the order that tables are loaded is important. tables = [OSNames, OSNameMatches, ProcessTypes, Products, ReleaseChannels, ProductReleaseChannels, RawADU, ReleaseChannelMatches, - ReleasesRaw, UptimeLevels, WindowsVersions, Reports, OSVersions, + ReleasesRaw, UptimeLevels, WindowsVersions, Reports, RawCrashes, OSVersions, ProductProductidMap, ReleaseRepositories, CrontabberState, CrashTypes, ReportPartitionInfo, Skiplist] diff --git a/socorro/external/postgresql/models.py b/socorro/external/postgresql/models.py index 2ae0b56c58..83631bb849 100644 --- a/socorro/external/postgresql/models.py +++ b/socorro/external/postgresql/models.py @@ -194,6 +194,7 @@ class Tcbs(DeclarativeBase): lin_count = Column(u'lin_count', INTEGER(), nullable=False, server_default=text('0')) hang_count = Column(u'hang_count', INTEGER(), nullable=False, server_default=text('0')) startup_count = Column(u'startup_count', INTEGER()) + is_gc_count = Column(u'is_gc_count', INTEGER(), nullable=False, server_default=text('0')) idx_tcbs_product_version = Index('idx_tcbs_product_version', product_version_id, report_date) tcbs_report_date = Index('tcbs_report_date', report_date) @@ -1231,6 +1232,7 @@ class SignatureProduct(DeclarativeBase): #relationship definitions signatures = relationship('Signature', primaryjoin='SignatureProduct.signature_id==Signature.signature_id') + class SignatureProductsRollup(DeclarativeBase): __tablename__ = 'signature_products_rollup' @@ -1243,12 +1245,14 @@ class SignatureProductsRollup(DeclarativeBase): products = relationship('Product', primaryjoin='SignatureProductsRollup.product_name==Product.product_name') signatures = relationship('Signature', primaryjoin='SignatureProductsRollup.signature_id==Signature.signature_id') + class Skiplist(DeclarativeBase): __tablename__ = 'skiplist' category = Column(u'category', TEXT(), primary_key=True, nullable=False) rule = Column(u'rule', TEXT(), primary_key=True, nullable=False) + class SocorroDbVersion(DeclarativeBase): __tablename__ = 'socorro_db_version' @@ -1308,6 +1312,7 @@ class TcbsBuild(DeclarativeBase): signature_id = Column(u'signature_id', INTEGER(), primary_key=True, nullable=False) startup_count = Column(u'startup_count', INTEGER()) win_count = Column(u'win_count', INTEGER(), nullable=False, server_default=text('0')) + is_gc_count = Column(u'is_gc_count', INTEGER(), nullable=False, server_default=text('0')) #relationship definitions diff --git a/socorro/external/postgresql/raw_sql/procs/backfill_matviews.sql b/socorro/external/postgresql/raw_sql/procs/backfill_matviews.sql index 4f7b98aa2e..15bab458ac 100644 --- a/socorro/external/postgresql/raw_sql/procs/backfill_matviews.sql +++ b/socorro/external/postgresql/raw_sql/procs/backfill_matviews.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION backfill_matviews(firstday date, lastday date DEFAULT NULL::date, reportsclean boolean DEFAULT true) RETURNS boolean +CREATE OR REPLACE FUNCTION backfill_matviews(firstday date, lastday date DEFAULT NULL::date, reportsclean boolean DEFAULT true, check_period interval DEFAULT '01:00:00'::interval) RETURNS boolean LANGUAGE plpgsql SET "TimeZone" TO 'UTC' AS $$ @@ -65,7 +65,7 @@ WHILE thisday <= lastday LOOP RAISE INFO 'signatures'; PERFORM update_signatures(thisday, FALSE); RAISE INFO 'tcbs'; - PERFORM backfill_tcbs(thisday); + PERFORM backfill_tcbs(thisday, check_period); PERFORM backfill_tcbs_build(thisday); DROP TABLE IF EXISTS new_tcbs; RAISE INFO 'crashes by user'; diff --git a/socorro/external/postgresql/raw_sql/procs/update_tcbs.sql b/socorro/external/postgresql/raw_sql/procs/update_tcbs.sql index e6dfeedbff..7f6cf55d91 100644 --- a/socorro/external/postgresql/raw_sql/procs/update_tcbs.sql +++ b/socorro/external/postgresql/raw_sql/procs/update_tcbs.sql @@ -35,35 +35,61 @@ INSERT INTO tcbs ( signature_id, report_date, product_version_id, process_type, release_channel, report_count, win_count, mac_count, lin_count, hang_count, - startup_count + startup_count, is_gc_count ) -SELECT signature_id, updateday, - product_version_id, - process_type, release_channel, - count(*), - sum(case when os_name = 'Windows' THEN 1 else 0 END), - sum(case when os_name = 'Mac OS X' THEN 1 else 0 END), - sum(case when os_name = 'Linux' THEN 1 else 0 END), - count(hang_id), - sum(case when uptime < INTERVAL '1 minute' THEN 1 else 0 END) +WITH raw_crash_filtered AS ( + SELECT + uuid + , json_object_field_text(r.raw_crash, 'IsGarbageCollecting') as is_garbage_collecting + FROM + raw_crashes r + WHERE + date_processed::date = updateday +) +SELECT signature_id + , updateday + , product_version_id + , process_type + , release_channel + , count(*) + , sum(case when os_name = 'Windows' THEN 1 else 0 END) + , sum(case when os_name = 'Mac OS X' THEN 1 else 0 END) + , sum(case when os_name = 'Linux' THEN 1 else 0 END) + , count(hang_id) + , sum(case when uptime < INTERVAL '1 minute' THEN 1 else 0 END) + , sum(CASE WHEN r.is_garbage_collecting = '1' THEN 1 ELSE 0 END) as gc_count FROM reports_clean JOIN product_versions USING (product_version_id) - WHERE utc_day_is(date_processed, updateday) + JOIN signatures USING (signature_id) + JOIN raw_crash_filtered r ON r.uuid::text = reports_clean.uuid +WHERE utc_day_is(date_processed, updateday) AND tstz_between(date_processed, build_date, sunset_date) GROUP BY signature_id, updateday, product_version_id, process_type, release_channel; + +RAISE WARNING 'got here'; +RETURN TRUE; + -- populate summary statistics for rapid beta parent records INSERT INTO tcbs ( signature_id, report_date, product_version_id, process_type, release_channel, report_count, win_count, mac_count, lin_count, hang_count, - startup_count ) -SELECT signature_id, updateday, rapid_beta_id, - process_type, release_channel, - sum(report_count), sum(win_count), sum(mac_count), sum(lin_count), - sum(hang_count), sum(startup_count) + startup_count, is_gc_count ) +SELECT signature_id + , updateday + , rapid_beta_id + , process_type + , release_channel + , sum(report_count) + , sum(win_count) + , sum(mac_count) + , sum(lin_count) + , sum(hang_count) + , sum(startup_count) + , sum(is_gc_count) FROM tcbs JOIN product_versions USING (product_version_id) WHERE report_date = updateday @@ -74,7 +100,7 @@ GROUP BY signature_id, updateday, rapid_beta_id, -- tcbs_ranking removed until it's being used --- done + RETURN TRUE; END; $$; diff --git a/socorro/external/postgresql/setupdb_app.py b/socorro/external/postgresql/setupdb_app.py index 101019d57f..c166ce866d 100755 --- a/socorro/external/postgresql/setupdb_app.py +++ b/socorro/external/postgresql/setupdb_app.py @@ -44,6 +44,8 @@ def __init__(self, sa_url, logger, autocommit=False): def setup_admin(self): self.session.execute('SET check_function_bodies = false') self.session.execute('CREATE EXTENSION IF NOT EXISTS citext') + self.session.execute('CREATE EXTENSION IF NOT EXISTS hstore') + self.session.execute('CREATE EXTENSION IF NOT EXISTS json_enhancements') self.session.execute('CREATE SCHEMA bixie') self.session.execute('GRANT ALL ON SCHEMA bixie, public TO breakpad_rw') @@ -459,7 +461,6 @@ def connection_url(): url_template = connection_url() sa_url = url_template + '/%s' % 'postgres' - # Using the old connection manager style with PostgreSQLAlchemyManager(sa_url, self.config.logger, autocommit=False) as db: db_version = db.version() @@ -501,7 +502,6 @@ def connection_url(): return 0 raise - connection.execute('CREATE EXTENSION IF NOT EXISTS citext') connection.close() if self.no_schema: diff --git a/socorro/external/postgresql/tcbs.py b/socorro/external/postgresql/tcbs.py index e4fedc4fdf..da1867b9a6 100644 --- a/socorro/external/postgresql/tcbs.py +++ b/socorro/external/postgresql/tcbs.py @@ -10,26 +10,28 @@ import datetime -# theoretical sample output -# [ [ (key, rank, rankDelta, ...), ... ], ... ] -#{ - #"resource": "http://socorro.mozilla.org/trends/topcrashes/bysig/" - # "Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0", - #"page": "0", - #"previous": "null", - #"next": "http://socorro.mozilla.org/trends/topcrashes/bysig/" - # "Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0", - #"ranks":[ - #{"signature": "LdrAlternateResourcesEnabled", - #"previousRank": 3, - #"currentRank": 8, - #"change": -5}, - #{"signature": "OtherSignature", - #"previousRank": "null", - #"currentRank": 10, - #"change": 10} - #], -#} +""" + theoretical sample output + [ [ (key, rank, rankDelta, ...), ... ], ... ] +{ + "resource": "http://socorro.mozilla.org/trends/topcrashes/bysig/" + "Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0", + "page": "0", + "previous": "null", + "next": "http://socorro.mozilla.org/trends/topcrashes/bysig/" + "Firefox/3.5.3/from/2009-10-03/to/2009-10-13/page/0", + "ranks":[ + {"signature": "LdrAlternateResourcesEnabled", + "previousRank": 3, + "currentRank": 8, + "change": -5}, + {"signature": "OtherSignature", + "previousRank": "null", + "currentRank": 10, + "change": 10} + ], +} +""" def getListOfTopCrashersBySignature(aCursor, dbParams):