Skip to content

Commit

Permalink
fix bug 1440712 - add "upload_telemetry_schema" app
Browse files Browse the repository at this point in the history
This app can be run using the socorro-cmd in an appropriate environment for
uploading the Telemetry schema to the Telemetry S3 bucket.
  • Loading branch information
willkg committed Mar 13, 2018
1 parent ab7eac5 commit 014035a
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 1 deletion.
5 changes: 4 additions & 1 deletion socorro-cmd
Expand Up @@ -45,7 +45,7 @@ def showcommands_cmd(argv):
for cmd, runner in group:
if not isinstance(runner, basestring):
runner = '%s in %s' % (runner.__name__, inspect.getfile(runner))
print(' %-22s %s' % (cmd, runner))
print(' %-24s %s' % (cmd, runner))
print('')


Expand Down Expand Up @@ -77,6 +77,9 @@ COMMANDS = [
'socorro.external.es.create_recent_indices_app.CreateRecentESIndicesApp'
),
'clear_indices': 'socorro.external.es.clear_indices_app.ClearESIndicesApp',
'upload_telemetry_schema': (
'socorro.external.boto.upload_telemetry_schema.UploadTelemetrySchema'
),
'setupdb': 'socorro.external.postgresql.setupdb_app.SocorroDBApp',
}
),
Expand Down
75 changes: 75 additions & 0 deletions socorro/external/boto/upload_telemetry_schema.py
@@ -0,0 +1,75 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import sys

from configman import Namespace
from configman.converters import class_converter

from socorro.app.socorro_app import App
from socorro.schemas import CRASH_REPORT_JSON_SCHEMA_AS_STRING


class UploadTelemetrySchema(App):
    """Upload the crash report JSON Schema to the Telemetry S3 bucket.

    A copy of each crash (mainly the processed crash) is sent to an S3 bucket
    that Telemetry ingests from. To ingest it, Telemetry needs our
    crash_report.json JSON Schema file -- not to interpret the JSON we store,
    but to learn its underlying structure (types, nesting, etc.), which is
    required for storing the data in .parquet files in S3.

    Telemetry could fetch crash_report.json from the git repository, but that
    is fragile because it depends on github.com always being available.

    Uploading the schema to S3 is a bet that S3 is more read-reliable than
    github.com's servers. And if S3 itself is unavailable, the whole
    ingestion process has to halt/pause anyway.

    """

    app_name = 'upload-telemetry-schema'
    app_version = '0.1'
    app_description = 'Uploads schema to S3 bucket for Telemetry'
    metadata = ''

    # Everything this app reads from configuration lives under the
    # "telemetry" namespace.
    required_config = Namespace()
    required_config.telemetry = Namespace()
    required_config.telemetry.add_option(
        'resource_class',
        default='socorro.external.boto.connection_context.RegionalS3ConnectionContext',
        doc='fully qualified dotted Python classname to handle Boto connections',
        from_string_converter=class_converter,
        reference_value_from='resource.boto'
    )
    required_config.telemetry.add_option(
        'json_filename',
        default='crash_report.json',
        doc="Name of the file/key we're going to upload to"
    )

    def main(self):
        """Write the schema string to the configured key and return 0.

        An existing key named ``json_filename`` is reused; otherwise a new
        key is created in the bucket before the contents are written.

        """
        telemetry_config = self.config.telemetry
        s3_context = telemetry_config.resource_class(telemetry_config)

        s3_connection = s3_context._connect()
        # NOTE(review): bucket_name is not declared in this class's
        # required_config; presumably the resource class contributes it --
        # confirm against the connection context.
        schema_bucket = s3_context._get_bucket(
            s3_connection, telemetry_config.bucket_name
        )

        key_name = telemetry_config.json_filename
        # Reuse the key when the lookup returns something truthy, otherwise
        # create it.
        schema_key = (
            schema_bucket.get_key(key_name) or schema_bucket.new_key(key_name)
        )
        schema_key.set_contents_from_string(CRASH_REPORT_JSON_SCHEMA_AS_STRING)

        self.config.logger.info('Success: Schema uploaded!')
        return 0


if __name__ == '__main__':
    # Run the app and hand its return value to the shell as the exit status.
    exit_status = UploadTelemetrySchema.run()
    sys.exit(exit_status)

0 comments on commit 014035a

Please sign in to comment.