Skip to content

Commit

Permalink
Merge pull request #4371 from willkg/1440712-upload-json
Browse files Browse the repository at this point in the history
fix bug 1440712 - add "upload_telemetry_schema" app
  • Loading branch information
willkg committed Mar 14, 2018
2 parents ab7eac5 + 679f75f commit 7b1d94d
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 1 deletion.
5 changes: 4 additions & 1 deletion socorro-cmd
Expand Up @@ -45,7 +45,7 @@ def showcommands_cmd(argv):
for cmd, runner in group:
if not isinstance(runner, basestring):
runner = '%s in %s' % (runner.__name__, inspect.getfile(runner))
print(' %-22s %s' % (cmd, runner))
print(' %-24s %s' % (cmd, runner))
print('')


Expand Down Expand Up @@ -77,6 +77,9 @@ COMMANDS = [
'socorro.external.es.create_recent_indices_app.CreateRecentESIndicesApp'
),
'clear_indices': 'socorro.external.es.clear_indices_app.ClearESIndicesApp',
'upload_telemetry_schema': (
'socorro.external.boto.upload_telemetry_schema.UploadTelemetrySchema'
),
'setupdb': 'socorro.external.postgresql.setupdb_app.SocorroDBApp',
}
),
Expand Down
85 changes: 85 additions & 0 deletions socorro/external/boto/upload_telemetry_schema.py
@@ -0,0 +1,85 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import sys

from boto.exception import S3ResponseError
from configman import Namespace
from configman.converters import class_converter

from socorro.app.socorro_app import App
from socorro.schemas import CRASH_REPORT_JSON_SCHEMA_AS_STRING


class UploadTelemetrySchema(App):
    """Publish the crash report JSON Schema to the Telemetry S3 bucket.

    A copy of each crash (mainly the processed crash) is always sent to an S3
    bucket that Telemetry ingests from. During ingestion Telemetry needs a
    copy of our crash_report.json JSON Schema file--not to interpret the JSON
    we store, but to learn its underlying structure (types, nesting, etc.),
    which is necessary for storing the data in .parquet files in S3.

    Telemetry could take crash_report.json from the git repository, but that
    is fragile because it depends on github.com always being available.

    Uploading the schema to S3 is a bet that S3 is more read-reliable than
    github.com's server. And if the schema lives in S3 and S3 is unavailable,
    the whole ingestion process has to halt/pause anyway.

    """

    app_name = 'upload-telemetry-schema'
    app_version = '0.1'
    app_description = 'Uploads JSON schema to S3 bucket for Telemetry'
    metadata = ''

    # All options for this app live under the "telemetry" namespace.
    required_config = Namespace()
    required_config.telemetry = Namespace()
    required_config.telemetry.add_option(
        'resource_class',
        default='socorro.external.boto.connection_context.RegionalS3ConnectionContext',
        doc='fully qualified dotted Python classname to handle Boto connections',
        from_string_converter=class_converter,
        reference_value_from='resource.boto'
    )
    required_config.telemetry.add_option(
        'json_filename',
        default='crash_report.json',
        doc="Name of the file/key we're going to upload to"
    )

    def main(self):
        """Upload the schema string to the configured bucket.

        Returns 0 once the schema has been written, or 1 if looking up the
        bucket raises ``S3ResponseError``.

        """
        telemetry = self.config.telemetry

        context = telemetry.resource_class(telemetry)
        s3_connection = context._connect()
        try:
            bucket = context._get_bucket(s3_connection, telemetry.bucket_name)
        except S3ResponseError:
            # The bucket lookup failed; this is treated as a missing bucket,
            # which this app reports rather than creates.
            self.config.logger.error(
                'Failure: The %s S3 bucket must be created first.',
                telemetry.bucket_name
            )
            return 1

        # Reuse the key when it already exists; otherwise make a new one.
        key_name = telemetry.json_filename
        schema_key = bucket.get_key(key_name) or bucket.new_key(key_name)
        schema_key.set_contents_from_string(CRASH_REPORT_JSON_SCHEMA_AS_STRING)

        self.config.logger.info('Success: Schema uploaded!')
        return 0


# When this module is executed directly, run the app and hand whatever
# UploadTelemetrySchema.run() returns to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(UploadTelemetrySchema.run())
34 changes: 34 additions & 0 deletions socorro/unittest/external/boto/test_upload_telemetry_schema.py
@@ -0,0 +1,34 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from moto import mock_s3_deprecated

from socorro.external.boto.upload_telemetry_schema import UploadTelemetrySchema
from socorro.unittest.external.boto import get_config


class TestUploadTelemetrySchema:
    """Exercise UploadTelemetrySchema.main() against a mocked S3."""

    @mock_s3_deprecated
    def test_bucket_not_found(self):
        """A missing bucket is reported to the user with exit code 1."""
        app = UploadTelemetrySchema(get_config(UploadTelemetrySchema))

        exit_code = app.main()

        # Non-zero exit code plus exactly one error log naming the bucket
        assert exit_code == 1
        app.config.logger.error.assert_called_once_with(
            'Failure: The %s S3 bucket must be created first.', 'crashstats'
        )

    @mock_s3_deprecated
    def test_upload_worked(self, boto_helper):
        """With the bucket in place, the upload succeeds with exit code 0."""
        boto_helper.get_or_create_bucket('crashstats')
        app = UploadTelemetrySchema(get_config(UploadTelemetrySchema))

        exit_code = app.main()

        assert exit_code == 0
        app.config.logger.info.assert_called_once_with('Success: Schema uploaded!')

0 comments on commit 7b1d94d

Please sign in to comment.