Skip to content

Commit

Permalink
Merge pull request #1319 from glogiotatidis/issue-1285-stop-redshift
Browse files Browse the repository at this point in the history
[Issue #1285] Stop pulling data from RedShift data store.
  • Loading branch information
glogiotatidis committed Feb 19, 2020
2 parents 2ac1224 + 594ee56 commit 59f233b
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 124 deletions.
9 changes: 1 addition & 8 deletions snippets/base/admin/adminmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,12 +1192,6 @@ def redash_link(self, obj):
publish_end = (
obj.publish_end or datetime.utcnow() + timedelta(days=1)
).strftime("%Y-%m-%d")
link_legacy = (
f'{settings.REDASH_ENDPOINT}/queries/{settings.REDASH_JOB_QUERY_ID}/'
f'?p_start_date_{settings.REDASH_JOB_QUERY_ID}={obj.publish_start.strftime("%Y-%m-%d")}'
f'&p_end_date_{settings.REDASH_JOB_QUERY_ID}={publish_end}'
f'&p_message_id_{settings.REDASH_JOB_QUERY_ID}={obj.id}#161888'
)
link_bigquery = (
f'{settings.REDASH_ENDPOINT}/queries/{settings.REDASH_JOB_QUERY_BIGQUERY_ID}/'
f'?p_start_date_{settings.REDASH_JOB_QUERY_BIGQUERY_ID}='
Expand All @@ -1208,8 +1202,7 @@ def redash_link(self, obj):
)

return format_html(
f'<a href="{link_legacy}">Explore</a> - '
f'<a href="{link_bigquery}">Explore BigQuery (Fx 72+)</a>'
f'<a href="{link_bigquery}">Explore Redash</a>'
)

redash_link.short_description = 'Explore in Redash'
Expand Down
52 changes: 23 additions & 29 deletions snippets/base/management/commands/fetch_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,38 +51,32 @@ def handle(self, *args, **options):
'message_id': job.id,
}

data_fetched = 0
# We need to fetch metrics from two different data sources
# (RedShift and BigQuery) to capture all metrics. Firefox
# switched to BigQuery on Firefox 72. We expect to be able
# to remove RedShift querying in a year from 72's launch
# (Jan 2021). Issue #1285
for query in [settings.REDASH_JOB_QUERY_ID, settings.REDASH_JOB_QUERY_BIGQUERY_ID]:
try:
result = redash.query(query, bind_data)
except Exception as exp:
# Capture the exception but don't quit
sentry_sdk.capture_exception(exp)
continue

try:
for row in result['query_result']['data']['rows']:
if row['event'] == 'IMPRESSION':
impressions += row['counts']
elif row['event'] == 'BLOCK':
blocks += row['counts']
elif row['event'] in ['CLICK', 'CLICK_BUTTON']:
clicks += row['counts']
except KeyError as exp:
# Capture the exception but don't quit
sentry_sdk.capture_exception(exp)
continue
else:
data_fetched += 1
data_fetched = False
try:
result = redash.query(settings.REDASH_JOB_QUERY_BIGQUERY_ID, bind_data)
except Exception as exp:
# Capture the exception but don't quit
sentry_sdk.capture_exception(exp)
continue

try:
for row in result['query_result']['data']['rows']:
if row['event'] == 'IMPRESSION':
impressions += row['counts']
elif row['event'] == 'BLOCK':
blocks += row['counts']
elif row['event'] in ['CLICK', 'CLICK_BUTTON']:
clicks += row['counts']
except KeyError as exp:
# Capture the exception but don't quit
sentry_sdk.capture_exception(exp)
continue
else:
data_fetched = True

        # We didn't fetch data for this job, don't save it.
if data_fetched != 2:
if not data_fetched:
continue

# We fetched data for job, mark the ETL job `working` to update
Expand Down
93 changes: 7 additions & 86 deletions snippets/base/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,26 +81,6 @@ def test_base(self):
}
}
return_data_second = {
'query_result': {
'data': {
'rows': [
{
'event': 'IMPRESSION',
'counts': 10,
},
{
'event': 'BLOCK',
'counts': 5,
},
{
'event': 'CLICK_BUTTON',
'counts': 10,
}
]
}
}
}
return_data_third = {
'query_result': {
'data': {
'rows': [
Expand All @@ -124,51 +104,29 @@ def test_base(self):
}
}
}
return_data_fourth = {
'query_result': {
'data': {
'rows': [
{
'event': 'IMPRESSION',
'counts': 20,
},
{
'event': 'BLOCK',
'counts': 50,
},
{
'event': 'CLICK',
'counts': 10,
},
]
}
}
}

with patch('snippets.base.management.commands.fetch_metrics.RedashDynamicQuery') as rdq:
with patch('snippets.base.management.commands.fetch_metrics.datetime', wraps=datetime) as datetime_mock: # noqa
datetime_mock.utcnow.return_value = datetime(2050, 1, 6)
rdq.return_value.query.side_effect = [
return_data_first, return_data_second, return_data_third, return_data_fourth
return_data_first, return_data_second
]
call_command('fetch_metrics', stdout=Mock())

rdq.return_value.query.assert_has_calls([
call(settings.REDASH_JOB_QUERY_ID, request_data_first),
call(settings.REDASH_JOB_QUERY_BIGQUERY_ID, request_data_first),
call(settings.REDASH_JOB_QUERY_ID, request_data_second),
call(settings.REDASH_JOB_QUERY_BIGQUERY_ID, request_data_second),
])

job_running.refresh_from_db()
self.assertEqual(job_running.metric_impressions, 110)
self.assertEqual(job_running.metric_blocks, 15)
self.assertEqual(job_running.metric_clicks, 120)
self.assertEqual(job_running.metric_impressions, 100)
self.assertEqual(job_running.metric_blocks, 10)
self.assertEqual(job_running.metric_clicks, 110)

job_running2.refresh_from_db()
self.assertEqual(job_running2.metric_impressions, 270)
self.assertEqual(job_running2.metric_blocks, 150)
self.assertEqual(job_running2.metric_clicks, 45)
self.assertEqual(job_running2.metric_impressions, 250)
self.assertEqual(job_running2.metric_blocks, 100)
self.assertEqual(job_running2.metric_clicks, 35)

def test_no_data_fetched(self):
JobFactory(
Expand All @@ -191,43 +149,6 @@ def test_no_data_fetched(self):
rdq.return_value.query.return_value = return_data
self.assertRaises(CommandError, call_command, 'fetch_metrics', stdout=Mock())

def test_no_data_fetched_from_one_source(self):
# Test that even if one of the two data sources fail, the metrics are
# not saved in the db.
job = JobFactory(
status=models.Job.PUBLISHED,
publish_start='2050-01-05 01:00',
publish_end='2050-01-06 02:00',
limit_clicks=1000,
)
with patch('snippets.base.management.commands.fetch_metrics.RedashDynamicQuery') as rdq:
rdq.return_value.query.side_effect = [
{
'query_result': {
'data': {
'rows': [
{
'event': 'IMPRESSION',
'counts': 20,
},
{
'event': 'BLOCK',
'counts': 50,
},
{
'event': 'CLICK',
'counts': 10,
},
]
}
}
},
Exception('error'),
]
self.assertRaises(CommandError, call_command, 'fetch_metrics', stdout=Mock())
job.refresh_from_db()
self.assertEqual(job.metric_last_update, datetime(1970, 1, 1, 0, 0))


class UpdateJobsTests(TestCase):
@override_settings(SNIPPETS_PUBLICATION_OFFSET=5)
Expand Down
1 change: 0 additions & 1 deletion snippets/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,6 @@
REDASH_ENDPOINT = config('REDASH_ENDPOINT', default='https://sql.telemetry.mozilla.org')
REDASH_API_KEY = config('REDASH_API_KEY', default=None)
REDASH_MAX_WAIT = config('REDASH_MAX_WAIT', default=300)
REDASH_JOB_QUERY_ID = config('REDASH_JOB_QUERY_ID', default=63146)
REDASH_DAILY_QUERY_ID = config('REDASH_DAILY_QUERY_ID', default=65755)
REDASH_JOB_QUERY_BIGQUERY_ID = config('REDASH_JOB_QUERY_BIGQUERY_ID', default=66681)
REDASH_DAILY_QUERY_BIGQUERY_ID = config('REDASH_DAILY_QUERY_BIGQUERY_ID', default=66785)
Expand Down

0 comments on commit 59f233b

Please sign in to comment.