Skip to content

Commit

Permalink
Merge pull request #1402 from glogiotatidis/include-no-clients-metrics
Browse files Browse the repository at this point in the history
[Fix #1399] Include number of clients in JobDailyPerformance
  • Loading branch information
glogiotatidis committed Jun 25, 2020
2 parents f1bee35 + 2e361f5 commit 8d1d029
Show file tree
Hide file tree
Showing 5 changed files with 322 additions and 40 deletions.
35 changes: 27 additions & 8 deletions snippets/base/admin/adminmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,8 @@ class JobAdmin(admin.ModelAdmin):
('Metrics', {
'fields': (
('impressions_humanized', 'adj_impressions_humanized'),
('impressions_total_clients_humanized', 'adj_impressions_total_clients_humanized'),
('impressions_per_client_humanized', 'adj_impressions_per_client_humanized'),
('clicks_humanized', 'clicks_ctr'),
('blocks_humanized', 'blocks_ctr'),
'redash_link',
Expand Down Expand Up @@ -1226,6 +1228,22 @@ def adj_impressions_humanized(self, obj):
return intcomma(obj.adj_impressions or 0)
adj_impressions_humanized.short_description = 'Adjusted Impressions'

def impressions_total_clients_humanized(self, obj):
    """Return the total unique-client count for display in the admin.

    Reads ``impression_no_clients_total`` from the row returned by
    ``obj.metrics.first()`` and formats it with ``intcomma``. A job that
    has no metrics rows yet, or a falsy count, is rendered as zero instead
    of raising ``AttributeError`` on ``None``.
    """
    first_row = obj.metrics.first()
    total_clients = first_row.impression_no_clients_total if first_row is not None else 0
    return intcomma(total_clients or 0)
impressions_total_clients_humanized.short_description = 'Total Unique Clients'

def adj_impressions_total_clients_humanized(self, obj):
    """Return the adjusted total unique-client count for display in the admin.

    Reads ``adj_impression_no_clients_total`` from the row returned by
    ``obj.metrics.first()`` and formats it with ``intcomma``. A job that
    has no metrics rows yet, or a falsy count, is rendered as zero instead
    of raising ``AttributeError`` on ``None``.
    """
    first_row = obj.metrics.first()
    adj_clients = first_row.adj_impression_no_clients_total if first_row is not None else 0
    return intcomma(adj_clients or 0)
adj_impressions_total_clients_humanized.short_description = 'Adjusted Total Unique Clients'

def impressions_per_client_humanized(self, obj):
    """Return impressions per unique client, formatted to two decimals.

    Divides ``obj.impressions`` by ``impression_no_clients_total`` of the
    row returned by ``obj.metrics.first()``. Returns ``'N/A'`` when the job
    has no metrics rows or the client count is zero, so the admin page
    renders instead of raising ``AttributeError``/``ZeroDivisionError``.
    """
    first_row = obj.metrics.first()
    total_clients = first_row.impression_no_clients_total if first_row is not None else 0
    if not total_clients:
        # No clients recorded: a ratio is undefined.
        return 'N/A'
    return f'{(obj.impressions or 0) / total_clients:.2f}'
impressions_per_client_humanized.short_description = 'Impressions Per Client'

def adj_impressions_per_client_humanized(self, obj):
    """Return adjusted impressions per unique client, formatted to two decimals.

    Divides ``obj.adj_impressions`` by ``adj_impression_no_clients_total``
    of the row returned by ``obj.metrics.first()``. Returns ``'N/A'`` when
    the job has no metrics rows or the adjusted client count is zero, so
    the admin page renders instead of raising
    ``AttributeError``/``ZeroDivisionError``.
    """
    first_row = obj.metrics.first()
    adj_clients = first_row.adj_impression_no_clients_total if first_row is not None else 0
    if not adj_clients:
        # No clients recorded: a ratio is undefined.
        return 'N/A'
    return f'{(obj.adj_impressions or 0) / adj_clients:.2f}'
adj_impressions_per_client_humanized.short_description = 'Adj Impressions Per Client'

def clicks_humanized(self, obj):
    """Return the job's click count formatted by ``intcomma``.

    A falsy ``obj.clicks`` (for example ``None``) is rendered as zero.
    """
    click_total = obj.clicks or 0
    return intcomma(click_total)
clicks_humanized.short_description = 'Clicks'
Expand Down Expand Up @@ -1420,14 +1438,15 @@ class JobDailyPerformanceAdmin(admin.ModelAdmin):
'fields': (
'job',
'date',
'impression',
'click',
'block',
'dismiss',
'go_to_scene2',
'subscribe_success',
'subscribe_error',
'other_click',
('impression', 'impression_no_clients'),
('adj_impression', 'adj_impression_no_clients'),
('click', 'click_no_clients'),
('block', 'block_no_clients'),
('dismiss', 'dismiss_no_clients'),
('go_to_scene2', 'go_to_scene2_no_clients'),
('subscribe_success', 'subscribe_success_no_clients'),
('subscribe_error', 'subscribe_error_no_clients'),
('other_click', 'other_click_no_clients'),
'details',
),
}),
Expand Down
26 changes: 24 additions & 2 deletions snippets/base/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@


REDASH_QUERY_IDS = {
'bq-job': 68136,
'bq-impressions': 68341,
'bq-job': 72139,
'bq-impressions': 72140,

# Not currently used but kept here for reference.
'redshift-job': 68135,
Expand Down Expand Up @@ -105,12 +105,18 @@ def prosses_rows(rows, key='message_id'):
for row in new_rows:
event = row['event']
processed[row[key]][event] = processed[row[key]].get(event, 0) + row['counts']
processed[row[key]][f'{event}_no_clients'] = (
processed[row[key]].get(f'{event}_no_clients', 0) + row['no_clients'])
processed[row[key]][f'{event}_no_clients_total'] = (
processed[row[key]].get(f'{event}_no_clients_total', 0) + row['no_clients_total'])

detail = [{
'event': row['event'],
'channel': row['channel'],
'country': row['country_code'],
'counts': row['counts'],
'no_clients': row['no_clients'],
'no_clients_total': row['no_clients_total'],
}]

if not processed[row[key]].get('details'):
Expand All @@ -121,6 +127,8 @@ def prosses_rows(rows, key='message_id'):
drow['channel'] == row['channel'] and
drow['country'] == row['country_code'])):
drow['counts'] += row['counts']
drow['no_clients'] += row['no_clients']
drow['no_clients_total'] += row['no_clients_total']
break
else:
processed[row[key]]['details'] += detail
Expand All @@ -136,7 +144,14 @@ def prosses_rows(rows, key='message_id'):
for k, v in processed.items():
if 'conversion_subscribe_activation' in v:
processed[k]['other_click'] = processed[k].get('click', 0)
processed[k]['other_click_no_clients'] = processed[k].get('click_no_clients', 0)
processed[k]['other_click_no_clients_total'] = \
processed[k].get('click_no_clients_total', 0)
processed[k]['click'] = processed[k].pop('conversion_subscribe_activation')
processed[k]['click_no_clients'] = \
processed[k].pop('conversion_subscribe_activation_no_clients')
processed[k]['click_no_clients_total'] = \
processed[k].pop('conversion_subscribe_activation_no_clients_total')
for row in processed[k]['details']:
if row['event'] == 'click':
row['event'] = 'other_click'
Expand All @@ -161,6 +176,12 @@ def update_job_metrics(date):


def update_impressions(date):
"""Fetch number of Impressions per channel and per duration.
This information is used to determine the number of actually viewed
Snippets by discarding Impressions that lasted too few seconds.
"""
rows = redash_rows('bq-impressions', date)
details = []
for row in rows:
Expand All @@ -183,6 +204,7 @@ def update_impressions(date):
'channel': channel,
'duration': row['duration'],
'counts': row['counts'],
'no_clients': row['no_clients'],
})

with atomic():
Expand Down
107 changes: 107 additions & 0 deletions snippets/base/migrations/0038_auto_20200624_1213.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Generated by Django 2.2.13 on 2020-06-24 12:13

from django.db import migrations, models


# Schema migration for the `jobdailyperformance` model: it sets the model's
# default ordering and adds `<event>_no_clients` and
# `<event>_no_clients_total` counter columns for each tracked event.
class Migration(migrations.Migration):

# Must be applied after migration 0037 of the `base` app.
dependencies = [
('base', '0037_auto_20200604_1218'),
]

operations = [
# Order JobDailyPerformance rows by descending id by default.
migrations.AlterModelOptions(
name='jobdailyperformance',
options={'ordering': ('-id',)},
),
# Every field added below is a non-editable PositiveIntegerField with
# default=0. Fields are listed alphabetically by name; some carry a
# help_text relating them to their base counter.
migrations.AddField(
model_name='jobdailyperformance',
name='adj_impression_no_clients',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='adj_impression_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='block_no_clients',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `block`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='block_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `block`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='click_no_clients',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `click`.'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='click_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `click`.'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='dismiss_no_clients',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `dismiss`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='dismiss_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `dismiss`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='go_to_scene2_no_clients',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `go_to_scene2`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='go_to_scene2_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `go_to_scene2`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='impression_no_clients',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='impression_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='other_click_no_clients',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='other_click_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='subscribe_error_no_clients',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='subscribe_error_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False),
),
migrations.AddField(
model_name='jobdailyperformance',
name='subscribe_success_no_clients',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `subscribe_success`'),
),
migrations.AddField(
model_name='jobdailyperformance',
name='subscribe_success_no_clients_total',
field=models.PositiveIntegerField(default=0, editable=False, help_text='Must be equal or close to `subscribe_success`'),
),
]
84 changes: 71 additions & 13 deletions snippets/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,6 @@
template_cache = LRUCache(100)


IMPRESSION_THRESHOLD_SECONDS = 5
# Percentage of sessions out of total sessions that stayed on
# about:home or about:newtab for more than 5 seconds. Calculated
# manually and to be used as fallback in case actual data for a date
# do not exist.
DEFAULT_IMPRESSION_PERCENTAGE = 0.63


class SnippetTemplate(models.Model):
"""
A template for the body of a snippet. Can have multiple variables that the
Expand Down Expand Up @@ -2243,50 +2235,108 @@ def save(self, *args, **kwargs):


class JobDailyPerformance(models.Model):
IMPRESSION_THRESHOLD_SECONDS = 5
# Percentage of sessions out of total sessions that stayed on about:home or
# about:newtab for more than 5 seconds. Calculated manually and used as a
# fallback when actual data for a date does not exist. The default
# percentage for unique client IDs was calculated in the same way.
DEFAULT_IMPRESSION_PERCENTAGE = 0.63
DEFAULT_CLIENT_PERCENTAGE = 0.89

job = models.ForeignKey(Job, on_delete=models.PROTECT, related_name='metrics')
data_fetched_on = models.DateTimeField(auto_now_add=True)
date = models.DateField(editable=False, db_index=True)
impression = models.PositiveIntegerField(default=0, editable=False)
impression_no_clients = models.PositiveIntegerField(default=0, editable=False)
impression_no_clients_total = models.PositiveIntegerField(default=0, editable=False)
adj_impression = models.PositiveIntegerField(default=0, editable=False)
adj_impression_no_clients = models.PositiveIntegerField(default=0, editable=False)
adj_impression_no_clients_total = models.PositiveIntegerField(default=0, editable=False)
click = models.PositiveIntegerField(default=0, editable=False)
click_no_clients = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `click`.')
click_no_clients_total = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `click`.')
block = models.PositiveIntegerField(default=0, editable=False)
block_no_clients = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `block`')
block_no_clients_total = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `block`')
dismiss = models.PositiveIntegerField(default=0, editable=False)
dismiss_no_clients = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `dismiss`')
dismiss_no_clients_total = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `dismiss`')
go_to_scene2 = models.PositiveIntegerField(default=0, editable=False)
go_to_scene2_no_clients = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `go_to_scene2`')
go_to_scene2_no_clients_total = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `go_to_scene2`')
subscribe_success = models.PositiveIntegerField(default=0, editable=False)
subscribe_success_no_clients = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `subscribe_success`')
subscribe_success_no_clients_total = models.PositiveIntegerField(
default=0, editable=False,
help_text='Must be equal or close to `subscribe_success`')
subscribe_error = models.PositiveIntegerField(default=0, editable=False)
subscribe_error_no_clients = models.PositiveIntegerField(default=0, editable=False)
subscribe_error_no_clients_total = models.PositiveIntegerField(default=0, editable=False)
other_click = models.PositiveIntegerField(default=0, editable=False)
other_click_no_clients = models.PositiveIntegerField(default=0, editable=False)
other_click_no_clients_total = models.PositiveIntegerField(default=0, editable=False)
details = JSONField(default=dict)

class Meta:
unique_together = ('job', 'date')
ordering = ('-id', )

def save(self, *args, **kwargs):
self.adj_impression = self.calculate_adj_impression()
(self.adj_impression,
self.adj_impression_no_clients,
self.adj_impression_no_clients_total) = self.calculate_adj_impression()
return super().save(*args, **kwargs)

def calculate_adj_impression(self):
try:
di = DailyImpressions.objects.get(date=self.date)
except DailyImpressions.DoesNotExist:
percentage = DEFAULT_IMPRESSION_PERCENTAGE
percentage = self.DEFAULT_IMPRESSION_PERCENTAGE
percentage_clients = self.DEFAULT_CLIENT_PERCENTAGE
else:
total_impressions = 0
valid_impressions = 0
total_clients = 0
valid_clients = 0

for detail in di.details:
total_impressions += detail['counts']
if int(detail['duration']) >= IMPRESSION_THRESHOLD_SECONDS:
total_clients += detail['no_clients']
if int(detail['duration']) >= self.IMPRESSION_THRESHOLD_SECONDS:
valid_impressions += detail['counts']
valid_clients += detail['no_clients']

if total_impressions < 100_000:
# Sample too small, set default
percentage = DEFAULT_IMPRESSION_PERCENTAGE
percentage = self.DEFAULT_IMPRESSION_PERCENTAGE
percentage_clients = self.DEFAULT_CLIENT_PERCENTAGE
else:
percentage = valid_impressions / total_impressions
percentage_clients = valid_clients / total_clients

adj_im = int((self.impression * percentage) + 0.5)
adj_clients = int((self.impression_no_clients * percentage_clients) + 0.5)
adj_clients_total = int((self.impression_no_clients_total * percentage_clients) + 0.5)

return adj_im
return (adj_im, adj_clients, adj_clients_total)

@property
def adj_block_rate(self):
Expand All @@ -2306,6 +2356,14 @@ def click_rate(self):
return 'N/A'
return float(f'{(self.click / self.impression) * 100:.4f}')

@property
def impressions_per_client(self):
    """Return ``impression / impression_no_clients`` scaled by 100.

    The result is rounded to four decimal places and returned as a float.
    Returns ``'N/A'`` when ``impression_no_clients`` is zero (the field
    default) instead of raising ``ZeroDivisionError``.
    """
    # NOTE(review): the x100 scaling is kept unchanged from the original
    # expression; confirm it is intended for a per-client ratio.
    if not self.impression_no_clients:
        return 'N/A'
    return float(f'{(self.impression / self.impression_no_clients) * 100:.4f}')

@property
def adj_impressions_per_client(self):
    """Return ``adj_impression / adj_impression_no_clients`` scaled by 100.

    The result is rounded to four decimal places and returned as a float.
    Returns ``'N/A'`` when ``adj_impression_no_clients`` is zero (the field
    default) instead of raising ``ZeroDivisionError``.
    """
    # NOTE(review): the x100 scaling is kept unchanged from the original
    # expression; confirm it is intended for a per-client ratio.
    if not self.adj_impression_no_clients:
        return 'N/A'
    return float(f'{(self.adj_impression / self.adj_impression_no_clients) * 100:.4f}')


class DailyImpressions(models.Model):
data_fetched_on = models.DateTimeField(auto_now_add=True)
Expand Down

0 comments on commit 8d1d029

Please sign in to comment.