Skip to content

Commit

Permalink
Merge pull request #769 from readthedocs/davidfischer/traffic-fill-an…
Browse files Browse the repository at this point in the history
…d-cap

Calculate traffic fill rate daily and enforce traffic cap
  • Loading branch information
davidfischer committed Jul 12, 2023
2 parents 1ec7719 + 6b810d4 commit 45bf844
Show file tree
Hide file tree
Showing 10 changed files with 447 additions and 6 deletions.
2 changes: 2 additions & 0 deletions adserver/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ class Meta:
"cpm",
"sold_impressions",
"targeting_parameters",
"traffic_fill",
"traffic_cap",
)


Expand Down
35 changes: 35 additions & 0 deletions adserver/migrations/0084_publisher_traffic_shaping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Generated by Django 3.2.18 on 2023-07-07 23:19
import jsonfield.fields
from django.db import migrations

import adserver.validators


class Migration(migrations.Migration):
    """Add the ``traffic_cap`` and ``traffic_fill`` JSON fields.

    Each field is added to both the ``flight`` and ``historicalflight``
    models. All four fields are nullable, default to ``None`` and are
    validated by ``TrafficFillValidator``.
    """

    dependencies = [("adserver", "0083_allow_api_keywords")]

    # Operation order: flight cap, flight fill, historical cap, historical fill.
    # A new validator instance is created for every field.
    operations = [
        migrations.AddField(
            model_name=model_name,
            name=field_name,
            field=jsonfield.fields.JSONField(
                blank=True,
                default=None,
                null=True,
                validators=[adserver.validators.TrafficFillValidator()],
                verbose_name=verbose_name,
            ),
        )
        for model_name in ("flight", "historicalflight")
        for field_name, verbose_name in (
            ("traffic_cap", "Traffic cap"),
            ("traffic_fill", "Traffic fill"),
        )
    ]
65 changes: 63 additions & 2 deletions adserver/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from .utils import get_client_user_agent
from .utils import get_domain_from_url
from .validators import TargetingParametersValidator
from .validators import TrafficFillValidator

log = logging.getLogger(__name__) # noqa

Expand Down Expand Up @@ -782,6 +783,33 @@ class Flight(TimeStampedModel, IndestructibleModel):
default=0, help_text=_("Clicks across all ads in this flight")
)

# We store nightly the top 20 publishers/countries/regions for each flight
# and the percentage they have filled this flight
# eg.
# {
# "publishers": {"publisher1": 0.1, "publisher2": 0.05},
# "countries": {"US": 0.1, "CA": 0.05, "DE": 0.05},
# "regions": {"us-ca": 0.25, "eu": 0.5},
# }
traffic_fill = JSONField(
_("Traffic fill"),
blank=True,
null=True,
default=None,
validators=[TrafficFillValidator()],
)

# If set, any publisher, country, or region whose `traffic_fill` exceeds the cap
# will not be eligible to show on this campaign until they're below the cap.
# Format is the same as `traffic_fill` but this is set manually
traffic_cap = JSONField(
_("Traffic cap"),
blank=True,
null=True,
default=None,
validators=[TrafficFillValidator()],
)

# Connect to Stripe invoice data
# There can be multiple invoices for a flight
# (say a 3 month flight billed monthly)
Expand Down Expand Up @@ -956,10 +984,10 @@ def show_to_geo(self, geo_data):
if self.excluded_countries and geo_data.country in self.excluded_countries:
return False

regions = Region.load_from_cache()

# Check region groupings as well
if self.included_regions or self.excluded_regions:
# Only load regions if we have to
regions = Region.load_from_cache()
if self.included_regions and not any(
geo_data.country in regions[reg]
for reg in self.included_regions
Expand All @@ -974,6 +1002,29 @@ def show_to_geo(self, geo_data):
):
return False

# Check if the country traffic cap exceeds the current fill for that country
if self.traffic_cap and self.traffic_fill and "countries" in self.traffic_cap:
# pylint: disable=invalid-sequence-index
limited_countries = self.traffic_cap["countries"]
country_traffic_fill = self.traffic_fill.get("countries", {})
if country_traffic_fill.get(geo_data.country, 0.0) > limited_countries.get(
geo_data.country, 100.0
):
return False

# Check if the region traffic cap exceeds the current fill for that region
if self.traffic_cap and self.traffic_fill and "regions" in self.traffic_cap:
# pylint: disable=invalid-sequence-index
limited_regions = self.traffic_cap["regions"]
region_traffic_fill = self.traffic_fill.get("regions", {})
for region_slug in regions:
if geo_data.country not in regions[region_slug]:
continue
if region_traffic_fill.get(region_slug, 0.0) > limited_regions.get(
region_slug, 100.0
):
return False

return True

def show_to_keywords(self, keywords):
Expand Down Expand Up @@ -1036,6 +1087,16 @@ def show_on_publisher(self, publisher):
if self.excluded_publishers:
return publisher.slug not in self.excluded_publishers

# Check if the publisher traffic cap exceeds the current fill for that publisher
if self.traffic_cap and self.traffic_fill and "publishers" in self.traffic_cap:
# pylint: disable=invalid-sequence-index
limited_publishers = self.traffic_cap["publishers"]
publisher_traffic_fill = self.traffic_fill.get("publishers", {})
if publisher_traffic_fill.get(publisher.slug, 0.0) > limited_publishers.get(
publisher.slug, 100.0
):
return False

return True

def show_on_domain(self, url):
Expand Down
82 changes: 82 additions & 0 deletions adserver/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,9 @@ def update_previous_day_reports(day=None):
daily_update_uplift(start_date)
daily_update_regiontopic(start_date)

# Updates an aggregation on each paid flight
update_flight_traffic_fill.apply_async()

if not day:
# Send notification to Slack about previous day's reports
# Don't send this notification if run manually
Expand Down Expand Up @@ -942,6 +945,85 @@ def disable_inactive_publishers(days=60, draft_only=False, dry_run=False):
message.send()


def _calculate_traffic_fill(
    impression_model, flight, group_field, max_objects, threshold
):
    """
    Return the share of a flight's views attributed to each ``group_field`` value.

    Sums ``views`` over the ``impression_model`` rows for ``flight`` (queried
    through the ``settings.REPLICA_SLUG`` database alias), keeps at most the
    ``max_objects`` groups with the most views, and drops any group whose
    share of ``flight.total_views`` is below ``threshold``.

    :param impression_model: model class with ``views`` and ``advertisement`` fields
    :param flight: the flight to aggregate; ``total_views`` must be non-zero
    :param group_field: field lookup to group by (eg. ``"country"``)
    :param max_objects: maximum number of groups to consider
    :param threshold: minimum fraction of the flight's views to be included
    :returns: dict mapping each group value to its fraction of the flight's views
    """
    top_groups = (
        impression_model.objects.using(settings.REPLICA_SLUG)
        .filter(advertisement__flight=flight)
        .values(group_field)
        .annotate(group_views=Sum("views"))
        .order_by("-group_views")[:max_objects]
    )

    traffic_fill = {}
    for row in top_groups:
        share = row["group_views"] / flight.total_views
        if share >= threshold:
            traffic_fill[row[group_field]] = share
    return traffic_fill


@app.task()
def update_flight_traffic_fill():
    """Update a cached value on each paid flight with its fill rate by publisher/region/country."""
    max_objects = 20
    # A fraction, not a percentage: shares below 1% of a flight's views are not stored
    threshold = 0.01

    log.info("Updating flight traffic fill")

    # Update the traffic fill rates for each publisher/region/country for each flight.
    # The `total_views__gt=0` filter guarantees the divisions in the helper are safe.
    for flight in Flight.objects.filter(
        live=True, campaign__campaign_type=PAID_CAMPAIGN, total_views__gt=0
    ):
        # Publisher (fast)
        publisher_traffic_fill = _calculate_traffic_fill(
            AdImpression, flight, "publisher__slug", max_objects, threshold
        )

        # Region (slower)
        region_traffic_fill = _calculate_traffic_fill(
            RegionImpression, flight, "region", max_objects, threshold
        )

        # Country (slowest)
        country_traffic_fill = _calculate_traffic_fill(
            GeoImpression, flight, "country", max_objects, threshold
        )

        # Grab the flight from the DB again in case the object has changed
        flight.refresh_from_db()
        if not flight.traffic_fill:
            flight.traffic_fill = {}
        flight.traffic_fill["publishers"] = publisher_traffic_fill
        flight.traffic_fill["countries"] = country_traffic_fill
        flight.traffic_fill["regions"] = region_traffic_fill
        flight.save()

    log.info("Completed updating flight traffic fill")


@app.task()
def run_publisher_importers():
"""
Expand Down
6 changes: 5 additions & 1 deletion adserver/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from django_dynamic_fixture import get
from djstripe.models import Customer

from ..constants import PAID_CAMPAIGN
from ..models import AdType
from ..models import Advertisement
from ..models import Advertiser
Expand All @@ -30,7 +31,10 @@ def setUp(self):
self.publisher = get(Publisher)
self.advertiser = get(Advertiser)
self.campaign = get(
Campaign, advertiser=self.advertiser, publishers=[self.publisher]
Campaign,
advertiser=self.advertiser,
publishers=[self.publisher],
campaign_type=PAID_CAMPAIGN,
)
self.flight = get(
Flight,
Expand Down
74 changes: 74 additions & 0 deletions adserver/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,53 @@ def test_geo_state_metro_include(self):
self.assertFalse(self.flight.show_to_geo(GeolocationData("US", "CA", 825)))
self.assertTrue(self.flight.show_to_geo(GeolocationData("US", "WA", 819)))

def test_geo_cap(self):
    """A country is only blocked once its traffic fill exceeds its traffic cap."""
    flight = self.flight

    # Before any cap is set in this test, the US is eligible
    self.assertTrue(flight.show_to_geo(GeolocationData("US")))

    # US fill (0.45) is below the US cap (0.5): still eligible
    flight.traffic_cap = {"countries": {"US": 0.5}}
    flight.traffic_fill = {"countries": {"US": 0.45}}
    flight.save()
    self.assertTrue(flight.show_to_geo(GeolocationData("US")))

    # US fill (0.55) is now above the cap: the US is blocked, Canada is not
    flight.traffic_fill = {"countries": {"US": 0.55}}
    flight.save()
    self.assertFalse(flight.show_to_geo(GeolocationData("US")))
    self.assertTrue(flight.show_to_geo(GeolocationData("CA")))

def test_region_cap(self):
    """A region over its traffic cap blocks every country in that region."""
    flight = self.flight

    # Before any cap is set in this test, the US is eligible
    self.assertTrue(flight.show_to_geo(GeolocationData("US")))

    # "us-ca" fill (0.45) is below the region cap (0.5): still eligible
    flight.traffic_cap = {"regions": {"us-ca": 0.5}}
    flight.traffic_fill = {"regions": {"us-ca": 0.45}}
    flight.save()
    self.assertTrue(flight.show_to_geo(GeolocationData("US")))

    # "us-ca" fill (0.55) is now above the cap.
    # The assertions rely on the "us-ca" region covering both the US and
    # Canada but not Germany.
    flight.traffic_fill = {"regions": {"us-ca": 0.55}}
    flight.save()
    self.assertFalse(flight.show_to_geo(GeolocationData("CA")))
    self.assertFalse(flight.show_to_geo(GeolocationData("US")))
    self.assertTrue(flight.show_to_geo(GeolocationData("DE")))

def test_keyword_targeting(self):
self.assertTrue(self.flight.show_to_keywords(["django"]))

Expand Down Expand Up @@ -154,6 +201,33 @@ def test_publisher_targeting(self):
self.flight.save()
self.assertTrue(self.flight.show_on_publisher(self.publisher))

def test_publisher_traffic_cap(self):
    """A publisher is only blocked once its recorded fill exceeds its cap."""
    flight, publisher = self.flight, self.publisher

    # Before any cap is set in this test, the publisher is eligible
    self.assertTrue(flight.show_on_publisher(publisher))

    # A cap with no recorded fill for this publisher does not block it
    flight.traffic_cap = {"publishers": {publisher.slug: 0.5}}
    flight.save()
    self.assertTrue(flight.show_on_publisher(publisher))

    # Fill (0.45) is below the cap (0.5): still eligible
    flight.traffic_fill = {"publishers": {publisher.slug: 0.45}}
    flight.save()
    self.assertTrue(flight.show_on_publisher(publisher))

    # Fill (0.51) exceeds the cap: the flight may not be shown on the publisher
    flight.traffic_fill = {"publishers": {publisher.slug: 0.51}}
    flight.save()
    self.assertFalse(flight.show_on_publisher(publisher))

def test_domain_targeting(self):
self.flight.targeting_parameters["include_domains"] = ["example.com"]
self.flight.save()
Expand Down
22 changes: 22 additions & 0 deletions adserver/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,3 +684,25 @@ def test_remove_old_report_data(self):
self.assertEqual(impression_new.offers, 3)
self.assertEqual(impression_new.views, 2)
self.assertEqual(impression_new.clicks, 1)

def test_traffic_fill(self):
    """Running the daily reports stores per-country/publisher fill on the flight."""
    # Ad1/CA - offered/decision=3, views=2, clicks=1
    # Ad1/MX - offered/decision=1, views=1, clicks=0
    # Ad2/MX - offered/decisions=2, views=2, clicks=0
    # All views/clicks on publisher1
    self.flight.total_views = 5
    self.flight.save()

    # The daily report task is what triggers the traffic fill update
    update_previous_day_reports(timezone.now())

    self.flight.refresh_from_db()
    traffic_fill = self.flight.traffic_fill

    self.assertIsNotNone(traffic_fill)
    # assertIn reports the missing key and the container on failure
    for key in ("regions", "countries", "publishers"):
        self.assertIn(key, traffic_fill)

    # 2 of 5 views came from CA and 3 of 5 from MX
    self.assertDictEqual(traffic_fill["countries"], {"CA": 0.4, "MX": 0.6})
    self.assertDictEqual(traffic_fill["publishers"], {self.publisher.slug: 1.0})

0 comments on commit 45bf844

Please sign in to comment.