Skip to content

Commit

Permalink
Merge pull request #205 from readthedocs/davidfischer/referrer-blocklist
Browse files Browse the repository at this point in the history
Allow blocking referrers for ad impressions with a setting
  • Loading branch information
davidfischer committed Jul 28, 2020
2 parents e110504 + 64c7ae6 commit 8d571ba
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 29 deletions.
4 changes: 2 additions & 2 deletions adserver/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ class BaseImpression(TimeStampedModel, models.Model):

# Offers include cases where the server returned an ad
# but the client didn't load it
# or the client didn't qualify as a view (staff, blacklisted, etc.)
# or the client didn't qualify as a view (staff, blocklisted, etc.)
offers = models.PositiveIntegerField(
_("Offers"),
default=0,
Expand All @@ -1285,7 +1285,7 @@ class BaseImpression(TimeStampedModel, models.Model):
),
)

# Views & Clicks don't count actions that are blacklisted, done by staff, bots, etc.
# Views & Clicks don't count actions that are blocklisted, done by staff, bots, etc.
views = models.PositiveIntegerField(
_("Views"),
default=0,
Expand Down
42 changes: 41 additions & 1 deletion adserver/tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import datetime
import json
import re
from unittest import mock

from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.auth.models import AnonymousUser
from django.test import Client
Expand All @@ -13,6 +15,7 @@
from django_dynamic_fixture import get
from rest_framework.authtoken.models import Token

from .. import utils as adserver_utils
from ..api.permissions import AdDecisionPermission
from ..api.permissions import AdvertiserPermission
from ..api.permissions import PublisherPermission
Expand Down Expand Up @@ -807,7 +810,7 @@ def setUp(self):

self.page_url = "http://example.com"

# To be counted, the UA and IP must be valid, non-blacklisted/non-bots
# To be counted, the UA and IP must be valid, non-blocklisted/non-bots
self.proxy_client = Client(
HTTP_USER_AGENT=self.user_agent, REMOTE_ADDR=self.ip_address
)
Expand Down Expand Up @@ -1039,6 +1042,43 @@ def test_view_tracking_unknown_ua(self):
self.assertEqual(resp.status_code, 200)
self.assertEqual(resp["X-Adserver-Reason"], "Unrecognized user agent")

@override_settings(ADSERVER_BLOCKLISTED_USER_AGENTS=["Safari"])
def test_view_tracking_blocked_ua(self):
    """A view from a blocklisted user agent is ignored as a blocked UA."""
    # ``override_settings`` alone is not enough: ``adserver.utils`` compiles
    # the setting into ``BLOCKLISTED_UA_REGEXES`` when it is first imported,
    # so the compiled list has to be swapped out as well.
    blocked_regexes = [
        re.compile(s) for s in settings.ADSERVER_BLOCKLISTED_USER_AGENTS
    ]

    ua = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.100 Safari/537.36"
    )

    # ``mock.patch.object`` puts the previous list back even if the request
    # raises, so the blocklist can never leak into other tests (and it is
    # restored to its real prior value rather than a hard-coded ``[]``).
    with mock.patch.object(
        adserver_utils, "BLOCKLISTED_UA_REGEXES", blocked_regexes
    ):
        resp = self.client.get(self.url, HTTP_USER_AGENT=ua)

    self.assertEqual(resp.status_code, 200)
    self.assertEqual(resp["X-Adserver-Reason"], "Blocked UA impression")

@override_settings(ADSERVER_BLOCKLISTED_REFERRERS=["http://invalid.referrer"])
def test_view_tracking_blocked_referrer(self):
    """A view coming from a blocklisted referrer is ignored as a blocked referrer."""
    # ``override_settings`` alone is not enough: ``adserver.utils`` compiles
    # the setting into ``BLOCKLISTED_REFERRERS_REGEXES`` when it is first
    # imported, so the compiled list has to be swapped out as well.
    blocked_regexes = [
        re.compile(s) for s in settings.ADSERVER_BLOCKLISTED_REFERRERS
    ]

    # ``mock.patch.object`` puts the previous list back even if the request
    # raises, so the blocklist can never leak into other tests (and it is
    # restored to its real prior value rather than a hard-coded ``[]``).
    with mock.patch.object(
        adserver_utils, "BLOCKLISTED_REFERRERS_REGEXES", blocked_regexes
    ):
        resp = self.client.get(self.url, HTTP_REFERER="http://invalid.referrer")

    self.assertEqual(resp.status_code, 200)
    self.assertEqual(resp["X-Adserver-Reason"], "Blocked referrer impression")

def test_view_tracking_invalid_ad(self):
url = reverse(
"view-proxy", kwargs={"advertisement_id": 99999, "nonce": "invalidnonce"}
Expand Down
20 changes: 15 additions & 5 deletions adserver/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from ..utils import get_client_id
from ..utils import get_client_user_agent
from ..utils import get_geolocation
from ..utils import is_blacklisted_user_agent
from ..utils import is_blocklisted_referrer
from ..utils import is_blocklisted_user_agent
from ..utils import is_click_ratelimited
from ..utils import parse_date_string

Expand Down Expand Up @@ -64,18 +65,27 @@ def test_calculate_ctr(self):
self.assertAlmostEqual(calculate_ctr(1, 10), 10)
self.assertAlmostEqual(calculate_ctr(5, 25), 20)

def test_blacklisted_user_agent(self):
def test_blocklisted_user_agent(self):
ua = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/69.0.3497.100 Safari/537.36"
)
self.assertFalse(is_blacklisted_user_agent(ua))
self.assertFalse(is_blocklisted_user_agent(ua))
regexes = [re.compile("Chrome")]
self.assertTrue(is_blacklisted_user_agent(ua, regexes))
self.assertTrue(is_blocklisted_user_agent(ua, regexes))

regexes = [re.compile("this isn't found"), re.compile("neither is this")]
self.assertFalse(is_blacklisted_user_agent(ua, regexes))
self.assertFalse(is_blocklisted_user_agent(ua, regexes))

def test_blocklisted_referrer(self):
    """Referrers are blocklisted only when a blocklist regex matches them."""
    referrer = "http://google.com"

    # No explicit regexes: falls back to the module-level blocklist
    self.assertFalse(is_blocklisted_referrer(referrer))

    # Escape the pattern so "." only matches a literal dot
    regexes = [re.compile(re.escape("google.com"))]
    self.assertTrue(is_blocklisted_referrer(referrer, regexes))

    # A request without a Referer header is never blocklisted
    self.assertFalse(is_blocklisted_referrer(None, regexes))

    regexes = [re.compile("this isn't found"), re.compile("neither is this")]
    self.assertFalse(is_blocklisted_referrer(referrer, regexes))

def test_ratelimited(self):
factory = RequestFactory()
Expand Down
34 changes: 27 additions & 7 deletions adserver/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@
log = logging.getLogger(__name__) # noqa

# Compile these regular expressions at startup time for performance purposes
BLACKLISTED_UA_REGEXES = [
re.compile(s) for s in settings.ADSERVER_BLACKLISTED_USER_AGENTS
BLOCKLISTED_UA_REGEXES = [
re.compile(s) for s in settings.ADSERVER_BLOCKLISTED_USER_AGENTS
]
BLOCKLISTED_REFERRERS_REGEXES = [
re.compile(s) for s in settings.ADSERVER_BLOCKLISTED_REFERRERS
]

try:
Expand Down Expand Up @@ -169,11 +172,28 @@ def is_click_ratelimited(request, ratelimits=None):
return False


def is_blacklisted_user_agent(user_agent, blacklist_regexes=BLACKLISTED_UA_REGEXES):
"""Returns ``True`` if the UA is blacklisted and ``False`` otherwise."""
for regex in blacklist_regexes:
if regex.search(user_agent):
return True
def is_blocklisted_user_agent(user_agent, blocklist_regexes=None):
    """
    Returns ``True`` if the UA is blocklisted and ``False`` otherwise.

    :param user_agent: the User Agent string to check; a missing or empty
        value is never considered blocklisted
    :param blocklist_regexes: compiled regular expressions to search for
        anywhere in the UA; defaults to ``BLOCKLISTED_UA_REGEXES``
    """
    # Resolve the default at call time (not in the signature) so that
    # replacing the module-level list is picked up by later calls
    patterns = (
        BLOCKLISTED_UA_REGEXES if blocklist_regexes is None else blocklist_regexes
    )

    if not user_agent:
        return False

    return any(pattern.search(user_agent) for pattern in patterns)


def is_blocklisted_referrer(referrer, blocklist_regexes=None):
    """
    Returns ``True`` if the Referrer is blocklisted and ``False`` otherwise.

    :param referrer: the Referrer string to check; a missing or empty
        value is never considered blocklisted
    :param blocklist_regexes: compiled regular expressions to search for
        anywhere in the referrer; defaults to ``BLOCKLISTED_REFERRERS_REGEXES``
    """
    # Resolve the default at call time (not in the signature) so that
    # replacing the module-level list is picked up by later calls
    patterns = (
        BLOCKLISTED_REFERRERS_REGEXES
        if blocklist_regexes is None
        else blocklist_regexes
    )

    if not referrer:
        return False

    return any(pattern.search(referrer) for pattern in patterns)

Expand Down
29 changes: 20 additions & 9 deletions adserver/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
from .utils import get_client_ip
from .utils import get_client_user_agent
from .utils import get_geolocation
from .utils import is_blacklisted_user_agent
from .utils import is_blocklisted_referrer
from .utils import is_blocklisted_user_agent
from .utils import is_click_ratelimited


Expand Down Expand Up @@ -327,6 +328,7 @@ def ignore_tracking_reason(self, request, advertisement, nonce, publisher):
ip_address = get_client_ip(request)
user_agent = get_client_user_agent(request)
parsed_ua = parse_user_agent(user_agent)
referrer = request.META.get("HTTP_REFERER")

country_code = None
region_code = None
Expand All @@ -339,9 +341,7 @@ def ignore_tracking_reason(self, request, advertisement, nonce, publisher):
region_code = geo_data["region"]
metro_code = geo_data["dma_code"]

valid_nonce = advertisement.is_valid_nonce(self.impression_type, nonce)

if not valid_nonce:
if not advertisement.is_valid_nonce(self.impression_type, nonce):
log.log(self.log_level, "Old or nonexistent impression nonce")
reason = "Old/Nonexistent nonce"
elif parsed_ua.is_bot:
Expand All @@ -360,11 +360,21 @@ def ignore_tracking_reason(self, request, advertisement, nonce, publisher):
elif request.user.is_staff:
log.log(self.log_level, "Ignored staff user ad impression")
reason = "Staff impression"
elif is_blacklisted_user_agent(user_agent):
elif is_blocklisted_user_agent(user_agent):
log.log(self.log_level, "Blocked user agent impression [%s]", user_agent)
reason = "Blocked UA impression"
elif is_blocklisted_referrer(referrer):
# Note: Normally logging IPs is frowned upon for DNT
# but this is a security/billing violation
log.log(
self.log_level, "Blacklisted user agent impression [%s]", user_agent
self.log_security_level,
"Blocklisted referrer [%s], Publisher: [%s], IP: [%s], UA: [%s]",
referrer,
publisher,
ip_address,
user_agent,
)
reason = "Blacklisted impression"
reason = "Blocked referrer impression"
elif not publisher:
log.log(self.log_level, "Ad impression for unknown publisher")
reason = "Unknown publisher"
Expand All @@ -376,19 +386,20 @@ def ignore_tracking_reason(self, request, advertisement, nonce, publisher):
# Then they turn off their VPN and click on the ad
log.log(
self.log_security_level,
"Invalid geo targeting for ad [%s]. Country: [%s], Regions: [%s], Metro: [%s]",
"Invalid geo targeting for ad [%s]. Country: [%s], Region: [%s], Metro: [%s], UA: [%s]",
advertisement,
country_code,
region_code,
metro_code,
user_agent,
)
reason = "Invalid targeting impression"
elif self.impression_type == CLICKS and is_click_ratelimited(request):
# Note: Normally logging IPs is frowned upon for DNT
# but this is a security/billing violation
log.log(
self.log_security_level,
"User has clicked too many ads recently, IP = [%s], User Agent = [%s]",
"User has clicked too many ads recently, IP: [%s], UA: [%s]",
ip_address,
user_agent,
)
Expand Down
5 changes: 4 additions & 1 deletion config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,10 @@
ADSERVER_ANALYTICS_ID = env("ADSERVER_ANALYTICS_ID", default=None)
ADSERVER_PRIVACY_POLICY_URL = env("ADSERVER_PRIVACY_POLICY_URL", default=None)
ADSERVER_CLICK_RATELIMITS = []
ADSERVER_BLACKLISTED_USER_AGENTS = []
ADSERVER_BLOCKLISTED_USER_AGENTS = env.list(
"ADSERVER_BLOCKLISTED_USER_AGENTS", default=[]
)
ADSERVER_BLOCKLISTED_REFERRERS = env.list("ADSERVER_BLOCKLISTED_REFERRERS", default=[])
# Recording views is highly discouraged in production but useful in development
ADSERVER_RECORD_VIEWS = True
ADSERVER_HTTPS = False # Should be True in most production setups
Expand Down
3 changes: 0 additions & 3 deletions config/settings/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@
ADSERVER_ADMIN_URL = env("ADSERVER_ADMIN_URL", default="admin")
ADSERVER_DO_NOT_TRACK = env.bool("ADSERVER_DO_NOT_TRACK", default=False)
ADSERVER_RECORD_VIEWS = env.bool("ADSERVER_RECORD_VIEWS", default=False)
ADSERVER_BLACKLISTED_USER_AGENTS = env.list(
"ADSERVER_BLACKLISTED_USER_AGENTS", default=[]
)
ADSERVER_CLICK_RATELIMITS = env.list(
"ADSERVER_CLICK_RATELIMITS", default=["1/m", "3/10m", "10/h", "25/d"]
)
Expand Down
9 changes: 8 additions & 1 deletion docs/install/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,20 @@ then the admin interface will be available at the URL ``http://adserver.example.
By default, this is set to ``/admin``.


ADSERVER_BLACKLISTED_USER_AGENTS
ADSERVER_BLOCKLISTED_USER_AGENTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Set this to a comma separated list of regular expressions that are searched for anywhere in the User Agent of an ad request.
Any user agent matching any of these will be completely ignored for counting clicks and views for billing purposes.


ADSERVER_BLOCKLISTED_REFERRERS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Set this to a comma separated list of regular expressions that are searched for anywhere in the Referrer of an ad request.
Any referrer matching any of these will be completely ignored for counting clicks and views for billing purposes.


ADSERVER_CLICK_RATELIMITS
~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down

0 comments on commit 8d571ba

Please sign in to comment.