Skip to content

Commit

Permalink
Merge pull request #210 from readthedocs/davidfischer/block-ips
Browse files Browse the repository at this point in the history
IP Geolocation and Proxy detection improvements
  • Loading branch information
davidfischer committed Jul 29, 2020
2 parents 90b8807 + 9b293a2 commit 6787b66
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 35 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,4 @@ db.sqlite3
_build

# GeoIP data files
geoip/*.mmdb
geoip/*.lock
geoip/*
26 changes: 19 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
.PHONY: help test clean dockerbuild dockerserve dockershell dockerprod geoip
.PHONY: help test clean dockerbuild dockerserve dockershell dockerprod geoip ipproxy


GEOIP_UPDATE = geoipupdate
GEOIP_DIR = geoip
GEOIP_CONF_FILE = $(GEOIP_DIR)/GeoIP.conf
GEOIP_CITY_FILE = dbip-city-lite.mmdb.gz
GEOIP_COUNTRY_FILE = dbip-country-lite.mmdb.gz
GEOIP_CITY_DB_URL = https://download.db-ip.com/free/dbip-city-lite-2020-07.mmdb.gz
GEOIP_COUNTRY_DB_URL = https://download.db-ip.com/free/dbip-country-lite-2020-07.mmdb.gz

TOR_EXIT_LIST_FILE = torbulkexitlist.txt
TOR_EXIT_LIST_URL = https://check.torproject.org/torbulkexitlist

DOCKER_CONFIG=docker-compose-local.yml
DOCKER_IMAGE_NAME=ethicaladserver
Expand All @@ -17,7 +23,8 @@ help:
@echo " dockershell Connect to a shell on the Django docker container"
@echo " dockerstart Start all services in the background"
@echo " dockerstop Stop all services started by dockerstart"
@echo " geoip Download the GeoIP database from MaxMind"
@echo " geoip Download the GeoIP databases"
@echo " ipproxy Download proxy databases"

test:
tox
Expand Down Expand Up @@ -54,7 +61,12 @@ dockerstop:
dockershell:
docker-compose -f $(DOCKER_CONFIG) run --rm django /bin/ash

# Get the GeoIP database from MaxMind
# This command will probably fail unless you have "geoipupdate" installed
# Get the GeoIP databases from DB-IP
geoip:
$(GEOIP_UPDATE) -f $(GEOIP_CONF_FILE) -d $(GEOIP_DIR) --verbose
curl -o $(GEOIP_DIR)/$(GEOIP_CITY_FILE) "$(GEOIP_CITY_DB_URL)"
curl -o $(GEOIP_DIR)/$(GEOIP_COUNTRY_FILE) "$(GEOIP_COUNTRY_DB_URL)"
gunzip $(GEOIP_DIR)/$(GEOIP_CITY_FILE)
gunzip $(GEOIP_DIR)/$(GEOIP_COUNTRY_FILE)

ipproxy:
curl -o $(GEOIP_DIR)/$(TOR_EXIT_LIST_FILE) "$(TOR_EXIT_LIST_URL)"
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ Features
- mobile, tablet, or desktop targeting
- extensible custom targeting options

The Ethical Ad Server uses GeoLite2 data created by MaxMind.
The Ethical Ad Server uses GeoLite2 data created by MaxMind
or IP Geolocation by DB-IP.


Documentation
Expand Down
22 changes: 17 additions & 5 deletions adserver/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,16 @@ def setUp(self):
"click-proxy", kwargs={"advertisement_id": self.ad.pk, "nonce": self.nonce}
)

def tearDown(self):
    """Clear the module-level blocklists so overrides do not leak between tests."""
    # Reset the UA blocklist
    adserver_utils.BLOCKLISTED_UA_REGEXES = []

    # Reset the referrer blocklist
    adserver_utils.BLOCKLISTED_REFERRERS_REGEXES = []

    # Reset the IP blocklist. This is a set (not a list) to match the type
    # that ``build_blocked_ip_set`` produces and that the tests assign.
    adserver_utils.BLOCKLISTED_IPS = set()

def test_view_tracking_valid(self):
resp = self.client.get(self.url)

Expand Down Expand Up @@ -1060,9 +1070,6 @@ def test_view_tracking_blocked_ua(self):
self.assertEqual(resp.status_code, 200)
self.assertEqual(resp["X-Adserver-Reason"], "Blocked UA impression")

# Reset the UA blocklist
adserver_utils.BLOCKLISTED_UA_REGEXES = []

@override_settings(ADSERVER_BLOCKLISTED_REFERRERS=["http://invalid.referrer"])
def test_view_tracking_blocked_referrer(self):
# Override the settings for the blocklist
Expand All @@ -1076,8 +1083,13 @@ def test_view_tracking_blocked_referrer(self):
self.assertEqual(resp.status_code, 200)
self.assertEqual(resp["X-Adserver-Reason"], "Blocked referrer impression")

# Reset the referrer blocklist
adserver_utils.BLOCKLISTED_REFERRERS_REGEXES = []
def test_view_tracking_blocked_ip(self):
    """A request from a blocklisted IP gets a 200 with the blocked-IP reason."""
    # Blocklist the IP address used by this test case
    adserver_utils.BLOCKLISTED_IPS = {self.ip_address}

    response = self.client.get(self.url)

    self.assertEqual(response.status_code, 200)
    self.assertEqual(response["X-Adserver-Reason"], "Blocked IP impression")

def test_view_tracking_invalid_ad(self):
url = reverse(
Expand Down
8 changes: 8 additions & 0 deletions adserver/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ..utils import get_client_id
from ..utils import get_client_user_agent
from ..utils import get_geolocation
from ..utils import is_blocklisted_ip
from ..utils import is_blocklisted_referrer
from ..utils import is_blocklisted_user_agent
from ..utils import is_click_ratelimited
Expand Down Expand Up @@ -87,6 +88,13 @@ def test_blocklisted_referrer(self):
regexes = [re.compile("this isn't found"), re.compile("neither is this")]
self.assertFalse(is_blocklisted_referrer(referrer, regexes))

def test_blocklisted_ip(self):
    """``is_blocklisted_ip`` matches only IPs present in the blocklist."""
    address = "1.1.1.1"

    # No explicit blocklist: falls back to the module-level default
    self.assertFalse(is_blocklisted_ip(address))

    # Explicit blocklists: one containing the address, one without it
    self.assertTrue(is_blocklisted_ip(address, ["1.1.1.1", "2.2.2.2"]))
    self.assertFalse(is_blocklisted_ip(address, ["2.2.2.2"]))

def test_ratelimited(self):
factory = RequestFactory()
request = factory.get("/")
Expand Down
48 changes: 41 additions & 7 deletions adserver/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import hashlib
import ipaddress
import logging
import os
import re
from collections import namedtuple
from datetime import datetime
Expand All @@ -21,13 +22,6 @@

log = logging.getLogger(__name__) # noqa

# Compile these regular expressions at startup time for performance purposes
BLOCKLISTED_UA_REGEXES = [
re.compile(s) for s in settings.ADSERVER_BLOCKLISTED_USER_AGENTS
]
BLOCKLISTED_REFERRERS_REGEXES = [
re.compile(s) for s in settings.ADSERVER_BLOCKLISTED_REFERRERS
]

try:
geoip = GeoIP2()
Expand Down Expand Up @@ -198,6 +192,21 @@ def is_blocklisted_referrer(referrer, blocklist_regexes=None):
return False


def is_blocklisted_ip(ip, blocked_ips=None):
    """
    Check an IP address against a collection of blocked IPs.

    IPs can be blocked because they are anonymous proxies or for other reasons.

    :param ip: the IP address to check; a falsy value is never blocklisted
    :param blocked_ips: collection of blocked IPs to check against;
        when ``None``, the module-level ``BLOCKLISTED_IPS`` is used
    :returns: ``True`` if the IP is blocklisted and ``False`` otherwise
    """
    candidates = BLOCKLISTED_IPS if blocked_ips is None else blocked_ips
    return bool(ip) and ip in candidates


def get_geolocation(ip_address):
try:
ipaddress.ip_address(force_text(ip_address))
Expand All @@ -216,6 +225,21 @@ def get_geolocation(ip_address):
return None


def build_blocked_ip_set():
    """
    Build a set of blocked IPs for preventing bogus ad impressions.

    Reads ``torbulkexitlist.txt`` from the ``settings.GEOIP_PATH`` directory,
    treating each non-blank line (with surrounding whitespace stripped) as
    one blocked entry.

    :returns: a set of the entries found in the file, or an empty set when
        the file is not present
    """
    filepath = os.path.join(settings.GEOIP_PATH, "torbulkexitlist.txt")

    try:
        # Open directly instead of checking for existence first: this avoids
        # the race between the check and the open.
        with open(filepath) as fd:
            # Iterate the file lazily rather than loading every line up front
            stripped_lines = (line.strip() for line in fd)
            return {entry for entry in stripped_lines if entry}
    except (FileNotFoundError, NotADirectoryError):
        # A missing list just means nothing is blocked
        return set()


def generate_client_id(ip_address, user_agent):
"""
Create an advertising ID.
Expand All @@ -240,3 +264,13 @@ def generate_client_id(ip_address, user_agent):
hash_id.update(force_bytes(get_random_string()))

return hash_id.hexdigest()


# Build the blocklists once at import time for performance purposes.
# They are defined at the bottom of the module so that
# ``build_blocked_ip_set`` already exists when it is called here.
BLOCKLISTED_UA_REGEXES = list(
    map(re.compile, settings.ADSERVER_BLOCKLISTED_USER_AGENTS)
)
BLOCKLISTED_REFERRERS_REGEXES = list(
    map(re.compile, settings.ADSERVER_BLOCKLISTED_REFERRERS)
)
BLOCKLISTED_IPS = build_blocked_ip_set()
8 changes: 8 additions & 0 deletions adserver/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from .utils import get_client_ip
from .utils import get_client_user_agent
from .utils import get_geolocation
from .utils import is_blocklisted_ip
from .utils import is_blocklisted_referrer
from .utils import is_blocklisted_user_agent
from .utils import is_click_ratelimited
Expand Down Expand Up @@ -375,6 +376,13 @@ def ignore_tracking_reason(self, request, advertisement, nonce, publisher):
user_agent,
)
reason = "Blocked referrer impression"
elif is_blocklisted_ip(ip_address):
log.log(
self.log_security_level,
"Blocked IP impression, Publisher: [%s]",
publisher,
)
reason = "Blocked IP impression"
elif not publisher:
log.log(self.log_level, "Ad impression for unknown publisher")
reason = "Unknown publisher"
Expand Down
Empty file added geoip/.gitkeep
Empty file.
13 changes: 0 additions & 13 deletions geoip/GeoIP.conf

This file was deleted.

0 comments on commit 6787b66

Please sign in to comment.