Permalink
Browse files

Add formal ratelimiting headers

Three headers can now be included for API requests:

X-Ratelimit-Used: Number of requests used in this period
X-Ratelimit-Remaining: Number of requests left to use
X-Ratelimit-Reset: Approximate number of seconds to end of period

Additionally, 429 responses can be returned for requests that
exceed the ratelimit.

Ratelimits are normally applied per IP. OAuth clients are instead
limited per user-client combination.
  • Loading branch information...
1 parent c918fdc commit 084f5736ce9b5f256daa2ea16148b5a997956563 @kemitche kemitche committed Feb 19, 2014
View
@@ -204,12 +204,7 @@ embedly_api_key =
############################################ QUOTAS
-# rate limiter duration (minutes)
-RATELIMIT = 10
-# user agent substrings to hard-ratelimit to a number of requests per ten second period
-# example: agents = googlebot:10, appengine:2
-agents =
-# ratelimits for various types of relations creatable in subreddits
+# quota for various types of relations creatable in subreddits
sr_banned_quota = 10000
sr_moderator_invite_quota = 10000
sr_contributor_quota = 10000
@@ -223,14 +218,39 @@ new_link_share_delay = 30 seconds
max_sr_images = 50
+############################################ RATELIMITS
+
+# If true, send 429 responses on exceeded ratelimits
+# If false, send headers only, but don't abort
+# Only applies if tracking is enabled below
+ENFORCE_RATELIMIT = false
+
+# If true, store per-user request counts in ratelimits cache
+RL_SITEWIDE_ENABLED = true
+# How large a burst window are users allowed?
+RL_RESET_MINUTES = 10
+# What is the average request rate over the above time period?
+RL_AVG_REQ_PER_SEC = 0.5
+
+# Same as above, but configured separately for connections via OAuth
+RL_OAUTH_SITEWIDE_ENABLED = true
+RL_OAUTH_RESET_MINUTES = 10
+RL_OAUTH_AVG_REQ_PER_SEC = 0.5
+
+# user agent substrings to hard-ratelimit to a number of requests per ten second period
+# example: agents = googlebot:10, appengine:2
+agents =
+
+# karma needed to avoid per-subreddit submission ratelimits
+MIN_RATE_LIMIT_KARMA = 10
+MIN_RATE_LIMIT_COMMENT_KARMA = 1
+
+
############################################ THRESHOLDS
# minimum item score to be considered for quota baskets
QUOTA_THRESHOLD = 5
# if the user has positive total karma, their per-subreddit karma will default to this, else 0
MIN_UP_KARMA = 1
-# karma needed to avoid per-subreddit submission ratelimits
-MIN_RATE_LIMIT_KARMA = 10
-MIN_RATE_LIMIT_COMMENT_KARMA = 1
# ages in days at which various actions are disallowed to preserve history
REPLY_AGE_LIMIT = 180
VOTE_AGE_LIMIT = 180
@@ -295,6 +315,8 @@ permacache_memcaches = 127.0.0.1:11211
srmembercaches = 127.0.0.1:11211
# a local cache that's not globally consistent and can have stale data (optional)
stalecaches =
+# cache for tracking rate limit thresholds
+ratelimitcaches = 127.0.0.1:11211
############################################ MISCELLANEOUS
@@ -30,6 +30,7 @@ class APIv1Controller(OAuth2ResourceController):
def pre(self):
OAuth2ResourceController.pre(self)
self.check_for_bearer_token()
+ self.run_sitewide_ratelimits()
def try_pagecache(self):
pass
@@ -570,15 +570,20 @@ def set_colors():
c.bordercolor = request.GET.get('bordercolor')
+def _get_ratelimit_timeslice(slice_seconds):
+ slice_start, secs_since = divmod(time.time(), slice_seconds)
+ slice_start = time.gmtime(int(slice_start * slice_seconds))
+ secs_to_next = slice_seconds - int(secs_since)
+ return slice_start, secs_to_next
+
+
def ratelimit_agent(agent, limit=10, slice_size=10):
slice_size = min(slice_size, 60)
- slice, remainder = map(int, divmod(time.time(), slice_size))
- time_slice = time.gmtime(slice * slice_size)
+ time_slice, retry_after = _get_ratelimit_timeslice(slice_size)
key = "rate_agent_" + agent + time.strftime("_%S", time_slice)
-
g.cache.add(key, 0, time=slice_size + 1)
if g.cache.incr(key) > limit:
- request.environ['retry_after'] = slice_size - remainder
+ request.environ['retry_after'] = retry_after
abort(429)
appengine_re = re.compile(r'AppEngine-Google; \(\+http://code.google.com/appengine; appid: (?:dev|s)~([a-z0-9-]{6,30})\)\Z')
@@ -733,6 +738,7 @@ def abort_with_error(error):
class MinimalController(BaseController):
allow_stylesheets = False
+ defer_ratelimiting = False
def request_key(self):
# note that this references the cookie at request time, not
@@ -758,6 +764,65 @@ def request_key(self):
def cached_response(self):
return ""
+ def run_sitewide_ratelimits(self):
+ """Ratelimit users and add ratelimit headers to the response.
+
+ Headers added are:
+ X-Ratelimit-Used: Number of requests used in this period
+ X-Ratelimit-Remaining: Number of requests left to use
+ X-Ratelimit-Reset: Approximate number of seconds to end of period
+
+ This function only has an effect if one of
+ g.RL_SITEWIDE_ENABLED or g.RL_OAUTH_SITEWIDE_ENABLED
+ are set to 'true' in the app configuration
+
+ If the ratelimit is exceeded, a 429 response will be sent,
+ unless the app configuration has g.ENFORCE_RATELIMIT off.
+ Headers will be sent even on aborted requests.
+
+ """
+ if c.cdn_cacheable or not is_api():
+ # No ratelimiting or headers for:
+ # * Web requests (HTML)
+ # * CDN requests (logged out via www.reddit.com)
+ return
+ elif c.oauth_user and g.RL_OAUTH_SITEWIDE_ENABLED:
+ max_reqs = g.RL_OAUTH_MAX_REQS
+ period = g.RL_OAUTH_RESET_SECONDS
+ # Convert client_id to ascii str for use as memcache key
+ client_id = c.oauth2_access_token.client_id.encode("ascii")
+ # OAuth2 ratelimits are per user-app combination
+ key = 'siterl-oauth-' + c.user._id36 + ":" + client_id
+ elif g.RL_SITEWIDE_ENABLED:
+ max_reqs = g.RL_MAX_REQS
+ period = g.RL_RESET_SECONDS
+ # API (non-oauth) limits are per-ip
+ key = 'siterl-api-' + request.ip
+ else:
+ # Not in a context where sitewide ratelimits are on
+ return
+
+ period_start, retry_after = _get_ratelimit_timeslice(period)
+ key += time.strftime("-%H%M%S", period_start)
+
+ g.ratelimitcache.add(key, 0, time=retry_after + 1)
+
+ # Increment the key to track the current request
+ recent_reqs = g.ratelimitcache.incr(key)
+ reqs_remaining = max(0, max_reqs - recent_reqs)
+
+ c.ratelimit_headers = {
+ "X-Ratelimit-Used": str(recent_reqs),
+ "X-Ratelimit-Reset": str(retry_after),
+ "X-Ratelimit-Remaining": str(reqs_remaining),
+ }
+
+ if reqs_remaining <= 0 and g.ENFORCE_RATELIMIT:
+ # For non-abort situations, the headers will be added in post(),
+ # to avoid including them in a pagecache
+ response.headers.update(c.ratelimit_headers)
+ abort(429)
+
def pre(self):
action = request.environ["pylons.routes_dict"].get("action")
if action:
@@ -785,6 +850,9 @@ def pre(self):
c.allow_loggedin_cache = False
c.allow_framing = False
+ c.cdn_cacheable = (request.via_cdn and
+ g.login_cookie not in request.cookies)
+
# the domain has to be set before Cookies get initialized
set_subreddit()
c.errors = ErrorSet()
@@ -799,6 +867,10 @@ def pre(self):
g.stats.count_string('user_agents', request.user_agent)
+ if not self.defer_ratelimiting:
+ self.run_sitewide_ratelimits()
+ c.request_timer.intermediate("minimal-ratelimits")
+
hooks.get_hook("reddit.request.minimal_begin").call()
def can_use_pagecache(self):
@@ -891,6 +963,9 @@ def post(self):
pagecache_state = "disallowed"
response.headers["X-Reddit-Pagecache"] = pagecache_state
+ if c.ratelimit_headers:
+ response.headers.update(c.ratelimit_headers)
+
# send cookies
for k, v in c.cookies.iteritems():
if v.dirty:
@@ -979,6 +1054,8 @@ def should_update_last_visit(self):
class OAuth2ResourceController(MinimalController):
+ defer_ratelimiting = True
+
def authenticate_with_token(self):
set_extension(request.environ, "json")
set_content_type()
@@ -1150,6 +1227,9 @@ def pre(self):
c.request_timer.intermediate("base-auth")
+ self.run_sitewide_ratelimits()
+ c.request_timer.intermediate("base-ratelimits")
+
c.over18 = over18()
set_obey_over18()
@@ -125,7 +125,6 @@ class Globals(object):
'REPLY_AGE_LIMIT',
'REPORT_AGE_LIMIT',
'HOT_PAGE_AGE',
- 'RATELIMIT',
'QUOTA_THRESHOLD',
'ADMIN_COOKIE_TTL',
'ADMIN_COOKIE_MAX_IDLE',
@@ -154,13 +153,17 @@ class Globals(object):
'wiki_max_page_separators',
'min_promote_future',
'max_promote_future',
+ 'RL_RESET_MINUTES',
+ 'RL_OAUTH_RESET_MINUTES',
],
ConfigValue.float: [
'min_promote_bid',
'max_promote_bid',
'statsd_sample_rate',
'querycache_prune_chance',
+ 'RL_AVG_REQ_PER_SEC',
+ 'RL_OAUTH_AVG_REQ_PER_SEC',
],
ConfigValue.bool: [
@@ -187,6 +190,9 @@ class Globals(object):
'shard_link_vote_queues',
'shard_commentstree_queues',
'subreddit_stylesheets_static',
+ 'ENFORCE_RATELIMIT',
+ 'RL_SITEWIDE_ENABLED',
+ 'RL_OAUTH_SITEWIDE_ENABLED',
],
ConfigValue.tuple: [
@@ -199,6 +205,7 @@ class Globals(object):
'pagecaches',
'memoizecaches',
'srmembercaches',
+ 'ratelimitcaches',
'cassandra_seeds',
'admins',
'sponsors',
@@ -443,6 +450,15 @@ def setup(self):
locale.setlocale(locale.LC_ALL, self.locale)
+ # Pre-calculate ratelimit values
+ self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
+ self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
+ self.RL_RESET_SECONDS)
+
+ self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
+ self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
+ self.RL_OAUTH_RESET_SECONDS)
+
self.startup_timer.intermediate("configuration")
################# ZOOKEEPER
@@ -497,6 +513,12 @@ def setup(self):
num_clients=num_mc_clients,
)
+ ratelimitcaches = CMemcache(
+ self.ratelimitcaches,
+ min_compress_len=96,
+ num_clients=num_mc_clients,
+ )
+
# a smaller pool of caches used only for distributed locks.
# TODO: move this to ZooKeeper
self.lock_cache = CMemcache(self.lockcaches,
@@ -615,6 +637,10 @@ def setup(self):
(localcache_cls(), srmembercaches))
cache_chains.update(srmembercache=self.srmembercache)
+ self.ratelimitcache = MemcacheChain(
+ (localcache_cls(), ratelimitcaches))
+ cache_chains.update(ratelimitcaches=self.ratelimitcache)
+
self.rendercache = MemcacheChain((
localcache_cls(),
rendercaches,
View
@@ -90,12 +90,14 @@ def __call__(self, environ, start_response):
forwarded_for = environ.get('HTTP_X_FORWARDED_FOR', ())
remote_addr = environ.get('REMOTE_ADDR')
+ request.via_cdn = False
if (g.secrets["true_ip"]
and true_client_ip
and ip_hash
and hashlib.md5(true_client_ip + g.secrets["true_ip"]).hexdigest() \
== ip_hash.lower()):
request.ip = true_client_ip
+ request.via_cdn = True
elif g.trust_local_proxies and forwarded_for and is_local_address(remote_addr):
request.ip = forwarded_for.split(',')[-1]
else:
@@ -1605,7 +1605,7 @@ def ratelimit(self, rate_user = False, rate_ip = False, prefix = "rate_",
seconds = None):
to_set = {}
if seconds is None:
- seconds = g.RATELIMIT*60
+ seconds = g.RL_RESET_SECONDS
expire_time = datetime.now(g.tz) + timedelta(seconds = seconds)
if rate_user and c.user_is_loggedin:
to_set['user' + str(c.user._id36)] = expire_time
@@ -1632,7 +1632,7 @@ def run (self):
@classmethod
def record_violation(self, category, seconds = None, growfast=False):
if seconds is None:
- seconds = g.RATELIMIT*60
+ seconds = g.RL_RESET_SECONDS
key = "VDelay-%s-%s" % (category, request.ip)
prev_violations = g.memcache.get(key)

0 comments on commit 084f573

Please sign in to comment.