Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ratelimiting #345

Merged
merged 19 commits into from Jul 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions config.py.example
Expand Up @@ -34,6 +34,11 @@ REDIS_PORT = 6379
REDIS_NAMESPACE = "AB"
REDIS_NS_VERSIONS_LOCATION = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cache_namespaces')

# RATE LIMITING
# set a limit of per_ip requests per window seconds per unique ip address
RATELIMIT_PER_IP = 100
RATELIMIT_WINDOW = 10

# LOGGING
# Uncomment any of the following logging stubs if you want to enable logging
# during development. In general you shouldn't need to do this.
Expand Down
3 changes: 3 additions & 0 deletions consul_config.py.ctmpl
Expand Up @@ -49,3 +49,6 @@ DATASET_DIR = '''{{template "KEY" "dataset_dir"}}'''
FILE_STORAGE_DIR = '''{{template "KEY" "file_storage_dir"}}'''

FEATURE_EVAL_LOCATION = False

RATELIMIT_PER_IP = {{template "KEY" "ratelimit_per_ip"}} # number of requests per ip
RATELIMIT_WINDOW = {{template "KEY" "ratelimit_window"}} # window size in seconds
33 changes: 33 additions & 0 deletions docs/api.rst
Expand Up @@ -24,9 +24,42 @@ Datasets
:include-empty-docstring:
:undoc-static:

Rate limiting
^^^^^^^^^^^^^

The AcousticBrainz API is rate limited via the use of rate limiting headers that
are sent as part of the HTTP response headers. Each call will include the
following headers:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the default rate limit if we don't make any changes? It'd be nice to say this here. "We typically set the limit to 10 queries every 10 seconds, but these values may change. Make sure you check the response headers if you want to know the specific values"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looking at the BU code it seems like the default is 30 queries in 10 seconds. Is this actually useful for us - should we check the mean number of queries per IP address for AB to ensure that this actually results in a reduction of queries, or see if we need to reduce the default.
Our custom defaults should be in the config file instead of hard-coded.


- **X-RateLimit-Limit**: Number of requests allowed in given time window

- **X-RateLimit-Remaining**: Number of requests remaining in current time
window

- **X-RateLimit-Reset-In**: Number of seconds when current time window expires
(*recommended*: this header is resilient against clients with incorrect
clocks)

- **X-RateLimit-Reset**: UNIX epoch number of seconds (without timezone) when
current time window expires [#]_

We typically set the limit to 10 queries every 10 seconds per IP address,
but these values may change. Make sure you check the response headers
if you want to know the specific values.

Rate limiting is automatic and the client must use these headers to determine
the rate to make API calls. If the client exceeds the number of requests
allowed, the server will respond with error code ``429: Too Many Requests``.
Requests that provide the *Authorization* header with a valid user token may
receive higher rate limits than those without valid user tokens.

.. [#] Provided for compatibility with other APIs, but we still recommend using
``X-RateLimit-Reset-In`` wherever possible

Constants
^^^^^^^^^

Constants that are relevant to using the API:

.. autodata:: webserver.views.api.v1.core.MAX_ITEMS_PER_BULK_REQUEST

37 changes: 35 additions & 2 deletions manage.py
Expand Up @@ -5,7 +5,7 @@
import sys

import click
from brainzutils import cache
from brainzutils import cache, ratelimit
import flask.cli
from flask import current_app
from flask.cli import FlaskGroup
Expand Down Expand Up @@ -278,7 +278,40 @@ def remove_failed_rows():
sys.exit(1)


# Keep additional sets of commands down here
@cli.command(name='set_rate_limits')
@click.argument('per_ip', type=click.IntRange(1, None), required=False)
@click.argument('window_size', type=click.IntRange(1, None), required=False)
def set_rate_limits(per_ip, window_size):
"""Set rate limit parameters for the AcousticBrainz webserver. If no arguments
are provided, print the current limits. To set limits, specify PER_IP and WINDOW_SIZE

\b
PER_IP: the number of requests allowed per IP address
WINDOW_SIZE: the window in number of seconds for how long the limit is applied
"""

current_limit_per_ip = cache.get(ratelimit.ratelimit_per_ip_key)
current_limit_window = cache.get(ratelimit.ratelimit_window_key)

click.echo("Current values:")
if current_limit_per_ip is None and current_limit_window is None:
click.echo("No values set, showing limit defaults")
current_limit_per_ip = ratelimit.ratelimit_per_ip_default
current_limit_window = ratelimit.ratelimit_window_default
click.echo("Requests per IP: %s" % current_limit_per_ip)
click.echo("Window size (s): %s" % current_limit_window)

if per_ip is not None and window_size is not None:
if per_ip / float(window_size) < 1:
click.echo("Warning: Effective rate limit is less than 1 query per second")

ratelimit.set_rate_limits(per_ip, per_ip, window_size)
print("New ratelimit parameters set:")
click.echo("Requests per IP: %s" % per_ip)
click.echo("Window size (s): %s" % window_size)


# Please keep additional sets of commands down there
cli.add_command(db.dump_manage.cli, name="dump")

if __name__ == '__main__':
Expand Down
10 changes: 10 additions & 0 deletions webserver/__init__.py
@@ -1,6 +1,7 @@
from __future__ import print_function

from brainzutils.flask import CustomFlask
from brainzutils.ratelimit import set_rate_limits, inject_x_rate_headers
from flask import request, url_for, redirect
from flask_login import current_user
from pprint import pprint
Expand Down Expand Up @@ -81,6 +82,15 @@ def create_app(debug=None):
else:
raise Exception('One or more redis cache configuration options are missing from config.py')

# Add rate limiting support
@app.after_request
def after_request_callbacks(response):
return inject_x_rate_headers(response)

# check for ratelimit config values and set them if present
if 'RATELIMIT_PER_IP' in app.config and 'RATELIMIT_WINDOW' in app.config:
set_rate_limits(app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_WINDOW'])

# MusicBrainz
import musicbrainzngs
from db import SCHEMA_VERSION
Expand Down
8 changes: 8 additions & 0 deletions webserver/testing.py
Expand Up @@ -3,9 +3,17 @@

TEST_DATA_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'views', 'test_data')

from brainzutils.ratelimit import set_rate_limits

class ServerTestCase(DatabaseTestCase):


def setUp(self):
super(ServerTestCase, self).setUp()

#TODO: https://tickets.metabrainz.org/browse/BU-27
set_rate_limits(1000, 1000, 10000)
paramsingh marked this conversation as resolved.
Show resolved Hide resolved

def temporary_login(self, user_id):
with self.client.session_transaction() as session:
session['user_id'] = user_id
Expand Down
4 changes: 4 additions & 0 deletions webserver/views/api/legacy.py
@@ -1,5 +1,6 @@
from __future__ import absolute_import
from flask import Blueprint, request, jsonify
from brainzutils.ratelimit import ratelimit
from db.data import count_lowlevel, submit_low_level_data
from db.exceptions import NoDataFoundException, BadDataException
from webserver.decorators import crossdomain
Expand All @@ -13,6 +14,7 @@

@api_legacy_bp.route("/<uuid(strict=False):mbid>/count", methods=["GET"])
@crossdomain()
@ratelimit()
def count(mbid):
return jsonify({
'mbid': mbid,
Expand All @@ -22,6 +24,7 @@ def count(mbid):

@api_legacy_bp.route("/<string:mbid>/low-level", methods=["GET"])
@crossdomain()
@ratelimit()
def get_low_level(mbid):
"""Endpoint for fetching low-level data.
If there is more than one document with the same mbid, you can specify
Expand All @@ -37,6 +40,7 @@ def get_low_level(mbid):

@api_legacy_bp.route("/<string:mbid>/high-level", methods=["GET"])
@crossdomain()
@ratelimit()
def get_high_level(mbid):
"""Endpoint for fetching high-level data.
If there is more than one document with the same mbid, you can specify
Expand Down
8 changes: 8 additions & 0 deletions webserver/views/api/v1/core.py
Expand Up @@ -10,6 +10,7 @@
from db.data import submit_low_level_data, count_lowlevel
from db.exceptions import NoDataFoundException, BadDataException
from webserver.decorators import crossdomain
from brainzutils.ratelimit import ratelimit

bp_core = Blueprint('api_v1_core', __name__)

Expand All @@ -26,6 +27,7 @@

@bp_core.route("/<uuid(strict=False):mbid>/count", methods=["GET"])
@crossdomain()
@ratelimit()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to see at least one test to see that we're returning ratelimiting headers on one of these methods

def count(mbid):
"""Get the number of low-level data submissions for a recording with a
given MBID.
Expand All @@ -51,6 +53,7 @@ def count(mbid):

@bp_core.route("/<uuid(strict=False):mbid>/low-level", methods=["GET"])
@crossdomain()
@ratelimit()
def get_low_level(mbid):
"""Get low-level data for a recording with a given MBID.

Expand All @@ -74,6 +77,7 @@ def get_low_level(mbid):

@bp_core.route("/<uuid(strict=False):mbid>/high-level", methods=["GET"])
@crossdomain()
@ratelimit()
def get_high_level(mbid):
"""Get high-level data for recording with a given MBID.

Expand All @@ -99,6 +103,7 @@ def get_high_level(mbid):


@bp_core.route("/<uuid:mbid>/low-level", methods=["POST"])
@ratelimit()
def submit_low_level(mbid):
"""Submit low-level data to AcousticBrainz.

Expand Down Expand Up @@ -220,6 +225,7 @@ def check_bad_request_for_multiple_recordings():

@bp_core.route("/low-level", methods=["GET"])
@crossdomain()
@ratelimit()
def get_many_lowlevel():
"""Get low-level data for many recordings at once.

Expand Down Expand Up @@ -258,6 +264,7 @@ def get_many_lowlevel():

@bp_core.route("/high-level", methods=["GET"])
@crossdomain()
@ratelimit()
def get_many_highlevel():
"""Get high-level data for many recordings at once.

Expand Down Expand Up @@ -299,6 +306,7 @@ def get_many_highlevel():

@bp_core.route("/count", methods=["GET"])
@crossdomain()
@ratelimit()
def get_many_count():
"""Get low-level count for many recordings at once. MBIDs not found in
the database are omitted in the response.
Expand Down
4 changes: 3 additions & 1 deletion webserver/views/api/v1/dataset_eval.py
Expand Up @@ -3,12 +3,14 @@
from flask_login import current_user
from webserver.decorators import auth_required
from webserver.views.api import exceptions
from brainzutils.ratelimit import ratelimit
import db.dataset_eval
import db.dataset
from webserver.views.api.v1 import datasets

bp_dataset_eval = Blueprint('api_v1_dataset_eval', __name__)

# Don't ratelimit this view since it is being used by JS
@bp_dataset_eval.route("/jobs", methods=["GET"])
@auth_required
def get_jobs():
Expand Down Expand Up @@ -53,6 +55,7 @@ def get_jobs():

@bp_dataset_eval.route("/jobs/<uuid:job_id>", methods=["GET"])
@auth_required
@ratelimit()
def job_details(job_id):
"""Returns the details of a particular job.
API key argument is required.
Expand Down Expand Up @@ -114,4 +117,3 @@ def job_details(job_id):

job['dataset'] = datasets.get_check_dataset(job['dataset_id'])
return jsonify(job)

9 changes: 9 additions & 0 deletions webserver/views/api/v1/datasets.py
Expand Up @@ -3,6 +3,7 @@
from flask_login import current_user
from webserver.decorators import auth_required
from webserver.views.api import exceptions as api_exceptions
from brainzutils.ratelimit import ratelimit
import db.dataset
import db.exceptions
from utils import dataset_validator
Expand All @@ -19,6 +20,7 @@ def get_dataset(dataset_id):
return jsonify(get_check_dataset(dataset_id))


# don't ratelimit this function, since it is called from our JS
@bp_datasets.route("/", methods=["POST"])
@auth_required
def create_dataset():
Expand Down Expand Up @@ -85,6 +87,7 @@ def create_dataset():

@bp_datasets.route("/<uuid:dataset_id>", methods=["DELETE"])
@auth_required
@ratelimit()
def delete_dataset(dataset_id):
"""Delete a dataset."""
ds = get_dataset(dataset_id)
Expand All @@ -99,6 +102,7 @@ def delete_dataset(dataset_id):

@bp_datasets.route("/<uuid:dataset_id>", methods=["PUT"])
@auth_required
@ratelimit()
def update_dataset_details(dataset_id):
"""Update dataset details.

Expand Down Expand Up @@ -138,6 +142,7 @@ def update_dataset_details(dataset_id):

@bp_datasets.route("/<uuid:dataset_id>/classes", methods=["POST"])
@auth_required
@ratelimit()
def add_class(dataset_id):
"""Add a class to a dataset.

Expand Down Expand Up @@ -187,6 +192,7 @@ def add_class(dataset_id):

@bp_datasets.route("/<uuid:dataset_id>/classes", methods=["PUT"])
@auth_required
@ratelimit()
def update_class(dataset_id):
"""Update class in a dataset.

Expand Down Expand Up @@ -232,6 +238,7 @@ def update_class(dataset_id):

@bp_datasets.route("/<uuid:dataset_id>/classes", methods=["DELETE"])
@auth_required
@ratelimit()
def delete_class(dataset_id):
"""Delete class and all of its recordings from a dataset.

Expand Down Expand Up @@ -265,6 +272,7 @@ def delete_class(dataset_id):

@bp_datasets.route("/<uuid:dataset_id>/recordings", methods=["PUT"])
@auth_required
@ratelimit()
def add_recordings(dataset_id):
"""Add recordings to a class in a dataset.

Expand Down Expand Up @@ -309,6 +317,7 @@ def add_recordings(dataset_id):

@bp_datasets.route("/<uuid:dataset_id>/recordings", methods=["DELETE"])
@auth_required
@ratelimit()
def delete_recordings(dataset_id):
"""Delete recordings from a class in a dataset.

Expand Down
4 changes: 4 additions & 0 deletions webserver/views/api/v1/test/test_core.py
Expand Up @@ -35,6 +35,10 @@ def test_get_low_level(self):

resp = self.client.get("/api/v1/%s/low-level" % mbid)
self.assertEqual(resp.status_code, 200)
self.assertIn('X-RateLimit-Limit', resp.headers)
paramsingh marked this conversation as resolved.
Show resolved Hide resolved
self.assertIn('X-RateLimit-Remaining', resp.headers)
self.assertIn('X-RateLimit-Reset-In', resp.headers)
self.assertIn('X-RateLimit-Reset', resp.headers)

# upper-case MBID
resp = self.client.get("/api/v1/%s/low-level" % mbid.upper())
Expand Down