From 78955c27fc3f9d12b0fd188a042683fca94db36c Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 15 May 2019 20:55:30 +0200 Subject: [PATCH 01/19] Add ratelimit header injector and ratelimit decorators --- webserver/__init__.py | 7 +++++++ webserver/views/api/legacy.py | 5 +++++ webserver/views/api/v1/core.py | 8 ++++++++ webserver/views/api/v1/dataset_eval.py | 4 +++- webserver/views/api/v1/datasets.py | 9 +++++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/webserver/__init__.py b/webserver/__init__.py index 10d8e14a0..941a60930 100644 --- a/webserver/__init__.py +++ b/webserver/__init__.py @@ -1,6 +1,7 @@ from __future__ import print_function from brainzutils.flask import CustomFlask +from brainzutils.ratelimit import inject_x_rate_headers from flask import request, url_for, redirect from flask_login import current_user from pprint import pprint @@ -60,6 +61,12 @@ def create_app(debug=None): sentry_config=app.config.get('LOG_SENTRY') ) + + # Add rate limiting support + @app.after_request + def after_request_callbacks(response): + return inject_x_rate_headers(response) + # Database connection from db import init_db_engine init_db_engine(app.config['SQLALCHEMY_DATABASE_URI']) diff --git a/webserver/views/api/legacy.py b/webserver/views/api/legacy.py index 839989ed1..e85d2ef38 100644 --- a/webserver/views/api/legacy.py +++ b/webserver/views/api/legacy.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from flask import Blueprint, request, jsonify +from brainzutils.ratelimit import ratelimit from db.data import count_lowlevel, submit_low_level_data from db.exceptions import NoDataFoundException, BadDataException from webserver.decorators import crossdomain @@ -13,6 +14,7 @@ @api_legacy_bp.route("//count", methods=["GET"]) @crossdomain() +@ratelimit() def count(mbid): return jsonify({ 'mbid': mbid, @@ -22,6 +24,7 @@ def count(mbid): @api_legacy_bp.route("//low-level", methods=["GET"]) @crossdomain() +@ratelimit() def get_low_level(mbid): """Endpoint for fetching low-level data. If there is more than one document with the same mbid, you can specify @@ -37,6 +40,7 @@ def get_low_level(mbid): @api_legacy_bp.route("//high-level", methods=["GET"]) @crossdomain() +@ratelimit() def get_high_level(mbid): """Endpoint for fetching high-level data. If there is more than one document with the same mbid, you can specify @@ -51,6 +55,7 @@ def get_high_level(mbid): @api_legacy_bp.route("//low-level", methods=["POST"]) +@ratelimit() def submit_low_level(mbid): """Endpoint for submitting low-level information to AcousticBrainz.""" raw_data = request.get_data() diff --git a/webserver/views/api/v1/core.py b/webserver/views/api/v1/core.py index 486eb90cc..82341d628 100644 --- a/webserver/views/api/v1/core.py +++ b/webserver/views/api/v1/core.py @@ -10,6 +10,7 @@ from db.data import submit_low_level_data, count_lowlevel from db.exceptions import NoDataFoundException, BadDataException from webserver.decorators import crossdomain +from brainzutils.ratelimit import ratelimit bp_core = Blueprint('api_v1_core', __name__) @@ -26,6 +27,7 @@ @bp_core.route("//count", methods=["GET"]) @crossdomain() +@ratelimit() def count(mbid): """Get the number of low-level data submissions for a recording with a given MBID. @@ -51,6 +53,7 @@ def count(mbid): @bp_core.route("//low-level", methods=["GET"]) @crossdomain() +@ratelimit() def get_low_level(mbid): """Get low-level data for a recording with a given MBID. @@ -74,6 +77,7 @@ def get_low_level(mbid): @bp_core.route("//high-level", methods=["GET"]) @crossdomain() +@ratelimit() def get_high_level(mbid): """Get high-level data for recording with a given MBID. @@ -99,6 +103,7 @@ def get_high_level(mbid): @bp_core.route("//low-level", methods=["POST"]) +@ratelimit() def submit_low_level(mbid): """Submit low-level data to AcousticBrainz. @@ -220,6 +225,7 @@ def check_bad_request_for_multiple_recordings(): @bp_core.route("/low-level", methods=["GET"]) @crossdomain() +@ratelimit() def get_many_lowlevel(): """Get low-level data for many recordings at once. @@ -258,6 +264,7 @@ def get_many_lowlevel(): @bp_core.route("/high-level", methods=["GET"]) @crossdomain() +@ratelimit() def get_many_highlevel(): """Get high-level data for many recordings at once. @@ -299,6 +306,7 @@ def get_many_highlevel(): @bp_core.route("/count", methods=["GET"]) @crossdomain() +@ratelimit() def get_many_count(): """Get low-level count for many recordings at once. MBIDs not found in the database are omitted in the response. diff --git a/webserver/views/api/v1/dataset_eval.py b/webserver/views/api/v1/dataset_eval.py index fbfe41763..fd7a42769 100644 --- a/webserver/views/api/v1/dataset_eval.py +++ b/webserver/views/api/v1/dataset_eval.py @@ -3,6 +3,7 @@ from flask_login import current_user from webserver.decorators import auth_required from webserver.views.api import exceptions +from brainzutils.ratelimit import ratelimit import db.dataset_eval import db.dataset from webserver.views.api.v1 import datasets @@ -11,6 +12,7 @@ @bp_dataset_eval.route("/jobs", methods=["GET"]) @auth_required +@ratelimit() def get_jobs(): """Return a list of jobs related to the current user. Identify the current user by being logged in or by passing a Token for authentication. @@ -53,6 +55,7 @@ def get_jobs(): @bp_dataset_eval.route("/jobs/", methods=["GET"]) @auth_required +@ratelimit() def job_details(job_id): """Returns the details of a particular job. API key argument is required. @@ -114,4 +117,3 @@ def job_details(job_id): job['dataset'] = datasets.get_check_dataset(job['dataset_id']) return jsonify(job) - diff --git a/webserver/views/api/v1/datasets.py b/webserver/views/api/v1/datasets.py index 34410bda0..700ef4f9f 100644 --- a/webserver/views/api/v1/datasets.py +++ b/webserver/views/api/v1/datasets.py @@ -3,6 +3,7 @@ from flask_login import current_user from webserver.decorators import auth_required from webserver.views.api import exceptions as api_exceptions +from brainzutils.ratelimit import ratelimit import db.dataset import db.exceptions from utils import dataset_validator @@ -21,6 +22,7 @@ def get_dataset(dataset_id): @bp_datasets.route("/", methods=["POST"]) @auth_required +@ratelimit() def create_dataset(): """Create a new dataset. @@ -85,6 +87,7 @@ def create_dataset(): @bp_datasets.route("/", methods=["DELETE"]) @auth_required +@ratelimit() def delete_dataset(dataset_id): """Delete a dataset.""" ds = get_dataset(dataset_id) @@ -99,6 +102,7 @@ def delete_dataset(dataset_id): @bp_datasets.route("/", methods=["PUT"]) @auth_required +@ratelimit() def update_dataset_details(dataset_id): """Update dataset details. @@ -138,6 +142,7 @@ def update_dataset_details(dataset_id): @bp_datasets.route("//classes", methods=["POST"]) @auth_required +@ratelimit() def add_class(dataset_id): """Add a class to a dataset. @@ -187,6 +192,7 @@ def add_class(dataset_id): @bp_datasets.route("//classes", methods=["PUT"]) @auth_required +@ratelimit() def update_class(dataset_id): """Update class in a dataset. @@ -232,6 +238,7 @@ def update_class(dataset_id): @bp_datasets.route("//classes", methods=["DELETE"]) @auth_required +@ratelimit() def delete_class(dataset_id): """Delete class and all of its recordings from a dataset. @@ -265,6 +272,7 @@ def delete_class(dataset_id): @bp_datasets.route("//recordings", methods=["PUT"]) @auth_required +@ratelimit() def add_recordings(dataset_id): """Add recordings to a class in a dataset. @@ -309,6 +317,7 @@ def add_recordings(dataset_id): @bp_datasets.route("//recordings", methods=["DELETE"]) @auth_required +@ratelimit() def delete_recordings(dataset_id): """Delete recordings from a class in a dataset. From bee589e4b462c628eb6751a78bc860ea82b3da8b Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 15 May 2019 21:14:33 +0200 Subject: [PATCH 02/19] Don't ratelimit two views that are called by our JS --- webserver/views/api/v1/dataset_eval.py | 2 +- webserver/views/api/v1/datasets.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/webserver/views/api/v1/dataset_eval.py b/webserver/views/api/v1/dataset_eval.py index fd7a42769..33b0cb85e 100644 --- a/webserver/views/api/v1/dataset_eval.py +++ b/webserver/views/api/v1/dataset_eval.py @@ -10,9 +10,9 @@ bp_dataset_eval = Blueprint('api_v1_dataset_eval', __name__) +# Don't ratelimit this view since it is being used by JS @bp_dataset_eval.route("/jobs", methods=["GET"]) @auth_required -@ratelimit() def get_jobs(): """Return a list of jobs related to the current user. Identify the current user by being logged in or by passing a Token for authentication. diff --git a/webserver/views/api/v1/datasets.py b/webserver/views/api/v1/datasets.py index 700ef4f9f..45d35f43d 100644 --- a/webserver/views/api/v1/datasets.py +++ b/webserver/views/api/v1/datasets.py @@ -20,9 +20,9 @@ def get_dataset(dataset_id): return jsonify(get_check_dataset(dataset_id)) +# don't ratelimit this function, since it is called from our JS @bp_datasets.route("/", methods=["POST"]) @auth_required -@ratelimit() def create_dataset(): """Create a new dataset. From 3107447497f800f37fbb21705badb461bd38867f Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 15 May 2019 21:19:56 +0200 Subject: [PATCH 03/19] Remove the () on the ratelimit decorator --- webserver/views/api/legacy.py | 8 ++++---- webserver/views/api/v1/core.py | 14 +++++++------- webserver/views/api/v1/dataset_eval.py | 2 +- webserver/views/api/v1/datasets.py | 14 +++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/webserver/views/api/legacy.py b/webserver/views/api/legacy.py index e85d2ef38..be914a40e 100644 --- a/webserver/views/api/legacy.py +++ b/webserver/views/api/legacy.py @@ -14,7 +14,7 @@ @api_legacy_bp.route("//count", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def count(mbid): return jsonify({ 'mbid': mbid, @@ -24,7 +24,7 @@ def count(mbid): @api_legacy_bp.route("//low-level", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_low_level(mbid): """Endpoint for fetching low-level data. If there is more than one document with the same mbid, you can specify @@ -40,7 +40,7 @@ def get_low_level(mbid): @api_legacy_bp.route("//high-level", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_high_level(mbid): """Endpoint for fetching high-level data. If there is more than one document with the same mbid, you can specify @@ -55,7 +55,7 @@ def get_high_level(mbid): @api_legacy_bp.route("//low-level", methods=["POST"]) -@ratelimit() +@ratelimit def submit_low_level(mbid): """Endpoint for submitting low-level information to AcousticBrainz.""" raw_data = request.get_data() diff --git a/webserver/views/api/v1/core.py b/webserver/views/api/v1/core.py index 82341d628..06b45bc65 100644 --- a/webserver/views/api/v1/core.py +++ b/webserver/views/api/v1/core.py @@ -27,7 +27,7 @@ @bp_core.route("//count", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def count(mbid): """Get the number of low-level data submissions for a recording with a given MBID. @@ -53,7 +53,7 @@ def count(mbid): @bp_core.route("//low-level", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_low_level(mbid): """Get low-level data for a recording with a given MBID. @@ -77,7 +77,7 @@ def get_low_level(mbid): @bp_core.route("//high-level", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_high_level(mbid): """Get high-level data for recording with a given MBID. @@ -103,7 +103,7 @@ def get_high_level(mbid): @bp_core.route("//low-level", methods=["POST"]) -@ratelimit() +@ratelimit def submit_low_level(mbid): """Submit low-level data to AcousticBrainz. @@ -225,7 +225,7 @@ def check_bad_request_for_multiple_recordings(): @bp_core.route("/low-level", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_many_lowlevel(): """Get low-level data for many recordings at once. @@ -264,7 +264,7 @@ def get_many_lowlevel(): @bp_core.route("/high-level", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_many_highlevel(): """Get high-level data for many recordings at once. @@ -306,7 +306,7 @@ def get_many_highlevel(): @bp_core.route("/count", methods=["GET"]) @crossdomain() -@ratelimit() +@ratelimit def get_many_count(): """Get low-level count for many recordings at once. MBIDs not found in the database are omitted in the response. diff --git a/webserver/views/api/v1/dataset_eval.py b/webserver/views/api/v1/dataset_eval.py index 33b0cb85e..f623246d6 100644 --- a/webserver/views/api/v1/dataset_eval.py +++ b/webserver/views/api/v1/dataset_eval.py @@ -55,7 +55,7 @@ def get_jobs(): @bp_dataset_eval.route("/jobs/", methods=["GET"]) @auth_required -@ratelimit() +@ratelimit def job_details(job_id): """Returns the details of a particular job. API key argument is required. diff --git a/webserver/views/api/v1/datasets.py b/webserver/views/api/v1/datasets.py index 45d35f43d..659f11af2 100644 --- a/webserver/views/api/v1/datasets.py +++ b/webserver/views/api/v1/datasets.py @@ -87,7 +87,7 @@ def create_dataset(): @bp_datasets.route("/", methods=["DELETE"]) @auth_required -@ratelimit() +@ratelimit def delete_dataset(dataset_id): """Delete a dataset.""" ds = get_dataset(dataset_id) @@ -102,7 +102,7 @@ def delete_dataset(dataset_id): @bp_datasets.route("/", methods=["PUT"]) @auth_required -@ratelimit() +@ratelimit def update_dataset_details(dataset_id): """Update dataset details. @@ -142,7 +142,7 @@ def update_dataset_details(dataset_id): @bp_datasets.route("//classes", methods=["POST"]) @auth_required -@ratelimit() +@ratelimit def add_class(dataset_id): """Add a class to a dataset. @@ -192,7 +192,7 @@ def add_class(dataset_id): @bp_datasets.route("//classes", methods=["PUT"]) @auth_required -@ratelimit() +@ratelimit def update_class(dataset_id): """Update class in a dataset. @@ -238,7 +238,7 @@ def update_class(dataset_id): @bp_datasets.route("//classes", methods=["DELETE"]) @auth_required -@ratelimit() +@ratelimit def delete_class(dataset_id): """Delete class and all of its recordings from a dataset. @@ -272,7 +272,7 @@ def delete_class(dataset_id): @bp_datasets.route("//recordings", methods=["PUT"]) @auth_required -@ratelimit() +@ratelimit def add_recordings(dataset_id): """Add recordings to a class in a dataset. @@ -317,7 +317,7 @@ def add_recordings(dataset_id): @bp_datasets.route("//recordings", methods=["DELETE"]) @auth_required -@ratelimit() +@ratelimit def delete_recordings(dataset_id): """Delete recordings from a class in a dataset. From 03cb0baf87b285148df74fadf2bc29b66297d1e0 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 15 May 2019 21:24:35 +0200 Subject: [PATCH 04/19] Bring the () back --- webserver/views/api/legacy.py | 8 ++++---- webserver/views/api/v1/core.py | 14 +++++++------- webserver/views/api/v1/dataset_eval.py | 2 +- webserver/views/api/v1/datasets.py | 14 +++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/webserver/views/api/legacy.py b/webserver/views/api/legacy.py index be914a40e..e85d2ef38 100644 --- a/webserver/views/api/legacy.py +++ b/webserver/views/api/legacy.py @@ -14,7 +14,7 @@ @api_legacy_bp.route("//count", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def count(mbid): return jsonify({ 'mbid': mbid, @@ -24,7 +24,7 @@ def count(mbid): @api_legacy_bp.route("//low-level", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_low_level(mbid): """Endpoint for fetching low-level data. If there is more than one document with the same mbid, you can specify @@ -40,7 +40,7 @@ def get_low_level(mbid): @api_legacy_bp.route("//high-level", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_high_level(mbid): """Endpoint for fetching high-level data. If there is more than one document with the same mbid, you can specify @@ -55,7 +55,7 @@ def get_high_level(mbid): @api_legacy_bp.route("//low-level", methods=["POST"]) -@ratelimit +@ratelimit() def submit_low_level(mbid): """Endpoint for submitting low-level information to AcousticBrainz.""" raw_data = request.get_data() diff --git a/webserver/views/api/v1/core.py b/webserver/views/api/v1/core.py index 06b45bc65..82341d628 100644 --- a/webserver/views/api/v1/core.py +++ b/webserver/views/api/v1/core.py @@ -27,7 +27,7 @@ @bp_core.route("//count", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def count(mbid): """Get the number of low-level data submissions for a recording with a given MBID. @@ -53,7 +53,7 @@ def count(mbid): @bp_core.route("//low-level", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_low_level(mbid): """Get low-level data for a recording with a given MBID. @@ -77,7 +77,7 @@ def get_low_level(mbid): @bp_core.route("//high-level", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_high_level(mbid): """Get high-level data for recording with a given MBID. @@ -103,7 +103,7 @@ def get_high_level(mbid): @bp_core.route("//low-level", methods=["POST"]) -@ratelimit +@ratelimit() def submit_low_level(mbid): """Submit low-level data to AcousticBrainz. @@ -225,7 +225,7 @@ def check_bad_request_for_multiple_recordings(): @bp_core.route("/low-level", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_many_lowlevel(): """Get low-level data for many recordings at once. @@ -264,7 +264,7 @@ def get_many_lowlevel(): @bp_core.route("/high-level", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_many_highlevel(): """Get high-level data for many recordings at once. @@ -306,7 +306,7 @@ def get_many_highlevel(): @bp_core.route("/count", methods=["GET"]) @crossdomain() -@ratelimit +@ratelimit() def get_many_count(): """Get low-level count for many recordings at once. MBIDs not found in the database are omitted in the response. diff --git a/webserver/views/api/v1/dataset_eval.py b/webserver/views/api/v1/dataset_eval.py index f623246d6..33b0cb85e 100644 --- a/webserver/views/api/v1/dataset_eval.py +++ b/webserver/views/api/v1/dataset_eval.py @@ -55,7 +55,7 @@ def get_jobs(): @bp_dataset_eval.route("/jobs/", methods=["GET"]) @auth_required -@ratelimit +@ratelimit() def job_details(job_id): """Returns the details of a particular job. API key argument is required. diff --git a/webserver/views/api/v1/datasets.py b/webserver/views/api/v1/datasets.py index 659f11af2..45d35f43d 100644 --- a/webserver/views/api/v1/datasets.py +++ b/webserver/views/api/v1/datasets.py @@ -87,7 +87,7 @@ def create_dataset(): @bp_datasets.route("/", methods=["DELETE"]) @auth_required -@ratelimit +@ratelimit() def delete_dataset(dataset_id): """Delete a dataset.""" ds = get_dataset(dataset_id) @@ -102,7 +102,7 @@ def delete_dataset(dataset_id): @bp_datasets.route("/", methods=["PUT"]) @auth_required -@ratelimit +@ratelimit() def update_dataset_details(dataset_id): """Update dataset details. @@ -142,7 +142,7 @@ def update_dataset_details(dataset_id): @bp_datasets.route("//classes", methods=["POST"]) @auth_required -@ratelimit +@ratelimit() def add_class(dataset_id): """Add a class to a dataset. @@ -192,7 +192,7 @@ def add_class(dataset_id): @bp_datasets.route("//classes", methods=["PUT"]) @auth_required -@ratelimit +@ratelimit() def update_class(dataset_id): """Update class in a dataset. @@ -238,7 +238,7 @@ def update_class(dataset_id): @bp_datasets.route("//classes", methods=["DELETE"]) @auth_required -@ratelimit +@ratelimit() def delete_class(dataset_id): """Delete class and all of its recordings from a dataset. @@ -272,7 +272,7 @@ def delete_class(dataset_id): @bp_datasets.route("//recordings", methods=["PUT"]) @auth_required -@ratelimit +@ratelimit() def add_recordings(dataset_id): """Add recordings to a class in a dataset. @@ -317,7 +317,7 @@ def add_recordings(dataset_id): @bp_datasets.route("//recordings", methods=["DELETE"]) @auth_required -@ratelimit +@ratelimit() def delete_recordings(dataset_id): """Delete recordings from a class in a dataset. From 76ecc181ecd6281d3f94be693658479870dbae46 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 15 May 2019 21:37:15 +0200 Subject: [PATCH 05/19] set rate limits complete, not tested. --- set_rate_limits.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 set_rate_limits.py diff --git a/set_rate_limits.py b/set_rate_limits.py new file mode 100755 index 000000000..503e19458 --- /dev/null +++ b/set_rate_limits.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +import sys +from acousticbrainz import config +from brainzutils.ratelimit import set_rate_limits, ratelimit_per_token_key, ratelimit_per_ip_key, ratelimit_window_key +from brainzutils import cache +import config + +# Yes, I could use getoptgetargparsewtfbbw, but then I would spend 20 mimnutes re-learning the stupid syntax. +# Or, I could just do it myself in the space of seconds. +# Also, I tried to integrate this script with manage.py, but then I ended up wasting an hour trying +# to figure out how to do this. So, we have this script. If you want to see it part of manage.py, you'll +# have to do it. + +cache.init( + host=config['REDIS_HOST'], + port=config['REDIS_PORT'], + namespace=config['REDIS_NAMESPACE'], + ns_versions_loc=config['REDIS_NS_VERSIONS_LOCATION']) + +if len(sys.argv) < 4: + print("Usage: %s " % (sys.argv[0])) + print("Current values:") + print(" Requests per ip: ", int(cache.get(ratelimit_per_ip_key) or -1)) + print(" Requests per token: ", int(cache.get(ratelimit_per_token_key) or -1)) + print(" window size: ", int(cache.get(ratelimit_window_key) or -1)) + sys.exit(-1) + +try: + per_ip = int(sys.argv[1]) +except ValueError: + print("Invalid per ip limit. Must be non zero integer.") + sys.exit(-1) + +if per_ip <= 0: + print("Invalid per ip limit. Must be non zero integer.") + + +try: + per_token = int(sys.argv[2]) +except ValueError: + print("Invalid per token limit. Must be non zero integer.") + sys.exit(-1) + +if per_token <= 0: + print("Invalid per token limit. Must be non zero integer.") + + +try: + window = int(sys.argv[3]) +except ValueError: + print("Invalid window size. Must be non zero integer.") + sys.exit(-1) + +if window <= 0: + print("Invalid window size. Must be non zero integer.") + +set_rate_limits(per_token, per_ip, window) From 5bdf70d50389979f0b62e66279a92d089cb5f574 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Mon, 3 Jun 2019 22:50:41 +0530 Subject: [PATCH 06/19] Add manage.py command to set rate limit parameters --- manage.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/manage.py b/manage.py index de3afcabc..96d0dc8e2 100644 --- a/manage.py +++ b/manage.py @@ -5,7 +5,7 @@ import sys import click -from brainzutils import cache +from brainzutils import cache, ratelimit import flask.cli from flask import current_app from flask.cli import FlaskGroup @@ -237,6 +237,7 @@ def toggle_site_status(): print('Done!') +<<<<<<< HEAD @cli.group() @click.pass_context def highlevel(ctx): @@ -278,7 +279,15 @@ def remove_failed_rows(): sys.exit(1) -# Keep additional sets of commands down here +@cli.command(name='set_rate_limits') +@click.argument('per_ip') +@click.argument('per_token') +@click.argument('window_size') +def set_rate_limits(per_ip, per_token, window_size): + ratelimit.set_rate_limits(per_token, per_ip, window_size) + print("new ratelimit parameters set!") + +# Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") if __name__ == '__main__': From 39cc0f8ca7cb92bd9739cfc0c540438e5ed4bcd3 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Mon, 3 Jun 2019 22:56:26 +0530 Subject: [PATCH 07/19] Remove unneeded set rate limits script --- set_rate_limits.py | 58 ---------------------------------------------- 1 file changed, 58 deletions(-) delete mode 100755 set_rate_limits.py diff --git a/set_rate_limits.py b/set_rate_limits.py deleted file mode 100755 index 503e19458..000000000 --- a/set_rate_limits.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python - -import sys -from acousticbrainz import config -from brainzutils.ratelimit import set_rate_limits, ratelimit_per_token_key, ratelimit_per_ip_key, ratelimit_window_key -from brainzutils import cache -import config - -# Yes, I could use getoptgetargparsewtfbbw, but then I would spend 20 mimnutes re-learning the stupid syntax. -# Or, I could just do it myself in the space of seconds. -# Also, I tried to integrate this script with manage.py, but then I ended up wasting an hour trying -# to figure out how to do this. So, we have this script. If you want to see it part of manage.py, you'll -# have to do it. - -cache.init( - host=config['REDIS_HOST'], - port=config['REDIS_PORT'], - namespace=config['REDIS_NAMESPACE'], - ns_versions_loc=config['REDIS_NS_VERSIONS_LOCATION']) - -if len(sys.argv) < 4: - print("Usage: %s " % (sys.argv[0])) - print("Current values:") - print(" Requests per ip: ", int(cache.get(ratelimit_per_ip_key) or -1)) - print(" Requests per token: ", int(cache.get(ratelimit_per_token_key) or -1)) - print(" window size: ", int(cache.get(ratelimit_window_key) or -1)) - sys.exit(-1) - -try: - per_ip = int(sys.argv[1]) -except ValueError: - print("Invalid per ip limit. Must be non zero integer.") - sys.exit(-1) - -if per_ip <= 0: - print("Invalid per ip limit. Must be non zero integer.") - - -try: - per_token = int(sys.argv[2]) -except ValueError: - print("Invalid per token limit. Must be non zero integer.") - sys.exit(-1) - -if per_token <= 0: - print("Invalid per token limit. Must be non zero integer.") - - -try: - window = int(sys.argv[3]) -except ValueError: - print("Invalid window size. Must be non zero integer.") - sys.exit(-1) - -if window <= 0: - print("Invalid window size. Must be non zero integer.") - -set_rate_limits(per_token, per_ip, window) From d73fdf05eee0cefd9583af9d0cf5c3fb7c0f29f3 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Mon, 3 Jun 2019 23:00:30 +0530 Subject: [PATCH 08/19] Add documentation for API ratelimiting --- docs/api.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index bd2e925d5..52be75c21 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -24,9 +24,38 @@ Datasets :include-empty-docstring: :undoc-static: +Rate limiting +^^^^^^^^^^^^^ + +The AcousticBrainz API is rate limited via the use of rate limiting headers that +are sent as part of the HTTP response headers. Each call will include the +following headers: + +- **X-RateLimit-Limit**: Number of requests allowed in given time window + +- **X-RateLimit-Remaining**: Number of requests remaining in current time + window + +- **X-RateLimit-Reset-In**: Number of seconds when current time window expires + (*recommended*: this header is resilient against clients with incorrect + clocks) + +- **X-RateLimit-Reset**: UNIX epoch number of seconds (without timezone) when + current time window expires [#]_ + +Rate limiting is automatic and the client must use these headers to determine +the rate to make API calls. If the client exceeds the number of requests +allowed, the server will respond with error code ``429: Too Many Requests``. +Requests that provide the *Authorization* header with a valid user token may +receive higher rate limits than those without valid user tokens. + +.. [#] Provided for compatibility with other APIs, but we still recommend using + ``X-RateLimit-Reset-In`` wherever possible + Constants ^^^^^^^^^ Constants that are relevant to using the API: .. autodata:: webserver.views.api.v1.core.MAX_ITEMS_PER_BULK_REQUEST + From 7bfa1a103067d21b99b4a6e222365d4091f43d09 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Mon, 3 Jun 2019 23:49:31 +0530 Subject: [PATCH 09/19] Set high ratelimit defaults for tests --- webserver/testing.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/webserver/testing.py b/webserver/testing.py index 0c68f71a6..a79cba352 100644 --- a/webserver/testing.py +++ b/webserver/testing.py @@ -3,9 +3,15 @@ TEST_DATA_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'views', 'test_data') +from brainzutils.ratelimit import set_rate_limits class ServerTestCase(DatabaseTestCase): + + def setUp(self): + super(ServerTestCase, self).setUp() + set_rate_limits(1000, 1000, 10000) + def temporary_login(self, user_id): with self.client.session_transaction() as session: session['user_id'] = user_id From 105f09d3951cf4a32597cc2d5c2527ae15147e20 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Tue, 4 Jun 2019 21:39:12 +0530 Subject: [PATCH 10/19] Add test for ratelimit headers in the API tests --- webserver/views/api/v1/test/test_core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/webserver/views/api/v1/test/test_core.py b/webserver/views/api/v1/test/test_core.py index a7902f033..aab8a5c77 100644 --- a/webserver/views/api/v1/test/test_core.py +++ b/webserver/views/api/v1/test/test_core.py @@ -35,6 +35,10 @@ def test_get_low_level(self): resp = self.client.get("/api/v1/%s/low-level" % mbid) self.assertEqual(resp.status_code, 200) + self.assertIn('X-RateLimit-Limit', resp.headers) + self.assertIn('X-RateLimit-Remaining', resp.headers) + self.assertIn('X-RateLimit-Reset-In', resp.headers) + self.assertIn('X-RateLimit-Reset', resp.headers) # upper-case MBID resp = self.client.get("/api/v1/%s/low-level" % mbid.upper()) From 43260113c8e03029b5f4d01861bc9627f0a6d5a7 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Tue, 4 Jun 2019 21:52:11 +0530 Subject: [PATCH 11/19] Improve the set_rate_limits command Remove the per_token argument. Also, add documentation and better logging. --- manage.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/manage.py b/manage.py index 96d0dc8e2..8de550ea3 100644 --- a/manage.py +++ b/manage.py @@ -281,11 +281,18 @@ def remove_failed_rows(): @cli.command(name='set_rate_limits') @click.argument('per_ip') -@click.argument('per_token') @click.argument('window_size') -def set_rate_limits(per_ip, per_token, window_size): - ratelimit.set_rate_limits(per_token, per_ip, window_size) - print("new ratelimit parameters set!") +def set_rate_limits(per_ip, window_size): + """ Set rate limit parameters for the AcousticBrainz webserver + + Args: + per_ip (int): the number of requests allowed per IP address + window_size (int): the window in number of seconds for how long the limit is applied + """ + if per_ip / float(window_size) <= 1: + print("Warning: Effective rate limit per IP address is less than 1!") + ratelimit.set_rate_limits(per_ip, per_ip, window_size) + print("New ratelimit parameters set to %d requests over a window of %d seconds!" % (per_ip, window_size)) # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") From a8d6c76db31db31e1820e79156cf542f6088037a Mon Sep 17 00:00:00 2001 From: Param Singh Date: Tue, 4 Jun 2019 21:58:55 +0530 Subject: [PATCH 12/19] Set default ratelimits from config during app creation --- consul_config.py.ctmpl | 3 +++ webserver/__init__.py | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/consul_config.py.ctmpl b/consul_config.py.ctmpl index 31cd54758..cd0afaf65 100644 --- a/consul_config.py.ctmpl +++ b/consul_config.py.ctmpl @@ -49,3 +49,6 @@ DATASET_DIR = '''{{template "KEY" "dataset_dir"}}''' FILE_STORAGE_DIR = '''{{template "KEY" "file_storage_dir"}}''' FEATURE_EVAL_LOCATION = False + +RATELIMIT_PER_IP = {{template "KEY" "ratelimit_per_ip"}} # number of requests per ip +RATELIMIT_WINDOW = {{template "KEY" "ratelimit_window"}} # window size in seconds diff --git a/webserver/__init__.py b/webserver/__init__.py index 941a60930..5165bae18 100644 --- a/webserver/__init__.py +++ b/webserver/__init__.py @@ -1,7 +1,7 @@ from __future__ import print_function from brainzutils.flask import CustomFlask -from brainzutils.ratelimit import inject_x_rate_headers +from brainzutils.ratelimit import set_rate_limits, inject_x_rate_headers from flask import request, url_for, redirect from flask_login import current_user from pprint import pprint @@ -67,6 +67,10 @@ def create_app(debug=None): def after_request_callbacks(response): return inject_x_rate_headers(response) + # check for ratelimit config values and set them if present + if 'RATELIMIT_PER_IP' in app.config and 'RATELIMIT_WINDOW' in app.config: + set_rate_limits(app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_WINDOW']) + # Database connection from db import init_db_engine init_db_engine(app.config['SQLALCHEMY_DATABASE_URI']) From 9faca777c133c5ff3b3cdeb17203dcafc60137f8 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Tue, 4 Jun 2019 22:03:07 +0530 Subject: [PATCH 13/19] Add TODO for brainzutils ratelimit disabling --- webserver/testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/webserver/testing.py b/webserver/testing.py index a79cba352..a2c91d979 100644 --- a/webserver/testing.py +++ b/webserver/testing.py @@ -10,6 +10,8 @@ class ServerTestCase(DatabaseTestCase): def setUp(self): super(ServerTestCase, self).setUp() + + #TODO: https://tickets.metabrainz.org/browse/BU-27 set_rate_limits(1000, 1000, 10000) def temporary_login(self, user_id): From f2ab5ab45bb18b6d65ce88b25403ccfa41f227d4 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Tue, 4 Jun 2019 22:04:45 +0530 Subject: [PATCH 14/19] Mention the ratelimits in the documentation --- docs/api.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index 52be75c21..af03a9cc5 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -43,6 +43,10 @@ following headers: - **X-RateLimit-Reset**: UNIX epoch number of seconds (without timezone) when current time window expires [#]_ +We typically set the limit to 10 queries every 10 seconds per IP address, +but these values may change. Make sure you check the response headers +if you want to know the specific values. + Rate limiting is automatic and the client must use these headers to determine the rate to make API calls. If the client exceeds the number of requests allowed, the server will respond with error code ``429: Too Many Requests``. From def80b0a4be9074b95fd4ebd16e258c9c2a112c6 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Thu, 6 Jun 2019 16:29:25 +0530 Subject: [PATCH 15/19] Type check arguments and print current values --- manage.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/manage.py b/manage.py index 8de550ea3..db95625bc 100644 --- a/manage.py +++ b/manage.py @@ -280,8 +280,8 @@ def remove_failed_rows(): @cli.command(name='set_rate_limits') -@click.argument('per_ip') -@click.argument('window_size') +@click.argument('per_ip', type=int) +@click.argument('window_size', type=int) def set_rate_limits(per_ip, window_size): """ Set rate limit parameters for the AcousticBrainz webserver @@ -289,8 +289,23 @@ def set_rate_limits(per_ip, window_size): per_ip (int): the number of requests allowed per IP address window_size (int): the window in number of seconds for how long the limit is applied """ + print("Current values:") + print("Requests per IP:\t", int(cache.get(ratelimit.ratelimit_per_ip_key) or -1)) + print("Window size:\t", int(cache.get(ratelimit.ratelimit_window_key) or -1)) + + if per_ip / float(window_size) <= 1: print("Warning: Effective rate limit per IP address is less than 1!") + + if per_ip <= 0: + print("Invalid per IP limit, must be greater than zero") + raise ValueError("Invalid per IP limit, must be greater than zero") + + if window_size <= 0: + print("Invalid window size, must be greater than zero") + raise ValueError("Invalid window size, must be greater than zero") + + ratelimit.set_rate_limits(per_ip, per_ip, window_size) print("New ratelimit parameters set to %d requests over a window of %d seconds!" % (per_ip, window_size)) From 39594cd5d5e822dd640bd22116ead31ebd62eff9 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Thu, 6 Jun 2019 16:32:21 +0530 Subject: [PATCH 16/19] Remove ratelimiting from legacy api submit endpoint --- webserver/views/api/legacy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/webserver/views/api/legacy.py b/webserver/views/api/legacy.py index e85d2ef38..b3340982d 100644 --- a/webserver/views/api/legacy.py +++ b/webserver/views/api/legacy.py @@ -55,7 +55,6 @@ def get_high_level(mbid): @api_legacy_bp.route("//low-level", methods=["POST"]) -@ratelimit() def submit_low_level(mbid): """Endpoint for submitting low-level information to AcousticBrainz.""" raw_data = request.get_data() From bfc9c270f1092393768014b36fbddf34c22e427c Mon Sep 17 00:00:00 2001 From: Param Singh Date: Mon, 1 Jul 2019 22:26:11 +0530 Subject: [PATCH 17/19] Remove syntax error --- manage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/manage.py b/manage.py index db95625bc..29f59974b 100644 --- a/manage.py +++ b/manage.py @@ -237,7 +237,6 @@ def toggle_site_status(): print('Done!') -<<<<<<< HEAD @cli.group() @click.pass_context def highlevel(ctx): From 49845343729439afacf6f9b284bc98d154ec0651 Mon Sep 17 00:00:00 2001 From: Alastair Porter Date: Thu, 4 Jul 2019 14:04:44 +0200 Subject: [PATCH 18/19] Enable ratelimiting after cache setup Add ratelimiting default examples to example config file --- config.py.example | 5 +++++ webserver/__init__.py | 19 +++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/config.py.example b/config.py.example index 89ff50601..c25ec432d 100644 --- a/config.py.example +++ b/config.py.example @@ -34,6 +34,11 @@ REDIS_PORT = 6379 REDIS_NAMESPACE = "AB" REDIS_NS_VERSIONS_LOCATION = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cache_namespaces') +# RATE LIMITING +# set a limit of per_ip requests per window seconds per unique ip address +RATELIMIT_PER_IP = 100 +RATELIMIT_WINDOW = 10 + # LOGGING # Uncomment any of the following logging stubs if you want to enable logging # during development. In general you shouldn't need to do this. diff --git a/webserver/__init__.py b/webserver/__init__.py index 5165bae18..6958d153b 100644 --- a/webserver/__init__.py +++ b/webserver/__init__.py @@ -61,16 +61,6 @@ def create_app(debug=None): sentry_config=app.config.get('LOG_SENTRY') ) - - # Add rate limiting support - @app.after_request - def after_request_callbacks(response): - return inject_x_rate_headers(response) - - # check for ratelimit config values and set them if present - if 'RATELIMIT_PER_IP' in app.config and 'RATELIMIT_WINDOW' in app.config: - set_rate_limits(app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_WINDOW']) - # Database connection from db import init_db_engine init_db_engine(app.config['SQLALCHEMY_DATABASE_URI']) @@ -92,6 +82,15 @@ def after_request_callbacks(response): else: raise Exception('One or more redis cache configuration options are missing from config.py') + # Add rate limiting support + @app.after_request + def after_request_callbacks(response): + return inject_x_rate_headers(response) + + # check for ratelimit config values and set them if present + if 'RATELIMIT_PER_IP' in app.config and 'RATELIMIT_WINDOW' in app.config: + set_rate_limits(app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_PER_IP'], app.config['RATELIMIT_WINDOW']) + # MusicBrainz import musicbrainzngs from db import SCHEMA_VERSION From 4b89e6e5d6412e3126964bc8f04444ecda6eb9a6 Mon Sep 17 00:00:00 2001 From: Alastair Porter Date: Thu, 4 Jul 2019 14:08:13 +0200 Subject: [PATCH 19/19] Improve rate limiting management command --- manage.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/manage.py b/manage.py index 29f59974b..d1e7e100a 100644 --- a/manage.py +++ b/manage.py @@ -279,35 +279,38 @@ def remove_failed_rows(): @cli.command(name='set_rate_limits') -@click.argument('per_ip', type=int) -@click.argument('window_size', type=int) +@click.argument('per_ip', type=click.IntRange(1, None), required=False) +@click.argument('window_size', type=click.IntRange(1, None), required=False) def set_rate_limits(per_ip, window_size): - """ Set rate limit parameters for the AcousticBrainz webserver + """Set rate limit parameters for the AcousticBrainz webserver. If no arguments + are provided, print the current limits. To set limits, specify PER_IP and WINDOW_SIZE - Args: - per_ip (int): the number of requests allowed per IP address - window_size (int): the window in number of seconds for how long the limit is applied + \b + PER_IP: the number of requests allowed per IP address + WINDOW_SIZE: the window in number of seconds for how long the limit is applied """ - print("Current values:") - print("Requests per IP:\t", int(cache.get(ratelimit.ratelimit_per_ip_key) or -1)) - print("Window size:\t", int(cache.get(ratelimit.ratelimit_window_key) or -1)) + current_limit_per_ip = cache.get(ratelimit.ratelimit_per_ip_key) + current_limit_window = cache.get(ratelimit.ratelimit_window_key) - if per_ip / float(window_size) <= 1: - print("Warning: Effective rate limit per IP address is less than 1!") + click.echo("Current values:") + if current_limit_per_ip is None and current_limit_window is None: + click.echo("No values set, showing limit defaults") + current_limit_per_ip = ratelimit.ratelimit_per_ip_default + current_limit_window = ratelimit.ratelimit_window_default + click.echo("Requests per IP: %s" % current_limit_per_ip) + click.echo("Window size (s): %s" % current_limit_window) - if per_ip <= 0: - print("Invalid per IP limit, must be greater than zero") - raise ValueError("Invalid per IP limit, must be greater than zero") + if per_ip is not None and window_size is not None: + if per_ip / float(window_size) < 1: + click.echo("Warning: Effective rate limit is less than 1 query per second") - if window_size <= 0: - print("Invalid window size, must be greater than zero") - raise ValueError("Invalid window size, must be greater than zero") + ratelimit.set_rate_limits(per_ip, per_ip, window_size) + print("New ratelimit parameters set:") + click.echo("Requests per IP: %s" % per_ip) + click.echo("Window size (s): %s" % window_size) - ratelimit.set_rate_limits(per_ip, per_ip, window_size) - print("New ratelimit parameters set to %d requests over a window of %d seconds!" % (per_ip, window_size)) - # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump")