From be6b424377519f39ebf5fd073c3e3d7fbad7e231 Mon Sep 17 00:00:00 2001 From: "David W. Dougherty" Date: Fri, 8 May 2026 08:13:35 -0700 Subject: [PATCH] DEV: revert GCRA (8.8) content --- content/commands/gcra.md | 188 ----------- content/commands/gcrasetvalue.md | 90 ------ .../develop/using-commands/rate-limiting.md | 291 ------------------ layouts/commands/list.html | 2 +- static/images/railroad/gcra.svg | 57 ---- static/images/railroad/gcrasetvalue.svg | 50 --- 6 files changed, 1 insertion(+), 677 deletions(-) delete mode 100644 content/commands/gcra.md delete mode 100644 content/commands/gcrasetvalue.md delete mode 100644 content/develop/using-commands/rate-limiting.md delete mode 100644 static/images/railroad/gcra.svg delete mode 100644 static/images/railroad/gcrasetvalue.svg diff --git a/content/commands/gcra.md b/content/commands/gcra.md deleted file mode 100644 index 255fee5c82..0000000000 --- a/content/commands/gcra.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -acl_categories: -- '@rate_limit' -- '@write' -arguments: -- key_spec_index: 0 - name: key - type: key -- name: max-burst - type: integer -- name: tokens-per-period - type: integer -- name: period - type: double -- name: count - optional: true - token: TOKENS - type: integer -arity: -5 -categories: -- docs -- develop -- stack -- oss -- rs -- rc -- oss -- kubernetes -- clients -command_flags: -- write -- denyoom -- fast -complexity: O(1) -description: Rate limit via GCRA (Generic Cell Rate Algorithm). -group: rate_limit -hidden: false -key_specs: -- begin_search: - index: - pos: 1 - find_keys: - range: - lastkey: 0 - limit: 0 - step: 1 - flags: - - rw - - access - - update -linkTitle: GCRA -since: 8.8.0 -summary: Rate limit via GCRA (Generic Cell Rate Algorithm). -syntax_fmt: "GCRA key max-burst tokens-per-period period [TOKENS\_count]" -title: GCRA ---- - -Performs rate limiting using the [Generic Cell Rate Algorithm (GCRA)](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm). - -GCRA is a popular rate limiting algorithm known for its simplicity and speed. Each request (a single call to this command) consumes a number of tokens; the default cost is one token per request. The sustained rate is `tokens-per-period` tokens per `period` seconds, with a minimum spacing (emission interval) of `period / tokens-per-period` seconds between requests. The `max_burst` parameter allows for occasional spikes by granting up to `max_burst` additional tokens that can be consumed at once beyond the sustained rate. - -The implementation is based on the popular [redis-cell](https://github.com/brandur/redis-cell) module with small changes in the API. Unlike redis-cell and most other implementations where `period` is given as an integer number of seconds, this command accepts `period` as a floating-point number for greater flexibility. Internally, time periods are calculated with microsecond granularity. - -The `GCRA` command is used either to establish a new rate limiter (if the key doesn't exist) or use an existing one (if the key exists). -All the parameters need to be repeated on each call, and clients don't need to validate that the key exists before using a rate limiter. -Under normal usage, `max-burst`, `tokens-per-period`, and `period` should not change between calls, though this command supports such changes. - -In a typical deployment, the application server calls `GCRA` on each end user's request. Based on the response, the application server either fulfills the end user's request or rejects it. - -{{< note >}} -`GCRA` sets an expiration time for its key when it's executed. This expiration time is based on the generic cell rate algorithm's [theoretical arrival time (TAT)](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm#Virtual_scheduling_description). -{{< /note >}} - -See the [rate limiting docs]({{< relref "/develop/using-commands/rate-limiting" >}}) for more information. - -## Required arguments - -
key - -is the key associated with a specific rate limiting case. The key stores the internal state needed for the GCRA algorithm to track request timing. - -
- -
max-burst - -The maximum number of additional tokens allowed as a burst, above the sustained rate. This controls how many tokens can be consumed at once before rate limiting starts. The total token capacity is `max_burst + 1`. Minimum value: `0`. - -
- -
tokens-per-period - -The number of tokens replenished per `period`, which defines the sustained rate of the rate limiter. Minimum value: `1`. - -
- -
period - -The time period in seconds, specified as a floating-point number, over which `tokens-per-period` tokens are replenished. The emission interval (minimum spacing between single-token requests) is `period / tokens-per-period`. Minimum value: `1.0`. - -
- -## Optional arguments - -
TOKENS count - -The number of tokens consumed by this request. A higher value drains the token allowance faster, which is useful when different operations have different costs. Default value: `1`. - -
- -## Examples - -Rate limit an API endpoint to 10 tokens per 60 seconds with a burst of 5: - -``` -127.0.0.1:6379> GCRA api:user:123 5 10 60 -1) (integer) 0 -2) (integer) 6 -3) (integer) 5 -4) (integer) -1 -5) (integer) 30 -``` - -The response shows: the request is allowed (`0`), the total token capacity is `6` (`max_burst + 1`), `5` tokens are available immediately, retry-after is `-1` (the request was allowed; no need to retry), and the full token allowance will be restored after `30` seconds. - -After exhausting the token allowance: - -``` -127.0.0.1:6379> GCRA api:user:123 5 10 60 -1) (integer) 1 -2) (integer) 6 -3) (integer) 0 -4) (integer) 6 -5) (integer) 36 -``` - -This time the request is denied (`1`), `0` tokens remain, and the application server should retry after `6` seconds. - -Using the `TOKENS` option to assign a higher cost to a request: - -``` -127.0.0.1:6379> GCRA api:user:123 5 10 60 TOKENS 3 -1) (integer) 0 -2) (integer) 6 -3) (integer) 2 -4) (integer) -1 -5) (integer) 24 -``` - -This request consumes 3 tokens instead of the default 1. - -## Redis Software and Redis Cloud compatibility - -| Redis
Software | Redis
Cloud | Notes | -|:----------------------|:-----------------|:------| -| ❌ Standard
❌ Active-Active | ❌ Standard
❌ Active-Active | | - -## Return information - -{{< multitabs id="return-info" - tab1="RESP2" - tab2="RESP3" >}} - -One of the following: - -- Returns an [array]({{< relref "/develop/reference/protocol-spec#arrays" >}}) with exactly 5 elements: - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): `0` if the request is allowed, `1` if the request is denied. - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): The maximum number of tokens that can be requested if no previous requests have been made, or if earlier requests are no longer within the relevant time window. Always equal to `max_burst` + 1 (+1 to allow requests when `max_burst` is 0). - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the number of remaining tokens that can be requested immediately (the remaining burst). - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the number of milliseconds until the caller can retry, or -1 if the request was allowed. - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the number of milliseconds until the full burst will be allowed again. -- A [simple error reply]({{< relref "/develop/reference/protocol-spec#simple-errors" >}}) for the following cases: wrong number of arguments, -incorrect value for `max_burst`, `tokens_per_period` <= 1, `period` <= 1, or `tokens` <= 1. - --tab-sep- - -One of the following: - -- Returns an [array]({{< relref "/develop/reference/protocol-spec#arrays" >}}) with exactly 5 elements: - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): `0` if the request is allowed, `1` if the request is blocked. - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the maximum number of tokens that can be requested (if no previous requests were made, or if they were made long enough ago). Always equal to `max_burst` + 1 (+1 to allow requests when `max_burst` is 0). - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the number of remaining tokens that can be requested immediately (the remaining burst). - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the number of milliseconds until the caller can retry, or -1 if the request was allowed. - 1. [Integer reply]({{< relref "/develop/reference/protocol-spec#integers" >}}): the number of milliseconds until the full burst will be allowed again. -- A [simple error reply]({{< relref "/develop/reference/protocol-spec#simple-errors" >}}) for the following cases: wrong number of arguments, -incorrect value for `max_burst`, `tokens_per_period` <= 1, `period` <= 1, or `tokens` <= 1. - -{{< /multitabs >}} - diff --git a/content/commands/gcrasetvalue.md b/content/commands/gcrasetvalue.md deleted file mode 100644 index 95d6637a83..0000000000 --- a/content/commands/gcrasetvalue.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -acl_categories: -- '@rate_limit' -- '@write' -arguments: -- key_spec_index: 0 - name: key - type: key -- name: tat - type: integer -arity: 3 -categories: -- docs -- develop -- stack -- oss -- rs -- rc -- oss -- kubernetes -- clients -command_flags: -- write -- denyoom -- fast -complexity: O(1) -description: An internal command for recording a GCRA TAT value during AOF rewrite - and replication. -group: rate_limit -hidden: false -key_specs: -- begin_search: - index: - pos: 1 - find_keys: - range: - lastkey: 0 - limit: 0 - step: 1 - flags: - - ow - - update -linkTitle: GCRASETVALUE -since: 8.8.0 -summary: An internal command for recording a GCRA TAT value during AOF rewrite and - replication. -syntax_fmt: GCRASETVALUE key tat -title: GCRASETVALUE ---- -This is an internal command; it records a GCRA [theoretical arrival time (TAT)](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm#Virtual_scheduling_description) value during AOF rewrite and replication. - -## Required arguments - -
key - -is the key associated with a specific rate limiting case. - -
- -
tat - -is the expiration time, based on the generic cell rate algorithm's [theoretical arrival time (TAT)](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm#Virtual_scheduling_description). - -
- -## Redis Software and Redis Cloud compatibility - -| Redis
Software | Redis
Cloud | Notes | -|:----------------------|:-----------------|:------| -| ❌ Standard
❌ Active-Active | ❌ Standard
❌ Active-Active | | - -## Return information - -{{< multitabs id="return-info" - tab1="RESP2" - tab2="RESP3" >}} - -One of the following: - -- A [simple string reply]({{< relref "/develop/reference/protocol-spec#simple-strings" >}}) of `OK` indicating that the operation succeeded. -- A [simple error reply]({{< relref "/develop/reference/protocol-spec#simple-errors" >}}) indicating that the operation failed. - --tab-sep- - -One of the following: - -- A [simple string reply]({{< relref "/develop/reference/protocol-spec#simple-strings" >}}) of `OK` indicating that the operation succeeded. -- A [simple error reply]({{< relref "/develop/reference/protocol-spec#simple-errors" >}}) indicating that the operation failed. - -{{< /multitabs >}} diff --git a/content/develop/using-commands/rate-limiting.md b/content/develop/using-commands/rate-limiting.md deleted file mode 100644 index 50564496fa..0000000000 --- a/content/develop/using-commands/rate-limiting.md +++ /dev/null @@ -1,291 +0,0 @@ ---- -categories: -- docs -- develop -- stack -- oss -- rs -- rc -- oss -- kubernetes -- clients -description: How to use the GCRA command for rate limiting in Redis -linkTitle: Rate limiting -title: Rate limiting -weight: 40 ---- - -Rate limiting controls how often a user or client can perform an action within -a given time period. Common use cases include: - -- **Preventing DDoS attacks**: throttle incoming requests to keep services - available during traffic floods. -- **Blocking brute-force attacks**: limit login attempts per account to slow - down credential stuffing. -- **Preventing API abuse**: enforce per-user or per-key request quotas on - public APIs. -- **Preventing web scraping**: restrict how quickly an IP address or user agent - can crawl your pages. -- **Limiting resource consumption by subscription tier**: give free-tier users - a lower request allowance than paying customers. - -Redis is a natural fit for rate limiting because it offers fast, atomic -operations with low latency. Starting in version 8.8, Redis provides the -[`GCRA`]({{< relref "/commands/gcra" >}}) command, which implements rate -limiting directly in the server using the -[Generic Cell Rate Algorithm](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm). - -## How GCRA works - -In a typical deployment, the application server calls -[`GCRA`]({{< relref "/commands/gcra" >}}) on each end user's request. -Based on the response, the application server either fulfills the -request or rejects it. - -GCRA uses a [leaky bucket](https://en.wikipedia.org/wiki/Leaky_bucket) model. -Imagine a bucket with a fixed capacity. Water -leaks out of the bucket at a constant rate, and each accepted request adds -water to it. The available tokens correspond to the empty space in the bucket — -the more empty space, the more requests can be accepted. If a request would -cause the bucket to overflow, it's rejected. Over time the bucket leaks, -freeing up capacity for new requests. -There's an excellent visual aid for this process on [this page](https://davecturner.github.io/2016/12/01/rate-limiting.html). - -Two parameters control the behavior: - -- **Sustained rate**: defined as `tokens_per_period / period`. This is the - steady-state token throughput. For example, 10 tokens per 60 seconds means - one token replenishes every 6 seconds. -- **Burst allowance**: defined by `max_burst`. This lets clients consume a - burst of tokens above the sustained rate. The total token capacity is - `max_burst + 1`. - -## Basic usage - -The [`GCRA`]({{< relref "/commands/gcra" >}}) command has the following syntax: - -``` -GCRA key max_burst tokens_per_period period [TOKENS count] -``` - -For example, to allow a sustained rate of 30 tokens per minute with a burst -allowance of 5 additional tokens: - -``` -> GCRA api:user:123 5 30 60 -1) (integer) 0 -2) (integer) 6 -3) (integer) 5 -4) (integer) -1 -5) (integer) 10 -``` - -The response is an array of five integers: - -1. **Limited**: `0` means the request is allowed; `1` means it's denied. -2. **Max tokens**: the total token capacity (`max_burst + 1`). -3. **Remaining tokens**: the number of tokens available right now. -4. **Retry after**: seconds until the application server should retry. - Returns `-1` when the request isn't denied. -5. **Reset after**: seconds until the full token allowance is restored. - -## Handling rate-limited requests - -When a request is denied, the response tells the application server exactly -when to retry: - -``` -> GCRA api:user:123 5 30 60 -1) (integer) 1 -2) (integer) 6 -3) (integer) 0 -4) (integer) 2 -5) (integer) 12 -``` - -The first element is `1`, which means the request was denied. The fourth -element tells the application server to wait 2 seconds before retrying. - -## Weighted requests - -Some operations cost more than others. Use the `TOKENS` option to -assign a higher token cost to expensive operations: - -``` -> GCRA api:user:123 5 30 60 TOKENS 3 -1) (integer) 0 -2) (integer) 6 -3) (integer) 2 -4) (integer) -1 -5) (integer) 16 -``` - -This request consumes 3 tokens instead of the default 1, so the remaining -token count drops by 3. - -## Example: rate limiting in action - -The following example walks through a sequence of requests to show how the -token allowance drains and recovers over time. It configures a sustained rate -of 5 tokens per 10 seconds with a burst allowance of 3 additional tokens (total -capacity of 4). - -The first request arrives on a fresh key. The full token allowance is -available: - -``` -> GCRA api:user:1 3 5 10 -1) (integer) 0 # allowed -2) (integer) 4 # total token capacity (max_burst + 1) -3) (integer) 3 # 3 tokens remaining -4) (integer) -1 # not denied, so retry-after is -1 -5) (integer) 2 # full allowance restored in 2 seconds -``` - -Three more requests arrive immediately. Each one consumes a token: - -``` -> GCRA api:user:1 3 5 10 -1) (integer) 0 # allowed -2) (integer) 4 -3) (integer) 2 # 2 tokens remaining -4) (integer) -1 -5) (integer) 4 - -> GCRA api:user:1 3 5 10 -1) (integer) 0 # allowed -2) (integer) 4 -3) (integer) 1 # 1 token remaining -4) (integer) -1 -5) (integer) 6 - -> GCRA api:user:1 3 5 10 -1) (integer) 0 # allowed -2) (integer) 4 -3) (integer) 0 # 0 tokens remaining — allowance exhausted -4) (integer) -1 -5) (integer) 8 -``` - -The next request is denied because no tokens are available: - -``` -> GCRA api:user:1 3 5 10 -1) (integer) 1 # DENIED -2) (integer) 4 -3) (integer) 0 # still 0 tokens remaining -4) (integer) 2 # retry after 2 seconds -5) (integer) 8 -``` - -After waiting for the retry-after period, a token has replenished and the -request succeeds: - -``` -> GCRA api:user:1 3 5 10 -1) (integer) 0 # allowed again -2) (integer) 4 -3) (integer) 0 -4) (integer) -1 -5) (integer) 8 -``` - -### Using GCRA in application code - -The following Python example shows how an application server checks the -rate limit before handling an end user's request: - -```python -import redis -import time - -r = redis.Redis() - -def handle_request(user_id, action): - """Check the rate limit and handle the end user's request.""" - key = f"api:user:{user_id}" - # Sustained rate: 5 tokens per 10 seconds, burst of 3 tokens - limited, max_tokens, remaining, retry_after, reset_after = ( - r.execute_command("GCRA", key, 3, 5, 10) - ) - - if limited: - # Deny the end user's request - return {"error": "Too many requests", "retry_after": retry_after} - - # Tokens available — fulfill the end user's request - result = perform_action(action) - return {"result": result, "remaining_tokens": remaining} -``` - -## Real-world examples - -### Limit credit card transactions - -A payment processor needs to flag suspicious activity. The application server -limits each user to 5 tokens per minute with a small burst of 2 to handle -rapid legitimate retries: - -``` -GCRA txn:user:8841 2 5 60 -``` - -If the response returns `limited = 1`, the application server rejects the -end user's transaction and returns an error prompting them to wait. - -### Throttle profile views on a dating site - -To prevent scraping of user profiles, the application server allocates 60 -tokens per hour per member with a burst of 10: - -``` -GCRA profiles:user:2297 10 60 3600 -``` - -This lets an end user browse through a handful of profiles quickly, but -enforces a steady pace over the course of an hour. - -### Restrict downloads by subscription tier - -A file-hosting service offers different token budgets for free and -paid users. Free-tier users get 10 tokens per day with no burst. -Premium users get 100 tokens per day with a burst of 20: - -``` -# Free tier -GCRA downloads:user:5510 0 10 86400 - -# Premium tier -GCRA downloads:user:5510 20 100 86400 -``` - -### Per-endpoint API rate limiting - -Combine the user and endpoint in the key for more granular control: - -``` -GCRA api:user:42:/search 2 10 60 -GCRA api:user:42:/export 0 2 3600 -``` - -The search endpoint allows 10 tokens per minute with a burst of 2. -The export endpoint allows 2 tokens per hour with no burst. - -## Choose parameter values - -When configuring rate limits, consider these guidelines: - -- **`tokens_per_period` and `period`** define the sustained rate. Choose - values that reflect your actual capacity. A period of `60` (one minute) is - a common starting point. -- **`max_burst`** controls how tolerant you are of traffic spikes. A value of - `0` enforces strict spacing between requests. Higher values accommodate - bursty workloads like page loads that trigger multiple API calls at once. -- **`period`** accepts floating-point values (minimum `1.0`), which gives you - fine-grained control. Internally, Redis calculates timing with microsecond - precision. - -## Learn more - -- [`GCRA` command reference]({{< relref "/commands/gcra" >}}) -- [Generic Cell Rate Algorithm (Wikipedia)](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm) diff --git a/layouts/commands/list.html b/layouts/commands/list.html index c0d49db51c..f8ee13ba27 100644 --- a/layouts/commands/list.html +++ b/layouts/commands/list.html @@ -42,7 +42,7 @@

Commands

- + diff --git a/static/images/railroad/gcra.svg b/static/images/railroad/gcra.svg deleted file mode 100644 index da5a6386c1..0000000000 --- a/static/images/railroad/gcra.svg +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - -GCRA -key -max-burst -tokens-per-period -period - - - -TOKENS -count \ No newline at end of file diff --git a/static/images/railroad/gcrasetvalue.svg b/static/images/railroad/gcrasetvalue.svg deleted file mode 100644 index 06d796bb4b..0000000000 --- a/static/images/railroad/gcrasetvalue.svg +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - - - -GCRASETVALUE -key -tat \ No newline at end of file