diff --git a/src/quota/__init__.py b/src/quota/__init__.py index b96d7cc6..08b8ef9a 100644 --- a/src/quota/__init__.py +++ b/src/quota/__init__.py @@ -1 +1,18 @@ -"""Quota management.""" +"""Quota management. + +Tokens and token quota limits + +Tokens are small chunks of text, which can be as small as one character or as +large as one word. Tokens are the units of measurement used to quantify the +amount of text that the service sends to, or receives from, a large language +model (LLM). Every interaction with the service and the LLM is counted in +tokens. + +LLM providers typically charge for their services using a token-based pricing model. + +Token quota limits define the number of tokens that can be used in a certain +timeframe. Implementing token quota limits helps control costs, encourage more +efficient use of queries, and regulate demand on the system. In a multi-user +configuration, token quota limits help provide equal access to all users, +ensuring everyone has an opportunity to submit queries. +""" diff --git a/src/quota/quota_limiter.py b/src/quota/quota_limiter.py index ca682880..2c3ff06b 100644 --- a/src/quota/quota_limiter.py +++ b/src/quota/quota_limiter.py @@ -1,4 +1,34 @@ -"""Abstract class that is the parent for all quota limiter implementations.""" +"""Abstract class that is the parent for all quota limiter implementations. + +It is possible to limit quota usage per user or per service or services (that +typically run in one cluster). Each limit is configured as a separate _quota +limiter_. It can be of type `user_limiter` or `cluster_limiter` (which is a name +that makes sense in an OpenShift deployment). There are three configuration +options for each limiter: + +1. `period` is specified in a human-readable form, see +https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT +for all possible options. When the end of the period is reached, quota is reset +or increased. +1. 
`initial_quota` is set at the beginning of the period +1. `quota_increase` (if specified) is used to increase quota when the end of the period is reached + +There are two basic use cases: + +1. When quota needs to be reset to a specific value periodically (for example on +a weekly or monthly basis), set `initial_quota` to the required value +1. When quota needs to be increased by a specific value periodically (for example +on a daily basis), specify `quota_increase` + +Technically it is possible to specify both `initial_quota` and +`quota_increase`. It means that at the end of the time period the quota will be +*reset* to `initial_quota + quota_increase`. + +Please note that any number of quota limiters can be configured. For example, +two user quota limiters can be set to: +- increase quota by 100,000 tokens each day +- reset quota to 10,000,000 tokens each month +""" from abc import ABC, abstractmethod