-
Notifications
You must be signed in to change notification settings - Fork 55
LCORE-741: consume quota mechanism #722
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,11 @@ | |
| validate_conversation_ownership, | ||
| validate_model_provider_override, | ||
| ) | ||
| from utils.quota import ( | ||
| get_available_quotas, | ||
| check_tokens_available, | ||
| consume_tokens, | ||
| ) | ||
| from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency | ||
| from utils.transcripts import store_transcript | ||
| from utils.types import TurnSummary | ||
|
|
@@ -273,6 +278,7 @@ async def query_endpoint_handler( # pylint: disable=R0914 | |
| logger.debug("Query does not contain conversation ID") | ||
|
|
||
| try: | ||
| check_tokens_available(configuration.quota_limiters, user_id) | ||
| # try to get Llama Stack client | ||
| client = AsyncLlamaStackClientHolder().get_client() | ||
| llama_stack_model_id, model_id, provider_id = select_model_and_provider_id( | ||
|
|
@@ -344,6 +350,13 @@ async def query_endpoint_handler( # pylint: disable=R0914 | |
| referenced_documents=referenced_documents if referenced_documents else None, | ||
| ) | ||
|
|
||
| consume_tokens( | ||
| configuration.quota_limiters, | ||
| user_id, | ||
| input_tokens=token_usage.input_tokens, | ||
| output_tokens=token_usage.output_tokens, | ||
| ) | ||
|
Comment on lines
+353
to
+358
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Same call-site bug for `consume_tokens`. Call the method to get the list; consider storing it once in a local variable to avoid repeated calls.
- consume_tokens(
- configuration.quota_limiters,
+ consume_tokens(
+ configuration.quota_limiters(),
user_id,
input_tokens=token_usage.input_tokens,
output_tokens=token_usage.output_tokens,
)
Optional small refactor:
+ quota_limiters = configuration.quota_limiters()
- check_tokens_available(configuration.quota_limiters(), user_id)
+ check_tokens_available(quota_limiters, user_id)
...
- consume_tokens(configuration.quota_limiters(), user_id,
+ consume_tokens(quota_limiters, user_id,
input_tokens=token_usage.input_tokens,
output_tokens=token_usage.output_tokens)
...
- available_quotas = get_available_quotas(configuration.quota_limiters(), user_id)
+ available_quotas = get_available_quotas(quota_limiters, user_id)
🤖 Prompt for AI Agents |
||
|
|
||
| store_conversation_into_cache( | ||
| configuration, | ||
| user_id, | ||
|
|
@@ -372,6 +385,8 @@ async def query_endpoint_handler( # pylint: disable=R0914 | |
|
|
||
| logger.info("Using referenced documents from response...") | ||
|
|
||
| available_quotas = get_available_quotas(configuration.quota_limiters, user_id) | ||
|
|
||
| logger.info("Building final response...") | ||
| response = QueryResponse( | ||
| conversation_id=conversation_id, | ||
|
|
@@ -382,7 +397,7 @@ async def query_endpoint_handler( # pylint: disable=R0914 | |
| truncated=False, # TODO: implement truncation detection | ||
| input_tokens=token_usage.input_tokens, | ||
| output_tokens=token_usage.output_tokens, | ||
| available_quotas={}, # TODO: implement quota tracking | ||
| available_quotas=available_quotas, | ||
| ) | ||
| logger.info("Query processing completed successfully!") | ||
| return response | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,101 @@ | ||||||||||||||
| """Quota handling helper functions.""" | ||||||||||||||
|
|
||||||||||||||
| import psycopg2 | ||||||||||||||
|
|
||||||||||||||
| from fastapi import HTTPException, status | ||||||||||||||
|
|
||||||||||||||
| from quota.quota_limiter import QuotaLimiter | ||||||||||||||
| from quota.quota_exceed_error import QuotaExceedError | ||||||||||||||
|
|
||||||||||||||
| from log import get_logger | ||||||||||||||
|
|
||||||||||||||
| logger = get_logger(__name__) | ||||||||||||||
|
Comment on lines
+10
to
+12
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Use standard logging setup per coding guidelines. The coding guidelines specify using `logger = logging.getLogger(__name__)` for module-level logging rather than the custom `get_logger` helper. As per coding guidelines. Apply this diff to align with the guidelines:
-from log import get_logger
+import logging
-logger = get_logger(__name__)
+logger = logging.getLogger(__name__)
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
def consume_tokens(
    quota_limiters: list[QuotaLimiter],
    user_id: str,
    input_tokens: int,
    output_tokens: int,
) -> None:
    """Charge the given token usage against every supplied quota limiter.

    Each limiter's ``consume_tokens`` method is invoked once, in list order,
    with the same keyword arguments. An exception raised by a limiter is not
    caught here, so limiters later in the list are not charged in that case.

    Args:
        quota_limiters: Quota limiter instances to consume tokens from.
        user_id: Identifier of the user consuming tokens; passed to each
            limiter as ``subject_id``.
        input_tokens: Number of input tokens to consume.
        output_tokens: Number of output tokens to consume.

    Returns:
        None
    """
    # The usage record is identical for every limiter, so build it once.
    usage = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "subject_id": user_id,
    }
    for limiter in quota_limiters:
        limiter.consume_tokens(**usage)
|
|
||||||||||||||
|
|
||||||||||||||
def check_tokens_available(quota_limiters: list[QuotaLimiter], user_id: str) -> None:
    """Verify that the user still has quota on every supplied limiter.

    Each limiter's ``ensure_available_quota`` is called in list order. The
    first limiter that raises aborts the check, and the error is translated
    into an ``HTTPException`` for the API layer.

    Args:
        quota_limiters: Quota limiter instances to check.
        user_id: Identifier of the user to check quota for; passed to each
            limiter as ``subject_id``.

    Returns:
        None

    Raises:
        HTTPException: With status 500 if database communication fails,
            or status 429 if quota is exceeded.
    """
    try:
        # check available tokens using all configured quota limiters
        for quota_limiter in quota_limiters:
            quota_limiter.ensure_available_quota(subject_id=user_id)
    except psycopg2.Error as pg_error:
        message = "Error communicating with quota database backend"
        # Log the underlying cause too: the fixed message alone gives
        # operators nothing to diagnose the backend failure with.
        logger.error("%s: %s", message, pg_error)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "response": message,
                "cause": str(pg_error),
            },
        ) from pg_error
    except QuotaExceedError as quota_exceed_error:
        message = "The quota has been exceeded"
        # Include the limiter's own explanation so the log entry shows
        # which quota was exhausted, not just that one was.
        logger.error("%s: %s", message, quota_exceed_error)
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail={
                "response": message,
                "cause": str(quota_exceed_error),
            },
        ) from quota_exceed_error
|
|
||||||||||||||
|
|
||||||||||||||
def get_available_quotas(
    quota_limiters: list[QuotaLimiter],
    user_id: str,
) -> dict[str, int]:
    """Collect the remaining quota reported by each supplied limiter.

    Results are keyed by the limiter's class name, so if two limiters share
    a class the later one in the list overwrites the earlier entry.

    Args:
        quota_limiters: Quota limiter instances to query.
        user_id: Identifier of the user to get quotas for.

    Returns:
        Dictionary mapping quota limiter class names to the value returned
        by that limiter's ``available_quota(user_id)``.
    """
    return {
        limiter.__class__.__name__: limiter.available_quota(user_id)
        for limiter in quota_limiters
    }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
Imports LGTM; ensure utils.quota adheres to logging guideline.
The imported helpers are fine; see separate comment in utils/quota.py about replacing `get_logger` with `logging.getLogger(__name__)`.
Scan for any other missing parentheses usages:
🏁 Script executed:
Length of output: 0
Length of output: 63
🏁 Script executed:
Length of output: 18
🏁 Script executed:
Length of output: 62
Length of output: 508
🏁 Script executed:
Length of output: 3113
Fix logging in utils/quota.py to meet guideline requirements.
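Imports in query.py are syntactically correct. However, utils/quota.py violates the logging guideline: it uses the custom `get_logger` helper (line 10) instead of the standard `logging.getLogger(__name__)`. Replace:
from log import get_logger
logger = get_logger(__name__)
With:
import logging
logger = logging.getLogger(__name__)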
🤖 Prompt for AI Agents