Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion src/app/endpoints/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@
validate_conversation_ownership,
validate_model_provider_override,
)
from utils.quota import (
get_available_quotas,
check_tokens_available,
consume_tokens,
)
Comment on lines +56 to +60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Imports LGTM; ensure utils.quota adheres to logging guideline.

The imported helpers are fine; see separate comment in utils/quota.py about replacing get_logger with logging.getLogger(name).

Scan for any other missing parentheses usages:


🏁 Script executed:

#!/bin/bash
rg -nP "configuration\\.quota_limiters(?!\\s*\\()" -g "src/**.py" -S -C2

Length of output: 0



---

🏁 Script executed:

```shell
#!/bin/bash
# Verify the file at src/app/endpoints/query.py exists and check imports
fd "query.py" src/app/endpoints/

Length of output: 63


🏁 Script executed:

#!/bin/bash
# Check if utils/quota.py exists and examine its logging implementation
fd "quota.py" src/

Length of output: 18


🏁 Script executed:

#!/bin/bash
# Search for get_logger usage in utils/quota.py
rg -n "get_logger|logging\.getLogger" src/utils/quota.py

Length of output: 62



---

🏁 Script executed:

```shell
#!/bin/bash
# Examine query.py lines 50-65 to verify imports context
sed -n '50,65p' src/app/endpoints/query.py

Length of output: 508


🏁 Script executed:

#!/bin/bash
# Examine utils/quota.py to see full logging implementation and scope
cat src/utils/quota.py

Length of output: 3113


Fix logging in utils/quota.py to meet guideline requirements.

Imports in query.py are syntactically correct. However, utils/quota.py violates the logging guideline: it uses custom get_logger (line 10) instead of the standard logging.getLogger(__name__). Replace:

from log import get_logger
logger = get_logger(__name__)

With:

import logging
logger = logging.getLogger(__name__)
🤖 Prompt for AI Agents
In utils/quota.py around line 10, replace the custom logger import and
instantiation that uses get_logger with the standard logging module: remove
"from log import get_logger" and "logger = get_logger(__name__)" and instead
import the built-in logging module and instantiate the logger with
logging.getLogger(__name__); keep the same logger variable name so existing
calls to logger.debug/info/error continue to work.

from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
from utils.transcripts import store_transcript
from utils.types import TurnSummary
Expand Down Expand Up @@ -273,6 +278,7 @@ async def query_endpoint_handler( # pylint: disable=R0914
logger.debug("Query does not contain conversation ID")

try:
check_tokens_available(configuration.quota_limiters, user_id)
# try to get Llama Stack client
client = AsyncLlamaStackClientHolder().get_client()
llama_stack_model_id, model_id, provider_id = select_model_and_provider_id(
Expand Down Expand Up @@ -344,6 +350,13 @@ async def query_endpoint_handler( # pylint: disable=R0914
referenced_documents=referenced_documents if referenced_documents else None,
)

consume_tokens(
configuration.quota_limiters,
user_id,
input_tokens=token_usage.input_tokens,
output_tokens=token_usage.output_tokens,
)
Comment on lines +353 to +358
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Same call-site bug for consume_tokens.

Call the method to get the list; consider storing it once in a local variable to avoid repeated calls.

-        consume_tokens(
-            configuration.quota_limiters,
+        consume_tokens(
+            configuration.quota_limiters(),
             user_id,
             input_tokens=token_usage.input_tokens,
             output_tokens=token_usage.output_tokens,
         )

Optional small refactor:

+        quota_limiters = configuration.quota_limiters()
-        check_tokens_available(configuration.quota_limiters(), user_id)
+        check_tokens_available(quota_limiters, user_id)
 ...
-        consume_tokens(configuration.quota_limiters(), user_id,
+        consume_tokens(quota_limiters, user_id,
             input_tokens=token_usage.input_tokens,
             output_tokens=token_usage.output_tokens)
 ...
-        available_quotas = get_available_quotas(configuration.quota_limiters(), user_id)
+        available_quotas = get_available_quotas(quota_limiters, user_id)
🤖 Prompt for AI Agents
In src/app/endpoints/query.py around lines 353 to 358, the call to
consume_tokens is passing configuration.quota_limiters directly which likely
triggers repeated property access or a method call; call the method/property
once, assign the resulting list to a local variable (e.g., quota_limiters =
configuration.quota_limiters or configuration.get_quota_limiters()) and then
pass that local variable into consume_tokens so the list is retrieved once and
reused; update the call site accordingly to use the local variable.


store_conversation_into_cache(
configuration,
user_id,
Expand Down Expand Up @@ -372,6 +385,8 @@ async def query_endpoint_handler( # pylint: disable=R0914

logger.info("Using referenced documents from response...")

available_quotas = get_available_quotas(configuration.quota_limiters, user_id)

logger.info("Building final response...")
response = QueryResponse(
conversation_id=conversation_id,
Expand All @@ -382,7 +397,7 @@ async def query_endpoint_handler( # pylint: disable=R0914
truncated=False, # TODO: implement truncation detection
input_tokens=token_usage.input_tokens,
output_tokens=token_usage.output_tokens,
available_quotas={}, # TODO: implement quota tracking
available_quotas=available_quotas,
)
logger.info("Query processing completed successfully!")
return response
Expand Down
101 changes: 101 additions & 0 deletions src/utils/quota.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Quota handling helper functions."""

import psycopg2

from fastapi import HTTPException, status

from quota.quota_limiter import QuotaLimiter
from quota.quota_exceed_error import QuotaExceedError

from log import get_logger

logger = get_logger(__name__)
Comment on lines +10 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use standard logging setup per coding guidelines.

The coding guidelines specify using logger = logging.getLogger(__name__) after importing the logging module directly, rather than using the custom get_logger function.

As per coding guidelines.

Apply this diff to align with the guidelines:

-from log import get_logger
+import logging

-logger = get_logger(__name__)
+logger = logging.getLogger(__name__)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from log import get_logger
logger = get_logger(__name__)
import logging
logger = logging.getLogger(__name__)
🤖 Prompt for AI Agents
In src/utils/quota.py around lines 9 to 11, the module currently imports and
uses a custom get_logger; replace it with the standard logging setup by
importing the logging module and creating the logger via
logging.getLogger(__name__); remove the custom get_logger import and ensure only
the standard logging import and logger assignment remain.



def consume_tokens(
    quota_limiters: list[QuotaLimiter],
    user_id: str,
    input_tokens: int,
    output_tokens: int,
) -> None:
    """Consume tokens from cluster and/or user quotas.

    Args:
        quota_limiters: List of quota limiter instances to consume tokens from.
        user_id: Identifier of the user consuming tokens.
        input_tokens: Number of input tokens to consume.
        output_tokens: Number of output tokens to consume.

    Returns:
        None
    """
    # Every configured limiter accounts for the same usage independently
    # (e.g. a per-user bucket and a cluster-wide bucket).
    for limiter in quota_limiters:
        limiter.consume_tokens(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            subject_id=user_id,
        )


def check_tokens_available(quota_limiters: list[QuotaLimiter], user_id: str) -> None:
    """Check if tokens are available for user.

    Args:
        quota_limiters: List of quota limiter instances to check.
        user_id: Identifier of the user to check quota for.

    Returns:
        None

    Raises:
        HTTPException: With status 500 if database communication fails,
            or status 429 if quota is exceeded.
    """
    try:
        # Every configured limiter must have quota available; the first
        # failing limiter aborts the check.
        for quota_limiter in quota_limiters:
            quota_limiter.ensure_available_quota(subject_id=user_id)
    except psycopg2.Error as pg_error:
        message = "Error communicating with quota database backend"
        # logger.exception keeps the DB error's traceback in the logs,
        # not just the generic message.
        logger.exception(message)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "response": message,
                "cause": str(pg_error),
            },
        ) from pg_error
    except QuotaExceedError as quota_exceed_error:
        message = "The quota has been exceeded"
        # Expected condition (429), so no traceback — but include the cause
        # so the log line is actionable on its own.
        logger.error("%s: %s", message, quota_exceed_error)
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail={
                "response": message,
                "cause": str(quota_exceed_error),
            },
        ) from quota_exceed_error


def get_available_quotas(
    quota_limiters: list[QuotaLimiter],
    user_id: str,
) -> dict[str, int]:
    """Get quota available from all quota limiters.

    Args:
        quota_limiters: List of quota limiter instances to query.
        user_id: Identifier of the user to get quotas for.

    Returns:
        Dictionary mapping quota limiter class names to available token counts.
    """
    # Key each entry by the limiter's class name so different limiter kinds
    # are distinguishable in the response.
    return {
        limiter.__class__.__name__: limiter.available_quota(user_id)
        for limiter in quota_limiters
    }
4 changes: 4 additions & 0 deletions tests/unit/app/endpoints/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ async def _test_query_endpoint_handler(
mock_config.user_data_collection_configuration.transcripts_enabled = (
store_transcript_to_file
)
mock_config.quota_limiters = []
mocker.patch("app.endpoints.query.configuration", mock_config)

mock_store_in_cache = mocker.patch(
Expand Down Expand Up @@ -1434,6 +1435,7 @@ async def test_auth_tuple_unpacking_in_query_endpoint_handler(
# Mock dependencies
mock_config = mocker.Mock()
mock_config.llama_stack_configuration = mocker.Mock()
mock_config.quota_limiters = []
mocker.patch("app.endpoints.query.configuration", mock_config)

mock_client = mocker.AsyncMock()
Expand Down Expand Up @@ -1499,6 +1501,7 @@ async def test_query_endpoint_handler_no_tools_true(mocker, dummy_request) -> No

mock_config = mocker.Mock()
mock_config.user_data_collection_configuration.transcripts_disabled = True
mock_config.quota_limiters = []
mocker.patch("app.endpoints.query.configuration", mock_config)

summary = TurnSummary(
Expand Down Expand Up @@ -1555,6 +1558,7 @@ async def test_query_endpoint_handler_no_tools_false(mocker, dummy_request) -> N

mock_config = mocker.Mock()
mock_config.user_data_collection_configuration.transcripts_disabled = True
mock_config.quota_limiters = []
mocker.patch("app.endpoints.query.configuration", mock_config)

summary = TurnSummary(
Expand Down
Loading