10 changes: 10 additions & 0 deletions docs/docs/usage-guide/changing_a_model.md
@@ -32,6 +32,16 @@ OPENAI__API_BASE=https://api.openai.com/v1
OPENAI__KEY=sk-...
```

### OpenAI Flex Processing

To reduce costs for non-urgent or background tasks, enable OpenAI Flex Processing by passing an extra request body through LiteLLM:

```toml
[litellm]
extra_body='{"processing_mode": "flex"}'
```

See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.
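
The handler's allow-list also accepts a `service_tier` key, so if your account expects that field name instead, the equivalent setting would be (a sketch only, not verified against every account configuration):

```toml
[litellm]
extra_body='{"service_tier": "flex"}'
```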

### Azure

34 changes: 34 additions & 0 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -175,6 +175,37 @@ def prepare_logs(self, response, system, user, resp, finish_reason):
response_log['main_pr_language'] = 'unknown'
return response_log

def _process_litellm_extra_body(self, kwargs: dict) -> dict:
"""
Process LITELLM.EXTRA_BODY configuration and update kwargs accordingly.

Args:
kwargs: The current kwargs dictionary to update

Returns:
Updated kwargs dictionary

Raises:
ValueError: If extra_body contains invalid JSON, unsupported keys, or colliding keys
"""
allowed_extra_body_keys = {"processing_mode", "service_tier"}
extra_body = getattr(getattr(get_settings(), "litellm", None), "extra_body", None)
if extra_body:
try:
litellm_extra_body = json.loads(extra_body)
if not isinstance(litellm_extra_body, dict):
raise ValueError("LITELLM.EXTRA_BODY must be a JSON object")
unsupported_keys = set(litellm_extra_body.keys()) - allowed_extra_body_keys
if unsupported_keys:
raise ValueError(f"LITELLM.EXTRA_BODY contains unsupported keys: {', '.join(unsupported_keys)}. Allowed keys: {', '.join(allowed_extra_body_keys)}")
colliding_keys = kwargs.keys() & litellm_extra_body.keys()
if colliding_keys:
raise ValueError(f"LITELLM.EXTRA_BODY cannot override existing parameters: {', '.join(colliding_keys)}")
kwargs.update(litellm_extra_body)
except json.JSONDecodeError as e:
raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}")
return kwargs

def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:
"""
Configure Claude extended thinking parameters if applicable.
@@ -364,6 +395,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
kwargs["extra_headers"] = litellm_extra_headers

# Support for custom OpenAI body fields (e.g., Flex Processing)
kwargs = self._process_litellm_extra_body(kwargs)

get_logger().debug("Prompts", artifact={"system": system, "user": user})

if get_settings().config.verbosity_level >= 2:
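
For reference, below is a minimal standalone sketch of the validation flow the new `_process_litellm_extra_body` helper implements, detached from pr-agent's settings machinery; the function name and example inputs are hypothetical.

```python
import json

ALLOWED_EXTRA_BODY_KEYS = {"processing_mode", "service_tier"}

def merge_extra_body(extra_body: str, kwargs: dict) -> dict:
    """Parse an extra_body JSON string and merge it into kwargs,
    rejecting non-objects, unknown keys, and key collisions."""
    parsed = json.loads(extra_body)  # raises json.JSONDecodeError on malformed JSON
    if not isinstance(parsed, dict):
        raise ValueError("extra_body must be a JSON object")
    unsupported = set(parsed) - ALLOWED_EXTRA_BODY_KEYS
    if unsupported:
        raise ValueError(f"unsupported keys: {', '.join(sorted(unsupported))}")
    colliding = kwargs.keys() & parsed.keys()
    if colliding:
        raise ValueError(f"cannot override existing parameters: {', '.join(sorted(colliding))}")
    kwargs.update(parsed)
    return kwargs

# Accepted: merges the flex flag into the completion kwargs
print(merge_extra_body('{"processing_mode": "flex"}', {"model": "gpt-4o"}))

# Rejected: "temperature" is not on the allow-list
try:
    merge_extra_body('{"temperature": 0}', {"model": "gpt-4o"})
except ValueError as err:
    print(err)
```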
4 changes: 4 additions & 0 deletions pr_agent/settings/.secrets_template.toml
@@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com
#deployment_id = "" # The deployment name you chose when you deployed the engine
#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id

# OpenAI Flex Processing (optional, for cost savings)
# [litellm]
# extra_body='{"processing_mode": "flex"}'

[pinecone]
api_key = "..."
environment = "gcp-starter"