# Retrying with `backoff`

In [3]:
from dotenv import load_dotenv
from openai import AsyncOpenAI, RateLimitError, APITimeoutError

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is how the OpenAI API key reaches the client
# created below — confirm against the project's .env.
load_dotenv()

True

In [4]:
# Async OpenAI client, constructed with no explicit arguments.
# NOTE(review): presumably credentials are taken from the environment — confirm.
client = AsyncOpenAI()

# Model identifier passed as `model=` in the chat-completion calls in this notebook.
GPT_4O_MINI = "gpt-4o-mini"

# Exploratory: list the attributes available on the client object.
dir(client)

['__aenter__',
 '__aexit__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_base_url',
 '_build_headers',
 '_build_request',
 '_calculate_retry_timeout',
 '_client',
 '_custom_headers',
 '_custom_query',
 '_default_stream_cls',
 '_enforce_trailing_slash',
 '_idempotency_header',
 '_idempotency_key',
 '_limits',
 '_make_sse_decoder',
 '_make_status_error',
 '_make_status_error_from_response',
 '_maybe_override_cast_to',
 '_parse_retry_after_header',
 '_platform',
 '_prepare_options',
 '_prepare_request',
 '_prepare_url',
 '_process_response',
 '_process_response_data',
 ...]  (output truncated)

In [8]:
import backoff

# Exploratory: list the names exposed by the backoff package. The decorator
# (`on_exception`) and wait generator (`expo`) used in this notebook both
# appear in the listing.
dir(backoff)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_async',
 '_common',
 '_decorator',
 '_jitter',
 '_sync',
 '_typing',
 '_wait_gen',
 'constant',
 'expo',
 'fibo',
 'full_jitter',
 'on_exception',
 'on_predicate',
 'random_jitter',
 'runtime']

In [None]:
# Baseline request that the retry logic in this notebook is meant to protect:
# a single chat completion with one user message, sent to the model named by
# GPT_4O_MINI, with no explicit retry handling around it.
await client.chat.completions.create(
    model=GPT_4O_MINI,
    messages=[dict(role="user", content="Hey! Mista!")],
)

ChatCompletion(id='chatcmpl-AXNfIZ3kxZJUGI45HW4yRb4WRlqPL', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hello! How can I assist you today?', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1732519156, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_3de1288069', usage=CompletionUsage(completion_tokens=9, prompt_tokens=12, total_tokens=21, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [14]:
import backoff
from functools import wraps


# `wraps` is applied last (outermost), so the object bound to `do_call` carries
# the metadata (`__name__`, `__doc__`, `__wrapped__`) of
# `client.chat.completions.create` rather than that of the function below.
@wraps(wrapped=client.chat.completions.create)
@backoff.on_exception(
    wait_gen=backoff.expo, exception=(RateLimitError, APITimeoutError), max_time=60
)
async def do_call(*, model, messages, **kwargs):
    """Create a chat completion, retrying on rate-limit and timeout errors.

    Args:
        model: Model identifier forwarded to ``chat.completions.create``.
        messages: Conversation messages forwarded to ``chat.completions.create``.
        **kwargs: Any further keyword arguments, forwarded unchanged to
            ``chat.completions.create``.

    Returns:
        Whatever ``client.chat.completions.create`` returns for this request.

    Raises:
        RateLimitError, APITimeoutError: If the call is still failing once the
            60-second retry budget (``max_time=60``) is exhausted.
        Exception: Any other error from the client propagates immediately;
            only the two exception types listed in the decorator are retried.
    """
    # Return the response and forward the extra options: the previous version
    # awaited the call but discarded its result (always returning None) and
    # silently dropped **kwargs.
    return await client.chat.completions.create(
        model=model,
        messages=messages,
        **kwargs,
    )

### Code Breakdown:

#### **1. Importing Modules**

```python
import backoff
from functools import wraps
```

- **`functools.wraps`**: A decorator for preserving the metadata (like the name, docstring, etc.) of the wrapped function when wrapping it with another function.
- **`backoff`**: A library for retrying a function with an exponential backoff delay when specific exceptions occur.

---

#### **2. The `@wraps` Decorator**

```python
@wraps(wrapped=client.chat.completions.create)
```

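- **Purpose**: Copies the metadata (e.g., `__name__`, `__doc__`) of the `client.chat.completions.create` method onto `do_call` and sets `do_call.__wrapped__` to that method, so `help()` and signature introspection show the original API's documentation and parameters.
- **Why it’s used**: `do_call` is a thin wrapper around `create`; without `@wraps` it would keep its own name and have no docstring. The trade-off is that, with `@wraps`, `do_call.__name__` becomes `"create"`, so anything that reports the function's `__name__` will say `create` rather than `do_call`.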

---

#### **3. The `@backoff.on_exception` Decorator**

```python
@backoff.on_exception(
    wait_gen=backoff.expo, exception=(RateLimitError, APITimeoutError), max_time=60
)
```

- **Purpose**: Automatically retries the decorated function (`do_call`) when certain exceptions occur.
- **Parameters**:
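  - `wait_gen=backoff.expo`: Uses exponential backoff for retry intervals: the wait ceiling grows 1s, 2s, 4s, etc. Because `on_exception` applies `backoff.full_jitter` by default, each actual wait is a random value between 0 and that ceiling.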
  - `exception=(RateLimitError, APITimeoutError)`: Specifies the exceptions that trigger the retry mechanism.
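  - `max_time=60`: Caps the total time spent calling and retrying at roughly 60 seconds; once that budget is used up, `backoff` gives up and re-raises the last exception to the caller.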

---

#### **4. The `do_call` Function**

```python
async def do_call(*, model, messages, **kwargs) -> None:
    await client.chat.completions.create(
        model=model,
        messages=messages,
    )
```

- **Async Function**: `async def` indicates that this is an asynchronous function.
- **Arguments**:
  - `model`, `messages`: Required keyword arguments for the function.
  - `**kwargs`: Additional optional keyword arguments.
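- **Action**: Calls `client.chat.completions.create` asynchronously, passing along `model` and `messages`. Note that in the snippet as shown, `**kwargs` is accepted but not forwarded, and the awaited response is not returned (hence the `-> None` annotation). To get the completion back and honour extra options such as `temperature`, the body should be `return await client.chat.completions.create(model=model, messages=messages, **kwargs)`.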

---

### How It Works:

1. **Retry on Exceptions**:
   - If `client.chat.completions.create` raises either a `RateLimitError` or `APITimeoutError`, the `backoff` library retries the `do_call` function after an exponentially increasing delay.
   - The retries stop after 60 seconds (due to `max_time=60`).

2. **Metadata Preservation**:
   - The `@wraps` decorator ensures `do_call` appears identical to `client.chat.completions.create` in terms of metadata. This is helpful for debugging, logging, and documentation purposes.

3. **Asynchronous Execution**:
   - `do_call` is designed for asynchronous workflows, making it suitable for non-blocking operations in an event loop (e.g., in frameworks like `asyncio` or `FastAPI`).

---

### Notes:

- **Using `@wraps`**:
  - When you wrap a function with a decorator, the decorated function usually loses its metadata.
  - `functools.wraps` fixes this by copying the original function's metadata to the wrapper function.

- **Using `backoff`**:
  - The `backoff.on_exception` decorator simplifies adding retry logic.
  - Exponential backoff prevents overwhelming the server during retries by gradually increasing the delay between attempts.

- **Error Handling**:
  - By specifying exceptions (`RateLimitError, APITimeoutError`), only those errors trigger the retry logic, preventing unnecessary retries for other failures.

- **Async Context**:
  - The `async` keyword allows the function to perform non-blocking calls, ideal for I/O-bound tasks like API calls.

---

This code block is a robust way to handle retries for API calls, especially when working in an asynchronous context and dealing with rate-limiting or timeout errors.