Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
319 changes: 187 additions & 132 deletions codex-rs/core/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,155 +225,169 @@ impl ModelClient {
if azure_workaround {
attach_item_ids(&mut payload_json, &input_with_instructions);
}
let payload_body = serde_json::to_string(&payload_json)?;

let mut attempt = 0;
let max_retries = self.provider.request_max_retries();
let max_attempts = self.provider.request_max_retries();
for attempt in 0..=max_attempts {
match self
.attempt_stream_responses(&payload_json, &auth_manager)
.await
{
Ok(stream) => {
return Ok(stream);
}
Err(StreamAttemptError::Fatal(e)) => {
return Err(e);
}
Err(retryable_attempt_error) => {
if attempt == max_attempts {
return Err(retryable_attempt_error.into_error());
}

tokio::time::sleep(retryable_attempt_error.delay(attempt)).await;
}
}
}

loop {
attempt += 1;
unreachable!("stream_responses_attempt should always return");
}

// Always fetch the latest auth in case a prior attempt refreshed the token.
let auth = auth_manager.as_ref().and_then(|m| m.auth());
/// Single attempt to start a streaming Responses API call.
async fn attempt_stream_responses(
Comment thread
bolinfest marked this conversation as resolved.
&self,
payload_json: &Value,
auth_manager: &Option<Arc<AuthManager>>,
) -> std::result::Result<ResponseStream, StreamAttemptError> {
// Always fetch the latest auth in case a prior attempt refreshed the token.
let auth = auth_manager.as_ref().and_then(|m| m.auth());

trace!(
"POST to {}: {:?}",
self.provider.get_full_url(&auth),
serde_json::to_string(payload_json)
);

let mut req_builder = self
.provider
.create_request_builder(&self.client, &auth)
.await
.map_err(StreamAttemptError::Fatal)?;

req_builder = req_builder
.header("OpenAI-Beta", "responses=experimental")
// Send session_id for compatibility.
.header("conversation_id", self.conversation_id.to_string())
.header("session_id", self.conversation_id.to_string())
.header(reqwest::header::ACCEPT, "text/event-stream")
.json(payload_json);

if let Some(auth) = auth.as_ref()
&& auth.mode == AuthMode::ChatGPT
&& let Some(account_id) = auth.get_account_id()
{
req_builder = req_builder.header("chatgpt-account-id", account_id);
}

let res = req_builder.send().await;
if let Ok(resp) = &res {
trace!(
"POST to {}: {}",
self.provider.get_full_url(&auth),
payload_body.as_str()
"Response status: {}, cf-ray: {}",
resp.status(),
resp.headers()
.get("cf-ray")
.map(|v| v.to_str().unwrap_or_default())
.unwrap_or_default()
);
}

let mut req_builder = self
.provider
.create_request_builder(&self.client, &auth)
.await?;
match res {
Ok(resp) if resp.status().is_success() => {
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);

if let Some(snapshot) = parse_rate_limit_snapshot(resp.headers())
&& tx_event
.send(Ok(ResponseEvent::RateLimits(snapshot)))
.await
.is_err()
{
debug!("receiver dropped rate limit snapshot event");
}

req_builder = req_builder
.header("OpenAI-Beta", "responses=experimental")
// Send session_id for compatibility.
.header("conversation_id", self.conversation_id.to_string())
.header("session_id", self.conversation_id.to_string())
.header(reqwest::header::ACCEPT, "text/event-stream")
.json(&payload_json);

if let Some(auth) = auth.as_ref()
&& auth.mode == AuthMode::ChatGPT
&& let Some(account_id) = auth.get_account_id()
{
req_builder = req_builder.header("chatgpt-account-id", account_id);
}
// spawn task to process SSE
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
tokio::spawn(process_sse(
stream,
tx_event,
self.provider.stream_idle_timeout(),
));

let res = req_builder.send().await;
if let Ok(resp) = &res {
trace!(
"Response status: {}, cf-ray: {}",
resp.status(),
resp.headers()
.get("cf-ray")
.map(|v| v.to_str().unwrap_or_default())
.unwrap_or_default()
);
Ok(ResponseStream { rx_event })
}

match res {
Ok(resp) if resp.status().is_success() => {
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);

if let Some(snapshot) = parse_rate_limit_snapshot(resp.headers())
&& tx_event
.send(Ok(ResponseEvent::RateLimits(snapshot)))
.await
.is_err()
{
debug!("receiver dropped rate limit snapshot event");
}

// spawn task to process SSE
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
tokio::spawn(process_sse(
stream,
tx_event,
self.provider.stream_idle_timeout(),
));

return Ok(ResponseStream { rx_event });
Ok(res) => {
let status = res.status();

// Pull out Retry‑After header if present.
let retry_after_secs = res
.headers()
.get(reqwest::header::RETRY_AFTER)
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u64>().ok());
let retry_after = retry_after_secs.map(|s| Duration::from_millis(s * 1_000));

if status == StatusCode::UNAUTHORIZED
&& let Some(manager) = auth_manager.as_ref()
&& manager.auth().is_some()
{
let _ = manager.refresh_token().await;
}
Ok(res) => {
let status = res.status();

// Pull out Retry‑After header if present.
let retry_after_secs = res
.headers()
.get(reqwest::header::RETRY_AFTER)
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u64>().ok());

if status == StatusCode::UNAUTHORIZED
&& let Some(manager) = auth_manager.as_ref()
&& manager.auth().is_some()
{
let _ = manager.refresh_token().await;
}

// The OpenAI Responses endpoint returns structured JSON bodies even for 4xx/5xx
// errors. When we bubble early with only the HTTP status the caller sees an opaque
// "unexpected status 400 Bad Request" which makes debugging nearly impossible.
// Instead, read (and include) the response text so higher layers and users see the
// exact error message (e.g. "Unknown parameter: 'input[0].metadata'"). The body is
// small and this branch only runs on error paths so the extra allocation is
// negligible.
if !(status == StatusCode::TOO_MANY_REQUESTS
|| status == StatusCode::UNAUTHORIZED
|| status.is_server_error())
{
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
let body = res.text().await.unwrap_or_default();
return Err(CodexErr::UnexpectedStatus(status, body));
}

if status == StatusCode::TOO_MANY_REQUESTS {
let rate_limit_snapshot = parse_rate_limit_snapshot(res.headers());
let body = res.json::<ErrorResponse>().await.ok();
if let Some(ErrorResponse { error }) = body {
if error.r#type.as_deref() == Some("usage_limit_reached") {
// Prefer the plan_type provided in the error message if present
// because it's more up to date than the one encoded in the auth
// token.
let plan_type = error
.plan_type
.or_else(|| auth.as_ref().and_then(CodexAuth::get_plan_type));
let resets_in_seconds = error.resets_in_seconds;
return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
plan_type,
resets_in_seconds,
rate_limits: rate_limit_snapshot,
}));
} else if error.r#type.as_deref() == Some("usage_not_included") {
return Err(CodexErr::UsageNotIncluded);
}
}
}
// The OpenAI Responses endpoint returns structured JSON bodies even for 4xx/5xx
// errors. When we bubble early with only the HTTP status the caller sees an opaque
// "unexpected status 400 Bad Request" which makes debugging nearly impossible.
// Instead, read (and include) the response text so higher layers and users see the
// exact error message (e.g. "Unknown parameter: 'input[0].metadata'"). The body is
// small and this branch only runs on error paths so the extra allocation is
// negligible.
if !(status == StatusCode::TOO_MANY_REQUESTS
|| status == StatusCode::UNAUTHORIZED
|| status.is_server_error())
{
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
let body = res.text().await.unwrap_or_default();
return Err(StreamAttemptError::Fatal(CodexErr::UnexpectedStatus(
status, body,
)));
}

if attempt > max_retries {
if status == StatusCode::INTERNAL_SERVER_ERROR {
return Err(CodexErr::InternalServerError);
if status == StatusCode::TOO_MANY_REQUESTS {
let rate_limit_snapshot = parse_rate_limit_snapshot(res.headers());
let body = res.json::<ErrorResponse>().await.ok();
if let Some(ErrorResponse { error }) = body {
if error.r#type.as_deref() == Some("usage_limit_reached") {
// Prefer the plan_type provided in the error message if present
// because it's more up to date than the one encoded in the auth
// token.
let plan_type = error
.plan_type
.or_else(|| auth.as_ref().and_then(CodexAuth::get_plan_type));
let resets_in_seconds = error.resets_in_seconds;
let codex_err = CodexErr::UsageLimitReached(UsageLimitReachedError {
plan_type,
resets_in_seconds,
rate_limits: rate_limit_snapshot,
});
return Err(StreamAttemptError::Fatal(codex_err));
} else if error.r#type.as_deref() == Some("usage_not_included") {
return Err(StreamAttemptError::Fatal(CodexErr::UsageNotIncluded));
}

return Err(CodexErr::RetryLimit(status));
}

let delay = retry_after_secs
.map(|s| Duration::from_millis(s * 1_000))
.unwrap_or_else(|| backoff(attempt));
tokio::time::sleep(delay).await;
}
Err(e) => {
if attempt > max_retries {
return Err(e.into());
}
let delay = backoff(attempt);
tokio::time::sleep(delay).await;
}

Err(StreamAttemptError::RetryableHttpError {
status,
retry_after,
})
}
Err(e) => Err(StreamAttemptError::RetryableTransportError(e.into())),
}
}

Expand Down Expand Up @@ -406,6 +420,47 @@ impl ModelClient {
}
}

enum StreamAttemptError {
RetryableHttpError {
status: StatusCode,
retry_after: Option<Duration>,
},
RetryableTransportError(CodexErr),
Fatal(CodexErr),
}

impl StreamAttemptError {
/// attempt is 0-based.
fn delay(&self, attempt: u64) -> Duration {
// backoff() uses 1-based attempts.
let backoff_attempt = attempt + 1;
match self {
Self::RetryableHttpError { retry_after, .. } => {
retry_after.unwrap_or_else(|| backoff(backoff_attempt))
}
Self::RetryableTransportError { .. } => backoff(backoff_attempt),
Self::Fatal(_) => {
// Should not be called on Fatal errors.
Duration::from_secs(0)
}
}
}

fn into_error(self) -> CodexErr {
match self {
Self::RetryableHttpError { status, .. } => {
if status == StatusCode::INTERNAL_SERVER_ERROR {
CodexErr::InternalServerError
} else {
CodexErr::RetryLimit(status)
}
}
Self::RetryableTransportError(error) => error,
Self::Fatal(error) => error,
}
}
}

#[derive(Debug, Deserialize, Serialize)]
struct SseEvent {
#[serde(rename = "type")]
Expand Down
Loading