Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 43 additions & 7 deletions codex-rs/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ mod doctor;
mod marketplace_cmd;
mod mcp_cmd;
mod plugin_cmd;
mod state_db_recovery;
#[cfg(not(windows))]
mod wsl_paths;

use crate::mcp_cmd::McpCli;
use crate::plugin_cmd::PluginCli;
use crate::plugin_cmd::PluginSubcommand;
use doctor::DoctorCommand;
use state_db_recovery as local_state_db;

use codex_config::LoaderOverrides;
use codex_core::build_models_manager;
Expand Down Expand Up @@ -1981,13 +1983,47 @@ async fn run_interactive_tui(
};
*slot = Some(auth_token);
}
codex_tui::run_main(
interactive,
arg0_paths,
codex_config::LoaderOverrides::default(),
remote_endpoint,
)
.await
let start_tui = || {
codex_tui::run_main(
interactive.clone(),
arg0_paths.clone(),
codex_config::LoaderOverrides::default(),
remote_endpoint.clone(),
)
};
let mut attempted_repair = false;
loop {
let err = match start_tui().await {
Ok(exit_info) => return Ok(exit_info),
Err(err) => err,
};
let Some(startup_error) = local_state_db::startup_error(&err) else {
return Err(err);
};
if local_state_db::is_locked(startup_error.detail()) {
local_state_db::print_locked_guidance(startup_error);
return Ok(AppExitInfo::fatal(startup_error.to_string()));
}
if attempted_repair {
local_state_db::print_diagnostic_guidance(startup_error);
return Ok(AppExitInfo::fatal(startup_error.to_string()));
}
if !local_state_db::confirm_repair(startup_error)? {
local_state_db::print_diagnostic_guidance(startup_error);
return Ok(AppExitInfo::fatal(startup_error.to_string()));
}

match local_state_db::repair_files(startup_error).await {
Ok(backups) => local_state_db::print_repair_backups(&backups),
Err(repair_err) => {
local_state_db::print_diagnostic_guidance(startup_error);
return Ok(AppExitInfo::fatal(format!(
"failed to repair Codex local data automatically: {repair_err}"
)));
}
}
attempted_repair = true;
}
}

fn confirm(prompt: &str) -> std::io::Result<bool> {
Expand Down
183 changes: 183 additions & 0 deletions codex-rs/cli/src/state_db_recovery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//! CLI recovery for local state database startup failures.
//!
//! This keeps user-facing repair and lock-contention handling out of the main
//! CLI dispatch path while preserving the TUI startup error as the boundary type.

use codex_tui::LocalStateDbStartupError;
use std::path::PathBuf;

pub(crate) fn startup_error(err: &std::io::Error) -> Option<&LocalStateDbStartupError> {
err.get_ref()
.and_then(|err| err.downcast_ref::<LocalStateDbStartupError>())
}

pub(crate) fn is_locked(detail: &str) -> bool {
let detail = detail.to_ascii_lowercase();
detail.contains("database is locked") || detail.contains("database is busy")
}

pub(crate) fn confirm_repair(startup_error: &LocalStateDbStartupError) -> std::io::Result<bool> {
eprintln!("Codex couldn't start because its local database appears to be damaged.");
eprintln!("Codex can try a safe repair by backing up those files and rebuilding them.");
print_technical_details(startup_error);
crate::confirm("Repair Codex local data now? [y/N]: ")
}

pub(crate) async fn repair_files(
startup_error: &LocalStateDbStartupError,
) -> std::io::Result<Vec<PathBuf>> {
let state_db_path = startup_error.state_db_path();
let sqlite_home = state_db_path.parent().ok_or_else(|| {
std::io::Error::other("state database path does not have a parent directory")
})?;
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map_or(0, |duration| duration.as_secs());
let repair_suffix = format!("codex-repair-{timestamp}");
let mut backups = Vec::new();

match tokio::fs::metadata(sqlite_home).await {
Ok(metadata) if metadata.is_dir() => {}
Ok(_) => {
backups.push(backup_path(sqlite_home, &repair_suffix).await?);
tokio::fs::create_dir_all(sqlite_home).await?;
}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
tokio::fs::create_dir_all(sqlite_home).await?;
}
Err(err) => return Err(err),
}

let logs_db_path = codex_state::logs_db_path(sqlite_home);
for path in sqlite_paths(state_db_path)
.into_iter()
.chain(sqlite_paths(logs_db_path.as_path()))
{
if tokio::fs::try_exists(path.as_path()).await? {
backups.push(backup_path(path.as_path(), &repair_suffix).await?);
}
}

if backups.is_empty() {
return Err(std::io::Error::other(
"no repairable Codex local data files were found",
));
}

Ok(backups)
}

pub(crate) fn print_repair_backups(backups: &[PathBuf]) {
eprintln!("Backed up Codex local data before repair:");
for backup in backups {
eprintln!(" {}", backup.display());
}
eprintln!("Retrying startup with rebuilt local data...");
}

pub(crate) fn print_diagnostic_guidance(startup_error: &LocalStateDbStartupError) {
eprintln!("Codex couldn't start because its local database appears to be damaged.");
eprintln!("Run `codex doctor` to check your setup and get next-step guidance.");
eprintln!("If this keeps happening, share the technical details below when asking for help.");
print_technical_details(startup_error);
}

pub(crate) fn print_locked_guidance(startup_error: &LocalStateDbStartupError) {
eprintln!("Codex couldn't start because another Codex process is using its local data.");
eprintln!("Quit any other copies of Codex that may still be running, then try again.");
print_technical_details(startup_error);
}

fn sqlite_paths(db_path: &std::path::Path) -> Vec<PathBuf> {
let mut wal_path = db_path.as_os_str().to_os_string();
wal_path.push("-wal");
let mut shm_path = db_path.as_os_str().to_os_string();
shm_path.push("-shm");
vec![
db_path.to_path_buf(),
PathBuf::from(wal_path),
PathBuf::from(shm_path),
]
}

async fn backup_path(path: &std::path::Path, repair_suffix: &str) -> std::io::Result<PathBuf> {
let file_name = path.file_name().ok_or_else(|| {
std::io::Error::other(format!(
"cannot create a repair backup name for {}",
path.display()
))
})?;
let mut sequence = 0;
loop {
let mut backup_name = file_name.to_os_string();
backup_name.push(format!(".{repair_suffix}.{sequence}.bak"));
let backup_path = path.with_file_name(backup_name);
if !tokio::fs::try_exists(backup_path.as_path()).await? {
tokio::fs::rename(path, backup_path.as_path()).await?;
return Ok(backup_path);
}
sequence += 1;
}
}

fn print_technical_details(startup_error: &LocalStateDbStartupError) {
eprintln!("Technical details:");
eprintln!(" Location: {}", startup_error.state_db_path().display());
eprintln!(" Cause: {}", startup_error.detail());
}

#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use tempfile::TempDir;

#[tokio::test]
async fn repair_backs_up_owned_database_files() -> std::io::Result<()> {
let temp_dir = TempDir::new()?;
let state_path = codex_state::state_db_path(temp_dir.path());
let logs_path = codex_state::logs_db_path(temp_dir.path());
let state_sidecars = sqlite_paths(state_path.as_path());
tokio::fs::write(state_path.as_path(), b"state").await?;
tokio::fs::write(state_sidecars[1].as_path(), b"state-wal").await?;
tokio::fs::write(logs_path.as_path(), b"logs").await?;

let startup_error =
LocalStateDbStartupError::new(state_path.clone(), "corrupt".to_string());
let backups = repair_files(&startup_error).await?;

assert_eq!(backups.len(), 3);
assert!(!tokio::fs::try_exists(state_path.as_path()).await?);
assert!(!tokio::fs::try_exists(state_sidecars[1].as_path()).await?);
assert!(!tokio::fs::try_exists(logs_path.as_path()).await?);
for backup in backups {
assert!(tokio::fs::try_exists(backup.as_path()).await?);
}
Ok(())
}

#[tokio::test]
async fn repair_replaces_blocking_sqlite_home_file() -> std::io::Result<()> {
let temp_dir = TempDir::new()?;
let sqlite_home = temp_dir.path().join("sqlite-home");
tokio::fs::write(sqlite_home.as_path(), b"not-a-directory").await?;
let startup_error = LocalStateDbStartupError::new(
codex_state::state_db_path(sqlite_home.as_path()),
"File exists".to_string(),
);

let backups = repair_files(&startup_error).await?;

assert_eq!(backups.len(), 1);
assert!(tokio::fs::metadata(sqlite_home.as_path()).await?.is_dir());
assert!(tokio::fs::try_exists(backups[0].as_path()).await?);
Ok(())
}

#[test]
fn lock_failures_skip_repair() {
assert!(is_locked("database is locked"));
assert!(is_locked("database is busy"));
assert!(!is_locked("database disk image is malformed"));
}
}
4 changes: 2 additions & 2 deletions codex-rs/tui/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use codex_utils_cli::ApprovalModeCliArg;
use codex_utils_cli::CliConfigOverrides;
use codex_utils_cli::SharedCliOptions;

#[derive(Parser, Debug)]
#[derive(Parser, Clone, Debug)]
#[command(version)]
pub struct Cli {
/// Optional user prompt to start the session.
Expand Down Expand Up @@ -89,7 +89,7 @@ impl std::ops::DerefMut for Cli {
}
}

#[derive(Debug, Default)]
#[derive(Clone, Debug, Default)]
pub struct TuiSharedCliOptions(SharedCliOptions);

impl TuiSharedCliOptions {
Expand Down
58 changes: 52 additions & 6 deletions codex-rs/tui/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::legacy_core::format_exec_policy_error_with_source;
use crate::legacy_core::windows_sandbox::WindowsSandboxLevelExt;
use crate::session_resume::ResolveCwdOutcome;
use crate::session_resume::resolve_cwd_for_resume_or_fork;
pub use crate::startup_error::LocalStateDbStartupError;
use additional_dirs::add_dir_warning_message;
use app::App;
pub use app::AppExitInfo;
Expand Down Expand Up @@ -167,6 +168,7 @@ mod session_state;
mod shimmer;
mod skills_helpers;
mod slash_command;
mod startup_error;
mod startup_hooks_review;
mod status;
mod status_indicator_widget;
Expand Down Expand Up @@ -312,6 +314,21 @@ pub(crate) enum AppServerTarget {
Remote { endpoint: RemoteAppServerEndpoint },
}

async fn init_state_db_for_app_server_target(
config: &Config,
app_server_target: &AppServerTarget,
) -> std::io::Result<Option<StateDbHandle>> {
match app_server_target {
AppServerTarget::Embedded => state_db::try_init(config).await.map(Some).map_err(|err| {
std::io::Error::other(LocalStateDbStartupError::new(
codex_state::state_db_path(config.sqlite_home.as_path()),
err.to_string(),
))
}),
AppServerTarget::Remote { .. } => Ok(state_db::get_state_db(config).await),
}
}

fn remote_addr_has_explicit_port(addr: &str, parsed: &Url) -> bool {
let Some(host) = parsed.host_str() else {
return false;
Expand Down Expand Up @@ -509,7 +526,7 @@ pub(crate) async fn start_app_server_for_picker(
pub(crate) async fn start_embedded_app_server_for_picker(
config: &Config,
) -> color_eyre::Result<AppServerSession> {
let state_db = state_db::init(config).await;
let state_db = init_state_db_for_app_server_target(config, &AppServerTarget::Embedded).await?;
start_app_server_for_picker(
config,
&AppServerTarget::Embedded,
Expand Down Expand Up @@ -989,10 +1006,7 @@ pub async fn run_main(
otel.as_ref(),
otel_originator.as_str(),
);
let state_db = match &app_server_target {
AppServerTarget::Embedded => state_db::init(&config).await,
AppServerTarget::Remote { .. } => state_db::get_state_db(&config).await,
};
let state_db = init_state_db_for_app_server_target(&config, &app_server_target).await?;

let effective_toml = config.config_layer_stack.effective_config();
match effective_toml.try_into() {
Expand Down Expand Up @@ -1823,7 +1837,8 @@ mod tests {
async fn start_test_embedded_app_server(
config: Config,
) -> color_eyre::Result<InProcessAppServerClient> {
let state_db = state_db::init(&config).await;
let state_db =
init_state_db_for_app_server_target(&config, &AppServerTarget::Embedded).await?;
start_embedded_app_server(
Arg0DispatchPaths::default(),
config,
Expand Down Expand Up @@ -2416,6 +2431,37 @@ mod tests {
);
Ok(())
}

#[tokio::test]
async fn embedded_state_db_failure_is_typed_for_cli_recovery() -> color_eyre::Result<()> {
let temp_dir = TempDir::new()?;
let mut config = build_config(&temp_dir).await?;
let occupied_sqlite_home = temp_dir.path().join("sqlite-home");
std::fs::write(&occupied_sqlite_home, "occupied")?;
config.sqlite_home = occupied_sqlite_home.clone();

let err =
match init_state_db_for_app_server_target(&config, &AppServerTarget::Embedded).await {
Ok(_) => panic!("embedded startup should surface state db init failures"),
Err(err) => err,
};
let startup_error = err
.get_ref()
.and_then(|err| err.downcast_ref::<LocalStateDbStartupError>())
.expect("state db startup failure should retain its typed context");

assert_eq!(
startup_error.state_db_path(),
codex_state::state_db_path(occupied_sqlite_home.as_path()).as_path()
);
assert!(
startup_error
.detail()
.contains("failed to initialize state runtime"),
"startup error should preserve the underlying state db failure"
);
Ok(())
}
#[tokio::test]
#[serial]
async fn windows_shows_trust_prompt_with_sandbox() -> std::io::Result<()> {
Expand Down
Loading
Loading