Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/rust-ci-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ jobs:
export CODEX_TEST_REMOTE_ENV_CONTAINER_NAME=codex-remote-test-env
source "${GITHUB_WORKSPACE}/scripts/test-remote-env.sh"
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}" >> "$GITHUB_ENV"
echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}" >> "$GITHUB_ENV"

- name: tests
id: test
Expand Down
226 changes: 27 additions & 199 deletions codex-rs/core/tests/common/test_codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ use std::process::Command;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::time::Duration;
use std::time::Instant;

use anyhow::Context;
use anyhow::Result;
Expand Down Expand Up @@ -44,7 +42,6 @@ use tempfile::TempDir;
use wiremock::MockServer;

use crate::PathBufExt;
use crate::RemoteEnvConfig;
use crate::TempDirExt;
use crate::get_remote_test_env;
use crate::load_default_config_for_test;
Expand All @@ -62,50 +59,15 @@ type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
type WorkspaceSetup = dyn FnOnce(AbsolutePathBuf, Arc<dyn ExecutorFileSystem>) -> BoxFuture<'static, Result<()>>
+ Send;
const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex";
/// Upper bound on how long `wait_for_remote_listen_url` keeps polling for the
/// exec-server's listen URL before it returns a timeout error.
const REMOTE_EXEC_SERVER_START_TIMEOUT: Duration = Duration::from_secs(5);
/// Pause between two consecutive polls in `wait_for_remote_listen_url`.
const REMOTE_EXEC_SERVER_POLL_INTERVAL: Duration = Duration::from_millis(25);
/// Process-wide counter bumped by `remote_exec_server_instance_id` so that every
/// call produces a different id.
static REMOTE_EXEC_SERVER_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Handle for an exec-server that `start_remote_exec_server` launched inside a
/// Docker container. Dropping the handle kills the process and removes the
/// files recorded here (see the `Drop` impl).
#[derive(Debug)]
struct RemoteExecServerProcess {
/// Name of the Docker container the server runs in; used as the `docker exec` target.
container_name: String,
/// PID of the server inside the container, parsed from the start script's `echo $!`.
pid: u32,
/// In-container path of the copied `codex` binary; removed with `rm -f` on drop.
remote_exec_server_path: String,
/// In-container file that receives the server's stdout/stderr; removed with `rm -f` on drop.
stdout_path: String,
/// Additional in-container paths that are removed with `rm -rf` on drop.
cleanup_paths: Vec<String>,
}

impl Drop for RemoteExecServerProcess {
    /// Best-effort teardown inside the container: kill the server if it is
    /// still alive, remove every registered cleanup path, then remove the
    /// copied binary and the stdout capture file.
    fn drop(&mut self) {
        let pid = self.pid;
        let remote_exec_server_path = &self.remote_exec_server_path;
        let stdout_path = &self.stdout_path;

        // Only emit an `rm -rf` clause when at least one path was registered.
        let cleanup_paths_script = match self.cleanup_paths.join(" ") {
            cleanup_paths if cleanup_paths.is_empty() => String::new(),
            cleanup_paths => format!("rm -rf {cleanup_paths}; "),
        };

        let script = format!(
            "if kill -0 {pid} 2>/dev/null; then kill {pid}; fi; {cleanup_paths_script}rm -f {remote_exec_server_path} {stdout_path}"
        );

        // `drop` cannot report failure, so the command result is discarded.
        let _ = docker_command_capture_stdout(["exec", &self.container_name, "sh", "-lc", &script]);
    }
}

impl RemoteExecServerProcess {
    /// Remembers `path` (rendered through `Path::display`) so that it is
    /// removed from the container when this handle is dropped.
    fn register_cleanup_path(&mut self, path: &Path) {
        let rendered = path.display().to_string();
        self.cleanup_paths.push(rendered);
    }
}
/// Name of the environment variable from which `remote_exec_server_url` reads
/// the exec-server URL; it must be set and non-empty for remote tests.
const REMOTE_EXEC_SERVER_URL_ENV_VAR: &str = "CODEX_TEST_REMOTE_EXEC_SERVER_URL";
/// Process-wide counter bumped by `remote_test_instance_id` so that every call
/// produces a different id.
static REMOTE_TEST_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);

#[derive(Debug)]
pub struct TestEnv {
environment: codex_exec_server::Environment,
cwd: AbsolutePathBuf,
local_cwd_temp_dir: Option<Arc<TempDir>>,
_remote_exec_server_process: Option<RemoteExecServerProcess>,
remote_container_name: Option<String>,
}

impl TestEnv {
Expand All @@ -117,7 +79,7 @@ impl TestEnv {
environment,
cwd,
local_cwd_temp_dir: Some(local_cwd_temp_dir),
_remote_exec_server_process: None,
remote_container_name: None,
})
}

Expand All @@ -138,12 +100,19 @@ impl TestEnv {
}
}

impl Drop for TestEnv {
    /// For a remote environment, removes the test working directory from the
    /// container; an environment without a container name needs no extra work here.
    fn drop(&mut self) {
        let Some(container_name) = self.remote_container_name.as_deref() else {
            return;
        };

        let script = format!("rm -rf {}", self.cwd.as_path().display());
        // Best-effort: `drop` cannot surface errors, so the result is discarded.
        let _ = docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &script]);
    }
}

pub async fn test_env() -> Result<TestEnv> {
match get_remote_test_env() {
Some(remote_env) => {
let mut remote_process = start_remote_exec_server(&remote_env)?;
let remote_ip = remote_container_ip(&remote_env.container_name)?;
let websocket_url = rewrite_websocket_host(&remote_process.listen_url, &remote_ip)?;
let websocket_url = remote_exec_server_url()?;
let environment = codex_exec_server::Environment::create(Some(websocket_url)).await?;
let cwd = remote_aware_cwd_path();
environment
Expand All @@ -154,182 +123,41 @@ pub async fn test_env() -> Result<TestEnv> {
/*sandbox*/ None,
)
.await?;
remote_process.process.register_cleanup_path(cwd.as_path());
Ok(TestEnv {
environment,
cwd,
local_cwd_temp_dir: None,
_remote_exec_server_process: Some(remote_process.process),
remote_container_name: Some(remote_env.container_name),
})
}
None => TestEnv::local().await,
}
}

/// What `start_remote_exec_server` returns: the process handle together with
/// the URL the server reported.
struct RemoteExecServerStart {
/// Handle whose `Drop` impl kills the server and removes its files in the container.
process: RemoteExecServerProcess,
/// `ws://` URL taken from the first line of the server's stdout file.
listen_url: String,
}

/// Copies the locally built `codex` and `codex-linux-sandbox` binaries into the
/// remote container, probes the sandbox binary, and launches
/// `codex exec-server` in the background.
///
/// The server is asked to listen on `ws://0.0.0.0:0`; the URL it reports is
/// obtained via `wait_for_remote_listen_url`.
///
/// # Errors
/// Fails when a binary cannot be resolved, any `docker` step fails, the
/// sandbox probe fails, the PID cannot be parsed, or the listen URL does not
/// appear in time.
fn start_remote_exec_server(remote_env: &RemoteEnvConfig) -> Result<RemoteExecServerStart> {
    let container_name = remote_env.container_name.as_str();

    // Every in-container path carries the same per-call instance id.
    let instance_id = remote_exec_server_instance_id();
    let remote_exec_server_path = format!("/tmp/codex-{instance_id}");
    let remote_linux_sandbox_path = format!("/tmp/codex-linux-sandbox-{instance_id}");
    let stdout_path = format!("/tmp/codex-exec-server-{instance_id}.stdout");

    let local_binary = codex_utils_cargo_bin::cargo_bin("codex")
        .context("resolve codex binary")?
        .to_string_lossy()
        .into_owned();
    let local_linux_sandbox = codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox")
        .context("resolve codex-linux-sandbox binary")?
        .to_string_lossy()
        .into_owned();

    // Upload both binaries first, then mark both executable (same order as before).
    let uploads = [
        (local_binary.as_str(), remote_exec_server_path.as_str()),
        (local_linux_sandbox.as_str(), remote_linux_sandbox_path.as_str()),
    ];
    for (local, remote) in uploads {
        let destination = format!("{container_name}:{remote}");
        docker_command_success(["cp", local, &destination])?;
    }
    for remote in [
        remote_exec_server_path.as_str(),
        remote_linux_sandbox_path.as_str(),
    ] {
        docker_command_success(["exec", container_name, "chmod", "+x", remote])?;
    }
    probe_remote_linux_sandbox(container_name, &remote_linux_sandbox_path)?;

    // Background the server and print its PID so it can be killed on drop.
    let start_script = format!(
        "rm -f {stdout_path}; \
nohup {remote_exec_server_path} exec-server --listen ws://0.0.0.0:0 > {stdout_path} 2>&1 & \
echo $!"
    );
    let pid_output =
        docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &start_script])?;
    let pid: u32 = pid_output
        .trim()
        .parse()
        .with_context(|| format!("parse remote exec-server PID from {pid_output:?}"))?;

    let listen_url = wait_for_remote_listen_url(container_name, &stdout_path)?;

    let process = RemoteExecServerProcess {
        container_name: container_name.to_owned(),
        pid,
        remote_exec_server_path,
        stdout_path,
        cleanup_paths: vec![remote_linux_sandbox_path],
    };
    Ok(RemoteExecServerStart {
        process,
        listen_url,
    })
}

/// Runs the copied sandbox binary once inside the container, with a read-only
/// policy and `/bin/true` as the command, to check that it works there.
///
/// # Errors
/// Fails when the policy cannot be serialized, `docker` cannot be run, or the
/// probe exits unsuccessfully (the error carries its stdout and stderr).
fn probe_remote_linux_sandbox(container_name: &str, remote_linux_sandbox_path: &str) -> Result<()> {
    let policy = serde_json::to_string(&SandboxPolicy::new_read_only_policy())
        .context("serialize remote sandbox probe policy")?;
    let probe_script = format!(
        "{remote_linux_sandbox_path} --sandbox-policy-cwd /tmp --sandbox-policy '{policy}' -- /bin/true"
    );

    let mut docker = Command::new("docker");
    docker.args(["exec", container_name, "sh", "-lc", &probe_script]);
    let output = docker
        .output()
        .with_context(|| format!("probe remote linux sandbox in container `{container_name}`"))?;

    if output.status.success() {
        return Ok(());
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(anyhow!(
        "remote linux sandbox probe failed in container `{container_name}`: stdout={} stderr={}",
        stdout.trim(),
        stderr.trim()
    ))
}

fn remote_aware_cwd_path() -> AbsolutePathBuf {
PathBuf::from(format!(
"/tmp/codex-core-test-cwd-{}",
remote_exec_server_instance_id()
remote_test_instance_id()
))
.abs()
}

/// Polls the first line of `stdout_path` inside the container until it starts
/// with `ws://`, and returns that line trimmed.
///
/// # Errors
/// Fails when a `docker exec` call fails, or when no `ws://` line shows up
/// before `REMOTE_EXEC_SERVER_START_TIMEOUT` has elapsed.
fn wait_for_remote_listen_url(container_name: &str, stdout_path: &str) -> Result<String> {
    let deadline = Instant::now() + REMOTE_EXEC_SERVER_START_TIMEOUT;
    // A missing file is not an error yet: `|| true` keeps the probe exit status at 0.
    let read_first_line = format!("head -n 1 {stdout_path} 2>/dev/null || true");

    loop {
        let output =
            docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &read_first_line])?;
        let candidate = output.trim();
        if candidate.starts_with("ws://") {
            return Ok(candidate.to_owned());
        }

        // The deadline is checked after the probe, so at least one attempt is always made.
        if Instant::now() < deadline {
            std::thread::sleep(REMOTE_EXEC_SERVER_POLL_INTERVAL);
            continue;
        }

        return Err(anyhow!(
            "timed out waiting for remote exec-server listen URL in container `{container_name}` after {REMOTE_EXEC_SERVER_START_TIMEOUT:?}"
        ));
    }
}

/// Builds an id of the form `<process id>-<counter>`, where the counter is the
/// value of `REMOTE_EXEC_SERVER_INSTANCE_COUNTER` before this call increments it.
fn remote_exec_server_instance_id() -> String {
    let pid = std::process::id();
    let instance = REMOTE_EXEC_SERVER_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
    format!("{pid}-{instance}")
}

fn remote_container_ip(container_name: &str) -> Result<String> {
let ip = docker_command_capture_stdout([
"inspect",
"-f",
"{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
container_name,
])?;
let ip = ip.trim();
if ip.is_empty() {
fn remote_exec_server_url() -> Result<String> {
let listen_url = std::env::var(REMOTE_EXEC_SERVER_URL_ENV_VAR).with_context(|| {
format!("{REMOTE_EXEC_SERVER_URL_ENV_VAR} must be set for remote tests")
})?;
let listen_url = listen_url.trim();
if listen_url.is_empty() {
return Err(anyhow!(
"container `{container_name}` has no IP address; cannot connect to remote exec-server"
"{REMOTE_EXEC_SERVER_URL_ENV_VAR} must not be empty"
));
}
Ok(ip.to_string())
}

fn rewrite_websocket_host(listen_url: &str, host: &str) -> Result<String> {
let Some(address) = listen_url.strip_prefix("ws://") else {
return Err(anyhow!(
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
));
};
let Some((_, port)) = address.rsplit_once(':') else {
return Err(anyhow!(
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
));
};
Ok(format!("ws://{host}:{port}"))
Ok(listen_url.to_string())
}

fn docker_command_success<const N: usize>(args: [&str; N]) -> Result<()> {
let output = Command::new("docker")
.args(args)
.output()
.with_context(|| format!("run docker {args:?}"))?;
if !output.status.success() {
return Err(anyhow!(
"docker {:?} failed: stdout={} stderr={}",
args,
String::from_utf8_lossy(&output.stdout).trim(),
String::from_utf8_lossy(&output.stderr).trim()
));
}
Ok(())
/// Builds an id of the form `<process id>-<counter>`, where the counter is the
/// value of `REMOTE_TEST_INSTANCE_COUNTER` before this call increments it.
fn remote_test_instance_id() -> String {
    let pid = std::process::id();
    let instance = REMOTE_TEST_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
    format!("{pid}-{instance}")
}

fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<String> {
Expand Down
50 changes: 50 additions & 0 deletions scripts/test-remote-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ is_sourced() {
setup_remote_env() {
local container_name
local codex_binary_path
local container_ip
local remote_codex_path
local remote_exec_server_pid
local remote_exec_server_port
local remote_exec_server_stdout_path

container_name="${CODEX_TEST_REMOTE_ENV_CONTAINER_NAME:-codex-remote-test-env-local-$(date +%s)-${RANDOM}}"
codex_binary_path="${REPO_ROOT}/codex-rs/target/debug/codex"
Expand Down Expand Up @@ -59,14 +64,58 @@ setup_remote_env() {
return 1
fi

if [[ -z "${CODEX_TEST_REMOTE_EXEC_SERVER_URL:-}" ]]; then
remote_codex_path="/tmp/codex-remote-env/codex"
remote_exec_server_port="31987"
remote_exec_server_stdout_path="/tmp/codex-remote-env/exec-server.stdout"
docker exec "${container_name}" sh -lc "mkdir -p /tmp/codex-remote-env"
docker cp "${codex_binary_path}" "${container_name}:${remote_codex_path}"
docker exec "${container_name}" chmod +x "${remote_codex_path}"
remote_exec_server_pid="$(
docker exec "${container_name}" sh -lc \
"rm -f ${remote_exec_server_stdout_path}; nohup ${remote_codex_path} exec-server --listen ws://0.0.0.0:${remote_exec_server_port} > ${remote_exec_server_stdout_path} 2>&1 & echo \$!"
)"
wait_for_remote_exec_server_port "${container_name}" "${remote_exec_server_port}" "${remote_exec_server_stdout_path}"
container_ip="$(
docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "${container_name}"
)"
if [[ -z "${container_ip}" ]]; then
echo "container ${container_name} has no IP address" >&2
docker rm -f "${container_name}" >/dev/null 2>&1 || true
return 1
fi
export CODEX_TEST_REMOTE_EXEC_SERVER_PID="${remote_exec_server_pid}"
export CODEX_TEST_REMOTE_EXEC_SERVER_URL="ws://${container_ip}:${remote_exec_server_port}"
fi

export CODEX_TEST_REMOTE_ENV="${container_name}"
}

# Waits up to 5 seconds for a TCP connection to 127.0.0.1:<port> to succeed
# from inside the container (probed with python3).
#   $1 - container name
#   $2 - port the exec-server should be listening on
#   $3 - in-container path of the server's stdout capture file
# Returns 0 once the port accepts a connection. On timeout, prints a message
# and the captured server output to stderr and returns 1.
wait_for_remote_exec_server_port() {
  local container_name="$1"
  local port="$2"
  local stdout_path="$3"
  local deadline=$((SECONDS + 5))
  local probe="import socket; socket.create_connection(('127.0.0.1', ${port}), timeout=0.2).close()"

  until (( SECONDS >= deadline )); do
    if docker exec "${container_name}" python3 -c "${probe}" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.025
  done

  echo "timed out waiting for remote exec-server on ${container_name}:${port}" >&2
  # Dump whatever the server wrote to help diagnose the failure; never fail here.
  docker exec "${container_name}" sh -lc "cat ${stdout_path} 2>/dev/null || true" >&2 || true
  return 1
}

# Tears down the remote test environment: force-removes the container named by
# CODEX_TEST_REMOTE_ENV (if set) and clears the exported remote-test variables.
codex_remote_env_cleanup() {
  local container="${CODEX_TEST_REMOTE_ENV:-}"

  if [[ -n "${container}" ]]; then
    # Removal is best-effort; a container that is already gone is not an error.
    docker rm -f "${container}" >/dev/null 2>&1 || true
    unset CODEX_TEST_REMOTE_ENV
  fi

  unset CODEX_TEST_REMOTE_EXEC_SERVER_PID CODEX_TEST_REMOTE_EXEC_SERVER_URL
}

if ! is_sourced; then
Expand All @@ -79,6 +128,7 @@ set -euo pipefail
if setup_remote_env; then
status=0
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}"
echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}"
echo "Remote env ready. Run your command, then call: codex_remote_env_cleanup"
else
status=$?
Expand Down
Loading