Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/tracevault-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ rand = "0.8"
base64 = "0.22"
git2 = "0.20"
reqwest = { version = "0.13", features = ["json", "stream"] }
dashmap = "6"
futures-util = "0.3"
async-trait = "0.1"
aes-gcm = "0.10"
dotenvy = "0.15.7"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Adds a per-credential concurrency cap for the transparent Anthropic LLM
-- proxy (issue softwaremill/tracevault#210, parent #181).
--
-- `max_concurrent` is the largest number of proxy requests a single
-- credential may have in flight at once. The limit is enforced in-process:
-- AppState holds a tokio Semaphore per credential, sized from this column
-- the first time the credential is used.
--
-- The default of 8 suits typical multi-agent setups (Claude Code + GSD2)
-- and stays well under any paid Anthropic tier. Values are restricted to
-- 1 through 256: the floor of 1 prevents accidental lockout, the ceiling
-- of 256 rejects user-typed nonsense values.
ALTER TABLE user_anthropic_keys
    ADD COLUMN max_concurrent INTEGER NOT NULL DEFAULT 8
    CHECK (max_concurrent BETWEEN 1 AND 256);
145 changes: 112 additions & 33 deletions crates/tracevault-server/src/api/me.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,24 @@ use crate::AppState;
/// JSON body returned by `get_anthropic_key_status`.
pub struct AnthropicKeyStatus {
/// `true` when the status lookup found a stored row for the caller,
/// `false` otherwise.
pub configured: bool,
/// The `configured_at` value read from the stored row; `None` when no
/// key is configured. Presumably the time the key was last stored —
/// confirm against `UserAnthropicKeyRepo`.
pub configured_at: Option<DateTime<Utc>>,
/// Per-credential proxy concurrency cap. `None` when no key is
/// configured; otherwise the value stored on the row.
pub max_concurrent: Option<i32>,
}

/// Request body for `PUT /api/v1/me/anthropic-key`.
///
/// Both fields are optional on the wire so that a caller can rotate the
/// key, change the cap, or do both in one request. The handler rejects a
/// body that carries neither.
#[derive(Deserialize)]
pub struct PutAnthropicKeyRequest {
    /// Optional new Anthropic key. When omitted the existing ciphertext is
    /// preserved — this is the "cap only" update path, used from the UI
    /// when the user wants to change `max_concurrent` without rotating
    /// the key. At least one of `key` or `max_concurrent` must be present.
    #[serde(default)]
    pub key: Option<String>,
    /// Optional per-credential proxy concurrency cap. Omit to keep the
    /// existing value on update, or fall back to the DB default (8) on
    /// first insert.
    #[serde(default)]
    pub max_concurrent: Option<i32>,
}

/// Reject the synthetic nil user_id that the AuthUser extractor returns when
Expand All @@ -50,57 +63,123 @@ pub async fn get_anthropic_key_status(
auth: AuthUser,
) -> Result<Json<AnthropicKeyStatus>, AppError> {
let user_id = require_real_user(&auth)?;
let configured_at = UserAnthropicKeyRepo::configured_at(&state.pool, user_id).await?;
Ok(Json(AnthropicKeyStatus {
configured: configured_at.is_some(),
configured_at,
let status = UserAnthropicKeyRepo::status(&state.pool, user_id).await?;
Ok(Json(match status {
Some(s) => AnthropicKeyStatus {
configured: true,
configured_at: Some(s.configured_at),
max_concurrent: Some(s.max_concurrent),
},
None => AnthropicKeyStatus {
configured: false,
configured_at: None,
max_concurrent: None,
},
}))
}

/// PUT /api/v1/me/anthropic-key
///
/// Upserts the caller's Anthropic key, encrypted with the server's master
/// encryption key. Returns 204 on success.
/// Upserts the caller's Anthropic key and/or its concurrency cap. The
/// request body has two optional fields, `key` and `max_concurrent`, but
/// at least one must be present. Use cases:
///
/// * `{ key: "sk-ant-...", max_concurrent: 16 }` — first-time setup or
/// full rotation.
/// * `{ key: "sk-ant-..." }` — rotate the key; cap preserved (default 8
/// applied if no row yet).
/// * `{ max_concurrent: 16 }` — change only the cap; key must already
/// exist (400 otherwise).
///
/// In all cases the in-memory per-credential semaphore for this user is
/// dropped from the DashMap so the *next* proxy request rebuilds it
/// against the new cap value. In-flight requests keep their permits on
/// the old (dropped) semaphore for the lifetime of their response,
/// effectively letting the cap change apply at the natural next quiet
/// point.
pub async fn put_anthropic_key(
State(state): State<AppState>,
auth: AuthUser,
Json(req): Json<PutAnthropicKeyRequest>,
) -> Result<StatusCode, AppError> {
let user_id = require_real_user(&auth)?;

let key = req.key.trim();
if key.is_empty() {
if req.key.is_none() && req.max_concurrent.is_none() {
return Err(AppError::BadRequest(
"Anthropic key must not be empty".into(),
"Request must include `key`, `max_concurrent`, or both".into(),
));
}
// Real Anthropic keys are ~110 chars; cap at 256 to leave generous
// headroom for future formats while preventing the endpoint from
// accepting a ~2 MB junk string and persisting it encrypted on the
// user_anthropic_keys row.
if key.len() > 256 {
return Err(AppError::BadRequest(
"Anthropic key is unreasonably long (max 256 chars)".into(),
));

// Validate max_concurrent if the caller specified one. Bounds mirror
// the DB CHECK constraint so we fail fast with a clear 400 instead of
// surfacing a generic constraint-violation 500 from the upsert.
if let Some(n) = req.max_concurrent {
if !(1..=256).contains(&n) {
return Err(AppError::BadRequest(
"max_concurrent must be between 1 and 256".into(),
));
}
}
// Anthropic API keys begin with `sk-ant-` (modern format). We reject
// anything that doesn't look like one to catch obvious paste mistakes
// (TV session token, empty string, environment variable name, etc.).
// We do *not* validate the key against api.anthropic.com here — that
// would couple this endpoint to upstream availability.
if !key.starts_with("sk-ant-") {
return Err(AppError::BadRequest(
"Anthropic key must start with 'sk-ant-'".into(),
));

match req.key.as_deref() {
Some(raw_key) => {
let key = raw_key.trim();
if key.is_empty() {
return Err(AppError::BadRequest(
"Anthropic key must not be empty".into(),
));
}
// Real Anthropic keys are ~110 chars; cap at 256 to leave generous
// headroom for future formats while preventing the endpoint from
// accepting a ~2 MB junk string and persisting it encrypted.
if key.len() > 256 {
return Err(AppError::BadRequest(
"Anthropic key is unreasonably long (max 256 chars)".into(),
));
}
if !key.starts_with("sk-ant-") {
return Err(AppError::BadRequest(
"Anthropic key must start with 'sk-ant-'".into(),
));
}
let encryption_key = state.encryption_key.as_deref().ok_or_else(|| {
AppError::Internal(
"Server is not configured with an encryption key; cannot store Anthropic keys"
.into(),
)
})?;
UserAnthropicKeyRepo::upsert(
&state.pool,
encryption_key,
user_id,
key,
req.max_concurrent,
)
.await?;
}
None => {
// Settings-only update — the caller explicitly passed
// max_concurrent without a new key. Requires an existing row;
// otherwise there is nothing to update and we refuse with 400
// rather than silently inserting a half-row.
let new_cap = req.max_concurrent.expect("checked above");
let updated =
UserAnthropicKeyRepo::update_max_concurrent(&state.pool, user_id, new_cap).await?;
if !updated {
return Err(AppError::BadRequest(
"Cannot update settings: no Anthropic key configured yet".into(),
));
}
}
}

let encryption_key = state.encryption_key.as_deref().ok_or_else(|| {
AppError::Internal(
"Server is not configured with an encryption key; cannot store Anthropic keys".into(),
)
})?;
// Flush the in-memory per-credential semaphore so the next request
// rebuilds it against the new cap (or the freshly-persisted row).
// In-flight requests still hold permits on the old, now-orphaned
// Arc<Semaphore> — when they finish they release naturally and the
// arc drops.
state.proxy_per_credential_semaphores.remove(&user_id);

UserAnthropicKeyRepo::upsert(&state.pool, encryption_key, user_id, key).await?;
Ok(StatusCode::NO_CONTENT)
}

Expand Down
Loading
Loading