Skip to content

Commit

Permalink
log duration and completed timestamp (#824)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChuckHend committed May 31, 2024
1 parent 1722b3c commit 7d30804
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 9 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion inference-gateway/Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
DATABASE_URL:=postgresql://postgres:postgres@localhost:5432/postgres
LLM_SERVICE_HOST_PORT=http://localhost:8000
RUST_LOG=debug
SQLX_OFFLINE:=true

fmt:
cargo sqlx prepare --database-url ${DATABASE_URL}
cargo +nightly fmt --all
cargo clippy

run:
LLM_SERVICE_HOST_PORT=${LLM_SERVICE_HOST_PORT} RUST_LOG=${RUST_LOG} cargo run

run-migrations:
sqlx migrate run
sqlx migrate run --database-url ${DATABASE_URL}

run-postgres:
docker compose up -d postgres

test:
cargo test -- --ignored
2 changes: 1 addition & 1 deletion inference-gateway/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
services:
postgres:
restart: always
image: postgres:16
image: quay.io/tembo/timeseries-pg:latest
ports:
- 5432:5432
environment:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Migration: add completion-time and duration columns to inference.requests.
-- Ensure the timeseries extension is installed. IF NOT EXISTS keeps this from
-- failing when it is already present; CASCADE also installs any extensions it
-- depends on.
CREATE EXTENSION IF NOT EXISTS timeseries CASCADE;
-- Completion timestamp (with time zone). NOT NULL with DEFAULT now() so the
-- column can be added to a table that already holds rows, and so inserts that
-- omit the column still receive a value.
ALTER TABLE inference.requests ADD COLUMN completed_at timestamp with time zone not null DEFAULT now();
-- Request duration; milliseconds per the column name. NOT NULL, and falls back
-- to 0 when an insert does not supply it.
ALTER TABLE inference.requests ADD COLUMN duration_ms integer not null default 0;
14 changes: 9 additions & 5 deletions inference-gateway/src/routes/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ pub async fn forward_request(
new_url.set_path(path);
new_url.set_query(req.uri().query());

// For now, only POST is supported
// log request duration
let start = std::time::Instant::now();
let resp = client.post(new_url).json(&body).send().await?;
let duration = start.elapsed().as_millis() as i32;
if resp.status().is_success() {
let llm_resp = resp.json::<serde_json::Value>().await?;
let model = llm_resp
Expand All @@ -45,7 +47,7 @@ pub async fn forward_request(
})?
.clone(),
)?;
if let Err(e) = insert_data(x_tembo, model, usage, &dbclient).await {
if let Err(e) = insert_data(x_tembo, model, usage, duration, &dbclient).await {
log::error!("{}", e);
}
Ok(HttpResponse::Ok().json(llm_resp))
Expand All @@ -60,15 +62,17 @@ async fn insert_data(
org: &str,
model: &str,
usage: Usage,
duration_ms: i32,
con: &Pool<Postgres>,
) -> Result<(), PlatformError> {
let _r = sqlx::query!(
"INSERT INTO inference.requests ( organization_id, model, prompt_tokens, completion_tokens )
VALUES ($1, $2, $3, $4)",
"INSERT INTO inference.requests ( organization_id, model, prompt_tokens, completion_tokens, duration_ms )
VALUES ($1, $2, $3, $4, $5)",
org,
model,
usage.prompt_tokens,
usage.completion_tokens
usage.completion_tokens,
duration_ms
)
.execute(con)
.await?;
Expand Down

0 comments on commit 7d30804

Please sign in to comment.