Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **api-helper** Box path futures for faster compile times
- Upgrade `async-nats`
- `test-mm-lobby-echo` now handles `SIGTERM` and exits immediately, allows for less resource consumption while testing lobbies
- **mm** Dynamically sleep based on lobby's `create_ts` for Treafik config to update

### Security

Expand Down
13 changes: 10 additions & 3 deletions svc/pkg/job-run/worker/src/workers/create/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::collections::HashMap;

use chirp_worker::prelude::*;
use proto::backend::{self, pkg::*};
use tokio::time::Duration;

mod create_job;

Expand All @@ -10,6 +11,13 @@ mod create_job;
const MAX_PARAMETER_KEY_LEN: usize = 64;
const MAX_PARAMETER_VALUE_LEN: usize = 8_192; // 8 KB

/// HACK: Give the Traefik load balancer time to complete before considering the lobby ready.
///
/// Traefik updates every 500 ms and we give an extra 500 ms for grace.
///
/// See also svc/pkg/mm/worker/src/workers/lobby_ready_set.rs TRAEFIK_GRACE_MS.
const TRAEFIK_GRACE: Duration = Duration::from_millis(1_000);

lazy_static::lazy_static! {
static ref NOMAD_CONFIG: nomad_client::apis::configuration::Configuration =
nomad_util::config_from_env().unwrap();
Expand Down Expand Up @@ -127,9 +135,8 @@ async fn worker(ctx: &OperationContext<job_run::msg::create::Message>) -> Global
write_to_db_after_run(&ctx, run_id, &nomad_dispatched_job_id).await?;
db_write_perf.end();

// HACK: Wait for Treafik to pick up the new job. 500 ms is the polling interval for the
// Traefik HTTP provider.
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
// See TRAEFIK_GRACE_MS
tokio::time::sleep(TRAEFIK_GRACE).await;

msg!([ctx] job_run::msg::create_complete(run_id) {
run_id: Some(run_id.into()),
Expand Down
16 changes: 16 additions & 0 deletions svc/pkg/mm/worker/src/workers/lobby_ready_set.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
use chirp_worker::prelude::*;
use proto::backend::pkg::*;
use serde_json::json;
use tokio::time::Duration;

/// HACK: Give the Traefik load balancer time to complete before considering the lobby ready.
///
/// Traefik updates every 500 ms and we give an extra 500 ms for grace.
///
/// See also svc/pkg/job-run/worker/src/workers/create/mod.rs TRAEFIK_GRACE_MS
// const TRAEFIK_GRACE_MS: i64 = 750;
const TRAEFIK_GRACE_MS: i64 = 100;

lazy_static::lazy_static! {
static ref REDIS_SCRIPT: redis::Script = redis::Script::new(include_str!("../../redis-scripts/lobby_ready_set.lua"));
Expand Down Expand Up @@ -53,6 +62,13 @@ async fn worker(ctx: &OperationContext<mm::msg::lobby_ready::Message>) -> Global
}
};

// See TRAEFIK_GRACE_MS
let traefik_grace_ms = TRAEFIK_GRACE_MS - (util::timestamp::now() - lobby_row.create_ts);
if traefik_grace_ms > 0 {
tracing::info!(traefik_grace_ms, "sleeping for traefik grace");
tokio::time::sleep(Duration::from_millis(traefik_grace_ms as u64)).await;
}

msg!([ctx] mm::msg::lobby_ready_complete(lobby_id) {
lobby_id: Some(lobby_id.into()),
})
Expand Down