From 3fa0611b336f45c014d550d03876412e955936ff Mon Sep 17 00:00:00 2001 From: NathanFlurry Date: Sun, 30 Jun 2024 22:01:39 +0000 Subject: [PATCH] chore: read job-runner from ats (#968) Fixes RVT-3679 Fixes RVT-3690 --- infra/tf/infra_artifacts/job_runner.tf | 6 +- infra/tf/infra_artifacts/output.tf | 4 + lib/bolt/core/src/context/service.rs | 7 ++ lib/bolt/core/src/dep/terraform/output.rs | 9 ++ .../mm/worker/src/workers/lobby_create/mod.rs | 109 ++++++++++++++---- 5 files changed, 110 insertions(+), 25 deletions(-) create mode 100644 infra/tf/infra_artifacts/output.tf diff --git a/infra/tf/infra_artifacts/job_runner.tf b/infra/tf/infra_artifacts/job_runner.tf index bbc367c87..cf646096a 100644 --- a/infra/tf/infra_artifacts/job_runner.tf +++ b/infra/tf/infra_artifacts/job_runner.tf @@ -46,8 +46,12 @@ resource "null_resource" "job_runner_build" { resource "aws_s3_object" "job_runner_binary_upload" { depends_on = [null_resource.job_runner_build] + lifecycle { + prevent_destroy = true + } + bucket = "${var.namespace}-bucket-infra-artifacts" - key = "job-runner/job-runner" + key = "job-runner/${local.job_runner_src_hash}/job-runner" source = local.job_runner_dst_binary_path } diff --git a/infra/tf/infra_artifacts/output.tf b/infra/tf/infra_artifacts/output.tf new file mode 100644 index 000000000..cc1307293 --- /dev/null +++ b/infra/tf/infra_artifacts/output.tf @@ -0,0 +1,4 @@ +output "job_runner_binary_key" { + value = aws_s3_object.job_runner_binary_upload.key +} + diff --git a/lib/bolt/core/src/context/service.rs b/lib/bolt/core/src/context/service.rs index b55455412..e2c1263bb 100644 --- a/lib/bolt/core/src/context/service.rs +++ b/lib/bolt/core/src/context/service.rs @@ -889,6 +889,13 @@ impl ServiceContextData { ); } + // Infra Artifacts + let infra_artifacts_output = terraform::output::read_infra_artifacts(&project_ctx).await; + env.insert( + "JOB_RUNNER_BINARY_KEY".into(), + (*infra_artifacts_output.job_runner_binary_key).clone(), + ); + // OpenGB if 
project_ctx.ns().rivet.opengb.is_some() { let opengb_output = terraform::output::read_opengb(&project_ctx).await; diff --git a/lib/bolt/core/src/dep/terraform/output.rs b/lib/bolt/core/src/dep/terraform/output.rs index 4cebb211b..a99f58281 100644 --- a/lib/bolt/core/src/dep/terraform/output.rs +++ b/lib/bolt/core/src/dep/terraform/output.rs @@ -49,6 +49,11 @@ pub struct DnsZones { pub job: String, } +#[derive(Debug, Clone, Deserialize)] +pub struct InfraArtifacts { + pub job_runner_binary_key: TerraformOutputValue, +} + #[derive(Debug, Clone, Deserialize)] pub struct OpenGb { pub dispatcher_namespace_name: TerraformOutputValue, @@ -135,6 +140,10 @@ pub async fn read_redis(ctx: &ProjectContext) -> Redis { } } +pub async fn read_infra_artifacts(ctx: &ProjectContext) -> InfraArtifacts { + read_plan::<InfraArtifacts>(ctx, "infra_artifacts").await +} + pub async fn read_opengb(ctx: &ProjectContext) -> OpenGb { read_plan::<OpenGb>(ctx, "opengb").await } diff --git a/svc/pkg/mm/worker/src/workers/lobby_create/mod.rs b/svc/pkg/mm/worker/src/workers/lobby_create/mod.rs index 8620f7955..7042899d5 100644 --- a/svc/pkg/mm/worker/src/workers/lobby_create/mod.rs +++ b/svc/pkg/mm/worker/src/workers/lobby_create/mod.rs @@ -596,7 +596,7 @@ async fn create_docker_job( let lobby_group_id = unwrap_ref!(lobby_group_meta.lobby_group_id).as_uuid(); let region_id = unwrap_ref!(region.region_id).as_uuid(); - let job_runner_binary_url = resolve_job_runner_binary_url(ctx).await?; + let job_runner_binary_url = resolve_job_runner_binary_url(ctx, region).await?; let resolve_perf = ctx.perf().start("resolve-image-artifact-url").await; let build_id = unwrap_ref!(runtime.build_id).as_uuid(); @@ -752,31 +752,92 @@ async fn create_docker_job( #[tracing::instrument] async fn resolve_job_runner_binary_url( ctx: &OperationContext, + region: &backend::region::Region, ) -> GlobalResult { - // Build client - let s3_client = s3_util::Client::from_env_opt( - "bucket-infra-artifacts", - s3_util::Provider::default()?, - 
s3_util::EndpointKind::External, - ) - .await?; - let presigned_req = s3_client - .get_object() - .bucket(s3_client.bucket()) - .key("job-runner/job-runner") - .presigned( - s3_util::aws_sdk_s3::presigning::config::PresigningConfig::builder() - .expires_in(std::time::Duration::from_secs(15 * 60)) - .build()?, - ) - .await?; + // Get provider + let provider = s3_util::Provider::default()?; + + let file_name = std::env::var("JOB_RUNNER_BINARY_KEY")?; + + // Build URL + let mm_lobby_delivery_method = unwrap!( + backend::cluster::BuildDeliveryMethod::from_i32(region.build_delivery_method), + "invalid datacenter build delivery method" + ); + match mm_lobby_delivery_method { + backend::cluster::BuildDeliveryMethod::S3Direct => { + tracing::info!("job runner using s3 direct delivery"); + + // Build client + let s3_client = s3_util::Client::from_env_opt( + "bucket-infra-artifacts", + provider, + s3_util::EndpointKind::External, + ) + .await?; + let presigned_req = s3_client + .get_object() + .bucket(s3_client.bucket()) + .key(file_name) + .presigned( + s3_util::aws_sdk_s3::presigning::config::PresigningConfig::builder() + .expires_in(std::time::Duration::from_secs(15 * 60)) + .build()?, + ) + .await?; + + let addr = presigned_req.uri().clone(); + + let addr_str = addr.to_string(); + tracing::info!(addr = %addr_str, "resolved job runner presigned request"); + + Ok(addr_str) + } + backend::cluster::BuildDeliveryMethod::TrafficServer => { + tracing::info!("job runner using traffic server delivery"); + + let region_id = unwrap_ref!(region.region_id).as_uuid(); + + // Choose a random ATS node to pull from + let (ats_vlan_ip,) = sql_fetch_one!( + [ctx, (IpAddr,)] + " + WITH sel AS ( + -- Select candidate vlan ips + SELECT + vlan_ip + FROM db_cluster.servers + WHERE + datacenter_id = $1 AND + pool_type = $2 AND + vlan_ip IS NOT NULL AND + install_complete_ts IS NOT NULL AND + drain_ts IS NULL AND + cloud_destroy_ts IS NULL + ) + SELECT vlan_ip + FROM sel + ORDER BY random() + 
LIMIT 1 + ", + // NOTE: region_id is just the old name for datacenter_id + &region_id, + backend::cluster::PoolType::Ats as i64, + ) + .await?; - let addr = presigned_req.uri().clone(); + let addr = format!( + "http://{vlan_ip}:8080/s3-cache/{provider}/{namespace}-bucket-infra-artifacts/{file_name}", + vlan_ip = ats_vlan_ip, + provider = heck::KebabCase::to_kebab_case(provider.as_str()), + namespace = util::env::namespace(), + ); - let addr_str = addr.to_string(); - tracing::info!(addr = %addr_str, "resolved job runner presigned request"); + tracing::info!(%addr, "resolved artifact s3 url"); - Ok(addr_str) + Ok(addr) + } + } } #[tracing::instrument] @@ -822,7 +883,7 @@ async fn resolve_image_artifact_url( ); match mm_lobby_delivery_method { backend::cluster::BuildDeliveryMethod::S3Direct => { - tracing::info!("using s3 direct delivery"); + tracing::info!("image artifact using s3 direct delivery"); let bucket = "bucket-build"; @@ -851,7 +912,7 @@ async fn resolve_image_artifact_url( Ok(addr_str) } backend::cluster::BuildDeliveryMethod::TrafficServer => { - tracing::info!("using traffic server delivery"); + tracing::info!("image artifact using traffic server delivery"); let region_id = unwrap_ref!(region.region_id).as_uuid();