Skip to content

Commit e77a4a5

Browse files
committed
fix(infra): dynamically generate nomad server count in install script
1 parent ec2178d commit e77a4a5

File tree

7 files changed

+40
-20
lines changed

7 files changed

+40
-20
lines changed

infra/tf/k8s_infra/nomad.tf

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,9 @@
1313
# complicated + adds another point of failure and (b) it doesn't fix the problem with Nomad server addresses changing.
1414

1515
locals {
16-
# !!! DO NOT CHANGE !!!
17-
#
18-
# This value must be 3, 5, or 7. More = better redundancy, but does not make things faster.
19-
#
20-
# See https://developer.hashicorp.com/nomad/tutorials/enterprise/production-reference-architecture-vm-with-consul
2116
nomad_server_count = var.deploy_method_cluster ? 3 : 1
2217

23-
nomad_server_addrs = [for i in range(0, local.nomad_server_count): "127.0.0.1:${6000 + i}"]
18+
nomad_server_addrs = [for i in range(0, var.nomad_server_count): "127.0.0.1:${6000 + i}"]
2419
nomad_server_addrs_escaped = [for addr in local.nomad_server_addrs : "\"${addr}\""]
2520
nomad_server_configmap_data = {
2621
"server.hcl" = <<-EOT
@@ -36,7 +31,7 @@ locals {
3631
3732
server {
3833
enabled = true
39-
bootstrap_expect = ${local.nomad_server_count}
34+
bootstrap_expect = ${var.nomad_server_count}
4035
4136
server_join {
4237
retry_join = [${join(", ", local.nomad_server_addrs_escaped)}]
@@ -128,7 +123,7 @@ resource "kubernetes_service" "nomad_server" {
128123
}
129124

130125
resource "kubernetes_service" "nomad_server_indexed" {
131-
count = var.edge_enabled ? local.nomad_server_count : 0
126+
count = var.edge_enabled ? var.nomad_server_count : 0
132127

133128
metadata {
134129
namespace = kubernetes_namespace.nomad.0.metadata.0.name
@@ -202,7 +197,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
202197
}
203198
}
204199
spec {
205-
replicas = local.nomad_server_count
200+
replicas = var.nomad_server_count
206201

207202
selector {
208203
match_labels = {
@@ -324,7 +319,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
324319

325320
# Entrypoints
326321
flatten([
327-
for i in range(0, local.nomad_server_count):
322+
for i in range(0, var.nomad_server_count):
328323
[
329324
"--entryPoints.nomad-${i}-rpc-tcp.address=:${5000 + i}/tcp",
330325
"--entryPoints.nomad-${i}-serf-tcp.address=:${6000 + i}/tcp",
@@ -334,7 +329,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
334329
])
335330

336331
dynamic "port" {
337-
for_each = [for i in range(0, local.nomad_server_count) : i]
332+
for_each = [for i in range(0, var.nomad_server_count) : i]
338333
content {
339334
name = "n-${port.value}-rpc-tcp"
340335
container_port = 5000 + port.value
@@ -343,7 +338,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
343338
}
344339

345340
dynamic "port" {
346-
for_each = [for i in range(0, local.nomad_server_count) : i]
341+
for_each = [for i in range(0, var.nomad_server_count) : i]
347342
content {
348343
name = "n-${port.value}-serf-tcp"
349344
container_port = 6000 + port.value
@@ -352,7 +347,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
352347
}
353348

354349
dynamic "port" {
355-
for_each = [for i in range(0, local.nomad_server_count) : i]
350+
for_each = [for i in range(0, var.nomad_server_count) : i]
356351
content {
357352
name = "n-${port.value}-serf-udp"
358353
container_port = 6000 + port.value
@@ -421,7 +416,7 @@ resource "kubernetes_config_map" "nomad_server_sidecar_traefik_config" {
421416
}
422417

423418
data = {
424-
for i in range(0, local.nomad_server_count):
419+
for i in range(0, var.nomad_server_count):
425420
"nomad-${i}.yaml" => yamlencode({
426421
tcp = {
427422
routers = {

infra/tf/k8s_infra/vars.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ variable "authenticate_all_docker_hub_pulls" {
6060
}
6161

6262
# MARK: Nomad
63+
variable "nomad_server_count" {
64+
type = number
65+
}
66+
6367
variable "edge_enabled" {
6468
type = bool
6569
}

lib/bolt/core/src/context/project.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,18 @@ impl ProjectContextData {
930930
.and_then(|dns| dns.provider.as_ref())
931931
.is_some()
932932
}
933+
934+
pub fn nomad_server_count(&self) -> usize {
935+
// !!! DO NOT CHANGE !!!
936+
//
937+
// This value must be 1, 3, 5, or 7. More = better redundancy, but does not make things faster.
938+
//
939+
// See https://developer.hashicorp.com/nomad/tutorials/enterprise/production-reference-architecture-vm-with-consul
940+
match self.ns().cluster.kind {
941+
config::ns::ClusterKind::Distributed { .. } => 3,
942+
config::ns::ClusterKind::SingleNode { .. } => 1,
943+
}
944+
}
933945
}
934946

935947
pub struct S3Credentials {

lib/bolt/core/src/context/service.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1048,6 +1048,12 @@ impl ServiceContextData {
10481048
env.insert("RIVET_PROFANITY_FILTER_DISABLE".into(), "1".into());
10491049
}
10501050

1051+
// Nomad
1052+
env.insert(
1053+
"NOMAD_SERVER_COUNT".into(),
1054+
project_ctx.nomad_server_count().to_string(),
1055+
);
1056+
10511057
if let Some(provisioning) = &project_ctx.ns().rivet.provisioning {
10521058
if self.depends_on_cluster_config() || matches!(run_context, RunContext::Test { .. }) {
10531059
env.insert(
@@ -1303,7 +1309,6 @@ impl ServiceContextData {
13031309

13041310
// if self.depends_on_infra() && project_ctx.ns().rivet.provisioning.is_some() {
13051311
let tls = terraform::output::read_tls(&project_ctx).await;
1306-
let k8s_infra = terraform::output::read_k8s_infra(&project_ctx).await;
13071312

13081313
env.insert(
13091314
"TLS_CERT_LOCALLY_SIGNED_JOB_CERT_PEM".into(),

lib/bolt/core/src/dep/terraform/gen.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ async fn vars(ctx: &ProjectContext) {
224224
}
225225

226226
// Edge nodes
227+
vars.insert("nomad_server_count".into(), json!(ctx.nomad_server_count()));
227228
vars.insert(
228229
"edge_enabled".into(),
229230
json!(config.rivet.provisioning.is_some()),

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/nomad.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ pub fn install() -> String {
44
include_str!("../files/nomad_install.sh").to_string()
55
}
66

7-
pub fn configure() -> String {
8-
let servers = &["127.0.0.1:5000", "127.0.0.1:5001", "127.0.0.1:5002"];
7+
pub fn configure() -> GlobalResult<String> {
8+
let nomad_server_count = util::env::var("NOMAD_SERVER_COUNT")?.parse::<usize>()?;
9+
let servers = (0..nomad_server_count)
10+
.map(|idx| format!("127.0.0.1:{}", 5000 + idx))
11+
.collect::<Vec<_>>();
912

10-
include_str!("../files/nomad_configure.sh")
13+
Ok(include_str!("../files/nomad_configure.sh")
1114
// HACK: Hardcoded to Linode
1215
.replace("__PUBLIC_IFACE__", "eth0")
1316
// HACK: Hardcoded to Linode
@@ -27,5 +30,5 @@ pub fn configure() -> String {
2730
.replace(
2831
"__ATS_VLAN_SUBNET__",
2932
&util::net::ats::vlan_ip_net().to_string(),
30-
)
33+
))
3134
}

svc/pkg/cluster/worker/src/workers/server_install/install_scripts/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ pub async fn gen_initialize(
9292
// MARK: Specific pool components
9393
match pool_type {
9494
backend::cluster::PoolType::Job => {
95-
script.push(components::nomad::configure());
95+
script.push(components::nomad::configure()?);
9696

9797
prometheus_targets.insert(
9898
"nomad".into(),

0 commit comments

Comments
 (0)