Skip to content

Commit

Permalink
Expose each Nomad server individually via tunnel
Browse files Browse the repository at this point in the history
  • Loading branch information
NathanFlurry committed Jan 17, 2024
1 parent 7434388 commit a5ef3fa
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Infra** CNI ports not being removed from the `nat` iptable, therefore occasionally causing failed connections
- **Infra** Disable `nativeLB` for Traefik tunnel
- **Infra** Update default Nomad storage to 64Gi
- **Infra** Tunnel now exposes each Nomad server individually so the Nomad client can handle failover natively instead of relying on Traefik

## [23.2.0-rc.1] - 2023-12-01

Expand Down
33 changes: 32 additions & 1 deletion infra/tf/k8s_infra/nomad.tf
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,37 @@ resource "kubernetes_service" "nomad_server" {
}
}

# One Service per Nomad server replica, named `nomad-server-<index>`, so that each
# server can be addressed individually instead of through a single shared Service.
resource "kubernetes_service" "nomad_server_indexed" {
# Creates `local.nomad_server_count` Services, indexed from 0.
count = local.nomad_server_count

metadata {
namespace = kubernetes_namespace.nomad.metadata.0.name
name = "nomad-server-${count.index}"
labels = {
name = "nomad-server-${count.index}"
"app.kubernetes.io/name" = "nomad-server-${count.index}"
}
}
spec {
# Narrow the selector to the single pod whose index label equals this Service's index.
# NOTE(review): the `apps.kubernetes.io/pod-index` label is presumably set by the
# StatefulSet controller and only exists on newer Kubernetes versions (1.28+) —
# confirm the cluster version, otherwise this Service will select no pods.
selector = {
app = "nomad-server"
"apps.kubernetes.io/pod-index" = count.index
}

# Nomad HTTP port.
port {
name = "http"
port = 4646
protocol = "TCP"
}

# Nomad RPC port.
port {
name = "rpc"
port = 4647
protocol = "TCP"
}
}
}

resource "kubectl_manifest" "nomad_server_monitor" {
depends_on = [kubernetes_stateful_set.nomad_server]

Expand Down Expand Up @@ -368,7 +399,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
access_modes = ["ReadWriteOnce"]
resources {
requests = {
storage = "64Gi"
storage = var.deploy_method_cluster ? "64Gi" : "1Gi"
}
}
storage_class_name = var.k8s_storage_class
Expand Down
19 changes: 19 additions & 0 deletions infra/tf/k8s_infra/traefik_tunnel.tf
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
locals {
# Specify what services to expose via the tunnel server
tunnel_services = {
# LEGACY: Addresses a random Nomad server.
"nomad" = {
service = "nomad-server"
service_namespace = kubernetes_namespace.nomad.metadata[0].name
service_port = 4647
}

# Addresses specific Nomad servers.
"nomad-server-0" = {
service = "nomad-server-0"
service_namespace = kubernetes_namespace.nomad.metadata[0].name
service_port = 4647
}
"nomad-server-1" = {
service = "nomad-server-1"
service_namespace = kubernetes_namespace.nomad.metadata[0].name
service_port = 4647
}
"nomad-server-2" = {
service = "nomad-server-2"
service_namespace = kubernetes_namespace.nomad.metadata[0].name
service_port = 4647
}

"api-route" = {
service = "rivet-api-route"
service_namespace = kubernetes_namespace.rivet_service.metadata[0].name
Expand Down
71 changes: 54 additions & 17 deletions lib/bolt/core/src/dep/terraform/install_scripts/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,46 @@ use crate::{
dep::terraform::{net, output::Cert, servers::Server},
};

/// Service that gets exposed from the Traefik tunnel.
///
/// Each entry pairs the service's tunnel name with the local port it is served on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TunnelService {
    /// Name of the service for the subdomain. This is how the Traefik tunnel server knows where to
    /// route traffic.
    name: &'static str,

    /// The port to serve the service on locally.
    port: u16,
}

/// Local port for the `api-route` tunnel service.
pub const TUNNEL_API_ROUTE_PORT: u16 = 5010;
/// Local port for the `vector` tunnel service.
pub const TUNNEL_VECTOR_PORT: u16 = 5020;
/// Local port for the `vector-tcp-json` tunnel service.
pub const TUNNEL_VECTOR_TCP_JSON_PORT: u16 = 5021;
/// Every service exposed through the tunnel, paired with the local port it is served on.
///
/// Ports are assigned explicitly per entry, so adding or reordering entries does not
/// shift the ports of the other services.
pub const TUNNEL_SERVICES: &[TunnelService] = &[
// One entry per Nomad server. Ports 5000-5002 match the `127.0.0.1:500x` addresses
// listed in `nomad()`; keep the two in sync.
TunnelService {
name: "nomad-server-0",
port: 5000,
},
TunnelService {
name: "nomad-server-1",
port: 5001,
},
TunnelService {
name: "nomad-server-2",
port: 5002,
},
TunnelService {
name: "api-route",
port: TUNNEL_API_ROUTE_PORT,
},
TunnelService {
name: "vector",
port: TUNNEL_VECTOR_PORT,
},
TunnelService {
name: "vector-tcp-json",
port: TUNNEL_VECTOR_TCP_JSON_PORT,
},
];

pub fn common() -> String {
vec![
format!("apt-get update -y"),
Expand Down Expand Up @@ -60,7 +100,7 @@ pub fn cni_plugins() -> String {
}

pub fn nomad(server: &Server) -> String {
let servers = &["127.0.0.1:5000"];
let servers = &["127.0.0.1:5000", "127.0.0.1:5001", "127.0.0.1:5002"];

include_str!("files/nomad.sh")
.replace("__REGION_ID__", &server.region_id)
Expand Down Expand Up @@ -203,20 +243,18 @@ pub fn traefik_instance(config: TraefikInstance) -> String {
script
}

const TUNNEL_SERVICES: &[&'static str] = &["nomad", "api-route", "vector", "vector-tcp-json"];

pub fn traefik_tunnel(
_ctx: &ProjectContext,
k8s_infra: &crate::dep::terraform::output::K8sInfra,
tls: &crate::dep::terraform::output::Tls,
) -> String {
// Build transports for each service
let mut tcp_server_transports = IndexMap::new();
for service in TUNNEL_SERVICES {
for TunnelService { name, .. } in TUNNEL_SERVICES {
tcp_server_transports.insert(
service.to_string(),
name.to_string(),
ServerTransport {
server_name: format!("{service}.tunnel.rivet.gg"),
server_name: format!("{name}.tunnel.rivet.gg"),
root_cas: vec![(*tls.root_ca_cert_pem).clone()],
certs: vec![(*tls.tls_cert_locally_signed_job).clone()],
},
Expand All @@ -241,13 +279,12 @@ fn tunnel_traefik_static_config() -> String {
"#
);

for (i, service) in TUNNEL_SERVICES.iter().enumerate() {
for TunnelService { name, port } in TUNNEL_SERVICES.iter() {
config.push_str(&formatdoc!(
r#"
[entryPoints.{service}]
[entryPoints.{name}]
address = "127.0.0.1:{port}"
"#,
port = 5000 + i
))
}

Expand All @@ -256,18 +293,18 @@ fn tunnel_traefik_static_config() -> String {

fn tunnel_traefik_dynamic_config(tunnel_external_ip: &str) -> String {
let mut config = String::new();
for service in TUNNEL_SERVICES.iter() {
for TunnelService { name, .. } in TUNNEL_SERVICES.iter() {
config.push_str(&formatdoc!(
r#"
[tcp.routers.{service}]
entryPoints = ["{service}"]
[tcp.routers.{name}]
entryPoints = ["{name}"]
rule = "HostSNI(`*`)" # Match all ingress, unrelated to the outbound TLS
service = "{service}"
service = "{name}"
[tcp.services.{service}.loadBalancer]
serversTransport = "{service}"
[tcp.services.{name}.loadBalancer]
serversTransport = "{name}"
[[tcp.services.{service}.loadBalancer.servers]]
[[tcp.services.{name}.loadBalancer.servers]]
address = "{tunnel_external_ip}:5000"
tls = true
"#
Expand Down Expand Up @@ -302,7 +339,7 @@ pub fn vector(config: &VectorConfig) -> String {
[sinks.vector_sink]
type = "vector"
inputs = [{sources}]
address = "127.0.0.1:5002"
address = "127.0.0.1:{TUNNEL_VECTOR_PORT}"
healthcheck.enabled = false
compression = true
"#
Expand Down
3 changes: 2 additions & 1 deletion lib/bolt/core/src/dep/terraform/install_scripts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ pub async fn gen(

fn gg_traefik_static_config(server: &Server, api_route_token: &str) -> String {
let http_provider_endpoint = format!(
"http://127.0.0.1:5001/traefik/config/game-guard?token={api_route_token}&region={region}",
"http://127.0.0.1:{port}/traefik/config/game-guard?token={api_route_token}&region={region}",
port = components::TUNNEL_API_ROUTE_PORT,
region = server.region_id
);

Expand Down
2 changes: 1 addition & 1 deletion svc/pkg/mm/worker/src/workers/lobby_create/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ async fn create_docker_job(
},
job_run::msg::create::Parameter {
key: "vector_socket_addr".into(),
value: "127.0.0.1:5003".to_string(),
value: "127.0.0.1:5021".to_string(),
},
job_run::msg::create::Parameter {
key: "image_artifact_url".into(),
Expand Down

0 comments on commit a5ef3fa

Please sign in to comment.