From 2564c129fe5557d7074ce95452071d9ffddb86e5 Mon Sep 17 00:00:00 2001 From: MasterPtato <23087326+MasterPtato@users.noreply.github.com> Date: Thu, 18 Apr 2024 01:18:37 +0000 Subject: [PATCH] chore: clean up server install scripts (#682) ## Changes --- svc/pkg/cluster/util/gen/hash.txt | 1 - .../install_scripts/components.rs | 565 ------------------ .../install_scripts/components/mod.rs | 79 +++ .../install_scripts/components/nomad.rs | 31 + .../install_scripts/components/rivet.rs | 42 ++ .../install_scripts/components/s3.rs | 65 ++ .../install_scripts/components/traefik.rs | 289 +++++++++ .../components/traffic_server.rs | 97 +++ .../install_scripts/components/vector.rs | 80 +++ .../server_install/install_scripts/mod.rs | 139 ++--- 10 files changed, 717 insertions(+), 671 deletions(-) delete mode 100644 svc/pkg/cluster/util/gen/hash.txt delete mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/mod.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/nomad.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/rivet.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/s3.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traefik.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traffic_server.rs create mode 100644 svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/vector.rs diff --git a/svc/pkg/cluster/util/gen/hash.txt b/svc/pkg/cluster/util/gen/hash.txt deleted file mode 100644 index 000fa88910..0000000000 --- a/svc/pkg/cluster/util/gen/hash.txt +++ /dev/null @@ -1 +0,0 @@ -80b6ae8f1c15377d4c11bbb28a5705b982c8ac92 \ No newline at end of file diff --git 
a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components.rs deleted file mode 100644 index 9fbf18c99b..0000000000 --- a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components.rs +++ /dev/null @@ -1,565 +0,0 @@ -use std::collections::HashMap; - -use chirp_worker::prelude::*; -use include_dir::{include_dir, Dir}; -use indoc::{formatdoc, indoc}; -use proto::backend; -use s3_util::Provider; - -/// Service that gets exposed from the Traefik tunnel. -pub struct TunnelService { - /// Name of the service for the subdomain. This is how the Treafik tunnel server knows where to - /// route traffic. - name: &'static str, - - /// The port to serve the service on locally. - port: u16, -} - -pub const TUNNEL_API_INTERNAL_PORT: u16 = 5010; -pub const TUNNEL_VECTOR_PORT: u16 = 5020; -pub const TUNNEL_VECTOR_TCP_JSON_PORT: u16 = 5021; -pub const TUNNEL_SERVICES: &[TunnelService] = &[ - TunnelService { - name: "nomad-server-0", - port: 5000, - }, - TunnelService { - name: "nomad-server-1", - port: 5001, - }, - TunnelService { - name: "nomad-server-2", - port: 5002, - }, - TunnelService { - name: "api-internal", - port: TUNNEL_API_INTERNAL_PORT, - }, - TunnelService { - name: "vector", - port: TUNNEL_VECTOR_PORT, - }, - TunnelService { - name: "vector-tcp-json", - port: TUNNEL_VECTOR_TCP_JSON_PORT, - }, -]; - -pub fn common() -> String { - indoc!( - " - apt-get update -y - apt-get install -y apt-transport-https ca-certificates gnupg2 software-properties-common curl jq unzip - " - ).to_string() -} - -pub fn node_exporter() -> String { - include_str!("files/node_exporter.sh").to_string() -} -pub fn sysctl() -> String { - include_str!("files/sysctl.sh").to_string() -} - -pub fn docker() -> String { - include_str!("files/docker.sh").to_string() -} - -pub fn lz4() -> String { - "apt-get install -y lz4".to_string() -} - -pub fn skopeo() -> String { - "apt-get install 
-y skopeo".to_string() -} - -pub fn umoci() -> String { - indoc!( - r#" - curl -Lf -o /usr/bin/umoci "https://github.com/opencontainers/umoci/releases/download/v0.4.7/umoci.amd64" - chmod +x /usr/bin/umoci - "# - ).to_string() -} - -pub fn cnitool() -> String { - indoc!( - r#" - curl -Lf -o /usr/bin/cnitool "https://github.com/rivet-gg/cni/releases/download/v1.1.2-build3/cnitool" - chmod +x /usr/bin/cnitool - "# - ).to_string() -} - -pub fn cni_plugins() -> String { - include_str!("files/cni_plugins.sh").to_string() -} - -pub fn nomad_install() -> String { - include_str!("files/nomad_install.sh").to_string() -} - -pub fn nomad_configure() -> String { - let servers = &["127.0.0.1:5000", "127.0.0.1:5001", "127.0.0.1:5002"]; - - include_str!("files/nomad_configure.sh") - // HACK: Hardcoded to Linode - .replace("__PUBLIC_IFACE__", "eth0") - // HACK: Hardcoded to Linode - .replace("__VLAN_IFACE__", "eth1") - .replace( - "__SERVER_JOIN__", - &servers - .iter() - .map(|x| format!("\"{x}\"")) - .collect::>() - .join(", "), - ) - .replace( - "__GG_VLAN_SUBNET__", - &util::net::gg::vlan_ip_net().to_string(), - ) - .replace( - "__ATS_VLAN_SUBNET__", - &util::net::ats::vlan_ip_net().to_string(), - ) -} - -/// Installs Traefik, but does not create the Traefik service. -pub fn traefik() -> String { - include_str!("files/traefik.sh").to_string() -} - -pub struct TlsCert { - pub cert_pem: String, - pub key_pem: String, -} - -pub struct TraefikInstance { - pub name: String, - pub static_config: String, - pub dynamic_config: String, - pub tcp_server_transports: HashMap, -} - -pub struct ServerTransport { - pub server_name: String, - pub root_cas: Vec, - pub certs: Vec, -} - -/// Creates a Traefik instance. -/// -/// Requires `traefik()`. 
-pub fn traefik_instance(config: TraefikInstance) -> String { - let config_name = &config.name; - - let mut script = include_str!("files/traefik_instance.sh") - .replace("__NAME__", &config.name) - .replace("__STATIC_CONFIG__", &config.static_config) - .replace("__DYNAMIC_CONFIG__", &config.dynamic_config); - - for (transport_id, transport) in config.tcp_server_transports { - // Build config - let root_cas = transport - .root_cas - .iter() - .enumerate() - .map(|(i, _)| { - format!("\"/etc/{config_name}/tls/transport_{transport_id}_root_ca_{i}_cert.pem\"",) - }) - .collect::>() - .join(", "); - let mut transport_config = formatdoc!( - r#" - [tcp.serversTransports.{transport_id}.tls] - serverName = "{server_name}" - rootCAs = [{root_cas}] - "#, - server_name = transport.server_name - ); - - // Write root CAs - for (i, cert) in transport.root_cas.iter().enumerate() { - script.push_str(&formatdoc!( - r#" - cat << 'EOF' > /etc/{config_name}/tls/transport_{transport_id}_root_ca_{i}_cert.pem - {cert} - EOF - "#, - )); - } - - // Write certs - for (i, cert) in transport.certs.iter().enumerate() { - script.push_str(&formatdoc!( - r#" - cat << 'EOF' > /etc/{config_name}/tls/transport_{transport_id}_cert_{i}_cert.pem - {cert} - EOF - - cat << 'EOF' > /etc/{config_name}/tls/transport_{transport_id}_cert_{i}_key.pem - {key} - EOF - "#, - cert = cert.cert_pem, - key = cert.key_pem, - )); - transport_config.push_str(&formatdoc!( - r#" - [[tcp.serversTransports.{transport_id}.tls.certificates]] - certFile = "/etc/{config_name}/tls/transport_{transport_id}_cert_{i}_cert.pem" - keyFile = "/etc/{config_name}/tls/transport_{transport_id}_cert_{i}_key.pem" - "# - )) - } - - // Write config - script.push_str(&formatdoc!( - r#" - cat << 'EOF' > /etc/{config_name}/dynamic/transport_{transport_id}.toml - {transport_config} - EOF - "# - )); - } - - script -} - -pub fn traefik_tunnel() -> GlobalResult { - // Build transports for each service - let mut tcp_server_transports = HashMap::new(); 
- for TunnelService { name, .. } in TUNNEL_SERVICES { - tcp_server_transports.insert( - name.to_string(), - ServerTransport { - server_name: format!("{name}.tunnel.rivet.gg"), - root_cas: vec![util::env::var("TLS_ROOT_CA_CERT_PEM")?], - certs: vec![TlsCert { - cert_pem: util::env::var("TLS_CERT_LOCALLY_SIGNED_JOB_CERT_PEM")?, - key_pem: util::env::var("TLS_CERT_LOCALLY_SIGNED_JOB_KEY_PEM")?, - }], - }, - ); - } - - Ok(traefik_instance(TraefikInstance { - name: "tunnel".into(), - static_config: tunnel_traefik_static_config(), - dynamic_config: tunnel_traefik_dynamic_config(&util::env::var( - "K8S_TRAEFIK_TUNNEL_EXTERNAL_IP", - )?), - tcp_server_transports, - })) -} - -fn tunnel_traefik_static_config() -> String { - let mut config = formatdoc!( - r#" - [providers] - [providers.file] - directory = "/etc/tunnel/dynamic" - "# - ); - - for TunnelService { name, port } in TUNNEL_SERVICES.iter() { - config.push_str(&formatdoc!( - r#" - [entryPoints.{name}] - address = "127.0.0.1:{port}" - "#, - )) - } - - config -} - -fn tunnel_traefik_dynamic_config(tunnel_external_ip: &str) -> String { - let mut config = String::new(); - for TunnelService { name, .. 
} in TUNNEL_SERVICES.iter() { - config.push_str(&formatdoc!( - r#" - [tcp.routers.{name}] - entryPoints = ["{name}"] - rule = "HostSNI(`*`)" # Match all ingress, unrelated to the outbound TLS - service = "{name}" - - [tcp.services.{name}.loadBalancer] - serversTransport = "{name}" - - [[tcp.services.{name}.loadBalancer.servers]] - address = "{tunnel_external_ip}:5000" - tls = true - "# - )) - } - - config -} - -pub fn vector_install() -> String { - include_str!("files/vector_install.sh").to_string() -} - -pub struct VectorConfig { - pub prometheus_targets: HashMap, -} - -pub struct VectorPrometheusTarget { - pub endpoint: String, - pub scrape_interval: usize, -} - -pub fn vector_configure(config: &VectorConfig, pool_type: backend::cluster::PoolType) -> String { - let sources = config - .prometheus_targets - .keys() - .map(|x| format!("\"prometheus_{x}\"")) - .collect::>() - .join(", "); - - let pool_type_str = match pool_type { - backend::cluster::PoolType::Job => "job", - backend::cluster::PoolType::Gg => "gg", - backend::cluster::PoolType::Ats => "ats", - }; - - let mut config_str = formatdoc!( - r#" - [api] - enabled = true - - [transforms.add_meta] - type = "remap" - inputs = [{sources}] - source = ''' - .tags.server_id = "___SERVER_ID___" - .tags.datacenter_id = "___DATACENTER_ID___" - .tags.cluster_id = "___CLUSTER_ID___" - .tags.pool_type = "{pool_type_str}" - .tags.public_ip = "${{PUBLIC_IP}}" - ''' - - [sinks.vector_sink] - type = "vector" - inputs = ["add_meta"] - address = "127.0.0.1:{TUNNEL_VECTOR_PORT}" - healthcheck.enabled = false - compression = true - "# - ); - - for ( - key, - VectorPrometheusTarget { - endpoint, - scrape_interval, - }, - ) in &config.prometheus_targets - { - config_str.push_str(&formatdoc!( - r#" - [sources.prometheus_{key}] - type = "prometheus_scrape" - endpoints = ["{endpoint}"] - scrape_interval_secs = {scrape_interval} - "# - )); - } - - include_str!("files/vector_configure.sh").replace("__VECTOR_CONFIG__", &config_str) -} - 
-const TRAFFIC_SERVER_IMAGE: &str = "ghcr.io/rivet-gg/apache-traffic-server:9934dc2"; - -pub fn traffic_server_install() -> String { - include_str!("files/traffic_server_install.sh").replace("__IMAGE__", TRAFFIC_SERVER_IMAGE) -} - -pub async fn traffic_server_configure() -> GlobalResult { - // Write config to files - let config = traffic_server_config().await?; - let mut config_scripts = config - .into_iter() - .map(|(k, v)| format!("cat << 'EOF' > /etc/trafficserver/{k}\n{v}\nEOF\n")) - .collect::>(); - - // Update default storage config size to be entire filesystem size minus 4GB - config_scripts.push( - indoc!( - r#" - df -h / | - awk 'NR==2 {gsub(/G/, "", $2); print $2 - 4 "G"}' | - xargs -I {} sed -i 's/64G/{}/' /etc/trafficserver/storage.config - "# - ) - .to_string(), - ); - - let script = include_str!("files/traffic_server_configure.sh") - .replace("__IMAGE__", TRAFFIC_SERVER_IMAGE) - .replace("__CONFIG__", &config_scripts.join("\n\n")); - - Ok(script) -} - -static TRAFFIC_SERVER_CONFIG_DIR: Dir<'_> = include_dir!( - "$CARGO_MANIFEST_DIR/src/workers/server_install/install_scripts/files/traffic_server" -); - -async fn traffic_server_config() -> GlobalResult> { - // Static files - let mut config_files = Vec::new(); - collect_config_files(&TRAFFIC_SERVER_CONFIG_DIR, &mut config_files)?; - - // Storage (default value of 64 gets overwritten in config script) - let volume_size = 64; - config_files.push(( - "storage.config".to_string(), - format!("/var/cache/trafficserver {volume_size}G"), - )); - - // Remap & S3 - let mut remap = String::new(); - let default_s3_provider = Provider::default()?; - if s3_util::s3_provider_active("bucket-build", Provider::Minio) { - let output = gen_s3_provider(Provider::Minio, default_s3_provider).await?; - remap.push_str(&output.append_remap); - config_files.extend(output.config_files); - } - if s3_util::s3_provider_active("bucket-build", Provider::Backblaze) { - let output = gen_s3_provider(Provider::Backblaze, 
default_s3_provider).await?; - remap.push_str(&output.append_remap); - config_files.extend(output.config_files); - } - if s3_util::s3_provider_active("bucket-build", Provider::Aws) { - let output = gen_s3_provider(Provider::Aws, default_s3_provider).await?; - remap.push_str(&output.append_remap); - config_files.extend(output.config_files); - } - config_files.push(("remap.config".to_string(), remap)); - - Ok(config_files) -} - -fn collect_config_files( - dir: &include_dir::Dir, - config_files: &mut Vec<(String, String)>, -) -> GlobalResult<()> { - for entry in dir.entries() { - match entry { - include_dir::DirEntry::File(file) => { - let key = unwrap!(unwrap!(file.path().file_name()).to_str()).to_string(); - - let value = unwrap!(file.contents_utf8()); - config_files.push((key, value.to_string())); - } - include_dir::DirEntry::Dir(dir) => collect_config_files(dir, config_files)?, - } - } - - Ok(()) -} - -struct GenRemapS3ProviderOutput { - /// Append to remap.config - append_remap: String, - - /// Concat with config files - config_files: Vec<(String, String)>, -} - -async fn gen_s3_provider( - provider: Provider, - default_s3_provider: Provider, -) -> GlobalResult { - let mut remap = String::new(); - let provider_name = provider.as_str(); - let endpoint_external = s3_util::s3_endpoint_external("bucket-build", provider)?; - let region = s3_util::s3_region("bucket-build", provider)?; - let (access_key_id, secret_access_key) = s3_util::s3_credentials("bucket-build", provider)?; - - // Build plugin chain - let plugins = format!("@plugin=tslua.so @pparam=/etc/trafficserver/strip_headers.lua @plugin=s3_auth.so @pparam=--config @pparam=s3_auth_v4_{provider_name}.config"); - - // Add remap - remap.push_str(&format!( - "map /s3-cache/{provider_name} {endpoint_external} {plugins}\n", - )); - - // Add default route - if default_s3_provider == provider { - remap.push_str(&format!("map /s3-cache {endpoint_external} {plugins}\n",)); - } - - // Add credentials - let mut 
config_files = Vec::<(String, String)>::new(); - config_files.push(( - format!("s3_auth_v4_{provider_name}.config"), - formatdoc!( - r#" - access_key={access_key_id} - secret_key={secret_access_key} - version=4 - v4-region-map=s3_region_map_{provider_name}.config - "#, - ), - )); - config_files.push(( - format!("s3_region_map_{provider_name}.config"), - formatdoc!( - r#" - # Default region - {s3_host}: {s3_region} - "#, - s3_host = endpoint_external.split_once("://").unwrap().1, - s3_region = region, - ), - )); - - Ok(GenRemapS3ProviderOutput { - append_remap: remap, - config_files, - }) -} - -pub fn rivet_create_hook(initialize_immediately: bool) -> GlobalResult { - let mut script = include_str!("files/rivet_create_hook.sh").to_string(); - - if initialize_immediately { - script.push_str("systemctl start rivet_hook\n"); - } - - Ok(script) -} - -pub fn rivet_fetch_info(server_token: &str) -> GlobalResult { - Ok(include_str!("files/rivet_fetch_info.sh") - .replace("__SERVER_TOKEN__", server_token) - .replace( - "__TUNNEL_API_INTERNAL_PORT__", - &TUNNEL_API_INTERNAL_PORT.to_string(), - )) -} - -pub fn rivet_fetch_tls( - initialize_immediately: bool, - server_token: &str, - traefik_instance_name: &str, -) -> GlobalResult { - let mut script = include_str!("files/rivet_fetch_tls.sh") - .replace("__NAME__", traefik_instance_name) - .replace("__SERVER_TOKEN__", server_token) - .replace( - "__TUNNEL_API_INTERNAL_PORT__", - &TUNNEL_API_INTERNAL_PORT.to_string(), - ); - - if initialize_immediately { - script.push_str("systemctl start rivet_fetch_tls.timer\n"); - } - - Ok(script) -} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/mod.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/mod.rs new file mode 100644 index 0000000000..e5604d3065 --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/mod.rs @@ -0,0 +1,79 @@ +use indoc::indoc; + +pub mod nomad; +pub mod 
rivet; +pub mod s3; +pub mod traefik; +pub mod traffic_server; +pub mod vector; + +pub const TUNNEL_API_INTERNAL_PORT: u16 = 5010; + +pub fn common() -> String { + indoc!( + " + apt-get update -y + apt-get install -y apt-transport-https ca-certificates gnupg2 software-properties-common curl jq unzip + " + ).to_string() +} + +pub mod node_exporter { + pub fn install() -> String { + include_str!("../files/node_exporter.sh").to_string() + } +} + +pub mod sysctl { + pub fn install() -> String { + include_str!("../files/sysctl.sh").to_string() + } +} + +pub mod docker { + pub fn install() -> String { + include_str!("../files/docker.sh").to_string() + } +} + +pub mod lz4 { + pub fn install() -> String { + "apt-get install -y lz4".to_string() + } +} + +pub mod skopeo { + pub fn install() -> String { + "apt-get install -y skopeo".to_string() + } +} + +pub mod umoci { + use indoc::indoc; + + pub fn install() -> String { + indoc!( + r#" + curl -Lf -o /usr/bin/umoci "https://github.com/opencontainers/umoci/releases/download/v0.4.7/umoci.amd64" + chmod +x /usr/bin/umoci + "# + ).to_string() + } +} + +pub mod cni { + use indoc::indoc; + + pub fn tool() -> String { + indoc!( + r#" + curl -Lf -o /usr/bin/cnitool "https://github.com/rivet-gg/cni/releases/download/v1.1.2-build3/cnitool" + chmod +x /usr/bin/cnitool + "# + ).to_string() + } + + pub fn plugins() -> String { + include_str!("../files/cni_plugins.sh").to_string() + } +} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/nomad.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/nomad.rs new file mode 100644 index 0000000000..acaaf99b99 --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/nomad.rs @@ -0,0 +1,31 @@ +use chirp_worker::prelude::*; + +pub fn install() -> String { + include_str!("../files/nomad_install.sh").to_string() +} + +pub fn configure() -> String { + let servers = &["127.0.0.1:5000", 
"127.0.0.1:5001", "127.0.0.1:5002"]; + + include_str!("../files/nomad_configure.sh") + // HACK: Hardcoded to Linode + .replace("__PUBLIC_IFACE__", "eth0") + // HACK: Hardcoded to Linode + .replace("__VLAN_IFACE__", "eth1") + .replace( + "__SERVER_JOIN__", + &servers + .iter() + .map(|x| format!("\"{x}\"")) + .collect::>() + .join(", "), + ) + .replace( + "__GG_VLAN_SUBNET__", + &util::net::gg::vlan_ip_net().to_string(), + ) + .replace( + "__ATS_VLAN_SUBNET__", + &util::net::ats::vlan_ip_net().to_string(), + ) +} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/rivet.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/rivet.rs new file mode 100644 index 0000000000..5c518a4aab --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/rivet.rs @@ -0,0 +1,42 @@ +use chirp_worker::prelude::*; + +use super::TUNNEL_API_INTERNAL_PORT; + +pub fn create_hook(initialize_immediately: bool) -> GlobalResult { + let mut script = include_str!("../files/rivet_create_hook.sh").to_string(); + + if initialize_immediately { + script.push_str("systemctl start rivet_hook\n"); + } + + Ok(script) +} + +pub fn fetch_info(server_token: &str) -> GlobalResult { + Ok(include_str!("../files/rivet_fetch_info.sh") + .replace("__SERVER_TOKEN__", server_token) + .replace( + "__TUNNEL_API_INTERNAL_PORT__", + &TUNNEL_API_INTERNAL_PORT.to_string(), + )) +} + +pub fn fetch_tls( + initialize_immediately: bool, + server_token: &str, + traefik_instance_name: &str, +) -> GlobalResult { + let mut script = include_str!("../files/rivet_fetch_tls.sh") + .replace("__NAME__", traefik_instance_name) + .replace("__SERVER_TOKEN__", server_token) + .replace( + "__TUNNEL_API_INTERNAL_PORT__", + &TUNNEL_API_INTERNAL_PORT.to_string(), + ); + + if initialize_immediately { + script.push_str("systemctl start rivet_fetch_tls.timer\n"); + } + + Ok(script) +} diff --git 
a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/s3.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/s3.rs new file mode 100644 index 0000000000..23e2547b35 --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/s3.rs @@ -0,0 +1,65 @@ +use chirp_worker::prelude::*; +use indoc::formatdoc; +use s3_util::Provider; + +pub struct GenRemapS3ProviderOutput { + /// Append to remap.config + pub append_remap: String, + + /// Concat with config files + pub config_files: Vec<(String, String)>, +} + +pub async fn gen_provider( + provider: Provider, + default_s3_provider: Provider, +) -> GlobalResult { + let mut remap = String::new(); + let provider_name = provider.as_str(); + let endpoint_external = s3_util::s3_endpoint_external("bucket-build", provider)?; + let region = s3_util::s3_region("bucket-build", provider)?; + let (access_key_id, secret_access_key) = s3_util::s3_credentials("bucket-build", provider)?; + + // Build plugin chain + let plugins = format!("@plugin=tslua.so @pparam=/etc/trafficserver/strip_headers.lua @plugin=s3_auth.so @pparam=--config @pparam=s3_auth_v4_{provider_name}.config"); + + // Add remap + remap.push_str(&format!( + "map /s3-cache/{provider_name} {endpoint_external} {plugins}\n", + )); + + // Add default route + if default_s3_provider == provider { + remap.push_str(&format!("map /s3-cache {endpoint_external} {plugins}\n",)); + } + + // Add credentials + let mut config_files = Vec::<(String, String)>::new(); + config_files.push(( + format!("s3_auth_v4_{provider_name}.config"), + formatdoc!( + r#" + access_key={access_key_id} + secret_key={secret_access_key} + version=4 + v4-region-map=s3_region_map_{provider_name}.config + "#, + ), + )); + config_files.push(( + format!("s3_region_map_{provider_name}.config"), + formatdoc!( + r#" + # Default region + {s3_host}: {s3_region} + "#, + s3_host = endpoint_external.split_once("://").unwrap().1, + 
s3_region = region, + ), + )); + + Ok(GenRemapS3ProviderOutput { + append_remap: remap, + config_files, + }) +} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traefik.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traefik.rs new file mode 100644 index 0000000000..ee9da07e8a --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traefik.rs @@ -0,0 +1,289 @@ +use std::collections::HashMap; + +use chirp_worker::prelude::*; +use indoc::formatdoc; + +use super::{ + vector::{TUNNEL_VECTOR_PORT, TUNNEL_VECTOR_TCP_JSON_PORT}, + TUNNEL_API_INTERNAL_PORT, +}; + +pub const TUNNEL_SERVICES: &[TunnelService] = &[ + TunnelService { + name: "nomad-server-0", + port: 5000, + }, + TunnelService { + name: "nomad-server-1", + port: 5001, + }, + TunnelService { + name: "nomad-server-2", + port: 5002, + }, + TunnelService { + name: "api-internal", + port: TUNNEL_API_INTERNAL_PORT, + }, + TunnelService { + name: "vector", + port: TUNNEL_VECTOR_PORT, + }, + TunnelService { + name: "vector-tcp-json", + port: TUNNEL_VECTOR_TCP_JSON_PORT, + }, +]; + +/// Service that gets exposed from the Traefik tunnel. +pub struct TunnelService { + /// Name of the service for the subdomain. This is how the Traefik tunnel server knows where to + /// route traffic. + name: &'static str, + + /// The port to serve the service on locally. + port: u16, +} + +/// Installs Traefik, but does not create the Traefik service. +pub fn install() -> String { + include_str!("../files/traefik.sh").to_string() +} + +pub struct TlsCert { + pub cert_pem: String, + pub key_pem: String, +} + +pub struct Instance { + pub name: String, + pub static_config: String, + pub dynamic_config: String, + pub tcp_server_transports: HashMap, +} + +pub struct ServerTransport { + pub server_name: String, + pub root_cas: Vec, + pub certs: Vec, +} + +/// Creates a Traefik instance. +/// +/// Requires `install()`. 
+pub fn instance(config: Instance) -> String { + let config_name = &config.name; + + let mut script = include_str!("../files/traefik_instance.sh") + .replace("__NAME__", &config.name) + .replace("__STATIC_CONFIG__", &config.static_config) + .replace("__DYNAMIC_CONFIG__", &config.dynamic_config); + + for (transport_id, transport) in config.tcp_server_transports { + // Build config + let root_cas = transport + .root_cas + .iter() + .enumerate() + .map(|(i, _)| { + format!("\"/etc/{config_name}/tls/transport_{transport_id}_root_ca_{i}_cert.pem\"",) + }) + .collect::>() + .join(", "); + let mut transport_config = formatdoc!( + r#" + [tcp.serversTransports.{transport_id}.tls] + serverName = "{server_name}" + rootCAs = [{root_cas}] + "#, + server_name = transport.server_name + ); + + // Write root CAs + for (i, cert) in transport.root_cas.iter().enumerate() { + script.push_str(&formatdoc!( + r#" + cat << 'EOF' > /etc/{config_name}/tls/transport_{transport_id}_root_ca_{i}_cert.pem + {cert} + EOF + "#, + )); + } + + // Write certs + for (i, cert) in transport.certs.iter().enumerate() { + script.push_str(&formatdoc!( + r#" + cat << 'EOF' > /etc/{config_name}/tls/transport_{transport_id}_cert_{i}_cert.pem + {cert} + EOF + + cat << 'EOF' > /etc/{config_name}/tls/transport_{transport_id}_cert_{i}_key.pem + {key} + EOF + "#, + cert = cert.cert_pem, + key = cert.key_pem, + )); + transport_config.push_str(&formatdoc!( + r#" + [[tcp.serversTransports.{transport_id}.tls.certificates]] + certFile = "/etc/{config_name}/tls/transport_{transport_id}_cert_{i}_cert.pem" + keyFile = "/etc/{config_name}/tls/transport_{transport_id}_cert_{i}_key.pem" + "# + )) + } + + // Write config + script.push_str(&formatdoc!( + r#" + cat << 'EOF' > /etc/{config_name}/dynamic/transport_{transport_id}.toml + {transport_config} + EOF + "# + )); + } + + script +} + +pub fn tunnel() -> GlobalResult { + // Build transports for each service + let mut tcp_server_transports = HashMap::new(); + for TunnelService 
{ name, .. } in TUNNEL_SERVICES { + tcp_server_transports.insert( + name.to_string(), + ServerTransport { + server_name: format!("{name}.tunnel.rivet.gg"), + root_cas: vec![util::env::var("TLS_ROOT_CA_CERT_PEM")?], + certs: vec![TlsCert { + cert_pem: util::env::var("TLS_CERT_LOCALLY_SIGNED_JOB_CERT_PEM")?, + key_pem: util::env::var("TLS_CERT_LOCALLY_SIGNED_JOB_KEY_PEM")?, + }], + }, + ); + } + + Ok(instance(Instance { + name: "tunnel".into(), + static_config: tunnel_static_config(), + dynamic_config: tunnel_dynamic_config(&util::env::var("K8S_TRAEFIK_TUNNEL_EXTERNAL_IP")?), + tcp_server_transports, + })) +} + +fn tunnel_static_config() -> String { + let mut config = formatdoc!( + r#" + [providers] + [providers.file] + directory = "/etc/tunnel/dynamic" + "# + ); + + for TunnelService { name, port } in TUNNEL_SERVICES.iter() { + config.push_str(&formatdoc!( + r#" + [entryPoints.{name}] + address = "127.0.0.1:{port}" + "#, + )) + } + + config +} + +fn tunnel_dynamic_config(tunnel_external_ip: &str) -> String { + let mut config = String::new(); + for TunnelService { name, .. 
} in TUNNEL_SERVICES.iter() { + config.push_str(&formatdoc!( + r#" + [tcp.routers.{name}] + entryPoints = ["{name}"] + rule = "HostSNI(`*`)" # Match all ingress, unrelated to the outbound TLS + service = "{name}" + + [tcp.services.{name}.loadBalancer] + serversTransport = "{name}" + + [[tcp.services.{name}.loadBalancer.servers]] + address = "{tunnel_external_ip}:5000" + tls = true + "# + )) + } + + config +} + +pub async fn gg_static_config() -> GlobalResult { + let api_traefik_provider_token = + &util::env::read_secret(&["rivet", "api_traefik_provider", "token"]).await?; + let http_provider_endpoint = format!( + "http://127.0.0.1:{port}/traefik-provider/config/game-guard?token={api_traefik_provider_token}&datacenter=___DATACENTER_ID___", + port = TUNNEL_API_INTERNAL_PORT, + ); + + let mut config = formatdoc!( + r#" + [entryPoints] + [entryPoints.traefik] + address = "127.0.0.1:9980" + + [entryPoints.lb-80] + address = ":80" + + [entryPoints.lb-443] + address = ":443" + + [api] + insecure = true + + [metrics.prometheus] + # See lib/chirp/metrics/src/buckets.rs + buckets = [0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0] + addEntryPointsLabels = true + addRoutersLabels = true + addServicesLabels = true + + [providers] + [providers.file] + directory = "/etc/game_guard/dynamic" + + [providers.http] + endpoint = "{http_provider_endpoint}" + pollInterval = "0.5s" + "# + ); + + // TCP ports + for port in util::net::job::MIN_INGRESS_PORT_TCP..=util::net::job::MAX_INGRESS_PORT_TCP { + config.push_str(&formatdoc!( + r#" + [entryPoints.lb-{port}-tcp] + address = ":{port}/tcp" + + [entryPoints.lb-{port}-tcp.transport.respondingTimeouts] + readTimeout = "12h" + writeTimeout = "12h" + idleTimeout = "30s" + + "# + )); + } + + // UDP ports + for port in util::net::job::MIN_INGRESS_PORT_UDP..=util::net::job::MAX_INGRESS_PORT_UDP { + config.push_str(&formatdoc!( + r#" + [entryPoints.lb-{port}-udp] + address = ":{port}/udp" + + 
[entryPoints.lb-{port}-udp.udp] + timeout = "15s" + "# + )); + } + + Ok(config) +} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traffic_server.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traffic_server.rs new file mode 100644 index 0000000000..4fe03ec521 --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/traffic_server.rs @@ -0,0 +1,97 @@ +use chirp_worker::prelude::*; +use include_dir::{include_dir, Dir}; +use s3_util::Provider; + +use super::s3; + +const TRAFFIC_SERVER_IMAGE: &str = "ghcr.io/rivet-gg/apache-traffic-server:9934dc2"; + +pub fn install() -> String { + include_str!("../files/traffic_server_install.sh").replace("__IMAGE__", TRAFFIC_SERVER_IMAGE) +} + +pub async fn configure() -> GlobalResult { + // Write config to files + let mut config_scripts = config() + .await? + .into_iter() + .map(|(k, v)| format!("cat << 'EOF' > /etc/trafficserver/{k}\n{v}\nEOF\n")) + .collect::>(); + + // Update default storage config size to be entire filesystem size minus 4GB + config_scripts.push( + indoc!( + r#" + df -h / | + awk 'NR==2 {gsub(/G/, "", $2); print $2 - 4 "G"}' | + xargs -I {} sed -i 's/64G/{}/' /etc/trafficserver/storage.config + "# + ) + .to_string(), + ); + + let script = include_str!("../files/traffic_server_configure.sh") + .replace("__IMAGE__", TRAFFIC_SERVER_IMAGE) + .replace("__CONFIG__", &config_scripts.join("\n\n")); + + Ok(script) +} + +static TRAFFIC_SERVER_CONFIG_DIR: Dir<'_> = include_dir!( + "$CARGO_MANIFEST_DIR/src/workers/server_install/install_scripts/files/traffic_server" +); + +async fn config() -> GlobalResult> { + // Static files + let mut config_files = Vec::new(); + collect_config_files(&TRAFFIC_SERVER_CONFIG_DIR, &mut config_files)?; + + // Storage (default value of 64 gets overwritten in config script) + let volume_size = 64; + config_files.push(( + "storage.config".to_string(), + 
format!("/var/cache/trafficserver {volume_size}G"), + )); + + // Remap & S3 + let mut remap = String::new(); + let default_s3_provider = Provider::default()?; + if s3_util::s3_provider_active("bucket-build", Provider::Minio) { + let output = s3::gen_provider(Provider::Minio, default_s3_provider).await?; + remap.push_str(&output.append_remap); + config_files.extend(output.config_files); + } + if s3_util::s3_provider_active("bucket-build", Provider::Backblaze) { + let output = s3::gen_provider(Provider::Backblaze, default_s3_provider).await?; + remap.push_str(&output.append_remap); + config_files.extend(output.config_files); + } + if s3_util::s3_provider_active("bucket-build", Provider::Aws) { + let output = s3::gen_provider(Provider::Aws, default_s3_provider).await?; + remap.push_str(&output.append_remap); + config_files.extend(output.config_files); + } + config_files.push(("remap.config".to_string(), remap)); + + Ok(config_files) +} + +// Recursively collects all of the files in a folder into a list of (file name, contents) pairs +fn collect_config_files( + dir: &include_dir::Dir, + config_files: &mut Vec<(String, String)>, +) -> GlobalResult<()> { + for entry in dir.entries() { + match entry { + include_dir::DirEntry::File(file) => { + let key = unwrap!(unwrap!(file.path().file_name()).to_str()).to_string(); + + let value = unwrap!(file.contents_utf8()); + config_files.push((key, value.to_string())); + } + include_dir::DirEntry::Dir(dir) => collect_config_files(dir, config_files)?, + } + } + + Ok(()) +} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/vector.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/vector.rs new file mode 100644 index 0000000000..671da337d3 --- /dev/null +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/components/vector.rs @@ -0,0 +1,80 @@ +use std::collections::HashMap; + +use chirp_worker::prelude::*; +use proto::backend; + +pub const TUNNEL_VECTOR_PORT: u16 = 5020; 
+pub const TUNNEL_VECTOR_TCP_JSON_PORT: u16 = 5021; + +pub fn install() -> String { + include_str!("../files/vector_install.sh").to_string() +} + +pub struct Config { + pub prometheus_targets: HashMap, +} + +pub struct PrometheusTarget { + pub endpoint: String, + pub scrape_interval: usize, +} + +pub fn configure(config: &Config, pool_type: backend::cluster::PoolType) -> String { + let sources = config + .prometheus_targets + .keys() + .map(|x| format!("\"prometheus_{x}\"")) + .collect::>() + .join(", "); + + let pool_type_str = match pool_type { + backend::cluster::PoolType::Job => "job", + backend::cluster::PoolType::Gg => "gg", + backend::cluster::PoolType::Ats => "ats", + }; + + let mut config_str = formatdoc!( + r#" + [api] + enabled = true + + [transforms.add_meta] + type = "remap" + inputs = [{sources}] + source = ''' + .tags.server_id = "___SERVER_ID___" + .tags.datacenter_id = "___DATACENTER_ID___" + .tags.cluster_id = "___CLUSTER_ID___" + .tags.pool_type = "{pool_type_str}" + .tags.public_ip = "${{PUBLIC_IP}}" + ''' + + [sinks.vector_sink] + type = "vector" + inputs = ["add_meta"] + address = "127.0.0.1:{TUNNEL_VECTOR_PORT}" + healthcheck.enabled = false + compression = true + "# + ); + + for ( + key, + PrometheusTarget { + endpoint, + scrape_interval, + }, + ) in &config.prometheus_targets + { + config_str.push_str(&formatdoc!( + r#" + [sources.prometheus_{key}] + type = "prometheus_scrape" + endpoints = ["{endpoint}"] + scrape_interval_secs = {scrape_interval} + "# + )); + } + + include_str!("../files/vector_configure.sh").replace("__VECTOR_CONFIG__", &config_str) +} diff --git a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/mod.rs b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/mod.rs index 76fcf65a5b..e02929af21 100644 --- a/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/mod.rs +++ b/svc/pkg/cluster/worker/src/workers/server_install/install_scripts/mod.rs @@ -1,7 +1,6 @@ use 
std::collections::HashMap; use chirp_worker::prelude::*; -use indoc::formatdoc; use proto::backend; pub mod components; @@ -15,33 +14,33 @@ pub async fn gen_install( // MARK: Common (pre) let mut script = vec![ components::common(), - components::node_exporter(), - components::sysctl(), - components::traefik(), - components::traefik_tunnel()?, - components::vector_install(), + components::node_exporter::install(), + components::sysctl::install(), + components::traefik::install(), + components::traefik::tunnel()?, + components::vector::install(), ]; // MARK: Specific pool components match pool_type { backend::cluster::PoolType::Job => { - script.push(components::docker()); - script.push(components::lz4()); - script.push(components::skopeo()); - script.push(components::umoci()); - script.push(components::cnitool()); - script.push(components::cni_plugins()); - script.push(components::nomad_install()); + script.push(components::docker::install()); + script.push(components::lz4::install()); + script.push(components::skopeo::install()); + script.push(components::umoci::install()); + script.push(components::cni::tool()); + script.push(components::cni::plugins()); + script.push(components::nomad::install()); } backend::cluster::PoolType::Gg => {} backend::cluster::PoolType::Ats => { - script.push(components::docker()); - script.push(components::traffic_server_install()); + script.push(components::docker::install()); + script.push(components::traffic_server::install()); } } // MARK: Common (post) - script.push(components::rivet_create_hook(initialize_immediately)?); + script.push(components::rivet::create_hook(initialize_immediately)?); let joined = script.join("\n\necho \"======\"\n\n"); Ok(format!("#!/usr/bin/env bash\nset -eu\n\n{joined}")) @@ -49,14 +48,14 @@ pub async fn gen_install( // This script is run by systemd on startup and gets the server's data from the Rivet API pub async fn gen_hook(server_token: &str) -> GlobalResult { - let mut script = 
vec![components::rivet_fetch_info(server_token)?]; + let script = [components::rivet::fetch_info(server_token)?]; let joined = script.join("\n\necho \"======\"\n\n"); Ok(format!("#!/usr/bin/env bash\nset -eu\n\n{joined}")) } -// This script is templated on the server itself after fetching server data from the Rivet API -// (see gen_hook) After being templated, it is run. +// This script is templated on the server itself after fetching server data from the Rivet API (see gen_hook). +// After being templated, it is run. pub async fn gen_initialize( pool_type: backend::cluster::PoolType, initialize_immediately: bool, @@ -69,7 +68,7 @@ pub async fn gen_initialize( // MARK: Common (pre) prometheus_targets.insert( "node_exporter".into(), - components::VectorPrometheusTarget { + components::vector::PrometheusTarget { endpoint: "http://127.0.0.1:9100/metrics".into(), scrape_interval: 15, }, @@ -78,11 +77,11 @@ pub async fn gen_initialize( // MARK: Specific pool components match pool_type { backend::cluster::PoolType::Job => { - script.push(components::nomad_configure()); + script.push(components::nomad::configure()); prometheus_targets.insert( "nomad".into(), - components::VectorPrometheusTarget { + components::vector::PrometheusTarget { endpoint: "http://127.0.0.1:4646/v1/metrics?format=prometheus".into(), scrape_interval: 15, }, @@ -91,14 +90,16 @@ pub async fn gen_initialize( backend::cluster::PoolType::Gg => { let traefik_instance_name = "game_guard".to_string(); - script.push(components::traefik_instance(components::TraefikInstance { - name: traefik_instance_name.clone(), - static_config: gg_traefik_static_config().await?, - dynamic_config: String::new(), - tcp_server_transports: Default::default(), - })); + script.push(components::traefik::instance( + components::traefik::Instance { + name: traefik_instance_name.clone(), + static_config: components::traefik::gg_static_config().await?, + dynamic_config: String::new(), + tcp_server_transports: Default::default(), + }, 
+ )); - script.push(components::rivet_fetch_tls( + script.push(components::rivet::fetch_tls( initialize_immediately, server_token, &traefik_instance_name, @@ -106,21 +107,21 @@ pub async fn gen_initialize( prometheus_targets.insert( "game_guard".into(), - components::VectorPrometheusTarget { + components::vector::PrometheusTarget { endpoint: "http://127.0.0.1:9980/metrics".into(), scrape_interval: 15, }, ); } backend::cluster::PoolType::Ats => { - script.push(components::traffic_server_configure().await?); + script.push(components::traffic_server::configure().await?); } } // MARK: Common (post) if !prometheus_targets.is_empty() { - script.push(components::vector_configure( - &components::VectorConfig { prometheus_targets }, + script.push(components::vector::configure( + &components::vector::Config { prometheus_targets }, pool_type, )); } @@ -128,75 +129,3 @@ pub async fn gen_initialize( let joined = script.join("\n\necho \"======\"\n\n"); Ok(format!("#!/usr/bin/env bash\nset -eu\n\n{joined}")) } - -async fn gg_traefik_static_config() -> GlobalResult { - let api_traefik_provider_token = - &util::env::read_secret(&["rivet", "api_traefik_provider", "token"]).await?; - let http_provider_endpoint = format!( - "http://127.0.0.1:{port}/traefik-provider/config/game-guard?token={api_traefik_provider_token}&datacenter=___DATACENTER_ID___", - port = components::TUNNEL_API_INTERNAL_PORT, - ); - - let mut config = formatdoc!( - r#" - [entryPoints] - [entryPoints.traefik] - address = "127.0.0.1:9980" - - [entryPoints.lb-80] - address = ":80" - - [entryPoints.lb-443] - address = ":443" - - [api] - insecure = true - - [metrics.prometheus] - # See lib/chirp/metrics/src/buckets.rs - buckets = [0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0] - addEntryPointsLabels = true - addRoutersLabels = true - addServicesLabels = true - - [providers] - [providers.file] - directory = "/etc/game_guard/dynamic" - - [providers.http] - endpoint = 
"{http_provider_endpoint}" - pollInterval = "0.5s" - "# - ); - - // TCP ports - for port in util::net::job::MIN_INGRESS_PORT_TCP..=util::net::job::MAX_INGRESS_PORT_TCP { - config.push_str(&formatdoc!( - r#" - [entryPoints.lb-{port}-tcp] - address = ":{port}/tcp" - - [entryPoints.lb-{port}-tcp.transport.respondingTimeouts] - readTimeout = "12h" - writeTimeout = "12h" - idleTimeout = "30s" - - "# - )); - } - - // UDP ports - for port in util::net::job::MIN_INGRESS_PORT_UDP..=util::net::job::MAX_INGRESS_PORT_UDP { - config.push_str(&formatdoc!( - r#" - [entryPoints.lb-{port}-udp] - address = ":{port}/udp" - - [entryPoints.lb-{port}-udp.udp] - timeout = "15s" - "# - )); - } - - Ok(config) -}