Skip to content

Commit

Permalink
feat(clusters): static endpoint for gg nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
MasterPtato committed Jun 25, 2024
1 parent fb75556 commit b860766
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 7 deletions.
2 changes: 2 additions & 0 deletions infra/tf/better_uptime/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ resource "betteruptime_monitor" "monitor" {
]
email = var.better_uptime_notify
push = var.better_uptime_notify
verify_ssl = try(each.value.monitor.verify_ssl, false)
ssl_expiration = try(each.value.monitor.verify_ssl, false) ? 14 : null
}

resource "betteruptime_status_page_resource" "status_page_resource" {
Expand Down
1 change: 1 addition & 0 deletions infra/tf/better_uptime/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ variable "better_uptime_groups" {
id = string
url = string
public_name = string
verify_ssl = optional(bool)
}))
}))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,7 @@
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum by (pool_type, datacenter_id) (provision_draining_tainted_servers{cluster_id=~\"[[cluster_id]]\", datacenter_id=~\"[[datacenter_id]]\", provider_datacenter_id=~\"[[provider_datacenter_id]]\", pool_type=~\"[[pool_type]]\"})",
"expr": "sum by (pool_type, datacenter_id) (rivet_provision_draining_tainted_servers{cluster_id=~\"[[cluster_id]]\", datacenter_id=~\"[[datacenter_id]]\", provider_datacenter_id=~\"[[provider_datacenter_id]]\", pool_type=~\"[[pool_type]]\"})",
"instant": false,
"legendFormat": "{{pool_type}} ({{datacenter_id}})",
"range": true,
Expand Down
23 changes: 21 additions & 2 deletions lib/bolt/core/src/dep/terraform/gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ async fn vars(ctx: &ProjectContext) {
};

// Create monitors
let mm_monitors = cluster
let api_status_monitors = cluster
.datacenters
.iter()
.map(|(name_id, dc)| {
Expand All @@ -555,14 +555,33 @@ async fn vars(ctx: &ProjectContext) {
})
})
.collect::<Vec<_>>();
let gg_monitors = if let Some(domain_job) = ctx.domain_job() {
cluster
.datacenters
.values()
.map(|dc| {
json!({
"id": format!("{}-gg", dc.datacenter_id),
"url": format!("https://lobby.{}.{domain_job}/status", dc.datacenter_id),
"public_name": format!("{} (GG)", dc.display_name),
"verify_ssl": true,
})
})
.collect::<Vec<_>>()
} else {
Vec::new()
};

vars.insert(
"better_uptime_groups".into(),
json!([
{
"id": "mm",
"name": "Matchmaker",
"monitors": mm_monitors,
"monitors": api_status_monitors
.into_iter()
.chain(gg_monitors)
.collect::<Vec<_>>(),
},
{
"id": "cdn",
Expand Down
2 changes: 1 addition & 1 deletion svc/api/cloud/src/route/devices/links.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ pub async fn complete(
) -> GlobalResult<serde_json::Value> {
// Verify completer is a user. Cloud tokens should not be able to link other
// cloud tokens.
let rivet_claims::ent::User { .. } = ctx.auth().claims()?.as_user()?;
ctx.auth().claims()?.as_user()?;

// Verify has access to game
ctx.auth()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use indoc::indoc;

pub mod nomad;
pub mod ok_server;
pub mod rivet;
pub mod s3;
pub mod traefik;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/// Port number used by the ok server.
///
/// [`install`] substitutes this value for the `__OK_SERVER_PORT__` placeholder
/// in the embedded `ok_server.sh` install script.
pub const OK_SERVER_PORT: usize = 9999;

/// Builds the shell snippet that installs the ok server.
///
/// The snippet is the embedded `../files/ok_server.sh` with every
/// `__OK_SERVER_PORT__` placeholder replaced by [`OK_SERVER_PORT`].
///
/// When `initialize_immediately` is `true`, a non-blocking
/// `systemctl start` for `ok_server.service` is appended so the service is
/// started as part of running the snippet.
pub fn install(initialize_immediately: bool) -> String {
    let mut script = include_str!("../files/ok_server.sh")
        .replace("__OK_SERVER_PORT__", &OK_SERVER_PORT.to_string());

    if initialize_immediately {
        // `include_str!` embeds the file verbatim; if it does not end with a
        // newline the appended command would be glued onto the script's last
        // line and break both commands.
        if !script.ends_with('\n') {
            script.push('\n');
        }

        // Run script immediately
        script.push_str("systemctl start --no-block ok_server.service");
    }

    script
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use chirp_worker::prelude::*;
use indoc::formatdoc;

use super::{
ok_server::OK_SERVER_PORT,
vector::{TUNNEL_VECTOR_PORT, TUNNEL_VECTOR_TCP_JSON_PORT},
TUNNEL_API_INTERNAL_PORT,
};
Expand Down Expand Up @@ -282,3 +283,31 @@ pub async fn gg_static_config() -> GlobalResult<String> {

Ok(config)
}

/// Renders the dynamic Traefik configuration (TOML) for a datacenter's GG node.
///
/// The configuration routes `lobby.<datacenter_id>.<domain_job>/status` on
/// both the `lb-80` and `lb-443` entry points to a local service listening on
/// `OK_SERVER_PORT`.
///
/// Uses `unwrap!` on `util::env::domain_job()` with the message
/// "dns not enabled"; presumably this returns an error when no job domain is
/// configured — confirm against the macro definition.
pub fn gg_dynamic_config(datacenter_id: Uuid) -> GlobalResult<String> {
    let job_domain = unwrap!(util::env::domain_job(), "dns not enabled");

    // Per-datacenter base domain; the status endpoint lives under `lobby.`.
    let dc_domain = format!("{datacenter_id}.{job_domain}");

    // NOTE(review): the TLS `main` domain below has no `lobby.` prefix while
    // the router rules match `lobby.{main}` — confirm the served certificate
    // covers that host.
    let config = formatdoc!(
        r#"
        # Always returns 200 at /status
        [http.routers.ok-status]
        entryPoints = ["lb-80"]
        rule = "Host(`lobby.{main}`) && Path(`/status`)"
        service = "ok-service"
        [http.routers.ok-status-secure]
        entryPoints = ["lb-443"]
        rule = "Host(`lobby.{main}`) && Path(`/status`)"
        service = "ok-service"
        [[http.routers.ok-status-secure.tls.domains]]
        main = "{main}"
        sans = []
        [http.services.ok-service.loadBalancer]
        [[http.services.ok-service.loadBalancer.servers]]
        url = "http://127.0.0.1:{OK_SERVER_PORT}"
        "#,
        main = dc_domain,
    );

    Ok(config)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Installs the "ok server": a tiny responder that answers every connection
# with an empty HTTP 200, plus a systemd unit that runs it.
#
# __OK_SERVER_PORT__ is a placeholder; it is substituted with the real port
# before this script is executed.

# Write script
# (quoted 'EOF' keeps the body literal: nothing is expanded while writing it)
cat << 'EOF' > /usr/bin/ok_server.sh
#!/bin/bash
set -e
trap "exit" INT
while true; do
{ echo -e 'HTTP/1.1 200 OK\r\n\r\n'; } | nc -l -p __OK_SERVER_PORT__ -q 0;
done
EOF

chmod +x /usr/bin/ok_server.sh

# Create systemd service file
# The unit previously declared Type= twice (oneshot, then simple). systemd keeps
# the last assignment, so the effective type was already "simple"; the dead
# "oneshot" line is dropped because it contradicts a long-running loop.
cat << 'EOF' > /etc/systemd/system/ok_server.service
[Unit]
Description=Rivet Ok Server
Requires=network-online.target
After=network-online.target
[Service]
User=root
Group=root
ExecStart=/usr/bin/ok_server.sh
Type=simple
[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit and make it start on boot. Starting it now is left to
# the caller.
systemctl daemon-reload
systemctl enable ok_server.service
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub async fn gen_install(
GG_TRAEFIK_INSTANCE_NAME,
datacenter_id,
)?);
script.push(components::ok_server::install(initialize_immediately));
}
backend::cluster::PoolType::Ats => {
script.push(components::docker::install());
Expand Down Expand Up @@ -71,7 +72,10 @@ pub async fn gen_hook(server_token: &str) -> GlobalResult<String> {

// This script is templated on the server itself after fetching server data from the Rivet API (see gen_hook).
// After being templated, it is run.
pub async fn gen_initialize(pool_type: backend::cluster::PoolType) -> GlobalResult<String> {
pub async fn gen_initialize(
pool_type: backend::cluster::PoolType,
datacenter_id: Uuid,
) -> GlobalResult<String> {
let mut script = Vec::new();

let mut prometheus_targets = HashMap::new();
Expand Down Expand Up @@ -103,7 +107,7 @@ pub async fn gen_initialize(pool_type: backend::cluster::PoolType) -> GlobalResu
components::traefik::Instance {
name: GG_TRAEFIK_INSTANCE_NAME.to_string(),
static_config: components::traefik::gg_static_config().await?,
dynamic_config: String::new(),
dynamic_config: components::traefik::gg_dynamic_config(datacenter_id)?,
tcp_server_transports: Default::default(),
},
));
Expand Down
2 changes: 1 addition & 1 deletion svc/pkg/cluster/worker/src/workers/server_install/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ async fn worker(ctx: &OperationContext<cluster::msg::server_install::Message>) -
)
.await?;
let hook_script = install_scripts::gen_hook(server_token).await?;
let initialize_script = install_scripts::gen_initialize(pool_type).await?;
let initialize_script = install_scripts::gen_initialize(pool_type, datacenter_id).await?;

// Spawn blocking thread for ssh (no async support)
tokio::task::spawn_blocking(move || {
Expand Down

0 comments on commit b860766

Please sign in to comment.