File tree Expand file tree Collapse file tree 4 files changed +36
-7
lines changed
svc/pkg/cluster/worker/src/workers/server_install Expand file tree Collapse file tree 4 files changed +36
-7
lines changed Original file line number Diff line number Diff line change @@ -26,17 +26,25 @@ pub fn fetch_tls(
2626 initialize_immediately : bool ,
2727 server_token : & str ,
2828 traefik_instance_name : & str ,
29+ datacenter_id : Uuid ,
2930) -> GlobalResult < String > {
3031 let mut script = include_str ! ( "../files/rivet_fetch_tls.sh" )
3132 . replace ( "__NAME__" , traefik_instance_name)
3233 . replace ( "__SERVER_TOKEN__" , server_token)
3334 . replace (
3435 "__TUNNEL_API_INTERNAL_PORT__" ,
3536 & TUNNEL_API_INTERNAL_PORT . to_string ( ) ,
36- ) ;
37+ )
38+ . replace ( "__DATACENTER_ID__" , & datacenter_id. to_string ( ) ) ;
3739
3840 if initialize_immediately {
39- script. push_str ( "systemctl start rivet_fetch_tls.timer\n " ) ;
41+ // Start timer & run script immediately
42+ script. push_str ( indoc ! (
43+ "
44+ systemctl start rivet_fetch_tls.timer
45+ systemctl start --no-block rivet_fetch_tls.service
46+ "
47+ ) ) ;
4048 }
4149
4250 Ok ( script)
Original file line number Diff line number Diff line change 1+ # Create dir to hold TLS certs
2+ #
3+ # The Traefik install script also creates these directories (and chowns them),
4+ # but we need the dirs to exist for the rivet_fetch_tls.sh script to run before
5+ # Traefik is installed when using initialize_immediately.
6+ mkdir -p /etc/__NAME__/dynamic/tls /etc/__NAME__/tls
7+
18# Write script
29cat << 'EOF ' > /usr/bin/rivet_fetch_tls.sh
310#!/usr/bin/env bash
@@ -6,12 +13,13 @@ set -eu -o pipefail
613CERT_ID="job"
714STUB="/etc/__NAME__/tls/$CERT_ID"
815
16+
917# Retry the request every 5 seconds until it succeeds
1018while true; do
1119 response=$(
1220 curl -f \
1321 -H "Authorization: Bearer __SERVER_TOKEN__" \
14- "http://127.0.0.1:__TUNNEL_API_INTERNAL_PORT__/provision/datacenters/___DATACENTER_ID___ /tls"
22+ "http://127.0.0.1:__TUNNEL_API_INTERNAL_PORT__/provision/datacenters/__DATACENTER_ID__ /tls"
1523 ) && break || sleep 5
1624done
1725
@@ -59,7 +67,12 @@ Requires=network-online.target
5967After=network-online.target
6068
6169[Timer]
62- OnUnitInactiveSec=1h
70+ # Run immediately on startup
71+ OnBootSec=0
72+ # Trigger every hour
73+ OnCalendar=*:0
74+ # Prevent thundering herd
75+ RandomizedDelaySec=60
6376Unit=rivet_fetch_tls.service
6477
6578[Install]
Original file line number Diff line number Diff line change @@ -14,6 +14,7 @@ pub async fn gen_install(
1414 pool_type : backend:: cluster:: PoolType ,
1515 initialize_immediately : bool ,
1616 server_token : & str ,
17+ datacenter_id : Uuid ,
1718) -> GlobalResult < String > {
1819 // MARK: Common (pre)
1920 let mut script = vec ! [
@@ -41,6 +42,7 @@ pub async fn gen_install(
4142 initialize_immediately,
4243 server_token,
4344 GG_TRAEFIK_INSTANCE_NAME ,
45+ datacenter_id,
4446 ) ?) ;
4547 }
4648 backend:: cluster:: PoolType :: Ats => {
Original file line number Diff line number Diff line change @@ -13,6 +13,8 @@ mod install_scripts;
1313
1414#[ worker( name = "cluster-server-install" , timeout = 200 ) ]
1515async fn worker ( ctx : & OperationContext < cluster:: msg:: server_install:: Message > ) -> GlobalResult < ( ) > {
16+ let datacenter_id = unwrap ! ( ctx. datacenter_id) . as_uuid ( ) ;
17+
1618 // Check for stale message
1719 if ctx. req_dt ( ) > util:: duration:: hours ( 1 ) {
1820 tracing:: warn!( "discarding stale message" ) ;
@@ -71,8 +73,13 @@ async fn worker(ctx: &OperationContext<cluster::msg::server_install::Message>) -
7173 . await ?;
7274 let server_token = & unwrap_ref ! ( token_res. token) . token ;
7375
74- let install_script =
75- install_scripts:: gen_install ( pool_type, ctx. initialize_immediately , server_token) . await ?;
76+ let install_script = install_scripts:: gen_install (
77+ pool_type,
78+ ctx. initialize_immediately ,
79+ server_token,
80+ datacenter_id,
81+ )
82+ . await ?;
7683 let hook_script = install_scripts:: gen_hook ( server_token) . await ?;
7784 let initialize_script = install_scripts:: gen_initialize ( pool_type) . await ?;
7885
@@ -161,7 +168,6 @@ async fn worker(ctx: &OperationContext<cluster::msg::server_install::Message>) -
161168 . await ?;
162169
163170 // Scale to get rid of tainted servers
164- let datacenter_id = unwrap_ref ! ( ctx. datacenter_id) . as_uuid ( ) ;
165171 msg ! ( [ ctx] @recursive cluster:: msg:: datacenter_scale( datacenter_id) {
166172 datacenter_id: ctx. datacenter_id,
167173 } )
You can’t perform that action at this time.
0 commit comments