-
Notifications
You must be signed in to change notification settings - Fork 66
/
rke2-init.sh
218 lines (174 loc) · 6.17 KB
/
rke2-init.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/bin/sh
# Node role and cloud-controller settings. The right-hand sides appear to be
# placeholders substituted by a template renderer before this script runs
# (NOTE(review): confirm against the code that renders this file).
TYPE="${type}"
CCM="${ccm}"
CCM_EXTERNAL="${ccm_external}"
export TYPE CCM CCM_EXTERNAL
# info writes its arguments to stdout, prefixed with the "[INFO] " tag.
info() {
  # Prepend the tag to the argument list so a single echo prints everything.
  set -- "[INFO] " "$@"
  echo "$@"
}
# warn writes its arguments to stderr, prefixed with the "[WARN] " tag.
warn() {
  # Prepend the tag to the argument list so a single echo prints everything.
  set -- "[WARN] " "$@"
  echo "$@" 1>&2
}
# fatal writes its arguments to stderr, prefixed with the "[ERROR] " tag, and
# then terminates the script with exit status 1.
fatal() {
  set -- "[ERROR] " "$@"
  echo "$@" 1>&2
  exit 1
}
# timestamp prints the current local time as "YYYY-MM-DD HH:MM:SS".
timestamp() {
  # %F is shorthand for %Y-%m-%d and %T for %H:%M:%S.
  date '+%F %T'
}
# config creates the rke2 configuration directory and appends the additional
# user-defined configuration to /etc/rancher/rke2/config.yaml.
# The heredoc delimiter is unquoted, so the shell still performs its own
# expansions on the appended text. Everything between the heredoc markers is
# written to the file verbatim, so no explanatory comments may be placed there.
config() {
# -p: succeed if the directory already exists and create missing parents.
mkdir -p "/etc/rancher/rke2"
# Append (>>) rather than overwrite, preserving any existing file content.
cat <<EOF >> "/etc/rancher/rke2/config.yaml"
# Additional user defined configuration
${config}
EOF
}
# append_config appends one line to the rke2 configuration file.
# Arguments: $1 - the line to append (a newline is added).
append_config() {
  # printf writes the argument verbatim; echo may interpret backslash escapes
  # depending on which shell provides /bin/sh.
  printf '%s\n' "$1" >> "/etc/rancher/rke2/config.yaml"
}
# append_config_san adds the server url as a tls-san entry in the rke2
# configuration file. When the file already has a "tls-san:" line, the entry
# is inserted directly after it; otherwise a new "tls-san:" list is appended
# to the end of the file.
append_config_san() {
  if grep -q "^tls-san:$" /etc/rancher/rke2/config.yaml; then
    # Existing list: insert the new item right below the key.
    sed -i "/^tls-san:$/a \ \ - ${server_url}" /etc/rancher/rke2/config.yaml
  else
    # No list yet: append the key followed by its single item.
    {
      echo "tls-san:"
      echo " - ${server_url}"
    } >> /etc/rancher/rke2/config.yaml
  fi
}
# The most simple "leader election" you've ever seen in your life.
# Lists the healthy instances of this instance's Auto Scaling group and treats
# the smallest instance id as the leader.
# Globals written: TOKEN, instance_id, asg_name, instances, leader, and
#                  SERVER_TYPE (set to "leader" only when this instance wins;
#                  otherwise left as the caller set it).
elect_leader() {
  # Fetch other running instances in ASG. The metadata service is queried with
  # a session token obtained from the token endpoint first.
  TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
  instance_id=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-id)
  asg_name=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$instance_id" --query 'AutoScalingInstances[*].AutoScalingGroupName' --output text)
  instances=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name "$asg_name" --query 'AutoScalingGroups[*].Instances[?HealthStatus==`Healthy`].InstanceId' --output text)

  # Identify the leader as the first instance id in byte order. Instance ids
  # are not numbers, so a numeric sort adds nothing; pinning the C locale makes
  # every node compute the same ordering regardless of its locale settings.
  # $instances is intentionally unquoted: the whitespace-separated ids must be
  # split into one id per line.
  leader=$(printf '%s\n' $instances | LC_ALL=C sort | head -n1)

  info "Current instance: $instance_id | Leader instance: $leader"
  if [ "$instance_id" = "$leader" ]; then
    SERVER_TYPE="leader"
    info "Electing as cluster leader"
  else
    info "Electing as joining server"
  fi
}
# identify decides whether this server bootstraps the cluster or joins it.
# It pings the supervisor endpoint on port 9345 of the server url: an HTTP 200
# means a cluster already exists and this node joins it as a plain server;
# any other result (including no usable response at all) triggers the
# leader election.
# Globals written: SERVER_TYPE ("server" by default; elect_leader may change
#                  it), supervisor_status.
identify() {
  # Default to server
  SERVER_TYPE="server"

  supervisor_status=$(curl --write-out '%%{http_code}' -sk --output /dev/null "https://${server_url}:9345/ping")

  # String comparison on purpose: an empty or non-numeric status must count as
  # "unavailable" instead of making the test itself fail.
  if [ "$supervisor_status" = "200" ]; then
    info "API server available, identifying as server joining existing cluster"
  else
    info "API server unavailable, performing simple leader election"
    elect_leader
  fi
}
# cp_wait blocks until the supervisor endpoint on port 9345 of the server url
# answers its ping with HTTP 200, polling every 10 seconds.
# Globals written: supervisor_status.
cp_wait() {
  until
    supervisor_status=$(curl --write-out '%%{http_code}' -sk --output /dev/null https://${server_url}:9345/ping)
    [ "$supervisor_status" -eq 200 ]
  do
    info "Waiting for cluster to be ready..."
    sleep 10
  done

  info "Cluster is ready"
  # Let things settle down for a bit, not required
  # TODO: Remove this after some testing
  sleep 10
}
# local_cp_api_wait blocks until the local kube-apiserver accepts TCP
# connections on port 6443 and kubectl reports at least one node carrying the
# node-role.kubernetes.io/master label with condition Ready=True.
# Globals written: PATH (rke2 bin directory appended), KUBECONFIG (both
#                  exported), nodereadypath.
local_cp_api_wait() {
  export PATH=$PATH:/var/lib/rancher/rke2/bin
  export KUBECONFIG=/etc/rancher/rke2/rke2.yaml

  while true; do
    info "$(timestamp) Waiting for kube-apiserver..."
    # bash is invoked explicitly because the /dev/tcp pseudo-device is a bash
    # feature; opening it succeeds only when the port accepts connections.
    if timeout 1 bash -c "true <>/dev/tcp/localhost/6443" 2>/dev/null; then
      break
    fi
    sleep 5
  done

  # Prints "<node>:<type>=<status>;..." for every node so grep can look for a
  # Ready=True condition. grep's output is deliberately not silenced, so the
  # matching line ends up in the log.
  nodereadypath='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'
  until kubectl get nodes --selector='node-role.kubernetes.io/master' -o jsonpath="$nodereadypath" | grep -E "Ready=True"; do
    info "$(timestamp) Waiting for servers to be ready..."
    sleep 5
  done

  info "$(timestamp) servers are ready!"
}
# fetch_token downloads the rke2 join token from the S3 token object and
# appends it as the "token" entry of /etc/rancher/rke2/config.yaml.
# The aws cli default region is first set from the instance metadata service.
# Exits through fatal when no valid aws caller identity is available or the
# token object cannot be read.
# Globals written: TOKEN (metadata session token), token (join token).
fetch_token() {
  info "Fetching rke2 join token..."

  TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
  aws configure set default.region \
    "$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/placement/region)"

  # Validate aws caller identity, fatal if not valid
  aws sts get-caller-identity 2>/dev/null || fatal "No valid aws caller identity"

  # Either the token is fetched from the s3 bucket, or the script fails here.
  token=$(aws s3 cp "s3://${token_bucket}/${token_object}" - 2>/dev/null) \
    || fatal "Could not find cluster token from s3"
  info "Found token from s3 object"

  echo "token: $${token}" >> "/etc/rancher/rke2/config.yaml"
}
# upload waits for rke2 to write its kubeconfig and then copies it to the s3
# token bucket as rke2.yaml, with the loopback address replaced by the server
# url. Exits through fatal if the kubeconfig has not appeared after the retry
# budget is used up.
# Globals written: retries.
upload() {
  # Wait for kubeconfig to exist, then upload to s3 bucket
  retries=10
  while [ ! -f /etc/rancher/rke2/rke2.yaml ]; do
    sleep 10
    if [ "$retries" = 0 ]; then
      fatal "Failed to create kubeconfig"
    fi
    # POSIX arithmetic expansion: the script runs under /bin/sh, where the
    # bash-only "((...))" form would not decrement and the loop would never end.
    retries=$((retries - 1))
  done

  # Replace localhost with server url and upload to s3 bucket. The dots are
  # escaped so only the literal loopback address is rewritten.
  sed "s/127\.0\.0\.1/${server_url}/g" /etc/rancher/rke2/rke2.yaml | aws s3 cp - "s3://${token_bucket}/rke2.yaml" --content-type "text/yaml"
}
# Main userdata sequence: write the base configuration and join token, apply
# cloud-controller settings, then enable (and optionally start) rke2 either as
# a server or as an agent depending on TYPE.
{
  info "Beginning rke2-init userdata"
  config
  fetch_token

  # Variables in the tests below are quoted so that an empty value compares
  # as "not equal" instead of producing a test syntax error.
  if [ "$CCM" = "true" ]; then
    append_config 'cloud-provider-name: "external"'
    append_config 'disable-cloud-controller: "true"'
  fi

  # Disable the nm-cloud-setup service only when it is enabled. The timer unit
  # is disabled regardless of the service state.
  if systemctl is-enabled --quiet nm-cloud-setup; then
    systemctl disable nm-cloud-setup
  fi
  systemctl disable nm-cloud-setup.timer

  if [ "$TYPE" = "server" ]; then
    # Initialize server
    identify
    append_config_san

    if [ "$SERVER_TYPE" = "server" ]; then # additional server joining an existing cluster
      append_config 'server: https://${server_url}:9345'
      # Wait for cluster to exist, then init another server
      cp_wait
    fi

    systemctl enable rke2-server
    systemctl daemon-reload

    export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
    export PATH=$PATH:/var/lib/rancher/rke2/bin

    if [ "$SERVER_TYPE" = "leader" ]; then
      systemctl start rke2-server
      # Upload kubeconfig to s3 bucket
      upload
      # For servers, wait for apiserver to be ready before continuing so that `post_userdata` can operate on the cluster
      local_cp_api_wait
    elif ${rke2_start}; then
      # Joining servers are started only when the rendered start flag is true.
      systemctl start rke2-server
    fi
  else
    append_config 'server: https://${server_url}:9345'
    # Default to agent
    systemctl enable rke2-agent
    systemctl daemon-reload
    if ${rke2_start}; then
      systemctl start rke2-agent
    fi
  fi

  info "Ending rke2-init userdata"
}