-
Notifications
You must be signed in to change notification settings - Fork 457
/
e2e.sh
executable file
·292 lines (243 loc) · 10.8 KB
/
e2e.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
#!/usr/bin/env bash
# This file contains common environment variables and setup logic for all test
# scripts. It assumes that the following environment variables are set by the
# Makefile:
# - PLATFORM
# - TAG
# - SHA
# - REGISTRY
# - IMAGE
# - INSTALLER_IMAGE
# - ARTIFACTS
# - TALOSCTL
# - INTEGRATION_TEST
# - MODULE_SIG_VERIFY
# - KERNEL_MODULE_SIGNING_PUBLIC_KEY
# - SHORT_INTEGRATION_TEST
# - CUSTOM_CNI_URL
# - KUBECTL
# - KUBESTR
# - HELM
# - CLUSTERCTL
# - CILIUM_CLI
#
# Some environment variables set in this file (e.g. TALOS_VERSION and KUBERNETES_VERSION)
# are referenced by https://github.com/siderolabs/cluster-api-templates.
# See other e2e-*.sh scripts.
# Strict mode plus command tracing for every script that sources this file.
set -o errexit -o pipefail -o nounset -o xtrace

# Per-platform scratch directory holding the generated client configs.
TMP="/tmp/e2e/${PLATFORM}"
mkdir -p "${TMP}"

# Talos
TALOSCONFIG="${TMP}/talosconfig"
TALOS_VERSION=v1.1
export TALOSCONFIG TALOS_VERSION

# Kubernetes
KUBECONFIG="${TMP}/kubeconfig"
KUBERNETES_VERSION="${KUBERNETES_VERSION:-1.29.3}"
export KUBECONFIG KUBERNETES_VERSION

# Cluster naming, per-wait timeout (seconds) and expected node count.
NAME_PREFIX="talos-e2e-${SHA}-${PLATFORM}"
TIMEOUT=1200
NUM_NODES="${TEST_NUM_NODES:-6}"
export NAME_PREFIX TIMEOUT NUM_NODES

# default values, overridden by talosctl cluster create tests
PROVISIONER=
CLUSTER_NAME=
# Delete the CAPI cluster object named ${NAME_PREFIX} using the kubeconfig at
# /tmp/e2e/docker/kubeconfig. Returns the exit status of kubectl.
function cleanup_capi {
  local kubeconfig_path=/tmp/e2e/docker/kubeconfig

  ${KUBECTL} --kubeconfig "${kubeconfig_path}" delete cluster "${NAME_PREFIX}"
}
# Exit the whole script with status 1 once the epoch-seconds deadline in $1 has
# passed. $2 describes what was being waited for and is printed to stderr.
_capi_check_deadline() {
  if (( $(date +%s) > $1 )); then
    echo "create_cluster_capi: timed out waiting for ${2}" >&2
    exit 1
  fi
}

# Create a cluster via CAPI.
#
# Applies "${TMP}/cluster.yaml" through the kubeconfig at
# /tmp/e2e/docker/kubeconfig and then polls (every 10 seconds) through a series
# of waits: machine name, bootstrap config ref, talosconfig, external IP,
# kubeconfig, node count, node readiness and control plane count. Each wait has
# its own deadline of TIMEOUT seconds; missing one exits the script with
# status 1.
#
# Globals read: KUBECTL, TALOSCTL, TMP, TALOSCONFIG, NAME_PREFIX, TIMEOUT,
#               NUM_NODES
# Globals set:  FIRST_CP_NODE, FIRST_CP_TALOSCONFIG, MASTER_IP
# Side effects: installs an EXIT trap that runs cleanup_capi; overwrites
#               "${TALOSCONFIG}"; calls get_kubeconfig.
function create_cluster_capi {
  local capi_kubeconfig=/tmp/e2e/docker/kubeconfig
  local cp_selector="cluster.x-k8s.io/control-plane,cluster.x-k8s.io/cluster-name=${NAME_PREFIX}"
  local external_ip_tmpl='{{range .status.addresses}}{{if eq .type "ExternalIP"}}{{.address}}{{end}}{{end}}'
  local timeout

  trap cleanup_capi EXIT

  ${KUBECTL} --kubeconfig "${capi_kubeconfig}" apply -f "${TMP}/cluster.yaml"

  # Wait for first controlplane machine to have a name
  timeout=$(($(date +%s) + TIMEOUT))
  until [ -n "$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine -l "${cp_selector}" --all-namespaces -o json | jq -re '.items[0].metadata.name | select (.!=null)')" ]; do
    _capi_check_deadline "${timeout}" "the first control plane machine to get a name"
    sleep 10
    # Diagnostic output only: a failure here must not abort the wait under errexit.
    ${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine -l "${cp_selector}" --all-namespaces || true
  done

  FIRST_CP_NODE=$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine -l "${cp_selector}" --all-namespaces -o json | jq -r '.items[0].metadata.name')

  # Wait for first controlplane machine to have a talosconfig ref
  timeout=$(($(date +%s) + TIMEOUT))
  until [ -n "$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine "${FIRST_CP_NODE}" -o json | jq -re '.spec.bootstrap.configRef.name | select (.!=null)')" ]; do
    _capi_check_deadline "${timeout}" "machine ${FIRST_CP_NODE} to get a bootstrap config ref"
    sleep 10
  done

  FIRST_CP_TALOSCONFIG=$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine "${FIRST_CP_NODE}" -o json | jq -re '.spec.bootstrap.configRef.name')

  # Wait for talosconfig in cm then dump it out
  timeout=$(($(date +%s) + TIMEOUT))
  until [ -n "$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get talosconfig "${FIRST_CP_TALOSCONFIG}" -o jsonpath='{.status.talosConfig}')" ]; do
    _capi_check_deadline "${timeout}" "talosconfig ${FIRST_CP_TALOSCONFIG} to be populated"
    sleep 10
  done

  ${KUBECTL} --kubeconfig "${capi_kubeconfig}" get talosconfig "${FIRST_CP_TALOSCONFIG}" -o jsonpath='{.status.talosConfig}' > "${TALOSCONFIG}"

  # Wait until we have an IP for first controlplane node
  timeout=$(($(date +%s) + TIMEOUT))
  until [ -n "$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine -o go-template --template="${external_ip_tmpl}" "${FIRST_CP_NODE}")" ]; do
    _capi_check_deadline "${timeout}" "machine ${FIRST_CP_NODE} to get an ExternalIP"
    sleep 10
  done

  MASTER_IP=$(${KUBECTL} --kubeconfig "${capi_kubeconfig}" get machine -o go-template --template="${external_ip_tmpl}" "${FIRST_CP_NODE}")

  "${TALOSCTL}" config endpoint "${MASTER_IP}"
  "${TALOSCTL}" config node "${MASTER_IP}"

  # Wait for the kubeconfig from first cp node
  timeout=$(($(date +%s) + TIMEOUT))
  until get_kubeconfig; do
    _capi_check_deadline "${timeout}" "the kubeconfig from ${MASTER_IP}"
    sleep 10
  done

  # Wait for nodes to check in.
  # grep -x matches the whole line, so e.g. 16 nodes do not satisfy NUM_NODES=6.
  # Output is discarded with a redirect rather than -q so grep reads all input.
  timeout=$(($(date +%s) + TIMEOUT))
  until ${KUBECTL} get nodes -o go-template='{{ len .items }}' | grep -x -- "${NUM_NODES}" >/dev/null; do
    _capi_check_deadline "${timeout}" "${NUM_NODES} nodes to check in"
    ${KUBECTL} get nodes -o wide && :
    sleep 10
  done

  # Wait for nodes to be ready
  timeout=$(($(date +%s) + TIMEOUT))
  until ${KUBECTL} wait --timeout=1s --for=condition=ready=true --all nodes > /dev/null; do
    _capi_check_deadline "${timeout}" "all nodes to become ready"
    ${KUBECTL} get nodes -o wide && :
    sleep 10
  done

  # Verify that we have an HA controlplane (exactly 3 control plane nodes)
  timeout=$(($(date +%s) + TIMEOUT))
  until ${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' -o go-template='{{ len .items }}' | grep -x -- 3 >/dev/null; do
    _capi_check_deadline "${timeout}" "3 control plane nodes"
    ${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' && :
    sleep 10
  done
}
# Default argument lists appended to the integration test command line by the
# run_talos_integration_test* functions; those functions may replace them.
TEST_SHORT=()
TEST_RUN=(-test.run .)
# Run the integration test binary (${INTEGRATION_TEST}) with -talos.failfast.
#
# Globals read:    SHORT_INTEGRATION_TEST, INTEGRATION_TEST_RUN, TALOSCTL,
#                  KUBECTL, PROVISIONER, CLUSTER_NAME, REGISTRY, EXTRA_TEST_ARGS
# Globals updated: TEST_SHORT (set to -test.short unless SHORT_INTEGRATION_TEST
#                  is unset, empty or "no"), TEST_RUN (set to -test.run
#                  <INTEGRATION_TEST_RUN> unless that is unset, empty or "no")
# Returns: the exit status of the test binary.
run_talos_integration_test() {
  if [[ "${SHORT_INTEGRATION_TEST:-no}" != "no" ]]; then
    TEST_SHORT=("-test.short")
  fi

  if [[ "${INTEGRATION_TEST_RUN:-no}" != "no" ]]; then
    TEST_RUN=("-test.run" "${INTEGRATION_TEST_RUN}")
  fi

  local -a test_cmd=(
    "${INTEGRATION_TEST}"
    -test.v
    -talos.failfast
    -talos.talosctlpath "${TALOSCTL}"
    -talos.kubectlpath "${KUBECTL}"
    -talos.provisioner "${PROVISIONER}"
    -talos.name "${CLUSTER_NAME}"
    -talos.image "${REGISTRY}/siderolabs/talos"
    "${EXTRA_TEST_ARGS[@]}"
    "${TEST_RUN[@]}"
    "${TEST_SHORT[@]}"
  )

  "${test_cmd[@]}"
}
# Run the integration test binary (${INTEGRATION_TEST}) with the Kubernetes
# endpoint fixed to 127.0.0.1:6443 (and without -talos.failfast).
#
# Globals read:    SHORT_INTEGRATION_TEST, INTEGRATION_TEST_RUN, TALOSCTL,
#                  KUBECTL, PROVISIONER, CLUSTER_NAME, REGISTRY, EXTRA_TEST_ARGS
# Globals updated: TEST_SHORT (set to -test.short unless SHORT_INTEGRATION_TEST
#                  is unset, empty or "no"), TEST_RUN (set to -test.run
#                  <INTEGRATION_TEST_RUN> unless that is unset, empty or "no")
# Returns: the exit status of the test binary.
run_talos_integration_test_docker() {
  if [[ "${SHORT_INTEGRATION_TEST:-no}" != "no" ]]; then
    TEST_SHORT=("-test.short")
  fi

  if [[ "${INTEGRATION_TEST_RUN:-no}" != "no" ]]; then
    TEST_RUN=("-test.run" "${INTEGRATION_TEST_RUN}")
  fi

  local -a test_cmd=(
    "${INTEGRATION_TEST}"
    -test.v
    -talos.talosctlpath "${TALOSCTL}"
    -talos.kubectlpath "${KUBECTL}"
    -talos.k8sendpoint 127.0.0.1:6443
    -talos.provisioner "${PROVISIONER}"
    -talos.name "${CLUSTER_NAME}"
    -talos.image "${REGISTRY}/siderolabs/talos"
    "${EXTRA_TEST_ARGS[@]}"
    "${TEST_RUN[@]}"
    "${TEST_SHORT[@]}"
  )

  "${test_cmd[@]}"
}
# Run `talosctl conformance kubernetes` in the given mode.
# Arguments: $1 - value passed through as --mode
# Returns:   the exit status of talosctl.
run_kubernetes_conformance_test() {
  local mode_flag="--mode=${1}"

  "${TALOSCTL}" conformance kubernetes "${mode_flag}"
}
# Run `talosctl health` with the --run-e2e flag.
# Returns: the exit status of talosctl.
run_kubernetes_integration_test() {
  local -a health_args=(health --run-e2e)

  "${TALOSCTL}" "${health_args[@]}"
}
# Apply hack/test/cis/kube-bench-master.yaml (relative to ${PWD}), wait up to
# 300s for job/kube-bench-master to complete, then print the job's logs.
run_control_plane_cis_benchmark() {
  local bench_manifest="${PWD}/hack/test/cis/kube-bench-master.yaml"
  local bench_job="job/kube-bench-master"

  ${KUBECTL} apply -f "${bench_manifest}"
  # Output of the wait itself is not interesting, only its exit status.
  ${KUBECTL} wait --timeout=300s --for=condition=complete "${bench_job}" >/dev/null
  ${KUBECTL} logs "${bench_job}"
}
# Apply hack/test/cis/kube-bench-node.yaml (relative to ${PWD}), wait up to
# 300s for job/kube-bench-node to complete, then print the job's logs.
run_worker_cis_benchmark() {
  local bench_manifest="${PWD}/hack/test/cis/kube-bench-node.yaml"
  local bench_job="job/kube-bench-node"

  ${KUBECTL} apply -f "${bench_manifest}"
  # Output of the wait itself is not interesting, only its exit status.
  ${KUBECTL} wait --timeout=300s --for=condition=complete "${bench_job}" >/dev/null
  ${KUBECTL} logs "${bench_job}"
}
# Remove any existing "${TMP}/kubeconfig", then run `talosctl kubeconfig` with
# ${TMP} as its target argument.
# Returns: the exit status of talosctl.
get_kubeconfig() {
  local stale_kubeconfig="${TMP}/kubeconfig"

  rm -f "${stale_kubeconfig}"
  "${TALOSCTL}" kubeconfig "${TMP}"
}
# Print diagnostic state of the cluster: Talos services on every node, then the
# Kubernetes node and pod listings.
#
# The node list passed to `talosctl -n` is built from the InternalIP addresses
# reported by `kubectl get nodes`, joined with commas.
#
# Globals read: KUBECTL, TALOSCTL
function dump_cluster_state {
  local nodes

  # Command substitution drops the trailing newline first, so replacing the
  # remaining whitespace separators cannot leave a dangling comma in the list.
  nodes=$(${KUBECTL} get nodes -o jsonpath="{.items[*].status.addresses[?(@.type == 'InternalIP')].address}")
  nodes=${nodes//[[:space:]]/,}

  "${TALOSCTL}" -n "${nodes}" services
  ${KUBECTL} get nodes -o wide
  ${KUBECTL} get pods --all-namespaces -o wide
}
# Populate the global array REGISTRY_MIRROR_FLAGS with --registry-mirror flags.
#
# When CI is "true": one flag per known registry, pointing at
# http://<addr>:5000 where <addr> is the address python3 resolves for
# registry-<registry with dots replaced by dashes>.ci.svc.
#
# Otherwise: the value already present in REGISTRY_MIRROR_FLAGS (scalar or
# array) is kept, minus empty entries. An unset or empty variable yields an
# empty array, so "${REGISTRY_MIRROR_FLAGS[@]}" expands to no arguments instead
# of a single empty one.
#
# Globals read:    CI
# Globals written: REGISTRY_MIRROR_FLAGS
function build_registry_mirrors {
  local registry service addr flag
  local -a kept_flags=()

  if [[ "${CI:-false}" == "true" ]]; then
    REGISTRY_MIRROR_FLAGS=()

    for registry in docker.io registry.k8s.io quay.io gcr.io ghcr.io registry.dev.talos-systems.io; do
      service="registry-${registry//./-}.ci.svc"
      addr=$(python3 -c "import socket; print(socket.gethostbyname('${service}'))")

      REGISTRY_MIRROR_FLAGS+=("--registry-mirror=${registry}=http://${addr}:5000")
    done
  else
    # use the value from the environment, if present
    for flag in "${REGISTRY_MIRROR_FLAGS[@]:-}"; do
      if [[ -n "${flag}" ]]; then
        kept_flags+=("${flag}")
      fi
    done

    REGISTRY_MIRROR_FLAGS=("${kept_flags[@]}")
  fi
}
# Install the rook-ceph and rook-ceph-cluster Helm charts (v1.8.2) into the
# rook-ceph namespace, wait for the CephCluster resource to report
# Ready / Created / HEALTH_OK, then run `kubestr fio` against the ceph-block
# storage class.
#
# Globals read: HELM, KUBECTL, KUBESTR, TMP
run_csi_tests() {
  local rook_chart_version="v1.8.2"
  local ceph_cluster="cephclusters.ceph.rook.io/rook-ceph"
  local release status_condition

  ${HELM} repo add rook-release https://charts.rook.io/release
  ${HELM} repo update

  for release in rook-ceph rook-ceph-cluster; do
    ${HELM} upgrade --install --version="${rook_chart_version}" --set=pspEnable=false --create-namespace --namespace rook-ceph "${release}" "rook-release/${release}"
  done

  ${KUBECTL} label ns rook-ceph pod-security.kubernetes.io/enforce=privileged

  # wait for the controller to populate the status field
  sleep 30

  for status_condition in '{.status.phase}=Ready' '{.status.state}=Created'; do
    ${KUBECTL} --namespace rook-ceph wait --timeout=900s --for=jsonpath="${status_condition}" "${ceph_cluster}"
  done

  # .status.ceph is populated later only
  sleep 60

  ${KUBECTL} --namespace rook-ceph wait --timeout=900s --for=jsonpath='{.status.ceph.health}=HEALTH_OK' "${ceph_cluster}"

  # hack until https://github.com/kastenhq/kubestr/issues/101 is addressed
  KUBERNETES_SERVICE_HOST="" KUBECONFIG="${TMP}/kubeconfig" "${KUBESTR}" fio --storageclass ceph-block --size 10G
}
# Install Cilium with the cilium CLI, wait for it to report ready, and run the
# Cilium connectivity test in a freshly created, privileged cilium-test
# namespace.
#
# The cilium-test namespace is deleted after the connectivity test whether or
# not the test passed. The function then returns the connectivity test's
# status if that was non-zero, otherwise the status of the namespace deletion.
#
# Globals read: CILIUM_CLI, KUBECTL,
#               WITH_KUBESPAN ("true" sets encryption.nodeEncryption=no and
#                 adds --test=!node-to-node-encryption to the connectivity test),
#               CILIUM_INSTALL_TYPE ("strict" sets kubeProxyReplacement=true
#                 and points Cilium at localhost:13336 for the API server)
# Globals set:  CILIUM_NODE_ENCRYPTION, CILIUM_TEST_EXTRA_ARGS
function install_and_run_cilium_cni_tests {
  local kube_proxy_replacement
  local -a api_server_args=()
  local -a install_args
  local test_rc=0
  local cleanup_rc=0

  get_kubeconfig

  case "${WITH_KUBESPAN:-false}" in
    true)
      CILIUM_NODE_ENCRYPTION=no
      CILIUM_TEST_EXTRA_ARGS=('--test=!node-to-node-encryption')
      ;;
    *)
      CILIUM_NODE_ENCRYPTION=yes
      CILIUM_TEST_EXTRA_ARGS=()
      ;;
  esac

  case "${CILIUM_INSTALL_TYPE:-none}" in
    strict)
      kube_proxy_replacement=true
      api_server_args=(
        --set=k8sServiceHost=localhost
        --set=k8sServicePort=13336
      )
      ;;
    *)
      # explicitly setting kubeProxyReplacement=false since by the time cilium cli runs talos
      # has not yet applied the kube-proxy manifests
      kube_proxy_replacement=false
      ;;
  esac

  # The brace lists are quoted so the shell passes them through literally
  # instead of brace-expanding them.
  install_args=(
    --set=ipam.mode=kubernetes
    "--set=kubeProxyReplacement=${kube_proxy_replacement}"
    "--set=encryption.nodeEncryption=${CILIUM_NODE_ENCRYPTION}"
    --set=securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}"
    --set=securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}"
    --set=cgroup.autoMount.enabled=false
    --set=cgroup.hostRoot=/sys/fs/cgroup
    "${api_server_args[@]}"
  )

  ${CILIUM_CLI} install "${install_args[@]}"

  ${CILIUM_CLI} status --wait --wait-duration=10m

  ${KUBECTL} delete ns --ignore-not-found cilium-test
  ${KUBECTL} create ns cilium-test
  ${KUBECTL} label ns cilium-test pod-security.kubernetes.io/enforce=privileged

  # --external-target added, as default 'one.one.one.one' is buggy, and CloudFlare status is of course "all healthy"
  # Capture the status instead of letting errexit fire, so the namespace is
  # always cleaned up and the test result is what the caller sees.
  ${CILIUM_CLI} connectivity test --test-namespace cilium-test --external-target google.com "${CILIUM_TEST_EXTRA_ARGS[@]}" || test_rc=$?

  ${KUBECTL} delete ns cilium-test || cleanup_rc=$?

  if (( test_rc != 0 )); then
    return "${test_rc}"
  fi

  return "${cleanup_rc}"
}