-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
commands.sh
1229 lines (1089 loc) · 37.2 KB
/
commands.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
export AWS_DEFAULT_OUTPUT=text
# Ignore SSH key validation when connecting to these remote hosts.
# (Otherwise, deployment scripts break when a VM IP address reuse.)
SSHOPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
HELP=""
_cmd() {
HELP="$(printf "%s\n%-20s %s\n" "$HELP" "$1" "$2")"
}
_cmd help "Show available commands"
_cmd_help() {
printf "$(basename $0) - the container training swiss army knife\n"
printf "Commands:"
printf "%s" "$HELP" | sort
}
_cmd build "Build the Docker image to run this program in a container"
_cmd_build() {
docker-compose build
}
_cmd wrap "Run this program in a container"
_cmd_wrap() {
docker-compose run --rm workshopctl "$@"
}
_cmd cards "Generate ready-to-print cards for a group of VMs"
_cmd_cards() {
TAG=$1
need_tag
# This will process ips.txt to generate two files: ips.pdf and ips.html
(
cd tags/$TAG
../../lib/ips-txt-to-html.py settings.yaml
)
ln -sf ../tags/$TAG/ips.html www/$TAG.html
ln -sf ../tags/$TAG/ips.pdf www/$TAG.pdf
info "Cards created. You can view them with:"
info "xdg-open tags/$TAG/ips.html tags/$TAG/ips.pdf (on Linux)"
info "open tags/$TAG/ips.html (on macOS)"
info "Or you can start a web server with:"
info "$0 www"
}
_cmd clean "Remove information about stopped clusters"
_cmd_clean() {
for TAG in tags/*; do
if grep -q ^stopped$ "$TAG/status"; then
info "Removing $TAG..."
rm -rf "$TAG"
fi
done
}
_cmd createuser "Create the user that students will use"
_cmd_createuser() {
TAG=$1
need_tag
need_login_password
pssh "
set -e
# Create the user if it doesn't exist yet.
id $USER_LOGIN || sudo useradd -d /home/$USER_LOGIN -g users -m -s /bin/bash $USER_LOGIN
# Add them to the docker group, if there is one.
grep ^docker: /etc/group && sudo usermod -aG docker $USER_LOGIN
# Set their password.
echo $USER_LOGIN:$USER_PASSWORD | sudo chpasswd
# Add them to sudoers and allow passwordless authentication.
echo '$USER_LOGIN ALL=(ALL) NOPASSWD:ALL' | sudo tee /etc/sudoers.d/$USER_LOGIN
"
# The MaxAuthTries is here to help with folks who have many SSH keys.
pssh "
set -e
sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config
sudo sed -i 's/#MaxAuthTries 6/MaxAuthTries 42/' /etc/ssh/sshd_config
sudo service ssh restart
"
pssh "
set -e
cd /home/$USER_LOGIN
sudo -u $USER_LOGIN mkdir -p .ssh
if i_am_first_node; then
# Generate a key pair with an empty passphrase.
if ! sudo -u $USER_LOGIN [ -f .ssh/id_rsa ]; then
sudo -u $USER_LOGIN ssh-keygen -t rsa -f .ssh/id_rsa -P ''
sudo -u $USER_LOGIN cp .ssh/id_rsa.pub .ssh/authorized_keys
fi
fi
"
pssh "
set -e
cd /home/$USER_LOGIN
if ! i_am_first_node; then
# Copy keys from the first node.
ssh $SSHOPTS \$(cat /etc/name_of_first_node) sudo -u $USER_LOGIN tar -C /home/$USER_LOGIN -cvf- .ssh |
sudo -u $USER_LOGIN tar -xf-
fi
"
# FIXME do this only once.
pssh -I "sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc" <<"SQRL"
# Fancy prompt courtesy of @soulshake.
export PS1='\e[1m\e[31m[$HOSTIP] \e[32m($(docker-prompt)) \e[34m\u@\h\e[35m \w\e[0m\n$ '
# Bigger history, in a different file, and saved before executing each command.
export HISTSIZE=9999
export HISTFILESIZE=9999
shopt -s histappend
trap 'history -a' DEBUG
export HISTFILE=~/.history
SQRL
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.vimrc" <<SQRL
syntax on
set autoindent
set expandtab
set number
set shiftwidth=2
set softtabstop=2
set nowrap
SQRL
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.tmux.conf" <<SQRL
set -g status-style bg=yellow,bold
bind h select-pane -L
bind j select-pane -D
bind k select-pane -U
bind l select-pane -R
# Allow using mouse to switch panes
set -g mouse on
# Make scrolling with wheels work
bind -n WheelUpPane if-shell -F -t = "#{mouse_any_flag}" "send-keys -M" "if -Ft= '#{pane_in_mode}' 'send-keys -M' 'select-pane -t=; copy-mode -e; send-keys -M'"
bind -n WheelDownPane select-pane -t= \; send-keys -M
SQRL
# Install docker-prompt script
pssh -I sudo tee /usr/local/bin/docker-prompt <lib/docker-prompt
pssh sudo chmod +x /usr/local/bin/docker-prompt
echo user_ok > tags/$TAG/status
}
_cmd clusterize "Group VMs in clusters"
_cmd_clusterize() {
TAG=$1
need_tag
# Disable unattended upgrades so that they don't mess up with the subsequent steps
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
# Special case for scaleway since it doesn't come with sudo
if [ "$INFRACLASS" = "scaleway" ]; then
pssh -l root "
grep DEBIAN_FRONTEND /etc/environment || echo DEBIAN_FRONTEND=noninteractive >> /etc/environment
grep cloud-init /etc/sudoers && rm /etc/sudoers
apt-get update && apt-get install sudo -y"
fi
# FIXME
# Special case for hetzner since it doesn't have an ubuntu user
#if [ "$INFRACLASS" = "hetzner" ]; then
# pssh -l root "
#[ -d /home/ubuntu ] ||
# useradd ubuntu -m -s /bin/bash
#echo 'ubuntu ALL=(ALL:ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
#[ -d /home/ubuntu/.ssh ] ||
# install --owner=ubuntu --mode=700 --directory /home/ubuntu/.ssh
#[ -f /home/ubuntu/.ssh/authorized_keys ] ||
# install --owner=ubuntu --mode=600 /root/.ssh/authorized_keys --target-directory /home/ubuntu/.ssh"
#fi
# Special case for oracle since their iptables blocks everything but SSH
pssh "
if [ -f /etc/iptables/rules.v4 ]; then
sudo sed -i 's/-A INPUT -j REJECT --reject-with icmp-host-prohibited//' /etc/iptables/rules.v4
sudo netfilter-persistent flush
sudo netfilter-persistent start
fi"
# oracle-cloud-agent upgrades pacakges in the background.
# This breaks our deployment scripts, because when we invoke apt-get, it complains
# that the lock already exists (symptom: random "Exited with error code 100").
# Workaround: if we detect oracle-cloud-agent, remove it.
# But this agent seems to also take care of installing/upgrading
# the unified-monitoring-agent package, so when we stop the snap,
# it can leave dpkg in a broken state. We "fix" it with the 2nd command.
pssh "
if [ -d /snap/oracle-cloud-agent ]; then
sudo snap remove oracle-cloud-agent
sudo dpkg --remove --force-remove-reinstreq unified-monitoring-agent
fi"
# Copy settings and install Python YAML parser
pssh -I tee /tmp/settings.yaml <tags/$TAG/settings.yaml
pssh "
sudo apt-get update &&
sudo apt-get install -y python-yaml"
# If there is no "python" binary, symlink to python3
pssh "
if ! which python; then
sudo ln -s $(which python3) /usr/local/bin/python
fi"
# Copy postprep.py to the remote machines, and execute it, feeding it the list of IP addresses
pssh -I tee /tmp/clusterize.py <lib/clusterize.py
pssh --timeout 900 --send-input "python /tmp/clusterize.py >>/tmp/pp.out 2>>/tmp/pp.err" <tags/$TAG/ips.txt
# On the first node, create and deploy TLS certs using Docker Machine
# (Currently disabled.)
true || pssh "
if i_am_first_node; then
grep '[0-9]\$' /etc/hosts |
xargs -n2 sudo -H -u $USER_LOGIN \
docker-machine create -d generic --generic-ssh-user $USER_LOGIN --generic-ip-address
fi"
echo cluster_ok > tags/$TAG/status
}
_cmd disabledocker "Stop Docker Engine and don't restart it automatically"
_cmd_disabledocker() {
TAG=$1
need_tag
pssh "
sudo systemctl disable docker.socket --now
sudo systemctl disable docker.service --now
sudo systemctl disable containerd.service --now
"
}
_cmd docker "Install and start Docker"
_cmd_docker() {
TAG=$1
need_tag
pssh "
set -e
# On EC2, the ephemeral disk might be mounted on /mnt.
# If /mnt is a mountpoint, place Docker workspace on it.
if mountpoint -q /mnt; then
sudo mkdir -p /mnt/docker
sudo ln -sfn /mnt/docker /var/lib/docker
fi
# This will install the latest Docker.
sudo apt-get -qy install apt-transport-https ca-certificates curl software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository 'deb https://download.docker.com/linux/ubuntu bionic stable'
sudo apt-get -q update
sudo apt-get -qy install docker-ce
# Add registry mirror configuration.
if ! [ -f /etc/docker/daemon.json ]; then
echo '{\"registry-mirrors\": [\"https://mirror.gcr.io\"]}' | sudo tee /etc/docker/daemon.json
sudo systemctl restart docker
fi
"
##VERSION## https://github.com/docker/compose/releases
COMPOSE_VERSION=v2.11.1
COMPOSE_PLATFORM='linux-$(uname -m)'
# Just in case you need Compose 1.X, you can use the following lines.
# (But it will probably only work for x86_64 machines.)
#COMPOSE_VERSION=1.29.2
#COMPOSE_PLATFORM='Linux-$(uname -m)'
pssh "
set -e
### Install docker-compose.
sudo curl -fsSL -o /usr/local/bin/docker-compose \
https://github.com/docker/compose/releases/download/$COMPOSE_VERSION/docker-compose-$COMPOSE_PLATFORM
sudo chmod +x /usr/local/bin/docker-compose
docker-compose version
### Install docker-machine.
##VERSION## https://github.com/docker/machine/releases
MACHINE_VERSION=v0.16.2
sudo curl -fsSL -o /usr/local/bin/docker-machine \
https://github.com/docker/machine/releases/download/\$MACHINE_VERSION/docker-machine-\$(uname -s)-\$(uname -m)
sudo chmod +x /usr/local/bin/docker-machine
docker-machine version
"
}
_cmd kubebins "Install Kubernetes and CNI binaries but don't start anything"
_cmd_kubebins() {
TAG=$1
need_tag
##VERSION##
ETCD_VERSION=v3.4.13
K8SBIN_VERSION=v1.19.11 # Can't go to 1.20 because it requires a serviceaccount signing key.
CNI_VERSION=v0.8.7
ARCH=${ARCHITECTURE-amd64}
pssh --timeout 300 "
set -e
cd /usr/local/bin
if ! [ -x etcd ]; then
curl -L https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-$ARCH.tar.gz \
| sudo tar --strip-components=1 --wildcards -zx '*/etcd' '*/etcdctl'
fi
if ! [ -x hyperkube ]; then
##VERSION##
curl -L https://dl.k8s.io/$K8SBIN_VERSION/kubernetes-server-linux-$ARCH.tar.gz \
| sudo tar --strip-components=3 -zx \
kubernetes/server/bin/kube{ctl,let,-proxy,-apiserver,-scheduler,-controller-manager}
fi
sudo mkdir -p /opt/cni/bin
cd /opt/cni/bin
if ! [ -x bridge ]; then
curl -L https://github.com/containernetworking/plugins/releases/download/$CNI_VERSION/cni-plugins-linux-$ARCH-$CNI_VERSION.tgz \
| sudo tar -zx
fi
"
}
_cmd kube "Setup kubernetes clusters with kubeadm (must be run AFTER deploy)"
_cmd_kube() {
TAG=$1
need_tag
need_login_password
# Optional version, e.g. 1.13.5
SETTINGS=tags/$TAG/settings.yaml
KUBEVERSION=$(awk '/^kubernetes_version:/ {print $2}' $SETTINGS)
if [ "$KUBEVERSION" ]; then
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION*
Pin-Priority: 1000
EOF"
fi
# Install packages
pssh --timeout 200 "
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
sudo apt-key add - &&
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
sudo apt-get install -qy kubelet kubeadm kubectl &&
sudo apt-mark hold kubelet kubeadm kubectl &&
kubeadm completion bash | sudo tee /etc/bash_completion.d/kubeadm &&
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
# Disable swap
# (note that this won't survive across node reboots!)
if [ "$INFRACLASS" = "linode" ]; then
pssh "
sudo swapoff -a"
fi
# Re-enable CRI interface in containerd
pssh "
echo '# Use default parameters for containerd.' | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd"
# Initialize kube control plane
pssh --timeout 200 "
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
kubeadm token generate > /tmp/token &&
cat >/tmp/kubeadm-config.yaml <<EOF
kind: InitConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- token: \$(cat /tmp/token)
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
discovery:
bootstrapToken:
apiServerEndpoint: \$(cat /etc/name_of_first_node):6443
token: \$(cat /tmp/token)
unsafeSkipCAVerification: true
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
# The following line is necessary when using Docker.
# It doesn't seem necessary when using containerd.
#cgroupDriver: cgroupfs
---
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
apiServer:
certSANs:
- \$(cat /tmp/ipv4)
EOF
sudo kubeadm init --config=/tmp/kubeadm-config.yaml
fi"
# Put kubeconfig in ubuntu's and $USER_LOGIN's accounts
pssh "
if i_am_first_node; then
sudo mkdir -p \$HOME/.kube /home/$USER_LOGIN/.kube &&
sudo cp /etc/kubernetes/admin.conf \$HOME/.kube/config &&
sudo cp /etc/kubernetes/admin.conf /home/$USER_LOGIN/.kube/config &&
sudo chown -R \$(id -u) \$HOME/.kube &&
sudo chown -R $USER_LOGIN /home/$USER_LOGIN/.kube
fi"
# Install weave as the pod network
pssh "
if i_am_first_node; then
#kubever=\$(kubectl version | base64 | tr -d '\n') &&
#kubectl apply -f https://cloud.weave.works/k8s/net?k8s-version=\$kubever
kubectl apply -f https://github.com/weaveworks/weave/releases/download/v2.8.1/weave-daemonset-k8s-1.11.yaml
fi"
# Join the other nodes to the cluster
pssh --timeout 200 "
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
ssh $SSHOPTS \$FIRSTNODE cat /tmp/kubeadm-config.yaml > /tmp/kubeadm-config.yaml &&
sudo kubeadm join --config /tmp/kubeadm-config.yaml
fi"
# Install metrics server
pssh "
if i_am_first_node; then
kubectl apply -f https://raw.githubusercontent.com/jpetazzo/container.training/master/k8s/metrics-server.yaml
#helm upgrade --install metrics-server \
# --repo https://kubernetes-sigs.github.io/metrics-server/ metrics-server \
# --namespace kube-system --set args={--kubelet-insecure-tls}
fi"
}
_cmd kubetools "Install a bunch of CLI tools for Kubernetes"
_cmd_kubetools() {
TAG=$1
need_tag
need_login_password
ARCH=${ARCHITECTURE-amd64}
# Folks, please, be consistent!
# Either pick "uname -m" (on Linux, that's x86_64, aarch64, etc.)
# Or GOARCH (amd64, arm64, etc.)
# But don't mix both! Thank you ♥
case $ARCH in
amd64)
HERP_DERP_ARCH=x86_64
TILT_ARCH=x86_64
;;
*)
HERP_DERP_ARCH=$ARCH
TILT_ARCH=${ARCH}_ALPHA
;;
esac
# Install kubectx and kubens
pssh "
set -e
if ! [ -x /usr/local/bin/kctx ]; then
cd /tmp
git clone https://github.com/ahmetb/kubectx
sudo cp kubectx/kubectx /usr/local/bin/kctx
sudo cp kubectx/kubens /usr/local/bin/kns
sudo cp kubectx/completion/*.bash /etc/bash_completion.d
fi"
# Install kube-ps1
pssh "
set -e
if ! [ -d /opt/kube-ps1 ]; then
cd /tmp
git clone https://github.com/jonmosco/kube-ps1
sudo mv kube-ps1 /opt/kube-ps1
sudo -u $USER_LOGIN sed -i s/docker-prompt/kube_ps1/ /home/$USER_LOGIN/.bashrc &&
sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc <<EOF
. /opt/kube-ps1/kube-ps1.sh
KUBE_PS1_PREFIX=""
KUBE_PS1_SUFFIX=""
KUBE_PS1_SYMBOL_ENABLE="false"
KUBE_PS1_CTX_COLOR="green"
KUBE_PS1_NS_COLOR="green"
EOF
fi"
# Install stern
##VERSION## https://github.com/stern/stern/releases
STERN_VERSION=1.22.0
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
pssh "
if [ ! -x /usr/local/bin/stern ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx stern
sudo chmod +x /usr/local/bin/stern
stern --completion bash | sudo tee /etc/bash_completion.d/stern
stern --version
fi"
# Install helm
pssh "
if [ ! -x /usr/local/bin/helm ]; then
curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get-helm-3 | sudo bash &&
helm completion bash | sudo tee /etc/bash_completion.d/helm
helm version
fi"
# Install kustomize
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
KUSTOMIZE_VERSION=v4.5.7
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kustomize ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kustomize
kustomize completion bash | sudo tee /etc/bash_completion.d/kustomize
kustomize version
fi"
# Install ship
# Note: 0.51.3 is the last version that doesn't display GIN-debug messages
# (don't want to get folks confused by that!)
# Only install ship on Intel platforms (no ARM 64 builds).
[ "$ARCH" = "amd64" ] &&
pssh "
if [ ! -x /usr/local/bin/ship ]; then
##VERSION##
curl -fsSL https://github.com/replicatedhq/ship/releases/download/v0.51.3/ship_0.51.3_linux_$ARCH.tar.gz |
sudo tar -C /usr/local/bin -zx ship
fi"
# Install the AWS IAM authenticator
pssh "
if [ ! -x /usr/local/bin/aws-iam-authenticator ]; then
##VERSION##
sudo curl -fsSLo /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.12.7/2019-03-27/bin/linux/$ARCH/aws-iam-authenticator
sudo chmod +x /usr/local/bin/aws-iam-authenticator
aws-iam-authenticator version
fi"
# Install the krew package manager
pssh "
if [ ! -d /home/$USER_LOGIN/.krew ]; then
cd /tmp &&
KREW=krew-linux_$ARCH
curl -fsSL https://github.com/kubernetes-sigs/krew/releases/latest/download/\$KREW.tar.gz |
tar -zxf- &&
sudo -u $USER_LOGIN -H ./\$KREW install krew &&
echo export PATH=/home/$USER_LOGIN/.krew/bin:\\\$PATH | sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc
fi"
# Install k9s
pssh "
if [ ! -x /usr/local/bin/k9s ]; then
FILENAME=k9s_Linux_$HERP_DERP_ARCH.tar.gz &&
curl -fsSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
sudo tar -zxvf- -C /usr/local/bin k9s
k9s version
fi"
# Install popeye
pssh "
if [ ! -x /usr/local/bin/popeye ]; then
FILENAME=popeye_Linux_$HERP_DERP_ARCH.tar.gz &&
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
sudo tar -zxvf- -C /usr/local/bin popeye
popeye version
fi"
# Install Tilt
# Official instructions:
# curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
pssh "
if [ ! -x /usr/local/bin/tilt ]; then
TILT_VERSION=0.22.15
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
sudo tar -zxvf- -C /usr/local/bin tilt
tilt completion bash | sudo tee /etc/bash_completion.d/tilt
tilt version
fi"
# Install Skaffold
pssh "
if [ ! -x /usr/local/bin/skaffold ]; then
curl -fsSLo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-$ARCH &&
sudo install skaffold /usr/local/bin/
skaffold completion bash | sudo tee /etc/bash_completion.d/skaffold
skaffold version
fi"
# Install Kompose
pssh "
if [ ! -x /usr/local/bin/kompose ]; then
curl -fsSLo kompose https://github.com/kubernetes/kompose/releases/latest/download/kompose-linux-$ARCH &&
sudo install kompose /usr/local/bin
kompose completion bash | sudo tee /etc/bash_completion.d/kompose
kompose version
fi"
# Install KinD
pssh "
if [ ! -x /usr/local/bin/kind ]; then
curl -fsSLo kind https://github.com/kubernetes-sigs/kind/releases/latest/download/kind-linux-$ARCH &&
sudo install kind /usr/local/bin
kind completion bash | sudo tee /etc/bash_completion.d/kind
kind version
fi"
# Install YTT
pssh "
if [ ! -x /usr/local/bin/ytt ]; then
curl -fsSLo ytt https://github.com/vmware-tanzu/carvel-ytt/releases/latest/download/ytt-linux-$ARCH &&
sudo install ytt /usr/local/bin
ytt completion bash | sudo tee /etc/bash_completion.d/ytt
ytt version
fi"
##VERSION## https://github.com/bitnami-labs/sealed-secrets/releases
KUBESEAL_VERSION=0.17.4
#case $ARCH in
#amd64) FILENAME=kubeseal-linux-amd64;;
#arm64) FILENAME=kubeseal-arm64;;
#*) FILENAME=nope;;
#esac
pssh "
if [ ! -x /usr/local/bin/kubeseal ]; then
curl -fsSL https://github.com/bitnami-labs/sealed-secrets/releases/download/v$KUBESEAL_VERSION/kubeseal-$KUBESEAL_VERSION-linux-$ARCH.tar.gz |
sudo tar -zxvf- -C /usr/local/bin kubeseal
kubeseal --version
fi"
}
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
_cmd_kubereset() {
TAG=$1
need_tag
pssh "sudo kubeadm reset --force"
}
_cmd kubetest "Check that all nodes are reporting as Ready"
_cmd_kubetest() {
TAG=$1
need_tag
# There are way too many backslashes in the command below.
# Feel free to make that better ♥
pssh "
set -e
if i_am_first_node; then
which kubectl
for NODE in \$(grep [0-9]\$ /etc/hosts | grep -v ^127 | awk {print\ \\\$2}); do
echo \$NODE ; kubectl get nodes | grep -w \$NODE | grep -w Ready
done
fi"
echo kube_ok > tags/$TAG/status
}
_cmd ips "Show the IP addresses for a given tag"
_cmd_ips() {
TAG=$1
need_tag $TAG
SETTINGS=tags/$TAG/settings.yaml
CLUSTERSIZE=$(awk '/^clustersize:/ {print $2}' $SETTINGS)
while true; do
for I in $(seq $CLUSTERSIZE); do
read ip || return 0
printf "%s\t" "$ip"
done
printf "\n"
done < tags/$TAG/ips.txt
}
_cmd inventory "List all VMs on a given infrastructure (or all infras if no arg given)"
_cmd_inventory() {
case "$1" in
"")
for INFRA in infra/*; do
$0 inventory $INFRA
done
;;
*/example.*)
;;
*)
need_infra $1
sep "Listing instances for $1"
infra_list
;;
esac
}
_cmd maketag "Generate a quasi-unique tag for a group of instances"
_cmd_maketag() {
if [ -z $USER ]; then
export USER=anonymous
fi
MS=$(($(date +%N | tr -d 0)/1000000))
date +%Y-%m-%d-%H-%M-$MS-$USER
}
_cmd netfix "Disable GRO and run a pinger job on the VMs"
_cmd_netfix () {
TAG=$1
need_tag
pssh "
sudo ethtool -K ens3 gro off
sudo tee /root/pinger.service <<EOF
[Unit]
Description=pinger
[Install]
WantedBy=multi-user.target
[Service]
WorkingDirectory=/
ExecStart=/bin/ping -w60 1.1
User=nobody
Group=nogroup
Restart=always
EOF
sudo systemctl enable /root/pinger.service
sudo systemctl start pinger"
}
_cmd ping "Ping VMs in a given tag, to check that they have network access"
_cmd_ping() {
TAG=$1
need_tag
fping < tags/$TAG/ips.txt
}
_cmd tailhist "Install history viewer on port 1088"
_cmd_tailhist () {
TAG=$1
need_tag
need_login_password
ARCH=${ARCHITECTURE-amd64}
[ "$ARCH" = "aarch64" ] && ARCH=arm64
pssh "
set -e
wget https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd
sudo mv websocketd /usr/local/bin/websocketd
sudo mkdir -p /tmp/tailhist
sudo tee /root/tailhist.service <<EOF
[Unit]
Description=tailhist
[Install]
WantedBy=multi-user.target
[Service]
WorkingDirectory=/tmp/tailhist
ExecStart=/usr/local/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/$USER_LOGIN/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
User=nobody
Group=nogroup
Restart=always
EOF
sudo systemctl enable /root/tailhist.service --now
"
pssh -I sudo tee /tmp/tailhist/index.html <lib/tailhist.html
}
_cmd tools "Install a bunch of useful tools (editors, git, jq...)"
_cmd_tools() {
TAG=$1
need_tag
pssh "
sudo apt-get -q update
sudo apt-get -qy install apache2-utils emacs-nox git httping htop jid joe jq mosh python-setuptools tree unzip
# This is for VMs with broken PRNG (symptom: running docker-compose randomly hangs)
sudo apt-get -qy install haveged
# I don't remember why we need to remove this
sudo apt-get remove -y --purge dnsmasq-base
"
}
_cmd opensg "Open the default security group to ALL ingress traffic"
_cmd_opensg() {
need_infra $1
infra_opensg
}
_cmd disableaddrchecks "Disable source/destination IP address checks"
_cmd_disableaddrchecks() {
TAG=$1
need_tag
infra_disableaddrchecks
}
_cmd pssh "Run an arbitrary command on all nodes"
_cmd_pssh() {
TAG=$1
need_tag
shift
pssh "$@"
}
_cmd pull_images "Pre-pull a bunch of Docker images"
_cmd_pull_images() {
TAG=$1
need_tag
pull_tag
}
_cmd remap_nodeports "Remap NodePort range to 10000-10999"
_cmd_remap_nodeports() {
TAG=$1
need_tag
FIND_LINE=" - --service-cluster-ip-range=10.96.0.0\/12"
ADD_LINE=" - --service-node-port-range=10000-10999"
MANIFEST_FILE=/etc/kubernetes/manifests/kube-apiserver.yaml
pssh "
if i_am_first_node && ! grep -q '$ADD_LINE' $MANIFEST_FILE; then
sudo sed -i 's/\($FIND_LINE\)\$/\1\n$ADD_LINE/' $MANIFEST_FILE
fi"
info "If you have manifests hard-coding nodePort values,"
info "you might want to patch them with a command like:"
info "
if i_am_first_node; then
kubectl -n kube-system patch svc prometheus-server \\
-p 'spec: { ports: [ {port: 80, nodePort: 10101} ]}'
fi
"
}
_cmd quotas "Check our infrastructure quotas (max instances)"
_cmd_quotas() {
need_infra $1
infra_quotas
}
_cmd ssh "Open an SSH session to the first node of a tag"
_cmd_ssh() {
TAG=$1
need_tag
need_login_password
IP=$(head -1 tags/$TAG/ips.txt)
info "Logging into $IP (default password: $USER_PASSWORD)"
ssh $SSHOPTS $USER_LOGIN@$IP
}
_cmd start "Start a group of VMs"
_cmd_start() {
while [ ! -z "$*" ]; do
case "$1" in
--infra) INFRA=$2; shift 2;;
--settings) SETTINGS=$2; shift 2;;
--count) die "Flag --count is deprecated; please use --students instead." ;;
--tag) TAG=$2; shift 2;;
--students) STUDENTS=$2; shift 2;;
*) die "Unrecognized parameter: $1."
esac
done
if [ -z "$INFRA" ]; then
die "Please add --infra flag to specify which infrastructure file to use."
fi
if [ -z "$SETTINGS" ]; then
die "Please add --settings flag to specify which settings file to use."
fi
if [ -z "$COUNT" ]; then
CLUSTERSIZE=$(awk '/^clustersize:/ {print $2}' $SETTINGS)
if [ -z "$STUDENTS" ]; then
warning "Neither --count nor --students was specified."
warning "According to the settings file, the cluster size is $CLUSTERSIZE."
warning "Deploying one cluster of $CLUSTERSIZE nodes."
STUDENTS=1
fi
COUNT=$(($STUDENTS*$CLUSTERSIZE))
fi
# Check that the specified settings and infrastructure are valid.
need_settings $SETTINGS
need_infra $INFRA
if [ -z "$TAG" ]; then
TAG=$(_cmd_maketag)
fi
mkdir -p tags/$TAG
ln -s ../../$INFRA tags/$TAG/infra.sh
ln -s ../../$SETTINGS tags/$TAG/settings.yaml
echo creating > tags/$TAG/status
infra_start $COUNT
sep
info "Successfully created $COUNT instances with tag $TAG"
echo create_ok > tags/$TAG/status
# If the settings.yaml file has a "steps" field,
# automatically execute all the actions listed in that field.
# If an action fails, retry it up to 10 times.
python -c 'if True: # hack to deal with indentation
import sys, yaml
settings = yaml.safe_load(sys.stdin)
print ("\n".join(settings.get("steps", [])))
' < tags/$TAG/settings.yaml \
| while read step; do
if [ -z "$step" ]; then
break
fi
sep "$TAG -> $step"
TRY=1
MAXTRY=10
while ! $0 $step $TAG ; do
TRY=$(($TRY+1))
if [ $TRY -gt $MAXTRY ]; then
error "This step ($step) failed after $MAXTRY attempts."
info "You can troubleshoot the situation manually, or terminate these instances with:"
info "$0 stop $TAG"
die "Giving up."
else
sep
info "Step '$step' failed for '$TAG'. Let's wait 10 seconds and try again."
info "(Attempt $TRY out of $MAXTRY.)"
sleep 10
fi
done
done
sep
info "Deployment successful."
info "To log into the first machine of that batch, you can run:"
info "$0 ssh $TAG"
info "To terminate these instances, you can run:"
info "$0 stop $TAG"
}
_cmd stop "Stop (terminate, shutdown, kill, remove, destroy...) instances"
_cmd_stop() {
TAG=$1
need_tag
infra_stop
echo stopped > tags/$TAG/status
}
_cmd tags "List groups of VMs known locally"
_cmd_tags() {
(
cd tags
echo "[#] [Status] [Tag] [Infra]" \
| awk '{ printf "%-7s %-12s %-25s %-25s\n", $1, $2, $3, $4}'
for tag in *; do
if [ -f $tag/ips.txt ]; then
count="$(wc -l < $tag/ips.txt)"
else
count="?"
fi
if [ -f $tag/status ]; then
status="$(cat $tag/status)"
else
status="?"
fi
if [ -f $tag/infra.sh ]; then
infra="$(basename $(readlink $tag/infra.sh))"
else
infra="?"
fi
echo "$count $status $tag $infra" \
| awk '{ printf "%-7s %-12s %-25s %-25s\n", $1, $2, $3, $4}'
done