forked from intel/cri-resource-manager
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
executable file
·1447 lines (1359 loc) · 52.7 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
# Global defaults for the test driver, followed by loading of the shared
# demo libraries that the functions below call into.
DEMO_TITLE="Container Runtime End-to-End Testing"
DEFAULT_DISTRO="ubuntu-20.04"
# Command prefix used by out(): text is piped to it with $speed appended as
# the last argument. Setting PV to an empty string makes out() plain echo.
PV='pv -qL'
# Keep binsrc if the caller has set it (even to an empty value),
# otherwise default to "local".
binsrc=${binsrc-local}
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
DEMO_LIB_DIR=$(realpath "$SCRIPT_DIR/../../demo/lib")
# outdir overrides the output location; the default is "output" under SCRIPT_DIR.
OUTPUT_DIR=${outdir-"$SCRIPT_DIR"/output}
COMMAND_OUTPUT_DIR="$OUTPUT_DIR"/commands
# shellcheck disable=SC1091
# shellcheck source=../../demo/lib/command.bash
source "$DEMO_LIB_DIR"/command.bash
# shellcheck disable=SC1091
# shellcheck source=../../demo/lib/host.bash
source "$DEMO_LIB_DIR"/host.bash
# shellcheck disable=SC1091
# shellcheck source=../../demo/lib/vm.bash
source "$DEMO_LIB_DIR"/vm.bash
# Full text of this script and of the three sourced libraries, joined with
# single spaces into one string.
script_source="$(< "$0") $(< "$DEMO_LIB_DIR/host.bash") $(< "$DEMO_LIB_DIR/command.bash") $(< "$DEMO_LIB_DIR/vm.bash")"
usage() {
    # Print the command line help to stdout.
    #
    # The text is a single here-document. DEMO_TITLE, SCRIPT_DIR and
    # DEMO_LIB_DIR are expanded in it; every "\$" is printed as a literal "$".
    cat <<EOF_USAGE
$DEMO_TITLE
Usage: [VAR=VALUE] ./run.sh MODE [SCRIPT]
 MODE: "play" plays the test as a demo.
 "record" plays and records the demo.
 "test" runs fast, reports pass or fail.
 "debug" enables k8scri pipe debugging and
 copies sources of all *_src VARs (see below) to vm.
 "interactive" launches interactive shell
 for running test script commands
 (see ./run.sh help script [FUNCTION]).
 SCRIPT: test script file to run instead of the default test.

 VARs:
 vm: govm virtual machine name.
 For non-govm-managed hosts: set VM_IP and VM_SSH_USER, too.
 'ssh \$VM_SSH_USER@\$VM_IP sudo id' must not require password.
 containerd_src:
 "/host/path/to/go/project": replace vm /usr/bin binaries
 from /host/path/to/go/project/bin directory.
 The default is to use vm OS package manager containerd.
 crio_src:
 "/host/path/to/go/project": replace vm /usr/bin binaries
 from /host/path/to/go/project/bin directory.
 Must be set if crio is a part of \$k8scri and the vm distro
 does not have (or implement installing) cri-o packages.
 crirm_src:
 "/host/path/to/go/project": replace vm /usr/local/bin binaries
 from /host/path/to/go/project/bin directory.
 The default is to use the project of these e2e tests.
 runc_src:
 "/host/path/to/go/project": replace vm /usr/bin binaries
 from /host/path/to/go/project/bin directory.
 distro_binaries:
 0: use the normal binaries built for this host (the default).
 1: use binaries cross-built for distros.
 binsrc: Where to get cri-resmgr to the vm.
 "github": go get from master and build inside vm.
 "local": (the default) copy from \${crirm_src}/bin, or
 from \${crirm_src}/binaries/\$distro if \$distro_binaries=1.
 "packages/<distro>": use distro packages from this dir
 reinstall_<containerd|crio|cri_resmgr|cri_resmgr_agent|runc>:
 If 1, stop the daemon (if not runc),
 then reinstall and restart it before starting test run.
 The default is 0.
 Set containerd_src/crio_src/runc_src to install a local build.
 reinstall_k8s: if 1, destroy existing k8s cluster and create a new one.
 reinstall_bootstrap: if 1, run the bootstrap and proxy setup commands.
 Only available if VM_IP is set when calling the script.
 reinstall_all: if 1, set all above reinstall_* options to 1.
 omit_cri_resmgr: if 1, omit checking/installing/starting cri-resmgr.
 omit_agent: if 1, omit checking/installing/starting cri-resmgr-agent.
 outdir: Save output under given directory.
 The default is "${SCRIPT_DIR}/output".
 speed: Demo play speed.
 The default is 10 (keypresses per second).
 cleanup: Level of cleanup after a test run:
 0: leave vm running (the default)
 1: delete vm
 2: stop vm, but do not delete it.
 Hook VARs:
 on_vm_online: code to be executed when SSH connection to vm works.
 on_k8s_online: code to be executed when Kubernetes is ready for use.
 on_verify_fail, on_create_fail: code to be executed in case
 verify() or create() fails. Example: go to interactive
 mode if a verification fails: on_verify_fail=interactive
 on_verify, on_create, on_launch: code to be executed every time
 after verify/create/launch function
 on_{cri,runc,k8s}_install: code to be executed right after installing
 these components.

 VM configuration VARs: (effective when vm is not already configured)
 topology: JSON to override NUMA node list used in tests.
 See: python3 ${DEMO_LIB_DIR}/topology2qemuopts.py --help
 distro: Linux distribution to be / already installed on vm.
 Supported values: centos-7, centos-8, debian-10, debian-sid
 fedora, fedora-33, opensuse-tumbleweed,
 opensuse-15.3 (same as opensuse), opensuse-15.2, sles,
 ubuntu-18.04, ubuntu-20.04, ubuntu-22.04
 If sles: set VM_SLES_REGCODE=<CODE> to use official packages.
 cgroups: cgroups version in the VM, v1 or v2. The default is v1.
 cgroups=v2 is supported only on distro=fedora
 k8s: Kubernetes version to be installed on VM creation
 The default is the latest available on selected distro.
 Example: k8s=1.18.10
 k8scri: The container runtime pipe where kubelet connects to.
 Options are:
 "cri-resmgr|containerd" cri-resmgr is a proxy to containerd.
 "cri-resmgr|crio" cri-resmgr is a proxy to cri-o.
 "containerd" containerd, no cri-resmgr.
 "containerd&cri-resmgr" containerd, cri-resmgr is an NRI plugin.
 "crio" cri-o, no cri-resmgr.
 "crio&cri-resmgr" cri-o, cri-resmgr is an NRI plugin.
 The default is "cri-resmgr|containerd".
 k8scni: The container network interface plugin to install. Options are:
 "cilium" (the default), "flannel", "weavenet".
 k8smaster: Name of the existing vm whose cluster this vm will join.
 If empty (default), this vm forms its own single-node cluster.
 crio_version: Version of cri-o to try to pull in, if cri-o is
 not being installed from sources.
 setup_proxies: Setup proxies even if not using govm based VM.
 This is only needed if you have set VM_IP and want
 the proxy information set in the target host. By default
 the proxies are not set if VM_IP is set.

 Test input VARs:
 cri_resmgr_cfg: configuration file forced to cri-resmgr.
 cri_resmgr_extra_args: arguments to be added on cri-resmgr
 command line when launched
 cri_resmgr_agent_extra_args: arguments to be added on
 cri-resmgr-agent command line when launched
 use_host_images: if "1", export images from the host docker
 to vm whenever they are available.
 The default is 0: always pull images from repositories to vm.
 vm_files: "serialized" associative array of files to be created on vm
 associative array syntax:
 vm_files['/path/file']=file:/path/on/host
 ='data:,plain text content'
 =data:;base64,ZGF0YQ==
 =dir: (creates only /path/file directory)
 vm_files['/etc/motd']='data:,hello world'
 How to execute run.sh with serialized array:
 vm_files=\$(declare -p vm_files) ./run.sh
 code: Variable that contains test script code to be run
 if SCRIPT is not given.
 py_consts: Python code that runs always before pyexec in SCRIPT.

Default test input VARs: ./run.sh help defaults

Create VM 'foo' that runs k8s 1.20.2 on Debian Sid:
vm=foo distro=debian-sid k8s=1.20.2 ./run.sh interactive
EOF_USAGE
}
error() {
    # Report "$1" on stderr as an empty line followed by "error: $1", then
    # let command-exit-if-not-interactive decide what happens next.
    # The return status is that of command-exit-if-not-interactive.
    printf '\nerror: %s\n' "$1" >&2
    command-exit-if-not-interactive
}
out() {
    # Print "$1" followed by an empty line.
    #
    # When PV is non-empty the text is piped through the $PV command (split
    # into words on purpose) with $speed as its last argument. If speed is
    # unset at that point it is assigned the default 10.
    if [ -z "$PV" ]; then
        echo "$1"
    else
        speed=${speed-10}
        echo "$1" | $PV "$speed"
    fi
    echo ""
}
record() {
    # Clear the screen, announce the recording and run asciinema on the host
    # so that it records "./run.sh play" into crirm-demo-blockio.cast.
    local rec_cmd="asciinema rec -t \"$DEMO_TITLE\" crirm-demo-blockio.cast -c \"./run.sh play\""
    clear
    out "Recording this screencast..."
    host-command "$rec_cmd"
}
screen-create-vm() {
    # Announce the VM, create it with host-create-vm "$vm" "$topology" and
    # set up networking. Reports an error if VM_IP is still empty afterwards.
    speed=60 out "### Running the test in vm=\"$VM_NAME\"."
    host-create-vm "$vm" "$topology"
    vm-networking
    [ -n "$VM_IP" ] || error "creating VM failed"
}
screen-install-cri-resmgr() {
    # Print the installation banner (with speed=60 for out) and hand over
    # to vm-install-cri-resmgr.
    speed=60 out '### Installing CRI Resource Manager to VM.'
    vm-install-cri-resmgr
}
screen-launch-cri-resmgr() {
    # Print the launch banner, then start cri-resmgr. When binsrc begins
    # with "packages" the systemd variant of the launch target is used.
    speed=60 out "### Launching cri-resmgr with config $cri_resmgr_cfg."
    case "$binsrc" in
        packages*)
            launch cri-resmgr-systemd
            ;;
        *)
            launch cri-resmgr
            ;;
    esac
}
screen-create-singlenode-cluster() {
    # Print two banners (the second one shows $k8scri) and then call
    # vm-create-singlenode-cluster.
    local banner
    for banner in \
        "### Setting up single-node Kubernetes cluster." \
        "### Container runtime parts: $k8scri"; do
        speed=60 out "$banner"
    done
    vm-create-singlenode-cluster
}
screen-launch-cri-resmgr-agent() {
    # Print two banners about the agent and start it with
    # "launch cri-resmgr-agent".
    local banner
    for banner in \
        "### Launching cri-resmgr-agent." \
        "### The agent will make cri-resmgr configurable with ConfigMaps."; do
        speed=60 out "$banner"
    done
    launch cri-resmgr-agent
}
get-py-allowed() {
# Rebuild the global py_allowed, a Python source snippet that pyexec()
# writes into its state module.
#
# Side effects: sets the globals topology_dump_file, res_allowed_file and
# py_allowed, and writes both files under $OUTPUT_DIR. The command to run on
# the VM is generated by $DEMO_LIB_DIR/topology.py; its output is read from
# COMMAND_OUTPUT, which vm-command is expected to fill in.
topology_dump_file="$OUTPUT_DIR/topology_dump.$VM_NAME"
res_allowed_file="$OUTPUT_DIR/res_allowed.$VM_NAME"
# The topology dump is fetched only when the file does not exist yet;
# an existing file for this VM name is reused as is.
if ! [ -f "$topology_dump_file" ]; then
vm-command "$("$DEMO_LIB_DIR/topology.py" bash_topology_dump)" >/dev/null || {
command-error "error fetching topology_dump from $VM_NAME"
}
echo -e "$COMMAND_OUTPUT" > "$topology_dump_file"
fi
# Fetch data and update allowed* variables from the virtual machine
vm-command "$("$DEMO_LIB_DIR/topology.py" bash_res_allowed 'pod[0-9]*c[0-9]*')" >/dev/null || {
command-error "error fetching res_allowed from $VM_NAME"
}
echo -e "$COMMAND_OUTPUT" > "$res_allowed_file"
# Validate res_allowed_file. Error out if there is same container
# name with two different sets of allowed CPUs or memories.
# Fields are split on space or "/": $1 is the name used as the key and the
# concatenation of $3 and $4 is the value that must stay the same per key.
awk -F "[ /]" '{if (pod[$1]!=0 && pod[$1]!=""$3""$4){print "error: ambiguous allowed resources for name "$1; exit(1)};pod[$1]=""$3""$4}' "$res_allowed_file" || {
error "container/process name collision: test environment needs cleanup."
}
# The "allowed=" line below is filled in now, by running topology.py on the
# host with the two files written above; the rest is literal Python text.
py_allowed="
import re
allowed=$("$DEMO_LIB_DIR/topology.py" -t "$topology_dump_file" -r "$res_allowed_file" res_allowed -o json)
_branch_pod=[(p, d, n, c, t, cpu, pod.rsplit('/', 1)[0])
for p in allowed
for d in allowed[p]
for n in allowed[p][d]
for c in allowed[p][d][n]
for t in allowed[p][d][n][c]
for cpu in allowed[p][d][n][c][t]
for pod in allowed[p][d][n][c][t][cpu]]
# cpu resources allowed for a pod:
packages, dies, nodes, cores, threads, cpus = {}, {}, {}, {}, {}, {}
# mem resources allowed for a pod:
mems = {}
for p, d, n, c, t, cpu, pod in _branch_pod:
if c == 'mem': # this _branch_pod entry is about memory
if not pod in mems:
mems[pod] = set()
# topology.py can print memory nodes as children of cpu-ful nodes
# if distance looks like they are behind the same memory controller.
# The thread field, however, is the true node who contains the memory.
mems[pod].add(t)
continue
# this _branch_pod entry is about cpu
if not pod in packages:
packages[pod] = set()
dies[pod] = set()
nodes[pod] = set()
cores[pod] = set()
threads[pod] = set()
cpus[pod] = set()
packages[pod].add(p)
dies[pod].add('%s/%s' % (p, d))
nodes[pod].add(n)
cores[pod].add('%s/%s' % (n, c))
threads[pod].add('%s/%s/%s' % (n, c, t))
cpus[pod].add(cpu)
def disjoint_sets(*sets):
'set.isdisjoint() for n > 1 sets'
s = sets[0]
for next in sets[1:]:
if not s.isdisjoint(next):
return False
s = s.union(next)
return True
def set_ids(str_ids, chars='[a-z]'):
num_ids = set()
for str_id in str_ids:
if '/' in str_id:
num_ids.add(tuple(int(re.sub(chars, '', s)) for s in str_id.split('/')))
else:
num_ids.add(int(re.sub(chars, '', str_id)))
return num_ids
package_ids = lambda i: set_ids(i, '[package]')
die_ids = lambda i: set_ids(i, '[packagedie]')
node_ids = lambda i: set_ids(i, '[node]')
core_ids = lambda i: set_ids(i, '[nodecore]')
thread_ids = lambda i: set_ids(i, '[nodecorethread]')
cpu_ids = lambda i: set_ids(i, '[cpu]')
"
}
get-py-cache() {
# Rebuild the global py_cache, a Python source snippet that pyexec() writes
# into its state module. The snippet loads the JSON file saved below and
# defines cache, allocations, containers and pods.
#
# Side effects: overwrites ${OUTPUT_DIR}/cache and sets py_cache.
# Fetch current cri-resmgr cache from a virtual machine.
speed=1000 vm-command "cat \"/var/lib/cri-resmgr/cache\"" >/dev/null 2>&1 || {
command-error "fetching cache file failed"
}
# COMMAND_OUTPUT is expected to hold what the vm-command above printed.
cat > "${OUTPUT_DIR}/cache" <<<"$COMMAND_OUTPUT"
# Only ${OUTPUT_DIR} is expanded now; everything else is literal Python
# that runs later, when the state module is imported.
py_cache="
import json
cache=json.load(open(\"${OUTPUT_DIR}/cache\"))
try:
allocations=json.loads(cache['PolicyJSON']['allocations'])
except KeyError:
allocations=None
containers=cache['Containers']
pods=cache['Pods']
for _contid in list(containers.keys()):
try:
_cmd = ' '.join(containers[_contid]['Command'])
except:
continue # Command may be None
# Recognize echo podXcY ; sleep inf -type test pods and make them
# easily accessible: containers['pod0c0'], pods['pod0']
if 'echo pod' in _cmd and 'sleep inf' in _cmd:
_contname = _cmd.split()[3] # _contname is podXcY
_podid = containers[_contid]['PodID']
_podname = pods[_podid]['Name'] # _podname is podX
if not allocations is None and _contid in allocations:
allocations[_contname] = allocations[_contid]
containers[_contname] = containers[_contid]
pods[_podname] = pods[_podid]
"
}
resolve-template() {
    # Usage: resolve-template NAME DIR...
    #
    # Look for NAME.in in every DIR, in the given order. Empty arguments and
    # arguments that are not directories are skipped.
    #
    # Outputs: the first match on stdout; a "resolved" note for the first
    #          match and a shadowing WARNING for every later match on stderr.
    # Returns: 0 if a template was found, 1 otherwise.
    local name="$1" found="" dir candidate
    shift
    for dir in "$@"; do
        [ -n "$dir" ] && [ -d "$dir" ] || continue
        candidate="$dir/$name.in"
        [ -e "$candidate" ] || continue
        if [ -n "$found" ]; then
            echo 1>&2 "WARNING: template file $found shadows $candidate"
        else
            found="$candidate"
            echo 1>&2 "template $name resolved to file $found"
        fi
    done
    [ -n "$found" ] || return 1
    echo "$found"
    return 0
}
is-hooked() {
    # Usage: is-hooked HOOK_VARIABLE_NAME
    #
    # Returns 0 if the variable named by $1 holds non-empty hook code,
    # 1 otherwise. Intended use: is-hooked xyz && run-hook xyz
    local hook_name=$1
    local hook_body="${!hook_name}"
    [ -n "$hook_body" ]
}
run-hook() {
    # Usage: run-hook HOOK_VARIABLE_NAME
    #
    # Print "Running hook: NAME" and evaluate the code stored in the variable
    # named by $1 in the current shell. The return status is that of the
    # evaluated code.
    #
    # The local names are visible to the evaluated hook code, so they are
    # kept unchanged on purpose.
    local hook_code_var=$1
    local hook_code="${!hook_code_var}"
    printf 'Running hook: %s\n' "$hook_code_var"
    eval "${hook_code}"
}
install-files() {
    # Usage: install-files $(declare -p files_assoc_array)
    #
    # Parameter is a serialized associative array with
    #   key:   target filepath on VM
    #   value: source URL ("file:", limited "data:" and "dir:" schemes supported)
    #
    # Example: build an associative array and install files in the array
    #   files['/path/file1']=file:/hostpath/file
    #   files['/path/file2']=data:,hello
    #   files['/path/file3']=data:;base64,aGVsbG8=
    #   files['/path/dir1']='dir:'
    #   install-files "$(declare -p files)"
    #
    # A target whose source cannot be resolved (unknown scheme, unreadable
    # host file, undecodable base64) is reported with error() and skipped:
    # nothing is written to the VM for it. "ok." is printed only after the
    # VM operation has succeeded.
    local -A files
    # NOTE(review): everything after the first "=" of $1 is evaluated as
    # shell code. Pass only the output of "declare -p".
    eval "files=${1#*=}"
    local tgt src data
    for tgt in "${!files[@]}"; do
        src="${files[$tgt]}"
        # Never carry content over from the previous target.
        data=""
        case "$src" in
            "data:,"*)
                data=${src#data:,}
                ;;
            "data:;base64,"*)
                data=$(base64 -d <<< "${src#data:;base64,}") || {
                    error "failed to decode base64 data for vm file \"$tgt\""
                    continue
                }
                ;;
            "file:"*)
                data=$(< "${src#file:}") || {
                    error "failed to read host file \"${src#file:}\" for vm file \"$tgt\""
                    continue
                }
                ;;
            "dir:")
                echo -n "Creating on vm: $tgt/... "
                if vm-command-q "mkdir -p \"$tgt\""; then
                    echo "ok."
                else
                    error "failed to make directory to vm \"$tgt\""
                fi
                continue
                ;;
            *)
                error "invalid source scheme \"${src}\", expected \"data:,\" \"data:;base64,\", \"file:\" or \"dir:\""
                # error() returns in interactive mode: do not fall through
                # to writing a file without content.
                continue
                ;;
        esac
        echo -n "Writing on vm: $tgt... "
        if vm-write-file "$tgt" "$data"; then
            echo "ok."
        else
            error "failed to write to vm file \"$tgt\""
        fi
    done
}
### Test script helpers
install() { # script API
    # Usage: install TARGET
    #
    # Supported TARGETs:
    # cri-resmgr: install cri-resmgr to VM.
    # Install latest local build to VM: (the default)
    # $ install cri-resmgr
    # Fetch github master to VM, build and install on VM:
    # $ binsrc=github install cri-resmgr
    # cri-resmgr-webhook: install cri-resmgr-webhook to VM.
    # Installs from the latest webhook Docker image on the host.
    #
    # Example:
    # uninstall cri-resmgr
    # install cri-resmgr
    # launch cri-resmgr
    local target="$1"
    case "$target" in
        cri-resmgr | cri-resmgr-agent | cri-resmgr-webhook)
            # Each known target has a vm-install-<target> function.
            "vm-install-$target"
            ;;
        *)
            error "unknown target to install \"$1\""
            ;;
    esac
}
uninstall() { # script API
    # Usage: uninstall TARGET
    #
    # Supported TARGETs:
    #   cri-resmgr: stop (kill) cri-resmgr and cri-resmgr-agent, remove the
    #               cri-resource-manager package and purge all files from VM.
    #   cri-resmgr-agent: stop (kill) cri-resmgr-agent and delete the agent
    #               binaries from VM. cri-resmgr itself is left untouched.
    #   cri-resmgr-webhook: stop cri-resmgr-webhook and delete webhook files from VM.
    local target="$1"
    case "$target" in
        "cri-resmgr")
            terminate cri-resmgr
            terminate cri-resmgr-agent
            distro-remove-pkg cri-resource-manager
            vm-command "rm -rf /usr/local/bin/cri-resmgr /usr/bin/cri-resmgr /usr/local/bin/cri-resmgr-agent /usr/bin/cri-resmgr-agent /var/lib/cri-resmgr /etc/cri-resmgr"
            ;;
        "cri-resmgr-agent")
            terminate cri-resmgr-agent
            # Remove only what belongs to the agent: the cri-resmgr
            # binaries, state (/var/lib/cri-resmgr) and configuration
            # (/etc/cri-resmgr) stay in place.
            vm-command "rm -rf /usr/local/bin/cri-resmgr-agent /usr/bin/cri-resmgr-agent"
            ;;
        "cri-resmgr-webhook")
            terminate cri-resmgr-webhook
            vm-command "rm -rf webhook"
            ;;
        *)
            error "uninstall: invalid target \"$target\""
            ;;
    esac
}
launch() { # script API
# Usage: launch TARGET
#
# Supported TARGETs:
# cri-resmgr: launch cri-resmgr on VM. Environment variables:
# cri_resmgr_cfg: configuration filepath (on host)
# cri_resmgr_extra_args: extra arguments on command line
# cri_resmgr_config: "force" (default) or "fallback"
# k8scri: if the CRI pipe starts with cri-resmgr
# this launches cri-resmgr as a proxy,
# otherwise as a dynamic NRI plugin.
#
# cri-resmgr-systemd:
# launch cri-resmgr on VM using "systemctl start".
# Works when installed with binsrc=packages/<distro>.
# Environment variables:
# cri_resmgr_cfg: configuration filepath (on host)
#
# cri-resmgr-agent:
# launch cri-resmgr-agent on VM. Environment variables:
# cri_resmgr_agent_extra_args: extra arguments on command line
#
# cri-resmgr-webhook:
# deploy cri-resmgr-webhook from the image on VM.
#
# Example:
# cri_resmgr_cfg=/tmp/topology-aware.cfg launch cri-resmgr
local target="$1"
local launch_cmd
local adjustment_schema="$HOST_PROJECT_DIR/pkg/apis/resmgr/v1alpha1/adjustment-schema.yaml"
# Becomes "-force-config" unless cri_resmgr_config is set and non-empty,
# in which case its value takes the place of "force".
local cri_resmgr_config_option="-${cri_resmgr_config:-force}-config"
local cri_resmgr_mode=""
case $target in
"cri-resmgr")
# Copy the configuration from the host to the VM (the destination has no
# path component) and print it on the VM by its base name.
host-command "$SCP \"$cri_resmgr_cfg\" $VM_SSH_USER@$VM_IP:" || {
command-error "copying \"$cri_resmgr_cfg\" to VM failed"
}
vm-command "cat $(basename "$cri_resmgr_cfg")"
if [[ "$k8scri" == cri-resmgr* ]]; then
# launch cri-resmgr as the top element in the k8s container runtime stack
cri_resmgr_mode="-relay-socket ${cri_resmgr_sock} -runtime-socket $cri_sock -image-socket $cri_sock"
else
# launch cri-resmgr as an NRI plugin to running container runtime
cri_resmgr_mode="-use-nri-plugin"
fi
launch_cmd="cri-resmgr $cri_resmgr_mode $cri_resmgr_config_option $(basename "$cri_resmgr_cfg") $cri_resmgr_extra_args"
# Drop the old pidfile and old output, and save the command line to
# cri-resmgr.launch.sh on the VM before starting it in the background.
vm-command-q "rm -f $cri_resmgr_pidfile"
vm-command-q "echo '$launch_cmd' > cri-resmgr.launch.sh ; rm -f cri-resmgr.output.txt"
vm-command "$launch_cmd >cri-resmgr.output.txt 2>&1 &"
vm-wait-process --timeout 30 --pidfile "$cri_resmgr_pidfile" cri-resmgr
# A successful grep means the output contains "FATAL ERROR": report it.
vm-command "grep 'FATAL ERROR' cri-resmgr.output.txt" >/dev/null 2>&1 && {
command-error "launching cri-resmgr failed with FATAL ERROR"
}
# fuser fails when no process has the pidfile open.
vm-command "fuser ${cri_resmgr_pidfile}" >/dev/null 2>&1 || {
echo "cri-resmgr last output line:"
vm-command-q "tail -n 1 cri-resmgr.output.txt"
command-error "launching cri-resmgr failed, cannot find cri-resmgr PID"
}
;;
"cri-resmgr-agent")
# Copy the adjustment schema to the VM and recreate it with kubectl
# (delete first, then create) before starting the agent.
host-command "$SCP \"$adjustment_schema\" $VM_SSH_USER@$VM_IP:" ||
command-error "copying \"$adjustment_schema\" to VM failed"
vm-command "kubectl delete -f $(basename "$adjustment_schema"); kubectl create -f $(basename "$adjustment_schema")"
# \$(hostname) is escaped so that it is evaluated on the VM, not on the host.
launch_cmd="NODE_NAME=\$(hostname) cri-resmgr-agent -kubeconfig /root/.kube/config $cri_resmgr_agent_extra_args"
vm-command-q "echo '$launch_cmd' >cri-resmgr-agent.launch.sh; rm -f cri-resmgr-agent.output.txt"
vm-command "$launch_cmd >cri-resmgr-agent.output.txt 2>&1 &"
vm-wait-process --timeout 30 cri-resmgr-agent
vm-command "grep 'FATAL ERROR' cri-resmgr-agent.output.txt" >/dev/null 2>&1 &&
command-error "launching cri-resmgr-agent failed with FATAL ERROR"
# The agent is considered running when some process uses its socket.
vm-command "fuser ${cri_resmgr_agent_sock}" >/dev/null 2>&1 ||
command-error "launching cri-resmgr-agent failed, cannot find cri-resmgr-agent PID"
;;
"cri-resmgr-systemd")
# The copied configuration is installed as /etc/cri-resmgr/fallback.cfg
# on the VM before the service is started.
host-command "$SCP \"$cri_resmgr_cfg\" $VM_SSH_USER@$VM_IP:" ||
command-error "copying \"$cri_resmgr_cfg\" to VM failed"
vm-command "cp \"$(basename "$cri_resmgr_cfg")\" /etc/cri-resmgr/fallback.cfg"
vm-command "systemctl daemon-reload ; systemctl start cri-resource-manager" ||
command-error "systemd failed to start cri-resource-manager"
vm-wait-process --timeout 30 cri-resmgr
# Show the service status before reporting that it is not active.
vm-command "systemctl is-active cri-resource-manager" || {
vm-command "systemctl status cri-resource-manager"
command-error "cri-resource-manager did not become active after systemctl start"
}
;;
"cri-resmgr-webhook")
# kubectl here is the script function of the same name, not a host binary.
kubectl apply -f webhook/webhook-deployment.yaml
kubectl wait --for=condition=Available -n cri-resmgr deployments/cri-resmgr-webhook ||
error "cri-resmgr-webhook deployment did not become Available"
kubectl apply -f webhook/mutating-webhook-config.yaml
;;
*)
error "launch: invalid target \"$1\""
;;
esac
# The on_launch hook runs after every target, including an invalid one if
# error() returned. The function itself always returns 0 from here.
is-hooked on_launch && run-hook on_launch
return 0
}
terminate() { # script API
    # Usage: terminate TARGET
    #
    # Supported TARGETs:
    # cri-resmgr: stop (kill) cri-resmgr.
    # cri-resmgr-agent: stop (kill) cri-resmgr-agent.
    # cri-resmgr-webhook: delete cri-resmgr-webhook from k8s.
    local target="$1"
    local remote_cmd
    # Pick the command for the target first, run it on the VM afterwards.
    case "$target" in
        cri-resmgr)
            remote_cmd="fuser --kill ${cri_resmgr_pidfile} 2>/dev/null"
            ;;
        cri-resmgr-agent)
            remote_cmd="fuser --kill ${cri_resmgr_agent_sock} 2>/dev/null"
            ;;
        cri-resmgr-webhook)
            remote_cmd="kubectl delete -f webhook/mutating-webhook-config.yaml; kubectl delete -f webhook/webhook-deployment.yaml"
            ;;
        *)
            error "terminate: invalid target \"$target\""
            return
            ;;
    esac
    vm-command "$remote_cmd"
}
sleep() { # script API
    # Usage: sleep PARAMETERS
    #
    # Run sleep PARAMETERS on host.
    local sleep_args="$*"
    # All parameters are joined into one command string for host-command.
    host-command "sleep ${sleep_args}"
}
extended-resources() { # script API
    # Usage: extended-resources <add|remove> RESOURCE [VALUE]
    #
    # Examples:
    #   extended-resources remove cmk.intel.com/exclusive-cpus
    #   extended-resources add cmk.intel.com/exclusive-cpus 4
    #
    # Patches /status/capacity/RESOURCE of the VM's own node through a
    # "kubectl proxy" on 127.0.0.1:8001 (started on the VM if not running).
    # Returns 1 on invalid arguments, otherwise the status of the PATCH
    # command run with vm-command.
    local action="$1"
    local resource="$2"
    local value="$3"
    local tilde='~'
    local resource_escaped
    if [ -z "$resource" ]; then
        error "extended-resource: missing resource"
        return 1
    fi
    # Validate everything before touching the VM, so that a typo in the
    # action does not leave a kubectl proxy running for nothing.
    case "$action" in
        add)
            if [ -z "$value" ]; then
                error "extended-resource: missing value to add to resource $resource"
                return 1
            fi
            ;;
        remove)
            ;;
        *)
            error "extended-resource: invalid action \"$action\""
            return 1
            ;;
    esac
    # JSON Pointer escaping: "~" becomes "~0" first, then every "/" becomes
    # "~1". The tilde comes from a variable to keep it out of reach of
    # tilde expansion.
    resource_escaped="${resource//"$tilde"/${tilde}0}"
    resource_escaped="${resource_escaped//\//${tilde}1}"
    # make sure kubectl proxy is running
    vm-command-q "ss -ltn | grep -q 127.0.0.1:8001 || { kubectl proxy &>/dev/null </dev/null & sleep 2 ; }"
    case "$action" in
        add)
            vm-command "curl --header 'Content-Type: application/json-patch+json' --request PATCH --data '[{\"op\": \"add\", \"path\": \"/status/capacity/$resource_escaped\", \"value\": \"$value\"}]' http://localhost:8001/api/v1/nodes/\$(hostname)/status"
            ;;
        remove)
            vm-command "curl --header 'Content-Type: application/json-patch+json' --request PATCH --data '[{\"op\": \"remove\", \"path\": \"/status/capacity/$resource_escaped\"}]' http://localhost:8001/api/v1/nodes/\$(hostname)/status"
            ;;
    esac
}
pyexec() { # script API
    # Usage: pyexec [PYTHONCODE...]
    #
    # Run python3 -c PYTHONCODEs on host. Stops if execution fails.
    #
    # Variables available in PYTHONCODE:
    # allocations: dictionary: shorthand to cri-resmgr policy allocations
    # (unmarshaled cache['PolicyJSON']['allocations'])
    # allowed tree: {package: {die: {node: {core: {thread: {pod}}}}}}
    # resource topology and pods allowed to use the resources.
    # packages, dies, nodes, cores, threads:
    # dictionaries: {podname: set-of-allowed}
    # Example: pyexec 'print(dies["pod0c0"])'
    # cache: dictionary, cri-resmgr cache
    #
    # Note that variables are *not* updated when pyexec is called.
    # You can update the variables by running "verify" without expressions.
    #
    # Code in environment variable py_consts runs before PYTHONCODE.
    #
    # Example:
    # verify ; pyexec 'import pprint; pprint.pprint(allowed)'
    PYEXEC_STATE_PY="$OUTPUT_DIR/pyexec_state.py"
    PYEXEC_PY="$OUTPUT_DIR/pyexec.py"
    PYEXEC_LOG="$OUTPUT_DIR/pyexec.output.txt"
    local last_exit_status=0
    # The state module is rewritten on every call from the current values of
    # py_allowed, py_cache and py_consts.
    {
        echo "import pprint; pp=pprint.pprint"
        echo "# \$py_allowed:"
        echo -e "$py_allowed"
        echo "# \$py_cache:"
        echo -e "$py_cache"
        echo "# \$py_consts:"
        echo -e "$py_consts"
    } > "$PYEXEC_STATE_PY"
    for PYTHONCODE in "$@"; do
        {
            echo "from pyexec_state import *"
            echo -e "$PYTHONCODE"
        } > "$PYEXEC_PY"
        # Output goes both to the terminal and to PYEXEC_LOG. The status of
        # python3, not of tee, is what counts.
        PYTHONPATH="$OUTPUT_DIR:$PYTHONPATH:$DEMO_LIB_DIR" python3 "$PYEXEC_PY" 2>&1 | tee "$PYEXEC_LOG"
        last_exit_status=${PIPESTATUS[0]}
        if [ "$last_exit_status" != "0" ]; then
            error "pyexec: non-zero exit status \"$last_exit_status\", see \"$PYEXEC_PY\" and \"$PYEXEC_LOG\""
            # error() returns in interactive mode. Stop here so that the
            # remaining snippets do not run and cannot hide this failure
            # behind a later zero status.
            return "$last_exit_status"
        fi
    done
    return 0
}
pp() { # script API
    # Usage: pp EXPR
    #
    # Pretty-print the value of Python expression EXPR.
    local expr="$*"
    # All arguments are joined into one expression and wrapped in pp(...).
    pyexec "pp(${expr})"
}
report() { # script API
    # Usage: report [VARIABLE...]
    #
    # Updates and reports current value of VARIABLE.
    #
    # Supported VARIABLEs:
    # allocations
    # allowed
    # cache
    #
    # Example: print cri-resmgr policy allocations. In interactive mode
    # you may use a pager like less.
    # report allocations | less
    local varname
    for varname in "$@"; do
        case "$varname" in
            allocations | cache)
                # Both come from the cri-resmgr cache and are printed
                # the same way.
                get-py-cache
                pyexec $'\nimport pprint\npprint.pprint('"$varname"$')\n'
                ;;
            allowed)
                get-py-allowed
                pyexec $'\nimport topology\nprint(topology.str_tree(allowed))\n'
                ;;
            *)
                error "report: unknown variable \"$varname\""
                ;;
        esac
    done
}
verify() { # script API
# Usage: verify [EXPR...]
#
# Run python3 -c "assert(EXPR)" to test that every EXPR is True.
# Stop evaluation on the first EXPR not True and fail the test.
# You can allow script execution to continue after failed verification
# by running verify in a subshell (in parenthesis):
# (verify 'False') || echo '...but was expected to fail.'
#
# Variables available in EXPRs:
# See variables in: help pyexec
#
# Note that all variables are updated every time verify is called
# before evaluating (asserting) expressions.
#
# Example: require that containers pod0c0 and pod1c0 run on separate NUMA
# nodes and that pod0c0 is allowed to run on 4 CPUs:
# verify 'set.intersection(nodes["pod0c0"], nodes["pod1c0"]) == set()' \
# 'len(cpus["pod0c0"]) == 4'
get-py-allowed
get-py-cache
# The two calls above refresh py_allowed and py_cache even when no EXPR is
# given. py_assertion is not declared local, so it stays set after the call.
for py_assertion in "$@"; do
speed=1000 out "### Verifying assertion '$py_assertion'"
# pyexec runs in a subshell; a non-zero status from it enters the failure
# block after the closing parenthesis. The Python wrapper re-raises
# KeyError and IndexError after printing a warning and sleeping 5 seconds.
( speed=1000 pyexec "
try:
import time,sys
assert(${py_assertion})
except KeyError as e:
print('WARNING: *')
print('WARNING: *** KeyError - %s' % str(e))
print('WARNING: *** Your verify expression might have a typo/thinko.')
print('WARNING: *')
sys.stdout.flush()
time.sleep(5)
raise e
except IndexError as e:
print('WARNING: *')
print('WARNING: *** IndexError - %s' % str(e))
print('WARNING: *** Your verify expression might have a typo/thinko.')
print('WARNING: *')
sys.stdout.flush()
time.sleep(5)
raise e
" ) || {
out "### The assertion FAILED
### post-mortem debug help begin ###
cd $OUTPUT_DIR
python3
from pyexec_state import *
$py_assertion
### post-mortem debug help end ###"
echo "verify: assertion '$py_assertion' failed." >> "$SUMMARY_FILE"
# A configured on_verify_fail hook replaces the default reaction, which is
# command-exit-if-not-interactive.
if is-hooked on_verify_fail; then
run-hook on_verify_fail
else
command-exit-if-not-interactive
fi
}
# NOTE(review): this line is also reached after a failed assertion whenever
# the failure block above returns instead of exiting.
speed=1000 out "### The assertion holds."
done
is-hooked on_verify && run-hook on_verify
return 0
}
kubectl-force-delete() { # script API
    # Usage: kubectl-force-delete RESOURCE NAME
    #
    # Force-deleting a "Terminating" namespace clears finalizers that
    # have failed to finish. Therefore there may be resources left in the
    # namespace NAME. Following command prints them.
    #
    # kubectl api-resources --verbs=list --namespaced -o name | \
    # xargs -n 1 kubectl get --show-kind --ignore-not-found -n NAME
    #
    # Example: delete a namespace that is stuck in the "Terminating" phase
    #
    # kubectl-force-delete namespace my-namespace
    [ -n "$1" ] || {
        error "missing RESOURCE"
        return 1
    }
    [ -n "$2" ] || {
        error "missing resource NAME"
        return 1
    }
    # Only namespaces (spelled "namespace" or "ns") are supported.
    case "$1" in
        namespace | ns)
            ;;
        *)
            error "unsupported force-delete resource: $1"
            return 1
            ;;
    esac
    local ns="$2"
    # The script below runs on the VM: it does nothing unless the namespace
    # exists and is in the Terminating phase.
    vm-command "
kubectl get namespace $ns -o json > force-delete-ns.json || exit 0
(
grep -E phase.*Terminating force-delete-ns.json || exit 0
tr -d '\n' < force-delete-ns.json \
| sed 's/\"finalizers\": \[[^]]\+\]/\"finalizers\": []/' \
| kubectl replace --raw /api/v1/namespaces/$ns/finalize -f -
)
rm -f force-delete-ns.json
"
}
kubectl() { # script API
    # Usage: kubectl parameters
    #
    # Runs kubectl command on virtual machine.
    local kubectl_args="$*"
    # A failure on the VM is handed to command-error with the full command.
    if ! vm-command "kubectl $kubectl_args"; then
        command-error "kubectl $kubectl_args failed"
    fi
}
delete() { # script API
    # Usage: delete PARAMETERS
    #
    # Run "kubectl delete PARAMETERS".
    local delete_args="$*"
    # The command runs on the VM; a failure is handed to command-error.
    if ! vm-command "kubectl delete $delete_args"; then
        command-error "kubectl delete failed"
    fi
}
instantiate() { # script API
    # Usage: instantiate FILENAME
    #
    # Produces $OUTPUT_DIR/instance/FILENAME. Prints the filename on success.
    # Uses FILENAME.in as source (resolved from $TEST_DIR, $TOPOLOGY_DIR, ...)
    #
    # The template is expanded with the shell (eval of echo -e), so it can
    # refer to variables and command substitutions. Lines that are empty or
    # contain only spaces are dropped from the result.
    # Returns non-zero, and prints nothing to stdout, if the template is
    # missing or the result cannot be produced.
    local FILENAME="$1"
    local RESULT="$OUTPUT_DIR/instance/$FILENAME"
    # Local, like in create(): do not leave template_file behind in the caller.
    local template_file
    template_file=$(resolve-template "$FILENAME" "$TEST_DIR" "$TOPOLOGY_DIR" "$POLICY_DIR" "$SCRIPT_DIR")
    if [ ! -f "$template_file" ]; then
        error "error instantiating \"$FILENAME\": missing template ${template_file}"
        # error() returns in interactive mode: stop instead of creating an
        # empty instance file and printing its name.
        return 1
    fi
    mkdir -p "$(dirname "$RESULT")" 2>/dev/null
    eval "echo -e \"$(<"${template_file}")\"" | grep -v '^ *$' > "$RESULT" || {
        error "instantiating \"$FILENAME\" failed"
        return 1
    }
    echo "$RESULT"
}
# Image names (with and without tag / registry prefix) that create treats as
# already available on the VM. While the array is empty, create fills it from
# the VM's "crictl images" listing; it then appends every image after
# importing or pulling it, so the same image is not fetched again.
declare -a pulled_images_on_vm
create() { # script API
# Usage: [VAR=VALUE][n=COUNT] create TEMPLATE_NAME
#
# Create n instances from TEMPLATE_NAME.yaml.in, copy each of them
# from host to vm, kubectl create -f them, and wait for them
# becoming Ready. Templates are searched in $TEST_DIR, $TOPOLOGY_DIR,
# $POLICY_DIR, and $SCRIPT_DIR in this order of preference. The first
# template found is used.
#
# Parameters:
# TEMPLATE_NAME: the name of the template without extension (.yaml.in)
#
# Optional parameters (VAR=VALUE):
# namespace: namespace to which instances are created
# wait: condition to be waited for (see kubectl wait --for=condition=).
# If empty (""), skip waiting. The default is wait="Ready".
# wait_t: wait timeout. The default is wait_t=240s.
local template_file
template_file=$(resolve-template "$1.yaml" "$TEST_DIR" "$TOPOLOGY_DIR" "$POLICY_DIR" "$SCRIPT_DIR")
local namespace_args
local template_kind
template_kind=$(awk '/kind/{print tolower($2)}' < "$template_file")
local wait=${wait-Ready}
local wait_t=${wait_t-240s}
local images
local image
local tag
local errormsg
local default_name=${NAME:-""}
if [ -z "$n" ]; then
local n=1
fi
if [ -n "${namespace:-}" ]; then
namespace_args="-n $namespace"
else
namespace_args=""
fi
if [ ! -f "$template_file" ]; then
error "error creating from template \"$template_file.yaml.in\": template file not found"
fi
for _ in $(seq 1 $n); do
kind_count[$template_kind]=$(( ${kind_count[$template_kind]} + 1 ))
if [ -n "$default_name" ]; then
local NAME="$default_name"
else
local NAME="${template_kind}$(( ${kind_count[$template_kind]} - 1 ))" # the first pod is pod0
fi
eval "echo -e \"$(<"${template_file}")\"" | grep -v '^ *$' > "$OUTPUT_DIR/$NAME.yaml"
host-command "$SCP \"$OUTPUT_DIR/$NAME.yaml\" $VM_SSH_USER@$VM_IP:" || {
command-error "copying \"$OUTPUT_DIR/$NAME.yaml\" to VM failed"
}
vm-command "cat $NAME.yaml"
images="$(grep -E '^ *image: .*$' "$OUTPUT_DIR/$NAME.yaml" | sed -E 's/^ *image: *([^ ]*)$/\1/g' | sort -u)"
if [ "${#pulled_images_on_vm[@]}" = "0" ]; then
# Initialize pulled images available on VM
vm-command "crictl -i unix://${k8scri_sock} images" >/dev/null &&
while read -r image tag _; do
if [ "$image" = "IMAGE" ]; then
continue
fi
local notopdir_image="${image#*/}"
local norepo_image="${image##*/}"
if [ "$tag" = "latest" ]; then
pulled_images_on_vm+=("$image")
pulled_images_on_vm+=("$notopdir_image")
pulled_images_on_vm+=("$norepo_image")
fi
pulled_images_on_vm+=("$image:$tag")
pulled_images_on_vm+=("$notopdir_image:$tag")
pulled_images_on_vm+=("$norepo_image:$tag")
done <<< "$COMMAND_OUTPUT"
fi
for image in $images; do
if ! [[ " ${pulled_images_on_vm[*]} " == *" ${image} "* ]]; then
if [ "$use_host_images" == "1" ] && vm-put-docker-image "$image"; then
: # no need to pull the image to vm, it is now imported.
else
vm-command "crictl -i unix://${k8scri_sock} pull \"$image\"" || {
errormsg="pulling image \"$image\" for \"$OUTPUT_DIR/$NAME.yaml\" failed."
if is-hooked on_create_fail; then
echo "$errormsg"
run-hook on_create_fail
else
command-error "$errormsg"
fi
}
fi
pulled_images_on_vm+=("$image")
fi
done
vm-command "kubectl create -f $NAME.yaml $namespace_args" || {
if is-hooked on_create_fail; then
echo "kubectl create error"
run-hook on_create_fail
else
command-error "kubectl create error"
fi
}
if [ "x$wait" != "x" ]; then
speed=1000 vm-command "kubectl wait --timeout=${wait_t} --for=condition=${wait} $namespace_args ${template_kind}/$NAME" >/dev/null 2>&1 || {
errormsg="waiting for ${template_kind} \"$NAME\" to become ready timed out"
if is-hooked on_create_fail; then
echo "$errormsg"
run-hook on_create_fail
else
command-error "$errormsg"
fi
}
fi