Run sriov test suite in CI #1970

Open · wants to merge 47 commits into base: master

Changes from all commits (47 commits)
57c61b6  Move SR-IOV functional tests into a separate suite (booxter, Jan 17, 2019)
2310bcb  Run sriov test suite in CI (booxter, Mar 1, 2019)
74052f4  Insert vfio-pci kernel module for sriov jobs (booxter, Mar 2, 2019)
1ed33dc  DO NOT MERGE Bump timeout to wait kubevirt up to 360s (booxter, Mar 6, 2019)
82984cb  DO NOT MERGE: log time it takes for kubevirt to get up in sriov job (booxter, Mar 6, 2019)
a5cb622  Merge branch 'master' into sriov_lane (booxter, Mar 7, 2019)
a707b7b  Adopt kind (Apr 3, 2019)
a83ab5d  use external; configure kubectl (Apr 4, 2019)
f3e305b  start docker registry (Apr 4, 2019)
3ae3bd6  make cluster-build (Apr 4, 2019)
d1d8a88  configure insecure registry for dockerd (Apr 4, 2019)
3314af5  Merge remote-tracking branch 'upstream/master' into sriov_lane_wip_wip (Apr 4, 2019)
58e5bc2  Prepare local storage (Apr 6, 2019)
f4efbb6  Use host config file for sriovdp (booxter, Apr 10, 2019)
b0b1bc8  Merge remote-tracking branch 'upstream/master' into sriov_lane (booxter, Apr 10, 2019)
1a2495d  Fix configure_sriovdp when no devices present (booxter, Apr 17, 2019)
e9bcf63  Install ginkgo for sriov tests (booxter, Apr 17, 2019)
6ac7b65  Increase time to wait for cluster (booxter, Apr 17, 2019)
798af05  Fixed build for functests (booxter, Apr 17, 2019)
01ca1d5  Log docker container logs for sriov job (booxter, Apr 17, 2019)
6ef3550  Enable debug logs for kind (booxter, Apr 25, 2019)
a735f95  Mount /lib/modules into cluster node (booxter, Apr 25, 2019)
369a6e2  DNM: keep cluster up after failure to debug issues (booxter, Apr 26, 2019)
d8f6555  DNM: run sriov job only (booxter, Apr 26, 2019)
64729ab  DNM: don't even require sriov nic just yet (booxter, Apr 26, 2019)
b7efda6  Wait 1h for cluster up (booxter, Apr 26, 2019)
b1df4a7  Merge remote-tracking branch 'upstream/master' into sriov_lane (booxter, Apr 26, 2019)
c34eeff  fixed wait arg (booxter, Apr 26, 2019)
b3d91bc  create twice (booxter, Apr 26, 2019)
65a04f3  Sleep on start of script (booxter, Apr 27, 2019)
a236609  Don't use proxy to access the kind cluster (booxter, Apr 29, 2019)
3f0da18  Revert "DNM: don't even require sriov nic just yet" (booxter, Apr 29, 2019)
eb92fcc  Export NO_PROXY (booxter, Apr 29, 2019)
ecd7795  configure sriovdp (booxter, Apr 29, 2019)
9fae8f4  Dump pod logs while waiting (booxter, Apr 29, 2019)
fde17e6  extract logs for kube-system pods only (booxter, Apr 29, 2019)
0ff64e6  dump description of pods + don't fail on not running state (booxter, Apr 29, 2019)
38a06b3  try to understand why pcidp config is directory, and where (booxter, Apr 30, 2019)
70e950d  mount /etc/pcidp dir not config file (booxter, Apr 30, 2019)
08d4249  also list /etc/pcidp (booxter, Apr 30, 2019)
9e1cbec  DNM: sleep (booxter, Apr 30, 2019)
8a67631  mount /etc/pcidp (booxter, Apr 30, 2019)
e831861  list dirs in /etc (booxter, Apr 30, 2019)
9f5d97e  don't mount /etc (booxter, Apr 30, 2019)
73d73d1  generate pcidp conf inside cluster; use devel tag for images (booxter, Apr 30, 2019)
ec50f20  Merge remote-tracking branch 'upstream/master' into sriov_lane (booxter, Apr 30, 2019)
1455954  don't mount /etc/pcidp (booxter, May 1, 2019)
@@ -35,3 +35,7 @@ coverage.html
manifests/**/*.tmp
/bazel-*
/.bazelrc

cluster/k8s-1.13.0-sriov/cache/
cluster/k8s-1.13.0-sriov/config
cluster/k8s-1.13.0-sriov/http-cache/
@@ -0,0 +1,9 @@
/var/run/libvirt
/var/lib/libvirt/boot
/var/cache:/var/host_cache
/var/run/docker.sock:/var/run/docker.sock
/var/lib/stdci/shared:/var/lib/stdci/shared
/dev/vfio:/dev/vfio
/sys/bus/pci/devices:/sys/bus/pci/devices
/run/docker/netns:/run/docker/netns
/lib/modules:/lib/modules
@@ -0,0 +1,7 @@
git
rsync
docker
make
kubectl
iproute
golang
@@ -0,0 +1 @@
https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
@@ -0,0 +1,249 @@
#!/bin/bash -e
set -x

#############################################################
# This is based on https://github.com/SchSeba/kubevirt-docker
#############################################################

export NO_PROXY="localhost,127.0.0.1,172.17.0.2"

GOPATH=~/go
GOBIN=~/go/bin
PATH=$PATH:$GOBIN

CLUSTER_NAME=sriov-ci-$(uuidgen)
CLUSTER_CONTROL_PLANE=${CLUSTER_NAME}-control-plane
CONTAINER_REGISTRY_HOST="localhost:5000"

CLUSTER_CMD="docker exec -it -d ${CLUSTER_CONTROL_PLANE}"

KUBEVIRT_PATH=`pwd`
CLUSTER_DIR="cluster/k8s-1.13.0-sriov"
MANIFESTS_DIR="${CLUSTER_DIR}/manifests"
ARTIFACTS_DIR="${KUBEVIRT_PATH}/exported-artifacts"

SHARED_DIR="/var/lib/stdci/shared"
SRIOV_JOB_LOCKFILE="${SHARED_DIR}/sriov.lock"
SRIOV_TIMEOUT_SEC="14400" # 4h

function wait_containers_ready {
    # wait until all containers are ready
    while [ -n "$(kubectl get pods --all-namespaces -o'custom-columns=status:status.containerStatuses[*].ready,metadata:metadata.name' --no-headers | grep false)" ]; do
        echo "Waiting for all containers to become ready ..."
        kubectl get pods --all-namespaces -o'custom-columns=status:status.containerStatuses[*].ready,metadata:metadata.name' --no-headers
        sleep 10
    done
}

# NOTE: this assumes that once at least one virt- service pops up, the others
# will pop up in quick succession, at least before the first one transitions to
# the ready state. If that is ever not the case, we may exit this function
# before all virt pods are scheduled and ready. Should that happen, we may need
# to list all services we expect in a kubevirt cluster and check that each of
# them is up and running (a sketch of such a check follows this function).
function wait_kubevirt_up {
    # it takes a while for virt-operator to schedule virt pods; wait for at least one of them to pop up
    while [ -z "$(kubectl get pods -n kubevirt | grep virt)" ]; do
        echo "Waiting for virt pods to be created ..."
        kubectl get pods -n kubevirt | grep virt
        sleep 10
    done

    wait_containers_ready
}
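
# A stricter (currently unused) variant of the check above, as the NOTE
# suggests: wait for each expected virt component by name instead of relying on
# the first virt- pod that shows up. The component list below is an assumption
# based on a standard KubeVirt deployment, not something this job requires.
function wait_kubevirt_components_scheduled {
    for component in virt-operator virt-api virt-controller virt-handler; do
        while [ -z "$(kubectl get pods -n kubevirt | grep ${component})" ]; do
            echo "Waiting for ${component} pods to be scheduled ..."
            sleep 10
        done
    done
    wait_containers_ready
}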

function collect_artifacts {
    mkdir -p "$ARTIFACTS_DIR/containers"
    for c in $(docker ps -a | tail -n +2 | awk '{print $1}'); do
        # capture both stdout and stderr of each container into its log file
        docker logs $c > $ARTIFACTS_DIR/containers/$c.log 2>&1 || true
    done
    kind export logs ${ARTIFACTS_DIR} --name=${CLUSTER_NAME} || true
}

function finish {
    collect_artifacts
    kind delete cluster --name=${CLUSTER_NAME}
}

trap finish EXIT

# serialize all sriov jobs running on the same ci node
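# (the lock is taken on a dedicated file descriptor, so it is held for the
# lifetime of this shell and released automatically on exit; concurrent jobs on
# the same node queue behind it for up to SRIOV_TIMEOUT_SEC)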
[ -d "${SHARED_DIR}" ] || mkdir -p "${SHARED_DIR}"
touch "$SRIOV_JOB_LOCKFILE"
exec {fd}< "$SRIOV_JOB_LOCKFILE"
flock -e -w "$SRIOV_TIMEOUT_SEC" "$fd" || {
    echo "ERROR: Timed out after $SRIOV_TIMEOUT_SEC seconds waiting for sriov.lock" >&2
    exit 1
}

# ================
# bring up cluster
# ================
go get -u sigs.k8s.io/kind

# create the cluster; --retain keeps the node containers around on failure so their logs can still be collected
kind --loglevel debug create cluster --wait=$((60*60))s --retain --name=${CLUSTER_NAME} --config=${MANIFESTS_DIR}/kind.yaml

export KUBECONFIG=$(kind get kubeconfig-path --name=${CLUSTER_NAME})
kubectl cluster-info

# copied from https://github.com/kubernetes-sigs/federation-v2/blob/master/scripts/create-clusters.sh
function configure-insecure-registry-and-reload() {
    local cmd_context="${1}" # context to run command e.g. sudo, docker exec
    ${cmd_context} "$(insecure-registry-config-cmd)"
    ${cmd_context} "$(reload-docker-daemon-cmd)"
}

function reload-docker-daemon-cmd() {
    echo "kill -s SIGHUP \$(pgrep dockerd)"
}

function insecure-registry-config-cmd() {
echo "cat <<EOF > /etc/docker/daemon.json
{
\"insecure-registries\": [\"${CONTAINER_REGISTRY_HOST}\"]
}
EOF
"
}

configure-insecure-registry-and-reload "${CLUSTER_CMD} bash -c"

# copy config for debugging purposes
cp ${KUBECONFIG} ${CLUSTER_DIR}/cluster.config

# wait for nodes to become ready
until kubectl get nodes --no-headers
do
    echo "Waiting for all nodes to become ready ..."
    sleep 10
done

# wait until k8s pods are running
while [ -n "$(kubectl get pods --all-namespaces --no-headers | grep -v Running)" ]; do
    echo "Waiting for all pods to enter the Running state ..."
    kubectl get pods --all-namespaces --no-headers | >&2 grep -v Running || true
    sleep 10
done

# wait until all containers are ready
wait_containers_ready

# ===============================================
# move all VF netlink interfaces into kube-master
# ===============================================
#DOCKER_NAMESPACE=`docker inspect kube-master | grep netns | tr "/" " " | awk '{print substr($7, 1, length($7)-2)}'`

# Instead of dealing with `setns` from within a chroot, we spawn a privileged
# container with host networking. Since the docker socket is mounted from the
# host, the container is actually created on the host itself and has access to
# the different namespaces.
# We set MAC addresses for all VFs because some NICs leave their VFs with
# all-zeroes addresses. We use a common MAC prefix from VirtualBox for all of
# them, and we assume that the number of VFs per node is not higher than 255.
#docker run -i --privileged --net=host --rm \
# -v /run/docker/netns/:/var/run/netns/ centos:7 /bin/bash <<EOF
# set -x
# yum install -y iproute
# sriov_vfs=( /sys/class/net/*/device/virtfn* )
# i=0
# for vf in "\${sriov_vfs[@]}"; do
# ifs_arr=( "\$vf"/net/* )
# for ifs in "\${ifs_arr[@]}"; do
# ifs_name="\${ifs%%\/net\/*}"
# ifs_name="\${ifs##*\/}"
# ip link set dev "\$ifs_name" down
# ip link set dev "\$ifs_name" address 0a:00:27:00:00:\$(printf "%x\\n" "\$i")
# ip link set dev "\$ifs_name" up
# ip link set "\$ifs_name" netns "$DOCKER_NAMESPACE"
# i=\$((\$i+1))
# done
# done
#
# sriov_pfs=( /sys/class/net/*/device/sriov_numvfs )
# for ifs in "\${sriov_pfs[@]}"; do
# ifs_name="\${ifs%%/device/*}"
# ifs_name="\${ifs_name##*/}"
# ip link set "\$ifs_name" netns "$DOCKER_NAMESPACE"
# done
#EOF

# ========================
# deploy SR-IOV components
# ========================

# deploy multus
kubectl apply -f $MANIFESTS_DIR/multus.yaml

# deploy sriov cni
kubectl apply -f $MANIFESTS_DIR/sriov-crd.yaml
kubectl apply -f $MANIFESTS_DIR/sriov-cni-daemonset.yaml

# prepare kernel for vfio passthrough
modprobe vfio-pci

# deploy sriov device plugin
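# Note: configure_sriovdp.sh below runs on the CI host and writes
# /etc/pcidp/config.json there; sriovdp-config-cmd then emits a heredoc command
# that recreates the same file inside the cluster node when executed through
# ${CLUSTER_CMD}.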
function configure-sriovdp() {
    local cmd_context="${1}" # context to run command e.g. sudo, docker exec
    ${cmd_context} "mkdir -p /etc/pcidp"
    ${cmd_context} "$(sriovdp-config-cmd)"
}

function sriovdp-config-cmd() {
    ./automation/configure_sriovdp.sh
    echo "cat <<EOF > /etc/pcidp/config.json
$(cat /etc/pcidp/config.json)
EOF
"
}

configure-sriovdp "${CLUSTER_CMD} bash -c"
kubectl apply -f $MANIFESTS_DIR/sriovdp-daemonset.yaml

# give them some time to create pods before checking pod status
sleep 10

# make sure all containers are ready
wait_containers_ready

# start local registry
until [ -z "$(docker ps -a | grep registry)" ]; do
    docker stop registry || true
    docker rm registry || true
    sleep 5
done
docker run -d -p 5000:5000 --restart=always --name registry registry:2
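# socat inside the node listens on port 5000 and forwards to the registry
# published on the host docker bridge (172.17.0.1:5000), so images pushed to
# localhost:5000 can be pulled from within the cluster node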
${CLUSTER_CMD} socat TCP-LISTEN:5000,fork TCP:172.17.0.1:5000

# prepare local storage
for i in {1..10}; do
    ${CLUSTER_CMD} mkdir -p /var/local/kubevirt-storage/local-volume/disk${i}
    ${CLUSTER_CMD} mkdir -p /mnt/local-storage/local/disk${i}
done
${CLUSTER_CMD} chmod -R 777 /var/local/kubevirt-storage/local-volume
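# create a loop device node (block major 7, minor 0) inside the node container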
${CLUSTER_CMD} mknod /dev/loop0 b 7 0

# ===============
# deploy kubevirt
# ===============
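# KUBEVIRT_PROVIDER=external targets the already-running cluster pointed to by
# KUBECONFIG; DOCKER_PREFIX and DOCKER_TAG make the build push devel images to
# the local registry started above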
export KUBEVIRT_PROVIDER=external
export DOCKER_PREFIX=${CONTAINER_REGISTRY_HOST}/kubevirt
export DOCKER_TAG=devel
make cluster-build
make cluster-deploy
wait_kubevirt_up

# =========================
# enable sriov feature gate
# =========================
kubectl patch configmap kubevirt-config -n kubevirt --patch "data:
  feature-gates: DataVolumes, CPUManager, LiveMigration, SRIOV"

# delete all virt- pods so that they have a chance to catch up with feature gate change
kubectl get pods -n kubevirt | grep virt | awk '{print $1}' | xargs kubectl delete pods -n kubevirt
wait_kubevirt_up

# ========================
# execute functional tests
# ========================
./${CLUSTER_DIR}/test.sh
@@ -0,0 +1,42 @@
#!/bin/bash

get_sriov_pci_root_addresses() {
    for dir in $(find /sys/devices/ -name sriov_totalvfs -exec dirname {} \;); do
        if [ $(cat $dir/sriov_numvfs) -gt 0 ]; then
            # use perl because sed doesn't support non-greedy matching
            basename $dir | perl -pe 's|(.*?:)(.*)|\2|'
        fi
    done
}

create_pci_string() {
    local quoted_values=($(echo "${pci_addresses[@]}" | xargs printf "\"%s\" "))
    local quoted_as_string=${quoted_values[@]}
    if [ "$quoted_as_string" = "\"\"" ]; then
        pci_string=""
    else
        pci_string=${quoted_as_string// /, }
    fi
}

sriov_device_plugin() {
    pci_addresses=$(get_sriov_pci_root_addresses)
    create_pci_string

    cat <<EOF > /etc/pcidp/config.json
{
    "resourceList": [
        {
            "resourceName": "sriov",
            "rootDevices": [$pci_string],
            "sriovMode": true,
            "deviceType": "vfio"
        }
    ]
}
EOF
}
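
# Example of the generated config for a single PF at the (hypothetical) PCI
# address 0000:3b:00.0 with VFs enabled; the non-greedy perl substitution above
# strips the leading "0000:" domain, leaving "3b:00.0":
#
# {
#     "resourceList": [
#         {
#             "resourceName": "sriov",
#             "rootDevices": ["3b:00.0"],
#             "sriovMode": true,
#             "deviceType": "vfio"
#         }
#     ]
# }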

mkdir -p /etc/pcidp
sriov_device_plugin
@@ -78,10 +78,10 @@ until _kubectl -n kubevirt get kv kubevirt; do
done

# wait until KubeVirt is ready
timeout=360
if [[ "$KUBEVIRT_PROVIDER" =~ .*.10..* ]]; then
# k8s version 1.10.* does not have wait command
# TODO: drop it once we get rid of 1.10.* provider
timeout=180
sample=30
current_time=0
while [ -n "$(_kubectl -n kubevirt get kv kubevirt -o'custom-columns=status:status.conditions[*].type' --no-headers | grep -v Ready)" ]; do
@@ -95,7 +95,9 @@ if [[ "$KUBEVIRT_PROVIDER" =~ .*.10..* ]]; then
fi
done
else
_kubectl wait -n kubevirt kv kubevirt --for condition=Ready --timeout 180s || (echo "KubeVirt not ready in time" && exit 1)
date
_kubectl wait -n kubevirt kv kubevirt --for condition=Ready --timeout ${timeout}s || (echo "KubeVirt not ready in time" && date && exit 1)
date
fi

echo "Done"
@@ -23,7 +23,7 @@ spec:
terminationGracePeriodSeconds: 0
volumes:
- containerDisk:
image: registry:5000/kubevirt/fedora-cloud-container-disk-demo:devel
image: kubevirt/fedora-cloud-container-disk-demo:devel
name: containerdisk
- cloudInitNoCloud:
userData: |-