Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add E2E Split Server to Drone, support parallel testing in Drone #9940

Merged
merged 6 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 18 additions & 26 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -617,33 +617,24 @@ steps:
- mkdir -p dist/artifacts
- cp /tmp/artifacts/* dist/artifacts/
- docker stop registry && docker rm registry
# Cleanup VMs running, happens if a previous test panics
# Cleanup inactive domains, happens if previous test is canceled
- |
VMS=$(virsh list --name | grep '_server-\|_agent-' || true)
if [ -n "$VMS" ]; then
for vm in $VMS
do
virsh destroy $vm
virsh undefine $vm --remove-all-storage
done
fi
VMS=$(virsh list --name --inactive | grep '_server-\|_agent-' || true)
if [ -n "$VMS" ]; then
for vm in $VMS
do
virsh undefine $vm
done
fi
# Cleanup VMs that are older than 2h. Happens if a previous test panics or is canceled
- tests/e2e/scripts/cleanup_vms.sh
- docker run -d -p 5000:5000 -e REGISTRY_PROXY_REMOTEURL=https://registry-1.docker.io --name registry registry:2
- cd tests/e2e/validatecluster
- vagrant destroy -f
- go test -v -timeout=45m ./validatecluster_test.go -ci -local
- cp ./coverage.out /tmp/artifacts/validate-coverage.out
- cd ../secretsencryption
- vagrant destroy -f
- go test -v -timeout=30m ./secretsencryption_test.go -ci -local
- cp ./coverage.out /tmp/artifacts/se-coverage.out
- |
cd tests/e2e/validatecluster
vagrant destroy -f
go test -v -timeout=45m ./validatecluster_test.go -ci -local
cp ./coverage.out /tmp/artifacts/validate-coverage.out
- |
cd ../secretsencryption
vagrant destroy -f
go test -v -timeout=30m ./secretsencryption_test.go -ci -local
cp ./coverage.out /tmp/artifacts/se-coverage.out
- |
cd ../splitserver
vagrant destroy -f
go test -v -timeout=30m ./splitserver_test.go -ci -local
cp ./coverage.out /tmp/artifacts/split-coverage.out
- |
if [ "$DRONE_BUILD_EVENT" = "pull_request" ]; then
cd ../upgradecluster
Expand All @@ -669,6 +660,7 @@ steps:
files:
- /tmp/artifacts/validate-coverage.out
- /tmp/artifacts/se-coverage.out
- /tmp/artifacts/split-coverage.out
- /tmp/artifacts/upgrade-coverage.out
flags:
- e2etests
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/dualstack/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end

if NODE_ROLES.kind_of?(String)
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/embeddedmirror/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/externalip/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end

if NODE_ROLES.kind_of?(String)
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/privateregistry/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/rootless/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/rotateca/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/s3/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
35 changes: 35 additions & 0 deletions tests/e2e/scripts/cleanup_vms.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash

# Clean up any E2E test VMs that are older than 2 hours.
#
# The creation time (seconds since the epoch) is embedded in the VM name,
# e.g. <prefix>_<timestamp>_server-0, so stale VMs can be picked out from the
# name alone without querying any other libvirt metadata.

# Maximum age, in seconds, before a VM is considered stale (2 hours).
max_age=7200

# Captures the embedded timestamp from names like "<prefix>_<epoch>_server-0"
# or "<prefix>_<epoch>_agent-0". Names that do not match are never touched.
time_regex="_([0-9]+)_(server|agent)"

# Get the current time in seconds since the epoch
current_time=$(date +%s)

# is_stale NAME
# Succeeds (exit 0) when NAME embeds a timestamp that is more than max_age
# seconds in the past; fails when NAME has no timestamp or is still recent.
is_stale() {
    local name=$1
    local age

    [[ $name =~ $time_regex ]] || return 1
    # Force base 10 so a timestamp with leading zeros is not parsed as octal.
    age=$((current_time - 10#${BASH_REMATCH[1]}))
    [ "$age" -gt "$max_age" ]
}

# Cleanup running VMs, happens if a previous test panics
# NOTE(review): --all also lists domains that are not running; presumably
# `virsh destroy` just reports an error for those and the undefine still
# proceeds - confirm against the virsh documentation.
for vm in $(virsh list --name --all); do
    if is_stale "$vm"; then
        virsh destroy "$vm"
        virsh undefine "$vm" --remove-all-storage
    fi
done

# Cleanup inactive domains, happens if previous test is canceled
for vm in $(virsh list --name --inactive); do
    if is_stale "$vm"; then
        virsh undefine "$vm" --remove-all-storage
    fi
done
4 changes: 3 additions & 1 deletion tests/e2e/secretsencryption/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ GOCOVER = (ENV['E2E_GOCOVER'] || "")
NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i
NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 2048).to_i
# Virtualbox >= 6.1.28 require `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = "10.10.10"
NETWORK_PREFIX = "10.10.13"
install_type = ""

def provision(vm, role, role_num, node_num)
Expand Down Expand Up @@ -53,6 +53,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/secretsencryption_old/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ var local = flag.Bool("local", false, "deploy a locally built K3s binary")
// Environment Variables Info:
// E2E_RELEASE_VERSION=v1.23.1+k3s2 or nil for latest commit from master

func Test_E2ESecretsEncryption(t *testing.T) {
func Test_E2ESecretsEncryptionOld(t *testing.T) {
RegisterFailHandler(Fail)
flag.Parse()
suiteConfig, reporterConfig := GinkgoConfiguration()
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/snapshotrestore/Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand Down
16 changes: 11 additions & 5 deletions tests/e2e/splitserver/Vagrantfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
ENV['VAGRANT_NO_PARALLEL'] = 'no'
NODE_ROLES = (ENV['E2E_NODE_ROLES'] ||
["server-etcd-0", "server-etcd-1", "server-etcd-2", "server-cp-0", "server-cp-1", "agent-0"])
["server-etcd-0", "server-etcd-1", "server-etcd-2", "server-cp-0", "server-cp-1", "agent-0", "agent-1"])
NODE_BOXES = (ENV['E2E_NODE_BOXES'] ||
['generic/ubuntu2310', 'generic/ubuntu2310', 'generic/ubuntu2310', 'generic/ubuntu2310', 'generic/ubuntu2310'])
GITHUB_BRANCH = (ENV['E2E_GITHUB_BRANCH'] || "master")
RELEASE_VERSION = (ENV['E2E_RELEASE_VERSION'] || "")
GOCOVER = (ENV['E2E_GOCOVER'] || "")
NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i
NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 2048).to_i
# Virtualbox >= 6.1.28 require `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = "10.10.10"
NETWORK_PREFIX = "10.10.12"

def provision(vm, role, role_num, node_num)
vm.box = NODE_BOXES[node_num]
Expand All @@ -22,12 +23,14 @@ def provision(vm, role, role_num, node_num)
defaultOSConfigure(vm)

install_type = getInstallType(vm, RELEASE_VERSION, GITHUB_BRANCH)
addCoverageDir(vm, role, GOCOVER)

vm.provision "ping k3s.io", type: "shell", inline: "ping -c 2 k3s.io"

if node_num == 0 && !role.include?("server") && !role.include?("etcd")
puts "first node must be a etcd server"
abort
if ARGV.include?("up") || (ARGV.include?("reload") && ARGV.include?("--provision"))
puts "Error: first node provisioned must be a etcd server"
abort
end
elsif role.include?("server") && role.include?("etcd") && role_num == 0
vm.provision 'k3s-install', type: 'k3s', run: 'once' do |k3s|
k3s.args = "server"
Expand Down Expand Up @@ -102,6 +105,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
Expand All @@ -116,6 +121,7 @@ Vagrant.configure("2") do |config|
end

NODE_ROLES.each_with_index do |role, i|
# Extract the node index: the number after the last "-" in the role name
role_num = role.split("-", -1).pop.to_i
config.vm.define role do |node|
provision(node.vm, role, role_num, i)
Expand Down
73 changes: 66 additions & 7 deletions tests/e2e/splitserver/splitserver_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
package splitserver

import (
"context"
"flag"
"fmt"
"os"
"strconv"
"strings"
"testing"
"time"

"github.com/k3s-io/k3s/tests/e2e"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"golang.org/x/sync/errgroup"
)

// Valid nodeOS: generic/ubuntu2310, opensuse/Leap-15.3.x86_64
Expand All @@ -19,21 +22,25 @@ var etcdCount = flag.Int("etcdCount", 3, "number of server nodes only deploying
var controlPlaneCount = flag.Int("controlPlaneCount", 1, "number of server nodes acting as control plane")
var agentCount = flag.Int("agentCount", 1, "number of agent nodes")
var ci = flag.Bool("ci", false, "running on CI")
var local = flag.Bool("local", false, "deploy a locally built K3s binary")
var hardened = flag.Bool("hardened", false, "true or false")

// Environment Variables Info:
// E2E_RELEASE_VERSION=v1.23.1+k3s2 or nil for latest commit from master

func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount int) ([]string, []string, []string, error) {
// createSplitCluster creates a split server cluster with the given nodeOS, etcdCount, controlPlaneCount, and agentCount.
// It duplicates and merges functionality found in the e2e.CreateCluster and e2e.CreateLocalCluster functions.
func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount int, local bool) ([]string, []string, []string, error) {
etcdNodeNames := make([]string, etcdCount)
cpNodeNames := make([]string, controlPlaneCount)
agentNodeNames := make([]string, agentCount)

for i := 0; i < etcdCount; i++ {
etcdNodeNames[i] = "server-etcd-" + strconv.Itoa(i)
}
cpNodeNames := make([]string, controlPlaneCount)
for i := 0; i < controlPlaneCount; i++ {
cpNodeNames[i] = "server-cp-" + strconv.Itoa(i)
}
agentNodeNames := make([]string, agentCount)
for i := 0; i < agentCount; i++ {
agentNodeNames[i] = "agent-" + strconv.Itoa(i)
}
Expand All @@ -43,21 +50,68 @@ func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount
nodeBoxes := strings.Repeat(nodeOS+" ", etcdCount+controlPlaneCount+agentCount)
nodeBoxes = strings.TrimSpace(nodeBoxes)

allNodes := append(etcdNodeNames, cpNodeNames...)
allNodes = append(allNodes, agentNodeNames...)

var testOptions string
for _, env := range os.Environ() {
if strings.HasPrefix(env, "E2E_") {
testOptions += " " + env
}
}

cmd := fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" %s vagrant up &> vagrant.log`, nodeRoles, nodeBoxes, testOptions)
// Provision the first etcd node. In GitHub Actions, this also imports the VM image into libvirt, which
// takes time and can cause the next vagrant up to fail if it is not given enough time to complete.
cmd := fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" vagrant up --no-provision %s &> vagrant.log`, nodeRoles, nodeBoxes, etcdNodeNames[0])
fmt.Println(cmd)
if _, err := e2e.RunCommand(cmd); err != nil {
fmt.Println("Error Creating Cluster", err)
return nil, nil, nil, err
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}

// Bring up the rest of the nodes in parallel
errg, _ := errgroup.WithContext(context.Background())
for _, node := range allNodes[1:] {
cmd := fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" vagrant up --no-provision %s &>> vagrant.log`, nodeRoles, nodeBoxes, node)
errg.Go(func() error {
_, err := e2e.RunCommand(cmd)
return err
})
// libVirt/Virtualbox needs some time between provisioning nodes
time.Sleep(10 * time.Second)
}
if err := errg.Wait(); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}

if local {
testOptions += " E2E_RELEASE_VERSION=skip"
for _, node := range allNodes {
cmd := fmt.Sprintf(`E2E_NODE_ROLES=%s vagrant scp ../../../dist/artifacts/k3s %s:/tmp/`, node, node)
if _, err := e2e.RunCommand(cmd); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, fmt.Errorf("failed to scp k3s binary to %s: %v", node, err)
}
if _, err := e2e.RunCmdOnNode("mv /tmp/k3s /usr/local/bin/", node); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}
}
}
// Install K3s on all nodes in parallel
errg, _ = errgroup.WithContext(context.Background())
for _, node := range allNodes {
cmd = fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" %s vagrant provision %s &>> vagrant.log`, nodeRoles, nodeBoxes, testOptions, node)
errg.Go(func() error {
_, err := e2e.RunCommand(cmd)
return err
})
// K3s needs some time between joining nodes to avoid learner issues
time.Sleep(10 * time.Second)
}
if err := errg.Wait(); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}
return etcdNodeNames, cpNodeNames, agentNodeNames, nil
}

func Test_E2ESplitServer(t *testing.T) {
RegisterFailHandler(Fail)
flag.Parse()
Expand All @@ -78,7 +132,7 @@ var _ = Describe("Verify Create", Ordered, func() {
Context("Cluster :", func() {
It("Starts up with no issues", func() {
var err error
etcdNodeNames, cpNodeNames, agentNodeNames, err = createSplitCluster(*nodeOS, *etcdCount, *controlPlaneCount, *agentCount)
etcdNodeNames, cpNodeNames, agentNodeNames, err = createSplitCluster(*nodeOS, *etcdCount, *controlPlaneCount, *agentCount, *local)
Expect(err).NotTo(HaveOccurred(), e2e.GetVagrantLog(err))
fmt.Println("CLUSTER CONFIG")
fmt.Println("OS:", *nodeOS)
Expand Down Expand Up @@ -229,6 +283,11 @@ var _ = AfterEach(func() {
})

var _ = AfterSuite(func() {
if !failed {
allNodes := append(cpNodeNames, etcdNodeNames...)
allNodes = append(allNodes, agentNodeNames...)
Expect(e2e.GetCoverageReport(allNodes)).To(Succeed())
}
if !failed || *ci {
Expect(e2e.DestroyCluster()).To(Succeed())
Expect(os.Remove(kubeConfigFile)).To(Succeed())
Expand Down
Loading