Skip to content

Commit

Permalink
Merge pull request kubernetes#105764 from jlebon/pr/add-ssh-mode
Browse files Browse the repository at this point in the history
test/e2e_node/remote: support pure SSH mode
  • Loading branch information
k8s-ci-robot committed Nov 22, 2021
2 parents 806e38a + 3ebd93c commit a142f86
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 95 deletions.
7 changes: 5 additions & 2 deletions build/root/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,10 @@ define TEST_E2E_NODE_HELP_INFO
# Defaults to "".
# RUN_UNTIL_FAILURE: If true, pass --untilItFails to ginkgo so tests are run
# repeatedly until they fail. Defaults to false.
# REMOTE: If true, run the tests on a remote host instance on GCE. Defaults
# to false.
# REMOTE: If true, run the tests on a remote host. Defaults to false.
# REMOTE_MODE: For REMOTE=true only. Mode for remote execution (e.g. gce, ssh).
# If set to "gce", an instance can be provisioned or reused from GCE. If set
# to "ssh", the provided `HOSTS` must be IP addresses or resolvable hostnames. Defaults to "gce".
# ARTIFACTS: Local directory to scp test artifacts into from the remote hosts
# for REMOTE=true. Local directory to write junit xml results into for REMOTE=false.
# Defaults to "/tmp/_artifacts/$$(date +%y%m%dT%H%M%S)".
Expand Down Expand Up @@ -256,6 +258,7 @@ define TEST_E2E_NODE_HELP_INFO
# Defaults to false.
# TEST_SUITE: For REMOTE=true only. Test suite to use. Defaults to "default".
# SSH_KEY: For REMOTE=true only. Path to SSH key to use.
# SSH_OPTIONS: For REMOTE=true only. SSH options to use.
# RUNTIME_CONFIG: The runtime configuration for the API server on the node e2e tests.
#
# Example:
Expand Down
31 changes: 28 additions & 3 deletions hack/make-rules/test-e2e-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ skip=${SKIP-"\[Flaky\]|\[Slow\]|\[Serial\]"}
parallelism=${PARALLELISM:-8}
artifacts="${ARTIFACTS:-"/tmp/_artifacts/$(date +%y%m%dT%H%M%S)"}"
remote=${REMOTE:-"false"}
remote_mode=${REMOTE_MODE:-"gce"}
runtime=${RUNTIME:-"docker"}
container_runtime_endpoint=${CONTAINER_RUNTIME_ENDPOINT:-""}
image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
Expand All @@ -49,6 +50,7 @@ extra_envs=${EXTRA_ENVS:-}
runtime_config=${RUNTIME_CONFIG:-}
ssh_user=${SSH_USER:-"${USER}"}
ssh_key=${SSH_KEY:-}
ssh_options=${SSH_OPTIONS:-}
kubelet_config_file=${KUBELET_CONFIG_FILE:-"test/e2e_node/jenkins/default-kubelet-config.yaml"}

# Parse the flags to pass to ginkgo
Expand Down Expand Up @@ -86,8 +88,8 @@ if [[ ${runtime} == "remote" ]] ; then
fi


if [ "${remote}" = true ] ; then
# The following options are only valid in remote run.
if [ "${remote}" = true ] && [ "${remote_mode}" = gce ] ; then
# The following options are only valid in remote GCE run.
images=${IMAGES:-""}
hosts=${HOSTS:-""}
image_project=${IMAGE_PROJECT:-"kubernetes-node-e2e-images"}
Expand Down Expand Up @@ -177,12 +179,35 @@ if [ "${remote}" = true ] ; then
--delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \
--image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \
--runtime-config="${runtime_config}" --preemptible-instances="${preemptible_instances}" \
--ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --image-config-dir="${image_config_dir}" \
--ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \
--image-config-dir="${image_config_dir}" \
--extra-envs="${extra_envs}" --kubelet-config-file="${kubelet_config_file}" --test-suite="${test_suite}" \
"${timeout_arg}" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?

elif [ "${remote}" = true ] && [ "${remote_mode}" = ssh ] ; then
hosts=${HOSTS:-""}
test_suite=${TEST_SUITE:-"default"}
if [[ -n "${TIMEOUT:-}" ]] ; then
timeout_arg="--test-timeout=${TIMEOUT}"
fi

# Use cluster.local as default dns-domain
test_args='--dns-domain="'${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args}
test_args='--kubelet-flags="--cluster-domain='${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args}

# Invoke the runner
go run test/e2e_node/runner/remote/run_remote.go --mode="ssh" --logtostderr --vmodule=*=4 \
--hosts="${hosts}" --results-dir="${artifacts}" --ginkgo-flags="${ginkgoflags}" \
--test_args="${test_args}" --system-spec-name="${system_spec_name}" \
--runtime-config="${runtime_config}" \
--ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \
--extra-envs="${extra_envs}" --test-suite="${test_suite}" \
"${timeout_arg}" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?

else
# Refresh sudo credentials if needed
if ping -c 1 -q metadata.google.internal &> /dev/null; then
Expand Down
196 changes: 106 additions & 90 deletions test/e2e_node/runner/remote/run_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
"sigs.k8s.io/yaml"
)

var mode = flag.String("mode", "gce", "Mode to operate in. One of gce|ssh. Defaults to gce")
var testArgs = flag.String("test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
var testSuite = flag.String("test-suite", "default", "Test suite the runner initializes with. Currently support default|cadvisor|conformance")
var instanceNamePrefix = flag.String("instance-name-prefix", "", "prefix for instance names")
Expand Down Expand Up @@ -223,18 +224,98 @@ func main() {
return
}

if *hosts == "" && *imageConfigFile == "" && *images == "" {
klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.")
var gceImages *internalImageConfig
if *mode == "gce" {
if *hosts == "" && *imageConfigFile == "" && *images == "" {
klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.")
}
var err error
computeService, err = getComputeClient()
if err != nil {
klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
}
if gceImages, err = prepareGceImages(); err != nil {
klog.Fatalf("While preparing GCE images: %v", err)
}
if *instanceNamePrefix == "" {
*instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8]
}
}
var err error
computeService, err = getComputeClient()
if err != nil {
klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)

// Setup coloring
stat, _ := os.Stdout.Stat()
useColor := (stat.Mode() & os.ModeCharDevice) != 0
blue := ""
noColour := ""
if useColor {
blue = "\033[0;34m"
noColour = "\033[0m"
}

go arc.getArchive()
defer arc.deleteArchive()

results := make(chan *TestResult)
running := 0
if gceImages != nil {
for shortName := range gceImages.images {
imageConfig := gceImages.images[shortName]
fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image)
running++
go func(image *internalGCEImage, junitFileName string) {
results <- testImage(image, junitFileName)
}(&imageConfig, shortName)
}
}
if *hosts != "" {
for _, host := range strings.Split(*hosts, ",") {
fmt.Printf("Initializing e2e tests using host %s.\n", host)
running++
go func(host string, junitFileName string) {
results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags)
}(host, host)
}
}

// Wait for all tests to complete and emit the results
errCount := 0
exitOk := true
for i := 0; i < running; i++ {
tr := <-results
host := tr.host
fmt.Println() // Print an empty line
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("%s> START TEST >%s\n", blue, noColour)
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("Start Test Suite on Host %s\n", host)
fmt.Printf("%s\n", tr.output)
if tr.err != nil {
errCount++
fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err)
} else {
fmt.Printf("Success Finished Test Suite on Host %s\n", host)
}
exitOk = exitOk && tr.exitOk
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour)
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Println() // Print an empty line
}
// Set the exit code if there were failures
if !exitOk {
fmt.Printf("Failure: %d errors encountered.\n", errCount)
callGubernator(*gubernator)
arc.deleteArchive()
os.Exit(1)
}
callGubernator(*gubernator)
}

func prepareGceImages() (*internalImageConfig, error) {
gceImages := &internalImageConfig{
images: make(map[string]internalGCEImage),
}

// Parse images from given config file and convert them to internalGCEImage.
if *imageConfigFile != "" {
configPath := *imageConfigFile
Expand All @@ -244,22 +325,22 @@ func main() {

imageConfigData, err := ioutil.ReadFile(configPath)
if err != nil {
klog.Fatalf("Could not read image config file provided: %v", err)
return nil, fmt.Errorf("Could not read image config file provided: %v", err)
}
// Unmarshal the given image config file. All images for this test run will be organized into a map of
// shortName->GCEImage, e.g. cos-stable->cos-stable-81-12871-103-0.
externalImageConfig := ImageConfig{Images: make(map[string]GCEImage)}
err = yaml.Unmarshal(imageConfigData, &externalImageConfig)
if err != nil {
klog.Fatalf("Could not parse image config file: %v", err)
return nil, fmt.Errorf("Could not parse image config file: %v", err)
}

for shortName, imageConfig := range externalImageConfig.Images {
var image string
if (imageConfig.ImageRegex != "" || imageConfig.ImageFamily != "") && imageConfig.Image == "" {
image, err = getGCEImage(imageConfig.ImageRegex, imageConfig.ImageFamily, imageConfig.Project)
if err != nil {
klog.Fatalf("Could not retrieve a image based on image regex %q and family %q: %v",
return nil, fmt.Errorf("Could not retrieve a image based on image regex %q and family %q: %v",
imageConfig.ImageRegex, imageConfig.ImageFamily, err)
}
} else {
Expand Down Expand Up @@ -318,75 +399,8 @@ func main() {
klog.Fatal("Must specify --project flag to launch images into")
}
}
if *instanceNamePrefix == "" {
*instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8]
}

// Setup coloring
stat, _ := os.Stdout.Stat()
useColor := (stat.Mode() & os.ModeCharDevice) != 0
blue := ""
noColour := ""
if useColor {
blue = "\033[0;34m"
noColour = "\033[0m"
}

go arc.getArchive()
defer arc.deleteArchive()

results := make(chan *TestResult)
running := 0
for shortName := range gceImages.images {
imageConfig := gceImages.images[shortName]
fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image)
running++
go func(image *internalGCEImage, junitFileName string) {
results <- testImage(image, junitFileName)
}(&imageConfig, shortName)
}
if *hosts != "" {
for _, host := range strings.Split(*hosts, ",") {
fmt.Printf("Initializing e2e tests using host %s.\n", host)
running++
go func(host string, junitFileName string) {
results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags)
}(host, host)
}
}

// Wait for all tests to complete and emit the results
errCount := 0
exitOk := true
for i := 0; i < running; i++ {
tr := <-results
host := tr.host
fmt.Println() // Print an empty line
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("%s> START TEST >%s\n", blue, noColour)
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("Start Test Suite on Host %s\n", host)
fmt.Printf("%s\n", tr.output)
if tr.err != nil {
errCount++
fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err)
} else {
fmt.Printf("Success Finished Test Suite on Host %s\n", host)
}
exitOk = exitOk && tr.exitOk
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour)
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Println() // Print an empty line
}
// Set the exit code if there were failures
if !exitOk {
fmt.Printf("Failure: %d errors encountered.\n", errCount)
callGubernator(*gubernator)
arc.deleteArchive()
os.Exit(1)
}
callGubernator(*gubernator)
return gceImages, nil
}

func callGubernator(gubernator bool) {
Expand Down Expand Up @@ -436,29 +450,23 @@ func getImageMetadata(input string) *compute.Metadata {
return &ret
}

// Run tests in archive against host
func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult {
func registerGceHostIP(host string) error {
instance, err := computeService.Instances.Get(*project, *zone, host).Do()
if err != nil {
return &TestResult{
err: err,
host: host,
exitOk: false,
}
return err
}
if strings.ToUpper(instance.Status) != "RUNNING" {
err = fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status)
return &TestResult{
err: err,
host: host,
exitOk: false,
}
return fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status)
}
externalIP := getExternalIP(instance)
if len(externalIP) > 0 {
remote.AddHostnameIP(host, externalIP)
}
return nil
}

// Run tests in archive against host
func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult {
path, err := arc.getArchive()
if err != nil {
// Don't log fatal because we need to do any needed cleanup contained in "defer" statements
Expand Down Expand Up @@ -553,6 +561,14 @@ func testImage(imageConfig *internalGCEImage, junitFileName string) *TestResult
// If we are going to delete the instance, don't bother with cleaning up the files
deleteFiles := !*deleteInstances && *cleanup

if err = registerGceHostIP(host); err != nil {
return &TestResult{
err: err,
host: host,
exitOk: false,
}
}

result := testHost(host, deleteFiles, imageConfig.imageDesc, junitFileName, ginkgoFlagsStr)
// This is a temporary solution to collect the node serial log. Only port 1 contains useful information.
// TODO(random-liu): Extract out and unify log collection logic with cluster e2e.
Expand Down

0 comments on commit a142f86

Please sign in to comment.