From 2fff0dc5ba84c506a4648ce84609cfa32ccd9f4d Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 17 May 2021 14:47:02 +0100 Subject: [PATCH 1/6] Reduce FV verbosity: Ignore infra logs during stop. Log command output inline rather than buffer. --- fv/containers/containers.go | 25 +++++++++++++++++--- fv/infrastructure/infra_etcd.go | 4 +++- fv/infrastructure/infra_k8s.go | 42 +++++++++++++++++++++------------ fv/named_ports_test.go | 22 ++++++++--------- fv/utils/utils.go | 35 ++++++--------------------- 5 files changed, 70 insertions(+), 58 deletions(-) diff --git a/fv/containers/containers.go b/fv/containers/containers.go index 8694dd0c0e..79679e1ab5 100644 --- a/fv/containers/containers.go +++ b/fv/containers/containers.go @@ -54,6 +54,7 @@ type Container struct { dataRaces []string logFinished sync.WaitGroup + dropAllLogs bool } type watch struct { @@ -63,6 +64,17 @@ type watch struct { var containerIdx = 0 +func (c *Container) StopLogs() { + if c == nil { + log.Info("StopLogs no-op because nil container") + return + } + + c.mutex.Lock() + c.dropAllLogs = true + c.mutex.Unlock() +} + func (c *Container) Stop() { if c == nil { log.Info("Stop no-op because nil container") @@ -78,7 +90,7 @@ func (c *Container) Stop() { } c.mutex.Unlock() - logCxt.Info("Stop") + logCxt.Info("Stopping...") // Ask docker to stop the container. withTimeoutPanic(logCxt, 30*time.Second, c.execDockerStop) @@ -347,7 +359,14 @@ func (c *Container) copyOutputToLog(streamName string, stream io.Reader, done *s for scanner.Scan() { line := scanner.Text() - fmt.Fprintf(ginkgo.GinkgoWriter, "%v[%v] %v\n", c.Name, streamName, line) + + // Check if we're dropping logs (e.g. because we're tearing down the container at the end of the test). + c.mutex.Lock() + droppingLogs := c.dropAllLogs + c.mutex.Unlock() + if !droppingLogs { + fmt.Fprintf(ginkgo.GinkgoWriter, "%v[%v] %v\n", c.Name, streamName, line) + } // Capture data race warnings and log to file. if strings.Contains(line, "WARNING: DATA RACE") { @@ -392,7 +411,7 @@ func (c *Container) copyOutputToLog(streamName string, stream io.Reader, done *s } logCxt := log.WithFields(log.Fields{ "name": c.Name, - "stream": stream, + "stream": streamName, }) if scanner.Err() != nil { logCxt.WithError(scanner.Err()).Error("Non-EOF error reading container stream") diff --git a/fv/infrastructure/infra_etcd.go b/fv/infrastructure/infra_etcd.go index 35ec3409b8..298f56b1b9 100644 --- a/fv/infrastructure/infra_etcd.go +++ b/fv/infrastructure/infra_etcd.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2020 Tigera, Inc. All rights reserved. +// Copyright (c) 2018-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -198,6 +198,8 @@ func (eds *EtcdDatastoreInfra) DumpErrorData() { } func (eds *EtcdDatastoreInfra) Stop() { + eds.bpfLog.StopLogs() + eds.etcdContainer.StopLogs() eds.bpfLog.Stop() eds.etcdContainer.Stop() } diff --git a/fv/infrastructure/infra_k8s.go b/fv/infrastructure/infra_k8s.go index a1bb8e9ad0..ea662eb33f 100644 --- a/fv/infrastructure/infra_k8s.go +++ b/fv/infrastructure/infra_k8s.go @@ -98,6 +98,17 @@ var ( func TearDownK8sInfra(kds *K8sDatastoreInfra) { log.Info("TearDownK8sInfra starting") var wg sync.WaitGroup + + if kds.etcdContainer != nil { + kds.etcdContainer.StopLogs() + } + if kds.k8sApiContainer != nil { + kds.k8sApiContainer.StopLogs() + } + if kds.k8sControllerManager != nil { + kds.k8sControllerManager.StopLogs() + } + if kds.etcdContainer != nil { wg.Add(1) go func() { @@ -166,6 +177,7 @@ func runK8sApiserver(etcdIp string) *containers.Container { "-v", os.Getenv("PRIVATE_KEY")+":/private.key", utils.Config.K8sImage, "kube-apiserver", + "--v=0", "--service-cluster-ip-range=10.101.0.0/16", "--authorization-mode=RBAC", "--insecure-port=8080", // allow insecure connection from controller manager. @@ -196,7 +208,7 @@ func runK8sControllerManager(apiserverIp string) *containers.Container { // they are enabled. "--allocate-node-cidrs=false", "--leader-elect=false", - "--v=3", + "--v=0", "--service-account-private-key-file=/private.key", "--concurrent-gc-syncs=50", ) @@ -735,52 +747,52 @@ func (kds *K8sDatastoreInfra) AddDefaultDeny() error { func (kds *K8sDatastoreInfra) DumpErrorData() { nsList, err := kds.K8sClient.CoreV1().Namespaces().List(context.Background(), metav1.ListOptions{}) if err == nil { - utils.AddToTestOutput("Kubernetes Namespaces\n") + log.Info("DIAGS: Kubernetes Namespaces:") for _, ns := range nsList.Items { - utils.AddToTestOutput(spew.Sdump(ns)) + log.Info(spew.Sdump(ns)) } } profiles, err := kds.calicoClient.Profiles().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico Profiles\n") + log.Info("DIAGS: Calico Profiles:") for _, profile := range profiles.Items { - utils.AddToTestOutput(spew.Sdump(profile)) + log.Info(spew.Sdump(profile)) } } policies, err := kds.calicoClient.NetworkPolicies().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico NetworkPolicies\n") + log.Info("DIAGS: Calico NetworkPolicies:") for _, policy := range policies.Items { - utils.AddToTestOutput(spew.Sdump(policy)) + log.Info(spew.Sdump(policy)) } } gnps, err := kds.calicoClient.GlobalNetworkPolicies().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico GlobalNetworkPolicies\n") + log.Info("DIAGS: Calico GlobalNetworkPolicies:") for _, gnp := range gnps.Items { - utils.AddToTestOutput(spew.Sdump(gnp)) + log.Info(spew.Sdump(gnp)) } } workloads, err := kds.calicoClient.WorkloadEndpoints().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico WorkloadEndpoints\n") + log.Info("DIAGS: Calico WorkloadEndpoints:") for _, w := range workloads.Items { - utils.AddToTestOutput(spew.Sdump(w)) + log.Info(spew.Sdump(w)) } } nodes, err := kds.calicoClient.Nodes().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico Nodes\n") + log.Info("DIAGS: Calico Nodes:") for _, n := range nodes.Items { - utils.AddToTestOutput(spew.Sdump(n)) + log.Info(spew.Sdump(n)) } } heps, err := kds.calicoClient.HostEndpoints().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico Host Endpoints\n") + log.Info("DIAGS: Calico Host Endpoints:") for _, hep := range heps.Items { - utils.AddToTestOutput(spew.Sdump(hep)) + log.Info(spew.Sdump(hep)) } } } diff --git a/fv/named_ports_test.go b/fv/named_ports_test.go index 1350e2067a..13beef9b1f 100644 --- a/fv/named_ports_test.go +++ b/fv/named_ports_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Tigera, Inc. All rights reserved. +// Copyright (c) 2020-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -156,37 +156,37 @@ func describeNamedPortTests(testSourcePorts bool, protocol string) { profiles, err := client.Profiles().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico Profiles\n") + log.Info("DIAGS: Calico Profiles:") for _, profile := range profiles.Items { - utils.AddToTestOutput(fmt.Sprintf("%v\n", profile)) + log.Info(profile) } } policies, err := client.NetworkPolicies().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico NetworkPolicies\n") + log.Info("DIAGS: Calico NetworkPolicies:") for _, policy := range policies.Items { - utils.AddToTestOutput(fmt.Sprintf("%v\n", policy)) + log.Info(policy) } } gnps, err := client.GlobalNetworkPolicies().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico GlobalNetworkPolicies\n") + log.Info("DIAGS: Calico GlobalNetworkPolicies:") for _, gnp := range gnps.Items { - utils.AddToTestOutput(fmt.Sprintf("%v\n", gnp)) + log.Info(gnp) } } workloads, err := client.WorkloadEndpoints().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico WorkloadEndpoints\n") + log.Info("DIAGS: Calico WorkloadEndpoints:") for _, w := range workloads.Items { - utils.AddToTestOutput(fmt.Sprintf("%v\n", w)) + log.Info(w) } } nodes, err := client.Nodes().List(context.Background(), options.ListOptions{}) if err == nil { - utils.AddToTestOutput("Calico Nodes\n") + log.Info("DIAGS: Calico Nodes:") for _, n := range nodes.Items { - utils.AddToTestOutput(fmt.Sprintf("%v\n", n)) + log.Info(n) } } diff --git a/fv/utils/utils.go b/fv/utils/utils.go index 254750dbbb..d3e3db1849 100644 --- a/fv/utils/utils.go +++ b/fv/utils/utils.go @@ -19,14 +19,12 @@ import ( "bytes" "context" "fmt" - "os" "os/exec" "strings" "syscall" "time" "github.com/kelseyhightower/envconfig" - . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" log "github.com/sirupsen/logrus" @@ -76,8 +74,6 @@ func RunMayFail(command string, args ...string) error { return run(nil, false, command, args...) } -var currentTestOutput = []string{} - var LastRunOutput string func run(input []byte, checkNoError bool, command string, args ...string) error { @@ -86,14 +82,16 @@ func run(input []byte, checkNoError bool, command string, args ...string) error cmd.Stdin = bytes.NewReader(input) } outputBytes, err := cmd.CombinedOutput() - currentTestOutput = append(currentTestOutput, fmt.Sprintf("Command: %v %v\n", command, args)) - currentTestOutput = append(currentTestOutput, string(outputBytes)) + output := string(outputBytes) LastRunOutput = string(outputBytes) if err != nil { log.WithFields(log.Fields{ "command": command, - "args": args, - "output": string(outputBytes)}).WithError(err).Warning("Command failed") + "args": args}).WithError(err).Warningf("Command failed:\n%s", output) + } else { + log.WithFields(log.Fields{ + "command": command, + "args": args}).Infof("Command succeeded:\n%s", output) } if checkNoError { Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Command failed\nCommand: %v args: %v\nOutput:\n\n%v", @@ -106,25 +104,6 @@ func run(input []byte, checkNoError bool, command string, args ...string) error return nil } -func AddToTestOutput(args ...string) { - currentTestOutput = append(currentTestOutput, args...) -} - -var _ = BeforeEach(func() { - currentTestOutput = []string{} -}) - -var _ = AfterEach(func() { - if CurrentGinkgoTestDescription().Failed { - os.Stdout.WriteString(fmt.Sprintf("\n===== begin output from failed test %s =====\n", - CurrentGinkgoTestDescription().FullTestText)) - for _, output := range currentTestOutput { - os.Stdout.WriteString(output) - } - os.Stdout.WriteString("===== end output from failed test =====\n\n") - } -}) - func GetCommandOutput(command string, args ...string) (string, error) { cmd := Command(command, args...) log.Infof("Running '%s %s'", cmd.Path, strings.Join(cmd.Args, " ")) @@ -142,7 +121,7 @@ func Command(name string, args ...string) *exec.Cmd { log.WithFields(log.Fields{ "command": name, "commandArgs": args, - }).Info("Creating Command.") + }).Debug("Creating Command.") return exec.Command(name, args...) } From 6c88ebbac1bdfcf7db8b426679def3a15c97b974 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 18 May 2021 11:01:18 +0100 Subject: [PATCH 2/6] Markups: fine tune command output. --- fv/utils/utils.go | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/fv/utils/utils.go b/fv/utils/utils.go index d3e3db1849..bfbcd5e994 100644 --- a/fv/utils/utils.go +++ b/fv/utils/utils.go @@ -84,14 +84,19 @@ func run(input []byte, checkNoError bool, command string, args ...string) error outputBytes, err := cmd.CombinedOutput() output := string(outputBytes) LastRunOutput = string(outputBytes) + formattedCmd := formatCommand(command, args) if err != nil { - log.WithFields(log.Fields{ - "command": command, - "args": args}).WithError(err).Warningf("Command failed:\n%s", output) + if len(output) == 0 { + log.WithError(err).Warningf("Command failed [%s]: ", formattedCmd) + } else { + log.WithError(err).Warningf("Command failed [%s]:\n%s", formattedCmd, indent(output, "\t")) + } } else { - log.WithFields(log.Fields{ - "command": command, - "args": args}).Infof("Command succeeded:\n%s", output) + if len(output) == 0 { + log.Infof("Command succeeded [%s]: ", formattedCmd) + } else { + log.Infof("Command succeeded [%s]:\n%s", formattedCmd, indent(output, "\t")) + } } if checkNoError { Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Command failed\nCommand: %v args: %v\nOutput:\n\n%v", @@ -104,6 +109,28 @@ func run(input []byte, checkNoError bool, command string, args ...string) error return nil } +func indent(s string, prefix string) string { + lines := strings.Split(s, "\n") + for i := range lines { + lines[i] = prefix + lines[i] + } + return strings.Join(lines, "\n") +} + +func formatCommand(command string, args []string) string { + out := command + for _, arg := range args { + // Only quote if there are actually some interesting characters in there, just to make it easier to read. + quoted := fmt.Sprintf("%q", arg) + if quoted == `"` + arg + `"` { + out += " " + arg + } else { + out += " " + quoted + } + } + return out +} + func GetCommandOutput(command string, args ...string) (string, error) { cmd := Command(command, args...) log.Infof("Running '%s %s'", cmd.Path, strings.Join(cmd.Args, " ")) @@ -118,10 +145,7 @@ func RunCommand(command string, args ...string) error { } func Command(name string, args ...string) *exec.Cmd { - log.WithFields(log.Fields{ - "command": name, - "commandArgs": args, - }).Debug("Creating Command.") + log.Debugf("Creating Command [%s].", formatCommand(name, args)) return exec.Command(name, args...) } From 5e86f1d8ec6bf19d43ae2391d2cfe9391fa824d8 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 14 May 2021 14:08:35 +0100 Subject: [PATCH 3/6] Disable checksum offload on the VXLAN tunnel for kernels unix.IFNAMSIZ { + return fmt.Errorf("name too long") + } + + // To access the IOCTL, we need a socket file descriptor. + socket, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0) + if err != nil { + return err + } + defer func() { + err := unix.Close(socket) + if err != nil { + // Super unlikely; normally a failure from Close means that some data couldn't be flushed + // but we didn't send any. + logrus.WithError(err).Warn("unix.Close(socket) failed") + } + }() + + // Allocate an EthtoolValue using manual memory management. This is required because we need to pass + // a struct to the Syscall that in turn points to the EthtoolValue. If we allocate the EthtooValue on the + // go stack/heap then it would not be protected from being moved by the GC while the syscall is in progress. + // (Only the directly-passed struct is protected from being moved during the syscall.) + alloc := memory.Allocator{} + defer func() { + err := alloc.Close() + if err != nil { + logrus.WithError(err).Panic("Failed to release memory to the system") + } + }() + valueUPtr, err := alloc.UnsafeCalloc(int(unsafe.Sizeof(EthtoolValue{}))) + if err != nil { + return fmt.Errorf("failed to allocate memory: %w", err) + } + defer func() { + err := alloc.UnsafeFree(valueUPtr) + if err != nil { + logrus.WithError(err).Warn("UnsafeFree() failed") + } + }() + value := (*EthtoolValue)(valueUPtr) + + // Get the current value so we only set it if it needs to change. + *value = EthtoolValue{Cmd: unix.ETHTOOL_GTXCSUM} + request := IFReqData{Data: uintptr(valueUPtr)} + copy(request.Name[:], name) + if err := ioctlEthtool(socket, &request); err != nil { + return err + } + if value.Data == 0 { // if already off, don't try to change + return nil + } + + // Set the value. + *value = EthtoolValue{Cmd: unix.ETHTOOL_STXCSUM, Data: 0 /* off */} + return ioctlEthtool(socket, &request) +} diff --git a/fv/wireguard_test.go b/fv/wireguard_test.go index 0e74317e9f..77f08f8384 100644 --- a/fv/wireguard_test.go +++ b/fv/wireguard_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Tigera, Inc. All rights reserved. +// Copyright (c) 2020-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/go.mod b/go.mod index b623401d1f..aa8b6c0d04 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( k8s.io/apimachinery v0.21.0-rc.0 k8s.io/client-go v0.21.0-rc.0 k8s.io/kubernetes v1.21.0-rc.0 + modernc.org/memory v1.0.4 ) replace ( diff --git a/go.sum b/go.sum index 2d511e47d2..fcb822f693 100644 --- a/go.sum +++ b/go.sum @@ -655,6 +655,8 @@ github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/quobyte/api v0.1.8/go.mod h1:jL7lIHrmqQ7yh05OJ+eEEdHr0u/kmT1Ff9iHd+4H6VI= github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/robfig/cron v1.1.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -1237,6 +1239,10 @@ k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/ modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= +modernc.org/mathutil v1.1.1 h1:FeylZSVX8S+58VsyJlkEj2bcpdytmp9MmDKZkKx8OIE= +modernc.org/mathutil v1.1.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.0.4 h1:utMBrFcpnQDdNsmM6asmyH/FM9TqLPS7XF7otpJmrwM= +modernc.org/memory v1.0.4/go.mod h1:nV2OApxradM3/OVbs2/0OsP6nPfakXpi50C7dcoHXlc= modernc.org/strutil v1.0.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs= modernc.org/xc v1.0.0/go.mod h1:mRNCo0bvLjGhHO9WsyuKVU4q0ceiDDDoEeWDJHrNx8I= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= diff --git a/iptables/feature_detect.go b/iptables/feature_detect.go index e684f2912d..537f097c63 100644 --- a/iptables/feature_detect.go +++ b/iptables/feature_detect.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2020 Tigera, Inc. All rights reserved. +// Copyright (c) 2018-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -44,6 +44,8 @@ var ( v3Dot10Dot0 = versionparse.MustParseVersion("3.10.0") // v3Dot14Dot0 added the random-fully feature on the iptables interface. v3Dot14Dot0 = versionparse.MustParseVersion("3.14.0") + // v5Dot7Dot0 contains a fix for checksum offloading. + v5Dot7Dot0 = versionparse.MustParseVersion("5.7.0") ) type Features struct { @@ -54,6 +56,10 @@ type Features struct { // RestoreSupportsLock is true if the iptables-restore command supports taking the xtables lock and the // associated -w and -W arguments. RestoreSupportsLock bool + // ChecksumOffloadBroken is true for kernels that have broken checksum offload for packets with SNATted source + // ports. See https://github.com/projectcalico/calico/issues/3145. On such kernels we disable checksum offload + // on our VXLAN device. + ChecksumOffloadBroken bool } type FeatureDetector struct { @@ -102,9 +108,10 @@ func (d *FeatureDetector) refreshFeaturesLockHeld() { // Calculate the features. features := Features{ - SNATFullyRandom: iptV.Compare(v1Dot6Dot0) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, - MASQFullyRandom: iptV.Compare(v1Dot6Dot2) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, - RestoreSupportsLock: iptV.Compare(v1Dot6Dot2) >= 0, + SNATFullyRandom: iptV.Compare(v1Dot6Dot0) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, + MASQFullyRandom: iptV.Compare(v1Dot6Dot2) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, + RestoreSupportsLock: iptV.Compare(v1Dot6Dot2) >= 0, + ChecksumOffloadBroken: kerV.Compare(v5Dot7Dot0) < 0, } if value, ok := (d.featureOverride)["SNATFullyRandom"]; ok { diff --git a/iptables/features_detect_test.go b/iptables/features_detect_test.go index 3306c4a99e..bd74e1305c 100644 --- a/iptables/features_detect_test.go +++ b/iptables/features_detect_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2019 Tigera, Inc. All rights reserved. +// Copyright (c) 2018-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -39,72 +39,90 @@ func TestFeatureDetection(t *testing.T) { "iptables v1.6.2", "Linux version 3.14.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: true, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: true, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.1", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: true, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: true, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.5.0", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.2", "Linux version 3.13.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "garbage", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.2", "garbage", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "error", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.2", "error", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, + }, + }, + { + "iptables v1.8.4", + "Linux version 5.7.0", + Features{ + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: true, + ChecksumOffloadBroken: false, }, }, } { @@ -146,9 +164,10 @@ func TestFeatureDetectionOverride(t *testing.T) { "iptables v1.6.2", "Linux version 3.14.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: true, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: true, + ChecksumOffloadBroken: true, }, map[string]string{}, }, @@ -156,9 +175,10 @@ func TestFeatureDetectionOverride(t *testing.T) { "iptables v1.6.1", "Linux version 3.14.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, map[string]string{ "RestoreSupportsLock": "true", @@ -168,9 +188,10 @@ func TestFeatureDetectionOverride(t *testing.T) { "error", "error", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, map[string]string{ "RestoreSupportsLock": "true", From 24c2930b36f817bef2137c4a126f21a020829e28 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 14 May 2021 15:05:57 +0100 Subject: [PATCH 4/6] Add VXLAN checksum FV, fix feature detect override. Use reflection to access the feature fields to avoid needing code or every field. --- dataplane/linux/vxlan_mgr.go | 6 ++++- fv/vxlan_test.go | 40 +++++++++++++++++++++++++++++---- iptables/feature_detect.go | 43 ++++++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/dataplane/linux/vxlan_mgr.go b/dataplane/linux/vxlan_mgr.go index 3ab355fef8..d28ce1bd3f 100644 --- a/dataplane/linux/vxlan_mgr.go +++ b/dataplane/linux/vxlan_mgr.go @@ -341,7 +341,11 @@ func (m *vxlanManager) CompleteDeferredWork() error { // KeepVXLANDeviceInSync is a goroutine that configures the VXLAN tunnel device, then periodically // checks that it is still correctly configured. func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, xsumBroken bool, wait time.Duration) { - logrus.WithField("mtu", mtu).Info("VXLAN tunnel device thread started.") + logrus.WithFields(logrus.Fields{ + "mtu": mtu, + "xsumBroken": xsumBroken, + "wait": wait, + }).Info("VXLAN tunnel device thread started.") logNextSuccess := true for { localVTEP := m.getLocalVTEP() diff --git a/fv/vxlan_test.go b/fv/vxlan_test.go index ff7572b405..3e91524cf2 100644 --- a/fv/vxlan_test.go +++ b/fv/vxlan_test.go @@ -42,9 +42,17 @@ import ( var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before adding host IPs to IP sets", []apiconfig.DatastoreType{apiconfig.EtcdV3, apiconfig.Kubernetes}, func(getInfra infrastructure.InfraFactory) { for _, vxlanM := range []api.VXLANMode{api.VXLANModeCrossSubnet} { vxlanMode := vxlanM - for _, routeSource := range []string{"CalicoIPAM", "WorkloadIPs"} { - routeSource := routeSource - Describe(fmt.Sprintf("VXLAN mode set to %s, routeSource %s", vxlanMode, routeSource), func() { + type testConf struct { + RouteSource string + BrokenXSum bool + } + for _, testConfig := range []testConf{ + {"CalicoIPAM", true}, + {"WorkloadIPs", false}, + } { + routeSource := testConfig.RouteSource + brokenXSum := testConfig.BrokenXSum + Describe(fmt.Sprintf("VXLAN mode set to %s, routeSource %s, brokenXSum: %v", vxlanMode, routeSource, brokenXSum), func() { var ( infra infrastructure.DatastoreInfra felixes []*infrastructure.Felix @@ -60,6 +68,10 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before addin topologyOptions.VXLANMode = vxlanMode topologyOptions.IPIPEnabled = false topologyOptions.ExtraEnvVars["FELIX_ROUTESOURCE"] = routeSource + // We force the broken checksum handling on or off so that we're not dependent on kernel version + // for these tests. Since we're testing in containers anyway, checksum offload can't really be + // tested but we can verify the state with ethtool. + topologyOptions.ExtraEnvVars["FELIX_FeatureDetectOverride"] = fmt.Sprintf("ChecksumOffloadBroken=%t", brokenXSum) felixes, client = infrastructure.StartNNodeTopology(3, topologyOptions, infra) // Install a default profile that allows all ingress and egress, in the absence of any Policy. @@ -128,7 +140,27 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before addin } infra.Stop() }) - + if brokenXSum { + It("should disable checksum offload", func() { + Eventually(func() string { + out, err := felixes[0].ExecOutput("ethtool", "-k", "vxlan.calico") + if err != nil { + return fmt.Sprintf("ERROR: %v", err) + } + return out + }, "10s", "100ms").Should(ContainSubstring("tx-checksumming: off")) + }) + } else { + It("should not disable checksum offload", func() { + Eventually(func() string { + out, err := felixes[0].ExecOutput("ethtool", "-k", "vxlan.calico") + if err != nil { + return fmt.Sprintf("ERROR: %v", err) + } + return out + }, "10s", "100ms").Should(ContainSubstring("tx-checksumming: on")) + }) + } It("should use the --random-fully flag in the MASQUERADE rules", func() { for _, felix := range felixes { Eventually(func() string { diff --git a/iptables/feature_detect.go b/iptables/feature_detect.go index 537f097c63..0a940e692b 100644 --- a/iptables/feature_detect.go +++ b/iptables/feature_detect.go @@ -18,6 +18,7 @@ import ( "bytes" "io" "os/exec" + "reflect" "regexp" "strconv" "strings" @@ -66,6 +67,7 @@ type FeatureDetector struct { lock sync.Mutex featureCache *Features featureOverride map[string]string + loggedOverrides bool // Path to file with kernel version GetKernelVersionReader func() (io.Reader, error) @@ -114,27 +116,34 @@ func (d *FeatureDetector) refreshFeaturesLockHeld() { ChecksumOffloadBroken: kerV.Compare(v5Dot7Dot0) < 0, } - if value, ok := (d.featureOverride)["SNATFullyRandom"]; ok { - ovr, err := strconv.ParseBool(value) - if err == nil { - log.WithField("override", ovr).Info("Override feature SNATFullyRandom") - features.SNATFullyRandom = ovr + for k, v := range d.featureOverride { + ovr, err := strconv.ParseBool(v) + logCxt := log.WithFields(log.Fields{ + "flag": k, + "value": v, + }) + if err != nil { + if !d.loggedOverrides { + logCxt.Warn("Failed to parse value for feature detection override; ignoring") + } + continue } - } - if value, ok := (d.featureOverride)["MASQFullyRandom"]; ok { - ovr, err := strconv.ParseBool(value) - if err == nil { - log.WithField("override", ovr).Info("Override feature MASQFullyRandom") - features.MASQFullyRandom = ovr + field := reflect.ValueOf(&features).Elem().FieldByName(k) + if field.IsValid() { + field.SetBool(ovr) + } else { + if !d.loggedOverrides { + logCxt.Warn("Unknown feature detection flag; ignoring") + } + continue } - } - if value, ok := (d.featureOverride)["RestoreSupportsLock"]; ok { - ovr, err := strconv.ParseBool(value) - if err == nil { - log.WithField("override", ovr).Info("Override feature RestoreSupportsLock") - features.RestoreSupportsLock = ovr + + if !d.loggedOverrides { + logCxt.Info("Overriding feature detection flag") } } + // Avoid logging all the override values every time through this function. + d.loggedOverrides = true if d.featureCache == nil || *d.featureCache != features { log.WithFields(log.Fields{ From c334f05993761a6656bdcce137fb6b68f9591810 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 17 May 2021 16:00:50 +0100 Subject: [PATCH 5/6] Add extra diags and remove no-op. --- fv/bpf_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fv/bpf_test.go b/fv/bpf_test.go index 309cfd3f1a..84505683b4 100644 --- a/fv/bpf_test.go +++ b/fv/bpf_test.go @@ -285,6 +285,8 @@ func describeBPFTests(opts ...bpfTestOpt) bool { for i, felix := range felixes { felix.Exec("iptables-save", "-c") felix.Exec("ip", "r") + felix.Exec("ip", "link") + felix.Exec("ip", "addr") felix.Exec("ip", "route", "show", "cached") felix.Exec("calico-bpf", "ipsets", "dump") felix.Exec("calico-bpf", "routes", "dump") @@ -2172,7 +2174,6 @@ func describeBPFTests(opts ...bpfTestOpt) bool { hostW0SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedIPIPTunnelAddr) hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedIPIPTunnelAddr) case "wireguard": - hostW1SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedWireguardTunnelAddr) hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedWireguardTunnelAddr) } From 0e3e12e4b6ede80615a88ee4683687be0d9c4332 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 18 May 2021 10:37:25 +0100 Subject: [PATCH 6/6] Pre-load ipip kernel module to prevent flake. --- .semaphore/create-test-vm | 1 + .semaphore/semaphore.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.semaphore/create-test-vm b/.semaphore/create-test-vm index 18d5c50419..f521f7b80b 100755 --- a/.semaphore/create-test-vm +++ b/.semaphore/create-test-vm @@ -40,6 +40,7 @@ function create-vm() { gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo apt-get update -y && \ gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo apt-get install -y --no-install-recommends git make iproute2 docker.io wireguard && \ gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo usermod -a -G docker ubuntu && \ + gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo modprobe ipip && \ set +x && \ echo "$DOCKERHUB_PASSWORD" | gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- docker login --username "$DOCKERHUB_USERNAME" --password-stdin && \ set -x && \ diff --git a/.semaphore/semaphore.yml b/.semaphore/semaphore.yml index d0e8b9ac43..bcc8df34d6 100644 --- a/.semaphore/semaphore.yml +++ b/.semaphore/semaphore.yml @@ -79,6 +79,9 @@ blocks: - docker load -i /tmp/calico-felix.tar - rm /tmp/calico-felix.tar - touch bin/* + # Pre-loading the IPIP module prevents a flake where the first felix to use IPIP loads the module and + # routing in that first felix container chooses different source IPs than the tests are expecting. + - sudo modprobe ipip jobs: - name: FV Test matrix commands: