Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable checksum offload on the VXLAN tunnel for kernels <v5.7. #2811

Merged
merged 6 commits into from May 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .semaphore/create-test-vm
Expand Up @@ -40,6 +40,7 @@ function create-vm() {
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo apt-get update -y && \
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo apt-get install -y --no-install-recommends git make iproute2 docker.io wireguard && \
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo usermod -a -G docker ubuntu && \
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo modprobe ipip && \
set +x && \
echo "$DOCKERHUB_PASSWORD" | gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- docker login --username "$DOCKERHUB_USERNAME" --password-stdin && \
set -x && \
Expand Down
3 changes: 3 additions & 0 deletions .semaphore/semaphore.yml
Expand Up @@ -79,6 +79,9 @@ blocks:
- docker load -i /tmp/calico-felix.tar
- rm /tmp/calico-felix.tar
- touch bin/*
# Pre-loading the IPIP module prevents a flake where the first felix to use IPIP loads the module and
# routing in that first felix container chooses different source IPs than the tests are expecting.
- sudo modprobe ipip
jobs:
- name: FV Test matrix
commands:
Expand Down
2 changes: 1 addition & 1 deletion dataplane/linux/int_dataplane.go
Expand Up @@ -445,7 +445,7 @@ func NewIntDataplaneDriver(config Config) *InternalDataplane {
config,
dp.loopSummarizer,
)
go vxlanManager.KeepVXLANDeviceInSync(config.VXLANMTU, 10*time.Second)
go vxlanManager.KeepVXLANDeviceInSync(config.VXLANMTU, iptablesFeatures.ChecksumOffloadBroken, 10*time.Second)
dp.RegisterManager(vxlanManager)
} else {
cleanUpVXLANDevice()
Expand Down
23 changes: 18 additions & 5 deletions dataplane/linux/vxlan_mgr.go
@@ -1,4 +1,4 @@
// Copyright (c) 2016-2020 Tigera, Inc. All rights reserved.
// Copyright (c) 2016-2021 Tigera, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,7 @@ import (
"syscall"
"time"

"github.com/projectcalico/felix/ethtool"
"github.com/projectcalico/felix/ipsets"
"github.com/projectcalico/felix/logutils"
"github.com/projectcalico/felix/rules"
Expand Down Expand Up @@ -339,8 +340,12 @@ func (m *vxlanManager) CompleteDeferredWork() error {

// KeepVXLANDeviceInSync is a goroutine that configures the VXLAN tunnel device, then periodically
// checks that it is still correctly configured.
func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, wait time.Duration) {
logrus.WithField("mtu", mtu).Info("VXLAN tunnel device thread started.")
func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, xsumBroken bool, wait time.Duration) {
logrus.WithFields(logrus.Fields{
"mtu": mtu,
"xsumBroken": xsumBroken,
"wait": wait,
}).Info("VXLAN tunnel device thread started.")
logNextSuccess := true
for {
localVTEP := m.getLocalVTEP()
Expand All @@ -362,13 +367,14 @@ func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, wait time.Duration) {
}
}

err := m.configureVXLANDevice(mtu, localVTEP)
err := m.configureVXLANDevice(mtu, localVTEP, xsumBroken)
if err != nil {
logrus.WithError(err).Warn("Failed configure VXLAN tunnel device, retrying...")
logNextSuccess = true
time.Sleep(1 * time.Second)
continue
}

if logNextSuccess {
logrus.Info("VXLAN tunnel device configured")
logNextSuccess = false
Expand Down Expand Up @@ -400,7 +406,7 @@ func (m *vxlanManager) getParentInterface(localVTEP *proto.VXLANTunnelEndpointUp
}

// configureVXLANDevice ensures the VXLAN tunnel device is up and configured correctly.
func (m *vxlanManager) configureVXLANDevice(mtu int, localVTEP *proto.VXLANTunnelEndpointUpdate) error {
func (m *vxlanManager) configureVXLANDevice(mtu int, localVTEP *proto.VXLANTunnelEndpointUpdate, xsumBroken bool) error {
logCxt := logrus.WithFields(logrus.Fields{"device": m.vxlanDevice})
logCxt.Debug("Configuring VXLAN tunnel device")
parent, err := m.getParentInterface(localVTEP)
Expand Down Expand Up @@ -478,6 +484,13 @@ func (m *vxlanManager) configureVXLANDevice(mtu int, localVTEP *proto.VXLANTunne
return fmt.Errorf("failed to ensure address of interface: %s", err)
}

// If required, disable checksum offload.
if xsumBroken {
if err := ethtool.EthtoolTXOff(m.vxlanDevice); err != nil {
return fmt.Errorf("failed to disable checksum offload: %s", err)
}
}

// And the device is up.
if err := m.nlHandle.LinkSetUp(link); err != nil {
return fmt.Errorf("failed to set interface up: %s", err)
Expand Down
8 changes: 4 additions & 4 deletions dataplane/linux/vxlan_mgr_test.go
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2020 Tigera, Inc. All rights reserved.
// Copyright (c) 2019-2021 Tigera, Inc. All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -141,7 +141,7 @@ var _ = Describe("VXLANManager", func() {

manager.noEncapRouteTable = prt

err := manager.configureVXLANDevice(50, localVTEP)
err := manager.configureVXLANDevice(50, localVTEP, false)
Expect(err).NotTo(HaveOccurred())

Expect(manager.myVTEP).NotTo(BeNil())
Expand Down Expand Up @@ -178,7 +178,7 @@ var _ = Describe("VXLANManager", func() {
})

It("adds the route to the default table on next try when the parent route table is not immediately found", func() {
go manager.KeepVXLANDeviceInSync(1400, 1*time.Second)
go manager.KeepVXLANDeviceInSync(1400, false, 1*time.Second)
manager.OnUpdate(&proto.VXLANTunnelEndpointUpdate{
Node: "node2",
Mac: "00:0a:95:9d:68:16",
Expand Down Expand Up @@ -213,7 +213,7 @@ var _ = Describe("VXLANManager", func() {
localVTEP := manager.getLocalVTEP()
Expect(localVTEP).NotTo(BeNil())

err = manager.configureVXLANDevice(50, localVTEP)
err = manager.configureVXLANDevice(50, localVTEP, false)
Expect(err).NotTo(HaveOccurred())

Expect(prt.currentRoutes["eth0"]).To(HaveLen(0))
Expand Down
108 changes: 108 additions & 0 deletions ethtool/ethtool.go
@@ -0,0 +1,108 @@
// Copyright (c) 2021 Tigera, Inc. All rights reserved.
//
// Based on the version in the Weave project, Copyright Weaveworks:
// https://github.com/weaveworks/weave/blob/c69e140e9e43d456b6fe5812a2bc61bc67953b93/net/ethtool.go
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ethtool

import (
"fmt"
"unsafe"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"modernc.org/memory"
)

// IFReqData represents linux/if.h 'struct ifreq'
type IFReqData struct {
Name [unix.IFNAMSIZ]byte
Data uintptr
}

// EthtoolValue represents linux/ethtool.h 'struct ethtool_value'
type EthtoolValue struct {
Cmd uint32
Data uint32
}

func ioctlEthtool(fd int, argp *IFReqData) error {
// Important that the cast to uintptr is _syntactically_ within the Syscall() invocation in order to guarantee
// safety. (See notes in the unsafe.Pointer docs.)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.SIOCETHTOOL), uintptr(unsafe.Pointer(argp)))
if errno != 0 {
return errno
}
return nil
}

// EthtoolTXOff disables the TX checksum offload on the specified interface
func EthtoolTXOff(name string) error {
if len(name)+1 > unix.IFNAMSIZ {
return fmt.Errorf("name too long")
}

// To access the IOCTL, we need a socket file descriptor.
socket, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
if err != nil {
return err
}
defer func() {
err := unix.Close(socket)
if err != nil {
// Super unlikely; normally a failure from Close means that some data couldn't be flushed
// but we didn't send any.
logrus.WithError(err).Warn("unix.Close(socket) failed")
}
}()

// Allocate an EthtoolValue using manual memory management. This is required because we need to pass
// a struct to the Syscall that in turn points to the EthtoolValue. If we allocate the EthtooValue on the
// go stack/heap then it would not be protected from being moved by the GC while the syscall is in progress.
// (Only the directly-passed struct is protected from being moved during the syscall.)
alloc := memory.Allocator{}
defer func() {
err := alloc.Close()
if err != nil {
logrus.WithError(err).Panic("Failed to release memory to the system")
}
}()
valueUPtr, err := alloc.UnsafeCalloc(int(unsafe.Sizeof(EthtoolValue{})))
if err != nil {
return fmt.Errorf("failed to allocate memory: %w", err)
}
defer func() {
err := alloc.UnsafeFree(valueUPtr)
if err != nil {
logrus.WithError(err).Warn("UnsafeFree() failed")
}
}()
value := (*EthtoolValue)(valueUPtr)

// Get the current value so we only set it if it needs to change.
*value = EthtoolValue{Cmd: unix.ETHTOOL_GTXCSUM}
request := IFReqData{Data: uintptr(valueUPtr)}
copy(request.Name[:], name)
if err := ioctlEthtool(socket, &request); err != nil {
return err
}
if value.Data == 0 { // if already off, don't try to change
return nil
}

// Set the value.
*value = EthtoolValue{Cmd: unix.ETHTOOL_STXCSUM, Data: 0 /* off */}
return ioctlEthtool(socket, &request)
}
3 changes: 2 additions & 1 deletion fv/bpf_test.go
Expand Up @@ -285,6 +285,8 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
for i, felix := range felixes {
felix.Exec("iptables-save", "-c")
felix.Exec("ip", "r")
felix.Exec("ip", "link")
felix.Exec("ip", "addr")
felix.Exec("ip", "route", "show", "cached")
felix.Exec("calico-bpf", "ipsets", "dump")
felix.Exec("calico-bpf", "routes", "dump")
Expand Down Expand Up @@ -2172,7 +2174,6 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
hostW0SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedIPIPTunnelAddr)
hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedIPIPTunnelAddr)
case "wireguard":
hostW1SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedWireguardTunnelAddr)
hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedWireguardTunnelAddr)
}

Expand Down
25 changes: 22 additions & 3 deletions fv/containers/containers.go
Expand Up @@ -54,6 +54,7 @@ type Container struct {
dataRaces []string

logFinished sync.WaitGroup
dropAllLogs bool
}

type watch struct {
Expand All @@ -63,6 +64,17 @@ type watch struct {

var containerIdx = 0

func (c *Container) StopLogs() {
if c == nil {
log.Info("StopLogs no-op because nil container")
return
}

c.mutex.Lock()
c.dropAllLogs = true
c.mutex.Unlock()
}

func (c *Container) Stop() {
if c == nil {
log.Info("Stop no-op because nil container")
Expand All @@ -78,7 +90,7 @@ func (c *Container) Stop() {
}
c.mutex.Unlock()

logCxt.Info("Stop")
logCxt.Info("Stopping...")

// Ask docker to stop the container.
withTimeoutPanic(logCxt, 30*time.Second, c.execDockerStop)
Expand Down Expand Up @@ -347,7 +359,14 @@ func (c *Container) copyOutputToLog(streamName string, stream io.Reader, done *s

for scanner.Scan() {
line := scanner.Text()
fmt.Fprintf(ginkgo.GinkgoWriter, "%v[%v] %v\n", c.Name, streamName, line)

// Check if we're dropping logs (e.g. because we're tearing down the container at the end of the test).
c.mutex.Lock()
droppingLogs := c.dropAllLogs
c.mutex.Unlock()
if !droppingLogs {
fmt.Fprintf(ginkgo.GinkgoWriter, "%v[%v] %v\n", c.Name, streamName, line)
}

// Capture data race warnings and log to file.
if strings.Contains(line, "WARNING: DATA RACE") {
Expand Down Expand Up @@ -392,7 +411,7 @@ func (c *Container) copyOutputToLog(streamName string, stream io.Reader, done *s
}
logCxt := log.WithFields(log.Fields{
"name": c.Name,
"stream": stream,
"stream": streamName,
})
if scanner.Err() != nil {
logCxt.WithError(scanner.Err()).Error("Non-EOF error reading container stream")
Expand Down
4 changes: 3 additions & 1 deletion fv/infrastructure/infra_etcd.go
@@ -1,4 +1,4 @@
// Copyright (c) 2018-2020 Tigera, Inc. All rights reserved.
// Copyright (c) 2018-2021 Tigera, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -198,6 +198,8 @@ func (eds *EtcdDatastoreInfra) DumpErrorData() {
}

func (eds *EtcdDatastoreInfra) Stop() {
eds.bpfLog.StopLogs()
eds.etcdContainer.StopLogs()
eds.bpfLog.Stop()
eds.etcdContainer.Stop()
}