From 0b38dfdf3edad8ae188f37509a2cbcafa2b7d17e Mon Sep 17 00:00:00 2001 From: ghorofamike <38078806+ghorofamike@users.noreply.github.com> Date: Wed, 18 Apr 2018 01:58:35 +0200 Subject: [PATCH] This PR adds support for TCP MD5 auth in metallb. (#236) The default net.Conn object is replaced by low-level syscalls that create the FDs and set the sockopts directly, as TCP MD5 sockopts have to be set before the connection is made. --- internal/bgp/bgp.go | 229 +++++++++++++++++++++++- internal/bgp/bgp_test.go | 49 ++++- internal/config/config.go | 9 +- manifests/example-config.yaml | 3 + speaker/bgp_controller.go | 6 +- speaker/bgp_controller_test.go | 2 +- website/content/configuration/_index.md | 4 +- 7 files changed, 287 insertions(+), 15 deletions(-) diff --git a/internal/bgp/bgp.go b/internal/bgp/bgp.go index 9bd3f392d68..0267d57c744 100644 --- a/internal/bgp/bgp.go +++ b/internal/bgp/bgp.go @@ -8,9 +8,13 @@ import ( "io" "io/ioutil" "net" + "os" "reflect" + "strconv" "sync" + "syscall" "time" + "unsafe" "github.com/go-kit/kit/log" ) @@ -25,6 +29,7 @@ type Session struct { peerASN uint32 holdTime time.Duration logger log.Logger + password string newHoldTime chan bool backoff backoff @@ -144,6 +149,7 @@ func (s *Session) sendUpdates() bool { } // connect establishes the BGP session with the peer. +// It sets the TCP MD5 signature sockopt when the password is not empty. func (s *Session) connect() error { s.mu.Lock() defer s.mu.Unlock() @@ -154,13 +160,26 @@ func (s *Session) connect() error { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() + deadline, _ := ctx.Deadline() + // we need the same length timeout as the ctx.
+ timeout := 10 + var conn net.Conn + + d := TCPDialer{ + Dialer: net.Dialer{ + Timeout: 10 * time.Second, + Deadline: deadline, + }, + AuthPassword: s.password, + } + tcphost, portstr, err := net.SplitHostPort(s.addr) + port, err := strconv.Atoi(portstr) + conn, err = d.DialTCP(tcphost, port, timeout) - var d net.Dialer - conn, err := d.DialContext(ctx, "tcp", s.addr) if err != nil { return fmt.Errorf("dial %q: %s", s.addr, err) } - deadline, _ := ctx.Deadline() + if err = conn.SetDeadline(deadline); err != nil { conn.Close() return fmt.Errorf("setting deadline on conn to %q: %s", s.addr, err) @@ -284,7 +303,7 @@ func (s *Session) sendKeepalive() error { // // The session will immediately try to connect and synchronize its // local state with the peer. -func New(l log.Logger, addr string, asn uint32, routerID net.IP, peerASN uint32, holdTime time.Duration) (*Session, error) { +func New(l log.Logger, addr string, asn uint32, routerID net.IP, peerASN uint32, holdTime time.Duration, password string) (*Session, error) { ret := &Session{ addr: addr, asn: asn, @@ -294,6 +313,7 @@ func New(l log.Logger, addr string, asn uint32, routerID net.IP, peerASN uint32, logger: log.With(l, "peer", addr, "localASN", asn, "peerASN", peerASN), newHoldTime: make(chan bool, 1), advertised: map[string]*Advertisement{}, + password: password, } ret.cond = sync.NewCond(&ret.mu) go ret.sendKeepalives() @@ -427,3 +447,204 @@ func (a *Advertisement) Equal(b *Advertisement) bool { } return reflect.DeepEqual(a.Communities, b.Communities) } + +const ( + //tcpMD5SIG TCP MD5 Signature (RFC2385) + tcpMD5SIG = 14 + //ipv6MINHOPCOUNT Generalized TTL Security Mechanism (RFC5082) + ipv6MINHOPCOUNT = 73 +) + +// This struct is defined at; linux-kernel: include/uapi/linux/tcp.h, +// It must be kept in sync with that definition, see current version: +// https://github.com/torvalds/linux/blob/v4.16/include/uapi/linux/tcp.h#L253 +// nolint[structcheck] +type tcpmd5sig struct { + ssFamily uint16 + ss 
[126]byte + pad1 uint16 + keylen uint16 + pad2 uint32 + key [80]byte +} + +func buildTCPMD5Sig(address string, key string) (tcpmd5sig, error) { + t := tcpmd5sig{} + addr := net.ParseIP(address) + if addr.To4() != nil { + t.ssFamily = syscall.AF_INET + copy(t.ss[2:], addr.To4()) + } else { + t.ssFamily = syscall.AF_INET6 + copy(t.ss[6:], addr.To16()) + } + + t.keylen = uint16(len(key)) + copy(t.key[0:], []byte(key)) + + return t, nil +} + +// TCPDialer wraps net.Dialer with an optional TCP MD5 authentication password. +type TCPDialer struct { + net.Dialer + + // MD5 authentication password. + AuthPassword string +} + +// DialTCP creates the connection manually, including setting the +// proper TCP MD5 options when the password is not empty. It works by manipulating +// the low-level FDs, bypassing the net.Conn API as it has no hooks to set +// the necessary sockopts for TCP MD5. +func (d *TCPDialer) DialTCP(tcphost string, port int, timeout int) (net.Conn, error) { + + laddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort("0.0.0.0", "0")) + + if err != nil { + return nil, fmt.Errorf("Error resolving local address: %s ", err) + } + + raddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(tcphost, fmt.Sprintf("%d", port))) + + if err != nil { + return nil, fmt.Errorf("invalid remote address: %s ", err) + } + + var family int + var ra, la syscall.Sockaddr + if raddr.IP.To4() != nil { + family = syscall.AF_INET + rsockaddr := &syscall.SockaddrInet4{Port: port} + copy(rsockaddr.Addr[:], raddr.IP.To4()) + ra = rsockaddr + lsockaddr := &syscall.SockaddrInet4{} + copy(lsockaddr.Addr[:], laddr.IP.To4()) + la = lsockaddr + } else { + family = syscall.AF_INET6 + rsockaddr := &syscall.SockaddrInet6{Port: port} + copy(rsockaddr.Addr[:], raddr.IP.To16()) + ra = rsockaddr + var zone uint32 + if laddr.Zone != "" { + intf, errs := net.InterfaceByName(laddr.Zone) + if errs != nil { + return nil, errs + } + zone = uint32(intf.Index) + } + lsockaddr := &syscall.SockaddrInet6{ZoneId: zone} +
copy(lsockaddr.Addr[:], laddr.IP.To16()) + la = lsockaddr + } + + sockType := syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC | syscall.SOCK_NONBLOCK + proto := 0 + fd, err := syscall.Socket(family, sockType, proto) + if err != nil { + return nil, err + } + + // A new socket was created so we must close it before this + // function returns, either on failure or success. On success, + // the net.FileConn() call below increases the refcount of + // the socket so this fi.Close() doesn't destroy the socket. + // The caller must call Close() on the returned connection later. + // Note that the os.NewFile() call below doesn't play with the + // refcount. + + fi := os.NewFile(uintptr(fd), "") + defer fi.Close() + + if d.AuthPassword != "" { + if err = setsockoptTCPMD5Sig(fd, tcphost, d.AuthPassword); err != nil { + return nil, err + } + } + + if timeout != 0 { + if err = setsockoptIPTTL(fd, family, timeout); err != nil { + return nil, err + } + } + + if err = syscall.Bind(fd, la); err != nil { + return nil, os.NewSyscallError("bind", err) + } + + err = syscall.Connect(fd, ra) + + switch err { + case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR: + // do timeout handling + case nil: + return net.FileConn(fi) + default: + return nil, os.NewSyscallError("connect", err) + } + + // Turns out this is necessary to handle at least syscall.EINPROGRESS; + // without handling EINPROGRESS we end up with errors like + // "error":"dial XXXXXXXXXXX": connect: operation now in progress","localASN":64787,"msg":"failed to connect to peer" + // This is again borrowed from gobgp. + epfd, err := syscall.EpollCreate1(syscall.EPOLL_CLOEXEC) + if err != nil { + return nil, err + } + defer syscall.Close(epfd) + + var event syscall.EpollEvent + events := make([]syscall.EpollEvent, 1) + + event.Events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLPRI + event.Fd = int32(fd) + if err = syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, fd, &event); err != nil { + return nil, err + } + + for { + nevents, err :=
syscall.EpollWait(epfd, events, int(d.Timeout/1000000) /*msec*/) + if err != nil { + return nil, err + } + if nevents == 0 { + return nil, fmt.Errorf("timeout") + } else if nevents == 1 && events[0].Fd == int32(fd) { + nerr, err := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_ERROR) + if err != nil { + return nil, os.NewSyscallError("getsockopt", err) + } + switch err := syscall.Errno(nerr); err { + case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR: + case syscall.Errno(0), syscall.EISCONN: + return net.FileConn(fi) + default: + return nil, os.NewSyscallError("getsockopt", err) + } + } else { + return nil, fmt.Errorf("unexpected epoll behavior") + } + } + +} + +// Better way may be available in Go 1.11, see go-review.googlesource.com/c/go/+/72810 +func setsockoptTCPMD5Sig(fd int, address string, key string) error { + t, err := buildTCPMD5Sig(address, key) + if err != nil { + return err + } + b := *(*[unsafe.Sizeof(t)]byte)(unsafe.Pointer(&t)) + return os.NewSyscallError("setsockopt", syscall.SetsockoptString(fd, syscall.IPPROTO_TCP, tcpMD5SIG, string(b[:]))) +} + +func setsockoptIPTTL(fd int, family int, value int) error { + level := syscall.IPPROTO_IP + name := syscall.IP_TTL + if family == syscall.AF_INET6 { + level = syscall.IPPROTO_IPV6 + name = syscall.IPV6_UNICAST_HOPS + } + return os.NewSyscallError("setsockopt", syscall.SetsockoptInt(fd, level, name, value)) +} diff --git a/internal/bgp/bgp_test.go b/internal/bgp/bgp_test.go index b9e2a142cfe..3102cbaa09d 100644 --- a/internal/bgp/bgp_test.go +++ b/internal/bgp/bgp_test.go @@ -28,7 +28,7 @@ func ipnet(s string) *net.IPNet { return n } -func runGoBGP(ctx context.Context) (chan *table.Path, error) { +func runGoBGP(ctx context.Context, password string, port int32) (chan *table.Path, error) { s := gobgp.NewBgpServer() go s.Serve() @@ -36,7 +36,7 @@ func runGoBGP(ctx context.Context) (chan *table.Path, error) { Config: config.GlobalConfig{ As: 64543, RouterId: "1.2.3.4", - Port: 4179, + Port: 
port, }, } if err := s.Start(global); err != nil { @@ -47,6 +47,7 @@ func runGoBGP(ctx context.Context) (chan *table.Path, error) { Config: config.NeighborConfig{ NeighborAddress: "127.0.0.1", PeerAs: 64543, + AuthPassword: password, }, } if err := s.AddNeighbor(n); err != nil { @@ -77,13 +78,53 @@ func TestInterop(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Minute) defer cancel() - ips, err := runGoBGP(ctx) + ips, err := runGoBGP(ctx, "", 4179) if err != nil { t.Fatalf("starting GoBGP: %s", err) } l := log.NewNopLogger() - sess, err := New(l, "127.0.0.1:4179", 64543, net.ParseIP("2.3.4.5"), 64543, 10*time.Second) + sess, err := New(l, "127.0.0.1:4179", 64543, net.ParseIP("2.3.4.5"), 64543, 10*time.Second, "") + if err != nil { + t.Fatalf("starting BGP session to GoBGP: %s", err) + } + defer sess.Close() + + adv := &Advertisement{ + Prefix: ipnet("1.2.3.0/24"), + NextHop: net.ParseIP("10.20.30.40"), + LocalPref: 42, + Communities: []uint32{1234, 2345}, + } + + if err := sess.Set(adv); err != nil { + t.Fatalf("setting advertisement: %s", err) + } + + for { + select { + case <-ctx.Done(): + t.Fatalf("test timed out waiting for route") + case path := <-ips: + if err := checkPath(path, adv); err != nil { + t.Fatalf("path did not match expectations: %s", err) + } + return + } + } +} + +func TestTCPMD5(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + ips, err := runGoBGP(ctx, "somepassword", 5179) + if err != nil { + t.Fatalf("starting GoBGP: %s", err) + } + + l := log.NewNopLogger() + sess, err := New(l, "127.0.0.1:5179", 64543, net.ParseIP("2.3.4.6"), 64543, 10*time.Second, "somepassword") if err != nil { t.Fatalf("starting BGP session to GoBGP: %s", err) } diff --git a/internal/config/config.go b/internal/config/config.go index 6fcc7d18bf8..d7069767bce 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -44,6 +44,7 @@ type peer struct { HoldTime string 
`yaml:"hold-time"` RouterID string `yaml:"router-id"` NodeSelectors []nodeSelector `yaml:"node-selectors"` + Password string `yaml:"password"` } type nodeSelector struct { @@ -106,7 +107,8 @@ type Peer struct { // Only connect to this peer on nodes that match one of these // selectors. NodeSelectors []labels.Selector - + // Authentication password for routers enforcing TCP MD5 authenticated sessions + Password string // TODO: more BGP session settings } @@ -284,6 +286,10 @@ func parsePeer(p peer) (*Peer, error) { } } + var password string + if p.Password != "" { + password = p.Password + } return &Peer{ MyASN: p.MyASN, ASN: p.ASN, @@ -292,6 +298,7 @@ func parsePeer(p peer) (*Peer, error) { HoldTime: holdTime, RouterID: routerID, NodeSelectors: nodeSels, + Password: password, }, nil } diff --git a/manifests/example-config.yaml b/manifests/example-config.yaml index 54ae33da863..0d2d34ecb0c 100644 --- a/manifests/example-config.yaml +++ b/manifests/example-config.yaml @@ -25,6 +25,9 @@ data: # to the node IP address. Generally only useful when you need to peer with # another BGP router running on the same machine as MetalLB. router-id: 1.2.3.4 + # (optional) Password for TCPMD5 authenticated BGP sessions + # offered by some peers. + password: "yourPassword" # (optional) The nodes that should connect to this peer. A node # matches if at least one of the node selectors matches. 
Within # one selector, a node matches if all the matchers are diff --git a/speaker/bgp_controller.go b/speaker/bgp_controller.go index 10ee440e029..a5ae325fc8a 100644 --- a/speaker/bgp_controller.go +++ b/speaker/bgp_controller.go @@ -114,7 +114,7 @@ func (c *bgpController) syncPeers(l log.Logger) error { if p.cfg.RouterID != nil { routerID = p.cfg.RouterID } - s, err := newBGP(c.logger, fmt.Sprintf("%s:%d", p.cfg.Addr, p.cfg.Port), p.cfg.MyASN, routerID, p.cfg.ASN, p.cfg.HoldTime) + s, err := newBGP(c.logger, fmt.Sprintf("%s:%d", p.cfg.Addr, p.cfg.Port), p.cfg.MyASN, routerID, p.cfg.ASN, p.cfg.HoldTime, p.cfg.Password) if err != nil { l.Log("op", "syncPeers", "error", err, "peer", p.cfg.Addr, "msg", "failed to create BGP session") errs++ @@ -216,6 +216,6 @@ func (c *bgpController) SetNode(l log.Logger, node *v1.Node) error { return c.syncPeers(l) } -var newBGP = func(logger log.Logger, addr string, myASN uint32, routerID net.IP, asn uint32, hold time.Duration) (session, error) { - return bgp.New(logger, addr, myASN, routerID, asn, hold) +var newBGP = func(logger log.Logger, addr string, myASN uint32, routerID net.IP, asn uint32, hold time.Duration, password string) (session, error) { + return bgp.New(logger, addr, myASN, routerID, asn, hold, password) } diff --git a/speaker/bgp_controller_test.go b/speaker/bgp_controller_test.go index a5e63c05ebe..6e4988a7488 100644 --- a/speaker/bgp_controller_test.go +++ b/speaker/bgp_controller_test.go @@ -93,7 +93,7 @@ type fakeBGP struct { gotAds map[string][]*bgp.Advertisement } -func (f *fakeBGP) New(_ log.Logger, addr string, _ uint32, _ net.IP, _ uint32, _ time.Duration) (session, error) { +func (f *fakeBGP) New(_ log.Logger, addr string, _ uint32, _ net.IP, _ uint32, _ time.Duration, _ string) (session, error) { f.Lock() defer f.Unlock() diff --git a/website/content/configuration/_index.md b/website/content/configuration/_index.md index 337ed959757..a64af0ee53f 100644 --- a/website/content/configuration/_index.md +++ 
b/website/content/configuration/_index.md @@ -97,7 +97,7 @@ specific than `/24`. So, you need to somehow advertise a `/24` to your transit provider, but still have the ability to do per-IP routing internally. -Here's a configuration that implemnents this: +Here's a configuration that implements this: ```yaml apiVersion: v1 @@ -120,7 +120,7 @@ data: - aggregation-length: 32 localpref: 100 communities: - - no-avertise + - no-advertise - aggregation-length: 24 bgp-communities: no-advertise: 65535:65282