Skip to content

Commit

Permalink
Merge pull request #1935 from fcrisciani/overlay-setmatrix
Browse files Browse the repository at this point in the history
Overlay fix for transient IP reuse
  • Loading branch information
mavenugo authored Oct 3, 2017
2 parents 389e1e6 + d93b9b0 commit 7447e54
Show file tree
Hide file tree
Showing 9 changed files with 216 additions and 156 deletions.
1 change: 0 additions & 1 deletion drivers/overlay/encryption.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (

const (
r = 0xD0C4E3
timeout = 30
pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
)

Expand Down
18 changes: 8 additions & 10 deletions drivers/overlay/joinleave.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,

ep.ifName = containerIfName

if err := d.writeEndpointToStore(ep); err != nil {
if err = d.writeEndpointToStore(ep); err != nil {
return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
}

Expand All @@ -86,7 +86,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return err
}

if err := sbox.AddInterface(overlayIfName, "veth",
if err = sbox.AddInterface(overlayIfName, "veth",
sbox.InterfaceOptions().Master(s.brName)); err != nil {
return fmt.Errorf("could not add veth pair inside the network sandbox: %v", err)
}
Expand All @@ -100,15 +100,15 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return err
}

if err := nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
if err = nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
return fmt.Errorf("could not set mac address (%v) to the container interface: %v", ep.mac, err)
}

for _, sub := range n.subnets {
if sub == s {
continue
}
if err := jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
if err = jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
logrus.Errorf("Adding subnet %s static route in network %q failed\n", s.subnetIP, n.id)
}
}
Expand All @@ -122,7 +122,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,

d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)

if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
if err = d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
logrus.Warn(err)
}

Expand Down Expand Up @@ -200,7 +200,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
}

if etype == driverapi.Delete {
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep)
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, false)
return
}

Expand Down Expand Up @@ -232,11 +232,9 @@ func (d *driver) Leave(nid, eid string) error {
}
}

n.leaveSandbox()
d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true)

if err := d.checkEncryption(nid, nil, 0, true, false); err != nil {
logrus.Warn(err)
}
n.leaveSandbox()

return nil
}
66 changes: 17 additions & 49 deletions drivers/overlay/ov_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,9 @@ func (d *driver) DeleteNetwork(nid string) error {
if err := d.deleteEndpointFromStore(ep); err != nil {
logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err)
}

}
// flush the peerDB entries
d.peerFlush(nid)
d.deleteNetwork(nid)

vnis, err := n.releaseVxlanID()
Expand Down Expand Up @@ -505,11 +506,7 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro
vxlanIfaceOption := make([]osl.IfaceOption, 1)
vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
err = sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
return err
}
return nil
return sbox.Restore(Ifaces, nil, nil, nil)
}

func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
Expand Down Expand Up @@ -760,58 +757,38 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
continue
}

logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)

if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
continue
}

if n.driver.isSerfAlive() {
logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
if err != nil {
logrus.Errorf("could not resolve peer %q: %v", ip, err)
continue
}
n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
} else {
// If the gc_thresh values are lower kernel might knock off the neighor entries.
// When we get a L3 miss check if its a valid peer and reprogram the neighbor
// entry again. Rate limit it to once attempt every 500ms, just in case a faulty
// container sends a flood of packets to invalid peers
if !l3Miss {
continue
}
if time.Since(t) > 500*time.Millisecond {
} else if l3Miss && time.Since(t) > time.Second {
// All the local peers will trigger a miss notification but this one is expected and the local container will reply
// autonomously to the ARP request
// In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following
// extra logs that will inform of the possible issue.
// Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html:
// Entries which are marked as permanent are never deleted by the garbage-collector.
// The time limit here is to guarantee that the dbSearch is not
// done too frequently causing a stall of the peerDB operations.
pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip)
if err == nil && !pEntry.isLocal {
t = time.Now()
n.programNeighbor(ip)
logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v",
neigh, l3Miss, l2Miss, *pKey, *pEntry, err)
}
}
}
}
}

func (n *network) programNeighbor(ip net.IP) {
peerMac, _, _, err := n.driver.peerDbSearch(n.id, ip)
if err != nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, no peer entry", ip)
return
}
s := n.getSubnetforIPAddr(ip)
if s == nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, not a valid subnet", ip)
return
}
sbox := n.sandbox()
if sbox == nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, overlay sandbox missing", ip)
return
}
if err := sbox.AddNeighbor(ip, peerMac, true, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s: %v", ip, err)
return
}
}

func (d *driver) addNetwork(n *network) {
d.Lock()
d.networks[n.id] = n
Expand Down Expand Up @@ -1090,15 +1067,6 @@ func (n *network) contains(ip net.IP) bool {
return false
}

func (n *network) getSubnetforIPAddr(ip net.IP) *subnet {
for _, s := range n.subnets {
if s.subnetIP.Contains(ip) {
return s
}
}
return nil
}

// getSubnetforIP returns the subnet to which the given IP belongs
func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
for _, s := range n.subnets {
Expand Down
8 changes: 4 additions & 4 deletions drivers/overlay/ov_serf.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (d *driver) processEvent(u serf.UserEvent) {
case "join":
d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false, false, false)
case "leave":
d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr))
d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false)
}
}

Expand All @@ -135,13 +135,13 @@ func (d *driver) processQuery(q *serf.Query) {
fmt.Printf("Failed to scan query payload string: %v\n", err)
}

peerMac, peerIPMask, vtep, err := d.peerDbSearch(nid, net.ParseIP(ipStr))
pKey, pEntry, err := d.peerDbSearch(nid, net.ParseIP(ipStr))
if err != nil {
return
}

logrus.Debugf("Sending peer query resp mac %s, mask %s, vtep %s", peerMac, net.IP(peerIPMask), vtep)
q.Respond([]byte(fmt.Sprintf("%s %s %s", peerMac.String(), net.IP(peerIPMask).String(), vtep.String())))
logrus.Debugf("Sending peer query resp mac %v, mask %s, vtep %s", pKey.peerMac, net.IP(pEntry.peerIPMask).String(), pEntry.vtep)
q.Respond([]byte(fmt.Sprintf("%s %s %s", pKey.peerMac.String(), net.IP(pEntry.peerIPMask).String(), pEntry.vtep.String())))
}

func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
Expand Down
2 changes: 1 addition & 1 deletion drivers/overlay/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
d.Unlock()

// If containers are already running on this network update the
// advertiseaddress in the peerDB
// advertise address in the peerDB
d.localJoinOnce.Do(func() {
d.peerDBUpdateSelf()
})
Expand Down
2 changes: 1 addition & 1 deletion drivers/overlay/overlay.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ message PeerRecord {
// which this container is running and can be reached by
// building a tunnel to that host IP.
string tunnel_endpoint_ip = 3 [(gogoproto.customname) = "TunnelEndpointIP"];
}
}
Loading

0 comments on commit 7447e54

Please sign in to comment.