Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
libnetwork/drivers/overlay/encryption.go /
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
599 lines (518 sloc)
13.2 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package overlay | |
| import ( | |
| "bytes" | |
| "encoding/binary" | |
| "encoding/hex" | |
| "fmt" | |
| "hash/fnv" | |
| "net" | |
| "sync" | |
| "syscall" | |
| "strconv" | |
| "github.com/Sirupsen/logrus" | |
| "github.com/docker/libnetwork/iptables" | |
| "github.com/docker/libnetwork/ns" | |
| "github.com/docker/libnetwork/types" | |
| "github.com/vishvananda/netlink" | |
| ) | |
| const ( | |
| mark = uint32(0xD0C4E3) | |
| timeout = 30 | |
| pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8) | |
| ) | |
| const ( | |
| forward = iota + 1 | |
| reverse | |
| bidir | |
| ) | |
| type key struct { | |
| value []byte | |
| tag uint32 | |
| } | |
| func (k *key) String() string { | |
| if k != nil { | |
| return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag) | |
| } | |
| return "" | |
| } | |
| type spi struct { | |
| forward int | |
| reverse int | |
| } | |
| func (s *spi) String() string { | |
| return fmt.Sprintf("SPI(FWD: 0x%x, REV: 0x%x)", uint32(s.forward), uint32(s.reverse)) | |
| } | |
| type encrMap struct { | |
| nodes map[string][]*spi | |
| sync.Mutex | |
| } | |
| func (e *encrMap) String() string { | |
| e.Lock() | |
| defer e.Unlock() | |
| b := new(bytes.Buffer) | |
| for k, v := range e.nodes { | |
| b.WriteString("\n") | |
| b.WriteString(k) | |
| b.WriteString(":") | |
| b.WriteString("[") | |
| for _, s := range v { | |
| b.WriteString(s.String()) | |
| b.WriteString(",") | |
| } | |
| b.WriteString("]") | |
| } | |
| return b.String() | |
| } | |
| func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal, add bool) error { | |
| logrus.Debugf("checkEncryption(%s, %v, %d, %t)", nid[0:7], rIP, vxlanID, isLocal) | |
| n := d.network(nid) | |
| if n == nil || !n.secure { | |
| return nil | |
| } | |
| if len(d.keys) == 0 { | |
| return types.ForbiddenErrorf("encryption key is not present") | |
| } | |
| lIP := net.ParseIP(d.bindAddress) | |
| aIP := net.ParseIP(d.advertiseAddress) | |
| nodes := map[string]net.IP{} | |
| switch { | |
| case isLocal: | |
| if err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { | |
| if !aIP.Equal(pEntry.vtep) { | |
| nodes[pEntry.vtep.String()] = pEntry.vtep | |
| } | |
| return false | |
| }); err != nil { | |
| logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %s: %v", nid[0:5], err) | |
| } | |
| default: | |
| if len(d.network(nid).endpoints) > 0 { | |
| nodes[rIP.String()] = rIP | |
| } | |
| } | |
| logrus.Debugf("List of nodes: %s", nodes) | |
| if add { | |
| for _, rIP := range nodes { | |
| if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil { | |
| logrus.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err) | |
| } | |
| } | |
| } else { | |
| if len(nodes) == 0 { | |
| if err := removeEncryption(lIP, rIP, d.secMap); err != nil { | |
| logrus.Warnf("Failed to remove network encryption between %s and %s: %v", lIP, rIP, err) | |
| } | |
| } | |
| } | |
| return nil | |
| } | |
| func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error { | |
| logrus.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP) | |
| rIPs := remoteIP.String() | |
| indices := make([]*spi, 0, len(keys)) | |
| err := programMangle(vni, true) | |
| if err != nil { | |
| logrus.Warn(err) | |
| } | |
| err = programInput(vni, true) | |
| if err != nil { | |
| logrus.Warn(err) | |
| } | |
| for i, k := range keys { | |
| spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)} | |
| dir := reverse | |
| if i == 0 { | |
| dir = bidir | |
| } | |
| fSA, rSA, err := programSA(localIP, remoteIP, spis, k, dir, true) | |
| if err != nil { | |
| logrus.Warn(err) | |
| } | |
| indices = append(indices, spis) | |
| if i != 0 { | |
| continue | |
| } | |
| err = programSP(fSA, rSA, true) | |
| if err != nil { | |
| logrus.Warn(err) | |
| } | |
| } | |
| em.Lock() | |
| em.nodes[rIPs] = indices | |
| em.Unlock() | |
| return nil | |
| } | |
| func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error { | |
| em.Lock() | |
| indices, ok := em.nodes[remoteIP.String()] | |
| em.Unlock() | |
| if !ok { | |
| return nil | |
| } | |
| for i, idxs := range indices { | |
| dir := reverse | |
| if i == 0 { | |
| dir = bidir | |
| } | |
| fSA, rSA, err := programSA(localIP, remoteIP, idxs, nil, dir, false) | |
| if err != nil { | |
| logrus.Warn(err) | |
| } | |
| if i != 0 { | |
| continue | |
| } | |
| err = programSP(fSA, rSA, false) | |
| if err != nil { | |
| logrus.Warn(err) | |
| } | |
| } | |
| return nil | |
| } | |
| func programMangle(vni uint32, add bool) (err error) { | |
| var ( | |
| p = strconv.FormatUint(uint64(vxlanPort), 10) | |
| c = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8) | |
| m = strconv.FormatUint(uint64(mark), 10) | |
| chain = "OUTPUT" | |
| rule = []string{"-p", "udp", "--dport", p, "-m", "u32", "--u32", c, "-j", "MARK", "--set-mark", m} | |
| a = "-A" | |
| action = "install" | |
| ) | |
| if add == iptables.Exists(iptables.Mangle, chain, rule...) { | |
| return | |
| } | |
| if !add { | |
| a = "-D" | |
| action = "remove" | |
| } | |
| if err = iptables.RawCombinedOutput(append([]string{"-t", string(iptables.Mangle), a, chain}, rule...)...); err != nil { | |
| logrus.Warnf("could not %s mangle rule: %v", action, err) | |
| } | |
| return | |
| } | |
| func programInput(vni uint32, add bool) (err error) { | |
| var ( | |
| port = strconv.FormatUint(uint64(vxlanPort), 10) | |
| vniMatch = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8) | |
| plainVxlan = []string{"-p", "udp", "--dport", port, "-m", "u32", "--u32", vniMatch, "-j"} | |
| ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...) | |
| block = append(plainVxlan, "DROP") | |
| accept = append(ipsecVxlan, "ACCEPT") | |
| chain = "INPUT" | |
| action = iptables.Append | |
| msg = "add" | |
| ) | |
| if !add { | |
| action = iptables.Delete | |
| msg = "remove" | |
| } | |
| if err := iptables.ProgramRule(iptables.Filter, chain, action, accept); err != nil { | |
| logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err) | |
| } | |
| if err := iptables.ProgramRule(iptables.Filter, chain, action, block); err != nil { | |
| logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err) | |
| } | |
| return | |
| } | |
| func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) { | |
| var ( | |
| action = "Removing" | |
| xfrmProgram = ns.NlHandle().XfrmStateDel | |
| ) | |
| if add { | |
| action = "Adding" | |
| xfrmProgram = ns.NlHandle().XfrmStateAdd | |
| } | |
| if dir&reverse > 0 { | |
| rSA = &netlink.XfrmState{ | |
| Src: remoteIP, | |
| Dst: localIP, | |
| Proto: netlink.XFRM_PROTO_ESP, | |
| Spi: spi.reverse, | |
| Mode: netlink.XFRM_MODE_TRANSPORT, | |
| } | |
| if add { | |
| rSA.Aead = buildAeadAlgo(k, spi.reverse) | |
| } | |
| exists, err := saExists(rSA) | |
| if err != nil { | |
| exists = !add | |
| } | |
| if add != exists { | |
| logrus.Debugf("%s: rSA{%s}", action, rSA) | |
| if err := xfrmProgram(rSA); err != nil { | |
| logrus.Warnf("Failed %s rSA{%s}: %v", action, rSA, err) | |
| } | |
| } | |
| } | |
| if dir&forward > 0 { | |
| fSA = &netlink.XfrmState{ | |
| Src: localIP, | |
| Dst: remoteIP, | |
| Proto: netlink.XFRM_PROTO_ESP, | |
| Spi: spi.forward, | |
| Mode: netlink.XFRM_MODE_TRANSPORT, | |
| } | |
| if add { | |
| fSA.Aead = buildAeadAlgo(k, spi.forward) | |
| } | |
| exists, err := saExists(fSA) | |
| if err != nil { | |
| exists = !add | |
| } | |
| if add != exists { | |
| logrus.Debugf("%s fSA{%s}", action, fSA) | |
| if err := xfrmProgram(fSA); err != nil { | |
| logrus.Warnf("Failed %s fSA{%s}: %v.", action, fSA, err) | |
| } | |
| } | |
| } | |
| return | |
| } | |
| func programSP(fSA *netlink.XfrmState, rSA *netlink.XfrmState, add bool) error { | |
| action := "Removing" | |
| xfrmProgram := ns.NlHandle().XfrmPolicyDel | |
| if add { | |
| action = "Adding" | |
| xfrmProgram = ns.NlHandle().XfrmPolicyAdd | |
| } | |
| fullMask := net.CIDRMask(8*len(fSA.Src), 8*len(fSA.Src)) | |
| fPol := &netlink.XfrmPolicy{ | |
| Src: &net.IPNet{IP: fSA.Src, Mask: fullMask}, | |
| Dst: &net.IPNet{IP: fSA.Dst, Mask: fullMask}, | |
| Dir: netlink.XFRM_DIR_OUT, | |
| Proto: 17, | |
| DstPort: 4789, | |
| Mark: &netlink.XfrmMark{ | |
| Value: mark, | |
| }, | |
| Tmpls: []netlink.XfrmPolicyTmpl{ | |
| { | |
| Src: fSA.Src, | |
| Dst: fSA.Dst, | |
| Proto: netlink.XFRM_PROTO_ESP, | |
| Mode: netlink.XFRM_MODE_TRANSPORT, | |
| Spi: fSA.Spi, | |
| }, | |
| }, | |
| } | |
| exists, err := spExists(fPol) | |
| if err != nil { | |
| exists = !add | |
| } | |
| if add != exists { | |
| logrus.Debugf("%s fSP{%s}", action, fPol) | |
| if err := xfrmProgram(fPol); err != nil { | |
| logrus.Warnf("%s fSP{%s}: %v", action, fPol, err) | |
| } | |
| } | |
| return nil | |
| } | |
| func saExists(sa *netlink.XfrmState) (bool, error) { | |
| _, err := ns.NlHandle().XfrmStateGet(sa) | |
| switch err { | |
| case nil: | |
| return true, nil | |
| case syscall.ESRCH: | |
| return false, nil | |
| default: | |
| err = fmt.Errorf("Error while checking for SA existence: %v", err) | |
| logrus.Warn(err) | |
| return false, err | |
| } | |
| } | |
| func spExists(sp *netlink.XfrmPolicy) (bool, error) { | |
| _, err := ns.NlHandle().XfrmPolicyGet(sp) | |
| switch err { | |
| case nil: | |
| return true, nil | |
| case syscall.ENOENT: | |
| return false, nil | |
| default: | |
| err = fmt.Errorf("Error while checking for SP existence: %v", err) | |
| logrus.Warn(err) | |
| return false, err | |
| } | |
| } | |
| func buildSPI(src, dst net.IP, st uint32) int { | |
| b := make([]byte, 4) | |
| binary.BigEndian.PutUint32(b, st) | |
| h := fnv.New32a() | |
| h.Write(src) | |
| h.Write(b) | |
| h.Write(dst) | |
| return int(binary.BigEndian.Uint32(h.Sum(nil))) | |
| } | |
| func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo { | |
| salt := make([]byte, 4) | |
| binary.BigEndian.PutUint32(salt, uint32(s)) | |
| return &netlink.XfrmStateAlgo{ | |
| Name: "rfc4106(gcm(aes))", | |
| Key: append(k.value, salt...), | |
| ICVLen: 64, | |
| } | |
| } | |
| func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error { | |
| d.secMap.Lock() | |
| for node, indices := range d.secMap.nodes { | |
| idxs, stop := f(node, indices) | |
| if idxs != nil { | |
| d.secMap.nodes[node] = idxs | |
| } | |
| if stop { | |
| break | |
| } | |
| } | |
| d.secMap.Unlock() | |
| return nil | |
| } | |
| func (d *driver) setKeys(keys []*key) error { | |
| // Accept the encryption keys and clear any stale encryption map | |
| d.Lock() | |
| d.keys = keys | |
| d.secMap = &encrMap{nodes: map[string][]*spi{}} | |
| d.Unlock() | |
| logrus.Debugf("Initial encryption keys: %v", d.keys) | |
| return nil | |
| } | |
| // updateKeys allows to add a new key and/or change the primary key and/or prune an existing key | |
| // The primary key is the key used in transmission and will go in first position in the list. | |
| func (d *driver) updateKeys(newKey, primary, pruneKey *key) error { | |
| logrus.Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey) | |
| logrus.Debugf("Current: %v", d.keys) | |
| var ( | |
| newIdx = -1 | |
| priIdx = -1 | |
| delIdx = -1 | |
| lIP = net.ParseIP(d.bindAddress) | |
| aIP = net.ParseIP(d.advertiseAddress) | |
| ) | |
| d.Lock() | |
| // add new | |
| if newKey != nil { | |
| d.keys = append(d.keys, newKey) | |
| newIdx += len(d.keys) | |
| } | |
| for i, k := range d.keys { | |
| if primary != nil && k.tag == primary.tag { | |
| priIdx = i | |
| } | |
| if pruneKey != nil && k.tag == pruneKey.tag { | |
| delIdx = i | |
| } | |
| } | |
| d.Unlock() | |
| if (newKey != nil && newIdx == -1) || | |
| (primary != nil && priIdx == -1) || | |
| (pruneKey != nil && delIdx == -1) { | |
| return types.BadRequestErrorf("cannot find proper key indices while processing key update:"+ | |
| "(newIdx,priIdx,delIdx):(%d, %d, %d)", newIdx, priIdx, delIdx) | |
| } | |
| d.secMapWalk(func(rIPs string, spis []*spi) ([]*spi, bool) { | |
| rIP := net.ParseIP(rIPs) | |
| return updateNodeKey(lIP, aIP, rIP, spis, d.keys, newIdx, priIdx, delIdx), false | |
| }) | |
| d.Lock() | |
| // swap primary | |
| if priIdx != -1 { | |
| swp := d.keys[0] | |
| d.keys[0] = d.keys[priIdx] | |
| d.keys[priIdx] = swp | |
| } | |
| // prune | |
| if delIdx != -1 { | |
| if delIdx == 0 { | |
| delIdx = priIdx | |
| } | |
| d.keys = append(d.keys[:delIdx], d.keys[delIdx+1:]...) | |
| } | |
| d.Unlock() | |
| logrus.Debugf("Updated: %v", d.keys) | |
| return nil | |
| } | |
| /******************************************************** | |
| * Steady state: rSA0, rSA1, rSA2, fSA1, fSP1 | |
| * Rotation --> -rSA0, +rSA3, +fSA2, +fSP2/-fSP1, -fSA1 | |
| * Steady state: rSA1, rSA2, rSA3, fSA2, fSP2 | |
| *********************************************************/ | |
| // Spis and keys are sorted in such away the one in position 0 is the primary | |
| func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, delIdx int) []*spi { | |
| logrus.Debugf("Updating keys for node: %s (%d,%d,%d)", rIP, newIdx, priIdx, delIdx) | |
| spis := idxs | |
| logrus.Debugf("Current: %v", spis) | |
| // add new | |
| if newIdx != -1 { | |
| spis = append(spis, &spi{ | |
| forward: buildSPI(aIP, rIP, curKeys[newIdx].tag), | |
| reverse: buildSPI(rIP, aIP, curKeys[newIdx].tag), | |
| }) | |
| } | |
| if delIdx != -1 { | |
| // -rSA0 | |
| programSA(lIP, rIP, spis[delIdx], nil, reverse, false) | |
| } | |
| if newIdx > -1 { | |
| // +RSA2 | |
| programSA(lIP, rIP, spis[newIdx], curKeys[newIdx], reverse, true) | |
| } | |
| if priIdx > 0 { | |
| // +fSA2 | |
| fSA2, _, _ := programSA(lIP, rIP, spis[priIdx], curKeys[priIdx], forward, true) | |
| // +fSP2, -fSP1 | |
| fullMask := net.CIDRMask(8*len(fSA2.Src), 8*len(fSA2.Src)) | |
| fSP1 := &netlink.XfrmPolicy{ | |
| Src: &net.IPNet{IP: fSA2.Src, Mask: fullMask}, | |
| Dst: &net.IPNet{IP: fSA2.Dst, Mask: fullMask}, | |
| Dir: netlink.XFRM_DIR_OUT, | |
| Proto: 17, | |
| DstPort: 4789, | |
| Mark: &netlink.XfrmMark{ | |
| Value: mark, | |
| }, | |
| Tmpls: []netlink.XfrmPolicyTmpl{ | |
| { | |
| Src: fSA2.Src, | |
| Dst: fSA2.Dst, | |
| Proto: netlink.XFRM_PROTO_ESP, | |
| Mode: netlink.XFRM_MODE_TRANSPORT, | |
| Spi: fSA2.Spi, | |
| }, | |
| }, | |
| } | |
| logrus.Debugf("Updating fSP{%s}", fSP1) | |
| if err := ns.NlHandle().XfrmPolicyUpdate(fSP1); err != nil { | |
| logrus.Warnf("Failed to update fSP{%s}: %v", fSP1, err) | |
| } | |
| // -fSA1 | |
| programSA(lIP, rIP, spis[0], nil, forward, false) | |
| } | |
| // swap | |
| if priIdx > 0 { | |
| swp := spis[0] | |
| spis[0] = spis[priIdx] | |
| spis[priIdx] = swp | |
| } | |
| // prune | |
| if delIdx != -1 { | |
| if delIdx == 0 { | |
| delIdx = priIdx | |
| } | |
| spis = append(spis[:delIdx], spis[delIdx+1:]...) | |
| } | |
| logrus.Debugf("Updated: %v", spis) | |
| return spis | |
| } | |
| func (n *network) maxMTU() int { | |
| mtu := 1500 | |
| if n.mtu != 0 { | |
| mtu = n.mtu | |
| } | |
| mtu -= vxlanEncap | |
| if n.secure { | |
| // In case of encryption account for the | |
| // esp packet espansion and padding | |
| mtu -= pktExpansion | |
| mtu -= (mtu % 4) | |
| } | |
| return mtu | |
| } |