From 7a84e6b6a41cb96aee6fe451fded2997d789b95d Mon Sep 17 00:00:00 2001 From: Sebastian Wicki Date: Mon, 13 Feb 2023 14:13:19 +0100 Subject: [PATCH] ipam/crd: Fix panic due to concurrent map read and map write [ upstream commit e3a78b0d8e692ba375b35e74d2ca699f8a9e79bb ] This fixes a panic in the `totalPoolSize` function. Previously, `totalPoolSize` required that the `crdAllocator` mutex was held. This however is not sufficient to block concurrent writes to the `allocationPoolSize` map, since that map is written to by `nodeStore.updateLocalNodeResource`, which only holds the `nodeStore` mutex. This commit fixes the issue by moving the `totalPoolSize` function to the `nodeStore` and having it explicitly take the `nodeStore` mutex (instead of requiring the `crdAllocator` mutex to be held). This ensures that all access to `allocationPoolSize` is now protected by the `nodeStore` mutex. The lock ordering is also preserved: The `crdAllocator` calls into `nodeStore`, but not vise-versa. Thus, the lock ordering is always that the `crdAllocator` lock is held first, and the `nodeStore` lock second. Related to: cilium/cilium#23707 Signed-off-by: Sebastian Wicki Signed-off-by: Tam Mach --- pkg/ipam/crd.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pkg/ipam/crd.go b/pkg/ipam/crd.go index c239460c75fdd..9de88fdcc3c07 100644 --- a/pkg/ipam/crd.go +++ b/pkg/ipam/crd.go @@ -560,6 +560,17 @@ func (n *nodeStore) allocateNext(allocated ipamTypes.AllocationMap, family Famil return nil, nil, fmt.Errorf("No more IPs available") } +// totalPoolSize returns the total size of the allocation pool +func (n *nodeStore) totalPoolSize(family Family) int { + n.mutex.RLock() + defer n.mutex.RUnlock() + + if num, ok := n.allocationPoolSize[family]; ok { + return num + } + return 0 +} + // crdAllocator implements the CRD-backed IP allocator type crdAllocator struct { // store is the node store backing the custom resource @@ -818,15 +829,6 @@ func (a *crdAllocator) AllocateNextWithoutSyncUpstream(owner string) (*Allocatio return result, nil } -// totalPoolSize returns the total size of the allocation pool -// a.mutex must be held -func (a *crdAllocator) totalPoolSize() int { - if num, ok := a.store.allocationPoolSize[a.family]; ok { - return num - } - return 0 -} - // Dump provides a status report and lists all allocated IP addresses func (a *crdAllocator) Dump() (map[string]string, string) { a.mutex.RLock() @@ -837,7 +839,7 @@ func (a *crdAllocator) Dump() (map[string]string, string) { allocs[ip] = "" } - status := fmt.Sprintf("%d/%d allocated", len(allocs), a.totalPoolSize()) + status := fmt.Sprintf("%d/%d allocated", len(allocs), a.store.totalPoolSize(a.family)) return allocs, status }