Skip to content

Commit

Permalink
ipam/crd: Fix panic due to concurrent map read and map write
Browse files Browse the repository at this point in the history
[ upstream commit e3a78b0 ]

This fixes a panic in the `totalPoolSize` function. Previously,
`totalPoolSize` required that the `crdAllocator` mutex was held. This
however is not sufficient to block concurrent writes to the
`allocationPoolSize` map, since that map is written to by
`nodeStore.updateLocalNodeResource`, which only holds the `nodeStore`
mutex.

This commit fixes the issue by moving the `totalPoolSize` function to
the `nodeStore` and having it explicitly take the `nodeStore` mutex
(instead of requiring the `crdAllocator` mutex to be held). This ensures
that all access to `allocationPoolSize` is now protected by the
`nodeStore` mutex.

The lock ordering is also preserved: The `crdAllocator` calls into
`nodeStore`, but not vise-versa. Thus, the lock ordering is always that
the `crdAllocator` lock is held first, and the `nodeStore` lock second.

Related to: cilium#23707

Signed-off-by: Sebastian Wicki <sebastian@isovalent.com>
Signed-off-by: Tam Mach <tam.mach@cilium.io>
  • Loading branch information
gandro authored and sayboras committed Feb 23, 2023
1 parent a4699da commit 7a84e6b
Showing 1 changed file with 12 additions and 10 deletions.
22 changes: 12 additions & 10 deletions pkg/ipam/crd.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,17 @@ func (n *nodeStore) allocateNext(allocated ipamTypes.AllocationMap, family Famil
return nil, nil, fmt.Errorf("No more IPs available")
}

// totalPoolSize returns the total size of the allocation pool
func (n *nodeStore) totalPoolSize(family Family) int {
n.mutex.RLock()
defer n.mutex.RUnlock()

if num, ok := n.allocationPoolSize[family]; ok {
return num
}
return 0
}

// crdAllocator implements the CRD-backed IP allocator
type crdAllocator struct {
// store is the node store backing the custom resource
Expand Down Expand Up @@ -818,15 +829,6 @@ func (a *crdAllocator) AllocateNextWithoutSyncUpstream(owner string) (*Allocatio
return result, nil
}

// totalPoolSize returns the total size of the allocation pool
// a.mutex must be held
func (a *crdAllocator) totalPoolSize() int {
if num, ok := a.store.allocationPoolSize[a.family]; ok {
return num
}
return 0
}

// Dump provides a status report and lists all allocated IP addresses
func (a *crdAllocator) Dump() (map[string]string, string) {
a.mutex.RLock()
Expand All @@ -837,7 +839,7 @@ func (a *crdAllocator) Dump() (map[string]string, string) {
allocs[ip] = ""
}

status := fmt.Sprintf("%d/%d allocated", len(allocs), a.totalPoolSize())
status := fmt.Sprintf("%d/%d allocated", len(allocs), a.store.totalPoolSize(a.family))
return allocs, status
}

Expand Down

0 comments on commit 7a84e6b

Please sign in to comment.