diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go index 17686d756433d..c694ab9c0aa92 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go @@ -251,13 +251,13 @@ func (a *cpuAccumulator) isNUMANodeFree(numaID int) bool { // Returns true if the supplied socket is fully available in `a.details`. // "fully available" means that all the CPUs in it are free. func (a *cpuAccumulator) isSocketFree(socketID int) bool { - return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket() + return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUDetails.CPUsInSockets(socketID).Size() } // Returns true if the supplied core is fully available in `a.details`. // "fully available" means that all the CPUs in it are free. func (a *cpuAccumulator) isCoreFree(coreID int) bool { - return a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore() + return a.details.CPUsInCores(coreID).Size() == a.topo.CPUDetails.CPUsInCores(coreID).Size() } // Returns free NUMA Node IDs as a slice sorted by sortAvailableNUMANodes(). 
diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go index 63b026b1979f2..0f489d3896588 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go @@ -110,6 +110,18 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { mustParseCPUSet(t, "0-40,42-49,51-68,71-79"), []int{}, }, + { + "multi numa, dual socket, HT, last CPU offline, socket-0 free", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "0-63,128-191"), + []int{0}, + }, + { + "multi numa, dual socket, HT, last CPU offline, socket-1 free", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "64-127,192-254"), + []int{1}, + }, } for _, tc := range testCases { @@ -119,7 +131,6 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) { sort.Ints(result) if !reflect.DeepEqual(result, tc.expect) { t.Errorf("expected %v to equal %v", result, tc.expect) - } }) } @@ -210,6 +221,24 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) { mustParseCPUSet(t, "0-9,11-59,61-79"), []int{}, }, + { + "multi numa, dual socket, HT, last CPU offline, NUMA node-0 free", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "0-15,128-143"), + []int{0}, + }, + { + "multi numa, dual socket, HT, last CPU offline, NUMA node-7 free", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "112-127,240-254"), + []int{7}, + }, + { + "multi numa, dual socket, HT, last CPU offline, 0 NUMA nodes free(1 CPU consumed)", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "0-15,128-142"), + []int{}, + }, } for _, tc := range testCases { @@ -331,6 +360,18 @@ func TestCPUAccumulatorFreeCores(t *testing.T) { cpuset.New(2, 3, 4, 5, 8, 9, 10, 11), []int{2, 4, 3, 5}, }, + { + "multi numa, dual socket, HT, last CPU offline, 0 cores free (1 partially consumed)", + 
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "0"), + []int{}, + }, + { + "multi numa, dual socket, HT, last CPU offline, 1 cores free (1 online CPU, 1 offline CPU)", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "127"), + []int{127}, + }, } for _, tc := range testCases { @@ -854,6 +895,29 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) { "", mustParseCPUSet(t, "43-47,75-79,96,101-105,171-174,203-206,229-232"), }, + // this case demonstrates that the policy does not guarantee Full Physical CPUs yet. Once the behavior changes, it can be deleted + // the provided available Numas are: + // - 0,1 both aligned 5 Cores, and 2 single CPUs within 2 Cores + // - 2,3 both aligned 6 Cores + // the result chooses all 24 CPUs in Numa 0,1, but Numa 2,3 are better + { + "allocate 10 full cores distributed across first 2 NUMA nodes and 4 CPUs spilling over to each of NUMA 0,1", + topoDualSocketMultiNumaPerSocketHTLarge, + mustParseCPUSet(t, "0-5,129-134,16-21,144-149,32-37,160-165,48-53,176-181"), + 24, + 2, + "", + mustParseCPUSet(t, "0-5,16-21,129-134,144-149"), + }, + { + "allocate 10 full cores distributed across first 2 NUMA nodes and 4 CPUs spilling over to each of NUMA 0,1(CPU 255 offline)", + topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline, + mustParseCPUSet(t, "0-5,129-134,16-21,144-149,32-37,160-165,48-53,176-181"), + 24, + 2, + "", + mustParseCPUSet(t, "0-5,16-21,129-134,144-149"), + }, }...) 
for _, tc := range testCases { diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index d22a6a64d5ea9..1af341b648607 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -310,7 +310,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai }() if p.options.FullPhysicalCPUsOnly { - CPUsPerCore := p.topology.CPUsPerCore() + CPUsPerCore := p.topology.MaxCPUsPerCore() if (numCPUs % CPUsPerCore) != 0 { // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted // if the CPU requested is a multiple of the number of virtual cpus per physical cores. @@ -486,7 +486,7 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) if p.options.DistributeCPUsAcrossNUMA { cpuGroupSize := 1 if p.options.FullPhysicalCPUsOnly { - cpuGroupSize = p.topology.CPUsPerCore() + cpuGroupSize = p.topology.MaxCPUsPerCore() } return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize) } diff --git a/pkg/kubelet/cm/cpumanager/policy_test.go b/pkg/kubelet/cm/cpumanager/policy_test.go index 02f0898063a09..88c49c28bed72 100644 --- a/pkg/kubelet/cm/cpumanager/policy_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_test.go @@ -892,4 +892,286 @@ var ( 255: {CoreID: 127, SocketID: 1, NUMANodeID: 7}, }, } + + /* + Topology from dual AMD EPYC 7713 64-Core Processor; with 1 CPU offline; lscpu excerpt + CPU(s): 256 + On-line CPU(s) list: 0-254 + Off-line CPU(s) list: 255 + Thread(s) per core: 1 + Core(s) per socket: 64 + Socket(s): 2 + NUMA node(s): 8 + NUMA node0 CPU(s): 0-15,128-143 + NUMA node1 CPU(s): 16-31,144-159 + NUMA node2 CPU(s): 32-47,160-175 + NUMA node3 CPU(s): 48-63,176-191 + NUMA node4 CPU(s): 64-79,192-207 + NUMA node5 CPU(s): 80-95,208-223 + NUMA node6 CPU(s): 96-111,224-239 + NUMA node7 CPU(s): 112-127,240-254 + */ + 
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline = &topology.CPUTopology{ + NumCPUs: 255, + NumSockets: 2, + NumCores: 128, + NumNUMANodes: 8, + CPUDetails: map[int]topology.CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 4: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 5: {CoreID: 5, SocketID: 0, NUMANodeID: 0}, + 6: {CoreID: 6, SocketID: 0, NUMANodeID: 0}, + 7: {CoreID: 7, SocketID: 0, NUMANodeID: 0}, + 8: {CoreID: 8, SocketID: 0, NUMANodeID: 0}, + 9: {CoreID: 9, SocketID: 0, NUMANodeID: 0}, + 10: {CoreID: 10, SocketID: 0, NUMANodeID: 0}, + 11: {CoreID: 11, SocketID: 0, NUMANodeID: 0}, + 12: {CoreID: 12, SocketID: 0, NUMANodeID: 0}, + 13: {CoreID: 13, SocketID: 0, NUMANodeID: 0}, + 14: {CoreID: 14, SocketID: 0, NUMANodeID: 0}, + 15: {CoreID: 15, SocketID: 0, NUMANodeID: 0}, + 16: {CoreID: 16, SocketID: 0, NUMANodeID: 1}, + 17: {CoreID: 17, SocketID: 0, NUMANodeID: 1}, + 18: {CoreID: 18, SocketID: 0, NUMANodeID: 1}, + 19: {CoreID: 19, SocketID: 0, NUMANodeID: 1}, + 20: {CoreID: 20, SocketID: 0, NUMANodeID: 1}, + 21: {CoreID: 21, SocketID: 0, NUMANodeID: 1}, + 22: {CoreID: 22, SocketID: 0, NUMANodeID: 1}, + 23: {CoreID: 23, SocketID: 0, NUMANodeID: 1}, + 24: {CoreID: 24, SocketID: 0, NUMANodeID: 1}, + 25: {CoreID: 25, SocketID: 0, NUMANodeID: 1}, + 26: {CoreID: 26, SocketID: 0, NUMANodeID: 1}, + 27: {CoreID: 27, SocketID: 0, NUMANodeID: 1}, + 28: {CoreID: 28, SocketID: 0, NUMANodeID: 1}, + 29: {CoreID: 29, SocketID: 0, NUMANodeID: 1}, + 30: {CoreID: 30, SocketID: 0, NUMANodeID: 1}, + 31: {CoreID: 31, SocketID: 0, NUMANodeID: 1}, + 32: {CoreID: 32, SocketID: 0, NUMANodeID: 2}, + 33: {CoreID: 33, SocketID: 0, NUMANodeID: 2}, + 34: {CoreID: 34, SocketID: 0, NUMANodeID: 2}, + 35: {CoreID: 35, SocketID: 0, NUMANodeID: 2}, + 36: {CoreID: 36, SocketID: 0, NUMANodeID: 2}, + 37: {CoreID: 37, SocketID: 0, NUMANodeID: 2}, + 
38: {CoreID: 38, SocketID: 0, NUMANodeID: 2}, + 39: {CoreID: 39, SocketID: 0, NUMANodeID: 2}, + 40: {CoreID: 40, SocketID: 0, NUMANodeID: 2}, + 41: {CoreID: 41, SocketID: 0, NUMANodeID: 2}, + 42: {CoreID: 42, SocketID: 0, NUMANodeID: 2}, + 43: {CoreID: 43, SocketID: 0, NUMANodeID: 2}, + 44: {CoreID: 44, SocketID: 0, NUMANodeID: 2}, + 45: {CoreID: 45, SocketID: 0, NUMANodeID: 2}, + 46: {CoreID: 46, SocketID: 0, NUMANodeID: 2}, + 47: {CoreID: 47, SocketID: 0, NUMANodeID: 2}, + 48: {CoreID: 48, SocketID: 0, NUMANodeID: 3}, + 49: {CoreID: 49, SocketID: 0, NUMANodeID: 3}, + 50: {CoreID: 50, SocketID: 0, NUMANodeID: 3}, + 51: {CoreID: 51, SocketID: 0, NUMANodeID: 3}, + 52: {CoreID: 52, SocketID: 0, NUMANodeID: 3}, + 53: {CoreID: 53, SocketID: 0, NUMANodeID: 3}, + 54: {CoreID: 54, SocketID: 0, NUMANodeID: 3}, + 55: {CoreID: 55, SocketID: 0, NUMANodeID: 3}, + 56: {CoreID: 56, SocketID: 0, NUMANodeID: 3}, + 57: {CoreID: 57, SocketID: 0, NUMANodeID: 3}, + 58: {CoreID: 58, SocketID: 0, NUMANodeID: 3}, + 59: {CoreID: 59, SocketID: 0, NUMANodeID: 3}, + 60: {CoreID: 60, SocketID: 0, NUMANodeID: 3}, + 61: {CoreID: 61, SocketID: 0, NUMANodeID: 3}, + 62: {CoreID: 62, SocketID: 0, NUMANodeID: 3}, + 63: {CoreID: 63, SocketID: 0, NUMANodeID: 3}, + 64: {CoreID: 64, SocketID: 1, NUMANodeID: 4}, + 65: {CoreID: 65, SocketID: 1, NUMANodeID: 4}, + 66: {CoreID: 66, SocketID: 1, NUMANodeID: 4}, + 67: {CoreID: 67, SocketID: 1, NUMANodeID: 4}, + 68: {CoreID: 68, SocketID: 1, NUMANodeID: 4}, + 69: {CoreID: 69, SocketID: 1, NUMANodeID: 4}, + 70: {CoreID: 70, SocketID: 1, NUMANodeID: 4}, + 71: {CoreID: 71, SocketID: 1, NUMANodeID: 4}, + 72: {CoreID: 72, SocketID: 1, NUMANodeID: 4}, + 73: {CoreID: 73, SocketID: 1, NUMANodeID: 4}, + 74: {CoreID: 74, SocketID: 1, NUMANodeID: 4}, + 75: {CoreID: 75, SocketID: 1, NUMANodeID: 4}, + 76: {CoreID: 76, SocketID: 1, NUMANodeID: 4}, + 77: {CoreID: 77, SocketID: 1, NUMANodeID: 4}, + 78: {CoreID: 78, SocketID: 1, NUMANodeID: 4}, + 79: {CoreID: 79, SocketID: 1, 
NUMANodeID: 4}, + 80: {CoreID: 80, SocketID: 1, NUMANodeID: 5}, + 81: {CoreID: 81, SocketID: 1, NUMANodeID: 5}, + 82: {CoreID: 82, SocketID: 1, NUMANodeID: 5}, + 83: {CoreID: 83, SocketID: 1, NUMANodeID: 5}, + 84: {CoreID: 84, SocketID: 1, NUMANodeID: 5}, + 85: {CoreID: 85, SocketID: 1, NUMANodeID: 5}, + 86: {CoreID: 86, SocketID: 1, NUMANodeID: 5}, + 87: {CoreID: 87, SocketID: 1, NUMANodeID: 5}, + 88: {CoreID: 88, SocketID: 1, NUMANodeID: 5}, + 89: {CoreID: 89, SocketID: 1, NUMANodeID: 5}, + 90: {CoreID: 90, SocketID: 1, NUMANodeID: 5}, + 91: {CoreID: 91, SocketID: 1, NUMANodeID: 5}, + 92: {CoreID: 92, SocketID: 1, NUMANodeID: 5}, + 93: {CoreID: 93, SocketID: 1, NUMANodeID: 5}, + 94: {CoreID: 94, SocketID: 1, NUMANodeID: 5}, + 95: {CoreID: 95, SocketID: 1, NUMANodeID: 5}, + 96: {CoreID: 96, SocketID: 1, NUMANodeID: 6}, + 97: {CoreID: 97, SocketID: 1, NUMANodeID: 6}, + 98: {CoreID: 98, SocketID: 1, NUMANodeID: 6}, + 99: {CoreID: 99, SocketID: 1, NUMANodeID: 6}, + 100: {CoreID: 100, SocketID: 1, NUMANodeID: 6}, + 101: {CoreID: 101, SocketID: 1, NUMANodeID: 6}, + 102: {CoreID: 102, SocketID: 1, NUMANodeID: 6}, + 103: {CoreID: 103, SocketID: 1, NUMANodeID: 6}, + 104: {CoreID: 104, SocketID: 1, NUMANodeID: 6}, + 105: {CoreID: 105, SocketID: 1, NUMANodeID: 6}, + 106: {CoreID: 106, SocketID: 1, NUMANodeID: 6}, + 107: {CoreID: 107, SocketID: 1, NUMANodeID: 6}, + 108: {CoreID: 108, SocketID: 1, NUMANodeID: 6}, + 109: {CoreID: 109, SocketID: 1, NUMANodeID: 6}, + 110: {CoreID: 110, SocketID: 1, NUMANodeID: 6}, + 111: {CoreID: 111, SocketID: 1, NUMANodeID: 6}, + 112: {CoreID: 112, SocketID: 1, NUMANodeID: 7}, + 113: {CoreID: 113, SocketID: 1, NUMANodeID: 7}, + 114: {CoreID: 114, SocketID: 1, NUMANodeID: 7}, + 115: {CoreID: 115, SocketID: 1, NUMANodeID: 7}, + 116: {CoreID: 116, SocketID: 1, NUMANodeID: 7}, + 117: {CoreID: 117, SocketID: 1, NUMANodeID: 7}, + 118: {CoreID: 118, SocketID: 1, NUMANodeID: 7}, + 119: {CoreID: 119, SocketID: 1, NUMANodeID: 7}, + 120: {CoreID: 120, 
SocketID: 1, NUMANodeID: 7}, + 121: {CoreID: 121, SocketID: 1, NUMANodeID: 7}, + 122: {CoreID: 122, SocketID: 1, NUMANodeID: 7}, + 123: {CoreID: 123, SocketID: 1, NUMANodeID: 7}, + 124: {CoreID: 124, SocketID: 1, NUMANodeID: 7}, + 125: {CoreID: 125, SocketID: 1, NUMANodeID: 7}, + 126: {CoreID: 126, SocketID: 1, NUMANodeID: 7}, + 127: {CoreID: 127, SocketID: 1, NUMANodeID: 7}, + 128: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 129: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 130: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 131: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 132: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 133: {CoreID: 5, SocketID: 0, NUMANodeID: 0}, + 134: {CoreID: 6, SocketID: 0, NUMANodeID: 0}, + 135: {CoreID: 7, SocketID: 0, NUMANodeID: 0}, + 136: {CoreID: 8, SocketID: 0, NUMANodeID: 0}, + 137: {CoreID: 9, SocketID: 0, NUMANodeID: 0}, + 138: {CoreID: 10, SocketID: 0, NUMANodeID: 0}, + 139: {CoreID: 11, SocketID: 0, NUMANodeID: 0}, + 140: {CoreID: 12, SocketID: 0, NUMANodeID: 0}, + 141: {CoreID: 13, SocketID: 0, NUMANodeID: 0}, + 142: {CoreID: 14, SocketID: 0, NUMANodeID: 0}, + 143: {CoreID: 15, SocketID: 0, NUMANodeID: 0}, + 144: {CoreID: 16, SocketID: 0, NUMANodeID: 1}, + 145: {CoreID: 17, SocketID: 0, NUMANodeID: 1}, + 146: {CoreID: 18, SocketID: 0, NUMANodeID: 1}, + 147: {CoreID: 19, SocketID: 0, NUMANodeID: 1}, + 148: {CoreID: 20, SocketID: 0, NUMANodeID: 1}, + 149: {CoreID: 21, SocketID: 0, NUMANodeID: 1}, + 150: {CoreID: 22, SocketID: 0, NUMANodeID: 1}, + 151: {CoreID: 23, SocketID: 0, NUMANodeID: 1}, + 152: {CoreID: 24, SocketID: 0, NUMANodeID: 1}, + 153: {CoreID: 25, SocketID: 0, NUMANodeID: 1}, + 154: {CoreID: 26, SocketID: 0, NUMANodeID: 1}, + 155: {CoreID: 27, SocketID: 0, NUMANodeID: 1}, + 156: {CoreID: 28, SocketID: 0, NUMANodeID: 1}, + 157: {CoreID: 29, SocketID: 0, NUMANodeID: 1}, + 158: {CoreID: 30, SocketID: 0, NUMANodeID: 1}, + 159: {CoreID: 31, SocketID: 0, NUMANodeID: 1}, + 160: {CoreID: 32, SocketID: 0, NUMANodeID: 2}, + 161: 
{CoreID: 33, SocketID: 0, NUMANodeID: 2}, + 162: {CoreID: 34, SocketID: 0, NUMANodeID: 2}, + 163: {CoreID: 35, SocketID: 0, NUMANodeID: 2}, + 164: {CoreID: 36, SocketID: 0, NUMANodeID: 2}, + 165: {CoreID: 37, SocketID: 0, NUMANodeID: 2}, + 166: {CoreID: 38, SocketID: 0, NUMANodeID: 2}, + 167: {CoreID: 39, SocketID: 0, NUMANodeID: 2}, + 168: {CoreID: 40, SocketID: 0, NUMANodeID: 2}, + 169: {CoreID: 41, SocketID: 0, NUMANodeID: 2}, + 170: {CoreID: 42, SocketID: 0, NUMANodeID: 2}, + 171: {CoreID: 43, SocketID: 0, NUMANodeID: 2}, + 172: {CoreID: 44, SocketID: 0, NUMANodeID: 2}, + 173: {CoreID: 45, SocketID: 0, NUMANodeID: 2}, + 174: {CoreID: 46, SocketID: 0, NUMANodeID: 2}, + 175: {CoreID: 47, SocketID: 0, NUMANodeID: 2}, + 176: {CoreID: 48, SocketID: 0, NUMANodeID: 3}, + 177: {CoreID: 49, SocketID: 0, NUMANodeID: 3}, + 178: {CoreID: 50, SocketID: 0, NUMANodeID: 3}, + 179: {CoreID: 51, SocketID: 0, NUMANodeID: 3}, + 180: {CoreID: 52, SocketID: 0, NUMANodeID: 3}, + 181: {CoreID: 53, SocketID: 0, NUMANodeID: 3}, + 182: {CoreID: 54, SocketID: 0, NUMANodeID: 3}, + 183: {CoreID: 55, SocketID: 0, NUMANodeID: 3}, + 184: {CoreID: 56, SocketID: 0, NUMANodeID: 3}, + 185: {CoreID: 57, SocketID: 0, NUMANodeID: 3}, + 186: {CoreID: 58, SocketID: 0, NUMANodeID: 3}, + 187: {CoreID: 59, SocketID: 0, NUMANodeID: 3}, + 188: {CoreID: 60, SocketID: 0, NUMANodeID: 3}, + 189: {CoreID: 61, SocketID: 0, NUMANodeID: 3}, + 190: {CoreID: 62, SocketID: 0, NUMANodeID: 3}, + 191: {CoreID: 63, SocketID: 0, NUMANodeID: 3}, + 192: {CoreID: 64, SocketID: 1, NUMANodeID: 4}, + 193: {CoreID: 65, SocketID: 1, NUMANodeID: 4}, + 194: {CoreID: 66, SocketID: 1, NUMANodeID: 4}, + 195: {CoreID: 67, SocketID: 1, NUMANodeID: 4}, + 196: {CoreID: 68, SocketID: 1, NUMANodeID: 4}, + 197: {CoreID: 69, SocketID: 1, NUMANodeID: 4}, + 198: {CoreID: 70, SocketID: 1, NUMANodeID: 4}, + 199: {CoreID: 71, SocketID: 1, NUMANodeID: 4}, + 200: {CoreID: 72, SocketID: 1, NUMANodeID: 4}, + 201: {CoreID: 73, SocketID: 1, NUMANodeID: 
4}, + 202: {CoreID: 74, SocketID: 1, NUMANodeID: 4}, + 203: {CoreID: 75, SocketID: 1, NUMANodeID: 4}, + 204: {CoreID: 76, SocketID: 1, NUMANodeID: 4}, + 205: {CoreID: 77, SocketID: 1, NUMANodeID: 4}, + 206: {CoreID: 78, SocketID: 1, NUMANodeID: 4}, + 207: {CoreID: 79, SocketID: 1, NUMANodeID: 4}, + 208: {CoreID: 80, SocketID: 1, NUMANodeID: 5}, + 209: {CoreID: 81, SocketID: 1, NUMANodeID: 5}, + 210: {CoreID: 82, SocketID: 1, NUMANodeID: 5}, + 211: {CoreID: 83, SocketID: 1, NUMANodeID: 5}, + 212: {CoreID: 84, SocketID: 1, NUMANodeID: 5}, + 213: {CoreID: 85, SocketID: 1, NUMANodeID: 5}, + 214: {CoreID: 86, SocketID: 1, NUMANodeID: 5}, + 215: {CoreID: 87, SocketID: 1, NUMANodeID: 5}, + 216: {CoreID: 88, SocketID: 1, NUMANodeID: 5}, + 217: {CoreID: 89, SocketID: 1, NUMANodeID: 5}, + 218: {CoreID: 90, SocketID: 1, NUMANodeID: 5}, + 219: {CoreID: 91, SocketID: 1, NUMANodeID: 5}, + 220: {CoreID: 92, SocketID: 1, NUMANodeID: 5}, + 221: {CoreID: 93, SocketID: 1, NUMANodeID: 5}, + 222: {CoreID: 94, SocketID: 1, NUMANodeID: 5}, + 223: {CoreID: 95, SocketID: 1, NUMANodeID: 5}, + 224: {CoreID: 96, SocketID: 1, NUMANodeID: 6}, + 225: {CoreID: 97, SocketID: 1, NUMANodeID: 6}, + 226: {CoreID: 98, SocketID: 1, NUMANodeID: 6}, + 227: {CoreID: 99, SocketID: 1, NUMANodeID: 6}, + 228: {CoreID: 100, SocketID: 1, NUMANodeID: 6}, + 229: {CoreID: 101, SocketID: 1, NUMANodeID: 6}, + 230: {CoreID: 102, SocketID: 1, NUMANodeID: 6}, + 231: {CoreID: 103, SocketID: 1, NUMANodeID: 6}, + 232: {CoreID: 104, SocketID: 1, NUMANodeID: 6}, + 233: {CoreID: 105, SocketID: 1, NUMANodeID: 6}, + 234: {CoreID: 106, SocketID: 1, NUMANodeID: 6}, + 235: {CoreID: 107, SocketID: 1, NUMANodeID: 6}, + 236: {CoreID: 108, SocketID: 1, NUMANodeID: 6}, + 237: {CoreID: 109, SocketID: 1, NUMANodeID: 6}, + 238: {CoreID: 110, SocketID: 1, NUMANodeID: 6}, + 239: {CoreID: 111, SocketID: 1, NUMANodeID: 6}, + 240: {CoreID: 112, SocketID: 1, NUMANodeID: 7}, + 241: {CoreID: 113, SocketID: 1, NUMANodeID: 7}, + 242: {CoreID: 114, 
SocketID: 1, NUMANodeID: 7}, + 243: {CoreID: 115, SocketID: 1, NUMANodeID: 7}, + 244: {CoreID: 116, SocketID: 1, NUMANodeID: 7}, + 245: {CoreID: 117, SocketID: 1, NUMANodeID: 7}, + 246: {CoreID: 118, SocketID: 1, NUMANodeID: 7}, + 247: {CoreID: 119, SocketID: 1, NUMANodeID: 7}, + 248: {CoreID: 120, SocketID: 1, NUMANodeID: 7}, + 249: {CoreID: 121, SocketID: 1, NUMANodeID: 7}, + 250: {CoreID: 122, SocketID: 1, NUMANodeID: 7}, + 251: {CoreID: 123, SocketID: 1, NUMANodeID: 7}, + 252: {CoreID: 124, SocketID: 1, NUMANodeID: 7}, + 253: {CoreID: 125, SocketID: 1, NUMANodeID: 7}, + 254: {CoreID: 126, SocketID: 1, NUMANodeID: 7}, + }, + } ) diff --git a/pkg/kubelet/cm/cpumanager/topology/topology.go b/pkg/kubelet/cm/cpumanager/topology/topology.go index 62d91a5dee5d0..9dfa255511ce0 100644 --- a/pkg/kubelet/cm/cpumanager/topology/topology.go +++ b/pkg/kubelet/cm/cpumanager/topology/topology.go @@ -44,22 +44,22 @@ type CPUTopology struct { CPUDetails CPUDetails } -// CPUsPerCore returns the number of logical CPUs are associated with +// MaxCPUsPerCore returns the maximum, over all cores, of the number of logical CPUs associated with // each core. -func (topo *CPUTopology) CPUsPerCore() int { +func (topo *CPUTopology) MaxCPUsPerCore() int { if topo.NumCores == 0 { return 0 } - return topo.NumCPUs / topo.NumCores -} -// CPUsPerSocket returns the number of logical CPUs are associated with -// each socket. 
-func (topo *CPUTopology) CPUsPerSocket() int { - if topo.NumSockets == 0 { - return 0 + cpusByCore := make(map[int]int) + maxCPUsPerCore := 1 + for _, info := range topo.CPUDetails { + cpusByCore[info.CoreID] += 1 + if cpusByCore[info.CoreID] > maxCPUsPerCore { + maxCPUsPerCore = cpusByCore[info.CoreID] + } } - return topo.NumCPUs / topo.NumSockets + return maxCPUsPerCore } // CPUCoreID returns the physical core ID which the given logical CPU diff --git a/pkg/kubelet/cm/cpumanager/topology/topology_test.go b/pkg/kubelet/cm/cpumanager/topology/topology_test.go index 37d8f7f01fc54..26478aee5c6e7 100644 --- a/pkg/kubelet/cm/cpumanager/topology/topology_test.go +++ b/pkg/kubelet/cm/cpumanager/topology/topology_test.go @@ -1100,3 +1100,104 @@ func TestCPUNUMANodeID(t *testing.T) { }) } } + +func TestMaxCPUsPerCore(t *testing.T) { + tests := []struct { + name string + topo *CPUTopology + want int + }{ + { + name: "No HT", + topo: &CPUTopology{ + NumCPUs: 8, + NumSockets: 2, + NumCores: 8, + CPUDetails: map[int]CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 4: {CoreID: 4, SocketID: 1, NUMANodeID: 1}, + 5: {CoreID: 5, SocketID: 1, NUMANodeID: 1}, + 6: {CoreID: 6, SocketID: 1, NUMANodeID: 1}, + 7: {CoreID: 7, SocketID: 1, NUMANodeID: 1}, + }, + }, + want: 1, + }, + { + name: "Single Socket HT", + topo: &CPUTopology{ + NumCPUs: 8, + NumSockets: 1, + NumCores: 4, + CPUDetails: map[int]CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + 4: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 5: {CoreID: 1, SocketID: 0, NUMANodeID: 0}, + 6: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 7: {CoreID: 3, SocketID: 0, NUMANodeID: 0}, + }, + }, + want: 2, + }, + { + name: "Dual Socket HT", + topo: 
&CPUTopology{ + NumCPUs: 12, + NumSockets: 2, + NumCores: 6, + CPUDetails: map[int]CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 1, NUMANodeID: 1}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 1, NUMANodeID: 1}, + 4: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 5: {CoreID: 5, SocketID: 1, NUMANodeID: 1}, + 6: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 7: {CoreID: 1, SocketID: 1, NUMANodeID: 1}, + 8: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 9: {CoreID: 3, SocketID: 1, NUMANodeID: 1}, + 10: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 11: {CoreID: 5, SocketID: 1, NUMANodeID: 1}, + }, + }, + want: 2, + }, + { + name: "Dual Socket HT with CPU offline", + topo: &CPUTopology{ + NumCPUs: 11, + NumSockets: 2, + NumCores: 6, + CPUDetails: map[int]CPUInfo{ + 0: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 1: {CoreID: 1, SocketID: 1, NUMANodeID: 1}, + 2: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 3: {CoreID: 3, SocketID: 1, NUMANodeID: 1}, + 4: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + 5: {CoreID: 5, SocketID: 1, NUMANodeID: 1}, + 6: {CoreID: 0, SocketID: 0, NUMANodeID: 0}, + 7: {CoreID: 1, SocketID: 1, NUMANodeID: 1}, + 8: {CoreID: 2, SocketID: 0, NUMANodeID: 0}, + 9: {CoreID: 3, SocketID: 1, NUMANodeID: 1}, + 10: {CoreID: 4, SocketID: 0, NUMANodeID: 0}, + }, + }, + want: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.topo.MaxCPUsPerCore() + if got != tt.want { + t.Errorf("MaxCPUsPerCore() returned %v, want %v", got, tt.want) + } + }) + } +}