-
Notifications
You must be signed in to change notification settings - Fork 4
/
binpack_simulation.go
371 lines (331 loc) · 13.6 KB
/
binpack_simulation.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/*******************************************************************************
*
* Copyright 2024 SAP SE
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You should have received a copy of the License along with this
* program. If not, you may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*******************************************************************************/
package nova
import (
"fmt"
"math"
"strings"
"github.com/gophercloud/gophercloud/openstack/placement/v1/resourceproviders"
"github.com/sapcc/go-api-declarations/limes"
"github.com/sapcc/go-bits/logg"
"github.com/sapcc/limes/internal/core"
)
// BinpackBehavior contains configuration parameters for the binpack simulation.
// The flags are consumed by BinpackVector.Dot to exclude individual resource
// dimensions from the node-ranking score during placement.
type BinpackBehavior struct {
	// When ranking nodes during placement, do not include the VCPU count dimension in the score.
	ScoreIgnoresCores bool `yaml:"score_ignores_cores"`
	// When ranking nodes during placement, do not include the disk size dimension in the score.
	ScoreIgnoresDisk bool `yaml:"score_ignores_disk"`
	// When ranking nodes during placement, do not include the RAM size dimension in the score.
	ScoreIgnoresRAM bool `yaml:"score_ignores_ram"`
}
// BinpackHypervisor models an entire Nova hypervisor for the purposes of the
// binpacking simulation.
//
// We assume the Nova hypervisor to be an entire cluster of physical nodes.
// We cannot see the sizes of the individual nodes in that cluster, only the
// total capacity and the MaxUnit value on the inventories. We have to make the
// assumption that the individual nodes are of identical size.
type BinpackHypervisor struct {
	// Match is the hypervisor record that this simulation object was built from.
	Match MatchingHypervisor
	// Nodes holds one entry per assumed physical node; pointers are shared so
	// that placements mutate the node in place.
	Nodes []*BinpackNode
}
// BinpackHypervisors adds methods to type []BinpackHypervisor.
// The placement methods (PlaceOneInstance etc.) operate on the whole set at once.
type BinpackHypervisors []BinpackHypervisor
// BinpackNode appears in type BinpackHypervisor. It models a single
// (assumed equally-sized) physical node within the hypervisor cluster.
type BinpackNode struct {
	// Capacity is the total resource capacity of this node.
	Capacity BinpackVector[uint64]
	// Instances lists all instances placed on this node by the simulation.
	Instances []BinpackInstance
}
// BinpackInstance appears in type BinpackNode. It describes a single instance
// that has been placed on the node as part of the binpacking simulation.
type BinpackInstance struct {
	// FlavorName identifies the flavor that this instance was placed for.
	FlavorName string
	// Size is the resource footprint of this instance (after overcommit adjustment).
	Size BinpackVector[uint64]
	Reason string // one of "used", "committed", "pending", "padding" (in descending priority), only for debug logging
}
// guessNodeCountFromMetric estimates how many equally-sized physical nodes
// make up a hypervisor, based on a single inventory metric: the usable total
// (Total minus Reserved) divided by MaxUnit (assumed to be the per-node size).
// Returns an error if the inventory does not carry a MaxUnit value.
func guessNodeCountFromMetric(metric string, inv resourceproviders.Inventory) (uint64, error) {
	if inv.MaxUnit == 0 {
		return 0, fmt.Errorf("missing MaxUnit for %s metric", metric)
	}
	exactCount := float64(inv.Total-inv.Reserved) / float64(inv.MaxUnit)
	// Rounding down is the safe default (11.6 nodes becomes 11, not 12), but
	// slight rounding errors in the data must not drag us below the true value
	// (9.98 nodes should become 10, not 9).
	result := uint64(math.Floor(exactCount))
	if exactCount-float64(result) > 0.98 {
		result = uint64(math.Ceil(exactCount))
	}
	return result, nil
}
// PrepareHypervisorForBinpacking converts a MatchingHypervisor into a BinpackHypervisor.
//
// The node count is estimated independently from the VCPU and MEMORY_MB
// inventories; both estimates must agree, and must be nonzero, for the
// conversion to succeed.
func PrepareHypervisorForBinpacking(h MatchingHypervisor) (BinpackHypervisor, error) {
	// estimate node count under the equal-size-nodes assumption:
	// nodeCount = (total - reserved) / maxUnit
	vcpuNodeCount, err := guessNodeCountFromMetric("VCPU", h.Inventories["VCPU"])
	if err != nil {
		return BinpackHypervisor{}, fmt.Errorf("cannot deduce node count for %s: %w", h.Hypervisor.Description(), err)
	}
	ramNodeCount, err := guessNodeCountFromMetric("MEMORY_MB", h.Inventories["MEMORY_MB"])
	if err != nil {
		return BinpackHypervisor{}, fmt.Errorf("cannot deduce node count for %s: %w", h.Hypervisor.Description(), err)
	}

	// sanity check: both metrics must agree on the same node count
	if vcpuNodeCount != ramNodeCount {
		vcpuInv := h.Inventories["VCPU"]
		ramInv := h.Inventories["MEMORY_MB"]
		return BinpackHypervisor{}, fmt.Errorf(
			"cannot deduce node count for %s: guessing %d based on VCPU (total = %d, reserved = %d, maxUnit = %d), but %d based on MEMORY_MB (total = %d, reserved = %d, maxUnit = %d)",
			h.Hypervisor.Description(),
			vcpuNodeCount, vcpuInv.Total, vcpuInv.Reserved, vcpuInv.MaxUnit,
			ramNodeCount, ramInv.Total, ramInv.Reserved, ramInv.MaxUnit,
		)
	}
	nodeCount := vcpuNodeCount
	if nodeCount == 0 {
		return BinpackHypervisor{}, fmt.Errorf("node count for %s is zero", h.Hypervisor.Description())
	}

	// split the hypervisor's capacity evenly into equal-sized nodes
	nodeTemplate := BinpackNode{
		Capacity: BinpackVector[uint64]{
			VCPUs:    uint64(h.Inventories["VCPU"].MaxUnit),
			MemoryMB: uint64(h.Inventories["MEMORY_MB"].MaxUnit),
			// We do not use `h.Inventories["DISK_GB"].MaxUnit` because it appears to describe the max root
			// disk size for a single instance, rather than the actual available disk size. Maybe this is
			// because the root disks are stored on nearby NFS filers, so MaxUnit is actually the max
			// volume size instead of the total capacity per node. Since we have a good nodeCount number
			// now, we can divide up the total disk space for all nodes.
			LocalGB: uint64(h.Inventories["DISK_GB"].Total-h.Inventories["DISK_GB"].Reserved) / nodeCount,
		},
	}
	result := BinpackHypervisor{
		Match: h,
		Nodes: make([]*BinpackNode, 0, nodeCount),
	}
	for range nodeCount {
		// each node needs its own copy of the template: placements mutate the
		// node through the pointer, so the nodes must not share storage
		node := nodeTemplate
		result.Nodes = append(result.Nodes, &node)
	}
	return result, nil
}
// RenderDebugView prints an overview of the placements in this hypervisor on several logg.Debug lines.
func (h BinpackHypervisor) RenderDebugView(az limes.AvailabilityZone) {
	host := h.Match.Hypervisor.Service.Host
	logg.Debug("[%s][%s] %s", az, host, h.Match.Hypervisor.Description())
	for nodeIdx, node := range h.Nodes {
		// one entry per placed instance, plus a trailing entry for the remaining free space
		parts := make([]string, 0, len(node.Instances)+1)
		for _, instance := range node.Instances {
			parts = append(parts, fmt.Sprintf("%s:%s", instance.Reason, instance.FlavorName))
		}
		parts = append(parts, fmt.Sprintf("%s free", node.free()))
		logg.Debug("[%s][%s][node%03d] %s: %s", az, host, nodeIdx+1, node.Capacity, strings.Join(parts, ", "))
	}
}
// PlaceSeveralInstances calls PlaceOneInstance multiple times.
// It returns false as soon as a single placement fails.
func (hh BinpackHypervisors) PlaceSeveralInstances(ff FullFlavor, reason string, coresOvercommitFactor core.OvercommitFactor, blockedCapacity BinpackVector[uint64], bb BinpackBehavior, count uint64) (ok bool) {
	for i := uint64(0); i < count; i++ {
		if !hh.PlaceOneInstance(ff, reason, coresOvercommitFactor, blockedCapacity, bb) {
			// once one placement fails, all remaining ones would fail as well
			return false
		}
	}
	return true
}
// PlaceOneInstance places a single instance of the given flavor using the vector-dot binpacking algorithm.
// If the instance cannot be placed, false is returned.
func (hh BinpackHypervisors) PlaceOneInstance(ff FullFlavor, reason string, coresOvercommitFactor core.OvercommitFactor, blockedCapacity BinpackVector[uint64], bb BinpackBehavior) (ok bool) {
	// This function implements the vector dot binpacking method described in [Mayank] (section III,
	// subsection D, including the correction presented in the last paragraph of that subsection).
	//
	// Here's the quick summary: We describe capacity and usage as vectors with three dimensions (VCPU,
	// RAM, Disk). When placing an instance, we have the vector corresponding to that instance's
	// flavor, and also one vector per node describing the unused capacity on that node.
	//
	// For each node, we take the instance's size vector S and the node's free capacity vector F, and
	// rescale them component-wise to the range [0..1] (with 0 meaning zero and 1 meaning the node's
	// full capacity). Then we select the node that minimizes the angle between those two vectors.
	// That's the same as maximizing cos(S, F) = |S * F| / |S| * |F|.
	//
	// [Mayank]: https://www.it.iitb.ac.in/~sahoo/papers/cloud2011_mayank.pdf
	instanceSize := BinpackVector[uint64]{
		VCPUs:    coresOvercommitFactor.ApplyInReverseTo(uint64(ff.Flavor.VCPUs)),
		MemoryMB: uint64(ff.Flavor.RAM),
		LocalGB:  uint64(ff.Flavor.Disk),
	}

	// ensure that placing this instance does not encroach on the overall blocked capacity
	var totalFree BinpackVector[uint64]
	for _, hv := range hh {
		for _, node := range hv.Nodes {
			totalFree = totalFree.Add(node.free())
		}
	}
	if !blockedCapacity.Add(instanceSize).FitsIn(totalFree) {
		logg.Debug("refusing to place %s with %s because of blocked capacity %s (total free = %s)",
			ff.Flavor.Name, instanceSize.String(), blockedCapacity.String(), totalFree.String())
		return false
	}

	var (
		chosenNode  *BinpackNode
		chosenScore = 0.0
	)
	for _, hv := range hh {
		// skip hypervisors that the flavor does not accept
		if !ff.MatchesHypervisor(hv.Match) {
			continue
		}
		for _, node := range hv.Nodes {
			// skip nodes that cannot fit this instance at all
			nodeFree := node.free()
			if !instanceSize.FitsIn(nodeFree) {
				continue
			}

			// calculate score as cos(S, F)^2 (maximizing the square of the cosine is the same as
			// maximizing just the cosine, but replaces expensive sqrt() in the denominator with cheap
			// squaring in the numerator)
			s := instanceSize.Div(node.Capacity)
			f := nodeFree.Div(node.Capacity)
			sf := s.Dot(f, bb)
			score := sf * sf / (s.Dot(s, bb) * f.Dot(f, bb))

			// keep the node with the best score seen so far
			if score > chosenScore {
				chosenScore = score
				chosenNode = node
			}
		}
	}

	if chosenNode == nil {
		logg.Debug("refusing to place %s with %s because no node has enough space", ff.Flavor.Name, instanceSize.String())
		return false
	}
	chosenNode.Instances = append(chosenNode.Instances, BinpackInstance{
		FlavorName: ff.Flavor.Name,
		Size:       instanceSize,
		Reason:     reason,
	})
	return true
}
// usage returns the sum of the sizes of all instances placed on this node.
func (n BinpackNode) usage() BinpackVector[uint64] {
	var total BinpackVector[uint64]
	for _, instance := range n.Instances {
		total = total.Add(instance.Size)
	}
	return total
}
// free returns the portion of this node's capacity that is not occupied by
// placed instances (capacity minus usage, clamped at zero per component).
func (n BinpackNode) free() BinpackVector[uint64] {
	return n.Capacity.SaturatingSub(n.usage())
}
// BinpackVector is a three-dimensional resource vector (cores, RAM, disk)
// used throughout the binpack simulation. T is uint64 for concrete capacities
// and sizes, and float64 for the normalized vectors used in score calculations.
type BinpackVector[T float64 | uint64] struct {
	VCPUs    T `json:"vcpus"`
	MemoryMB T `json:"memory_mib"`
	LocalGB  T `json:"local_disk_gib"`
}
// FitsIn reports whether every component of v is less than or equal to the
// corresponding component of other.
func (v BinpackVector[T]) FitsIn(other BinpackVector[T]) bool {
	if v.VCPUs > other.VCPUs {
		return false
	}
	if v.MemoryMB > other.MemoryMB {
		return false
	}
	return v.LocalGB <= other.LocalGB
}
// Add returns the component-wise sum of both vectors.
func (v BinpackVector[T]) Add(other BinpackVector[T]) BinpackVector[T] {
	sum := v
	sum.VCPUs += other.VCPUs
	sum.MemoryMB += other.MemoryMB
	sum.LocalGB += other.LocalGB
	return sum
}
// SaturatingSub returns the component-wise difference of both vectors, but
// never goes below zero (important to avoid underflow for T == uint64).
func (v BinpackVector[T]) SaturatingSub(other BinpackVector[T]) BinpackVector[T] {
	// clamped subtraction: a - b, or 0 if b exceeds a
	sub := func(a, b T) T {
		if a < b {
			return 0
		}
		return a - b
	}
	return BinpackVector[T]{
		VCPUs:    sub(v.VCPUs, other.VCPUs),
		MemoryMB: sub(v.MemoryMB, other.MemoryMB),
		LocalGB:  sub(v.LocalGB, other.LocalGB),
	}
}
// Mul returns the component-wise product of both vectors, widened to float64.
func (v BinpackVector[T]) Mul(other BinpackVector[T]) BinpackVector[float64] {
	lhs := v.AsFloat()
	rhs := other.AsFloat()
	return BinpackVector[float64]{
		VCPUs:    lhs.VCPUs * rhs.VCPUs,
		MemoryMB: lhs.MemoryMB * rhs.MemoryMB,
		LocalGB:  lhs.LocalGB * rhs.LocalGB,
	}
}
// Div returns the component-wise quotient of both vectors, widened to float64.
func (v BinpackVector[T]) Div(other BinpackVector[T]) BinpackVector[float64] {
	num := v.AsFloat()
	denom := other.AsFloat()
	return BinpackVector[float64]{
		VCPUs:    num.VCPUs / denom.VCPUs,
		MemoryMB: num.MemoryMB / denom.MemoryMB,
		LocalGB:  num.LocalGB / denom.LocalGB,
	}
}
// AsFloat converts all components of this vector to float64.
func (v BinpackVector[T]) AsFloat() (result BinpackVector[float64]) {
	result.VCPUs = float64(v.VCPUs)
	result.MemoryMB = float64(v.MemoryMB)
	result.LocalGB = float64(v.LocalGB)
	return
}
// AsUint converts all components of this vector to uint64 (truncating for T == float64).
func (v BinpackVector[T]) AsUint() (result BinpackVector[uint64]) {
	result.VCPUs = uint64(v.VCPUs)
	result.MemoryMB = uint64(v.MemoryMB)
	result.LocalGB = uint64(v.LocalGB)
	return
}
// Dot computes the dot product of both vectors. Dimensions that are disabled
// in the given BinpackBehavior are excluded from the sum.
func (v BinpackVector[T]) Dot(other BinpackVector[T], bb BinpackBehavior) T {
	var sum T
	if !bb.ScoreIgnoresCores {
		sum += v.VCPUs * other.VCPUs
	}
	if !bb.ScoreIgnoresDisk {
		sum += v.LocalGB * other.LocalGB
	}
	if !bb.ScoreIgnoresRAM {
		sum += v.MemoryMB * other.MemoryMB
	}
	return sum
}
// IsAnyZero reports whether at least one component of this vector is zero.
func (v BinpackVector[T]) IsAnyZero() bool {
	if v.VCPUs == 0 || v.MemoryMB == 0 {
		return true
	}
	return v.LocalGB == 0
}
// String renders this vector in a compact form for debug logging.
func (v BinpackVector[T]) String() string {
	// only used for debug output where T = uint64, so these conversions will not hurt
	cores, mem, disk := uint64(v.VCPUs), uint64(v.MemoryMB), uint64(v.LocalGB)
	return fmt.Sprintf("%dc/%dm/%dg", cores, mem, disk)
}
// TotalCapacity returns the sum of capacity over all hypervisor nodes.
func (hh BinpackHypervisors) TotalCapacity() BinpackVector[uint64] {
	var total BinpackVector[uint64]
	for _, hv := range hh {
		for _, node := range hv.Nodes {
			total = total.Add(node.Capacity)
		}
	}
	return total
}
// PlacementCountForFlavor returns how many instances have been placed for the given flavor name.
func (hh BinpackHypervisors) PlacementCountForFlavor(flavorName string) uint64 {
	count := uint64(0)
	for _, hv := range hh {
		for _, node := range hv.Nodes {
			for _, instance := range node.Instances {
				if instance.FlavorName == flavorName {
					count++
				}
			}
		}
	}
	return count
}