-
Notifications
You must be signed in to change notification settings - Fork 29
/
types.go
321 lines (266 loc) · 11.9 KB
/
types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"bytes"
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
metrics "k8s.io/metrics/pkg/apis/metrics/v1beta1"
"sigs.k8s.io/yaml"
"github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1"
)
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +kubebuilder:subresource:status
// +kubebuilder:resource:shortName=spd
// ServiceProfileDescriptor captures information about a service workload.
// It combines the desired profiling configuration (Spec) with the observed
// information reported for the workload (Status).
type ServiceProfileDescriptor struct {
metav1.TypeMeta `json:",inline"`
// Standard object metadata.
// +optional
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec defines the behavior of a ServiceProfileDescriptor.
// +optional
Spec ServiceProfileDescriptorSpec `json:"spec,omitempty"`
// Status represents the concrete samples of ServiceProfileData with multiple resources.
// +optional
Status ServiceProfileDescriptorStatus `json:"status,omitempty"`
}
// DecodeNestedObjects walks every entry of .spec.extendedIndicator and asks it
// to decode its nested indicators with d. Iteration stops at the first failure,
// and the returned error is annotated with the index of the offending entry.
func (c *ServiceProfileDescriptor) DecodeNestedObjects(d runtime.Decoder) error {
	extended := c.Spec.ExtendedIndicator
	for idx := range extended {
		// Index into the slice so the entry is updated in place, not a copy.
		if err := extended[idx].decodeNestedObjects(d); err != nil {
			return fmt.Errorf("decoding .spec.extendedIndicator[%d]: %w", idx, err)
		}
	}
	return nil
}
// EncodeNestedObjects walks every entry of .spec.extendedIndicator and asks it
// to encode its nested indicators with e. Iteration stops at the first failure,
// and the returned error is annotated with the index of the offending entry.
func (c *ServiceProfileDescriptor) EncodeNestedObjects(e runtime.Encoder) error {
	extended := c.Spec.ExtendedIndicator
	for idx := range extended {
		// Index into the slice so the entry is updated in place, not a copy.
		if err := extended[idx].encodeNestedObjects(e); err != nil {
			return fmt.Errorf("encoding .spec.extendedIndicator[%d]: %w", idx, err)
		}
	}
	return nil
}
// ServiceProfileDescriptorSpec is the specification of the behavior of the SPD.
type ServiceProfileDescriptorSpec struct {
// TargetRef points to the controller managing the set of pods for the
// spd-controller to control - e.g. Deployment, StatefulSet.
// An SPD should have a one-to-one mapping relationship with its workload.
TargetRef v1alpha1.CrossVersionObjectReference `json:"targetRef"`
// BaselinePercent marks off a bunch of instances, and skips adjusting Knobs
// for them; those instances are defined as baselines, and can be compared
// with other (experimental/production) instances to demonstrate the benefits.
// If BaselinePercent is not set, we should take all instances as production instances.
// +kubebuilder:validation:Minimum=0
// +kubebuilder:validation:Maximum=100
// +optional
BaselinePercent *int32 `json:"baselinePercent,omitempty"`
// ExtendedIndicator lists the extended indicators of the service, keyed by name.
// If multiple ExtendedIndicator are defined, it means that the service should
// satisfy all the strategies in those extended indicators.
// +optional
// +listMapKey=name
// +listType=map
ExtendedIndicator []ServiceExtendedIndicatorSpec `json:"extendedIndicator,omitempty"`
// BusinessIndicator lists the business indicators of the service, keyed by name.
// If multiple BusinessIndicator are defined, it means that we should
// try to satisfy all of those indicator targets.
// +optional
// +listMapKey=name
// +listType=map
BusinessIndicator []ServiceBusinessIndicatorSpec `json:"businessIndicator,omitempty"`
// SystemIndicator lists the system indicators of the service, keyed by name.
// If multiple SystemIndicator are defined, it means that we should
// try to satisfy all of those indicator targets.
// +optional
// +listMapKey=name
// +listType=map
SystemIndicator []ServiceSystemIndicatorSpec `json:"systemIndicator,omitempty"`
}
// ServiceExtendedIndicatorSpec specifies extended workload characteristics and can serve as
// an extension to indicators beyond business and system scopes. Examples include policies for
// custom memory reclamation and I/O limitations specific to a service.
type ServiceExtendedIndicatorSpec struct {
// Name defines the name of the extended module.
Name string `json:"name"`
// BaselinePercent marks off a bunch of instances, and skips applying the extended indicator
// for them; those instances are defined as baselines, and can be compared
// with other (experimental/production) instances to demonstrate the benefits.
// If BaselinePercent is not set, we should take all instances as production instances.
// +kubebuilder:validation:Minimum=0
// +kubebuilder:validation:Maximum=100
// +optional
BaselinePercent *int32 `json:"baselinePercent,omitempty"`
// Indicators defines extended workload characteristics; Indicators can have arbitrary structure.
// Each kind of Indicator must be named using Name, followed by the suffix 'Indicators'.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
Indicators runtime.RawExtension `json:"indicators,omitempty"`
}
const (
// ExtendedIndicatorSuffix is appended to ServiceExtendedIndicatorSpec.Name to
// form the kind that the Indicators payload is decoded as.
ExtendedIndicatorSuffix = "Indicators"
)
// decodeNestedObjects decodes c.Indicators.Raw with d and stores the result in
// c.Indicators.Object. The expected kind is c.Name followed by
// ExtendedIndicatorSuffix, within SchemeGroupVersion. Kinds the decoder reports
// as not registered are left untouched and no error is returned. An error is
// returned when decoding fails or when the decoded group/kind differs from the
// expected one.
func (c *ServiceExtendedIndicatorSpec) decodeNestedObjects(d runtime.Decoder) error {
	want := SchemeGroupVersion.WithKind(c.Name + ExtendedIndicatorSuffix)

	// Dry-run with no data: a "not registered" error identifies an out-of-tree
	// extended indicator, which is skipped rather than rejected.
	_, _, probeErr := d.Decode(nil, &want, nil)
	if runtime.IsNotRegisteredError(probeErr) {
		return nil
	}

	decoded, got, err := d.Decode(c.Indicators.Raw, &want, nil)
	switch {
	case err != nil:
		return fmt.Errorf("decoding extended indicators %s: %w", c.Name, err)
	case got.GroupKind() != want.GroupKind():
		return fmt.Errorf("indicators for %s were not of type %s, got %s", c.Name, want.GroupKind(), got.GroupKind())
	}

	c.Indicators.Object = decoded
	return nil
}
// encodeNestedObjects serializes c.Indicators.Object with e and stores the
// JSON form in c.Indicators.Raw. It does nothing when Object is nil. Any error
// from the encoder or from the YAML-to-JSON conversion is returned unchanged.
func (c *ServiceExtendedIndicatorSpec) encodeNestedObjects(e runtime.Encoder) error {
	obj := c.Indicators.Object
	if obj == nil {
		return nil
	}

	encoded := new(bytes.Buffer)
	if err := e.Encode(obj, encoded); err != nil {
		return err
	}

	// e may emit YAML while the parent encoder expects JSON, so normalize the
	// output to JSON; the conversion is a no-op when e already produces JSON.
	raw, err := yaml.YAMLToJSON(encoded.Bytes())
	if err != nil {
		return err
	}

	c.Indicators.Raw = raw
	return nil
}
// IndicatorLevelName names one of several levels defined for each indicator.
// We always try to keep the actual indicator within acceptable intervals
// instead of at an exact value; those intervals are marked by
// IndicatorLevelName.
type IndicatorLevelName string
// NOTE(review): the constants below are declared as untyped strings rather
// than as IndicatorLevelName values — confirm whether that is intentional.
const (
// IndicatorLevelLowerBound is usually used to define the lower bound of the
// service working states. For instance, if rpc-latency is defined as a
// business indicator and the actual observed value is below
// IndicatorLevelLowerBound, it means the workload works perfectly; if the
// observed value is above IndicatorLevelLowerBound but below
// IndicatorLevelUpperBound, it means the workload can still work, but may
// suffer from performance degradation.
IndicatorLevelLowerBound = "LowerBound"
// IndicatorLevelUpperBound is usually used to define the upper bound that
// the workload can bear. For instance, if rpc-latency is defined as a
// business indicator and the actual observed value is above
// IndicatorLevelUpperBound, it means the workload is broken and can't serve
// the online traffic anymore.
IndicatorLevelUpperBound = "UpperBound"
)
// Indicator pairs an indicator level with the value defined for that level.
type Indicator struct {
// IndicatorLevel names the level that Value applies to.
IndicatorLevel IndicatorLevelName `json:"indicatorLevel"`
// Value is the numeric value defined for IndicatorLevel.
Value float32 `json:"value"`
}
// ServiceBusinessIndicatorName is the name of a business-level indicator.
type ServiceBusinessIndicatorName string
const (
// ServiceBusinessIndicatorNameRPCLatency is the business indicator name "RPCLatency".
ServiceBusinessIndicatorNameRPCLatency ServiceBusinessIndicatorName = "RPCLatency"
)
// ServiceBusinessIndicatorSpec defines workload profiling at the business level, such as rpc-latency,
// success-rate, service-health-score and so on. The general control flow works as below:
//
// - according to workload states, the user defines several key indicators
// - the user system calculates and updates observed values in status
// - sysadvisor (in-tree katalyst) decides the system-indicator offset according to the business indicator
// - sysadvisor (along with reporter and qrm) performs resources and controlKnob actions
type ServiceBusinessIndicatorSpec struct {
// Name is used to define the business-related profiling indicator for the workload,
// e.g. rpc-latency, success-rate, service-health-score and so on.
// Users can use it as an extension point, and customize sysadvisor to adapt to it.
Name ServiceBusinessIndicatorName `json:"name"`
// Indicators lists the values defined for this indicator, one per indicator level.
// +optional
Indicators []Indicator `json:"indicators,omitempty"`
}
// ServiceSystemIndicatorName is the name of a system-level indicator.
type ServiceSystemIndicatorName string
// System indicator names declared by this package.
const (
ServiceSystemIndicatorNameCPUSchedWait ServiceSystemIndicatorName = "cpu_sched_wait"
ServiceSystemIndicatorNameCPUUsageRatio ServiceSystemIndicatorName = "cpu_usage_ratio"
ServiceSystemIndicatorNameCPI ServiceSystemIndicatorName = "cpi"
ServiceSystemIndicatorNameMemoryAccessWriteLatency ServiceSystemIndicatorName = "mem_access_write_lat"
ServiceSystemIndicatorNameMemoryAccessReadLatency ServiceSystemIndicatorName = "mem_access_read_lat"
ServiceSystemIndicatorNameMemoryL3MissLatency ServiceSystemIndicatorName = "mem_l3_miss_lat"
)
// ServiceSystemIndicatorSpec defines workload profiling at the system level, such as
// cpu_sched_wait, cpi, mbw and so on, and sysadvisor (along with reporter and qrm)
// will try to perform resources and controlKnob actions.
//
// System-target indicators (along with their values in each level) could be difficult
// to pre-define, and they may have strong correlations with both workload characteristics
// and node environments, so we suggest that users run offline analysis pipelines to
// get those stats.
type ServiceSystemIndicatorSpec struct {
// Name is used to define the system-related profiling indicator for the workload,
// e.g. cpu_sched_wait, cpi, mbw and so on.
// Users can use it as an extension point, and customize sysadvisor to adapt to it.
Name ServiceSystemIndicatorName `json:"name"`
// Indicators lists the values defined for this indicator, one per indicator level.
// +optional
Indicators []Indicator `json:"indicators,omitempty"`
}
// Aggregator identifies the aggregation function used to calculate metrics data.
// +kubebuilder:validation:Enum=avg;max
type Aggregator string
const (
// Avg is the "avg" aggregator.
Avg Aggregator = "avg"
// Max is the "max" aggregator.
Max Aggregator = "max"
// Sum is the "sum" aggregator.
// NOTE(review): "sum" is not listed in the +kubebuilder:validation:Enum
// marker on Aggregator (avg;max) — confirm whether that is intentional.
Sum Aggregator = "sum"
)
// ServiceBusinessIndicatorStatus is connected to ServiceBusinessIndicatorSpec by Name
// and holds the observed info of this workload for that indicator.
type ServiceBusinessIndicatorStatus struct {
// Name is the business indicator this status entry reports on.
Name ServiceBusinessIndicatorName `json:"name"`
// Current indicates the current observed value for this business indicator.
// +optional
Current *float32 `json:"current,omitempty"`
}
// AggPodMetrics records pod metrics aggregated with a given aggregation function.
type AggPodMetrics struct {
// Aggregator indicates how the metrics data in Items are calculated, i.e.
// defines the aggregation functions.
Aggregator Aggregator `json:"aggregator"`
// Items holds the pod metrics data calculated with Aggregator.
// +optional
Items []metrics.PodMetrics `json:"items,omitempty"`
}
// ServiceProfileDescriptorStatus describes the observed info of the spd.
type ServiceProfileDescriptorStatus struct {
// AggMetrics lists the aggregated pod metrics, one entry per aggregator.
// +optional
AggMetrics []AggPodMetrics `json:"aggMetrics,omitempty"`
// BusinessStatus lists the observed values of the business indicators.
// +optional
BusinessStatus []ServiceBusinessIndicatorStatus `json:"businessStatus,omitempty"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// ServiceProfileDescriptorList is a collection of SPD objects.
type ServiceProfileDescriptorList struct {
metav1.TypeMeta `json:",inline"`
// Standard list metadata.
// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata
// +optional
metav1.ListMeta `json:"metadata,omitempty"`
// Items is the list of SPDs.
Items []ServiceProfileDescriptor `json:"items"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// TestExtendedIndicators is used only for testing extended indicators.
type TestExtendedIndicators struct {
metav1.TypeMeta `json:",inline"`
// Indicators holds the test indicator payload.
Indicators *TestIndicators `json:"indicators"`
}
// TestIndicators is the payload carried by TestExtendedIndicators.
type TestIndicators struct {
// TestIndicator is a free-form string value.
TestIndicator string `json:"testIndicator"`
}