Skip to content

Commit

Permalink
Fix the semantic meaning of etcd server within component statuses and…
Browse files Browse the repository at this point in the history
… metrics.

Instead of enumerating all the etcd endpoints known by apiserver, we will
group them by purpose. `etcd-0` will be the default etcd, `etcd-1` will
be the first resource override, `etcd-2` will be the second override and
so on.
  • Loading branch information
serathius committed Jul 20, 2023
1 parent fa88c0b commit 03aad1f
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 42 deletions.
2 changes: 1 addition & 1 deletion pkg/registry/core/componentstatus/validator.go
Expand Up @@ -117,5 +117,5 @@ func (server *EtcdServer) DoServerCheck() (probe.Result, string, error) {
if err != nil {
return probe.Failure, "", err
}
return probe.Success, "", err
return probe.Success, "ok", err
}
27 changes: 8 additions & 19 deletions staging/src/k8s.io/apiserver/pkg/server/storage/storage_factory.go
Expand Up @@ -291,28 +291,17 @@ func Configs(storageConfig storagebackend.Config) []storagebackend.Config {

// Returns all storage configurations including those for group resource overrides
func configs(storageConfig storagebackend.Config, grOverrides map[schema.GroupResource]groupResourceOverrides) []storagebackend.Config {
locations := sets.NewString()
configs := []storagebackend.Config{}
for _, loc := range storageConfig.Transport.ServerList {
// copy
newConfig := storageConfig
newConfig.Transport.ServerList = []string{loc}
configs = append(configs, newConfig)
locations.Insert(loc)
}
configs := []storagebackend.Config{storageConfig}

for _, override := range grOverrides {
for _, loc := range override.etcdLocation {
if locations.Has(loc) {
continue
}
// copy
newConfig := storageConfig
override.Apply(&newConfig, &StorageCodecConfig{})
newConfig.Transport.ServerList = []string{loc}
configs = append(configs, newConfig)
locations.Insert(loc)
if len(override.etcdLocation) == 0 {
continue
}
// copy
newConfig := storageConfig
override.Apply(&newConfig, &StorageCodecConfig{})
newConfig.Transport.ServerList = override.etcdLocation
configs = append(configs, newConfig)
}
return configs
}
Expand Down
Expand Up @@ -191,33 +191,36 @@ func TestConfigs(t *testing.T) {
defaultEtcdLocations := []string{"http://127.0.0.1", "http://127.0.0.2"}

testCases := []struct {
resource schema.GroupResource
resource *schema.GroupResource
servers []string
wantConfigs []storagebackend.Config
}{
{
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
},
},
{
resource: schema.GroupResource{Group: example.GroupName, Resource: "resource"},
resource: &schema.GroupResource{Group: example.GroupName, Resource: "resource"},
servers: []string{},
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
},
},
{
resource: &schema.GroupResource{Group: example.GroupName, Resource: "resource"},
servers: []string{"http://127.0.0.1:10000"},
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1:10000"}}, Prefix: "/registry", Paging: true},
},
},
{
resource: schema.GroupResource{Group: example.GroupName, Resource: "resource"},
resource: &schema.GroupResource{Group: example.GroupName, Resource: "resource"},
servers: []string{"http://127.0.0.1:10000", "https://127.0.0.1", "http://127.0.0.2"},
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1:10000"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"https://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1:10000", "https://127.0.0.1", "http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
},
},
}
Expand All @@ -230,8 +233,8 @@ func TestConfigs(t *testing.T) {
},
}
storageFactory := NewDefaultStorageFactory(defaultConfig, "", codecs, NewDefaultResourceEncodingConfig(scheme), NewResourceConfig(), nil)
if len(test.servers) > 0 {
storageFactory.SetEtcdLocation(test.resource, test.servers)
if test.resource != nil {
storageFactory.SetEtcdLocation(*test.resource, test.servers)
}

got := storageFactory.Configs()
Expand Down
Expand Up @@ -84,7 +84,7 @@ var (
},
[]string{"endpoint"},
)
storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"server"}, nil, compbasemetrics.ALPHA, "")
storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"cluster"}, nil, compbasemetrics.ALPHA, "")
storageMonitor = &monitorCollector{}
etcdEventsReceivedCounts = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Expand Down Expand Up @@ -274,21 +274,21 @@ func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric
}

for i, m := range monitors {
server := fmt.Sprintf("etcd-%d", i)
cluster := fmt.Sprintf("etcd-%d", i)

klog.V(4).InfoS("Start collecting storage metrics", "server", server)
klog.V(4).InfoS("Start collecting storage metrics", "cluster", cluster)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
metrics, err := m.Monitor(ctx)
cancel()
m.Close()
if err != nil {
klog.InfoS("Failed to get storage metrics", "server", server, "err", err)
klog.InfoS("Failed to get storage metrics", "cluster", cluster, "err", err)
continue
}

metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), server)
metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), cluster)
if err != nil {
klog.ErrorS(err, "Failed to create metric", "server", server)
klog.ErrorS(err, "Failed to create metric", "cluster", cluster)
}
ch <- metric
}
Expand Down
2 changes: 1 addition & 1 deletion test/instrumentation/documentation/documentation-list.yaml
Expand Up @@ -3760,7 +3760,7 @@
type: Custom
stabilityLevel: ALPHA
labels:
- server
- cluster
- name: transformation_duration_seconds
subsystem: storage
namespace: apiserver
Expand Down
2 changes: 1 addition & 1 deletion test/instrumentation/documentation/documentation.md
Expand Up @@ -932,7 +932,7 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
<td class="metric_type" data-type="custom">Custom</td>
<td class="metric_description">Size of the storage database file physically allocated in bytes.</td>
<td class="metric_labels_varying"><div class="metric_label">server</div></td>
<td class="metric_labels_varying"><div class="metric_label">cluster</div></td>
<td class="metric_labels_constant"></td>
<td class="metric_deprecated_version"></td></tr>
<tr class="metric"><td class="metric_name">apiserver_storage_transformation_duration_seconds</td>
Expand Down
4 changes: 3 additions & 1 deletion test/integration/metrics/metrics_test.go
Expand Up @@ -77,7 +77,9 @@ func TestAPIServerProcessMetrics(t *testing.T) {
}

func TestAPIServerStorageMetrics(t *testing.T) {
s := kubeapiservertesting.StartTestServerOrDie(t, nil, nil, framework.SharedEtcd())
config := framework.SharedEtcd()
config.Transport.ServerList = []string{config.Transport.ServerList[0], config.Transport.ServerList[0]}
s := kubeapiservertesting.StartTestServerOrDie(t, nil, nil, config)
defer s.TearDownFn()

metrics, err := scrapeMetrics(s)
Expand Down

0 comments on commit 03aad1f

Please sign in to comment.