Skip to content

Commit

Permalink
feat(monitor): optimize monitor (#2141)
Browse files Browse the repository at this point in the history
1. Fix a bug in the Prometheus rule
2. Add a TTL for messagerequests

Co-authored-by: willzgli <willzgli@tencent.com>
  • Loading branch information
willzgli and willzgli authored Nov 3, 2022
1 parent 13b64d0 commit f42cc9a
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ data:
api_server = "https://tke-platform-api"
api_server_client_config = "/app/conf/tke-platform-config.yaml"

[features]
# Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h"
messagerequest_ttl = "1h"
message_ttl = "360h"

tke-auth-webhook.yaml: |
apiVersion: v1
kind: Config
Expand Down
2 changes: 2 additions & 0 deletions cmd/tke-notify-api/app/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type Config struct {
PrivilegedUsername string
ExternalHost string
ExternalPort int
MessageRequestTTL time.Duration
MessageTTL time.Duration
}

Expand Down Expand Up @@ -139,6 +140,7 @@ func CreateConfigFromOptions(serverName string, opts *options.Options) (*Config,
PrivilegedUsername: opts.Authentication.PrivilegedUsername,
ExternalHost: opts.Generic.ExternalHost,
ExternalPort: opts.Generic.ExternalPort,
MessageRequestTTL: opts.FeatureOptions.MessageRequestTTL,
MessageTTL: opts.FeatureOptions.MessageTTL,
}, nil
}
28 changes: 21 additions & 7 deletions cmd/tke-notify-api/app/options/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,48 @@
package options

import (
"time"

"github.com/spf13/pflag"
"github.com/spf13/viper"
"time"
)

const (
flagMessageTTL = "message-ttl"
configMessageTTL = "features.message_ttl"
defaultMessageTTL = time.Hour * 24 * 30
flagMessageTTL = "message-ttl"
flagMessageRequestTTL = "messagerequest-ttl"
configMessageRequestTTL = "features.messagerequest_ttl"
configMessageTTL = "features.message_ttl"
defaultMessageRequestTTL = time.Hour
defaultMessageTTL = time.Hour * 24 * 30
)

type FeatureOptions struct {
MessageTTL time.Duration
MessageRequestTTL time.Duration
MessageTTL time.Duration
}

func NewFeatureOptions() *FeatureOptions {
return &FeatureOptions{MessageTTL: defaultMessageTTL}
return &FeatureOptions{
MessageRequestTTL: defaultMessageRequestTTL,
MessageTTL: defaultMessageTTL,
}
}

func (o *FeatureOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&o.MessageRequestTTL, flagMessageRequestTTL, o.MessageRequestTTL,
"How long to retain messagerequests")
fs.DurationVar(&o.MessageTTL, flagMessageTTL, o.MessageTTL,
"How long to retain messages and messagerequests")
"How long to retain messages")
_ = viper.BindPFlag(configMessageRequestTTL, fs.Lookup(flagMessageRequestTTL))
_ = viper.BindPFlag(configMessageTTL, fs.Lookup(flagMessageTTL))

}

func (o *FeatureOptions) ApplyFlags() []error {
var errs []error

// Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
o.MessageRequestTTL = viper.GetDuration(configMessageRequestTTL)
o.MessageTTL = viper.GetDuration(configMessageTTL)

return errs
Expand Down
1 change: 1 addition & 0 deletions cmd/tke-notify-api/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func createAPIServerConfig(cfg *config.Config) *apiserver.Config {
StorageFactory: cfg.StorageFactory,
APIResourceConfigSource: cfg.StorageFactory.APIResourceConfigSource,
PrivilegedUsername: cfg.PrivilegedUsername,
MessageRequestTTL: cfg.MessageRequestTTL,
MessageTTL: cfg.MessageTTL,
},
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/monitor/controller/prometheus/yamls.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ groups:
expr: sum(k8s_container_fs_write_times) without (container_name,container_id)

- record: k8s_pod_status_ready
expr: sum(kube_pod_status_ready{condition="true"}) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2
expr: sum(kube_pod_status_phase{phase=~"Running|Succeeded"}) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2

- record: k8s_pod_restart_total
expr: sum(idelta(kube_pod_container_status_restarts_total [2m])) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2
Expand Down Expand Up @@ -1240,8 +1240,8 @@ func configForAlertManager(webhookAddr string, repeatInterval string) string {

route:
group_by: ['alertname','alarmPolicyName','version']
group_wait: 1s
group_interval: 1s
group_wait: 30s
group_interval: 5m
repeat_interval: %s
receiver: 'web.hook'
routes:
Expand Down
5 changes: 4 additions & 1 deletion pkg/notify/apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
package apiserver

import (
"time"

"k8s.io/apiserver/pkg/registry/generic"
genericapiserver "k8s.io/apiserver/pkg/server"
serverstorage "k8s.io/apiserver/pkg/server/storage"
"time"
versionedinformers "tkestack.io/tke/api/client/informers/externalversions"
notifyv1 "tkestack.io/tke/api/notify/v1"
"tkestack.io/tke/pkg/apiserver/storage"
Expand All @@ -37,6 +38,7 @@ type ExtraConfig struct {
StorageFactory serverstorage.StorageFactory
VersionedInformers versionedinformers.SharedInformerFactory
PrivilegedUsername string
MessageRequestTTL time.Duration
MessageTTL time.Duration
}

Expand Down Expand Up @@ -92,6 +94,7 @@ func (c completedConfig) New(delegationTarget genericapiserver.DelegationTarget)
&notifyrest.StorageProvider{
LoopbackClientConfig: c.GenericConfig.LoopbackClientConfig,
PrivilegedUsername: c.ExtraConfig.PrivilegedUsername,
MessageRequestTTL: c.ExtraConfig.MessageRequestTTL,
MessageTTL: c.ExtraConfig.MessageTTL,
},
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/notify/registry/rest/rest.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
package rest

import (
"time"

"k8s.io/apiserver/pkg/registry/generic"
"k8s.io/apiserver/pkg/registry/rest"
genericserver "k8s.io/apiserver/pkg/server"
serverstorage "k8s.io/apiserver/pkg/server/storage"
restclient "k8s.io/client-go/rest"
"time"
notifyinternalclient "tkestack.io/tke/api/client/clientset/internalversion/typed/notify/internalversion"
"tkestack.io/tke/api/notify"
"tkestack.io/tke/api/notify/v1"
v1 "tkestack.io/tke/api/notify/v1"
"tkestack.io/tke/pkg/apiserver/storage"
channelstorage "tkestack.io/tke/pkg/notify/registry/channel/storage"
configmapstorage "tkestack.io/tke/pkg/notify/registry/configmap/storage"
Expand All @@ -43,6 +44,7 @@ import (
type StorageProvider struct {
LoopbackClientConfig *restclient.Config
PrivilegedUsername string
MessageRequestTTL time.Duration
MessageTTL time.Duration
}

Expand Down Expand Up @@ -85,7 +87,7 @@ func (s *StorageProvider) v1Storage(apiResourceConfigSource serverstorage.APIRes
storageMap["messages"] = messageREST.Message
storageMap["messages/status"] = messageREST.Status

messageRequestREST := messagerequeststorage.NewStorage(restOptionsGetter, notifyClient, s.PrivilegedUsername, s.MessageTTL)
messageRequestREST := messagerequeststorage.NewStorage(restOptionsGetter, notifyClient, s.PrivilegedUsername, s.MessageRequestTTL)
storageMap["messagerequests"] = messageRequestREST.MessageRequest
storageMap["messagerequests/status"] = messageRequestREST.Status

Expand Down

0 comments on commit f42cc9a

Please sign in to comment.