Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a counter for API server watch failures #400

Merged
merged 1 commit into from
Oct 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ build-all-bins:
$(foreach arch,$(BIN_ARCH_WINDOWS),$(call build-bin,windows,$(arch),.exe))
$(foreach arch,$(BIN_ARCH_DARWIN),$(call build-bin,darwin,$(arch),))

.PHONY: image
image:
docker build \
--build-arg image=public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-nonroot:2021-08-26-1630012071 \
--tag aws-iam-authenticator:$(VERSION)_$(GIT_COMMIT)_$(shell date +%s) .

.PHONY: goreleaser
goreleaser:
ifndef GORELEASER
Expand Down
6 changes: 3 additions & 3 deletions pkg/ec2provider/ec2provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ func (c *mockEc2Client) DescribeInstances(in *ec2.DescribeInstancesInput) (*ec2.

func newMockedEC2ProviderImpl() *ec2ProviderImpl {
dnsCache := ec2PrivateDNSCache{
cache: make(map[string]string),
lock: sync.RWMutex{},
cache: make(map[string]string),
lock: sync.RWMutex{},
}
ec2Requests := ec2Requests{
set: make(map[string]bool),
Expand All @@ -54,7 +54,7 @@ func newMockedEC2ProviderImpl() *ec2ProviderImpl {
return &ec2ProviderImpl{
ec2: &mockEc2Client{},
privateDNSCache: dnsCache,
ec2Requests: ec2Requests,
ec2Requests: ec2Requests,
instanceIdsChannel: make(chan string, maxChannelSize),
}

Expand Down
10 changes: 8 additions & 2 deletions pkg/mapper/configmap/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ import (
"k8s.io/client-go/kubernetes"
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/clientcmd"

"sigs.k8s.io/aws-iam-authenticator/pkg/config"
"sigs.k8s.io/aws-iam-authenticator/pkg/metrics"
)

type MapStore struct {
Expand All @@ -29,9 +31,10 @@ type MapStore struct {
// Used as set.
awsAccounts map[string]interface{}
configMap v1.ConfigMapInterface
metrics metrics.Metrics
}

func New(masterURL, kubeConfig string) (*MapStore, error) {
func New(masterURL, kubeConfig string, authenticatorMetrics metrics.Metrics) (*MapStore, error) {
clientconfig, err := clientcmd.BuildConfigFromFlags(masterURL, kubeConfig)
if err != nil {
return nil, err
Expand All @@ -43,6 +46,7 @@ func New(masterURL, kubeConfig string) (*MapStore, error) {

ms := MapStore{}
ms.configMap = clientset.CoreV1().ConfigMaps("kube-system")
ms.metrics = authenticatorMetrics
return &ms, nil
}

Expand All @@ -60,10 +64,12 @@ func (ms *MapStore) startLoadConfigMap(stopCh <-chan struct{}) {
FieldSelector: fields.OneTermEqualSelector("metadata.name", "aws-auth").String(),
})
if err != nil {
logrus.Warn("Unable to re-establish watch. Sleeping for 5 seconds")
logrus.Errorf("Unable to re-establish watch: %v, sleeping for 5 seconds.", err)
ms.metrics.ConfigMapWatchFailures.Inc()
time.Sleep(5 * time.Second)
continue
}

for r := range watcher.ResultChan() {
switch r.Type {
case watch.Error:
Expand Down
6 changes: 5 additions & 1 deletion pkg/mapper/configmap/configmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ package configmap
import (
"reflect"
"testing"

"time"

"github.com/prometheus/client_golang/prometheus"
core_v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/watch"

v1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/kubernetes/typed/core/v1/fake"
k8stesting "k8s.io/client-go/testing"
"sigs.k8s.io/aws-iam-authenticator/pkg/config"
"sigs.k8s.io/aws-iam-authenticator/pkg/metrics"
)

var testUser = config.UserMapping{Username: "matlan", Groups: []string{"system:master", "dev"}}
Expand All @@ -23,6 +25,7 @@ func makeStore() MapStore {
users: make(map[string]config.UserMapping),
roles: make(map[string]config.RoleMapping),
awsAccounts: make(map[string]interface{}),
metrics: metrics.CreateMetrics(prometheus.NewRegistry()),
}
ms.users["matt"] = testUser
ms.roles["instance"] = testRole
Expand All @@ -38,6 +41,7 @@ func makeStoreWClient() (MapStore, *fake.FakeConfigMaps) {
users: make(map[string]config.UserMapping),
roles: make(map[string]config.RoleMapping),
configMap: v1.ConfigMapInterface(fakeConfigMaps),
metrics: metrics.CreateMetrics(prometheus.NewRegistry()),
}
return ms, fakeConfigMaps
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/mapper/configmap/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (

"sigs.k8s.io/aws-iam-authenticator/pkg/config"
"sigs.k8s.io/aws-iam-authenticator/pkg/mapper"
"sigs.k8s.io/aws-iam-authenticator/pkg/metrics"
)

type ConfigMapMapper struct {
Expand All @@ -13,8 +14,8 @@ type ConfigMapMapper struct {

var _ mapper.Mapper = &ConfigMapMapper{}

func NewConfigMapMapper(cfg config.Config) (*ConfigMapMapper, error) {
ms, err := New(cfg.Master, cfg.Kubeconfig)
func NewConfigMapMapper(cfg config.Config, authenticatorMetrics metrics.Metrics) (*ConfigMapMapper, error) {
ms, err := New(cfg.Master, cfg.Kubeconfig, authenticatorMetrics)
if err != nil {
return nil, err
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/mapper/configmap/yaml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -17,6 +18,7 @@ import (
"k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/aws-iam-authenticator/pkg/config"
"sigs.k8s.io/aws-iam-authenticator/pkg/metrics"
)

var log = logrus.New()
Expand Down Expand Up @@ -94,6 +96,7 @@ func TestConfigMap(t *testing.T) {
"aws-auth-missing-bar.yaml", nil, nil, nil, true,
},
}

for _, tt := range tests {
t.Run(tt.configMapYaml, func(t *testing.T) {
cm, err := configMapFromYaml(tt.configMapYaml)
Expand All @@ -104,7 +107,9 @@ func TestConfigMap(t *testing.T) {
}

cs := fake.NewSimpleClientset()
ms := MapStore{}
ms := MapStore{
metrics: metrics.CreateMetrics(prometheus.NewRegistry()),
}
ms.configMap = cs.CoreV1().ConfigMaps("kube-system")

stopCh := make(chan struct{})
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package metrics

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

const (
Namespace = "aws_iam_authenticator"
Malformed = "malformed_request"
Invalid = "invalid_token"
STSError = "sts_error"
Unknown = "uknown_user"
Success = "success"
)

// Metrics are handles to the collectors for prometheus for the various metrics we are tracking.
type Metrics struct {
ConfigMapWatchFailures prometheus.Counter
Latency *prometheus.HistogramVec
}

func CreateMetrics(reg prometheus.Registerer) Metrics {
factory := promauto.With(reg)

return Metrics{
ConfigMapWatchFailures: factory.NewCounter(
prometheus.CounterOpts{
Namespace: Namespace,
Name: "configmap_watch_failures",
Help: "EKS Configmap watch failures",
},
),
Latency: factory.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: Namespace,
Name: "authenticate_latency_seconds",
Help: "Authenticate call latency",
},
[]string{"result"},
),
}
}
59 changes: 18 additions & 41 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"sigs.k8s.io/aws-iam-authenticator/pkg/mapper/configmap"
"sigs.k8s.io/aws-iam-authenticator/pkg/mapper/crd"
"sigs.k8s.io/aws-iam-authenticator/pkg/mapper/file"
"sigs.k8s.io/aws-iam-authenticator/pkg/metrics"
"sigs.k8s.io/aws-iam-authenticator/pkg/token"

awsarn "github.com/aws/aws-sdk-go/aws/arn"
Expand Down Expand Up @@ -64,35 +65,23 @@ var (
type handler struct {
http.ServeMux
verifier token.Verifier
metrics metrics
metrics metrics.Metrics
ec2Provider ec2provider.EC2Provider
clusterID string
mappers []mapper.Mapper
scrubbedAccounts []string
}

// metrics are handles to the collectors for prometheous for the various metrics we are tracking.
type metrics struct {
latency *prometheus.HistogramVec
}

// namespace for the AWS IAM Authenticator's metrics
const (
metricNS = "aws_iam_authenticator"
metricMalformed = "malformed_request"
metricInvalid = "invalid_token"
metricSTSError = "sts_error"
metricUnknown = "uknown_user"
metricSuccess = "success"
)

// New authentication webhook server.
func New(cfg config.Config, stopCh <-chan struct{}) *Server {
c := &Server{
Config: cfg,
}

mappers, err := BuildMapperChain(cfg)
authenticatorMetrics := metrics.CreateMetrics(prometheus.DefaultRegisterer)
c.metrics = authenticatorMetrics

mappers, err := BuildMapperChain(cfg, authenticatorMetrics)
if err != nil {
logrus.Fatalf("failed to build mapper chain: %v", err)
}
Expand Down Expand Up @@ -151,7 +140,7 @@ func New(cfg config.Config, stopCh <-chan struct{}) *Server {
logrus.Infof("reconfigure your apiserver with `--authentication-token-webhook-config-file=%s` to enable (assuming default hostPath mounts)", c.GenerateKubeconfigPath)
c.httpServer = http.Server{
ErrorLog: log.New(errLog, "", 0),
Handler: c.getHandler(mappers, c.EC2DescribeInstancesQps, c.EC2DescribeInstancesBurst),
Handler: c.getHandler(authenticatorMetrics, mappers, c.EC2DescribeInstancesQps, c.EC2DescribeInstancesBurst),
}
c.listener = listener
return c
Expand All @@ -175,7 +164,7 @@ type healthzHandler struct{}
func (m *healthzHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "ok")
}
func (c *Server) getHandler(mappers []mapper.Mapper, ec2DescribeQps int, ec2DescribeBurst int) *handler {
func (c *Server) getHandler(authenticatorMetrics metrics.Metrics, mappers []mapper.Mapper, ec2DescribeQps int, ec2DescribeBurst int) *handler {
if c.ServerEC2DescribeInstancesRoleARN != "" {
_, err := awsarn.Parse(c.ServerEC2DescribeInstancesRoleARN)
if err != nil {
Expand All @@ -185,7 +174,7 @@ func (c *Server) getHandler(mappers []mapper.Mapper, ec2DescribeQps int, ec2Desc

h := &handler{
verifier: token.NewVerifier(c.ClusterID, c.PartitionID),
metrics: createMetrics(),
metrics: authenticatorMetrics,
ec2Provider: ec2provider.New(c.ServerEC2DescribeInstancesRoleARN, ec2DescribeQps, ec2DescribeBurst),
clusterID: c.ClusterID,
mappers: mappers,
Expand All @@ -202,19 +191,7 @@ func (c *Server) getHandler(mappers []mapper.Mapper, ec2DescribeQps int, ec2Desc
return h
}

func createMetrics() metrics {
m := metrics{
latency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: metricNS,
Name: "authenticate_latency_seconds",
Help: "The latency for authenticate call",
}, []string{"result"}),
}
prometheus.MustRegister(m.latency)
return m
}

func BuildMapperChain(cfg config.Config) ([]mapper.Mapper, error) {
func BuildMapperChain(cfg config.Config, authenticatorMetrics metrics.Metrics) ([]mapper.Mapper, error) {
modes := cfg.BackendMode
mappers := []mapper.Mapper{}
for _, mode := range modes {
Expand All @@ -230,7 +207,7 @@ func BuildMapperChain(cfg config.Config) ([]mapper.Mapper, error) {
case mapper.ModeConfigMap:
fallthrough
case mapper.ModeEKSConfigMap:
configMapMapper, err := configmap.NewConfigMapMapper(cfg)
configMapMapper, err := configmap.NewConfigMapMapper(cfg, authenticatorMetrics)
if err != nil {
return nil, fmt.Errorf("backend-mode %q creation failed: %v", mode, err)
}
Expand Down Expand Up @@ -272,13 +249,13 @@ func (h *handler) authenticateEndpoint(w http.ResponseWriter, req *http.Request)
if req.Method != http.MethodPost {
log.Error("unexpected request method")
http.Error(w, "expected POST", http.StatusMethodNotAllowed)
h.metrics.latency.WithLabelValues(metricMalformed).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.Malformed).Observe(duration(start))
return
}
if req.Body == nil {
log.Error("empty request body")
http.Error(w, "expected a request body", http.StatusBadRequest)
h.metrics.latency.WithLabelValues(metricMalformed).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.Malformed).Observe(duration(start))
return
}
defer req.Body.Close()
Expand All @@ -287,7 +264,7 @@ func (h *handler) authenticateEndpoint(w http.ResponseWriter, req *http.Request)
if err := json.NewDecoder(req.Body).Decode(&tokenReview); err != nil {
log.WithError(err).Error("could not parse request body")
http.Error(w, "expected a request body to be a TokenReview", http.StatusBadRequest)
h.metrics.latency.WithLabelValues(metricMalformed).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.Malformed).Observe(duration(start))
return
}

Expand All @@ -300,9 +277,9 @@ func (h *handler) authenticateEndpoint(w http.ResponseWriter, req *http.Request)
identity, err := h.verifier.Verify(tokenReview.Spec.Token)
if err != nil {
if _, ok := err.(token.STSError); ok {
h.metrics.latency.WithLabelValues(metricSTSError).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.STSError).Observe(duration(start))
} else {
h.metrics.latency.WithLabelValues(metricInvalid).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.Invalid).Observe(duration(start))
}
log.WithError(err).Warn("access denied")
w.WriteHeader(http.StatusForbidden)
Expand All @@ -325,7 +302,7 @@ func (h *handler) authenticateEndpoint(w http.ResponseWriter, req *http.Request)

username, groups, err := h.doMapping(identity)
if err != nil {
h.metrics.latency.WithLabelValues(metricUnknown).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.Unknown).Observe(duration(start))
log.WithError(err).Warn("access denied")
w.WriteHeader(http.StatusForbidden)
w.Write(tokenReviewDenyJSON)
Expand All @@ -344,7 +321,7 @@ func (h *handler) authenticateEndpoint(w http.ResponseWriter, req *http.Request)
"uid": uid,
"groups": groups,
}).Info("access granted")
h.metrics.latency.WithLabelValues(metricSuccess).Observe(duration(start))
h.metrics.Latency.WithLabelValues(metrics.Success).Observe(duration(start))
w.WriteHeader(http.StatusOK)

userExtra := map[string]authenticationv1beta1.ExtraValue{}
Expand Down
Loading