Skip to content

Commit

Permalink
auth: policy based auth map GC
Browse files Browse the repository at this point in the history
Until now, auth map entries were garbage collected based on the
following criterias:

* related identity has been deleted
* related node has been deleted
* entry has been expired

The initial goal was that expiration will cover the case where no longer
a policy is enforcing authentication.

But the introduction of re-authentication (cilium#25927) changed this, because
the entries would have re-authenticated "forever" (until identity or
node would have been deleted).

Therefore, this commit introduces some rudimentary garbage collection
based on policies by periodically checking whether a policy is still
enforcing authentication between two identities. If not, the auth map
entry gets deleted.

Signed-off-by: Marco Hofstetter <marco.hofstetter@isovalent.com>
  • Loading branch information
mhofstetter authored and romanspb80 committed Jun 22, 2023
1 parent 073270e commit f0cb281
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 26 deletions.
46 changes: 38 additions & 8 deletions pkg/auth/authmap_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,30 @@ import (
"github.com/cilium/cilium/pkg/k8s/resource"
"github.com/cilium/cilium/pkg/logging/logfields"
"github.com/cilium/cilium/pkg/node/addressing"
"github.com/cilium/cilium/pkg/policy"
)

type authMapGarbageCollector struct {
logger logrus.FieldLogger
authmap authMap
ipCache ipCache
logger logrus.FieldLogger
authmap authMap
ipCache ipCache
policyRepo policyRepository

discoveredCiliumNodeIDs map[uint16]struct{}
discoveredCiliumIdentities map[identity.NumericIdentity]struct{}
}

func newAuthMapGC(logger logrus.FieldLogger, authmap authMap, ipCache ipCache) *authMapGarbageCollector {
type policyRepository interface {
GetAuthTypes(localID, remoteID identity.NumericIdentity) policy.AuthTypes
}

func newAuthMapGC(logger logrus.FieldLogger, authmap authMap, ipCache ipCache, policyRepo policyRepository) *authMapGarbageCollector {
return &authMapGarbageCollector{
logger: logger,
authmap: authmap,
ipCache: ipCache,
logger: logger,
authmap: authmap,
ipCache: ipCache,
policyRepo: policyRepo,

discoveredCiliumNodeIDs: map[uint16]struct{}{
0: {}, // Local node 0 is always available
},
Expand Down Expand Up @@ -166,8 +174,30 @@ func (r *authMapGarbageCollector) cleanupDeletedIdentity(id *ciliumv2.CiliumIden
})
}

func (r *authMapGarbageCollector) CleanupExpiredEntries(_ context.Context) error {
func (r *authMapGarbageCollector) cleanupEntriesWithoutAuthPolicy(_ context.Context) error {
r.logger.Debug("Cleaning up expired entries")

err := r.authmap.DeleteIf(func(key authKey, info authInfo) bool {
authTypes := r.policyRepo.GetAuthTypes(key.localIdentity, key.remoteIdentity)

if _, ok := authTypes[key.authType]; !ok {
r.logger.
WithField("key", key).
WithField("auth_type", key.authType).
Debug("Deleting entry because no policy requires authentication")
return true
}
return false
})

if err != nil {
return fmt.Errorf("failed to cleanup entries without any auth policy: %w", err)
}
return nil
}

func (r *authMapGarbageCollector) cleanupExpiredEntries(_ context.Context) error {
r.logger.Debug("auth: cleaning up expired entries")
now := time.Now()
err := r.authmap.DeleteIf(func(key authKey, info authInfo) bool {
if info.expiration.Before(now) {
Expand Down
74 changes: 58 additions & 16 deletions pkg/auth/authmap_gc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,9 @@ func Test_authMapGarbageCollector_initialSync(t *testing.T) {
{localIdentity: identity.NumericIdentity(2), remoteIdentity: identity.NumericIdentity(3), remoteNodeID: 11, authType: policy.AuthTypeDisabled}: {expiration: time.Now().Add(-5 * time.Minute)},
},
}
am := newAuthMapGC(logrus.New(), authMap,
newFakeIPCache(map[uint16]string{
10: "172.18.0.3",
}),
)
am := newAuthMapGC(logrus.New(), authMap, newFakeIPCache(map[uint16]string{
10: "172.18.0.3",
}), nil)

assert.Len(t, am.discoveredCiliumNodeIDs, 1) // local node 0
assert.Empty(t, am.discoveredCiliumIdentities)
Expand Down Expand Up @@ -66,34 +64,62 @@ func Test_authMapGarbageCollector_initialSync(t *testing.T) {
func Test_authMapGarbageCollector_gc(t *testing.T) {
authMap := &fakeAuthMap{
entries: map[authKey]authInfo{
{localIdentity: identity.NumericIdentity(1), remoteIdentity: identity.NumericIdentity(2), remoteNodeID: 10, authType: policy.AuthTypeDisabled}: {expiration: time.Now().Add(5 * time.Minute)}, // deleted remote node
{localIdentity: identity.NumericIdentity(2), remoteIdentity: identity.NumericIdentity(4), remoteNodeID: 0, authType: policy.AuthTypeDisabled}: {expiration: time.Now().Add(5 * time.Minute)}, // deleted remote id
{localIdentity: identity.NumericIdentity(10), remoteIdentity: identity.NumericIdentity(11), remoteNodeID: 0, authType: policy.AuthTypeDisabled}: {expiration: time.Now().Add(5 * time.Minute)}, // deleted local id
{localIdentity: identity.NumericIdentity(2), remoteIdentity: identity.NumericIdentity(3), remoteNodeID: 11, authType: policy.AuthTypeDisabled}: {expiration: time.Now().Add(-5 * time.Minute)}, // expired
{localIdentity: identity.NumericIdentity(1), remoteIdentity: identity.NumericIdentity(2), remoteNodeID: 10, authType: policy.AuthTypeSpire}: {expiration: time.Now().Add(5 * time.Minute)}, // deleted remote node
{localIdentity: identity.NumericIdentity(2), remoteIdentity: identity.NumericIdentity(4), remoteNodeID: 0, authType: policy.AuthTypeSpire}: {expiration: time.Now().Add(5 * time.Minute)}, // deleted remote id
{localIdentity: identity.NumericIdentity(10), remoteIdentity: identity.NumericIdentity(11), remoteNodeID: 0, authType: policy.AuthTypeSpire}: {expiration: time.Now().Add(5 * time.Minute)}, // deleted local id
{localIdentity: identity.NumericIdentity(5), remoteIdentity: identity.NumericIdentity(6), remoteNodeID: 12, authType: policy.AuthTypeSpire}: {expiration: time.Now().Add(5 * time.Minute)}, // no policy present which enforces auth between identities
{localIdentity: identity.NumericIdentity(2), remoteIdentity: identity.NumericIdentity(3), remoteNodeID: 12, authType: policy.AuthTypeAlwaysFail}: {expiration: time.Now().Add(5 * time.Minute)}, // no policy present which enforces specific auth type
{localIdentity: identity.NumericIdentity(2), remoteIdentity: identity.NumericIdentity(3), remoteNodeID: 11, authType: policy.AuthTypeSpire}: {expiration: time.Now().Add(-5 * time.Minute)}, // expired
},
}

am := newAuthMapGC(logrus.New(), authMap,
newFakeIPCache(map[uint16]string{
10: "172.18.0.3",
}),
&fakePolicyRepository{
needsAuth: map[identity.NumericIdentity]map[identity.NumericIdentity]policy.AuthTypes{
identity.NumericIdentity(1): {
identity.NumericIdentity(2): map[policy.AuthType]struct{}{
policy.AuthTypeSpire: {},
},
},
identity.NumericIdentity(2): {
identity.NumericIdentity(3): map[policy.AuthType]struct{}{
policy.AuthTypeSpire: {},
},
identity.NumericIdentity(4): map[policy.AuthType]struct{}{
policy.AuthTypeSpire: {},
},
},
identity.NumericIdentity(10): {
identity.NumericIdentity(11): map[policy.AuthType]struct{}{
policy.AuthTypeSpire: {},
},
},
},
},
)

assert.Len(t, authMap.entries, 4)
assert.Len(t, authMap.entries, 6)

err := am.handleCiliumNodeEvent(context.Background(), ciliumNodeEvent(resource.Delete, "172.18.0.3"))
err := am.cleanupExpiredEntries(context.Background())
assert.NoError(t, err)
assert.Len(t, authMap.entries, 5)

err = am.cleanupEntriesWithoutAuthPolicy(context.Background())
assert.NoError(t, err)
assert.Len(t, authMap.entries, 3)

err = am.handleCiliumIdentityEvent(context.Background(), ciliumIdentityEvent(resource.Delete, "4"))
err = am.handleCiliumNodeEvent(context.Background(), ciliumNodeEvent(resource.Delete, "172.18.0.3"))
assert.NoError(t, err)
assert.Len(t, authMap.entries, 2)

err = am.handleCiliumIdentityEvent(context.Background(), ciliumIdentityEvent(resource.Delete, "10"))
err = am.handleCiliumIdentityEvent(context.Background(), ciliumIdentityEvent(resource.Delete, "4"))
assert.NoError(t, err)
assert.Len(t, authMap.entries, 1)

err = am.CleanupExpiredEntries(context.Background())
err = am.handleCiliumIdentityEvent(context.Background(), ciliumIdentityEvent(resource.Delete, "10"))
assert.NoError(t, err)
assert.Empty(t, authMap.entries)
}
Expand All @@ -103,7 +129,7 @@ func Test_authMapGarbageCollector_HandleNodeEventError(t *testing.T) {
entries: map[authKey]authInfo{},
failDelete: true,
}
am := newAuthMapGC(logrus.New(), authMap, newFakeIPCache(map[uint16]string{}))
am := newAuthMapGC(logrus.New(), authMap, newFakeIPCache(map[uint16]string{}), nil)

event := ciliumNodeEvent(resource.Delete, "172.18.0.3")
var eventErr error
Expand All @@ -120,7 +146,7 @@ func Test_authMapGarbageCollector_HandleIdentityEventError(t *testing.T) {
entries: map[authKey]authInfo{},
failDelete: true,
}
am := newAuthMapGC(logrus.New(), authMap, newFakeIPCache(map[uint16]string{}))
am := newAuthMapGC(logrus.New(), authMap, newFakeIPCache(map[uint16]string{}), nil)

event := ciliumIdentityEvent(resource.Delete, "4")
var eventErr error
Expand Down Expand Up @@ -167,3 +193,19 @@ func ciliumIdentityEvent(eventType resource.EventKind, id string) resource.Event
Key: resource.Key{Namespace: "test-ns", Name: id},
}
}

// Fake policyRepository

type fakePolicyRepository struct {
needsAuth map[identity.NumericIdentity]map[identity.NumericIdentity]policy.AuthTypes
}

func (r *fakePolicyRepository) GetAuthTypes(localID, remoteID identity.NumericIdentity) policy.AuthTypes {
if remotes, localPresent := r.needsAuth[localID]; localPresent {
if authTypes, remotePresent := remotes[remoteID]; remotePresent {
return authTypes
}
}

return nil
}
8 changes: 6 additions & 2 deletions pkg/auth/cell.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
"github.com/cilium/cilium/pkg/k8s/resource"
"github.com/cilium/cilium/pkg/maps/authmap"
"github.com/cilium/cilium/pkg/policy"
"github.com/cilium/cilium/pkg/signal"
"github.com/cilium/cilium/pkg/stream"
)
Expand Down Expand Up @@ -85,6 +86,7 @@ type authManagerParams struct {
SignalManager signal.SignalManager
CiliumIdentities resource.Resource[*ciliumv2.CiliumIdentity]
CiliumNodes resource.Resource[*ciliumv2.CiliumNode]
PolicyRepo *policy.Repository
}

func newManager(params authManagerParams) error {
Expand Down Expand Up @@ -122,7 +124,7 @@ func newManager(params authManagerParams) error {

registerReAuthenticationJob(jobGroup, mgr, params.AuthHandlers)

mapGC := newAuthMapGC(params.Logger, mapCache, params.IPCache)
mapGC := newAuthMapGC(params.Logger, mapCache, params.IPCache, params.PolicyRepo)

registerGCJobs(jobGroup, mapGC, params)

Expand Down Expand Up @@ -164,7 +166,9 @@ func registerGCJobs(jobGroup job.Group, mapGC *authMapGarbageCollector, params a
jobGroup.Add(job.Observer[resource.Event[*ciliumv2.CiliumNode]]("auth nodes gc", mapGC.handleCiliumNodeEvent, params.CiliumNodes))
}

jobGroup.Add(job.Timer("auth expiration gc", mapGC.CleanupExpiredEntries, params.Config.MeshAuthExpiredGCInterval))
jobGroup.Add(job.Timer("auth policies gc", mapGC.cleanupEntriesWithoutAuthPolicy, params.Config.MeshAuthExpiredGCInterval))

jobGroup.Add(job.Timer("auth expiration gc", mapGC.cleanupExpiredEntries, params.Config.MeshAuthExpiredGCInterval))
}

type authHandlerResult struct {
Expand Down

0 comments on commit f0cb281

Please sign in to comment.