Skip to content

Commit

Permalink
fix: handle errors to watch apid/trustd certs
Browse files Browse the repository at this point in the history
Fixes #8345

Both `apid` and `trustd` services use a gRPC connection back to
`machined` to watch changes to the certificates (new certificates being
issued).

This refactors the code to follow regular conventions: a failure to
watch now crashes the process, which gives the services a way to restart
and re-establish the watch.

Use the context and errgroup consistently.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Feb 23, 2024
1 parent c79d69c commit 67ac693
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 60 deletions.
6 changes: 5 additions & 1 deletion internal/app/apid/main.go
Expand Up @@ -81,7 +81,7 @@ func apidMain() error {
stateClient := v1alpha1.NewStateClient(runtimeConn)
resources := state.WrapCore(client.NewAdapter(stateClient))

tlsConfig, err := provider.NewTLSConfig(resources)
tlsConfig, err := provider.NewTLSConfig(ctx, resources)
if err != nil {
return fmt.Errorf("failed to create remote certificate provider: %w", err)
}
Expand Down Expand Up @@ -225,6 +225,10 @@ func apidMain() error {
return socketServer.Serve(socketListener)
})

errGroup.Go(func() error {
return tlsConfig.Watch(ctx)
})

errGroup.Go(func() error {
<-ctx.Done()

Expand Down
66 changes: 37 additions & 29 deletions internal/app/apid/pkg/provider/provider.go
Expand Up @@ -9,7 +9,6 @@ import (
"context"
stdlibtls "crypto/tls"
"fmt"
"log"
"sync"

"github.com/cosi-project/runtime/pkg/resource"
Expand All @@ -22,23 +21,28 @@ import (
// TLSConfig provides client & server TLS configs for apid.
type TLSConfig struct {
certificateProvider *certificateProvider
watchCh <-chan state.Event
}

// NewTLSConfig builds provider from configuration and endpoints.
//
//nolint:gocyclo
func NewTLSConfig(resources state.State) (*TLSConfig, error) {
func NewTLSConfig(ctx context.Context, resources state.State) (*TLSConfig, error) {
watchCh := make(chan state.Event)

if err := resources.Watch(context.TODO(), resource.NewMetadata(secrets.NamespaceName, secrets.APIType, secrets.APIID, resource.VersionUndefined), watchCh); err != nil {
if err := resources.Watch(ctx, resource.NewMetadata(secrets.NamespaceName, secrets.APIType, secrets.APIID, resource.VersionUndefined), watchCh); err != nil {
return nil, fmt.Errorf("error setting up watch: %w", err)
}

// wait for the first event to set up certificate provider
provider := &certificateProvider{}

for {
event := <-watchCh
var event state.Event

select {
case <-ctx.Done():
return nil, ctx.Err()
case event = <-watchCh:
}

switch event.Type {
case state.Created, state.Updated:
Expand All @@ -56,36 +60,40 @@ func NewTLSConfig(resources state.State) (*TLSConfig, error) {
return nil, err
}

break
return &TLSConfig{
certificateProvider: provider,
watchCh: watchCh,
}, nil
}
}

go func() {
for {
event := <-watchCh
// Watch watches for changes in API certificates and updates the TLSConfig.
func (tlsConfig *TLSConfig) Watch(ctx context.Context) error {
for {
var event state.Event

switch event.Type {
case state.Created, state.Updated:
// expected
case state.Destroyed, state.Bootstrapped:
// ignore, we'll get another event
continue
case state.Errored:
log.Printf("error watching for API certificates: %s", event.Error)
select {
case <-ctx.Done():
return nil
case event = <-tlsConfig.watchCh:
}

continue
}
switch event.Type {
case state.Created, state.Updated:
// expected
case state.Destroyed, state.Bootstrapped:
// ignore, we'll get another event
continue
case state.Errored:
return fmt.Errorf("error watching API certificates: %w", event.Error)
}

apiCerts := event.Resource.(*secrets.API) //nolint:errcheck,forcetypeassert
apiCerts := event.Resource.(*secrets.API) //nolint:errcheck,forcetypeassert

if err := provider.Update(apiCerts); err != nil {
log.Printf("failed updating cert: %v", err)
}
if err := tlsConfig.certificateProvider.Update(apiCerts); err != nil {
return fmt.Errorf("failed updating cert: %v", err)
}
}()

return &TLSConfig{
certificateProvider: provider,
}, nil
}
}

// ServerConfig generates server-side tls.Config.
Expand Down
70 changes: 41 additions & 29 deletions internal/app/trustd/internal/provider/provider.go
Expand Up @@ -22,23 +22,29 @@ import (
// TLSConfig provides client & server TLS configs for trustd.
type TLSConfig struct {
certificateProvider *certificateProvider

watchCh <-chan state.Event
}

// NewTLSConfig builds provider from configuration and endpoints.
//
//nolint:gocyclo
func NewTLSConfig(resources state.State) (*TLSConfig, error) {
func NewTLSConfig(ctx context.Context, resources state.State) (*TLSConfig, error) {
watchCh := make(chan state.Event)

if err := resources.Watch(context.TODO(), resource.NewMetadata(secrets.NamespaceName, secrets.TrustdType, secrets.TrustdID, resource.VersionUndefined), watchCh); err != nil {
if err := resources.Watch(ctx, resource.NewMetadata(secrets.NamespaceName, secrets.TrustdType, secrets.TrustdID, resource.VersionUndefined), watchCh); err != nil {
return nil, fmt.Errorf("error setting up watch: %w", err)
}

// wait for the first event to set up certificate provider
provider := &certificateProvider{}

for {
event := <-watchCh
var event state.Event

select {
case <-ctx.Done():
return nil, ctx.Err()
case event = <-watchCh:
}

switch event.Type {
case state.Created, state.Updated:
Expand All @@ -56,34 +62,40 @@ func NewTLSConfig(resources state.State) (*TLSConfig, error) {
return nil, err
}

break
return &TLSConfig{
certificateProvider: provider,
watchCh: watchCh,
}, nil
}
}

// Watch for updates to trustd certificates.
func (tlsConfig *TLSConfig) Watch(ctx context.Context) error {
for {
var event state.Event

go func() {
for {
event := <-watchCh

switch event.Type {
case state.Created, state.Updated:
// expected
case state.Destroyed, state.Bootstrapped:
// ignore, we'll get another event
continue
case state.Errored:
log.Printf("error watching for trustd certificates: %s", event.Error)
}

trustdCerts := event.Resource.(*secrets.Trustd) //nolint:errcheck,forcetypeassert

if err := provider.Update(trustdCerts); err != nil {
log.Printf("failed updating cert: %v", err)
}
select {
case <-ctx.Done():
return nil
case event = <-tlsConfig.watchCh:
}
}()

return &TLSConfig{
certificateProvider: provider,
}, nil
switch event.Type {
case state.Created, state.Updated:
// expected
case state.Destroyed, state.Bootstrapped:
// ignore, we'll get another event
continue
case state.Errored:
log.Printf("error watching for trustd certificates: %s", event.Error)
}

trustdCerts := event.Resource.(*secrets.Trustd) //nolint:errcheck,forcetypeassert

if err := tlsConfig.certificateProvider.Update(trustdCerts); err != nil {
return fmt.Errorf("failed updating cert: %w", err)
}
}
}

// ServerConfig generates server-side tls.Config.
Expand Down
7 changes: 6 additions & 1 deletion internal/app/trustd/main.go
Expand Up @@ -2,6 +2,7 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Package trustd implements trustd functionality.
package trustd

import (
Expand Down Expand Up @@ -78,7 +79,7 @@ func trustdMain() error {
stateClient := v1alpha1.NewStateClient(runtimeConn)
resources := state.WrapCore(client.NewAdapter(stateClient))

tlsConfig, err := provider.NewTLSConfig(resources)
tlsConfig, err := provider.NewTLSConfig(ctx, resources)
if err != nil {
return fmt.Errorf("failed to create remote certificate provider: %w", err)
}
Expand Down Expand Up @@ -114,6 +115,10 @@ func trustdMain() error {
return networkServer.Serve(networkListener)
})

errGroup.Go(func() error {
return tlsConfig.Watch(ctx)
})

errGroup.Go(func() error {
<-ctx.Done()

Expand Down

0 comments on commit 67ac693

Please sign in to comment.