Merged
17 changes: 11 additions & 6 deletions main.go
@@ -16,18 +16,20 @@ import (
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/collectors"
-	"github.com/rhobs/obsctl-reloader/pkg/loader"
-	"github.com/rhobs/obsctl-reloader/pkg/loop"
-	"github.com/rhobs/obsctl-reloader/pkg/syncer"
 	"k8s.io/client-go/kubernetes/scheme"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
 	k8sconfig "sigs.k8s.io/controller-runtime/pkg/client/config"
+
+	"github.com/rhobs/obsctl-reloader/pkg/loader"
+	"github.com/rhobs/obsctl-reloader/pkg/loop"
+	"github.com/rhobs/obsctl-reloader/pkg/syncer"
 )

 const (
-	obsctlContextAPIName        = "api"
-	defaultSleepDurationSeconds = 15
+	obsctlContextAPIName               = "api"
+	defaultSleepDurationSeconds        = 15
+	defaultConfigReloadIntervalSeconds = 60
 )

 type cfg struct {
@@ -39,6 +41,7 @@ type cfg struct {
 	logRulesEnabled      bool
 	logLevel             string
 	listenInternal       string
+	configReloadInterval uint
 }

 func setupLogger(logLevel string) log.Logger {
@@ -68,6 +71,7 @@ func parseFlags() *cfg {

 	// Common flags.
 	flag.UintVar(&cfg.sleepDurationSeconds, "sleep-duration-seconds", defaultSleepDurationSeconds, "The interval in seconds after which all PrometheusRules are synced to Observatorium API.")
+	flag.UintVar(&cfg.configReloadInterval, "config-reload-interval-seconds", defaultConfigReloadIntervalSeconds, "The interval in seconds for reloading configuration.")
 	flag.StringVar(&cfg.observatoriumURL, "observatorium-api-url", "", "The URL of the Observatorium API to which rules will be synced.")
 	flag.StringVar(&cfg.managedTenants, "managed-tenants", "", "The name of the tenants whose rules should be synced. If there are multiple tenants, ensure they are comma-separated.")
 	flag.StringVar(&cfg.issuerURL, "issuer-url", "", "The OIDC issuer URL, see https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery.")
@@ -149,7 +153,7 @@ func main() {
 		reg,
 	)
 	if err := o.InitOrReloadObsctlConfig(); err != nil {
-		level.Error(logger).Log("msg", "error reloading/initializing obsctl config", "error", err)
+		level.Error(logger).Log("msg", "error initializing obsctl config", "error", err)
 		panic(err)
 	}

@@ -165,6 +169,7 @@ func main() {
 				o,
 				cfg.logRulesEnabled,
 				cfg.sleepDurationSeconds,
+				cfg.configReloadInterval,
 			)
 		}, func(_ error) {
 			cancel()
3 changes: 2 additions & 1 deletion main_test.go
@@ -10,6 +10,7 @@ import (
 	"github.com/go-kit/log"
 	lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+
 	"github.com/rhobs/obsctl-reloader/pkg/loop"
 )

@@ -82,7 +83,7 @@ func TestSyncLoop(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	time.AfterFunc(25*time.Second, func() { cancel() })

-	testutil.Ok(t, loop.SyncLoop(ctx, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), rl, rs, true, 5))
+	testutil.Ok(t, loop.SyncLoop(ctx, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), rl, rs, true, 5, 60))

 	testutil.Equals(t, 12, rs.setCurrentTenantCnt)
 	testutil.Equals(t, 4, rs.metricsRulesCnt)
6 changes: 6 additions & 0 deletions pkg/loop/syncloop.go
@@ -6,6 +6,7 @@ import (

 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
+
 	"github.com/rhobs/obsctl-reloader/pkg/loader"
 	"github.com/rhobs/obsctl-reloader/pkg/syncer"
 )
@@ -19,9 +20,14 @@ func SyncLoop(
 	o syncer.RulesSyncer,
 	logRulesEnabled bool,
 	sleepDurationSeconds uint,
+	configReloadIntervalSeconds uint,
 ) error {
 	for {
 		select {
+		case <-time.After(time.Duration(configReloadIntervalSeconds) * time.Second):
+			if err := o.InitOrReloadObsctlConfig(); err != nil {

Member:

Hmm, so generally this is good. But InitOrReloadObsctlConfig() attempts to read and load the obsctl config from disk first, so I don't think it will actually reload the secrets here.

The reason we save it on disk and load it if needed is so that we don't end up regularly calling SSO for new tokens, but can instead use the present one until full expiry (usually ~15m).

But to unblock, maybe we can add a bool param to the method to skip the read-from-disk step and call it with true here? :)
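
A minimal sketch of what that suggestion could look like; the parameter name and both helpers are hypothetical stand-ins for illustration, not obsctl-reloader's actual API:

```go
package main

import "fmt"

// syncer stands in for ObsctlRulesSyncer; this only illustrates the
// suggested bool parameter, not the merged implementation.
type syncer struct{}

// readFromDisk and initFromClusterSecrets are hypothetical helpers.
func (o *syncer) readFromDisk() error           { return fmt.Errorf("no config on disk") }
func (o *syncer) initFromClusterSecrets() error { return nil }

func (o *syncer) InitOrReloadObsctlConfig(skipDiskRead bool) error {
	if !skipDiskRead {
		// Reuse the config persisted on disk (and any cached, unexpired token)
		// so we don't call SSO for a fresh token on every pass.
		if err := o.readFromDisk(); err == nil {
			return nil
		}
	}
	// Rebuild the obsctl config from the tenant secrets in the cluster.
	return o.initFromClusterSecrets()
}

func main() {
	var s syncer
	// The periodic reload path would pass true to force re-reading secrets.
	fmt.Println(s.InitOrReloadObsctlConfig(true))
}
```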

Contributor (Author):

Good catch!

IMHO, in all the cases where InitOrReloadObsctlConfig is called, we want it to get fresh tenant credentials from the k8s API (even before talking to SSO to try to get a token), because they could have changed. Otherwise we are not truly reloading any configuration: everything that gets reloaded was already there, and nothing "new" is detected or used unless someone modifies the obsctl files on the filesystem.

I added some small logic that removes and re-adds a tenant in obsctl only if its OIDC credentials have changed (client ID, secret, audience, issuer URL, or offline-access attributes).

Now, obsctl-reloader creates a client for every single tenant discovered from secrets, to confirm they are valid (in the if !o.skipClientCheck block). This means SSO is called for all of them anyway. Client creation only skips calling SSO when it holds a non-expired token, which is never the case here because we never update the secret with the access tokens.

Considering that, when a tenant's test-client creation fails, we skip the error and keep loading the other clients, I think we can remove that check from inside the configuration loading and let it happen when we call the Observatorium API. This way we can take advantage of the access token that obsctl-reloader caches for existing tenants, and only fetch new tokens from SSO for the tenants that were reloaded. WDYT, @saswatamcode?
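
To illustrate the caching behaviour described above, a small sketch using golang.org/x/oauth2, which provides the token type and its expiry check; the needNewToken helper is ours, for illustration only:

```go
package main

import (
	"fmt"
	"time"

	"golang.org/x/oauth2"
)

// needNewToken reports whether SSO must be called: only a cached token that
// has not yet expired lets us skip the round trip. oauth2.Token.Valid()
// returns true when a token exists and is not expired.
func needNewToken(cached *oauth2.Token) bool {
	return cached == nil || !cached.Valid()
}

func main() {
	expired := &oauth2.Token{AccessToken: "a", Expiry: time.Now().Add(-time.Minute)}
	fresh := &oauth2.Token{AccessToken: "b", Expiry: time.Now().Add(10 * time.Minute)}

	fmt.Println(needNewToken(nil))     // true: nothing cached, must call SSO
	fmt.Println(needNewToken(expired)) // true: cached token already expired
	fmt.Println(needNewToken(fresh))   // false: reuse until full expiry (~15m)
}
```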

Member:

Ack, that makes sense! Let's test this out somehow (we would need to update somebody's secret to a wrong value and then back to a right one). :)
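
A self-contained sketch of that rotation scenario, using stand-in types (the real TenantConfig lives in obsctl's config package) and the same field-by-field comparison as tenantConfigMatches below:

```go
package main

import "fmt"

// Stand-ins mirroring the fields compared in tenantConfigMatches.
type OIDCConfig struct {
	Audience, ClientID, ClientSecret, IssuerURL string
	OfflineAccess                               bool
}

type TenantConfig struct {
	Tenant string
	OIDC   *OIDCConfig
}

// Same comparison logic as the diff below, minus the syncer receiver.
func tenantConfigMatches(a, b TenantConfig) bool {
	if a.Tenant != b.Tenant {
		return false
	}
	if a.OIDC == nil || b.OIDC == nil {
		return a.OIDC == b.OIDC // only equal when both are nil
	}
	return a.OIDC.ClientID == b.OIDC.ClientID &&
		a.OIDC.ClientSecret == b.OIDC.ClientSecret &&
		a.OIDC.Audience == b.OIDC.Audience &&
		a.OIDC.IssuerURL == b.OIDC.IssuerURL &&
		a.OIDC.OfflineAccess == b.OIDC.OfflineAccess
}

func main() {
	base := TenantConfig{Tenant: "team-a", OIDC: &OIDCConfig{ClientID: "id", ClientSecret: "old"}}
	rotated := TenantConfig{Tenant: "team-a", OIDC: &OIDCConfig{ClientID: "id", ClientSecret: "new"}}

	fmt.Println(tenantConfigMatches(base, base))    // true: unchanged, tenant kept as-is
	fmt.Println(tenantConfigMatches(base, rotated)) // false: secret rotated, remove + re-add
}
```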

+				level.Error(logger).Log("msg", "error reloading obsctl config", "error", err)
+			}
 		case <-time.After(time.Duration(sleepDurationSeconds) * time.Second):
 			prometheusRules, err := k.GetPrometheusRules()
 			if err != nil {
27 changes: 27 additions & 0 deletions pkg/syncer/obsctlsyncer.go
@@ -218,6 +218,15 @@ func (o *ObsctlRulesSyncer) InitOrReloadObsctlConfig() error {
 			}
 		}

+		existingTenantCfg, foundTenant := o.c.APIs[obsctlContextAPIName].Contexts[tenant]
+		if foundTenant && !o.tenantConfigMatches(existingTenantCfg, tenantCfg) {
+			err := o.c.RemoveTenant(o.logger, tenantCfg.Tenant, obsctlContextAPIName)
+			if err != nil {
+				// We don't really care about the error here, logging only for visibility.
+				level.Info(o.logger).Log("msg", "removing tenant", "tenant", tenant, "error", err)
+			}
+		}
+
 		if err := o.c.AddTenant(o.logger, tenantCfg.Tenant, obsctlContextAPIName, tenantCfg.Tenant, tenantCfg.OIDC); err != nil {
 			level.Error(o.logger).Log("msg", "adding tenant", "tenant", tenant, "error", err)
 			return errors.Wrap(err, "adding tenant to obsctl config")
@@ -227,6 +236,24 @@ func (o *ObsctlRulesSyncer) InitOrReloadObsctlConfig() error {
 	return nil
 }

+// tenantConfigMatches checks if two tenant configs are equal. We consider them equal if they have the same tenant name
+// and OIDC config (regardless of any token that might've been already acquired and cached).
+func (o *ObsctlRulesSyncer) tenantConfigMatches(firstConfig, secondConfig config.TenantConfig) bool {
+	if firstConfig.Tenant != secondConfig.Tenant {
+		return false
+	}
+
+	if firstConfig.OIDC == nil || secondConfig.OIDC == nil {
+		return firstConfig.OIDC == secondConfig.OIDC // both nil means equal; nil vs non-nil means the config changed
+	}
+
+	return firstConfig.OIDC.ClientID == secondConfig.OIDC.ClientID &&
+		firstConfig.OIDC.ClientSecret == secondConfig.OIDC.ClientSecret &&
+		firstConfig.OIDC.Audience == secondConfig.OIDC.Audience &&
+		firstConfig.OIDC.IssuerURL == secondConfig.OIDC.IssuerURL &&
+		firstConfig.OIDC.OfflineAccess == secondConfig.OIDC.OfflineAccess
+}
+
 func (o *ObsctlRulesSyncer) SetCurrentTenant(tenant string) error {
 	if err := o.c.SetCurrentContext(o.logger, obsctlContextAPIName, tenant); err != nil {
 		level.Error(o.logger).Log("msg", "switching context", "tenant", tenant, "error", err)