Skip to content

Commit

Permalink
initial pass at more proactive version update, move off migration con…
Browse files Browse the repository at this point in the history
…dition to configmap/sample inspection
  • Loading branch information
gabemontero committed Nov 13, 2020
1 parent 7026e73 commit 2e435a6
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 21 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ The samples resource maintains the following conditions in its status:
-- False when all imagestream changes are acknowledged
-- The list of pending imagestreams will be represented by a configmap for each imagestream in the samples operator's namespace (where the imagestream name is the configmap name). When the imagestream has completed imports, the respective configmap for the imagestream is deleted.
- ImportCredentialsExist
-- A 'samples-registry-credentials' secret has been copied into the openshift namespace
-- Credentials for pulling from `registry.redhat.io` exist in the `pull-secret` Secret in the `openshift-config` namespace
- ConfigurationValid
-- True or false based on whether any of the restricted changes noted above have been submitted
- RemovePending
Expand All @@ -78,7 +78,7 @@ The samples resource maintains the following conditions in its status:
-- True when an error has occurred
-- The configmap associated with the imagestream will remain in the samples operator's namespace. There will be a key in the configmap's data map for each imagestreamtag, where the value of the entry will be the error message reported in the imagestream status.
- MigrationInProgress
-- True when the samples operator has detected that its version is different than the samples operator version the current samples set were installed with
-- This condition is deprecated as of the 4.7 branch of this repository. Upgrade tracking is now achieved via the other conditions and both the per imagestream configmaps and the imagestream-to-image configmap.

# CVO Cluster Operator Status

Expand Down
138 changes: 122 additions & 16 deletions pkg/stub/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ func (h *Handler) prepSamplesWatchEvent(kind, name string, annotations map[strin
return cfg, filePath, true, nil
}

if util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
(util.ConditionTrue(cfg, v1.MigrationInProgress) || h.version != cfg.Status.Version) {
/*if util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
(util.ConditionTrue(cfg, v1.MigrationInProgress) || h.version != cfg.Status.Version) {*/
if h.shouldSetVersion(cfg) {
// we have gotten events for items early in the migration list but we have not
// finished processing the list
// avoid (re)upsert, but check import status
Expand Down Expand Up @@ -759,15 +760,15 @@ func (h *Handler) Handle(event util.Event) error {
util.ConditionTrue(cfg, v1.SamplesExist) &&
util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
h.version == cfg.Status.Version {
logrus.Debugf("At steady state: config the same and exists is true, in progress false, and version correct")
logrus.Printf("At steady state: config the same and exists is true, in progress false, and version correct")

// once the status version is in sync, we can turn off the migration condition
if util.ConditionTrue(cfg, v1.MigrationInProgress) {
/*if util.ConditionTrue(cfg, v1.MigrationInProgress) {
h.GoodConditionUpdate(cfg, corev1.ConditionFalse, v1.MigrationInProgress)
dbg := " turn migration off"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
}*/

// migration inevitably means we need to refresh the file cache as samples are added and
// deleted between releases, so force file map building
Expand Down Expand Up @@ -802,31 +803,38 @@ func (h *Handler) Handle(event util.Event) error {
util.ConditionUpdate(cfg, condition)
}

_, err = h.configmapclientwrapper.Get(util.IST2ImageMap)
if err != nil && !kerrors.IsNotFound(err) {
return err
}
ist2ImageMapExists := err == nil

cfg = h.refetchCfgMinimizeConflicts(cfg)
if util.ConditionFalse(cfg, v1.MigrationInProgress) &&
len(cfg.Status.Version) > 0 &&
h.version != cfg.Status.Version {
//if util.ConditionFalse(cfg, v1.MigrationInProgress) &&
if h.shouldSetVersion(cfg) &&
//len(cfg.Status.Version) > 0 && // this len check distinguished between upgrade and initial install
ist2ImageMapExists {
logrus.Printf("Undergoing migration from %s to %s", cfg.Status.Version, h.version)
// delete ist to image map as we will need to build a new one;
// we do not remove this map during delete/remove processing
// to facilitate disconnected users deciding which images to mirror
err := h.configmapclientwrapper.Delete(util.IST2ImageMap)
if err != nil {
return h.configmapclientwrapper.Delete(util.IST2ImageMap)
/*if err != nil {
// simply retry
return err
}
logrus.Printf("Undergoing migration from %s to %s", cfg.Status.Version, h.version)
h.GoodConditionUpdate(cfg, corev1.ConditionTrue, v1.MigrationInProgress)
dbg := "turn migration on"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)*/
}

cfg = h.refetchCfgMinimizeConflicts(cfg)
if !configChanged &&
util.ConditionTrue(cfg, v1.SamplesExist) &&
if !configChanged && h.shouldSetVersion(cfg) && ist2ImageMapExists {
/*util.ConditionTrue(cfg, v1.SamplesExist) &&
util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
util.Condition(cfg, v1.MigrationInProgress).LastUpdateTime.Before(&util.Condition(cfg, v1.ImageChangesInProgress).LastUpdateTime) &&
h.version != cfg.Status.Version {
h.version != cfg.Status.Version {*/
if util.ConditionTrue(cfg, v1.ImportImageErrorsExist) {
logrus.Printf("An image import error occurred applying the latest configuration on version %s; this operator will periodically retry the import, or an administrator can investigate and remedy manually", h.version)
}
Expand Down Expand Up @@ -1073,7 +1081,7 @@ func (h *Handler) createSamples(cfg *v1.Config, updateIfPresent, registryChanged

// if after initial startup, and not migration, the only cfg change was changing the registry, since that does not impact
// the templates, we can move on
if len(unskippedTemplates) == 0 && registryChanged && util.ConditionTrue(cfg, v1.SamplesExist) && util.ConditionFalse(cfg, v1.MigrationInProgress) {
if len(unskippedTemplates) == 0 && registryChanged && h.sufficientSteadyStateForAnOperator(cfg) { //util.ConditionTrue(cfg, v1.SamplesExist) && util.ConditionFalse(cfg, v1.MigrationInProgress) {
return false, nil
}

Expand Down Expand Up @@ -1138,3 +1146,101 @@ func GetNamespace() string {
b, _ := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/" + corev1.ServiceAccountNamespaceKey)
return string(b)
}

func (h *Handler) shouldSetProgressingFalse(cfg *v1.Config) bool {
return util.ConditionTrue(cfg, v1.SamplesExist) &&
util.ConditionTrue(cfg, v1.ImageChangesInProgress) &&
(len(h.activeImageStreams()) == 0 || util.ConditionTrue(cfg, v1.ImportImageErrorsExist)) &&
!h.upsertInProgress &&
h.numOfManagedImageStreamsPresent() >= h.numOfManagedImageStreams()
}

func (h *Handler) shouldSetVersion(cfg *v1.Config) bool {
return util.ConditionTrue(cfg, v1.SamplesExist) &&
util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
!h.upsertInProgress &&
h.version != cfg.Status.Version &&
h.areStreamsAtCorrectVersion()
}

func (h *Handler) sufficientSteadyStateForAnOperator(cfg *v1.Config) bool {
return util.ConditionTrue(cfg, v1.SamplesExist) &&
util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
h.version == cfg.Status.Version
}

func (h *Handler) areStreamsAtCorrectVersion() bool {
for key, _ := range h.imagestreamFile {
// remember, our get wrapper goes through the lister, so we are only hitting the controller cache
skipped, ok := h.skippedImagestreams[key]
if skipped && ok {
continue
}
is, err := h.imageclientwrapper.Get(key)
if err != nil {
logrus.Warningf("areStreamsAtCorrectVersion: get of stream %s got error: %s", key, err.Error())
return false
}
if is == nil {
logrus.Warningf("areStreamsAtCorrectVersion: got nil for stream %s but no error", key)
return false
}
if is.Annotations == nil {
logrus.Warningf("areStreamsAtCorrectVersion: stream %s has no annotation map", key)
return false
}
isv, ok := is.Annotations[v1.SamplesVersionAnnotation]
if !ok {
logrus.Warningf("areStreamsAtCorrectVersion: stream %s does not have version annotation", key)
return false
}
if isv != h.version {
logrus.Warningf("areStreamsAtCorrectVersion: stream %s is at version %s instead of %s", key, isv, h.version)
return false
}
logrus.Printf("areStreamsAtCorrectVersion: stream %s is a correct version %s", key, isv)
}
return true
}

func (h *Handler) numOfManagedImageStreams() int {
rc := len(h.imagestreamFile) - len(h.skippedImagestreams)
logrus.Printf("samples operator currently managing %d imagestreams", rc)
return rc
}

func (h *Handler) numOfManagedImageStreamsPresent() int {
rc := 0
for key, _ := range h.imagestreamFile {
// remember, our get wrapper goes through the lister, so we are only hitting the controller cache
skipped, ok := h.skippedImagestreams[key]
if skipped && ok {
continue
}
is, err := h.imageclientwrapper.Get(key)
if err != nil {
logrus.Warningf("numOfManagedImageStreamsPresent: get of stream %s got error: %s", key, err.Error())
continue
}
if is == nil {
logrus.Warningf("numOfManagedImageStreamsPresent: got nil for stream %s but no error", key)
continue
}
if is.Labels == nil {
logrus.Warningf("numOfManagedImageStreamsPresent: stream %s has no labels map", key)
continue
}
managed, ok := is.Labels[v1.SamplesManagedLabel]
if !ok {
logrus.Warningf("numOfManagedImageStreamsPresent: stream %s does not have managed label", key)
continue
}
if managed != "true" {
logrus.Warningf("numOfManagedImageStreamsPresent: stream %s has managed value of %s but is not in internal skip list", key, managed)
continue
}
rc++
}
logrus.Printf("numOfManagedImageStreamsPresent: samples operator found %d managed imagestreams", rc)
return rc
}
12 changes: 11 additions & 1 deletion pkg/stub/imagestreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,21 @@ func (h *Handler) processImageStreamWatchEvent(is *imagev1.ImageStream, deleted
return err
}

if util.ConditionTrue(cfg, v1.ImageChangesInProgress) && len(h.activeImageStreams()) == 0 && !h.upsertInProgress {
// the next two attempts to update samples conditions stem from analysis of installs with some amount of
// lower level issues (api server not ready, thottling, watches getting closed) as a failsafe to make sure
// the operator reports happiness to the CVO as quickly as possible... ultimately, with so many imagestreams, some of their events
// eventually get through
if h.shouldSetProgressingFalse(cfg) {
dbg := fmt.Sprintf("progressing false update on imagestream %s event", is.Name)
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
if h.shouldSetVersion(cfg) {
cfg.Status.Version = h.version
dbg := fmt.Sprintf("upd status version to %s on imagestream event %s", h.version, is.Name)
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
return nil

}
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/cluster_samples_operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1146,15 +1146,15 @@ func coreTestUpgrade(t *testing.T) {
t.Fatalf("problem updating deployment env")
}

if cfg.Status.ManagementState == operatorsv1api.Managed {
/*if cfg.Status.ManagementState == operatorsv1api.Managed {
err = wait.PollImmediate(1*time.Second, 3*time.Minute, func() (bool, error) {
cfg := verifyOperatorUp(t)
if util.ConditionTrue(cfg, samplesapi.MigrationInProgress) {
return true, nil
}
return false, nil
})
}
}*/

if err != nil {
dumpPod(t)
Expand Down

0 comments on commit 2e435a6

Please sign in to comment.