-
Notifications
You must be signed in to change notification settings - Fork 82
/
handler.go
1224 lines (1103 loc) · 44.3 KB
/
handler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package stub
import (
"context"
"crypto/tls"
"fmt"
"io/ioutil"
"net"
"os"
"runtime"
"strings"
"sync"
"time"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
kapis "k8s.io/apimachinery/pkg/apis/meta/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/wait"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
corev1lister "k8s.io/client-go/listers/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/util/retry"
imagev1 "github.com/openshift/api/image/v1"
operatorsv1api "github.com/openshift/api/operator/v1"
v1 "github.com/openshift/api/samples/v1"
templatev1 "github.com/openshift/api/template/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
imagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1"
imagev1lister "github.com/openshift/client-go/image/listers/image/v1"
sampleclientv1 "github.com/openshift/client-go/samples/clientset/versioned/typed/samples/v1"
configv1lister "github.com/openshift/client-go/samples/listers/samples/v1"
templatev1client "github.com/openshift/client-go/template/clientset/versioned/typed/template/v1"
templatev1lister "github.com/openshift/client-go/template/listers/template/v1"
"github.com/openshift/cluster-samples-operator/pkg/cache"
sampopclient "github.com/openshift/cluster-samples-operator/pkg/client"
"github.com/openshift/cluster-samples-operator/pkg/metrics"
operatorstatus "github.com/openshift/cluster-samples-operator/pkg/operatorstatus"
"github.com/openshift/cluster-samples-operator/pkg/util"
)
const (
	// Per-architecture root directories inside the operator image holding the
	// sample imagestream/template content.
	x86ContentRootDir = "/opt/openshift/operator/x86_64"
	armContentRootDir = "/opt/openshift/operator/aarch64"
	ppcContentRootDir = "/opt/openshift/operator/ppc64le"
	zContentRootDir   = "/opt/openshift/operator/s390x"
	// Keys used to track the corresponding Config spec fields; presumably map
	// keys for change/validation bookkeeping — confirm against their uses
	// elsewhere in this file.
	installtypekey    = "keyForInstallTypeField"
	regkey            = "keyForSamplesRegistryField"
	skippedstreamskey = "keyForSkippedImageStreamsField"
	skippedtempskey   = "keyForSkippedTemplatesField"
)
// NewSamplesOperatorHandler wires together the clients, listers, client
// wrappers, and in-memory caches that make up the samples operator's event
// Handler. It returns an error only if the samples typed client cannot be
// constructed from kubeconfig.
func NewSamplesOperatorHandler(kubeconfig *restclient.Config,
	listers *sampopclient.Listers) (*Handler, error) {
	h := &Handler{}

	h.initter = &defaultInClusterInitter{}
	h.initter.init(h, kubeconfig)

	crdWrapper := &generatedCRDWrapper{}
	client, err := sampleclientv1.NewForConfig(kubeconfig)
	if err != nil {
		return nil, err
	}
	crdWrapper.client = client.Configs()
	crdWrapper.lister = listers.Config
	h.crdwrapper = crdWrapper

	h.crdlister = listers.Config
	h.streamlister = listers.ImageStreams
	h.tplstore = listers.Templates
	h.cfgsecretlister = listers.ConfigNamespaceSecrets

	// file-based getters for the sample content shipped in the operator image
	h.Fileimagegetter = &DefaultImageStreamFromFileGetter{}
	h.Filetemplategetter = &DefaultTemplateFromFileGetter{}
	h.Filefinder = &DefaultResourceFileLister{}

	h.imageclientwrapper = &defaultImageStreamClientWrapper{h: h, lister: listers.ImageStreams}
	h.templateclientwrapper = &defaultTemplateClientWrapper{h: h, lister: listers.Templates}
	h.configmapclientwrapper = &defaultConfigMapClientWrapper{h: h, lister: listers.ConfigMaps}
	h.cvowrapper = operatorstatus.NewClusterOperatorHandler(h.configclient)

	// initialize all maps before any call that might consult them
	h.skippedImagestreams = make(map[string]bool)
	h.skippedTemplates = make(map[string]bool)
	h.imagestreamFile = make(map[string]string)
	h.templateFile = make(map[string]string)
	h.imagestreatagToImage = make(map[string]string)
	h.imagestreamRetry = make(map[string]metav1.Time)
	h.mapsMutex = sync.Mutex{}

	// bootstrap the default Config if one does not exist yet; a failure here
	// is logged rather than fatal, since event processing will retry creation
	if _, err := h.CreateDefaultResourceIfNeeded(nil); err != nil {
		logrus.Warnf("error creating default Config during handler initialization (will be retried): %v", err)
	}

	h.version = os.Getenv("RELEASE_VERSION")

	metrics.InitializeMetricsCollector(listers)

	return h, nil
}
// Handler reconciles the samples Config resource and the sample
// imagestreams/templates the operator manages in the openshift namespace.
type Handler struct {
	// initter performs in-cluster initialization of the typed clients below.
	initter InClusterInitter

	// crdwrapper wraps create/update/get access to the samples Config resource.
	crdwrapper CRDWrapper
	// cvowrapper publishes status to the samples ClusterOperator.
	cvowrapper *operatorstatus.ClusterOperatorHandler

	restconfig   *restclient.Config
	tempclient   *templatev1client.TemplateV1Client
	imageclient  *imagev1client.ImageV1Client
	coreclient   *corev1client.CoreV1Client
	configclient *configv1client.ConfigV1Client

	// wrappers around the typed clients/listers (also serve as test seams)
	imageclientwrapper     ImageStreamClientWrapper
	templateclientwrapper  TemplateClientWrapper
	configmapclientwrapper ConfigMapClientWrapper

	// listers backed by shared informer caches
	crdlister        configv1lister.ConfigLister
	streamlister     imagev1lister.ImageStreamNamespaceLister
	tplstore         templatev1lister.TemplateNamespaceLister
	cfgsecretlister  corev1lister.SecretNamespaceLister
	opersecretlister corev1lister.SecretNamespaceLister

	// getters/listers for the file-based sample content in the operator image
	Fileimagegetter    ImageStreamFromFileGetter
	Filetemplategetter TemplateFromFileGetter
	Filefinder         ResourceFileLister

	// skip lists built from the Config spec (see buildSkipFilters) and the
	// on-disk inventory maps (see buildFileMaps); guarded by mapsMutex where
	// concurrent event processing can touch them
	skippedTemplates     map[string]bool
	skippedImagestreams  map[string]bool
	imagestreamFile      map[string]string
	templateFile         map[string]string
	imagestreatagToImage map[string]string
	// imagestreamRetry tracks a timestamp per imagestream; presumably the last
	// import retry time — confirm against its uses elsewhere in this file
	imagestreamRetry map[string]metav1.Time

	// mapsMutex guards the maps above
	mapsMutex sync.Mutex

	// upsertInProgress is set while a sample creation cycle runs so a
	// concurrent Config delete can detect it and retry (see Handle)
	upsertInProgress bool
	secretRetryCount int8
	// version is the operator's RELEASE_VERSION env value
	version string
	// tbrCheckFailed records that the bootstrap connectivity check against
	// registry.redhat.io (the terms based registry) failed
	tbrCheckFailed bool
}
// prepSamplesWatchEvent decides whether an upsert of the sample should be done, as well as data for either doing the upsert or checking the status of a prior upsert;
// the return values:
// - cfg: return this if we want to check the status of a prior upsert
// - filePath: used by the caller to look up the image content for the upsert
// - doUpsert: whether to do the upsert or not ... not doing the upsert optionally triggers the need for checking status of prior upsert
// - err: if a problem occurred getting the Config, we return the error to bubble up and initiate a retry
func (h *Handler) prepSamplesWatchEvent(kind, name string, annotations map[string]string, deleted bool) (*v1.Config, string, bool, error) {
	cfg, err := h.crdwrapper.Get(v1.ConfigName)
	if cfg == nil || err != nil {
		// if not found, then this also would mean a deletion event
		if kerrors.IsNotFound(err) {
			logrus.Printf("Received watch event %s but not upserting since deletion of the Config is in progress", kind+"/"+name)
			return nil, "", false, nil
		}
		logrus.Printf("Received watch event %s but not upserting since not have the Config yet: %#v %#v", kind+"/"+name, err, cfg)
		return nil, "", false, err
	}
	// work on a copy so mutations do not leak back into the lister cache
	cfg = cfg.DeepCopy()
	if cfg.DeletionTimestamp != nil {
		// we do not return the cfg in this case because we do not want to bother with any progress tracking
		logrus.Printf("Received watch event %s but not upserting since deletion of the Config is in progress", kind+"/"+name)
		// note, the imagestream watch cache gets cleared once the deletion/finalizer processing commences
		return nil, "", false, nil
	}
	// we do not return the cfg in these cases because we do not want to bother with any progress tracking
	switch cfg.Spec.ManagementState {
	case operatorsv1api.Removed:
		logrus.Debugf("Not upserting %s/%s event because operator is in removed state and image changes are not in progress", kind, name)
		return nil, "", false, nil
	case operatorsv1api.Unmanaged:
		logrus.Debugf("Not upserting %s/%s event because operator is in unmanaged state and image changes are not in progress", kind, name)
		return nil, "", false, nil
	}
	filePath := ""
	// on pod restarts samples watch events come in before the first
	// Config event, or we might not get an event at all if there were no changes;
	// restarts as part of migrations also make things interesting because existing
	// samples may have been deleted on disk, but we'll address that below
	force := metrics.StreamsEmpty()
	h.buildFileMaps(cfg, force)
	// make sure skip filter list is ready
	h.buildSkipFilters(cfg)
	inInventory := false
	skipped := false
	switch kind {
	case "imagestream":
		filePath, inInventory = h.imagestreamFile[name]
		if !inInventory {
			logrus.Debugf("watch stream event %s not part of operators inventory", name)
			// we now have cases where sample providers are deleting entire imagestreams;
			// let's make sure there are no stale entries with inprogress / importerror
			_, err := h.configmapclientwrapper.Get(name)
			if err != nil && kerrors.IsNotFound(err) {
				// ConfigMap should only exist if the imagestream has an error;
				// none found, so nothing stale to clean up
				return nil, "", false, nil
			}
			if err != nil {
				// GET errors indicate a potential issue with the apiserver, return error and try again
				return nil, "", false, err
			}
			// stale error-tracking ConfigMap exists for a stream no longer in
			// inventory; delete it (any delete error bubbles up for retry)
			err = h.configmapclientwrapper.Delete(name)
			return nil, "", false, err
		}
		_, skipped = h.skippedImagestreams[name]
	case "template":
		filePath, inInventory = h.templateFile[name]
		if !inInventory {
			// in the case of templates we can just ignore content from prior releases that is no longer
			// part of the current release
			logrus.Printf("watch template event %s not part of operators inventory", name)
			return nil, "", false, nil
		}
		_, skipped = h.skippedTemplates[name]
	}
	if skipped {
		logrus.Printf("watch event %s in skipped list for %s", name, kind)
		// but return cfg to potentially toggle pending/import error condition
		return cfg, "", false, nil
	}
	if deleted { //&& (kind == "template" || cache.UpsertsAmount() == 0) {
		logrus.Printf("going to recreate deleted managed sample %s/%s", kind, name)
		return cfg, filePath, true, nil
	}
	if h.shouldSetVersion(cfg) {
		// we have gotten events for items early in the migration list but we have not
		// finished processing the list
		// avoid (re)upsert, but check import status
		if util.ConditionTrue(cfg, v1.MigrationInProgress) {
			logrus.Printf("watch event for %s/%s while migration in progress, image in progress is false; will not update sample because of this event", kind, name)
		}
		return cfg, "", false, nil
	}
	if annotations != nil {
		isv, ok := annotations[v1.SamplesVersionAnnotation]
		logrus.Debugf("Comparing %s/%s version %s ok %v with git version %s", kind, name, isv, ok, h.version)
		if ok && isv == h.version {
			logrus.Debugf("Not upserting %s/%s cause operator version matches", kind, name)
			// but return cfg to potentially toggle pending condition
			return cfg, "", false, nil
		}
	}
	return cfg, filePath, true, nil
}
// GoodConditionUpdate sets conditionType on cfg to newStatus, clearing any
// prior message/reason, and keeps the ConfigurationValid metric in sync with
// the new status.
//
// To avoid spamming the master with no-op updates when duplicate events come
// in, the condition is only touched when its status actually changes. The
// original code re-tested `condition.Status != newStatus` inside the identical
// outer check before stamping LastTransitionTime; that inner test was always
// true and has been removed.
func (h *Handler) GoodConditionUpdate(cfg *v1.Config, newStatus corev1.ConditionStatus, conditionType v1.ConfigConditionType) {
	logrus.Debugf("updating condition %s to %s", conditionType, newStatus)
	condition := util.Condition(cfg, conditionType)
	if condition.Status != newStatus {
		now := kapis.Now()
		condition.LastUpdateTime = now
		// status is known to differ here, so this is a genuine transition
		condition.LastTransitionTime = now
		condition.Status = newStatus
		condition.Message = ""
		condition.Reason = ""
		util.ConditionUpdate(cfg, condition)
	}
	// mirror the validity condition into the metrics gauge
	if conditionType == v1.ConfigurationValid {
		switch newStatus {
		case corev1.ConditionTrue:
			metrics.ConfigInvalid(false)
		default:
			metrics.ConfigInvalid(true)
		}
	}
}
// IsRetryableAPIError reports whether err represents a transient apiserver
// failure that is worth retrying.
// copied from k8s.io/kubernetes/test/utils/
func IsRetryableAPIError(err error) bool {
	if err == nil {
		return false
	}
	// These errors may indicate a transient error that we can retry.
	switch {
	case kerrors.IsInternalError(err),
		kerrors.IsTimeout(err),
		kerrors.IsServerTimeout(err),
		kerrors.IsTooManyRequests(err),
		utilnet.IsProbableEOF(err),
		utilnet.IsConnectionReset(err):
		return true
	}
	// If the error sends the Retry-After header, we respect it as an explicit confirmation we should retry.
	_, shouldRetry := kerrors.SuggestsClientDelay(err)
	return shouldRetry
}
// updateCfgArch appends the architecture matching the running binary's GOARCH
// to cfg.Spec.Architectures and returns cfg.
// this method assumes it is only called on initial cfg create, or if the Architecture array len == 0
func (h *Handler) updateCfgArch(cfg *v1.Config) *v1.Config {
	arch := runtime.GOARCH
	switch {
	// if you look at https://golang.org/dl/ the arch symbols for ppc and 390 contain
	// the values of our constants below.
	case strings.Contains(arch, v1.PPCArchitecture):
		cfg.Spec.Architectures = append(cfg.Spec.Architectures, v1.PPCArchitecture)
	case strings.Contains(arch, v1.S390Architecture):
		cfg.Spec.Architectures = append(cfg.Spec.Architectures, v1.S390Architecture)
	case strings.Contains(arch, v1.ARMArchitecture):
		cfg.Spec.Architectures = append(cfg.Spec.Architectures, v1.ARMArchitecture)
	// AMD and X86 are both recorded as X86 (the original used a fallthrough)
	case strings.Contains(arch, v1.AMDArchitecture) || strings.Contains(arch, v1.X86Architecture):
		cfg.Spec.Architectures = append(cfg.Spec.Architectures, v1.X86Architecture)
	default:
		logrus.Warningf("unsupported hardware architecture indicated by the golang GOARCH variable being set to %s", runtime.GOARCH)
	}
	return cfg
}
// tbrInaccessible probes connectivity to registry.redhat.io (the terms based
// registry) and returns true when it appears unreachable, in which case the
// operator bootstraps as Removed. The outcome of the connection probe is
// recorded in h.tbrCheckFailed. The failure log message previously claimed a
// 3 minute window while the poll actually runs for up to 5 minutes; the
// message now matches the poll duration.
func (h *Handler) tbrInaccessible() bool {
	if h.configclient == nil {
		// unit test environment
		return false
	}
	// even with the connection attempt below, we still do the ipv6/proxy checks in case both ipv6 and proxy
	// are employed, as we will return differently here based on which are
	if util.IsIPv6() {
		logrus.Print("registry.redhat.io does not support ipv6, bootstrap to removed")
		return true
	}
	// if a proxy is in play, the registry.redhat.io connection attempt during startup is problematic at best;
	// assume tbr is accessible since a proxy implies external access, and not disconnected
	proxy, err := h.configclient.Proxies().Get(context.TODO(), "cluster", metav1.GetOptions{})
	if err != nil {
		logrus.Printf("unable to retrieve proxy configuration as part of testing registry.redhat.io connectivity: %s", err.Error())
	} else {
		if len(proxy.Status.HTTPSProxy) > 0 || len(proxy.Status.HTTPProxy) > 0 {
			logrus.Printf("with global proxy configured assuming registry.redhat.io is accessible, bootstrap to Managed")
			return false
		}
	}
	err = wait.PollImmediate(20*time.Second, 5*time.Minute, func() (bool, error) {
		// we have seen cases in the field with disconnected cluster where the default connection timeout can be
		// very long (15 minutes in one case); so we do an initial non-tls connection where we can specify a quicker
		// timeout to filter out that scenario and default to tbr inaccessible / Removed in an expedient fashion
		connWithTimeout, err := net.DialTimeout("tcp", "registry.redhat.io:443", 15*time.Second)
		if err != nil {
			logrus.Infof("test connection with timeout failed with %s", err.Error())
			return false, nil
		}
		defer connWithTimeout.Close()
		// still do the tls form of connect (using our connection with the shorter timeout) to confirm
		// ssl handshake is OK
		tlsConf := &tls.Config{
			ServerName: "registry.redhat.io",
		}
		conn := tls.Client(connWithTimeout, tlsConf)
		defer conn.Close()
		err = conn.Handshake()
		if err != nil {
			logrus.Infof("test tls connection to registry.redhat.io experienced SSL handshake error %s", err.Error())
			// these can be intermittent as well so we'll retry
			return false, nil
		}
		logrus.Infof("test connection to registry.redhat.io successful")
		return true, nil
	})
	if err == nil {
		h.tbrCheckFailed = false
		return false
	}
	h.tbrCheckFailed = true
	// the poll above runs for up to 5 minutes; keep the message accurate
	logrus.Infof("unable to establish HTTPS connection to registry.redhat.io after 5 minutes, bootstrap to Removed")
	return true
}
// CreateDefaultResourceIfNeeded creates the default samples Config resource
// when none exists (or after a delete of the prior one completes), returning
// the Config in play (nil cfg on entry means "bootstrap"). If cfg carries a
// deletion timestamp, it first waits for the delete to finish before creating
// the replacement.
//
// Bug fix vs. the original: inside the delete-wait poll closure, the access
// error check tested the enclosing function's `err` variable (always nil at
// that point) instead of the `e` returned by the Get, so Get failures other
// than NotFound were never logged and the branch was unreachable.
func (h *Handler) CreateDefaultResourceIfNeeded(cfg *v1.Config) (*v1.Config, error) {
	// assume the caller has call lock on the mutex .. our pattern is to have that as
	// high up the stack as possible ... lock because need to
	// coordinate with event handler processing
	// when it completely updates all imagestreams/templates/statuses
	deleteInProgress := cfg != nil && cfg.DeletionTimestamp != nil

	var err error
	if deleteInProgress {
		cfg = &v1.Config{}
		cfg.Name = v1.ConfigName
		cfg.Kind = "Config"
		cfg.APIVersion = v1.GroupName + "/" + v1.Version
		// wait for the prior Config to actually disappear before recreating
		err = wait.PollImmediate(3*time.Second, 30*time.Second, func() (bool, error) {
			s, e := h.crdwrapper.Get(v1.ConfigName)
			if kerrors.IsNotFound(e) {
				return true, nil
			}
			if e != nil {
				// transient access problem; log and keep polling
				logrus.Printf("create default config access error %v", e)
				return false, nil
			}
			// based on 4.0 testing, we've been seeing empty resources returned
			// in the not found case, but just in case ...
			if s == nil {
				return true, nil
			}
			// means still found ... will return wait.ErrWaitTimeout if this continues
			return false, nil
		})
		if err != nil {
			return nil, h.processError(cfg, v1.SamplesExist, corev1.ConditionUnknown, err, "issues waiting for delete to complete: %v")
		}
		cfg = nil
		logrus.Println("delete of Config recognized")
	}

	if cfg == nil || kerrors.IsNotFound(err) {
		// "4a" in the "startup" workflow, just create default
		// resource and set up that way
		cfg = &v1.Config{}
		cfg.Spec.SkippedTemplates = []string{}
		cfg.Spec.SkippedImagestreams = []string{}
		cfg.Status.SkippedImagestreams = []string{}
		cfg.Status.SkippedTemplates = []string{}
		cfg.Name = v1.ConfigName
		cfg.Kind = "Config"
		cfg.APIVersion = v1.GroupName + "/" + v1.Version
		cfg = h.updateCfgArch(cfg)
		// build file maps and create configmaps with imagestreamtag to image mappings
		err := h.buildFileMaps(cfg, true)
		if err != nil {
			return nil, err
		}
		switch {
		// TODO as we gain content for non x86 platforms we can remove the nonx86 check
		case util.IsUnsupportedArch(cfg):
			cfg.Spec.ManagementState = operatorsv1api.Removed
			cfg.Status.Version = h.version
		case h.tbrInaccessible():
			cfg.Spec.ManagementState = operatorsv1api.Removed
			cfg.Status.Version = h.version
		default:
			cfg.Spec.ManagementState = operatorsv1api.Managed
		}
		h.AddFinalizer(cfg)
		logrus.Println("creating default Config")
		err = h.crdwrapper.Create(cfg)
		if err != nil {
			if !kerrors.IsAlreadyExists(err) {
				return nil, err
			}
			// in case there is some race condition
			logrus.Println("got already exists error on create default")
		}
	} else {
		logrus.Printf("Config %#v found during operator startup", cfg)
		// after a restart, this means we are beyond a bootstrap; but let's
		// preserve the state of our initial TBR check
		h.tbrCheckFailed = false
		if cfg.Status.ManagementState == operatorsv1api.Removed {
			op, err := h.cvowrapper.ClusterOperatorWrapper.Get(operatorstatus.ClusterOperatorName)
			if err == nil {
				for _, c := range op.Status.Conditions {
					if c.Reason == operatorstatus.TBR {
						logrus.Print("Samples operator originally bootstrapped as removed because the TBR was inaccessible")
						h.tbrCheckFailed = true
						break
					}
				}
			}
		}
	}

	return cfg, nil
}
// initConditions makes sure every condition type the operator tracks exists on
// cfg, forcing ImportCredentialsExist and ConfigurationValid to True, and
// returns cfg.
func (h *Handler) initConditions(cfg *v1.Config) *v1.Config {
	now := kapis.Now()
	util.Condition(cfg, v1.SamplesExist)
	// flip a condition to True (stamping both timestamps) unless already True;
	// skipping the no-op case avoids needless status churn
	ensureTrue := func(condType v1.ConfigConditionType) {
		cond := util.Condition(cfg, condType)
		if cond.Status != corev1.ConditionTrue {
			cond.Status = corev1.ConditionTrue
			cond.LastTransitionTime = now
			cond.LastUpdateTime = now
			util.ConditionUpdate(cfg, cond)
		}
	}
	// image registry operator now handles making TBR creds available
	// for imagestreams
	ensureTrue(v1.ImportCredentialsExist)
	// our default config is valid; since Condition sets new conditions to false
	// if we get false here this is the first pass through; invalid configs
	// are caught above
	ensureTrue(v1.ConfigurationValid)
	// touch the remaining condition types so they exist with default status
	for _, condType := range []v1.ConfigConditionType{
		v1.ImageChangesInProgress,
		v1.RemovePending,
		v1.MigrationInProgress,
		v1.ImportImageErrorsExist,
	} {
		util.Condition(cfg, condType)
	}
	return cfg
}
// CleanUpOpenshiftNamespaceOnDelete removes, as part of a Config delete, every
// imagestream and template in the openshift namespace carrying the
// samples-managed label set to "true", plus the operator's error-tracking
// ConfigMaps (the imagestreamtag-to-image map is kept). Returns the first
// error encountered; NotFound errors are tolerated throughout.
//
// Changes vs. the original: `else` blocks after terminating returns were
// flattened (happy path left-aligned), and nil list results are guarded
// explicitly rather than dereferenced to test `.Items != nil` (ranging a nil
// slice is already a no-op).
func (h *Handler) CleanUpOpenshiftNamespaceOnDelete(cfg *v1.Config) error {
	h.buildSkipFilters(cfg)

	iopts := metav1.ListOptions{LabelSelector: v1.SamplesManagedLabel + "=true"}

	streamList, err := h.imageclientwrapper.List(iopts)
	if err != nil && !kerrors.IsNotFound(err) {
		logrus.Warnf("Problem listing openshift imagestreams on Config delete: %#v", err)
		return err
	}
	if streamList != nil {
		for _, stream := range streamList.Items {
			// this should filter both skipped imagestreams and imagestreams we
			// do not manage
			manage, ok := stream.Labels[v1.SamplesManagedLabel]
			if !ok || strings.TrimSpace(manage) != "true" {
				continue
			}
			err = h.imageclientwrapper.Delete(stream.Name, &metav1.DeleteOptions{})
			if err != nil && !kerrors.IsNotFound(err) {
				logrus.Warnf("Problem deleting openshift imagestream %s on Config delete: %#v", stream.Name, err)
				return err
			}
			cache.ImageStreamMassDeletesAdd(stream.Name)
		}
	}

	tempList, err := h.templateclientwrapper.List(iopts)
	if err != nil && !kerrors.IsNotFound(err) {
		logrus.Warnf("Problem listing openshift templates on Config delete: %#v", err)
		return err
	}
	if tempList != nil {
		for _, temp := range tempList.Items {
			// this should filter both skipped templates and templates we
			// do not manage
			manage, ok := temp.Labels[v1.SamplesManagedLabel]
			if !ok || strings.TrimSpace(manage) != "true" {
				continue
			}
			err = h.templateclientwrapper.Delete(temp.Name, &metav1.DeleteOptions{})
			if err != nil && !kerrors.IsNotFound(err) {
				logrus.Warnf("Problem deleting openshift template %s on Config delete: %#v", temp.Name, err)
				return err
			}
			cache.TemplateMassDeletesAdd(temp.Name)
		}
	}

	cmList, err := h.configmapclientwrapper.List()
	if err != nil && !kerrors.IsNotFound(err) {
		logrus.Warnf("Problem listing sample operator config maps on Config delete: %v", err.Error())
		return err
	}
	for _, cm := range cmList {
		// keep the imagestreamtag-to-image mapping ConfigMap
		if cm.Name == util.IST2ImageMap {
			continue
		}
		err := h.configmapclientwrapper.Delete(cm.Name)
		if err != nil && !kerrors.IsNotFound(err) {
			logrus.Warnf("Problem deleting samples operator config map %s on Config delete: %v", cm.Name, err.Error())
			return err
		}
	}

	// FYI we no longer delete the credential because the payload imagestreams like cli, must-gather that
	// this operator initially installs via its manifest, but does not manage, needs the pull image secret
	return nil
}
func (h *Handler) Handle(event util.Event) error {
switch event.Object.(type) {
case *corev1.ConfigMap:
cm, _ := event.Object.(*corev1.ConfigMap)
if cm.Name == util.IST2ImageMap {
return nil
}
return h.processImageCondition()
case *imagev1.ImageStream:
is, _ := event.Object.(*imagev1.ImageStream)
if is.Namespace != "openshift" {
return nil
}
err := h.processImageStreamWatchEvent(is, event.Deleted)
return err
case *templatev1.Template:
t, _ := event.Object.(*templatev1.Template)
if t.Namespace != "openshift" {
return nil
}
err := h.processTemplateWatchEvent(t, event.Deleted)
return err
case *v1.Config:
cfg, _ := event.Object.(*v1.Config)
if cfg.Name != v1.ConfigName || cfg.Namespace != "" {
return nil
}
// pattern is 1) come in with delete timestamp, event delete flag false
// 2) then after we remove finalizer, comes in with delete timestamp
// and event delete flag true
if event.Deleted {
logrus.Info("A previous delete attempt has been successfully completed")
h.cvowrapper.UpdateOperatorStatus(cfg, true, h.tbrCheckFailed, h.activeImageStreams())
return nil
}
if cfg.DeletionTimestamp != nil {
h.cvowrapper.UpdateOperatorStatus(cfg, true, h.tbrCheckFailed, h.activeImageStreams())
// before we kick off the delete cycle though, we make sure a prior creation
// cycle is not still in progress, because we don't want the create adding back
// in things we just deleted ... if an upsert is still in progress, return an error;
// the creation loop checks for deletion timestamp and aborts when it sees it;
// but we don't use in progress condition here, as the upsert cycle might also be complete;
// but ImageInProess is still true, so we use a local variable which is only set while upserts are happening; otherwise
// here, we get started with the delete, which will ultimately reset the conditions
// and samples, when it is false again, regardless of whether the imagestream imports are done
if h.upsertInProgress {
return fmt.Errorf("A delete attempt has come in while creating samples; initiating retry; creation loop should abort soon")
}
// nuke any registered upserts
//cache.ClearUpsertsCache()
if h.NeedsFinalizing(cfg) {
// so we initiate the delete and set exists to false first, where if we get
// conflicts because of start up imagestream events, the retry should work
// cause the finalizer is still there; also, needs finalizing sets the deleteInProgress
// flag (which imagestream event processing checks)
//
// when we come back in with the deleteInProgress already true, with a delete timestamp
// we then remove the finalizer and create the new Config
//
// note, as part of resetting the delete flag during error retries, we still need
// a way to tell the imagestream event processing to not bother with pending updates,
// so we have an additional flag for that special case
logrus.Println("Initiating samples delete and marking exists false")
err := h.CleanUpOpenshiftNamespaceOnDelete(cfg)
if err != nil {
return err
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
h.GoodConditionUpdate(cfg, corev1.ConditionFalse, v1.SamplesExist)
dbg := "exist false update"
logrus.Printf("CRDUPDATE %s", dbg)
err = h.crdwrapper.UpdateStatus(cfg, dbg)
if err != nil {
logrus.Printf("error on Config update after setting exists condition to false (returning error to retry): %v", err)
return err
}
} else {
logrus.Println("Initiating finalizer processing for a SampleResource delete attempt")
cfg = h.refetchCfgMinimizeConflicts(cfg)
h.RemoveFinalizer(cfg)
dbg := "remove finalizer update"
logrus.Printf("CRDUPDATE %s", dbg)
// not updating the status, but the metadata annotation
err := h.crdwrapper.Update(cfg)
if err != nil {
logrus.Printf("error removing Config finalizer during delete (hopefully retry on return of error works): %v", err)
return err
}
go func() {
h.CreateDefaultResourceIfNeeded(cfg)
}()
}
return nil
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
validArch, _ := h.IsValidArch(cfg)
if validArch && util.IsUnsupportedArch(cfg) && cfg.Spec.ManagementState == operatorsv1api.Managed {
// we did not bootstrap as removed in 4.2 for s390/ppc; we just reported complete
// clean that up to facilitate our mode of operation for those platforms
cfg.Spec.ManagementState = operatorsv1api.Removed
dbg := fmt.Sprintf("switch management state to removed for %s", cfg.Spec.Architectures[0])
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.Update(cfg)
}
// Every time we see a change to the Config object, update the ClusterOperator status
// based on the current conditions of the Config.
cfg = h.refetchCfgMinimizeConflicts(cfg)
err := h.cvowrapper.UpdateOperatorStatus(cfg, false, h.tbrCheckFailed, h.activeImageStreams())
if err != nil {
logrus.Errorf("error updating cluster operator status: %v", err)
return err
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
updateStatusManagementState, cfgUpdate, err := h.ProcessManagementField(cfg)
if !updateStatusManagementState || err != nil {
if err != nil || cfgUpdate {
// flush status update
dbg := fmt.Sprintf("process mgmt update spec %s status %s", string(cfg.Spec.ManagementState), string(cfg.Status.ManagementState))
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
return err
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
existingValidStatus := util.Condition(cfg, v1.ConfigurationValid).Status
err = h.SpecValidation(cfg)
if err != nil {
// flush status update
dbg := "bad spec validation update"
logrus.Printf("CRDUPDATE %s", dbg)
// only retry on error updating the Config; do not return
// the error from SpecValidation which denotes a bad config
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
// if a bad config was corrected, update and return
if existingValidStatus != util.Condition(cfg, v1.ConfigurationValid).Status {
dbg := "spec corrected"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
if len(cfg.Spec.Architectures) > 0 &&
cfg.Spec.Architectures[0] != v1.AMDArchitecture &&
cfg.Spec.Architectures[0] != v1.ARMArchitecture &&
cfg.Spec.Architectures[0] != v1.X86Architecture &&
cfg.Spec.Architectures[0] != v1.S390Architecture &&
cfg.Spec.Architectures[0] != v1.PPCArchitecture {
logrus.Printf("samples are not installed on an unsupported architecture")
}
h.buildSkipFilters(cfg)
configChanged := false
configChangeRequiresUpsert := false
registryChanged := false
unskippedStreams := map[string]bool{}
unskippedTemplates := map[string]bool{}
if cfg.Spec.ManagementState == cfg.Status.ManagementState {
cfg = h.refetchCfgMinimizeConflicts(cfg)
configChanged, configChangeRequiresUpsert, registryChanged, unskippedStreams, unskippedTemplates = h.VariableConfigChanged(cfg)
logrus.Debugf("config changed %v upsert needed %v exists/true %v progressing/false %v op version %s status version %s",
configChanged,
configChangeRequiresUpsert,
util.ConditionTrue(cfg, v1.SamplesExist),
util.ConditionFalse(cfg, v1.ImageChangesInProgress),
h.version,
cfg.Status.Version)
// so ignore if config does not change and the samples exist and
// we are not in progress and at the right level
if !configChanged &&
util.ConditionTrue(cfg, v1.SamplesExist) &&
util.ConditionFalse(cfg, v1.ImageChangesInProgress) &&
h.version == cfg.Status.Version {
logrus.Printf("At steady state: config the same and exists is true, in progress false, and version correct")
cfg = h.refetchCfgMinimizeConflicts(cfg)
// migration inevitably means we need to refresh the file cache as samples are added and
// deleted between releases, so force file map building
if !util.IsUnsupportedArch(cfg) {
h.buildFileMaps(cfg, true)
// passing in false means if the samples is present, we leave it alone
_, err = h.createSamples(cfg, false, registryChanged, unskippedStreams, unskippedTemplates)
}
return err
}
// if config changed requiring an upsert, but a prior config action is still in progress,
// reset in progress to false and return; the next event should drive the actual
// processing of the config change and replace whatever was previously
// in progress
if configChangeRequiresUpsert &&
util.ConditionTrue(cfg, v1.ImageChangesInProgress) {
cfg = h.refetchCfgMinimizeConflicts(cfg)
h.GoodConditionUpdate(cfg, corev1.ConditionFalse, v1.ImageChangesInProgress)
dbg := "change in progress from true to false for config change"
logrus.Printf("CRDUPDATE %s", dbg)
// do not transfer config changes to status since we are just turning off in progress
// to start a new createSamples cycle
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
if cfg.Status.ManagementState != operatorsv1api.Managed {
cfg.Status.ManagementState = operatorsv1api.Managed
dbg := "change status management state to managed"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
// if coming from remove turn off
cfg = h.refetchCfgMinimizeConflicts(cfg)
if util.ConditionTrue(cfg, v1.RemovePending) {
now := kapis.Now()
condition := util.Condition(cfg, v1.RemovePending)
condition.LastTransitionTime = now
condition.LastUpdateTime = now
condition.Status = corev1.ConditionFalse
util.ConditionUpdate(cfg, condition)
dbg := "change remove pending to false"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
if !configChanged && h.shouldSetVersion(cfg) {
if util.ConditionTrue(cfg, v1.ImportImageErrorsExist) {
logrus.Printf("An image import error occurred applying the latest configuration on version %s; this operator will periodically retry the import, or an administrator can investigate and remedy manually", h.version)
}
cfg.Status.Version = h.version
logrus.Printf("The samples are now at version %s", cfg.Status.Version)
dbg := "upd status version"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
// cycle through the skip lists and update the managed flag if needed
for _, name := range cfg.Spec.SkippedTemplates {
h.setSampleManagedLabelToFalse("template", name)
}
for _, name := range cfg.Spec.SkippedImagestreams {
h.setSampleManagedLabelToFalse("imagestream", name)
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
if configChanged && !configChangeRequiresUpsert && util.ConditionTrue(cfg, v1.SamplesExist) {
dbg := "bypassing upserts for non invasive config change after initial create"
logrus.Printf("CRDUPDATE %s", dbg)
cfg = h.refetchCfgMinimizeConflicts(cfg)
// we have now "processed" the config and are executing changes, transfer current spec to status
h.StoreCurrentValidConfig(cfg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
cfg = h.refetchCfgMinimizeConflicts(cfg)
if !util.ConditionTrue(cfg, v1.SamplesExist) ||
!util.ConditionFalse(cfg, v1.ImageChangesInProgress) ||
h.version != cfg.Status.Version ||
configChanged ||
updateStatusManagementState {
logrus.Infof("ENTERING UPSERT / STEADY STATE PATH ExistTrue %v ImageInProgressFalse %v VersionOK %v ConfigChanged %v ManagementStateChanged %v",
util.ConditionTrue(cfg, v1.SamplesExist),
util.ConditionFalse(cfg, v1.ImageChangesInProgress),
h.version == cfg.Status.Version,
configChanged,
updateStatusManagementState)
// pass in true to force rebuild of maps, which we do here because at this point
// we have taken on some form of config change
err = h.buildFileMaps(cfg, true)
if err != nil {
return err
}
h.upsertInProgress = true
turnFlagOff := func(h *Handler) { h.upsertInProgress = false }
defer turnFlagOff(h)
for isName := range h.imagestreamFile {
_, skipped := h.skippedImagestreams[isName]
unskipping := len(unskippedStreams) > 0
_, unskipped := unskippedStreams[isName]
if (unskipping && !unskipped) || skipped {
continue
}
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: isName,
Namespace: v1.OperatorNamespace,
},
Data: map[string]string{},
}
_, err = h.configmapclientwrapper.Create(cm)
if err != nil && !kerrors.IsAlreadyExists(err) {
return err
}
}
abortForDelete, err := h.createSamples(cfg, true, registryChanged, unskippedStreams, unskippedTemplates)
// we prioritize enabling delete vs. any error processing from createSamples (though at the moment that
// method only returns nil error when it returns true for abortForDelete) as a subsequent delete's processing will
// immediately remove the cfg obj and cluster operator object that we just posted some error notice in
if abortForDelete {
// a delete has been initiated
// note, the imagestream watch cache gets cleared once the deletion/finalizer processing commences
dbg := "create samples aborted for delete"
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
if err != nil {
h.processError(cfg, v1.SamplesExist, corev1.ConditionUnknown, err, "error creating samples: %v")
dbg := "setting samples exists to unknown"
logrus.Printf("CRDUPDATE %s", dbg)
e := h.crdwrapper.UpdateStatus(cfg, dbg)
if e != nil {
return e
}
return err
}
cfg.Status.Version = h.version
h.GoodConditionUpdate(cfg, corev1.ConditionTrue, v1.SamplesExist)
h.GoodConditionUpdate(cfg, corev1.ConditionFalse, v1.ImageChangesInProgress)
// now that we employ status subresources, we can't populate
// the conditions on create; so we do initialize here, which is our "step 1"
// of the "make a change" flow in our state machine
cfg = h.initConditions(cfg)
dbg := "samples upserted; set clusteroperator ready, steady state"
// we have now "processed" the config and are executing changes, transfer current spec to status
h.StoreCurrentValidConfig(cfg)
logrus.Printf("CRDUPDATE %s", dbg)
return h.crdwrapper.UpdateStatus(cfg, dbg)
}
}
return nil
}
// setSampleManagedLabelToFalse flips the samples-managed label
// (v1.SamplesManagedLabel) from "true" to "false" on the named imagestream or
// template, so the object is no longer treated as operator-managed. Update
// conflicts are retried via retry.RetryOnConflict; objects that are missing,
// have no labels, or whose label is not "true" are left untouched.
//
// kind must be "imagestream" or "template"; any other value is a no-op.
// Returns the error from the final get/update attempt, or nil on success.
func (h *Handler) setSampleManagedLabelToFalse(kind, name string) error {
	switch kind {
	case "imagestream":
		return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
			stream, err := h.imageclientwrapper.Get(name)
			if err != nil || stream == nil || stream.Labels == nil {
				// propagate get errors so RetryOnConflict can decide whether to retry;
				// nil/unlabeled objects need no update
				return err
			}
			if stream.Labels[v1.SamplesManagedLabel] != "true" {
				return nil
			}
			// deep copy before mutating to avoid corrupting the shared lister cache
			stream = stream.DeepCopy()
			stream.Labels[v1.SamplesManagedLabel] = "false"
			_, err = h.imageclientwrapper.Update(stream)
			return err
		})
	case "template":
		return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
			tpl, err := h.templateclientwrapper.Get(name)
			if err != nil || tpl == nil || tpl.Labels == nil {
				return err
			}
			if tpl.Labels[v1.SamplesManagedLabel] != "true" {
				return nil
			}
			tpl = tpl.DeepCopy()
			tpl.Labels[v1.SamplesManagedLabel] = "false"
			_, err = h.templateclientwrapper.Update(tpl)
			return err
		})
	}
	return nil
}
// abortForDelete reports whether a deletion of the config object has been
// initiated (its DeletionTimestamp is set), so callers can abort upsert or
// watch processing and let the finalizer handling take over.
func (h *Handler) abortForDelete(cfg *v1.Config) bool {
	// refetchCfgMinimizeConflicts reads from the lister/watch cache and does
	// not perform API calls
	return h.refetchCfgMinimizeConflicts(cfg).DeletionTimestamp != nil
}
// rc: error - any api errors during upserts
// rc: bool - if we abort because we detected a delete
func (h *Handler) createSamples(cfg *v1.Config, updateIfPresent, registryChanged bool, unskippedStreams, unskippedTemplates map[string]bool) (bool, error) {
// first, go through the list and prime our upsert cache
// prior to any actual upserts
imagestreams := []*imagev1.ImageStream{}
for _, fileName := range h.imagestreamFile {
if h.abortForDelete(cfg) {
return true, nil
}
imagestream, err := h.Fileimagegetter.Get(fileName)