/
validator.go
1825 lines (1621 loc) · 69.3 KB
/
validator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package host
import (
"bytes"
"context"
"encoding/json"
"fmt"
"math"
"net"
"net/url"
"strings"
"time"
"github.com/coreos/ignition/v2/config/v3_2"
ignition_types "github.com/coreos/ignition/v2/config/v3_2/types"
"github.com/go-openapi/strfmt"
"github.com/go-openapi/swag"
"github.com/openshift/assisted-service/internal/common"
"github.com/openshift/assisted-service/internal/constants"
"github.com/openshift/assisted-service/internal/hardware"
"github.com/openshift/assisted-service/internal/host/hostutil"
"github.com/openshift/assisted-service/internal/network"
"github.com/openshift/assisted-service/internal/operators"
"github.com/openshift/assisted-service/internal/provider/registry"
"github.com/openshift/assisted-service/internal/versions"
"github.com/openshift/assisted-service/models"
"github.com/openshift/assisted-service/pkg/conversions"
"github.com/openshift/assisted-service/pkg/s3wrapper"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/thoas/go-funk"
"github.com/vincent-petithory/dataurl"
"gorm.io/gorm"
"sigs.k8s.io/yaml"
)
// ValidationStatus represents the outcome of a single host validation check.
type ValidationStatus string

const (
	// ValidationSuccess - the check passed.
	ValidationSuccess ValidationStatus = "success"
	// ValidationSuccessSuppressOutput - the check passed but its message
	// should not be surfaced to the user.
	ValidationSuccessSuppressOutput ValidationStatus = "success-suppress-output"
	// ValidationFailure - the check failed.
	ValidationFailure ValidationStatus = "failure"
	// ValidationPending - prerequisites (e.g. inventory, role) are missing,
	// so the check could not be evaluated yet.
	ValidationPending ValidationStatus = "pending"
	// ValidationError - an internal error occurred while running the check.
	ValidationError ValidationStatus = "error"
	// ValidationDisabled - the check is not applicable for this host.
	ValidationDisabled ValidationStatus = "disabled"

	// maxServiceAheadOfHostTimeDiff is the largest tolerated clock gap when
	// the service clock is ahead of the host clock.
	maxServiceAheadOfHostTimeDiff = 20 * time.Minute
	// maxHostAheadOfServiceTimeDiff is the largest tolerated clock gap when
	// the host clock is ahead of the service clock.
	maxHostAheadOfServiceTimeDiff = 1 * time.Hour
	maxHostTimingMetrics          = 4
	maxPingCommandExamples        = 4
)

// FailedToFindAction is the substring an agent replies with when it does not
// implement a requested step (used for backward-compatibility handling).
const FailedToFindAction = "failed to find action for step"

// OpenStackPlatform is the system-vendor product name reported by OpenStack VMs.
const OpenStackPlatform = "OpenStack Compute"

var (
	// ImageStatusDownloadRateThreshold is the minimum acceptable container
	// image download rate.
	ImageStatusDownloadRateThreshold = 0.001

	// invalidPlatforms lists system-vendor product names that are only
	// allowed under SNO or user-managed networking.
	invalidPlatforms = []string{
		OpenStackPlatform,
	}
)

// String returns the status as a plain string.
func (v ValidationStatus) String() string {
	return string(v)
}
// InventoryCache memoizes parsed host inventories, keyed by "<hostID>@<infraEnvID>",
// so the same host's inventory JSON is unmarshaled at most once per run.
type InventoryCache map[string]*models.Inventory

// GetOrUnmarshal returns the parsed inventory for the given host, parsing and
// caching it on first access. A host with no inventory yields (nil, nil).
func (inventoryCache InventoryCache) GetOrUnmarshal(host *models.Host) (*models.Inventory, error) {
	if host.Inventory == "" {
		return nil, nil
	}
	key := host.ID.String() + "@" + host.InfraEnvID.String()
	if cached, hit := inventoryCache[key]; hit {
		return cached, nil
	}
	parsed, err := common.UnmarshalInventory(host.Inventory)
	if err != nil {
		return nil, err
	}
	inventoryCache[key] = parsed
	return parsed, nil
}
// validationContext carries everything a single host-validation run needs:
// the host itself, its cluster or infra-env (exactly one is loaded, depending
// on whether the host is bound), the parsed inventory, the DB handle, and
// pre-computed hardware requirements.
type validationContext struct {
	host                    *models.Host
	cluster                 *common.Cluster
	infraEnv                *common.InfraEnv
	inventory               *models.Inventory
	db                      *gorm.DB
	inventoryCache          InventoryCache
	clusterHostRequirements *models.ClusterHostRequirements
	minCPUCoresRequirement  int64
	minRAMMibRequirement    int64
	kubeApiEnabled          bool
	softTimeoutsEnabled     bool
	objectHandler           s3wrapper.API
	ctx                     context.Context
}

// validationCondition evaluates one check against the given context and
// returns its status plus a human-readable message.
type validationCondition func(context *validationContext) (ValidationStatus, string)

// validation binds a validation ID to its condition; skippedStates lists
// installation stages during which the check should be skipped.
type validation struct {
	id            validationID
	condition     validationCondition
	skippedStates []models.HostStage
}
// loadCluster fetches the host's cluster (including its hosts) from the DB,
// unless a cluster was already supplied to the context.
func (c *validationContext) loadCluster() error {
	if c.cluster != nil {
		return nil
	}
	cluster, err := common.GetClusterFromDBWithHosts(c.db, *c.host.ClusterID)
	c.cluster = cluster
	return err
}
// loadInfraEnv fetches the host's infra-env from the DB, unless an infra-env
// was already supplied to the context.
func (c *validationContext) loadInfraEnv() error {
	if c.infraEnv != nil {
		return nil
	}
	infraEnv, err := common.GetInfraEnvFromDB(c.db, c.host.InfraEnvID)
	c.infraEnv = infraEnv
	return err
}
// loadInventory parses (via the cache) and sanity-checks the host inventory,
// storing it on the context. A host with no inventory is not an error; the
// context's inventory simply stays nil.
func (c *validationContext) loadInventory() error {
	inventory, err := c.inventoryCache.GetOrUnmarshal(c.host)
	if err != nil {
		return err
	}
	if inventory == nil {
		return nil
	}
	// Memory and CPU are mandatory for downstream requirement checks.
	if inventory.Memory == nil {
		return errors.New("Inventory is not valid, Memory not detected")
	}
	if inventory.CPU == nil {
		return errors.New("Inventory is not valid, CPU not detected")
	}
	c.inventory = inventory
	return nil
}
// getBootDeviceInfo resolves the host's boot device and returns the disk info
// recorded for it in the host's DisksInfo.
func (v *validator) getBootDeviceInfo(host *models.Host) (*models.DiskInfo, error) {
	device, err := hardware.GetBootDevice(v.hwValidator, host)
	if err != nil {
		return nil, err
	}
	info, err := common.GetDiskInfo(host.DisksInfo, device)
	if err != nil {
		return nil, err
	}
	return info, nil
}
// validateRole rejects hosts whose effective role is not one of the
// recognized roles (master, worker, auto-assign).
func (c *validationContext) validateRole() error {
	role := common.GetEffectiveRole(c.host)
	switch role {
	case models.HostRoleMaster, models.HostRoleWorker, models.HostRoleAutoAssign:
		return nil
	default:
		return errors.Errorf("Illegal role defined: %s", role)
	}
}
// validateMachineCIDR verifies that every machine network CIDR configured on
// the cluster is parseable.
func (c *validationContext) validateMachineCIDR() error {
	for _, machineNetwork := range c.cluster.MachineNetworks {
		if _, _, err := net.ParseCIDR(string(machineNetwork.Cidr)); err != nil {
			return err
		}
	}
	return nil
}
// loadClusterHostRequirements computes and caches the role-dependent hardware
// requirements for this host within its cluster.
// NOTE(review): uses context.TODO() although c.ctx is available on the
// context — consider propagating it so cancellation reaches the validator.
func (c *validationContext) loadClusterHostRequirements(hwValidator hardware.Validator) error {
	requirements, err := hwValidator.GetClusterHostRequirements(context.TODO(), c.cluster, c.host)
	c.clusterHostRequirements = requirements
	return err
}
// loadInfraEnvHostRequirements computes and caches the hardware requirements
// for an unbound host based on its infra-env.
// NOTE(review): uses context.TODO() although c.ctx is available on the
// context — consider propagating it so cancellation reaches the validator.
func (c *validationContext) loadInfraEnvHostRequirements(hwValidator hardware.Validator) error {
	requirements, err := hwValidator.GetInfraEnvHostRequirements(context.TODO(), c.infraEnv)
	c.clusterHostRequirements = requirements
	return err
}
// loadGeneralMinRequirements caches the cluster-wide minimum CPU/RAM any host
// must meet regardless of role: the lesser of the master and worker preflight
// requirements.
func (c *validationContext) loadGeneralMinRequirements(hwValidator hardware.Validator) error {
	requirements, err := hwValidator.GetPreflightHardwareRequirements(context.TODO(), c.cluster)
	if err != nil {
		return err
	}
	master := requirements.Ocp.Master.Quantitative
	worker := requirements.Ocp.Worker.Quantitative
	c.minCPUCoresRequirement = int64(math.Min(float64(master.CPUCores), float64(worker.CPUCores)))
	c.minRAMMibRequirement = int64(math.Min(float64(master.RAMMib), float64(worker.RAMMib)))
	return nil
}
// loadGeneralInfraEnvMinRequirements caches the minimum CPU/RAM any unbound
// host must meet regardless of role: the lesser of the master and worker
// preflight requirements for the infra-env.
func (c *validationContext) loadGeneralInfraEnvMinRequirements(hwValidator hardware.Validator) error {
	requirements, err := hwValidator.GetPreflightInfraEnvHardwareRequirements(context.TODO(), c.infraEnv)
	if err != nil {
		return err
	}
	master := requirements.Ocp.Master.Quantitative
	worker := requirements.Ocp.Worker.Quantitative
	c.minCPUCoresRequirement = int64(math.Min(float64(master.CPUCores), float64(worker.CPUCores)))
	c.minRAMMibRequirement = int64(math.Min(float64(master.RAMMib), float64(worker.RAMMib)))
	return nil
}
// newValidationContext assembles everything required to run host validations.
// For a bound host (ClusterID set) it loads the cluster, inventory and
// cluster-scoped requirements; for an unbound host it loads the infra-env and
// infra-env scoped requirements instead. The load order matters: inventory
// and role checks must succeed before requirements are computed.
func newValidationContext(ctx context.Context, host *models.Host, c *common.Cluster, i *common.InfraEnv, db *gorm.DB, inventoryCache InventoryCache, hwValidator hardware.Validator, kubeApiEnabled bool, objectHandler s3wrapper.API, softTimeoutsEnabled bool) (*validationContext, error) {
	ret := &validationContext{
		ctx:                 ctx,
		host:                host,
		db:                  db,
		cluster:             c,
		infraEnv:            i,
		inventoryCache:      inventoryCache,
		kubeApiEnabled:      kubeApiEnabled,
		softTimeoutsEnabled: softTimeoutsEnabled,
		objectHandler:       objectHandler,
	}
	if host.ClusterID != nil {
		// Bound host: validate against the cluster.
		err := ret.loadCluster()
		if err != nil {
			return nil, err
		}
		err = ret.loadInventory()
		if err != nil {
			return nil, err
		}
		err = ret.validateRole()
		if err != nil {
			return nil, err
		}
		err = ret.validateMachineCIDR()
		if err != nil {
			return nil, err
		}
		err = ret.loadClusterHostRequirements(hwValidator)
		if err != nil {
			return nil, err
		}
		err = ret.loadGeneralMinRequirements(hwValidator)
		if err != nil {
			return nil, err
		}
	} else {
		// Unbound host: validate against the infra-env.
		err := ret.loadInfraEnv()
		if err != nil {
			return nil, err
		}
		err = ret.loadInventory()
		if err != nil {
			return nil, err
		}
		err = ret.validateRole()
		if err != nil {
			return nil, err
		}
		err = ret.loadInfraEnvHostRequirements(hwValidator)
		if err != nil {
			return nil, err
		}
		err = ret.loadGeneralInfraEnvMinRequirements(hwValidator)
		if err != nil {
			return nil, err
		}
	}
	return ret, nil
}
// boolValue maps a boolean check result onto the success/failure validation
// statuses.
func boolValue(b bool) ValidationStatus {
	if b {
		return ValidationSuccess
	}
	return ValidationFailure
}
// validator implements the individual host validation checks. Each check
// method receives a *validationContext and returns a ValidationStatus plus a
// human-readable message.
type validator struct {
	log              logrus.FieldLogger
	hwValidatorCfg   *hardware.ValidatorCfg
	hwValidator      hardware.Validator
	operatorsAPI     operators.API
	providerRegistry registry.ProviderRegistry
	versionHandler   versions.Handler
}
// isMediaConnected fails only when the host explicitly reported its
// installation media as disconnected; an unreported media status passes.
func (v *validator) isMediaConnected(c *validationContext) (ValidationStatus, string) {
	if c.host.MediaStatus != nil && *c.host.MediaStatus == models.HostMediaStatusDisconnected {
		return ValidationFailure, statusInfoMediaDisconnected
	}
	return ValidationSuccess, "Media device is connected"
}
// isConnected checks that the host agent has checked in with the service
// recently enough. Hosts that already progressed to (or past) the reboot
// stage no longer run the agent, so they are always considered connected.
func (v *validator) isConnected(c *validationContext) (ValidationStatus, string) {
	maxHostDisconnectionTime := v.hwValidatorCfg.MaxHostDisconnectionTime
	if c.host.Bootstrap {
		// In case of bootstrap we increase disconnection timeout as it's resolv.conf
		// will be recreated in the middle of installation and it can cause for dns issues
		// It can cause bootstrap to disconnect from assisted service
		maxHostDisconnectionTime = v.hwValidatorCfg.MaxHostDisconnectionTime + 2*time.Minute
	}
	rebootIndex := IndexOfStage(models.HostStageRebooting, BootstrapStages[:])
	// Pre-reboot == no stage reported yet, or current stage precedes reboot.
	hostIsPreReboot := c.host.Progress.CurrentStage == "" || funk.Contains(BootstrapStages[0:rebootIndex], c.host.Progress.CurrentStage)
	// NOTE(review): CheckedInAt.String() == "" looks intended to detect a
	// host that never checked in; verify that the zero strfmt.DateTime
	// actually stringifies to "" rather than a zero timestamp.
	status := boolValue(!hostIsPreReboot || c.host.CheckedInAt.String() == "" || time.Since(time.Time(c.host.CheckedInAt)) <= maxHostDisconnectionTime)
	if status == ValidationSuccess {
		return status, "Host is connected"
	} else {
		return status, "Host is disconnected"
	}
}
// hasInventory reports whether a valid inventory was received for the host.
func (v *validator) hasInventory(c *validationContext) (ValidationStatus, string) {
	if c.inventory != nil {
		return ValidationSuccess, "Valid inventory exists for the host"
	}
	return ValidationFailure, "Inventory has not been received for the host"
}
// hasMinCpuCores checks the host against the role-independent CPU minimum.
func (v *validator) hasMinCpuCores(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationPending, "Missing inventory"
	}
	if c.inventory.CPU.Count < c.minCPUCoresRequirement {
		return ValidationFailure, fmt.Sprintf("The host is not eligible to participate in Openshift Cluster because the minimum required CPU cores for any role is %d, found only %d", c.minCPUCoresRequirement, c.inventory.CPU.Count)
	}
	return ValidationSuccess, "Sufficient CPU cores"
}
// hasMinMemory checks the host against the role-independent RAM minimum.
func (v *validator) hasMinMemory(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationPending, "Missing inventory"
	}
	requiredBytes := conversions.MibToBytes(c.minRAMMibRequirement)
	if c.inventory.Memory.PhysicalBytes >= requiredBytes {
		return ValidationSuccess, "Sufficient minimum RAM"
	}
	return ValidationFailure, fmt.Sprintf("The host is not eligible to participate in Openshift Cluster because the minimum required RAM for any role is %s, found only %s",
		conversions.BytesToString(requiredBytes), conversions.BytesToString(c.inventory.Memory.PhysicalBytes))
}
// compatibleWithClusterPlatform checks the host against the cluster's
// platform via the provider registry. Late-bound (infra-env) hosts are
// exempt; the check is pending until inventory and platform are known.
func (v *validator) compatibleWithClusterPlatform(c *validationContext) (ValidationStatus, string) {
	// Late binding
	if c.infraEnv != nil {
		return ValidationSuccessSuppressOutput, ""
	}
	platform := common.PlatformTypeValue(c.cluster.Platform.Type)
	if c.inventory == nil || platform == "" {
		return ValidationPending, "Missing inventory or platform isn't set"
	}
	supported, err := v.providerRegistry.IsHostSupported(c.cluster.Platform, c.host)
	if err != nil {
		return ValidationError, "Validation error"
	}
	if !supported {
		return ValidationFailure, fmt.Sprintf("Host is not compatible with cluster platform %s; either disable this host or discover a new compatible host.", platform)
	}
	return ValidationSuccess, fmt.Sprintf("Host is compatible with cluster platform %s", platform)
}
// areTangServersReachable interprets the agent's tang-connectivity-check
// reply. An empty reply is pending; agents too old to implement the step are
// passed for backward compatibility.
func (v *validator) areTangServersReachable(c *validationContext) (ValidationStatus, string) {
	reply := c.host.TangConnectivity
	if reply == "" {
		return ValidationPending, ""
	}
	// Older agents have no action for tang-connectivity-check.
	// The missing action result will not fail host validations to
	// keep backward compatibility with older agents who did not include tang-connectivity-check.
	if strings.Contains(reply, FailedToFindAction) {
		v.log.Warningf(
			"host %s replied to StepType: %s with: %s. Validation will pass to keep backward compatibility with discovery agent version: %s",
			c.host.ID.String(), models.StepTypeTangConnectivityCheck, reply, c.host.DiscoveryAgentVersion,
		)
		return ValidationSuccessSuppressOutput, ""
	}
	var response models.TangConnectivityResponse
	err := json.Unmarshal([]byte(reply), &response)
	if err == nil && response.IsSuccess {
		return ValidationSuccess, "Tang servers are reachable"
	}
	return ValidationFailure, fmt.Sprintf("Could not validate that all Tang servers are reachable and working: %s", reply)
}
// diskEncryptionRequirementsSatisfied checks that the host can satisfy the
// cluster's disk-encryption configuration (TPMv2 or Tang).
//
// Day-2 hosts derive the expected configuration from the LUKS section of the
// downloaded ignition — note this MUTATES c.cluster.DiskEncryption to match.
// Day-1 hosts rely on the cluster configuration plus the host's effective role.
func (v *validator) diskEncryptionRequirementsSatisfied(c *validationContext) (ValidationStatus, string) {
	var status ValidationStatus
	var message string
	// Not applicable for unbound hosts or clusters with encryption disabled.
	if c.infraEnv != nil || swag.StringValue(c.cluster.DiskEncryption.EnableOn) == models.DiskEncryptionEnableOnNone {
		return ValidationSuccessSuppressOutput, ""
	}
	if c.inventory == nil {
		return ValidationPending, "Missing host inventory"
	}
	if hostutil.IsDay2Host(c.host) {
		//day2 validation is taking the disk encryption data solely from
		//the host inventory and set the diskEncryption field on the cluster
		//according to that information
		luks, err := hostutil.GetDiskEncryptionForDay2(v.log, c.host)
		if err != nil {
			return ValidationPending, "Missing ignition information"
		}
		if luks == nil || luks.Clevis == nil {
			// No tang servers to validate for the target cluster
			return ValidationSuccessSuppressOutput, ""
		}
		c.cluster.DiskEncryption = &models.DiskEncryption{}
		if swag.BoolValue(luks.Clevis.Tpm2) {
			c.cluster.DiskEncryption.Mode = swag.String(models.DiskEncryptionModeTpmv2)
			// If Tpm2 is enabled for workers, check whether supported by the host.
			status = boolValue(c.inventory.TpmVersion == models.InventoryTpmVersionNr20)
		} else if len(luks.Clevis.Tang) != 0 {
			c.cluster.DiskEncryption.Mode = swag.String(models.DiskEncryptionModeTang)
			status, message = v.areTangServersReachable(c)
			if status == ValidationFailure {
				return status, message
			}
		} else {
			// Only Tpm2 and Tang are available for disk encryption
			status = ValidationFailure
		}
	} else {
		//day 1 validation is relying on the host's role and the user
		//configuration to check if the disk encryption setup is valid
		role := common.GetEffectiveRole(c.host)
		if role == models.HostRoleAutoAssign {
			return ValidationPending, "Missing role assignment"
		}
		if !hostutil.IsDiskEncryptionEnabledForRole(*c.cluster.DiskEncryption, role) {
			return ValidationSuccessSuppressOutput, ""
		}
		if swag.StringValue(c.cluster.DiskEncryption.Mode) == models.DiskEncryptionModeTang {
			status, message = v.areTangServersReachable(c)
			if status == ValidationFailure {
				return status, message
			}
		} else { // Mode TPMv2
			status = boolValue(c.inventory.TpmVersion == models.InventoryTpmVersionNr20)
		}
	}
	// Tang failures returned above; remaining failures here are TPM-related
	// (or the day-2 "neither mode" case, where Mode is still nil).
	switch status {
	case ValidationSuccess:
		return status, fmt.Sprintf("Installation disk can be encrypted using %s", *c.cluster.DiskEncryption.Mode)
	case ValidationFailure:
		// Mode TPMv2
		if c.inventory.TpmVersion == models.InventoryTpmVersionNone {
			return status, "TPM version could not be found, make sure TPM is enabled in host's BIOS"
		}
		if c.cluster.DiskEncryption.Mode == nil {
			return status, "Invalid LUKS object in ignition - both TPM2 and Tang are not available"
		}
		return status, fmt.Sprintf("The host's TPM version is not supported, expected-version: %s, actual-version: %s",
			models.InventoryTpmVersionNr20, c.inventory.TpmVersion)
	case ValidationPending:
		return status, "Disk encryption check was not performed yet"
	default:
		return status, fmt.Sprintf("Unexpected status %s", status)
	}
}
// hasMinValidDisks checks that the host reports at least one disk that is
// eligible for installation.
//
// Fix: the original re-unmarshaled the inventory through the cache even
// though c.inventory already holds the parsed inventory for this host (set by
// loadInventory via the same cache); the guarded error path was unreachable
// once c.inventory is non-nil. Use c.inventory directly.
func (v *validator) hasMinValidDisks(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationPending, "Missing inventory"
	}
	if len(c.inventory.Disks) == 0 {
		return ValidationError, "Failed to detected disks"
	}
	disks := v.hwValidator.ListEligibleDisks(c.inventory)
	if len(disks) > 0 {
		return ValidationSuccess, "Sufficient disk capacity"
	}
	return ValidationFailure, "No eligible disks were found, please check specific disks to see why they are not eligible"
}
// isMachineCidrDefined checks that the cluster has a machine network CIDR
// when one is required (managed networking, day-1 cluster).
//
// Fix: the infra-env (unbound host) branch returned the message
// "Unexpected status success-suppress-output" — a copy-paste defect; every
// other validator returns an empty message with ValidationSuccessSuppressOutput.
func (v *validator) isMachineCidrDefined(c *validationContext) (ValidationStatus, string) {
	// Unbound host: no cluster, nothing to validate.
	if c.infraEnv != nil {
		return ValidationSuccessSuppressOutput, ""
	}
	if swag.BoolValue(c.cluster.UserManagedNetworking) {
		return ValidationSuccess, "No Machine Network CIDR needed: User Managed Networking"
	}
	if swag.StringValue(c.cluster.Kind) == models.ClusterKindAddHostsCluster {
		return ValidationSuccess, "No Machine Network CIDR needed: Day2 cluster"
	}
	if network.IsMachineCidrAvailable(c.cluster) {
		return ValidationSuccess, "Machine Network CIDR is defined"
	}
	// With DHCP VIP allocation the CIDR is expected to be allocated by the
	// service, so the hint about setting VIPs does not apply.
	if swag.BoolValue(c.cluster.VipDhcpAllocation) {
		return ValidationFailure, "Machine Network CIDR is undefined"
	}
	return ValidationFailure, "Machine Network CIDR is undefined; the Machine Network CIDR can be defined by setting either the API or Ingress virtual IPs"
}
// hasCPUCoresForRole checks the host against the CPU requirement computed for
// its effective role.
func (v *validator) hasCPUCoresForRole(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationPending, "Missing inventory or role"
	}
	role := common.GetEffectiveRole(c.host)
	required := c.clusterHostRequirements.Total.CPUCores
	if c.inventory.CPU.Count < required {
		return ValidationFailure, fmt.Sprintf("Require at least %d CPU cores for %s role, found only %d", required, role, c.inventory.CPU.Count)
	}
	return ValidationSuccess, fmt.Sprintf("Sufficient CPU cores for role %s", role)
}
// hasMemoryForRole checks the host against the RAM requirement computed for
// its effective role.
func (v *validator) hasMemoryForRole(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationPending, "Missing inventory or role"
	}
	role := common.GetEffectiveRole(c.host)
	requiredBytes := conversions.MibToBytes(c.clusterHostRequirements.Total.RAMMib)
	if c.inventory.Memory.PhysicalBytes < requiredBytes {
		return ValidationFailure, fmt.Sprintf("Require at least %s RAM for role %s, found only %s",
			conversions.BytesToString(requiredBytes), role, conversions.BytesToString(c.inventory.Memory.PhysicalBytes))
	}
	return ValidationSuccess, fmt.Sprintf("Sufficient RAM for role %s", role)
}
// isHostnameUnique verifies no other host in the cluster resolves to the same
// effective hostname (requested hostname, falling back to inventory hostname).
func (v *validator) isHostnameUnique(c *validationContext) (ValidationStatus, string) {
	if c.infraEnv != nil {
		return ValidationSuccessSuppressOutput, ""
	}
	if c.inventory == nil {
		return ValidationPending, "Missing inventory"
	}
	hostname := getRealHostname(c.host, c.inventory)
	for _, other := range c.cluster.Hosts {
		if other.ID.String() == c.host.ID.String() || other.Inventory == "" {
			continue
		}
		otherInventory, err := c.inventoryCache.GetOrUnmarshal(other)
		if err != nil || otherInventory == nil {
			v.log.WithError(err).Warnf("Illegal inventory for host %s", other.ID.String())
			// A host with unparsable inventory cannot clash with ours; skip it.
			continue
		}
		if getRealHostname(other, otherInventory) == hostname {
			return ValidationFailure, fmt.Sprintf("Hostname %s is not unique in cluster", hostname)
		}
	}
	return ValidationSuccess, fmt.Sprintf("Hostname %s is unique in cluster", hostname)
}
// isValidPlatformNetworkSettings rejects hosts running on restricted
// platforms (e.g. OpenStack VMs) unless the cluster uses user-managed
// networking; for unbound hosts the decision is deferred.
func (v *validator) isValidPlatformNetworkSettings(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationPending, "Missing inventory"
	}
	if c.inventory.SystemVendor == nil {
		return ValidationError, "Validation error"
	}
	product := c.inventory.SystemVendor.ProductName
	if !funk.ContainsString(invalidPlatforms, product) {
		return ValidationSuccess, fmt.Sprintf("Platform %s is allowed", product)
	}
	// Unbound host: no cluster yet, so suppress until binding.
	if c.infraEnv != nil {
		return ValidationSuccessSuppressOutput, ""
	}
	// With user-managed networking the platform restriction does not apply.
	if swag.BoolValue(c.cluster.UserManagedNetworking) {
		return ValidationSuccess, fmt.Sprintf("Platform %s is allowed", product)
	}
	return ValidationFailure, fmt.Sprintf("Platform %s is allowed only for Single Node OpenShift or user-managed networking", product)
}
// belongsToMachineCidr verifies the host has an address inside every
// configured machine network CIDR, where that check applies.
func (v *validator) belongsToMachineCidr(c *validationContext) (ValidationStatus, string) {
	if c.infraEnv != nil {
		return ValidationSuccessSuppressOutput, ""
	}
	// Multi-node UMN clusters: non-bootstrap hosts never need this check, and
	// the bootstrap only needs it once a machine CIDR was actually set (else
	// the CIDR will be derived from one of the bootstrap's own networks).
	isUMN := swag.BoolValue(c.cluster.UserManagedNetworking)
	if isUMN && !common.IsSingleNodeCluster(c.cluster) && (!c.host.Bootstrap || !network.IsMachineCidrAvailable(c.cluster)) {
		return ValidationSuccess, "No machine network CIDR validation needed: User Managed Networking"
	}
	if swag.StringValue(c.cluster.Kind) == models.ClusterKindAddHostsCluster {
		return ValidationSuccess, "No machine network CIDR validation needed: Day2 cluster"
	}
	if c.inventory == nil || !network.IsMachineCidrAvailable(c.cluster) {
		return ValidationPending, "Missing inventory or machine network CIDR"
	}
	if network.IsHostInPrimaryMachineNetCidr(v.log, c.cluster, c.host) {
		return ValidationSuccess, "Host belongs to all machine network CIDRs"
	}
	return ValidationFailure, "Host does not belong to machine network CIDRs. Verify that the host belongs to every CIDR listed under machine networks"
}
// getRealHostname returns the user-requested hostname when one was set,
// otherwise the hostname discovered in the inventory.
func getRealHostname(host *models.Host, inventory *models.Inventory) string {
	if requested := host.RequestedHostname; requested != "" {
		return requested
	}
	return inventory.Hostname
}
// isHostnameValid checks the host's effective hostname against the allowed
// pattern and the forbidden-hostname list.
func (v *validator) isHostnameValid(c *validationContext) (ValidationStatus, string) {
	if c.inventory == nil {
		return ValidationFailure, "Missing inventory"
	}
	hostname := getRealHostname(c.host, c.inventory)
	if err := hostutil.ValidateHostname(hostname); err != nil {
		if funk.ContainsString(hostutil.ForbiddenHostnames, hostname) {
			return ValidationFailure, fmt.Sprintf("The host name %s is forbidden", hostname)
		}
		return ValidationFailure, fmt.Sprintf("Hostname %s is forbidden, hostname should match pattern %s", hostname, hostutil.HostnamePattern)
	}
	return ValidationSuccess, fmt.Sprintf("Hostname %s is allowed", hostname)
}
// isIgnitionDownloadable checks that a day-2 host was able to download the
// ignition file from the target cluster's API.
func (v *validator) isIgnitionDownloadable(c *validationContext) (ValidationStatus, string) {
	// Relevant only for day-2 hosts that are bound to a cluster.
	if c.infraEnv != nil || !hostutil.IsDay2Host(c.host) {
		return ValidationSuccessSuppressOutput, ""
	}
	if c.host.APIVipConnectivity == "" {
		return ValidationPending, "Ignition is not yet available, pending API connectivity"
	}
	var response models.APIVipConnectivityResponse
	if err := json.Unmarshal([]byte(c.host.APIVipConnectivity), &response); err != nil {
		return ValidationError, "Internal error - failed to parse agent API connectivity response"
	}
	if response.IsSuccess {
		return ValidationSuccess, "Ignition is downloadable"
	}
	if response.URL == "" {
		// Missing URL means this is a response from an older agent version,
		// without much information about what went wrong with the download -
		// so return an undetailed error message
		return ValidationFailure, "This host has failed to download the ignition file from the cluster, please ensure the host can reach the cluster"
	}
	return ValidationFailure, fmt.Sprintf(
		"This host has failed to download the ignition file from %s with the following error: %s. "+
			"Please ensure the host can reach this URL",
		response.URL, response.DownloadError)
}
// belongsToL2MajorityGroup checks L2 connectivity: the host must be a member
// of the connectivity majority group of every configured machine network.
func (v *validator) belongsToL2MajorityGroup(c *validationContext, majorityGroups map[string][]strfmt.UUID) ValidationStatus {
	if !network.IsMachineCidrAvailable(c.cluster) {
		return ValidationPending
	}
	if majorityGroups == nil {
		return ValidationFailure
	}
	// TODO(mko) This rule should be revised as soon as OCP supports multiple machineNetwork
	// entries using the same IP stack.
	areNetworksEqual := func(ipnet1, ipnet2 *net.IPNet) bool {
		return ipnet1.IP.Equal(ipnet2.IP) && bytes.Equal(ipnet1.Mask, ipnet2.Mask)
	}
	// groupForNetwork returns the majority group whose CIDR key equals the
	// given network, or nil when none matches.
	groupForNetwork := func(ipnet *net.IPNet) []strfmt.UUID {
		for key, groups := range majorityGroups {
			_, groupIpnet, err := net.ParseCIDR(key)
			// majority groups may contain keys other than CIDRS (For instance IPv4 for L3). Therefore, in case of
			// parse error we can skip safely
			if err != nil {
				continue
			}
			if areNetworksEqual(ipnet, groupIpnet) {
				return groups
			}
		}
		return nil
	}
	for _, machineNet := range c.cluster.MachineNetworks {
		_, machineIpnet, err := net.ParseCIDR(string(machineNet.Cidr))
		if err != nil {
			return ValidationError
		}
		// The host must appear in the majority group of every machine network.
		if !funk.Contains(groupForNetwork(machineIpnet), *c.host.ID) {
			return ValidationFailure
		}
	}
	return ValidationSuccess
}
// belongsToL3MajorityGroup checks L3 connectivity for user-managed
// networking: the host must have connected addresses in every address family
// (IPv4/IPv6) the cluster is configured for.
func (v *validator) belongsToL3MajorityGroup(c *validationContext, connectivity network.Connectivity) ValidationStatus {
	ipv4Required, ipv6Required, err := network.GetConfiguredAddressFamilies(c.cluster)
	if err != nil {
		v.log.WithError(err).Warn("Get configured address families")
		return ValidationError
	}
	if (!ipv4Required && !ipv6Required) || connectivity.L3ConnectedAddresses == nil {
		return ValidationFailure
	}
	addresses, found := connectivity.L3ConnectedAddresses[*c.host.ID]
	if !found {
		return ValidationFailure
	}
	var hasIPv4, hasIPv6 bool
	for _, addr := range addresses {
		if network.IsIPv4Addr(addr) {
			hasIPv4 = true
		} else {
			hasIPv6 = true
		}
	}
	return boolValue((!ipv4Required || hasIPv4) && (!ipv6Required || hasIPv6))
}
// belongsToMajorityGroup checks whether the host can reach the majority of
// the cluster's hosts: L3 groups for user-managed networking, L2 groups
// otherwise. Day-2 hosts and single-node clusters pass unconditionally.
func (v *validator) belongsToMajorityGroup(c *validationContext) (ValidationStatus, string) {
	var message string
	if c.infraEnv != nil {
		return ValidationSuccessSuppressOutput, ""
	}
	if hostutil.IsDay2Host(c.host) {
		return ValidationSuccess, "Day2 host is not required to be connected to other hosts in the cluster"
	}
	if common.IsSingleNodeCluster(c.cluster) {
		return ValidationSuccess, "Host has connectivity to the majority of hosts in the cluster"
	}
	if c.cluster.ConnectivityMajorityGroups == "" {
		return ValidationPending, "Machine Network CIDR or Connectivity Majority Groups missing"
	}
	var connectivity network.Connectivity
	err := json.Unmarshal([]byte(c.cluster.ConnectivityMajorityGroups), &connectivity)
	if err != nil {
		v.log.WithError(err).Warn("Parse majority group")
		return ValidationError, "Parse error for connectivity majority group"
	}
	var status ValidationStatus
	if swag.BoolValue(c.cluster.UserManagedNetworking) {
		status = v.belongsToL3MajorityGroup(c, connectivity)
	} else {
		status = v.belongsToL2MajorityGroup(c, connectivity.MajorityGroups)
	}
	// With fewer than 3 hosts there is no meaningful "majority", so report
	// pending instead of failing.
	if status == ValidationFailure && len(c.cluster.Hosts) < 3 {
		return ValidationPending, "Not enough hosts in cluster to calculate connectivity groups"
	}
	switch status {
	case ValidationSuccess:
		message = "Host has connectivity to the majority of hosts in the cluster"
	case ValidationFailure:
		message = "No connectivity to the majority of hosts in the cluster"
	case ValidationPending:
		// Shouldn't happen
		message = "Not enough information to calculate host majority groups"
	default:
		message = fmt.Sprintf("Unexpected status %s", status)
	}
	return status, message
}
// missingNTPSyncResult decides the NTP validation status for a host with no
// synced source: unbound hosts with no configured NTP sources pass silently;
// everything else fails.
func (v *validator) missingNTPSyncResult(db *gorm.DB, host *models.Host) ValidationStatus {
	unboundStatuses := []string{
		models.HostStatusInsufficientUnbound,
		models.HostStatusDisconnectedUnbound,
		models.HostStatusDiscoveringUnbound,
		models.HostStatusKnownUnbound,
	}
	if !funk.ContainsString(unboundStatuses, swag.StringValue(host.Status)) {
		return ValidationFailure
	}
	sources, err := common.GetHostNTPSources(db, host)
	if err != nil {
		v.log.WithError(err).Errorf("Failed to get sources for host %s", host.ID.String())
		return ValidationError
	}
	if sources == "" {
		// Unbound host that was never given NTP sources: nothing to check.
		return ValidationSuccessSuppressOutput
	}
	return ValidationFailure
}
// isNTPSynced checks whether the host synchronized with at least one NTP
// source; unbound hosts with no configured sources pass silently (see
// missingNTPSyncResult).
func (v *validator) isNTPSynced(c *validationContext) (ValidationStatus, string) {
	var status ValidationStatus
	var message string
	var sources []*models.NtpSource
	if c.host.NtpSources == "" {
		status = v.missingNTPSyncResult(c.db, c.host)
	} else if err := json.Unmarshal([]byte(c.host.NtpSources), &sources); err != nil {
		v.log.WithError(err).Warn("Parse NTP sources")
		status = ValidationError
	} else {
		// Start from the "no sync result" status and upgrade to success if
		// any reported source is in the synced state.
		status = v.missingNTPSyncResult(c.db, c.host)
		for _, source := range sources {
			if source.SourceState == models.SourceStateSynced {
				status = ValidationSuccess
			}
		}
	}
	switch status {
	case ValidationSuccess:
		message = "Host NTP is synced"
	case ValidationFailure:
		message = "Host couldn't synchronize with any NTP server"
	case ValidationError:
		message = "Parse error for NTP sources"
	default:
		message = fmt.Sprintf("Unexpected status %s", status)
	}
	return status, message
}
// isTimeSyncedBetweenHostAndService checks that the host's reported clock is
// within the allowed skew of the service's clock, in either direction.
func (v *validator) isTimeSyncedBetweenHostAndService(c *validationContext) (ValidationStatus, string) {
	if c.host.Timestamp == 0 {
		return ValidationPending, "Missing host time, can't determine synchronization between host and service"
	}
	// Positive skew: service is ahead of the host; negative: host is ahead.
	skew := time.Now().UTC().Sub(time.Unix(c.host.Timestamp, 0).UTC())
	switch {
	case skew > maxServiceAheadOfHostTimeDiff:
		return ValidationFailure, fmt.Sprintf("Host clock is not synchronized, service time is ahead of host's at least for %.1f minutes, "+
			"please configure an NTP server via DHCP. Service time: %s", maxServiceAheadOfHostTimeDiff.Minutes(), time.Now().UTC())
	case skew < -maxHostAheadOfServiceTimeDiff:
		return ValidationFailure, fmt.Sprintf("Host clock is not synchronized, host time is ahead of service at least for %.1f minutes, "+
			"please configure an NTP server via DHCP. Service time: %s", maxHostAheadOfServiceTimeDiff.Minutes(), time.Now().UTC())
	default:
		return ValidationSuccess, "Host clock is synchronized with service"
	}
}
// sucessfullOrUnknownContainerImagesAvailability validates that every
// container image the agent attempted to pull was pulled successfully (or was
// never attempted). On failure the message lists the images that failed.
//
// Fix: the failed image names are derived from the statuses already parsed
// above, instead of calling getFailedImagesNames, which re-unmarshalled the
// same ImagesStatus payload a second time (and whose error branch was
// unreachable here, since the first unmarshal had already succeeded).
func (v *validator) sucessfullOrUnknownContainerImagesAvailability(c *validationContext) (ValidationStatus, string) {
	imageStatuses, err := common.UnmarshalImageStatuses(c.host.ImagesStatus)
	if err != nil {
		v.log.WithError(err).Warn("Parse container image statuses")
		return ValidationError, "Validation error"
	}
	if !allImagesValid(imageStatuses) {
		// Collect the names of all images that failed or were pulled too slowly.
		images := make([]string, 0)
		for _, imageStatus := range imageStatuses {
			if isInvalidImageStatus(imageStatus) {
				images = append(images, imageStatus.Name)
			}
		}
		return ValidationFailure, fmt.Sprintf("Failed to fetch container images needed for installation from %s. "+
			"This may be due to a network hiccup. Retry to install again. If this problem persists, "+
			"check your network settings to make sure you’re not blocked.", strings.Join(images, ","))
	}
	return ValidationSuccess, "All required container images were either pulled successfully or no attempt was made to pull them"
}
// getFailedImagesNames returns the names of all container images whose pull
// either failed outright or was too slow, per the host's reported statuses.
// It returns an error only when the status payload cannot be parsed.
func (v *validator) getFailedImagesNames(host *models.Host) ([]string, error) {
	statuses, err := common.UnmarshalImageStatuses(host.ImagesStatus)
	if err != nil {
		return nil, err
	}
	failed := make([]string, 0)
	for _, status := range statuses {
		if isInvalidImageStatus(status) {
			failed = append(failed, status.Name)
		}
	}
	return failed, nil
}
// isInvalidImageStatus reports whether an image pull is considered bad: either
// it failed outright, or it succeeded but downloaded below the rate threshold.
func isInvalidImageStatus(imageStatus *models.ContainerImageAvailability) bool {
	if imageStatus.Result == models.ContainerImageAvailabilityResultFailure {
		return true
	}
	// A zero size means no measurable download took place, so the rate check
	// only applies when some bytes were actually transferred.
	return imageStatus.SizeBytes > 0 && imageStatus.DownloadRate < ImageStatusDownloadRateThreshold
}
// allImagesValid reports whether none of the given image statuses indicate a
// failed or excessively slow pull.
func allImagesValid(imageStatuses common.ImageStatuses) bool {
	for key := range imageStatuses {
		if isInvalidImageStatus(imageStatuses[key]) {
			return false
		}
	}
	return true
}
// sufficientOrUnknownInstallationDiskSpeed is a pre-install validation that
// passes when the boot device's disk-speed test either has not been run or
// completed successfully. The speed test only runs after installation starts,
// so any available result must come from a previous installation attempt; a
// device that was never tested must therefore pass this validation, otherwise
// a first installation could never begin.
func (v *validator) sufficientOrUnknownInstallationDiskSpeed(c *validationContext) (ValidationStatus, string) {
	info, err := v.getBootDeviceInfo(c.host)
	switch {
	case err != nil:
		return ValidationError, "Validation error"
	case info == nil || info.DiskSpeed == nil || !info.DiskSpeed.Tested:
		// No measurement exists for this boot device yet.
		return ValidationSuccess, "Speed of installation disk has not yet been measured"
	case info.DiskSpeed.ExitCode == 0:
		return ValidationSuccess, "Speed of installation disk is sufficient"
	default:
		return ValidationFailure, "While preparing the previous installation the installation disk speed measurement failed or was found to be insufficient"
	}
}
// hostTimingMetric holds a single connectivity measurement (such as latency
// or packet loss) between this host and one other host in the cluster.
type hostTimingMetric struct {
	// otherHostName is the hostname of the peer the metric was measured against.
	otherHostName string
	// timingMetric is the measured value, rendered with two decimal places.
	timingMetric float64
	// timingSuffix is the unit label appended when rendering, e.g. " ms" or "%".
	timingSuffix string
}
// summarizeHostTimingMetrics renders a comma-separated human-readable summary
// of per-host timing metrics, e.g. "host-a (12.30 ms), host-b (0.50%)". When
// truncateMetrics is set and the list is long, it stops after the entry at
// index maxHostTimingMetrics+1 and marks the cut with "and others...".
func (v *validator) summarizeHostTimingMetrics(packetLossInfo []hostTimingMetric, truncateMetrics bool) string {
	parts := make([]string, 0, len(packetLossInfo))
	for idx, metric := range packetLossInfo {
		entry := fmt.Sprintf("%s (%.2f%s)", metric.otherHostName, metric.timingMetric, metric.timingSuffix)
		// Large clusters can produce very long lists, so optionally cut them short.
		if truncateMetrics && idx > maxHostTimingMetrics {
			parts = append(parts, entry+" and others...")
			break
		}
		parts = append(parts, entry)
	}
	return strings.Join(parts, ", ")
}
// thresholdTestType selects which L3 connectivity metric is compared against
// the cluster role requirements in thresholdExceededTest.
type thresholdTestType int
const (
	// thresholdTestL3AverageRTTMs checks the average round-trip time (ms).
	thresholdTestL3AverageRTTMs thresholdTestType = 0
	// thresholdTestL3PacketLoss checks the packet-loss percentage.
	thresholdTestL3PacketLoss thresholdTestType = 1
)
// thresholdExceededTest scans the host's L3 connectivity report and collects
// the peers (having the same effective role as this host) for which the metric
// selected by testType — average RTT or packet loss — exceeds the threshold in
// clusterRoleReqs. It returns ValidationFailure plus the offending per-peer
// metrics when at least one peer exceeds the threshold, ValidationSuccess when
// none does, or ValidationError when the report can't be parsed or testType is
// unknown.
func (v *validator) thresholdExceededTest(testType thresholdTestType, host *models.Host, clusterRoleReqs *models.ClusterHostRequirements, hosts []*models.Host, inventoryCache InventoryCache) (ValidationStatus, []hostTimingMetric, error) {
	connectivityReport, err := hostutil.UnmarshalConnectivityReport(host.Connectivity)
	if err != nil {
		v.log.Errorf("Unable to unmarshall host connectivity for %s:%s", host.ID, err)
		// NOTE(review): the parse error is logged but a nil error is returned,
		// so callers only see ValidationError — confirm this is intentional.
		return ValidationError, nil, nil
	}
	// failedHostIPs deduplicates by remote IP so a peer reachable over several
	// links is only counted once.
	failedHostIPs := map[string]struct{}{}
	failedHostMetrics := []hostTimingMetric{}
	for _, r := range connectivityReport.RemoteHosts {
		for _, l3 := range r.L3Connectivity {
			var hostHasExceededThreshold bool
			// NOTE(review): the threshold fields are dereferenced without nil
			// checks — presumably callers guarantee they are set; verify.
			switch testType {
			case thresholdTestL3AverageRTTMs:
				hostHasExceededThreshold = l3.AverageRTTMs > *clusterRoleReqs.Total.NetworkLatencyThresholdMs
			case thresholdTestL3PacketLoss:
				hostHasExceededThreshold = l3.PacketLossPercentage > *clusterRoleReqs.Total.PacketLossPercentage
			default:
				return ValidationError, nil, fmt.Errorf("unexpected testType")
			}
			if hostHasExceededThreshold {
				if _, ok := failedHostIPs[l3.RemoteIPAddress]; !ok {
					hostname, role, err := GetHostnameAndEffectiveRoleByHostID(r.HostID, hosts, inventoryCache)
					if err != nil {
						v.log.Error(err)
						// NOTE(review): lookup failures return ValidationFailure with the
						// error, while the parse failure above returns ValidationError with
						// nil — verify the asymmetry is intended.
						return ValidationFailure, nil, err
					}
					// Only peers with the same effective role count toward the check.
					if role == common.GetEffectiveRole(host) {
						failedHostIPs[l3.RemoteIPAddress] = struct{}{}
						switch testType {
						case thresholdTestL3AverageRTTMs:
							failedHostMetrics = append(failedHostMetrics, hostTimingMetric{otherHostName: hostname, timingMetric: l3.AverageRTTMs, timingSuffix: " ms"})
						case thresholdTestL3PacketLoss:
							failedHostMetrics = append(failedHostMetrics, hostTimingMetric{otherHostName: hostname, timingMetric: l3.PacketLossPercentage, timingSuffix: "%"})
						}
					}
				}
			}
		}
	}
	if len(failedHostMetrics) > 0 {
		return ValidationFailure, failedHostMetrics, nil
	}
	return ValidationSuccess, nil, nil
}
func (v *validator) hasSufficientPacketLossRequirementForRole(c *validationContext) (ValidationStatus, string) {
if c.inventory == nil {
return ValidationPending, "The inventory is not available yet."
}
if c.infraEnv != nil {
return ValidationSuccessSuppressOutput, ""