Skip to content

Commit

Permalink
PWX-28826: Handle pre-flight check for DMthin (#1014)
Browse files Browse the repository at this point in the history
* PWX-28826 Boilerplate

Signed-off-by: Harsh Desai <hadesai@purestorage.com>

* more boilerplate

Signed-off-by: Harsh Desai <hadesai@purestorage.com>

* PWX-28826:  Pre-flight check for DMthin.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Add comments and move StorageNode cleanup.

Signed-off-by: Jose Rivera <jose@portworx.com>

* Passed checks should be Info events.

Signed-off-by: Jose Rivera <jose@portworx.com>

* Passed checks should be Info events. (#1010)

Signed-off-by: Jose Rivera <jose@portworx.com>

* Pwx 28826 (#1011)

* Pwx 28826 (#1012)

* PWX-28826: Update with the latest master changes. (#1013)

* Updating CSV to use 23.3.1 released image

* Update for 23.3.1 release

* Controller gen vendor

Signed-off-by: Piyush Nimbalkar <pnimbalkar@purestorage.com>

* PWX-29389 Add CRD for portworx diags collection

Signed-off-by: Piyush Nimbalkar <pnimbalkar@purestorage.com>

* PWX-29409: Ignore zones with no nodes (#1008)

In disaggregated mode, there could be zones in which no storage nodes
  are present. Such a zone would make the maxSNPZ value 0.
  Changing the behavior to ignore zones with 0 nodes in the maxSNPZ
  calculation.

Signed-off-by: Naveen Revanna <nrevanna@purestorage.com>

---------

Signed-off-by: Piyush Nimbalkar <pnimbalkar@purestorage.com>
Signed-off-by: Naveen Revanna <nrevanna@purestorage.com>
Co-authored-by: CNBU Jenkins <cnbu-jenkins@purestorage.com>
Co-authored-by: Jiafeng Liao <jliao@purestorage.com>
Co-authored-by: Piyush Nimbalkar <pnimbalkar@purestorage.com>
Co-authored-by: Naveen Revanna <83608369+nrevanna@users.noreply.github.com>

* Add PassPreFlight event tag and logging

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Check status of portworx container in pre-flight pod and remove 'wait' code.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Fix unit test.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Fix unit test.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: PR review changes and fix portworx_test.go UTs

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: fix gomock Validate calls.  Also comment out the two tests that don't work since Validate was removed from the controller.validate() func. PWX-30373 to try and fix later.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-30373: Re-add back in the commented out tests and add K8s version check failure to trigger the needed workflow.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Exit pre-check wait if running CBT namespace.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Add 5 min timeout to pre-flight status check.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Exit GetPreFlightStatus() with success if running CBT namespace.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Don't automatically enable dmthin via pre-flight check if running CBT namespace.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-30373: Revert UT and integration test hacks.  Need to mock the functionality correctly.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: Increase pre-flight daemonset ready wait to 10mins.

Signed-off-by: Jose Rivera <jose@portworx.com>

* PWX-28826: fix 'TestValidate' UT.  Don't error if pre-flight daemonset exists.

Signed-off-by: Jose Rivera <jose@portworx.com>

* Only run preflight if AWS.

Signed-off-by: Jose Rivera <jose@portworx.com>

---------

Signed-off-by: Harsh Desai <hadesai@purestorage.com>
Signed-off-by: Jose Rivera <jose@portworx.com>
Signed-off-by: Piyush Nimbalkar <pnimbalkar@purestorage.com>
Signed-off-by: Naveen Revanna <nrevanna@purestorage.com>
Co-authored-by: Harsh Desai <hadesai@purestorage.com>
Co-authored-by: CNBU Jenkins <cnbu-jenkins@purestorage.com>
Co-authored-by: Jiafeng Liao <jliao@purestorage.com>
Co-authored-by: Piyush Nimbalkar <pnimbalkar@purestorage.com>
Co-authored-by: Naveen Revanna <83608369+nrevanna@users.noreply.github.com>
  • Loading branch information
6 people committed Apr 17, 2023
1 parent c2a317c commit f10adb4
Show file tree
Hide file tree
Showing 10 changed files with 683 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package component
import (
"context"
"fmt"
"reflect"
"strconv"

"github.com/hashicorp/go-version"
ocp_secv1 "github.com/openshift/api/security/v1"
"github.com/sirupsen/logrus"
Expand All @@ -12,9 +15,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
"reflect"
"sigs.k8s.io/controller-runtime/pkg/client"
"strconv"

"k8s.io/apimachinery/pkg/types"

Expand Down
63 changes: 62 additions & 1 deletion drivers/storage/portworx/portworx.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"math"
"strconv"
"strings"
"time"

version "github.com/hashicorp/go-version"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -88,7 +89,67 @@ func (p *portworx) Init(
return nil
}

func (p *portworx) Validate() error {
// Validate runs the pre-flight checks for the given StorageCluster.
//
// It builds the storage pod spec, starts the pre-flight run (tolerating one
// that already exists), polls until every pre-flight pod reports completion,
// and then processes the results found on the cluster's StorageNodes.
//
// An error is returned when the pod spec cannot be built, the pre-flight run
// cannot be started, the status cannot be read, or the wait times out.
// Failures while listing StorageNodes or processing results are only logged;
// Validate still returns nil in that case. The pre-flight resources are
// deleted on every exit path once the pre-flight run is known to exist.
func (p *portworx) Validate(cluster *corev1.StorageCluster) error {
	const (
		// preFlightPollInterval is the pause taken before each status check.
		preFlightPollInterval = 3 * time.Second
		// preFlightMaxPolls bounds the wait: 200 polls * 3s = 600s (10 minutes).
		preFlightMaxPolls = 200
	)

	podSpec, err := p.GetStoragePodSpec(cluster, "")
	if err != nil {
		return err
	}

	preFlighter := NewPreFlighter(cluster, p.k8sClient, podSpec)

	// Start the pre-flight container. The pre-flight checks at this time are specific to enabling DMthin
	if err := preFlighter.RunPreFlight(); err != nil {
		if !errors.IsAlreadyExists(err) {
			return err
		}
		logrus.Debugf("pre-flight: container already running...")
	}

	// Register the cleanup before waiting so the pre-flight pods are removed
	// even when the status check fails or times out, not only on success.
	defer func() {
		logrus.Infof("pre-flight: cleaning pre-flight ds...")
		if err := preFlighter.DeletePreFlight(); err != nil {
			logrus.Errorf("pre-flight: error deleting pre-flight: %v", err)
		}
	}()

	// Wait for all the pre-flight pods to finish.
	// If this moves into the reconcile loop, the timeout needs a different mechanism.
	for attempt := 1; ; attempt++ {
		time.Sleep(preFlightPollInterval) // Pause before status check

		completed, inProgress, total, err := preFlighter.GetPreFlightStatus()
		if err != nil {
			logrus.Errorf("pre-flight: error getting pre-flight status: %v", err)
			return err
		}
		logrus.Infof("pre-flight: Completed [%v] In Progress [%v] Total [%v]", completed, inProgress, total)

		// A total of zero means nothing has been scheduled yet, so keep waiting.
		if total != 0 && completed == total {
			logrus.Infof("pre-flight: completed...")
			break
		}

		if attempt >= preFlightMaxPolls {
			err = fmt.Errorf("pre-flight: pre-flight status check timed out")
			logrus.Errorf("%v", err)
			return err
		}
	}

	// Process all the StorageNode.Status.Checks. Failures here are logged but
	// deliberately do not fail validation.
	storageNodes, err := p.storageNodesList(cluster)
	if err != nil {
		logrus.Errorf("pre-flight incomplete: Error getting storage node list: %v", err)
		return nil
	}
	if err := preFlighter.ProcessPreFlightResults(p.recorder, storageNodes); err != nil {
		logrus.Errorf("pre-flight: Error processing results: %v", err)
	}

	return nil
}
func (p *portworx) initializeComponents() {
Expand Down
74 changes: 73 additions & 1 deletion drivers/storage/portworx/portworx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,81 @@ func TestInit(t *testing.T) {

// TestValidate exercises portworx.Validate against a fake Kubernetes client
// that already holds a pre-flight DaemonSet with one pod whose "portworx"
// container is ready. It expects validation to succeed, the cluster's misc
// args annotation to contain "-T dmthin", and a PassPreFlight event to be
// recorded after the default telemetry event.
func TestValidate(t *testing.T) {
	driver := portworx{}
	cluster := &corev1.StorageCluster{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "px-cluster",
			Namespace: "kube-test",
		},
	}

	storageNode := &corev1.StorageNode{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "node-1",
			Namespace: cluster.Namespace,
		},
	}

	labels := map[string]string{
		"name": pxPreFlightDaemonSetName,
	}

	// Pre-create the pre-flight DaemonSet, owned by the cluster, so Validate
	// finds an existing one.
	clusterRef := metav1.NewControllerRef(cluster, pxutil.StorageClusterKind())
	preflightDS := &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:            pxPreFlightDaemonSetName,
			Namespace:       cluster.Namespace,
			Labels:          labels,
			UID:             types.UID("preflight-ds-uid"),
			OwnerReferences: []metav1.OwnerReference{*clusterRef},
		},
		Spec: appsv1.DaemonSetSpec{
			Selector: &metav1.LabelSelector{
				MatchLabels: labels,
			},
		},
	}

	k8sClient := testutil.FakeK8sClient(preflightDS)

	// One pod owned by the DaemonSet, with its "portworx" container ready.
	preFlightPod1 := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:            "preflight-1",
			Namespace:       cluster.Namespace,
			OwnerReferences: []metav1.OwnerReference{{UID: preflightDS.UID}},
		},
		Status: v1.PodStatus{
			ContainerStatuses: []v1.ContainerStatus{
				{
					Name:  "portworx",
					Ready: true,
				},
			},
		},
	}
	err := k8sClient.Create(context.TODO(), preFlightPod1)
	require.NoError(t, err)

	// The DaemonSet expects exactly one scheduled pod, matching the pod above.
	preflightDS.Status.DesiredNumberScheduled = int32(1)
	err = k8sClient.Status().Update(context.TODO(), preflightDS)
	require.NoError(t, err)

	recorder := record.NewFakeRecorder(100)
	err = driver.Init(k8sClient, runtime.NewScheme(), recorder)
	require.NoError(t, err)

	err = driver.SetDefaultsOnStorageCluster(cluster)
	require.NoError(t, err)

	err = k8sClient.Create(context.TODO(), storageNode)
	require.NoError(t, err)

	err = driver.Validate(cluster)
	require.NoError(t, err)
	require.Contains(t, cluster.Annotations[pxutil.AnnotationMiscArgs], "-T dmthin")

	// Two events are received below; assert both are already buffered so a
	// missing event fails the test instead of blocking on the channel forever.
	require.GreaterOrEqual(t, len(recorder.Events), 2)
	<-recorder.Events // Pop first event which is Default telemetry enabled event
	require.Contains(t, <-recorder.Events,
		fmt.Sprintf("%v %v %s", v1.EventTypeNormal, util.PassPreFlight, "Enabling DMthin"))
}

func TestGetSelectorLabels(t *testing.T) {
Expand Down
Loading

0 comments on commit f10adb4

Please sign in to comment.