Skip to content

Commit

Permalink
Merge pull request #139 from openshift-cherrypick-robot/cherry-pick-1…
Browse files Browse the repository at this point in the history
…32-to-release-4.5

[release-4.5] Bug 1855240: Check also Pod status before enabling Fast upload
  • Loading branch information
openshift-merge-robot committed Aug 20, 2020
2 parents de6d46f + 0e2d243 commit 6b4743c
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pkg/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func (s *Support) Run(ctx context.Context, controller *controllercmd.ControllerC

// the status controller initializes the cluster operator object and retrieves
// the last sync time, if any was set
statusReporter := status.NewController(configClient, configObserver, os.Getenv("POD_NAMESPACE"))
statusReporter := status.NewController(configClient, gatherKubeClient.CoreV1(), configObserver, os.Getenv("POD_NAMESPACE"))

// the recorder periodically flushes any recorded data to disk as tar.gz files
// in s.StoragePath, and also prunes files above a certain age
Expand Down
51 changes: 39 additions & 12 deletions pkg/controller/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@ import (

"golang.org/x/time/rate"

"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog"

configv1 "github.com/openshift/api/config/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/openshift/insights-operator/pkg/config"
"github.com/openshift/insights-operator/pkg/controllerstatus"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/klog"
)

// How many upload failures in a row we tolerate before starting reporting
Expand All @@ -40,6 +41,7 @@ type Controller struct {
name string
namespace string
client configv1client.ConfigV1Interface
coreClient corev1client.CoreV1Interface
statusCh chan struct{}
configurator Configurator

Expand All @@ -50,10 +52,11 @@ type Controller struct {
safeInitialStart bool
}

func NewController(client configv1client.ConfigV1Interface, configurator Configurator, namespace string) *Controller {
func NewController(client configv1client.ConfigV1Interface, coreClient corev1client.CoreV1Interface, configurator Configurator, namespace string) *Controller {
c := &Controller{
name: "insights",
client: client,
coreClient: coreClient,
statusCh: make(chan struct{}, 1),
configurator: configurator,
namespace: namespace,
Expand Down Expand Up @@ -363,8 +366,8 @@ func (c *Controller) updateStatus(initial bool) error {
}
existing = nil
}
safeInitialStart := false
if initial {
ophealthy := false
if existing != nil {
var reported Reported
if len(existing.Status.Extension.Raw) > 0 {
Expand All @@ -373,15 +376,39 @@ func (c *Controller) updateStatus(initial bool) error {
}
}
c.SetLastReportedTime(reported.LastReportTime.Time.UTC())
if c := findOperatorStatusCondition(existing.Status.Conditions, configv1.OperatorDegraded); c == nil ||
c != nil && c.Status == configv1.ConditionFalse {
safeInitialStart = true
if con := findOperatorStatusCondition(existing.Status.Conditions, configv1.OperatorDegraded); con == nil ||
con != nil && con.Status == configv1.ConditionFalse {
klog.Info("The initial operator extension status is healthy")
ophealthy = true
}
}
if os.Getenv("POD_NAME") != "" && ophealthy {
var pod *v1.Pod
pod, err = c.coreClient.Pods(os.Getenv("POD_NAMESPACE")).Get(os.Getenv("POD_NAME"), metav1.GetOptions{})
if err == nil {
for _, c := range pod.Status.ContainerStatuses {
// every container must have an empty last termination state (neither Terminated nor Waiting) for the pod to be considered healthy
if c.LastTerminationState.Terminated != nil || c.LastTerminationState.Waiting != nil {
klog.Info("The last pod state is unhealthy")
ophealthy = false
break
}
}
} else {
if !errors.IsNotFound(err) {
klog.Errorf("Couldn't get Insights Operator Pod to detect its status. Error: %v", err)
ophealthy = false
}
}
}

if existing == nil || ophealthy {
klog.Info("It is safe to use fast upload")
c.SetSafeInitialStart(true)
} else {
safeInitialStart = true
klog.Info("Not safe for fast upload")
}
}
c.SetSafeInitialStart(safeInitialStart)

updated := c.merge(existing)
if existing == nil {
Expand Down
7 changes: 6 additions & 1 deletion pkg/insights/insightsuploader/insightsuploader.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type StatusReporter interface {
LastReportedTime() time.Time
SetLastReportedTime(time.Time)
SafeInitialStart() bool
SetSafeInitialStart(s bool)
}

type Controller struct {
Expand Down Expand Up @@ -136,8 +137,12 @@ func (c *Controller) Run(ctx context.Context) {
klog.V(2).Infof("Unable to upload report after %s: %v", time.Now().Sub(start).Truncate(time.Second/100), err)
if err == insightsclient.ErrWaitingForVersion {
initialDelay = wait.Jitter(interval/8, 1) - interval/8
if c.reporter.SafeInitialStart() {
initialDelay = wait.Jitter(time.Second*15, 1)
}
return
}
c.reporter.SetSafeInitialStart(false)
if authorizer.IsAuthorizationError(err) {
c.Simple.UpdateStatus(controllerstatus.Summary{Operation: controllerstatus.Uploading,
Reason: "NotAuthorized", Message: fmt.Sprintf("Reporting was not allowed: %v", err)})
Expand All @@ -150,7 +155,7 @@ func (c *Controller) Run(ctx context.Context) {
Reason: "UploadFailed", Message: fmt.Sprintf("Unable to report: %v", err)})
return
}

c.reporter.SetSafeInitialStart(false)
klog.V(4).Infof("Uploaded report successfully in %s", time.Now().Sub(start))
lastReported = start.UTC()
c.Simple.UpdateStatus(controllerstatus.Summary{Healthy: true})
Expand Down

0 comments on commit 6b4743c

Please sign in to comment.