Skip to content

Commit

Permalink
Merge pull request #197 from jmencak/4.6-bz1920525
Browse files Browse the repository at this point in the history
Bug 1920525: Recover gracefully after Tuned errors.
  • Loading branch information
openshift-merge-robot committed Feb 17, 2021
2 parents 01d9abf + 2dfd043 commit 191ec1e
Showing 1 changed file with 40 additions and 6 deletions.
46 changes: 40 additions & 6 deletions pkg/tuned/tuned.go
Expand Up @@ -34,6 +34,15 @@ import (
"github.com/openshift/cluster-node-tuning-operator/pkg/util"
)

// Profile status condition flags. Each constant occupies its own bit
// (2^0, 2^1, 2^2), so any combination of them fits into a single Bits value.
const (
	// scApplied is recorded when the tuned log reports that static tuning
	// from a profile was applied.
	scApplied Bits = 1 << 0
	// scWarn is recorded when a tuned log line contains " WARNING ".
	scWarn Bits = 1 << 1
	// scError is recorded when a tuned log line contains " ERROR ".
	scError Bits = 1 << 2
)

// Constants
const (
operandNamespace = "openshift-cluster-node-tuning-operator"
Expand All @@ -59,6 +68,8 @@ const (
// Types
type arrayFlags []string

// Bits is a small bit set (8 bits wide) used to accumulate Profile status
// conditions; the sc* constants name the individual bits.
type Bits uint8

type sockAccepted struct {
conn net.Conn
err error
Expand Down Expand Up @@ -91,6 +102,8 @@ type Controller struct {
// and the node Profile k8s object's Status needs to be set for the operator;
// it is set to false on successful Profile update
reloaded bool
// bit-set representation of Profile status conditions to report back via API
status Bits
}

tunedCmd *exec.Cmd // external command (tuned) being prepared or run
Expand Down Expand Up @@ -381,23 +394,38 @@ func (c *Controller) tunedRun() {
go func() {
for scanner.Scan() {
l := scanner.Text()
profileApplied := strings.Index(l, " tuned.daemon.daemon: static tuning from profile ") >= 0 && strings.Index(l, " applied") >= 0
reloadFailed := strings.Index(l, " tuned.daemon.controller: Failed to reload Tuned: ") >= 0

if profileApplied {
c.daemon.status |= scApplied
}

if strings.Index(l, " WARNING ") >= 0 {
c.daemon.status |= scWarn
}

if strings.Index(l, " ERROR ") >= 0 {
c.daemon.status |= scError
}

if c.daemon.reloading {
c.daemon.reloading = !(strings.Index(l, "static tuning from profile") >= 0 && strings.Index(l, "applied") >= 0)
c.daemon.reloading = !profileApplied && !reloadFailed
c.daemon.reloaded = !c.daemon.reloading
}

fmt.Printf("%s\n", l)
}
}()

c.daemon.reloading = true
err = c.tunedCmd.Start()
if err != nil {
c.daemon.status = 0 // clear the set out of which Profile status conditions are created
if err = c.tunedCmd.Start(); err != nil {
klog.Errorf("error starting tuned: %v", err)
return
}

err = c.tunedCmd.Wait()
if err != nil {
if err = c.tunedCmd.Wait(); err != nil {
// The command exited with non 0 exit status, e.g. terminated by a signal
klog.Errorf("error waiting for tuned: %v", err)
return
Expand Down Expand Up @@ -559,7 +587,13 @@ func (c *Controller) timedUpdater() (err error) {
if recommendedProfile, err = getRecommendedProfile(); err != nil {
return err
}
if activeProfile != recommendedProfile {
if (c.daemon.status & scApplied) == 0 {
klog.Infof("re-applying profile (%s) as the previous application did not complete", activeProfile)
reload = true
} else if (c.daemon.status & scError) != 0 {
klog.Infof("re-applying profile (%s) as the previous application ended with error(s)", activeProfile)
reload = true
} else if activeProfile != recommendedProfile {
klog.Infof("active profile (%s) != recommended profile (%s)", activeProfile, recommendedProfile)
recommendedProfileDir := tunedProfilesDir + "/" + recommendedProfile
if _, err := os.Stat(recommendedProfileDir); os.IsNotExist(err) {
Expand Down

0 comments on commit 191ec1e

Please sign in to comment.