-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for removal of agent configs #11
Changes from all commits
7449bab
a3a6669
cb11bb2
409589c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,9 +27,9 @@ import ( | |
log "github.com/sirupsen/logrus" | ||
"github.com/spf13/viper" | ||
"io" | ||
"io/ioutil" | ||
"net/http" | ||
"os" | ||
"os/exec" | ||
"path" | ||
"time" | ||
) | ||
|
@@ -47,27 +47,43 @@ var ( | |
agentRestartDelay = 1 * time.Second | ||
) | ||
|
||
// SpecificAgentRunner manages the lifecyle and configuration of a single type of agent | ||
type SpecificAgentRunner interface { | ||
// Load gets called after viper's configuration has been populated and before any other use. | ||
Load(agentBasePath string) error | ||
SetCommandHandler(handler CommandHandler) | ||
// EnsureRunning after installation of an agent and each call to ProcessConfig | ||
EnsureRunning(ctx context.Context) | ||
// EnsureRunningState is called after installation of an agent and after each call to ProcessConfig. | ||
// In the latter case, applyConfigs will be passed as true to indicate the runner should take | ||
// actions to reload configuration into an agent, if running. | ||
// It must ensure the agent process is running if configs and executable are available | ||
// It must also ensure that that the process is stopped if no configuration remains | ||
EnsureRunningState(ctx context.Context, applyConfigs bool) | ||
ProcessConfig(configure *telemetry_edge.EnvoyInstructionConfigure) error | ||
// Stop should stop the agent's process, if running | ||
Stop() | ||
} | ||
|
||
type AgentsRunner interface { | ||
// Router routes external agent operations to the respective SpecificAgentRunner instance | ||
type Router interface { | ||
// Start ensures that when the ctx is done, then the managed SpecificAgentRunner instances will be stopped | ||
Start(ctx context.Context) | ||
ProcessInstall(install *telemetry_edge.EnvoyInstructionInstall) | ||
ProcessConfigure(configure *telemetry_edge.EnvoyInstructionConfigure) | ||
} | ||
|
||
type AgentRunningInstance struct { | ||
ctx context.Context | ||
cancel context.CancelFunc | ||
cmd *exec.Cmd | ||
type noAppliedConfigsError struct{} | ||
|
||
func (e *noAppliedConfigsError) Error() string { | ||
return "no configurations were applied" | ||
} | ||
|
||
// IsNoAppliedConfigs tests if an error returned by a SpecificAgentRunner's ProcessConfig indicates no configs were applied | ||
func IsNoAppliedConfigs(err error) bool { | ||
if err == nil { | ||
return false | ||
} | ||
_, ok := err.(*noAppliedConfigsError) | ||
return ok | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I need to ask about this. Does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a type assertion which can result in either one value (and panic, if the wrong type) or two values where the second is a boolean indicating if the type assertion was "ok" (hence the naming convention). Go is at least consistent in that the map indexing operator also supports an optional, second boolean result value and the convention is "ok" there too. |
||
} | ||
|
||
func init() { | ||
|
@@ -81,6 +97,7 @@ func downloadExtractTarGz(outputPath, url string, exePath string) error { | |
if err != nil { | ||
return errors.Wrap(err, "failed to download agent") | ||
} | ||
//noinspection GoUnhandledErrorResult | ||
defer resp.Body.Close() | ||
|
||
gzipReader, err := gzip.NewReader(resp.Body) | ||
|
@@ -113,7 +130,7 @@ func downloadExtractTarGz(outputPath, url string, exePath string) error { | |
|
||
_, err = io.Copy(file, tarReader) | ||
if err != nil { | ||
file.Close() | ||
_ = file.Close() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you have to assign a variable anytime the function returns something? is this just a best practice thing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IntelliJ was flagging this as a warning because the returned error was not being considered. Since errors on closing files tend to be uninteresting, I took the cheesy solution of explicitly ignoring the returned value with a blank identifier. |
||
log.WithError(err).Error("unable to write to file") | ||
continue | ||
} else { | ||
|
@@ -129,10 +146,6 @@ func downloadExtractTarGz(outputPath, url string, exePath string) error { | |
return nil | ||
} | ||
|
||
func (inst *AgentRunningInstance) IsRunning() bool { | ||
return inst != nil && inst.cmd != nil && (inst.cmd.ProcessState == nil || !inst.cmd.ProcessState.Exited()) | ||
} | ||
|
||
func fileExists(file string) bool { | ||
if _, err := os.Stat(file); os.IsNotExist(err) { | ||
return false | ||
|
@@ -143,3 +156,33 @@ func fileExists(file string) bool { | |
return true | ||
} | ||
} | ||
|
||
// handleContentConfigurationOp handles agent config operations that work with content simply written to | ||
// the file named by configInstancePath | ||
// Returns true if the configuration was applied | ||
func handleContentConfigurationOp(op *telemetry_edge.ConfigurationOp, configInstancePath string) bool { | ||
switch op.GetType() { | ||
case telemetry_edge.ConfigurationOp_CREATE, telemetry_edge.ConfigurationOp_MODIFY: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this how indentation is supposed to work with go switch statements? do we actually use a linter? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than a linter I'd like for us to enable the "gofmt" option on commit. With that said, yeah, apparently this is the official indentation style for switch statements, but not what I personally prefer :). |
||
err := ioutil.WriteFile(configInstancePath, []byte(op.GetContent()), configFilePerms) | ||
if err != nil { | ||
log.WithError(err).WithField("op", op).Warn("failed to process telegraf config operation") | ||
} else { | ||
return true | ||
} | ||
|
||
case telemetry_edge.ConfigurationOp_REMOVE: | ||
err := os.Remove(configInstancePath) | ||
if err != nil { | ||
if os.IsNotExist(err) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to check what this does. is it seeing if the error returned is a does not exist error? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. Since |
||
log.WithField("op", op).Warn("did not need to remove since already removed") | ||
return true | ||
} else { | ||
log.WithError(err).WithField("op", op).Warn("failed to remove config instance file") | ||
} | ||
} else { | ||
return true | ||
} | ||
} | ||
|
||
return false | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright 2018 Rackspace US, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* | ||
*/ | ||
|
||
package agents_test | ||
|
||
import ( | ||
"github.com/pkg/errors" | ||
"github.com/racker/telemetry-envoy/agents" | ||
"github.com/stretchr/testify/assert" | ||
"testing" | ||
) | ||
|
||
func TestNoAppliedConfigs(t *testing.T) { | ||
var err error | ||
err = agents.CreateNoAppliedConfigsError() | ||
assert.True(t, agents.IsNoAppliedConfigs(err)) | ||
|
||
assert.False(t, agents.IsNoAppliedConfigs(errors.New("not ours"))) | ||
assert.False(t, agents.IsNoAppliedConfigs(nil)) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's interesting. By agent, I assume you mean telegraf or something?
It seems fair enough, the only thing I wonder is mostly for Windows cases, SCOM often monitors for processes stopping and will alert automatically. I believe this is unexpected service terminations rather than a clean stop, but maybe worth keeping in mind.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct. One job of Envoy is to be an "agent manager" where telegraf and filebeat are the two agents supported so far.
This comment is referring to how the Envoy watches the agent's child process. The SCOM scenario is not quite related, but still an interesting one that we should implement with a general "process monitor".