Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize the kernel monitor code. #92

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ RUN test -h /etc/localtime && rm -f /etc/localtime && cp /usr/share/zoneinfo/UTC

ADD ./bin/node-problem-detector /node-problem-detector
ADD config /config
ENTRYPOINT ["/node-problem-detector", "--kernel-monitor=/config/kernel-monitor.json"]
ENTRYPOINT ["/node-problem-detector", "--system-log-monitor=/config/kernel-monitor.json"]
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ List of supported problem daemons:

| Problem Daemon | NodeCondition | Description |
|----------------|:---------------:|:------------|
| [KernelMonitor](https://github.com/kubernetes/node-problem-detector/tree/master/pkg/kernelmonitor) | KernelDeadlock | A problem daemon monitors kernel log and reports problem according to predefined rules. |
| [KernelMonitor](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json) | KernelDeadlock | A system log monitor that monitors the kernel log and reports problems according to predefined rules. |

# Usage
## Flags
* `--version`: Print current version of node-problem-detector.
* `--kernel-monitor`: The configuration used by the kernel monitor, e.g.
* `--system-log-monitor`: The configuration used by the system log monitor, e.g.
[config/kernel-monitor.json](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json).
* `--apiserver-override`: A URI parameter used to customize how node-problem-detector
connects to the apiserver. The format is the same as the
Expand Down Expand Up @@ -112,7 +112,7 @@ spec:
hostPath:
path: /etc/localtime
```
* Edit node-problem-detector.yaml to fit your environment: Set `log` volume to your system log directory. (Used by KernelMonitor)
* Edit node-problem-detector.yaml to fit your environment: Set `log` volume to your system log directory. (Used by SystemLogMonitor)
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`
* If needed, you can use [ConfigMap](http://kubernetes.io/docs/user-guide/configmap/)
to overwrite the `config/`.
Expand Down
6 changes: 3 additions & 3 deletions cmd/node_problem_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ import (
"github.com/golang/glog"
"github.com/spf13/pflag"

"k8s.io/node-problem-detector/pkg/kernelmonitor"
"k8s.io/node-problem-detector/pkg/options"
"k8s.io/node-problem-detector/pkg/problemclient"
"k8s.io/node-problem-detector/pkg/problemdetector"
"k8s.io/node-problem-detector/pkg/systemlogmonitor"
"k8s.io/node-problem-detector/pkg/version"
)

Expand Down Expand Up @@ -67,9 +67,9 @@ func main() {
os.Exit(0)
}

k := kernelmonitor.NewKernelMonitorOrDie(npdo.KernelMonitorConfigPath)
l := systemlogmonitor.NewLogMonitorOrDie(npdo.SystemLogMonitorConfigPath)
c := problemclient.NewClientOrDie(npdo)
p := problemdetector.NewProblemDetector(k, c)
p := problemdetector.NewProblemDetector(l, c)

// Start http server.
if npdo.ServerPort > 0 {
Expand Down
2 changes: 1 addition & 1 deletion config/docker-monitor-filelog.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"plugin": "syslog",
"plugin": "filelog",
"pluginConfig": {
"timestamp": "^time=\"(\\S*)\"",
"message": "msg=\"([^\n]*)\"",
Expand Down
2 changes: 1 addition & 1 deletion config/kernel-monitor-filelog.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"plugin": "syslog",
"plugin": "filelog",
"pluginConfig": {
"timestamp": "^.{15}",
"message": "kernel: \\[.*\\] (.*)",
Expand Down
61 changes: 0 additions & 61 deletions pkg/kernelmonitor/README.md

This file was deleted.

8 changes: 4 additions & 4 deletions pkg/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ import (
type NodeProblemDetectorOptions struct {
// command line options

// KernelMonitorConfigPath specifies the path to kernel monitor configuration file.
KernelMonitorConfigPath string
// SystemLogMonitorConfigPath specifies the path to system log monitor configuration file.
SystemLogMonitorConfigPath string
// ApiServerOverride is the custom URI used to connect to Kubernetes ApiServer.
ApiServerOverride string
// PrintVersion is the flag determining whether version information is printed.
Expand All @@ -55,8 +55,8 @@ func NewNodeProblemDetectorOptions() *NodeProblemDetectorOptions {

// AddFlags adds node problem detector command line options to pflag.
func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&npdo.KernelMonitorConfigPath, "kernel-monitor",
"/config/kernel-monitor.json", "The path to the kernel monitor config file")
fs.StringVar(&npdo.SystemLogMonitorConfigPath, "system-log-monitor",
"/config/kernel-monitor.json", "The path to the system log monitor config file")
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
"", "Custom URI used to connect to Kubernetes ApiServer")
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
Expand Down
6 changes: 3 additions & 3 deletions pkg/problemdetector/problem_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
"k8s.io/kubernetes/pkg/util/clock"

"k8s.io/node-problem-detector/pkg/condition"
"k8s.io/node-problem-detector/pkg/kernelmonitor"
"k8s.io/node-problem-detector/pkg/problemclient"
"k8s.io/node-problem-detector/pkg/systemlogmonitor"
"k8s.io/node-problem-detector/pkg/util"
)

Expand All @@ -39,12 +39,12 @@ type problemDetector struct {
client problemclient.Client
conditionManager condition.ConditionManager
// TODO(random-liu): Use slices of problem daemons if multiple monitors are needed in the future
monitor kernelmonitor.KernelMonitor
monitor systemlogmonitor.LogMonitor
}

// NewProblemDetector creates the problem detector. Currently the problem daemons are passed in directly, but
// in the future we may want to let the problem daemons register themselves.
func NewProblemDetector(monitor kernelmonitor.KernelMonitor, client problemclient.Client) ProblemDetector {
func NewProblemDetector(monitor systemlogmonitor.LogMonitor, client problemclient.Client) ProblemDetector {
return &problemDetector{
client: client,
conditionManager: condition.NewConditionManager(client, clock.RealClock{}),
Expand Down
83 changes: 83 additions & 0 deletions pkg/systemlogmonitor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# System Log Monitor

*System Log Monitor* is a problem daemon in node problem detector. It monitors
a specified system daemon log and detects problems according to predefined rules.

The System Log Monitor matches problems against a predefined list of rules in
the configuration file (see
[`config/kernel-monitor.json`](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json) for an example).
The rule list is extensible.

## Limitations

* System Log Monitor currently supports only file-based logs and journald, but it is easy
to extend it with a [new log watcher](#new-log-watcher).

## Add New NodeConditions

To support new node conditions, you can extend the `conditions` field in
the configuration file with a new condition definition:

```json
{
"type": "NodeConditionType",
"reason": "CamelCaseDefaultNodeConditionReason",
"message": "arbitrary default node condition message"
}
```

## Detect New Problems

To detect new problems, you can extend the `rules` field in the configuration file
with a new rule definition:

```json
{
"type": "temporary/permanent",
"condition": "NodeConditionOfPermanentIssue",
"reason": "CamelCaseShortReason",
"message": "regexp matching the issue in the log"
}
```

## Log Watchers

System log monitor supports different log management tools with different log
watchers:
* [filelog](https://github.com/kubernetes/node-problem-detector/blob/master/pkg/systemlogmonitor/logwatchers/filelog): Log watcher for
arbitrary file based log.
* [journald](https://github.com/kubernetes/node-problem-detector/blob/master/pkg/systemlogmonitor/logwatchers/journald): Log watcher for
journald.
Set `plugin` in the configuration file to specify the log watcher.

### Plugin Configuration

Log watcher specific configurations are configured in `pluginConfig`.
* **journald**
* source: The [`SYSLOG_IDENTIFIER`](https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html)
of the log to watch.
* **filelog**:
* timestamp: The regular expression used to match timestamp in the log line.
Submatch is supported, but only the last result will be used as the actual
timestamp.
* message: The regular expression used to match message in the log line.
Submatch is supported, but only the last result will be used as the actual
message.
* timestampFormat: The format of the timestamp. The format string is the time
`2006-01-02T15:04:05Z07:00` in the expected format. (See
[golang timestamp format](https://golang.org/pkg/time/#pkg-constants))

### Change Log Path

Logs on different OS distros may be located in different paths. The `logPath`
field in the configuration file is the log path. You can always configure
`logPath` to match your OS distro.
* filelog: `logPath` is the path of log file, e.g. `/var/log/kern.log` for kernel
log.
* journald: `logPath` is the journal log directory, usually `/var/log/journal`.

### New Log Watcher

System log monitor uses [Log
Watcher](https://github.com/kubernetes/node-problem-detector/blob/master/pkg/systemlogmonitor/logwatchers/types/log_watcher.go) to support different log management tools.
It is easy to implement a new log watcher.
18 changes: 9 additions & 9 deletions pkg/kernelmonitor/config.go → pkg/systemlogmonitor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,26 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package kernelmonitor
package systemlogmonitor

import (
watchertypes "k8s.io/node-problem-detector/pkg/kernelmonitor/logwatchers/types"
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
watchertypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers/types"
logtypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
"k8s.io/node-problem-detector/pkg/types"
)

// MonitorConfig is the configuration of kernel monitor.
// MonitorConfig is the configuration of log monitor.
type MonitorConfig struct {
// WatcherConfig is the configuration of kernel log watcher.
// WatcherConfig is the configuration of log watcher.
watchertypes.WatcherConfig
// BufferSize is the size (in lines) of the log buffer.
BufferSize int `json:"bufferSize"`
// Source is the source name of the kernel monitor
// Source is the source name of the log monitor
Source string `json:"source"`
// DefaultConditions are the default states of all the conditions kernel monitor should handle.
// DefaultConditions are the default states of all the conditions log monitor should handle.
DefaultConditions []types.Condition `json:"conditions"`
// Rules are the rules kernel monitor will follow to parse the log file.
Rules []kerntypes.Rule `json:"rules"`
// Rules are the rules log monitor will follow to parse the log file.
Rules []logtypes.Rule `json:"rules"`
}

// applyDefaultConfiguration applies default configurations.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,28 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package kernelmonitor
package systemlogmonitor

import (
"regexp"
"strings"

"k8s.io/node-problem-detector/pkg/kernelmonitor/types"
"k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
)

// LogBuffer buffers the logs and supports match in the log buffer with regular expression.
type LogBuffer interface {
// Push pushes log into the log buffer.
Push(*types.KernelLog)
Push(*types.Log)
// Match with regular expression in the log buffer.
Match(string) []*types.KernelLog
Match(string) []*types.Log
// String returns a concatenated string of the buffered logs.
String() string
}

type logBuffer struct {
// buffer is a simple ring buffer.
buffer []*types.KernelLog
buffer []*types.Log
msg []string
max int
current int
Expand All @@ -47,20 +47,20 @@ type logBuffer struct {
// lines of patterns we support.
func NewLogBuffer(maxLines int) *logBuffer {
return &logBuffer{
buffer: make([]*types.KernelLog, maxLines, maxLines),
buffer: make([]*types.Log, maxLines, maxLines),
msg: make([]string, maxLines, maxLines),
max: maxLines,
}
}

func (b *logBuffer) Push(log *types.KernelLog) {
func (b *logBuffer) Push(log *types.Log) {
b.buffer[b.current%b.max] = log
b.msg[b.current%b.max] = log.Message
b.current++
}

// TODO(random-liu): Cache regexp if garbage collection becomes a problem someday.
func (b *logBuffer) Match(expr string) []*types.KernelLog {
func (b *logBuffer) Match(expr string) []*types.Log {
// The expression should be checked outside, and it must match to the end.
reg := regexp.MustCompile(expr + `\z`)
log := b.String()
Expand All @@ -72,7 +72,7 @@ func (b *logBuffer) Match(expr string) []*types.KernelLog {
// reverse index
s := len(log) - loc[0] - 1
total := 0
matched := []*types.KernelLog{}
matched := []*types.Log{}
for i := b.tail(); i >= b.current && b.buffer[i%b.max] != nil; i-- {
matched = append(matched, b.buffer[i%b.max])
total += len(b.msg[i%b.max]) + 1 // Add '\n'
Expand Down
Loading