Skip to content

Commit

Permalink
Merge pull request #9984 from crosbymichael/metrics
Browse files Browse the repository at this point in the history
Docker stats live container resource metrics
  • Loading branch information
Arnaud Porterie committed Jan 21, 2015
2 parents 9de604a + 4b17319 commit d8a1cbe
Show file tree
Hide file tree
Showing 20 changed files with 848 additions and 6 deletions.
122 changes: 122 additions & 0 deletions api/client/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ import (
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"text/tabwriter"
"text/template"
"time"

log "github.com/Sirupsen/logrus"
"github.com/docker/docker/api"
"github.com/docker/docker/api/stats"
"github.com/docker/docker/dockerversion"
"github.com/docker/docker/engine"
"github.com/docker/docker/graph"
Expand Down Expand Up @@ -2618,3 +2621,122 @@ func (cli *DockerCli) CmdExec(args ...string) error {

return nil
}

type containerStats struct {
Name string
CpuPercentage float64
Memory float64
MemoryLimit float64
MemoryPercentage float64
NetworkRx float64
NetworkTx float64
mu sync.RWMutex
err error
}

func (s *containerStats) Collect(stream io.ReadCloser) {
defer stream.Close()
var (
previousCpu uint64
previousSystem uint64
start = true
dec = json.NewDecoder(stream)
)
for {
var v *stats.Stats
if err := dec.Decode(&v); err != nil {
s.mu.Lock()
s.err = err
s.mu.Unlock()
return
}
var (
memPercent = float64(v.MemoryStats.Usage) / float64(v.MemoryStats.Limit) * 100.0
cpuPercent = 0.0
)
if !start {
cpuPercent = calcuateCpuPercent(previousCpu, previousSystem, v)
}
start = false
s.mu.Lock()
s.CpuPercentage = cpuPercent
s.Memory = float64(v.MemoryStats.Usage)
s.MemoryLimit = float64(v.MemoryStats.Limit)
s.MemoryPercentage = memPercent
s.NetworkRx = float64(v.Network.RxBytes)
s.NetworkTx = float64(v.Network.TxBytes)
s.mu.Unlock()

previousCpu = v.CpuStats.CpuUsage.TotalUsage
previousSystem = v.CpuStats.SystemUsage
}
}

func (s *containerStats) Display(w io.Writer) error {
s.mu.RLock()
defer s.mu.RUnlock()
if s.err != nil {
return s.err
}
fmt.Fprintf(w, "%s\t%.2f%%\t%s/%s\t%.2f%%\t%s/%s\n",
s.Name,
s.CpuPercentage,
units.BytesSize(s.Memory), units.BytesSize(s.MemoryLimit),
s.MemoryPercentage,
units.BytesSize(s.NetworkRx), units.BytesSize(s.NetworkTx))
return nil
}

func (cli *DockerCli) CmdStats(args ...string) error {
cmd := cli.Subcmd("stats", "CONTAINER", "Display live container stats based on resource usage", true)
cmd.Require(flag.Min, 1)
utils.ParseFlags(cmd, args, true)

names := cmd.Args()
sort.Strings(names)
var cStats []*containerStats
for _, n := range names {
s := &containerStats{Name: n}
cStats = append(cStats, s)
stream, _, err := cli.call("GET", "/containers/"+n+"/stats", nil, false)
if err != nil {
return err
}
go s.Collect(stream)
}
w := tabwriter.NewWriter(cli.out, 20, 1, 3, ' ', 0)
for _ = range time.Tick(500 * time.Millisecond) {
fmt.Fprint(cli.out, "\033[2J")
fmt.Fprint(cli.out, "\033[H")
fmt.Fprintln(w, "CONTAINER\tCPU %\tMEM USAGE/LIMIT\tMEM %\tNET I/O")
toRemove := []int{}
for i, s := range cStats {
if err := s.Display(w); err != nil {
toRemove = append(toRemove, i)
}
}
for _, i := range toRemove {
cStats = append(cStats[:i], cStats[i+1:]...)
}
if len(cStats) == 0 {
return nil
}
w.Flush()
}
return nil
}

func calcuateCpuPercent(previousCpu, previousSystem uint64, v *stats.Stats) float64 {
var (
cpuPercent = 0.0
// calculate the change for the cpu usage of the container in between readings
cpuDelta = float64(v.CpuStats.CpuUsage.TotalUsage - previousCpu)
// calculate the change for the entire system between readings
systemDelta = float64(v.CpuStats.SystemUsage - previousSystem)
)

if systemDelta > 0.0 && cpuDelta > 0.0 {
cpuPercent = (cpuDelta / systemDelta) * float64(len(v.CpuStats.CpuUsage.PercpuUsage)) * 100.0
}
return cpuPercent
}
14 changes: 14 additions & 0 deletions api/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,19 @@ func getContainersJSON(eng *engine.Engine, version version.Version, w http.Respo
return nil
}

func getContainersStats(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := parseForm(r); err != nil {
return err
}
if vars == nil {
return fmt.Errorf("Missing parameter")
}
name := vars["name"]
job := eng.Job("container_stats", name)
streamJSON(job, w, true)
return job.Run()
}

func getContainersLogs(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := parseForm(r); err != nil {
return err
Expand Down Expand Up @@ -1323,6 +1336,7 @@ func createRouter(eng *engine.Engine, logging, enableCors bool, dockerVersion st
"/containers/{name:.*}/json": getContainersByName,
"/containers/{name:.*}/top": getContainersTop,
"/containers/{name:.*}/logs": getContainersLogs,
"/containers/{name:.*}/stats": getContainersStats,
"/containers/{name:.*}/attach/ws": wsContainersAttach,
"/exec/{id:.*}/json": getExecByID,
},
Expand Down
87 changes: 87 additions & 0 deletions api/stats/stats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// This package is used for API stability in the types and response to the
// consumers of the API stats endpoint.
package stats

import "time"

type ThrottlingData struct {
// Number of periods with throttling active
Periods uint64 `json:"periods,omitempty"`
// Number of periods when the container hit its throttling limit.
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
// Aggregate time the container was throttled for in nanoseconds.
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}

// All CPU stats are aggregated since container inception.
type CpuUsage struct {
// Total CPU time consumed.
// Units: nanoseconds.
TotalUsage uint64 `json:"total_usage,omitempty"`
// Total CPU time consumed per core.
// Units: nanoseconds.
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
// Time spent by tasks of the cgroup in kernel mode.
// Units: nanoseconds.
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
// Time spent by tasks of the cgroup in user mode.
// Units: nanoseconds.
UsageInUsermode uint64 `json:"usage_in_usermode"`
}

type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
SystemUsage uint64 `json:"system_cpu_usage"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}

type MemoryStats struct {
// current res_counter usage for memory
Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// TODO(vishh): Export these as stronger types.
// all the stats exported via memory.stat.
Stats map[string]uint64 `json:"stats,omitempty"`
// number of times memory usage hits limits.
Failcnt uint64 `json:"failcnt"`
Limit uint64 `json:"limit"`
}

type BlkioStatEntry struct {
Major uint64 `json:"major,omitempty"`
Minor uint64 `json:"minor,omitempty"`
Op string `json:"op,omitempty"`
Value uint64 `json:"value,omitempty"`
}

type BlkioStats struct {
// number of bytes tranferred to and from the block device
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}

type Network struct {
RxBytes uint64 `json:"rx_bytes"`
RxPackets uint64 `json:"rx_packets"`
RxErrors uint64 `json:"rx_errors"`
RxDropped uint64 `json:"rx_dropped"`
TxBytes uint64 `json:"tx_bytes"`
TxPackets uint64 `json:"tx_packets"`
TxErrors uint64 `json:"tx_errors"`
TxDropped uint64 `json:"tx_dropped"`
}

type Stats struct {
Read time.Time `json:"read"`
Network Network `json:"network,omitempty"`
CpuStats CpuStats `json:"cpu_stats,omitempty"`
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
}
4 changes: 4 additions & 0 deletions daemon/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -1414,3 +1414,7 @@ func (container *Container) getNetworkedContainer() (*Container, error) {
return nil, fmt.Errorf("network mode not set to container")
}
}

func (container *Container) Stats() (*execdriver.ResourceStats, error) {
return container.daemon.Stats(container)
}
25 changes: 25 additions & 0 deletions daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ type Daemon struct {
driver graphdriver.Driver
execDriver execdriver.Driver
trustStore *trust.TrustStore
statsCollector *statsCollector
}

// Install installs daemon capabilities to eng.
Expand All @@ -116,6 +117,7 @@ func (daemon *Daemon) Install(eng *engine.Engine) error {
"container_copy": daemon.ContainerCopy,
"container_rename": daemon.ContainerRename,
"container_inspect": daemon.ContainerInspect,
"container_stats": daemon.ContainerStats,
"containers": daemon.Containers,
"create": daemon.ContainerCreate,
"rm": daemon.ContainerRm,
Expand Down Expand Up @@ -982,6 +984,7 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
execDriver: ed,
eng: eng,
trustStore: t,
statsCollector: newStatsCollector(1 * time.Second),
}
if err := daemon.restore(); err != nil {
return nil, err
Expand Down Expand Up @@ -1092,6 +1095,28 @@ func (daemon *Daemon) Kill(c *Container, sig int) error {
return daemon.execDriver.Kill(c.command, sig)
}

func (daemon *Daemon) Stats(c *Container) (*execdriver.ResourceStats, error) {
return daemon.execDriver.Stats(c.ID)
}

func (daemon *Daemon) SubscribeToContainerStats(name string) (chan interface{}, error) {
c := daemon.Get(name)
if c == nil {
return nil, fmt.Errorf("no such container")
}
ch := daemon.statsCollector.collect(c)
return ch, nil
}

func (daemon *Daemon) UnsubscribeToContainerStats(name string, ch chan interface{}) error {
c := daemon.Get(name)
if c == nil {
return fmt.Errorf("no such container")
}
daemon.statsCollector.unsubscribe(c, ch)
return nil
}

// Nuke kills all containers then removes all content
// from the content root, including images, volumes and
// container filesystems.
Expand Down
3 changes: 3 additions & 0 deletions daemon/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ func (daemon *Daemon) ContainerRm(job *engine.Job) engine.Status {
}

if container != nil {
// stop collection of stats for the container regardless
// if stats are currently getting collected.
daemon.statsCollector.stopCollection(container)
if container.IsRunning() {
if forceRemove {
if err := container.Kill(); err != nil {
Expand Down
12 changes: 11 additions & 1 deletion daemon/execdriver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (
"io"
"os"
"os/exec"
"time"

"github.com/docker/libcontainer"
"github.com/docker/libcontainer/devices"
)

Expand All @@ -14,7 +16,7 @@ import (
type Context map[string]string

var (
ErrNotRunning = errors.New("Process could not be started")
ErrNotRunning = errors.New("Container is not running")
ErrWaitTimeoutReached = errors.New("Wait timeout reached")
ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
ErrDriverNotFound = errors.New("The requested docker init has not been found")
Expand Down Expand Up @@ -61,6 +63,7 @@ type Driver interface {
GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
Terminate(c *Command) error // kill it with fire
Clean(id string) error // clean all traces of container exec
Stats(id string) (*ResourceStats, error) // Get resource stats for a running container
}

// Network settings of the container
Expand Down Expand Up @@ -101,6 +104,13 @@ type Resources struct {
Cpuset string `json:"cpuset"`
}

type ResourceStats struct {
*libcontainer.ContainerStats
Read time.Time `json:"read"`
MemoryLimit int64 `json:"memory_limit"`
SystemUsage uint64 `json:"system_usage"`
}

type Mount struct {
Source string `json:"source"`
Destination string `json:"destination"`
Expand Down
3 changes: 2 additions & 1 deletion daemon/execdriver/execdrivers/execdrivers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package execdrivers

import (
"fmt"
"path"

"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/daemon/execdriver/lxc"
"github.com/docker/docker/daemon/execdriver/native"
"github.com/docker/docker/pkg/sysinfo"
"path"
)

func NewDriver(name, root, initPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
Expand Down
5 changes: 5 additions & 0 deletions daemon/execdriver/lxc/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,3 +524,8 @@ func (t *TtyConsole) Close() error {
func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
return -1, ErrExec
}

func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
return nil, fmt.Errorf("container stats are not supported with LXC")

}

0 comments on commit d8a1cbe

Please sign in to comment.