Skip to content

Commit

Permalink
feature: start of work to add instance stats
Browse files Browse the repository at this point in the history
This commit adds the singularity instance stats command group (and docs) and
the start of the function that retrieves the instance to interact with. Next, I
will need to figure out how to enable cgroups so that a manager can be
instantiated and used to get stats for the instance.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed May 6, 2022
1 parent 9bd92c8 commit 3ebe15b
Show file tree
Hide file tree
Showing 7 changed files with 313 additions and 6 deletions.
8 changes: 8 additions & 0 deletions LICENSE_THIRD_PARTY.md
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,11 @@ Are code from the conmon project, under the Apache License, Version 2.0.
See the License for the specific language governing permissions and
limitations under the License.
```

## github.com/docker/cli

The source files:

* `internal/app/singularity/instance_linux.go`

Contain code from the docker cli project, under the Apache License, Version 2.0.
1 change: 1 addition & 0 deletions cmd/internal/cli/instance_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ func init() {
cmdManager.RegisterSubCmd(instanceCmd, instanceStartCmd)
cmdManager.RegisterSubCmd(instanceCmd, instanceStopCmd)
cmdManager.RegisterSubCmd(instanceCmd, instanceListCmd)
cmdManager.RegisterSubCmd(instanceCmd, instanceStatsCmd)
})
}

Expand Down
82 changes: 82 additions & 0 deletions cmd/internal/cli/instance_stats_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (c) 2022, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package cli

import (
"errors"
"os"

"github.com/spf13/cobra"
"github.com/sylabs/singularity/docs"
"github.com/sylabs/singularity/internal/app/singularity"
"github.com/sylabs/singularity/pkg/cmdline"
"github.com/sylabs/singularity/pkg/sylog"
)

// Basic Design
// singularity instance stats <name>
// singularity instance stats --json <name>

// init passes addCmdInit a callback that attaches the stats-specific flags
// (--user and --json) to the instance stats command.
func init() {
	addCmdInit(func(cmdManager *cmdline.CommandManager) {
		statsFlags := []*cmdline.Flag{
			&instanceStatsUserFlag,
			&instanceStatsJSONFlag,
		}
		for _, flag := range statsFlags {
			cmdManager.RegisterFlagForCmd(flag, instanceStatsCmd)
		}
	})
}

// Flags accepted by "singularity instance stats", together with the
// package-level variables that receive their parsed values.
var (
	// instanceStatsUser holds the value of -u|--user; empty means the flag
	// was not given.
	instanceStatsUser string

	// instanceStatsUserFlag describes -u|--user.
	instanceStatsUserFlag = cmdline.Flag{
		Name:         "user",
		ShortHand:    "u",
		ID:           "instanceStatsUserFlag",
		Value:        &instanceStatsUser,
		DefaultValue: "",
		Tag:          "<username>",
		Usage:        "view stats for an instance belonging to a user (root only)",
		EnvKeys:      []string{"USER"},
	}

	// instanceStatsJSON holds the value of -j|--json.
	instanceStatsJSON bool

	// instanceStatsJSONFlag describes -j|--json.
	instanceStatsJSONFlag = cmdline.Flag{
		Name:         "json",
		ShortHand:    "j",
		ID:           "instanceStatsJSONFlag",
		Value:        &instanceStatsJSON,
		DefaultValue: false,
		Usage:        "output stats in json",
	}
)

// instanceStatsCmd is the cobra command behind "singularity instance stats".
// It takes the instance name as its single positional argument and hands the
// name, the optional --user value and the --json switch to
// singularity.InstanceStats.
var instanceStatsCmd = &cobra.Command{
	Use:     docs.InstanceStatsUse,
	Short:   docs.InstanceStatsShort,
	Long:    docs.InstanceStatsLong,
	Example: docs.InstanceStatsExample,

	DisableFlagsInUseLine: true,
	Args:                  cobra.RangeArgs(0, 1),

	RunE: func(_ *cobra.Command, args []string) error {
		// The argument validator above accepts zero arguments, so a missing
		// instance name has to be rejected here.
		if len(args) < 1 {
			return errors.New("invalid command")
		}

		// Looking at another user's instance is reserved for root.
		otherUserRequested := instanceStatsUser != ""
		if otherUserRequested && os.Getuid() != 0 {
			sylog.Fatalf("Only the root user can look at stats of a user instance")
		}

		// The instance name is the only positional argument.
		return singularity.InstanceStats(args[0], instanceStatsUser, instanceStatsJSON)
	},
}
14 changes: 14 additions & 0 deletions docs/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,20 @@ Enterprise Performance Computing (EPC)`
$ singularity instance stop /tmp/my-sql.sif mysql
Stopping /tmp/my-sql.sif mysql`

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// instance stats
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Help text for the "instance stats" subcommand: the usage line, the one-line
// summary, the long description and the usage examples. These are the strings
// wired into the Use, Short, Long and Example fields of the stats command.
InstanceStatsUse string = `stats [stats options...] <instance name>`
InstanceStatsShort string = `Get stats for a named instance`
InstanceStatsLong string = `
The instance stats command allows you to get statistics for a named instance,
either printed to the terminal or in json. If you are root, you can optionally
ask for statistics for a container instance belonging to a specific user.`
InstanceStatsExample string = `
$ singularity instance stats mysql
$ singularity instance stats --json mysql
$ sudo singularity instance stats --user <username> user-mysql`

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// instance stop
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
77 changes: 77 additions & 0 deletions e2e/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,78 @@ type ctx struct {
env e2e.TestEnv
}

// instanceStats checks that "instance stats" produces output for a running
// instance. For every table entry it starts an instance with a cgroups
// configuration applied, runs "instance stats" against it and verifies that
// the instance name, the table header columns and the size units show up in
// the output, then stops the instance again.
//
// The whole test is skipped for unprivileged profiles.
func (c *ctx) instanceStats(t *testing.T, profile e2e.Profile) {
	e2e.EnsureImage(t, c.env)

	// stats only for privileged atm. The check does not depend on the table
	// entry, so it is made once up front rather than on every iteration.
	if !profile.Privileged() {
		t.Skip()
	}

	// All tests require root
	tests := []struct {
		name           string
		createArgs     []string
		startErrorCode int
		statsErrorCode int
	}{
		{
			name:           "basic stats create",
			createArgs:     []string{"--apply-cgroups", "testdata/cgroups/cpu_success.toml", c.env.ImagePath},
			statsErrorCode: 0,
			startErrorCode: 0,
		},
	}

	for _, tt := range tests {
		instanceName := randomName(t)

		// Build the start arguments in a fresh slice so that appending the
		// instance name can never write into the table entry's backing array.
		createArgs := make([]string, 0, len(tt.createArgs)+1)
		createArgs = append(createArgs, tt.createArgs...)
		createArgs = append(createArgs, instanceName)

		// Start the instance with cgroups for stats. Only the exit code is
		// checked here; output expectations apply to the stats step.
		c.env.RunSingularity(
			t,
			e2e.AsSubtest(tt.name+"/start"),
			e2e.WithProfile(profile),
			e2e.WithCommand("instance start"),
			e2e.WithArgs(createArgs...),
			e2e.ExpectExit(tt.startErrorCode),
		)

		// Get stats for the instance
		c.env.RunSingularity(
			t,
			e2e.AsSubtest(tt.name+"/stats"),
			e2e.WithProfile(profile),
			e2e.WithCommand("instance stats"),
			e2e.WithArgs(instanceName),
			e2e.ExpectExit(tt.statsErrorCode,
				e2e.ExpectOutput(e2e.ContainMatch, instanceName),
				e2e.ExpectOutput(e2e.ContainMatch, "INSTANCE NAME"),
				e2e.ExpectOutput(e2e.ContainMatch, "CPU USAGE"),
				e2e.ExpectOutput(e2e.ContainMatch, "MEM USAGE / LIMIT"),
				e2e.ExpectOutput(e2e.ContainMatch, "MEM %"),
				e2e.ExpectOutput(e2e.ContainMatch, "BLOCK I/O"),
				e2e.ExpectOutput(e2e.ContainMatch, "PIDS"),
				e2e.ExpectOutput(e2e.ContainMatch, "GiB"),
				e2e.ExpectOutput(e2e.ContainMatch, "KiB"),
				e2e.ExpectOutput(e2e.ContainMatch, "MiB"),
			),
		)

		// Clean up the instance so later tests start from a known state.
		c.env.RunSingularity(
			t,
			e2e.AsSubtest(tt.name+"/stop"),
			e2e.WithProfile(profile),
			e2e.WithCommand("instance stop"),
			e2e.WithArgs(instanceName),
			e2e.ExpectExit(0),
		)
	}
}

// moved from INSTANCE suite, as testing with systemd cgroup manager requires
// e2e to be run without PID namespace
func (c *ctx) instanceApply(t *testing.T, profile e2e.Profile) {
Expand Down Expand Up @@ -164,6 +236,10 @@ func (c *ctx) instanceApplyRoot(t *testing.T) {
c.instanceApply(t, e2e.RootProfile)
}

// instanceStatsRoot runs the instanceStats checks with the root profile.
func (c *ctx) instanceStatsRoot(t *testing.T) {
	c.instanceStats(t, e2e.RootProfile)
}

// TODO - when instance support for rootless cgroups is ready, this
// should instead call instanceApply over the user profiles.
func (c *ctx) instanceApplyRootless(t *testing.T) {
Expand Down Expand Up @@ -508,6 +584,7 @@ func E2ETests(env e2e.TestEnv) testhelper.Tests {
np := testhelper.NoParallel

return testhelper.Tests{
"instance stats": np(env.WithRootManagers(c.instanceStatsRoot)),
"instance root cgroups": np(env.WithRootManagers(c.instanceApplyRoot)),
"instance rootless cgroups": np(env.WithRootlessManagers(c.instanceApplyRootless)),
"action root cgroups": np(env.WithRootManagers(c.actionApplyRoot)),
Expand Down
128 changes: 122 additions & 6 deletions internal/app/singularity/instance_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,25 @@
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

// Includes code from https://github.com/docker/cli
// Released under the Apache License Version 2.0

package singularity

import (
"encoding/json"
"fmt"
"io"
"os"
"strconv"
"strings"
"syscall"
"text/tabwriter"
"time"

units "github.com/docker/go-units"
libcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/sylabs/singularity/internal/pkg/cgroups"
"github.com/sylabs/singularity/internal/pkg/instance"
"github.com/sylabs/singularity/pkg/sylog"
"github.com/sylabs/singularity/pkg/util/fs/proc"
Expand Down Expand Up @@ -122,19 +130,127 @@ func WriteInstancePidFile(name, pidFile string) error {
return nil
}

// instanceListOrError fetches the instances matching name for instanceUser
// and treats an empty result as a failure. An error from instance.List is
// wrapped with extra context; when the lookup succeeds but matches nothing a
// "no instance found" error is returned. In every case the slice returned by
// instance.List is passed through unchanged.
func instanceListOrError(instanceUser, name string) ([]*instance.File, error) {
	found, listErr := instance.List(instanceUser, name, instance.SingSubDir)
	switch {
	case listErr != nil:
		return found, fmt.Errorf("could not retrieve instance list: %w", listErr)
	case len(found) == 0:
		return found, fmt.Errorf("no instance found")
	}
	return found, nil
}

// calculateBlockIO sums the values of the IoServiceBytesRecursive entries and
// returns the read total followed by the write total. Operation names are
// compared case-insensitively; entries for any other operation are ignored.
func calculateBlockIO(stats *libcgroups.BlkioStats) (float64, float64) {
	var readTotal, writeTotal float64
	entries := stats.IoServiceBytesRecursive
	for idx := range entries {
		op := strings.ToLower(entries[idx].Op)
		if op == "read" {
			readTotal += float64(entries[idx].Value)
		} else if op == "write" {
			writeTotal += float64(entries[idx].Value)
		}
	}
	return readTotal, writeTotal
}

// calculateMemoryUsage returns the current memory usage, the memory limit and
// the usage as a percentage of that limit.
//
// The limit is taken from the cgroup statistics (stats.Usage.Limit). When the
// cgroup reports no usable limit (zero, or a value larger than the machine's
// total RAM), the total system memory obtained from sysinfo is reported
// instead, so the percentage stays meaningful.
// NOTE(review): an unlimited cgroup is presumed to show up as a very large
// Limit value (all bits set on cgroups v2) - confirm against the runc version
// in use.
//
// The percentage is 0 when no non-zero limit could be determined.
func calculateMemoryUsage(stats *libcgroups.MemoryStats) (float64, float64, float64) {
	// Note that there is also MaxUsage
	memUsage := float64(stats.Usage.Usage)
	memLimit := float64(stats.Usage.Limit)
	memPercent := 0.0

	// Calculate total memory of system; stays 0 if sysinfo fails.
	systemTotal := 0.0
	in := &syscall.Sysinfo_t{}
	if err := syscall.Sysinfo(in); err == nil {
		systemTotal = float64(in.Totalram) * float64(in.Unit)
	}

	switch {
	case systemTotal > 0 && (memLimit == 0 || memLimit > systemTotal):
		// No effective cgroup limit: the instance can use up to system RAM.
		memLimit = systemTotal
	case systemTotal == 0 && stats.Usage.Limit == ^uint64(0):
		// System total unknown and the cgroup limit is the all-bits-set
		// value: report "no limit" rather than a meaningless huge number.
		memLimit = 0
	}

	if memLimit != 0 {
		memPercent = memUsage / memLimit * 100.0
	}
	return memUsage, memLimit, memPercent
}

// InstanceStats prints resource usage statistics for the named instance,
// read from the cgroup the instance's process belongs to.
//
// name selects the instance and instanceUser optionally selects the owning
// user. When formatJSON is true the full statistics structure is written to
// stdout as indented JSON; otherwise a one-row table with CPU time, memory
// usage / limit, memory percentage, block I/O and PID count is written.
//
// An error is returned when the lookup does not yield exactly one instance,
// when the instance was not started with cgroups, when the cgroup manager or
// its statistics cannot be obtained, or when writing the output fails.
func InstanceStats(name, instanceUser string, formatJSON bool) error {
	ii, err := instanceListOrError(instanceUser, name)
	if err != nil {
		return err
	}
	// Instance stats requires exactly 1 instance; zero matches were already
	// rejected above, so anything else here means more than one.
	if len(ii) != 1 {
		return fmt.Errorf("query returned more than one instance (%d)", len(ii))
	}

	// Grab our instance to interact with!
	i := ii[0]
	if !formatJSON {
		// Kept out of JSON mode so the informational line is only logged
		// alongside the table output.
		sylog.Infof("Stats for %s instance of %s (PID=%d)\n", i.Name, i.Image, i.Pid)
	}

	// Cut out early if we do not have cgroups
	if !i.Cgroup {
		url := "https://sylabs.io/guides/latest/user-guide/cgroups.html"
		return fmt.Errorf("stats are only available if cgroups are enabled, see %s", url)
	}

	// Get a cgroupfs managed cgroup from the pid
	manager, err := cgroups.GetManagerForPid(i.Pid)
	if err != nil {
		return fmt.Errorf("while getting cgroup manager for pid: %w", err)
	}
	stats, err := manager.GetStats()
	if err != nil {
		return fmt.Errorf("while getting stats for pid: %w", err)
	}

	// Do we want json?
	if formatJSON {
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", "\t")
		return enc.Encode(stats)
	}

	// Otherwise print shortened table
	tabWriter := tabwriter.NewWriter(os.Stdout, 0, 8, 4, ' ', 0)

	// Stats can be added from this set:
	// https://github.com/opencontainers/runc/blob/main/libcontainer/cgroups/stats.go
	_, err = fmt.Fprintln(tabWriter, "INSTANCE NAME\tCPU USAGE\tMEM USAGE / LIMIT\tMEM %\tBLOCK I/O\tPIDS")
	if err != nil {
		return fmt.Errorf("could not write stats header: %w", err)
	}

	// CpuUsage denotes the usage of a CPU, aggregate since container inception.
	// TODO CPU time needs to be a percentage
	totalCPUTime := strconv.FormatUint(stats.CpuStats.CpuUsage.TotalUsage, 10) + " ns"
	memUsage, memLimit, memPercent := calculateMemoryUsage(&stats.MemoryStats)
	blockRead, blockWrite := calculateBlockIO(&stats.BlkioStats)

	// Generate a shortened stats list
	_, err = fmt.Fprintf(tabWriter, "%s\t%s\t%s / %s\t%.2f%s\t%s / %s\t%d\n", i.Name, totalCPUTime, units.BytesSize(memUsage), units.BytesSize(memLimit), memPercent, "%", units.BytesSize(blockRead), units.BytesSize(blockWrite), stats.PidsStats.Current)
	if err != nil {
		return fmt.Errorf("could not write instance stats: %w", err)
	}

	// Flush explicitly instead of deferring: the table is only written out
	// here, and a failure to do so must be reported to the caller.
	if err := tabWriter.Flush(); err != nil {
		return fmt.Errorf("could not flush instance stats: %w", err)
	}
	return nil
}

// StopInstance fetches instance list, applying name and
// user filters, and stops them by sending a signal sig. If an instance
// is still running after a grace period defined by timeout is expired,
// it will be forcibly killed.
func StopInstance(name, user string, sig syscall.Signal, timeout time.Duration) error {
ii, err := instance.List(user, name, instance.SingSubDir)
ii, err := instanceListOrError(user, name)
if err != nil {
return fmt.Errorf("could not retrieve instance list: %v", err)
return err
}
if len(ii) == 0 {
return fmt.Errorf("no instance found")
}

stoppedPID := make(chan int, 1)
stopped := make([]int, 0)

Expand Down
9 changes: 9 additions & 0 deletions internal/pkg/cgroups/manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ func (m *Manager) GetCgroupRelPath() (relPath string, err error) {
return filepath.Clean(pathParts[1]), nil
}

// GetStats returns the statistics reported by the underlying runc cgroup
// manager for the managed cgroup.
//
// On failure the returned error wraps the manager's error, and the returned
// statistics are an empty, non-nil value so that existing callers never
// receive a nil pointer.
func (m *Manager) GetStats() (*lccgroups.Stats, error) {
	stats, err := m.cgroup.GetStats()
	if err != nil {
		// %w keeps the cause readable and available to errors.Is/As; the
		// previous %x verb rendered the error message as hexadecimal.
		return &lccgroups.Stats{}, fmt.Errorf("could not get stats from cgroups manager: %w", err)
	}
	return stats, nil
}

// UpdateFromSpec updates the existing managed cgroup using configuration from
// an OCI LinuxResources spec struct.
func (m *Manager) UpdateFromSpec(resources *specs.LinuxResources) (err error) {
Expand Down

0 comments on commit 3ebe15b

Please sign in to comment.