Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .drone.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ local build(arch, testUI) = [{
trigger: {
event: [
'push',
'pull_request',
],
},
services: [
Expand Down
9 changes: 7 additions & 2 deletions backend/cmd/stability/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,20 @@ func main() {
defer cancel()

mem := stability.NewMemInfo("/proc")
z := stability.NewZram(mem, stability.SwaponSyscall, stability.SwapoffSyscall, logger)
commonDir := os.Getenv("SNAP_COMMON")
if commonDir == "" {
commonDir = "/var/snap/platform/common"
}
events := stability.NewEventLog(commonDir + "/stability-events.jsonl")
z := stability.NewZram(mem, stability.SwaponSyscall, stability.SwapoffSyscall, events, logger)
if err := z.EnsureConfigured(); err != nil {
logger.Sugar().Warnf("stability: zram setup failed (continuing): %v", err)
}

scan := stability.NewProcScanner("/proc")
w := stability.NewWatcher(mem, scan, func(pid int, sig syscall.Signal) error {
return syscall.Kill(pid, sig)
}, logger)
}, events, logger)

if err := w.Run(ctx); err != nil && err != context.Canceled {
logger.Sugar().Errorf("stability: watcher exited: %v", err)
Expand Down
22 changes: 22 additions & 0 deletions backend/health/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package health

import (
"github.com/syncloud/platform/stability"
)

// Health bundles a stability event log with a metrics Collector and exposes
// both through a single type.
type Health struct {
	events    *stability.EventLog
	collector *Collector
}

// NewHealth returns a Health that reads events from the given log and metrics
// from the given collector.
func NewHealth(events *stability.EventLog, collector *Collector) *Health {
	h := new(Health)
	h.events = events
	h.collector = collector
	return h
}

// Events forwards to EventLog.Recent with the given limit and returns its
// result unchanged.
func (h *Health) Events(limit int) ([]stability.Event, error) {
	recent, err := h.events.Recent(limit)
	return recent, err
}

// Metrics forwards to Collector.Snapshot and returns its result unchanged.
func (h *Health) Metrics() (Snapshot, error) {
	snap, err := h.collector.Snapshot()
	return snap, err
}
234 changes: 234 additions & 0 deletions backend/health/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
package health

import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
)

// CPU holds the eight counters taken from the aggregate "cpu" line of the
// stat file. The field order matches the order of the columns on that line.
type CPU struct {
	User    uint64 `json:"user"`
	Nice    uint64 `json:"nice"`
	System  uint64 `json:"system"`
	Idle    uint64 `json:"idle"`
	IOWait  uint64 `json:"iowait"`
	IRQ     uint64 `json:"irq"`
	SoftIRQ uint64 `json:"softirq"`
	Steal   uint64 `json:"steal"`
}

// Total returns the sum of all eight counters.
func (c CPU) Total() uint64 {
	var sum uint64
	for _, counter := range [...]uint64{
		c.User, c.Nice, c.System, c.Idle,
		c.IOWait, c.IRQ, c.SoftIRQ, c.Steal,
	} {
		sum += counter
	}
	return sum
}

// Busy returns the sum of every counter except Idle and IOWait.
func (c CPU) Busy() uint64 {
	return c.User + c.Nice + c.System + c.IRQ + c.SoftIRQ + c.Steal
}

// Memory holds the values readMemory extracts from the meminfo file. Each
// field is the number found on the line with the named key, after a trailing
// " kB" suffix has been stripped. Keys that are absent leave the field at zero.
type Memory struct {
TotalKB uint64 `json:"total_kb"` // key "MemTotal"
AvailableKB uint64 `json:"available_kb"` // key "MemAvailable"
FreeKB uint64 `json:"free_kb"` // key "MemFree"
BuffersKB uint64 `json:"buffers_kb"` // key "Buffers"
CachedKB uint64 `json:"cached_kb"` // key "Cached"
SwapTotalKB uint64 `json:"swap_total_kb"` // key "SwapTotal"
SwapFreeKB uint64 `json:"swap_free_kb"` // key "SwapFree"
}

// Disk holds the per-device counters that readDisks copies out of the
// diskstats file. Column numbers below are zero-based indexes into the
// whitespace-separated fields of one diskstats line.
type Disk struct {
Name string `json:"name"` // column 2
ReadsTotal uint64 `json:"reads_total"` // column 3
WritesTotal uint64 `json:"writes_total"` // column 7
SectorsRead uint64 `json:"sectors_read"` // column 5
SectorsWrt uint64 `json:"sectors_written"` // column 9
}

// Mount is one entry of Snapshot.Mounts. Nothing in this file fills it in;
// Collector.Snapshot takes the slice from Collector.Mounts, which is defined
// elsewhere.
// NOTE(review): the KB suffix suggests sizes in kilobytes — confirm against
// the Mounts implementation.
type Mount struct {
Path string `json:"path"`
TotalKB uint64 `json:"total_kb"`
UsedKB uint64 `json:"used_kb"`
}

// Net holds the byte counters readNet extracts for one interface listed in
// the net/dev file. The interface named "lo" is never reported.
type Net struct {
Name string `json:"name"` // text before the ':' on the line, trimmed
RxBytes uint64 `json:"rx_bytes"` // first field after the ':'
TxBytes uint64 `json:"tx_bytes"` // ninth field after the ':'
}

// Snapshot is the value produced by Collector.Snapshot: one CPU and one
// Memory reading plus lists of disks, mounts and network interfaces.
type Snapshot struct {
CPU CPU `json:"cpu"`
Memory Memory `json:"memory"`
Disks []Disk `json:"disks"`
Mounts []Mount `json:"mounts"`
Net []Net `json:"net"`
}

// Collector reads system metrics from files below a procfs-style directory.
type Collector struct {
	// procDir is the directory under which stat, meminfo, diskstats and
	// net/dev are opened.
	procDir string
}

// NewCollector returns a Collector that reads its files from procDir.
func NewCollector(procDir string) *Collector {
	c := new(Collector)
	c.procDir = procDir
	return c
}

// Snapshot gathers one reading of every metric the Collector knows about.
//
// CPU and memory are mandatory: if either cannot be read, a zero Snapshot and
// a wrapped error are returned. Disk and network counters are best effort:
// a failure there is dropped and whatever rows were parsed before the failure
// are kept, so a missing diskstats or net/dev file does not hide the CPU and
// memory figures.
//
// Disks, Net and Mounts are never nil in a successful result, so they are
// encoded as JSON arrays ("[]") rather than "null" when empty.
func (c *Collector) Snapshot() (Snapshot, error) {
	cpu, err := c.readCPU()
	if err != nil {
		return Snapshot{}, fmt.Errorf("health: reading cpu: %w", err)
	}
	mem, err := c.readMemory()
	if err != nil {
		return Snapshot{}, fmt.Errorf("health: reading memory: %w", err)
	}

	s := Snapshot{CPU: cpu, Memory: mem}

	// Errors are intentionally ignored for the optional sections; see the
	// function comment.
	s.Disks, _ = c.readDisks()
	s.Net, _ = c.readNet()
	s.Mounts = c.Mounts()

	// encoding/json renders a nil slice as null; give consumers empty arrays.
	if s.Disks == nil {
		s.Disks = []Disk{}
	}
	if s.Net == nil {
		s.Net = []Net{}
	}
	if s.Mounts == nil {
		s.Mounts = []Mount{}
	}
	return s, nil
}

// readCPU parses the aggregate "cpu " line of <procDir>/stat into a CPU value.
//
// The counters after the "cpu" label are assigned positionally to User, Nice,
// System, Idle, IOWait, IRQ, SoftIRQ and Steal. Counters missing from the end
// of the line are reported as zero and any beyond the eighth are ignored.
//
// An error is returned when the file cannot be opened, when reading it fails,
// or when it contains no line starting with "cpu ".
func (c *Collector) readCPU() (CPU, error) {
	f, err := os.Open(filepath.Join(c.procDir, "stat"))
	if err != nil {
		return CPU{}, err
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := sc.Text()
		if !strings.HasPrefix(line, "cpu ") {
			continue
		}
		var nums [8]uint64
		for i, fld := range strings.Fields(line)[1:] {
			if i == len(nums) {
				break
			}
			// Best effort: a counter that does not parse keeps whatever
			// ParseUint returns for it (0 on a syntax error).
			nums[i], _ = strconv.ParseUint(fld, 10, 64)
		}
		return CPU{
			User:    nums[0],
			Nice:    nums[1],
			System:  nums[2],
			Idle:    nums[3],
			IOWait:  nums[4],
			IRQ:     nums[5],
			SoftIRQ: nums[6],
			Steal:   nums[7],
		}, nil
	}
	// Distinguish a read failure from a file that simply lacks the line.
	if err := sc.Err(); err != nil {
		return CPU{}, fmt.Errorf("cpu: reading stat: %w", err)
	}
	return CPU{}, fmt.Errorf("cpu: 'cpu ' line missing")
}

// readMemory parses <procDir>/meminfo into a Memory value.
//
// Only the keys MemTotal, MemAvailable, MemFree, Buffers, Cached, SwapTotal
// and SwapFree are used; every other line is skipped. A value that does not
// parse as an unsigned integer is stored as whatever ParseUint returns for it.
// The error is non-nil when the file cannot be opened or scanning fails.
func (c *Collector) readMemory() (Memory, error) {
	file, err := os.Open(filepath.Join(c.procDir, "meminfo"))
	if err != nil {
		return Memory{}, err
	}
	defer file.Close()

	var mem Memory
	// Destination field for each meminfo key we care about.
	targets := map[string]*uint64{
		"MemTotal":     &mem.TotalKB,
		"MemAvailable": &mem.AvailableKB,
		"MemFree":      &mem.FreeKB,
		"Buffers":      &mem.BuffersKB,
		"Cached":       &mem.CachedKB,
		"SwapTotal":    &mem.SwapTotalKB,
		"SwapFree":     &mem.SwapFreeKB,
	}

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		parts := strings.SplitN(scanner.Text(), ":", 2)
		if len(parts) != 2 {
			continue
		}
		dst, wanted := targets[parts[0]]
		if !wanted {
			continue
		}
		// Strip surrounding blanks and the unit so only the digits remain.
		value := strings.TrimSpace(parts[1])
		value = strings.TrimSuffix(value, " kB")
		value = strings.TrimSpace(value)
		*dst, _ = strconv.ParseUint(value, 10, 64)
	}
	return mem, scanner.Err()
}

// readDisks parses <procDir>/diskstats and returns one Disk per line whose
// device name is not rejected by isPartition.
//
// Lines with fewer than 14 whitespace-separated columns are skipped. Counters
// that do not parse are stored as whatever ParseUint returns for them. When
// scanning fails, the rows collected so far are returned together with the
// scanner's error.
func (c *Collector) readDisks() ([]Disk, error) {
	file, err := os.Open(filepath.Join(c.procDir, "diskstats"))
	if err != nil {
		return nil, err
	}
	defer file.Close()

	// counter reads column i of cols as an unsigned integer, ignoring errors.
	counter := func(cols []string, i int) uint64 {
		v, _ := strconv.ParseUint(cols[i], 10, 64)
		return v
	}

	var disks []Disk
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		cols := strings.Fields(scanner.Text())
		if len(cols) < 14 || isPartition(cols[2]) {
			continue
		}
		var d Disk
		d.Name = cols[2]
		d.ReadsTotal = counter(cols, 3)
		d.SectorsRead = counter(cols, 5)
		d.WritesTotal = counter(cols, 7)
		d.SectorsWrt = counter(cols, 9)
		disks = append(disks, d)
	}
	return disks, scanner.Err()
}

// isPartition reports whether a diskstats device name should be left out of
// the disk list.
//
// It returns true for the empty name and for names starting with "loop",
// "ram" or "dm-". Otherwise a name that does not end in a digit is kept
// (false). A name ending in a digit is treated as a partition, except that
// names starting with "mmcblk" or "nvme" count as partitions only when they
// contain the letter 'p'.
func isPartition(name string) bool {
	if name == "" {
		return true
	}
	for _, pseudo := range []string{"loop", "ram", "dm-"} {
		if strings.HasPrefix(name, pseudo) {
			return true
		}
	}
	if tail := name[len(name)-1]; tail < '0' || tail > '9' {
		return false
	}
	// These families end whole-device names in a digit (mmcblk0, nvme0n1) and
	// mark partitions with a 'p' (mmcblk0p1, nvme0n1p1).
	for _, family := range []string{"mmcblk", "nvme"} {
		if strings.HasPrefix(name, family) {
			return strings.IndexByte(name, 'p') >= 0
		}
	}
	return true
}

// readNet parses <procDir>/net/dev and returns one Net per interface line.
//
// Lines without a ':' (the two header lines) and the interface named "lo" are
// skipped, as are lines with fewer than nine counters after the ':'. RxBytes
// is the first counter and TxBytes the ninth. Counters that do not parse are
// stored as whatever ParseUint returns for them. When scanning fails, the
// rows collected so far are returned together with the scanner's error.
func (c *Collector) readNet() ([]Net, error) {
	file, err := os.Open(filepath.Join(c.procDir, "net/dev"))
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var ifaces []Net
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		parts := strings.SplitN(scanner.Text(), ":", 2)
		if len(parts) < 2 {
			continue
		}
		iface := strings.TrimSpace(parts[0])
		if iface == "lo" {
			continue
		}
		counters := strings.Fields(parts[1])
		if len(counters) < 9 {
			continue
		}
		var n Net
		n.Name = iface
		n.RxBytes, _ = strconv.ParseUint(counters[0], 10, 64)
		n.TxBytes, _ = strconv.ParseUint(counters[8], 10, 64)
		ifaces = append(ifaces, n)
	}
	return ifaces, scanner.Err()
}
104 changes: 104 additions & 0 deletions backend/health/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package health

import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// writeProc creates the file dir/rel with the given contents, creating any
// missing parent directories first. The test fails immediately if either
// step returns an error.
func writeProc(t *testing.T, dir, rel, contents string) {
	t.Helper()
	target := filepath.Join(dir, rel)

	err := os.MkdirAll(filepath.Dir(target), 0755)
	require.NoError(t, err)

	err = os.WriteFile(target, []byte(contents), 0644)
	require.NoError(t, err)
}

// newTestCollector builds a fake proc directory inside t.TempDir(), fills it
// with fixed stat, meminfo, diskstats and net/dev files, and returns a
// Collector rooted there together with the directory path.
func newTestCollector(t *testing.T) (*Collector, string) {
t.Helper()
dir := t.TempDir()
// One aggregate "cpu" line followed by a per-core line that must be ignored.
writeProc(t, dir, "stat", "cpu 1000 50 200 5000 30 0 10 0\ncpu0 ...\n")
// The seven meminfo keys that readMemory looks for.
writeProc(t, dir, "meminfo", "MemTotal: 3700000 kB\nMemAvailable: 1500000 kB\nMemFree: 200000 kB\nBuffers: 50000 kB\nCached: 900000 kB\nSwapTotal: 2000000 kB\nSwapFree: 1500000 kB\n")
// Two whole disks (sda, mmcblk0), one partition of each, and a loop device.
writeProc(t, dir, "diskstats", " 8 0 sda 100 0 200 0 10 0 20 0 0 0 0 0 0 0\n"+
" 8 1 sda1 50 0 100 0 5 0 10 0 0 0 0 0 0 0\n"+
" 179 0 mmcblk0 1000 0 2000 0 100 0 200 0 0 0 0 0 0 0\n"+
" 179 1 mmcblk0p1 500 0 1000 0 50 0 100 0 0 0 0 0 0 0\n"+
" 7 0 loop0 1 0 2 0 0 0 0 0 0 0 0 0 0 0\n")
// Two header lines, the loopback interface, and one real interface.
writeProc(t, dir, "net/dev", `Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed
lo: 1000 10 0 0 0 0 0 0 1000 10 0 0 0 0 0 0
eth0: 5000 20 0 0 0 0 0 0 8000 30 0 0 0 0 0 0
`)
return NewCollector(dir), dir
}

// TestReadCPU checks that readCPU picks the User, Idle and IOWait counters
// from the aggregate "cpu" line of the fixture.
func TestReadCPU(t *testing.T) {
	collector, _ := newTestCollector(t)

	got, err := collector.readCPU()
	require.NoError(t, err)

	checks := []struct {
		want uint64
		have uint64
	}{
		{1000, got.User},
		{5000, got.Idle},
		{30, got.IOWait},
	}
	for _, check := range checks {
		assert.Equal(t, check.want, check.have)
	}
}

// TestReadMemory checks that readMemory reports the fixture's MemTotal,
// MemAvailable and SwapTotal values.
func TestReadMemory(t *testing.T) {
	collector, _ := newTestCollector(t)

	got, err := collector.readMemory()
	require.NoError(t, err)

	checks := []struct {
		want uint64
		have uint64
	}{
		{3700000, got.TotalKB},
		{1500000, got.AvailableKB},
		{2000000, got.SwapTotalKB},
	}
	for _, check := range checks {
		assert.Equal(t, check.want, check.have)
	}
}

// TestReadDisksFiltersPartitionsAndLoops checks that readDisks reports only
// the two whole disks of the fixture, in any order.
func TestReadDisksFiltersPartitionsAndLoops(t *testing.T) {
	collector, _ := newTestCollector(t)

	disks, err := collector.readDisks()
	require.NoError(t, err)

	got := make([]string, 0, len(disks))
	for i := range disks {
		got = append(got, disks[i].Name)
	}
	want := []string{"sda", "mmcblk0"}
	assert.ElementsMatch(t, want, got)
}

// TestReadNetSkipsLoopback checks that readNet drops "lo" and reports eth0
// with the receive and transmit byte counts from the fixture.
func TestReadNetSkipsLoopback(t *testing.T) {
	collector, _ := newTestCollector(t)

	ifaces, err := collector.readNet()
	require.NoError(t, err)
	require.Len(t, ifaces, 1)

	eth := ifaces[0]
	assert.Equal(t, "eth0", eth.Name)
	assert.Equal(t, uint64(5000), eth.RxBytes)
	assert.Equal(t, uint64(8000), eth.TxBytes)
}

// TestSnapshotEndToEnd checks that Snapshot succeeds on the fixture and
// carries the expected CPU, memory, disk and interface data.
func TestSnapshotEndToEnd(t *testing.T) {
	collector, _ := newTestCollector(t)

	snap, err := collector.Snapshot()
	require.NoError(t, err)

	cpu, mem := snap.CPU, snap.Memory
	assert.Equal(t, uint64(1000), cpu.User)
	assert.Equal(t, uint64(3700000), mem.TotalKB)
	require.Len(t, snap.Disks, 2)
	require.Len(t, snap.Net, 1)
}

// TestIsPartition pins isPartition's verdict for typical whole-disk,
// partition and pseudo-device names.
func TestIsPartition(t *testing.T) {
	type expectation struct {
		device    string
		partition bool
	}
	table := []expectation{
		{device: "sda", partition: false},
		{device: "sda1", partition: true},
		{device: "mmcblk0", partition: false},
		{device: "mmcblk0p1", partition: true},
		{device: "nvme0n1", partition: false},
		{device: "nvme0n1p1", partition: true},
		{device: "loop0", partition: true},
		{device: "dm-0", partition: true},
		{device: "ram0", partition: true},
	}
	for i := range table {
		row := table[i]
		assert.Equal(t, row.partition, isPartition(row.device), row.device)
	}
}
Loading