Skip to content
This repository has been archived by the owner on Jun 12, 2024. It is now read-only.

Commit

Permalink
added reliability risk analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
pnickolov committed Oct 7, 2021
1 parent 8320c5a commit 4e6de25
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 67 deletions.
84 changes: 62 additions & 22 deletions app/model/model.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package model

import "fmt"

const (
QOS_GUARANTEED = "guaranteed"
QOS_BURSTABLE = "burstable"
Expand Down Expand Up @@ -51,42 +53,67 @@ type AppContainer struct {
}

type AppMetrics struct {
AverageReplicas float64 `yaml:"average_replicas"` // averaged over the evaluated time range
CpuUtilization float64 `yaml:"cpu_saturation"` // aka Saturation, in percent, can be 0 or >100
MemoryUtilization float64 `yaml:"memory_saturation"` // aka Saturation, in percent, can be 0 or >100
PacketReceiveRate float64 `yaml:"packet_receive_rate"` // per second
PacketTransmitRate float64 `yaml:"packet_transmit_rate"` // per second
RequestRate float64 `yaml:"request_rate"` // per second
AverageReplicas float64 `yaml:"average_replicas"` // averaged over the evaluated time range
CpuUtilization float64 `yaml:"cpu_saturation"` // aka Saturation, in percent, can be 0 or >100
MemoryUtilization float64 `yaml:"memory_saturation"` // aka Saturation, in percent, can be 0 or >100
CpuSecondsThrottled float64 `yaml:"cpu_seconds_throttled"` // sum of seconds throttled/second across all containers
PacketReceiveRate float64 `yaml:"packet_receive_rate"` // per second
PacketTransmitRate float64 `yaml:"packet_transmit_rate"` // per second
RequestRate float64 `yaml:"request_rate"` // per second
}

type AppFlag int

const (
F_WRITEABLE_VOLUME = iota
F_MAIN_CONTAINER = iota
F_WRITEABLE_VOLUME
F_RESOURCE_SPEC
F_SINGLE_REPLICA
F_MANY_REPLICAS
F_TRAFFIC
F_RESOURCE_LIMITS
F_RESOURCE_GUARANTEED
F_UTILIZATION
F_BURST
F_MAIN_CONTAINER
F_TRAFFIC
F_SINGLE_REPLICA
F_MANY_REPLICAS
)

func (f AppFlag) String() string {
return []string{"V", "R", "S", "M", "T", "U", "B", "C"}[f]
return []string{"C", "V", "R", "L", "G", "U", "B", "T", "S", "M"}[f]
}

// MarshalYAML returns the value to emit for the flag when it is written as
// YAML: the short string produced by the flag's String method. The returned
// error is always nil.
func (f AppFlag) MarshalYAML() (interface{}, error) {
	var encoded interface{} = f.String()
	return encoded, nil
}

// RiskLevel grades a risk on a small ordinal scale. The constants below are
// declared in ascending order of severity, so levels can be compared with the
// usual integer operators.
type RiskLevel int

// Risk levels, from "not assessed" (the zero value) to the most severe.
const (
	RISK_UNKNOWN = iota
	RISK_NONE
	RISK_LOW
	RISK_MEDIUM
	RISK_HIGH
	RISK_CRITICAL
)

// String returns the display name of the risk level. RISK_UNKNOWN is rendered
// as "-". Values outside the declared range are also rendered as "-" instead
// of panicking with an index-out-of-range error, so a corrupt or future value
// cannot crash output code.
func (r RiskLevel) String() string {
	names := [...]string{"-", "None", "Low", "Medium", "High", "Critical"}
	if r < 0 || int(r) >= len(names) {
		return names[RISK_UNKNOWN]
	}
	return names[r]
}

// MarshalYAML returns the value to emit for the risk level when it is written
// as YAML: its display name as produced by String. The returned error is
// always nil.
func (r RiskLevel) MarshalYAML() (interface{}, error) {
	return r.String(), nil
}

type AppAnalysis struct {
Rating int `yaml:"rating"` // how suitable for optimization
Confidence int `yaml:"confidence"` // how confident is the rating
MainContainer string `yaml:"main_container"` // container to optimize or empty if not identified
EfficiencyScore int `yaml:"efficiency_score"`
ReliabilityScore int `yaml:"reliability_score,omitempty"`
PerformanceScore int `yaml:"performance_score,omitempty"`
Flags map[AppFlag]bool `yaml:"flags"` // flags
Opportunities []string `yaml:"opportunities"` // list of optimization opportunities
Cautions []string `yaml:"cautions"` // list of concerns/cautions
Blockers []string `yaml:"blockers"` // list of blockers prevention optimization
Rating int `yaml:"rating"` // how suitable for optimization
Confidence int `yaml:"confidence"` // how confident is the rating
MainContainer string `yaml:"main_container"` // container to optimize or empty if not identified
EfficiencyScore *int `yaml:"efficiency_score"`
ReliabilityRisk *RiskLevel `yaml:"reliability_risk"`
Flags map[AppFlag]bool `yaml:"flags"` // flags
Opportunities []string `yaml:"opportunities"` // list of optimization opportunities
Cautions []string `yaml:"cautions"` // list of concerns/cautions
Blockers []string `yaml:"blockers"` // list of blockers prevention optimization
}

type App struct {
Expand All @@ -109,3 +136,16 @@ func (app *App) ContainerIndexByName(name string) (index int, ok bool) {
}
return
}

// Score2String renders an optional score for display. A nil pointer means
// the score is not available and yields "n/a"; otherwise the pointed-to
// integer is formatted in decimal.
func Score2String(s *int) string {
	if s != nil {
		return fmt.Sprint(*s)
	}
	return "n/a"
}
// Risk2String renders an optional risk level for display. A nil pointer
// means no risk level is available and yields "n/a"; otherwise the result is
// the level's String form.
func Risk2String(r *RiskLevel) string {
	if r != nil {
		return (*r).String()
	}
	return "n/a"
}
95 changes: 89 additions & 6 deletions cmd/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"math"
"sort"
"strings"

log "github.com/sirupsen/logrus"

Expand Down Expand Up @@ -226,6 +227,16 @@ func computePodQoS(app *appmodel.App) string {
}
}

// resourcesLimited reports whether every container of the app has both a
// non-zero CPU limit and a non-zero memory limit. It returns false as soon as
// one container lacks either limit; an app with no containers yields true.
func resourcesLimited(app *appmodel.App) bool {
	for idx := range app.Containers {
		ctr := &app.Containers[idx]
		hasBothLimits := ctr.Cpu.Limit != 0 && ctr.Memory.Limit != 0
		if !hasBothLimits {
			return false
		}
	}
	return true
}

func resourcesExplicitlyDefined(app *appmodel.App) (bool, string) {
// select the main container
if app.Analysis.MainContainer == "" {
Expand Down Expand Up @@ -322,6 +333,49 @@ func efficiencyImprovementEstimate(app *appmodel.App) string {
}
}

// bumpRisk raises a running risk assessment to at least level. It returns
// prior unchanged when prior is set and already at or above level; otherwise
// (prior is nil or lower) it returns a pointer to a copy of level. The risk
// is never lowered.
func bumpRisk(prior *appmodel.RiskLevel, level appmodel.RiskLevel) *appmodel.RiskLevel {
	if prior != nil && *prior >= level {
		return prior
	}
	return &level
}

// riskAssessment derives the reliability risk of an app from its pod QoS
// class and its resource metrics. It returns the highest risk level triggered
// by any check, together with one caution message per triggered check. The
// returned level is never nil (it is at least RISK_LOW) and the returned
// slice is never nil, though it may be empty.
func riskAssessment(app *appmodel.App) (*appmodel.RiskLevel, []string) {
	var (
		level    *appmodel.RiskLevel
		cautions = []string{}
	)

	// QoS class: anything other than Guaranteed carries some risk, with
	// Best Effort being the riskiest.
	// NOTE(review): strings.Title is deprecated as of Go 1.18; consider a
	// replacement when this code is next touched.
	switch qos := app.Settings.QosClass; {
	case qos == appmodel.QOS_BESTEFFORT:
		level = bumpRisk(level, appmodel.RISK_HIGH)
		cautions = append(cautions, "Pod QoS class is Best Effort")
	case qos != appmodel.QOS_GUARANTEED:
		level = bumpRisk(level, appmodel.RISK_MEDIUM)
		cautions = append(cautions, fmt.Sprintf("Pod QOS class is %v", strings.Title(qos)))
	}

	// Utilization tiers, checked from most to least severe; only the first
	// matching tier contributes.
	cpu := app.Metrics.CpuUtilization
	mem := app.Metrics.MemoryUtilization
	throttled := app.Metrics.CpuSecondsThrottled
	switch {
	case cpu >= 200 || mem >= 200 || throttled >= 0.7:
		level = bumpRisk(level, appmodel.RISK_HIGH)
		cautions = append(cautions, "Resource utilization significantly exceeds allocation")
	case cpu > 120 || mem > 120 || throttled > 0.25:
		// NOTE(review): this tier reports the same level as the tier above;
		// confirm that is intended.
		level = bumpRisk(level, appmodel.RISK_HIGH)
		cautions = append(cautions, "Resource utilization exceeds allocation")
	case cpu > 90 || mem > 90 || throttled > 0.1:
		level = bumpRisk(level, appmodel.RISK_MEDIUM)
		cautions = append(cautions, "Resource utilization close to allocation")
	}

	// Nothing triggered: fall back to the lowest assessed level.
	level = bumpRisk(level, appmodel.RISK_LOW)
	return level, cautions
}

func analyzeApp(app *appmodel.App) {
// finalize basis and prepare for analysis
preAnalyzeApp(app)
Expand All @@ -348,7 +402,17 @@ func analyzeApp(app *appmodel.App) {
o.Flags[appmodel.F_WRITEABLE_VOLUME] = false
}

// missing resource specification (main container has no QoS)
// resource specification flags
if app.Settings.QosClass == appmodel.QOS_GUARANTEED {
o.Flags[appmodel.F_RESOURCE_GUARANTEED] = true
} else {
o.Flags[appmodel.F_RESOURCE_GUARANTEED] = false
}
if resourcesLimited(app) {
o.Flags[appmodel.F_RESOURCE_LIMITS] = true
} else {
o.Flags[appmodel.F_RESOURCE_LIMITS] = false
}
if resGood, msg := resourcesExplicitlyDefined(app); resGood {
o.Flags[appmodel.F_RESOURCE_SPEC] = true
} else {
Expand Down Expand Up @@ -378,8 +442,20 @@ func analyzeApp(app *appmodel.App) {
}
}

// compute scores
o.EfficiencyScore = int(math.Round(app.Metrics.CpuUtilization*CPU_WEIGHT + app.Metrics.MemoryUtilization*MEM_WEIGHT))
// compute efficiency score
if app.Metrics.MemoryUtilization == 0 {
o.EfficiencyScore = nil // something is wrong - this app likely not functioning or we don't have metrics
} else if app.Metrics.CpuUtilization == 0 {
// idle apps are inefficient by definition
score := 0
o.EfficiencyScore = &score
} else {
cpuSat := opsmath.Min(app.Metrics.CpuUtilization, 100) // cap utilization for efficiency calc
memSat := opsmath.Min(app.Metrics.MemoryUtilization, 100) // " "
// score can be assigned only if the app is not bursting
score := int(math.Round(cpuSat*CPU_WEIGHT + memSat*MEM_WEIGHT))
o.EfficiencyScore = &score
}

// analyze request rate
if app.Metrics.RequestRate == 0 {
Expand Down Expand Up @@ -408,13 +484,20 @@ func analyzeApp(app *appmodel.App) {
o.Confidence += 30
o.Flags[appmodel.F_SINGLE_REPLICA] = false
o.Flags[appmodel.F_MANY_REPLICAS] = true
} else if app.Metrics.AverageReplicas >= 3 {
o.Rating += 10
o.Confidence += 10
} else {
if app.Metrics.AverageReplicas > 3 {
o.Rating += 10
o.Confidence += 10
}
o.Flags[appmodel.F_SINGLE_REPLICA] = false
o.Flags[appmodel.F_MANY_REPLICAS] = false
}

// perform risk assessment
riskCautions := []string{}
o.ReliabilityRisk, riskCautions = riskAssessment(app)
o.Cautions = append(o.Cautions, riskCautions...)

// finalize blockers
if len(o.Blockers) > 0 {
o.Rating = -100
Expand Down
58 changes: 51 additions & 7 deletions cmd/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,54 @@ func flagsString(flags map[appmodel.AppFlag]bool) (ret string) {
return
}

func (table *AppTable) outputTableHeader() {
const RIGHT = tablewriter.ALIGN_RIGHT
// riskColor maps a risk level to the tablewriter foreground colour used when
// displaying it. It returns 0 (neutral, no colour) for a nil pointer and for
// levels that have no dedicated colour (unknown, none, or unrecognized).
//
// Critical risk shares the red colour of high risk, so the most severe level
// is never rendered less prominently than a lower one.
func riskColor(r *appmodel.RiskLevel) int {
	const neutral = 0
	if r == nil {
		return neutral
	}
	switch *r {
	case appmodel.RISK_LOW:
		return tablewriter.FgGreenColor
	case appmodel.RISK_MEDIUM:
		return tablewriter.FgYellowColor
	case appmodel.RISK_HIGH, appmodel.RISK_CRITICAL:
		return tablewriter.FgRedColor
	}
	return neutral
}

// HeaderInfo describes one column of the application table: the text shown
// in the column header and the alignment applied to the column.
type HeaderInfo struct {
	Title string // header text passed to the table's SetHeader
	Alignment int // alignment value passed to the table's SetColumnAlignment
}

// getHeadersInfo returns the columns of the application table in display
// order, each with its header title and column alignment. A fresh slice is
// built on every call.
func getHeadersInfo() []HeaderInfo {
	return []HeaderInfo{
		{Title: "Namespace", Alignment: tablewriter.ALIGN_LEFT},
		{Title: "Deployment", Alignment: tablewriter.ALIGN_LEFT},
		{Title: "Efficiency\nScore", Alignment: tablewriter.ALIGN_RIGHT},
		{Title: "Reliability\nRisk", Alignment: tablewriter.ALIGN_CENTER},
		{Title: "Instances", Alignment: tablewriter.ALIGN_RIGHT},
		{Title: "CPU", Alignment: tablewriter.ALIGN_RIGHT},
		{Title: "Mem", Alignment: tablewriter.ALIGN_RIGHT},
		{Title: "Opportunity", Alignment: tablewriter.ALIGN_LEFT},
		{Title: "Flags", Alignment: tablewriter.ALIGN_LEFT},
	}
}

table.t.SetHeader([]string{"Efficiency\nScore", "Namespace", "Deployment", "QoS Class", "Instances", "CPU", "Mem", "Opportunity", "Flags"})
table.t.SetColumnAlignment([]int{RIGHT, LEFT, LEFT, LEFT, RIGHT, RIGHT, RIGHT, LEFT, LEFT})
func (table *AppTable) outputTableHeader() {
var headers []string
var alignments []int
for _, header := range getHeadersInfo() {
headers = append(headers, header.Title)
alignments = append(alignments, header.Alignment)
}
table.t.SetHeader(headers)
table.t.SetColumnAlignment(alignments)
table.t.SetFooter([]string{})
table.t.SetCenterSeparator("")
table.t.SetColumnSeparator("")
Expand All @@ -106,10 +148,10 @@ func (table *AppTable) outputTableHeader() {
func (table *AppTable) outputTableApp(app *appmodel.App) {
reason, color := appOpportunityAndColor(app)
rowValues := []string{
fmt.Sprintf("%3d", app.Analysis.EfficiencyScore),
app.Metadata.Namespace,
app.Metadata.Workload,
app.Settings.QosClass,
fmt.Sprintf("%3v", appmodel.Score2String(app.Analysis.EfficiencyScore)),
fmt.Sprintf("%v", appmodel.Risk2String(app.Analysis.ReliabilityRisk)),
fmt.Sprintf("%.0fx%d", app.Metrics.AverageReplicas, len(app.Containers)),
fmt.Sprintf("%.0f%%", app.Metrics.CpuUtilization),
fmt.Sprintf("%.0f%%", app.Metrics.MemoryUtilization),
Expand Down Expand Up @@ -140,14 +182,16 @@ func (table *AppTable) outputDetailApp(app *appmodel.App) {
opportunityColors := []tablewriter.Colors{[]int{0}, []int{tablewriter.FgGreenColor}}
cautionColors := []tablewriter.Colors{[]int{0}, []int{tablewriter.FgYellowColor}}
blockerColors := []tablewriter.Colors{[]int{0}, []int{tablewriter.FgRedColor}}
riskColors := []tablewriter.Colors{[]int{0}, []int{riskColor(app.Analysis.ReliabilityRisk)}}

table.t.Rich([]string{"Namespace", app.Metadata.Namespace}, nil)
table.t.Rich([]string{"Deployment", app.Metadata.Workload}, nil)
table.t.Rich([]string{"Kind", fmt.Sprintf("%v (%v)", app.Metadata.WorkloadKind, app.Metadata.WorkloadApiVersion)}, nil)
table.t.Rich([]string{"Main Container", app.Analysis.MainContainer}, nil)
table.t.Rich([]string{"Pod QoS Class", app.Settings.QosClass}, nil)

table.t.Rich([]string{"Efficiency Score", fmt.Sprintf("%4d", app.Analysis.EfficiencyScore)}, appColors)
table.t.Rich([]string{"Efficiency Score", fmt.Sprintf("%4v", appmodel.Score2String(app.Analysis.EfficiencyScore))}, appColors)
table.t.Rich([]string{"Reliability Risk", fmt.Sprintf("%v", appmodel.Risk2String(app.Analysis.ReliabilityRisk))}, riskColors)
table.t.Rich([]string{"Rating", fmt.Sprintf("%4d%%", app.Analysis.Rating)}, appColors)
table.t.Rich([]string{"Confidence", fmt.Sprintf("%4d%%", app.Analysis.Confidence)}, appColors)

Expand Down
51 changes: 51 additions & 0 deletions math/stats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
Copyright © 2021 Opsani <support@opsani.com>
This file is part of https://github.com/opsani/opsani-ignite
*/

package math

import (
m "math"
)

// Min returns the smallest finite value among samples. NaN and infinite
// samples are ignored. When no finite sample exists (including an empty
// argument list) the result is NaN.
func Min(samples ...float64) float64 {
	lowest := m.NaN()
	found := false
	for i := range samples {
		v := samples[i]
		if m.IsNaN(v) || m.IsInf(v, 0) {
			continue // skip values that are not usable numbers
		}
		if !found || v < lowest {
			lowest, found = v, true
		}
	}
	return lowest
}

// Sum adds up the finite values among samples. NaN and infinite samples are
// ignored. The result is 0 when there is nothing to add.
func Sum(samples ...float64) float64 {
	var acc float64
	for i := range samples {
		if v := samples[i]; !m.IsNaN(v) && !m.IsInf(v, 0) {
			acc += v
		}
	}
	return acc
}

// Avg returns the arithmetic mean of the finite values among samples. NaN
// and infinite samples are ignored and do not count towards the divisor.
// The result is 0 when no finite sample exists (including an empty argument
// list).
func Avg(samples ...float64) float64 {
	total := 0.0
	count := 0
	for _, val := range samples {
		if m.IsNaN(val) || m.IsInf(val, 0) {
			continue
		}
		total += val
		count++
	}

	if count == 0 {
		return 0.0
	}
	// Divide by the number of values actually summed, not by len(samples):
	// skipped samples must not drag the mean towards zero.
	return total / float64(count)
}
Loading

0 comments on commit 4e6de25

Please sign in to comment.