Skip to content

Commit

Permalink
fix platform power return unit
Browse files Browse the repository at this point in the history
Signed-off-by: Sunyanan Choochotkaew <sunyanan.choochotkaew1@ibm.com>
  • Loading branch information
sunya-ch committed May 23, 2024
1 parent c2a4a09 commit 6184c87
Show file tree
Hide file tree
Showing 18 changed files with 75 additions and 54 deletions.
2 changes: 2 additions & 0 deletions pkg/collector/stats/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,11 @@ func (m *Stats) CalcDynEnergy(absM, idleM, dynM, id string) {
idlePower := uint64(0)
if idleStat, found := m.EnergyUsage[idleM].Stat[id]; found {
idlePower = idleStat.Delta
klog.V(6).Infof("Idle Energy stat: %v (%s)", m.EnergyUsage[idleM].Stat, id)
}
dynPower := calcDynEnergy(totalPower, idlePower)
m.EnergyUsage[dynM].SetDeltaStat(id, dynPower)
klog.V(6).Infof("Dynamic Energy stat: %v (%s)", m.EnergyUsage[dynM].Stat, id)
}

func calcDynEnergy(totalE, idleE uint64) uint64 {
Expand Down
2 changes: 1 addition & 1 deletion pkg/metrics/consts/conts.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
MetricsNamespace = "kepler"
EnergyMetricNameSuffix = "_joules_total"
UsageMetricNameSuffix = "_total"
MiliJouleToJoule = 1000
MilliJouleToJoule = 1000
)

var (
Expand Down
4 changes: 2 additions & 2 deletions pkg/metrics/container/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ func (c *collector) collectTotalEnergyMetrics(ch chan<- prometheus.Metric, conta
energy += container.EnergyUsage[config.DynEnergyInDRAM].SumAllAggrValues()
energy += container.EnergyUsage[config.DynEnergyInOther].SumAllAggrValues()
energy += container.EnergyUsage[config.DynEnergyInGPU].SumAllAggrValues()
energy /= consts.MiliJouleToJoule
energy /= consts.MilliJouleToJoule
labelValues := []string{container.ContainerID, container.PodName, container.ContainerName, container.Namespace, "dynamic"}
ch <- c.collectors["total"].MustMetric(float64(energy), labelValues...)

energy = container.EnergyUsage[config.IdleEnergyInPkg].SumAllAggrValues()
energy += container.EnergyUsage[config.IdleEnergyInDRAM].SumAllAggrValues()
energy += container.EnergyUsage[config.IdleEnergyInOther].SumAllAggrValues()
energy += container.EnergyUsage[config.IdleEnergyInGPU].SumAllAggrValues()
energy /= consts.MiliJouleToJoule
energy /= consts.MilliJouleToJoule
labelValues = []string{container.ContainerID, container.PodName, container.ContainerName, container.Namespace, "idle"}
ch <- c.collectors["total"].MustMetric(float64(energy), labelValues...)
}
8 changes: 4 additions & 4 deletions pkg/metrics/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,19 +74,19 @@ func collectEnergy(ch chan<- prometheus.Metric, instance interface{}, metricName
switch v := instance.(type) {
case *stats.ContainerStats:
container := instance.(*stats.ContainerStats)
value = float64(container.EnergyUsage[metricName].SumAllAggrValues()) / consts.MiliJouleToJoule
value = float64(container.EnergyUsage[metricName].SumAllAggrValues()) / consts.MilliJouleToJoule
labelValues = []string{container.ContainerID, container.PodName, container.ContainerName, container.Namespace, mode}
collect(ch, collector, value, labelValues)

case *stats.ProcessStats:
process := instance.(*stats.ProcessStats)
value = float64(process.EnergyUsage[metricName].SumAllAggrValues()) / consts.MiliJouleToJoule
value = float64(process.EnergyUsage[metricName].SumAllAggrValues()) / consts.MilliJouleToJoule
labelValues = []string{strconv.FormatUint(process.PID, 10), process.ContainerID, process.VMID, process.Command, mode}
collect(ch, collector, value, labelValues)

case *stats.VMStats:
vm := instance.(*stats.VMStats)
value = float64(vm.EnergyUsage[metricName].SumAllAggrValues()) / consts.MiliJouleToJoule
value = float64(vm.EnergyUsage[metricName].SumAllAggrValues()) / consts.MilliJouleToJoule
labelValues = []string{vm.VMID, mode}
collect(ch, collector, value, labelValues)

Expand All @@ -95,7 +95,7 @@ func collectEnergy(ch chan<- prometheus.Metric, instance interface{}, metricName
node := instance.(*stats.NodeStats)
if _, exist := node.EnergyUsage[metricName]; exist {
for deviceID, utilization := range node.EnergyUsage[metricName].Stat {
value = float64(utilization.Aggr) / consts.MiliJouleToJoule
value = float64(utilization.Aggr) / consts.MilliJouleToJoule
labelValues = []string{deviceID, stats.NodeName, mode}
collect(ch, collector, value, labelValues)
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/model/estimator/local/ratio.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ func (r *RatioPowerModel) getPowerByRatio(processIdx, resUsageFeature, nodePower
}

// GetPlatformPower applies ModelWeight prediction and returns a list of total powers
func (r *RatioPowerModel) GetPlatformPower(isIdlePower bool) ([]float64, error) {
var processPlatformPower []float64
func (r *RatioPowerModel) GetPlatformPower(isIdlePower bool) ([]uint64, error) {
var processPlatformPower []uint64

// the number of processes is used to evenly divide the power consumption for OTHER and UNCORE
// we do not use CPU utilization for OTHER and UNCORE because they are not necessarily directly
Expand All @@ -104,7 +104,7 @@ func (r *RatioPowerModel) GetPlatformPower(isIdlePower bool) ([]float64, error)
} else {
processPower = r.getPowerByRatio(processIdx, int(PlatformUsageMetric), int(PlatformDynPower), numProcesses)
}
processPlatformPower = append(processPlatformPower, processPower)
processPlatformPower = append(processPlatformPower, uint64(processPower))
}
return processPlatformPower, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/exponential_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ var (
var _ = Describe("Test Exponential Predictor Unit", func() {
It("Get Node Platform Power By Exponential Regression", func() {
powers := GetNodePlatformPowerFromDummyServer(dummyExponentialWeightHandler, types.ExponentialTrainer)
Expect(int(powers[0])).Should(BeEquivalentTo(4))
Expect(int(powers[0]/1000) * 1000).Should(BeEquivalentTo(4000))
})

It("Get Node Components Power By Exponential Regression", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/linear_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
var _ = Describe("Test Linear Predictor Unit", func() {
It("Get Node Platform Power By Linear Regression", func() {
powers := GetNodePlatformPowerFromDummyServer(DummyWeightHandler, types.LinearRegressionTrainer)
Expect(powers[0]).Should(BeEquivalentTo(3))
Expect(powers[0]).Should(BeEquivalentTo(3000))
})

It("Get Node Components Power By Linear Regression", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/logarithm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ var (
var _ = Describe("Test Logarithmic Predictor Unit", func() {
It("Get Node Platform Power By Logarithmic Regression", func() {
powers := GetNodePlatformPowerFromDummyServer(dummyLogarithmicWeightHandler, types.LogarithmicTrainer)
Expect(int(powers[0])).Should(BeEquivalentTo(2))
Expect(int(powers[0]/1000) * 1000).Should(BeEquivalentTo(2000))
})

It("Get Node Components Power By Logarithmic Regression", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/logistic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ var (
var _ = Describe("Test Logistic Predictor Unit", func() {
It("Get Node Platform Power By Logistic Regression", func() {
powers := GetNodePlatformPowerFromDummyServer(dummyLogisticWeightHandler, types.LogisticTrainer)
Expect(int(powers[0])).Should(BeEquivalentTo(2))
Expect(int(powers[0]/1000) * 1000).Should(BeEquivalentTo(2000))
})

It("Get Node Components Power By Logistic Regression", func() {
Expand Down
12 changes: 6 additions & 6 deletions pkg/model/estimator/local/regressor/regressor.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,24 +232,24 @@ func (r *Regressor) createPredictor(weight ModelWeights) (predictor Predictor, e
}

// GetPlatformPower applies ModelWeight prediction and return a list of power associated to each process/process/pod
func (r *Regressor) GetPlatformPower(isIdlePower bool) ([]float64, error) {
func (r *Regressor) GetPlatformPower(isIdlePower bool) ([]uint64, error) {
if !r.enabled {
return []float64{}, fmt.Errorf("disabled power model call: %s", r.OutputType.String())
return []uint64{}, fmt.Errorf("disabled power model call: %s", r.OutputType.String())
}
if r.modelPredictors != nil {
floatFeatureValues := r.floatFeatureValues[0:r.xidx]
if isIdlePower {
floatFeatureValues = r.floatFeatureValuesForIdlePower[0:r.xidx]
}
if predictor, found := (r.modelPredictors)[config.PLATFORM]; found {
power := predictor.predict(
powers := predictor.predict(
r.FloatFeatureNames, floatFeatureValues,
r.SystemMetaDataFeatureNames, r.SystemMetaDataFeatureValues)
return power, nil
return utils.GetPlatformPower(powers), nil
}
return []float64{}, fmt.Errorf("model Weight for model type %s is not valid: %v", r.OutputType.String(), r.modelWeight)
return []uint64{}, fmt.Errorf("model Weight for model type %s is not valid: %v", r.OutputType.String(), r.modelWeight)
}
return []float64{}, fmt.Errorf("model Weight for model type %s is nil", r.OutputType.String())
return []uint64{}, fmt.Errorf("model Weight for model type %s is nil", r.OutputType.String())
}

// GetComponentsPower applies each component's ModelWeight prediction and return a map of component power associated to each process/process/pod
Expand Down
18 changes: 12 additions & 6 deletions pkg/model/estimator/local/regressor/regressor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func genRegressor(outputType types.ModelOutputType, energySource, modelServerEnd
}
}

func GetNodePlatformPowerFromDummyServer(handler http.HandlerFunc, trainer string) (power []float64) {
func GetNodePlatformPowerFromDummyServer(handler http.HandlerFunc, trainer string) (power []uint64) {
testServer := httptest.NewServer(handler)
modelWeightFilepath := config.GetDefaultPowerModelURL(types.AbsPower.String(), types.PlatformEnergySource)
r := genRegressor(types.AbsPower, types.PlatformEnergySource, testServer.URL, "", modelWeightFilepath, trainer)
Expand Down Expand Up @@ -172,14 +172,13 @@ var _ = Describe("Test Regressor Weight Unit (default trainer)", func() {
It("Get Node Platform Power By Default Regression with ModelServerEndpoint", func() {
powers := GetNodePlatformPowerFromDummyServer(DummyWeightHandler, "")
// TODO: verify if the power makes sense
Expect(powers[0]).Should(BeEquivalentTo(3))
Expect(powers[0]).Should(BeEquivalentTo(3000))
})

It("Get Node Components Power By Default Regression Estimator with ModelServerEndpoint", func() {
compPowers := GetNodeComponentsPowerFromDummyServer(genHandlerFunc([]float64{}), "")
// TODO: verify if the power makes sense
Expect(compPowers[0].Core).Should(BeEquivalentTo(3000))
Expect(compPowers[0].Core).Should(BeEquivalentTo(3000))
})

It("Get Process Platform Power By Default Regression Estimator with ModelServerEndpoint", func() {
Expand All @@ -196,8 +195,11 @@ var _ = Describe("Test Regressor Weight Unit (default trainer)", func() {
Expect(err).NotTo(HaveOccurred())
Expect(len(powers)).Should(Equal(len(processFeatureValues)))
// TODO: verify if the power makes sense
Expect(powers[0]).Should(BeEquivalentTo(2.5))
Expect(powers[0]).Should(BeEquivalentTo(2.5))
Expect(powers[0]).Should(BeEquivalentTo(2500))
idlePowers, err := r.GetPlatformPower(true)
Expect(err).NotTo(HaveOccurred())
Expect(len(idlePowers)).Should(Equal(len(processFeatureValues)))
Expect(idlePowers[0]).Should(BeEquivalentTo(2000))
})

It("Get Process Components Power By Default Regression Estimator with ModelServerEndpoint", func() {
Expand All @@ -215,7 +217,11 @@ var _ = Describe("Test Regressor Weight Unit (default trainer)", func() {
Expect(len(compPowers)).Should(Equal(len(processFeatureValues)))
// TODO: verify if the power makes sense
Expect(compPowers[0].Core).Should(BeEquivalentTo(2500))
Expect(compPowers[0].Core).Should(BeEquivalentTo(2500))

idlePowers, err := r.GetComponentsPower(true)
Expect(err).NotTo(HaveOccurred())
Expect(len(idlePowers)).Should(Equal(len(processFeatureValues)))
Expect(idlePowers[0].Core).Should(BeEquivalentTo(2000))
})
})

Expand Down
13 changes: 6 additions & 7 deletions pkg/model/estimator/sidecar/estimate.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,26 +143,26 @@ func (c *EstimatorSidecar) makeRequest(usageValues [][]float64, systemValues []s
}

// GetPlatformPower makes a request to Kepler Estimator EstimatorSidecar and returns a list of total powers
func (c *EstimatorSidecar) GetPlatformPower(isIdlePower bool) ([]float64, error) {
func (c *EstimatorSidecar) GetPlatformPower(isIdlePower bool) ([]uint64, error) {
if !c.enabled {
return []float64{}, fmt.Errorf("disabled power model call: %s", c.OutputType.String())
return []uint64{}, fmt.Errorf("disabled power model call: %s", c.OutputType.String())
}
featuresValues := c.floatFeatureValues[0:c.xidx]
if isIdlePower {
featuresValues = c.floatFeatureValuesForIdlePower[0:c.xidx]
}
compPowers, err := c.makeRequest(featuresValues, c.SystemMetaDataFeatureValues)
if err != nil {
return []float64{}, err
return []uint64{}, err
}
power := compPowers.(map[string][]float64)
if len(power) == 0 {
return []float64{}, err
return []uint64{}, err
}
if powers, found := power[config.PLATFORM]; !found {
return []float64{}, fmt.Errorf("not found %s in response %v", config.PLATFORM, power)
return []uint64{}, fmt.Errorf("not found %s in response %v", config.PLATFORM, power)
} else {
return powers, nil
return utils.GetPlatformPower(powers), nil
}
}

Expand All @@ -187,7 +187,6 @@ func (c *EstimatorSidecar) GetComponentsPower(isIdlePower bool) ([]source.NodeCo
break
}
nodeComponentsPower := make([]source.NodeComponentsEnergy, num)

for index := 0; index < num; index++ {
pkgPower := utils.GetComponentPower(power, config.PKG, index)
corePower := utils.GetComponentPower(power, config.CORE, index)
Expand Down
7 changes: 4 additions & 3 deletions pkg/model/estimator/sidecar/estimate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ import (
)

var (
SampleDynEnergyValue float64 = 100000 // 100 mJ
SampleDynEnergyValue float64 = 1.0
SampleDynEnergyValueInMilliJoule uint64 = uint64(SampleDynEnergyValue) * 1000

processFeatureNames = []string{
config.CPUCycle,
Expand Down Expand Up @@ -151,7 +152,7 @@ var _ = Describe("Test Estimate Unit", func() {
powers, err := c.GetPlatformPower(false)
Expect(err).NotTo(HaveOccurred())
Expect(len(powers)).Should(Equal(1))
Expect(powers[0]).Should(Equal(SampleDynEnergyValue))
Expect(powers[0]).Should(Equal(SampleDynEnergyValueInMilliJoule))
quit <- true
})

Expand All @@ -173,7 +174,7 @@ var _ = Describe("Test Estimate Unit", func() {
powers, err := c.GetPlatformPower(false)
Expect(err).NotTo(HaveOccurred())
Expect(len(powers)).Should(Equal(len(processFeatureValues)))
Expect(powers[0]).Should(Equal(SampleDynEnergyValue))
Expect(powers[0]).Should(Equal(SampleDynEnergyValueInMilliJoule))
quit <- true
})
It("Get Node Component Power By Sidecar Estimator", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ type PowerModelInterface interface {
GetNodeFeatureNamesList() []string
// GetPlatformPower returns the total Platform Power in Watts associated to each process/process/pod
// If isIdlePower is true, return the idle power, otherwise return the dynamic or absolute power depending on the model.
GetPlatformPower(isIdlePower bool) ([]float64, error)
GetPlatformPower(isIdlePower bool) ([]uint64, error)
// GetComponentsPower returns RAPL components Power in Watts associated to each process/process/pod
// If isIdlePower is true, return the idle power, otherwise return the dynamic or absolute power depending on the model.
GetComponentsPower(isIdlePower bool) ([]source.NodeComponentsEnergy, error)
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/node_component_energy.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func GetNodeComponentPowers(nodeMetrics *stats.NodeStats, isIdlePower bool) (nod
return
}

// UpdateNodeComponentIdleEnergy sets the power model samples, get absolute powers, and set gauge value for each component energy
// UpdateNodeComponentEnergy sets the power model samples, get absolute powers, and set gauge value for each component energy
func UpdateNodeComponentEnergy(nodeMetrics *stats.NodeStats) {
addEnergy(nodeMetrics, stats.AvailableAbsEnergyMetrics, absPower)
}
Expand Down
22 changes: 12 additions & 10 deletions pkg/model/node_platform_energy.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,19 @@ func IsNodePlatformPowerModelEnabled() bool {
}

// GetNodePlatformPower returns a single estimated value of node total power
func GetNodePlatformPower(nodeMetrics *stats.NodeStats, isIdlePower bool) (platformEnergy map[string]float64) {
func GetNodePlatformPower(nodeMetrics *stats.NodeStats, isIdlePower bool) (platformEnergy map[string]uint64) {
if NodePlatformPowerModel == nil {
klog.Errorln("Node Platform Power Model was not created")
}
platformEnergy = map[string]float64{}
// reset power model features sample list for new estimation
NodePlatformPowerModel.ResetSampleIdx()
// converts the node metrics map to an array to add the samples to the power model
// the featureList is defined in the container power model file and the features vary according to the selected power model
featureValues := nodeMetrics.ToEstimatorValues(NodePlatformPowerModel.GetNodeFeatureNamesList(), true) // add container features with normalized values
NodePlatformPowerModel.AddNodeFeatureValues(featureValues) // add samples to estimation
platformEnergy = map[string]uint64{}
if !isIdlePower {
// reset power model features sample list for new estimation
NodePlatformPowerModel.ResetSampleIdx()
// converts the node metrics map to an array to add the samples to the power model
// the featureList is defined in the container power model file and the features vary according to the selected power model
featureValues := nodeMetrics.ToEstimatorValues(NodePlatformPowerModel.GetNodeFeatureNamesList(), true) // add container features with normalized values
NodePlatformPowerModel.AddNodeFeatureValues(featureValues) // add samples to estimation
}
powers, err := NodePlatformPowerModel.GetPlatformPower(isIdlePower)
if err != nil {
klog.Infof("Failed to get node platform power %v\n", err)
Expand All @@ -92,14 +94,14 @@ func GetNodePlatformPower(nodeMetrics *stats.NodeStats, isIdlePower bool) (platf
func UpdateNodePlatformEnergy(nodeMetrics *stats.NodeStats) {
platformPower := GetNodePlatformPower(nodeMetrics, absPower)
for sourceID, power := range platformPower {
nodeMetrics.EnergyUsage[config.AbsEnergyInPlatform].SetDeltaStat(sourceID, uint64(power)*config.SamplePeriodSec)
nodeMetrics.EnergyUsage[config.AbsEnergyInPlatform].SetDeltaStat(sourceID, power*config.SamplePeriodSec)
}
}

// UpdateNodePlatformIdleEnergy sets the power model samples to zeros, get idle powers, and set platform energy
func UpdateNodePlatformIdleEnergy(nodeMetrics *stats.NodeStats) {
platformPower := GetNodePlatformPower(nodeMetrics, idlePower)
for sourceID, power := range platformPower {
nodeMetrics.EnergyUsage[config.IdleEnergyInPlatform].SetDeltaStat(sourceID, uint64(power)*config.SamplePeriodSec)
nodeMetrics.EnergyUsage[config.IdleEnergyInPlatform].SetDeltaStat(sourceID, power*config.SamplePeriodSec)
}
}
10 changes: 6 additions & 4 deletions pkg/model/process_energy.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func addSamplesToPowerModels(processesMetrics map[uint64]*stats.ProcessStats, no
func addEstimatedEnergy(processIDList []uint64, processesMetrics map[uint64]*stats.ProcessStats, isIdlePower bool) {
var err error
var processGPUPower []float64
var processPlatformPower []float64
var processPlatformPower []uint64
var processComponentsPower []source.NodeComponentsEnergy

errComp := fmt.Errorf("component power model is not enabled")
Expand Down Expand Up @@ -274,7 +274,7 @@ func addEstimatedEnergy(processIDList []uint64, processesMetrics map[uint64]*sta
}

if errPlat == nil {
energy = uint64(processPlatformPower[i]) * config.SamplePeriodSec
energy = processPlatformPower[i] * config.SamplePeriodSec
if isIdlePower {
processesMetrics[processID].EnergyUsage[config.IdleEnergyInPlatform].SetDeltaStat(utils.GenericSocketID, energy)
} else {
Expand All @@ -288,9 +288,11 @@ func addEstimatedEnergy(processIDList []uint64, processesMetrics map[uint64]*sta
// estimate other components power if both platform and components power are available
if errComp == nil && errPlat == nil {
// TODO: verify if Platform power also includes the GPU into consideration
otherPower := processPlatformPower[i] - float64(processComponentsPower[i].Pkg) - float64(processComponentsPower[i].DRAM)
if otherPower < 0 {
var otherPower uint64
if processPlatformPower[i] <= (processComponentsPower[i].Pkg + processComponentsPower[i].DRAM) {
otherPower = 0
} else {
otherPower = processPlatformPower[i] - processComponentsPower[i].Pkg - processComponentsPower[i].DRAM
}
energy = uint64(otherPower) * config.SamplePeriodSec

Check failure on line 297 in pkg/model/process_energy.go

View workflow job for this annotation

GitHub Actions / golang / lint

unnecessary conversion (unconvert)
if isIdlePower {
Expand Down
Loading

0 comments on commit 6184c87

Please sign in to comment.