From 559b0d78b6cedad58e92e20fa394c33367c3ce54 Mon Sep 17 00:00:00 2001 From: Huamin Chen Date: Tue, 13 Jun 2023 09:50:35 -0400 Subject: [PATCH] redfish: access model Signed-off-by: Huamin Chen --- cmd/exporter.go | 13 +- pkg/collector/metric/node_metric.go | 47 +++++- pkg/collector/metric_collector.go | 10 ++ pkg/collector/node_energy_collector.go | 12 +- pkg/collector/prometheus_collector.go | 23 +++ pkg/collector/redfish_collector.go | 57 ++++++++ pkg/config/config.go | 11 ++ pkg/nodecred/csv_cred.go | 116 +++++++++++++++ pkg/nodecred/csv_cred_test.go | 132 +++++++++++++++++ pkg/nodecred/node_cred.go | 65 +++++++++ pkg/nodecred/secret_cred.go | 29 ++++ pkg/power/redfish/redfish.go | 190 +++++++++++++++++++++++++ pkg/power/redfish/redfish_test.go | 78 ++++++++++ pkg/power/redfish/util.go | 84 +++++++++++ 14 files changed, 863 insertions(+), 4 deletions(-) create mode 100644 pkg/collector/redfish_collector.go create mode 100644 pkg/nodecred/csv_cred.go create mode 100644 pkg/nodecred/csv_cred_test.go create mode 100644 pkg/nodecred/node_cred.go create mode 100644 pkg/nodecred/secret_cred.go create mode 100644 pkg/power/redfish/redfish.go create mode 100644 pkg/power/redfish/redfish_test.go create mode 100644 pkg/power/redfish/util.go diff --git a/cmd/exporter.go b/cmd/exporter.go index 672ca42b4e..cb06dc6423 100644 --- a/cmd/exporter.go +++ b/cmd/exporter.go @@ -63,6 +63,7 @@ var ( kubeconfig = flag.String("kubeconfig", "", "absolute path to the kubeconfig file, if empty we use the in-cluster configuration") apiserverEnabled = flag.Bool("apiserver", true, "if apiserver is disabled, we collect pod information from kubelet") kernelSourceDirPath = flag.String("kernel-source-dir", "", "path to the kernel source directory") + redfishCredFilePath = flag.String("redfish-cred-file-path", "", "path to the redfish credential file") ) func healthProbe(w http.ResponseWriter, req *http.Request) { @@ -154,10 +155,13 @@ func main() { 
config.SetEnabledHardwareCounterMetrics(*exposeHardwareCounterMetrics) config.SetEnabledGPU(*enableGPU) config.EnabledMSR = *enabledMSR + config.SetKubeConfig(*kubeconfig) config.SetEnableAPIServer(*apiserverEnabled) - if err := config.SetKernelSourceDir(*kernelSourceDirPath); err != nil { - klog.Warningf("failed to set kernel source dir to %q: %v", *kernelSourceDirPath, err) + if kernelSourceDirPath != nil && len(*kernelSourceDirPath) > 0 { + if err := config.SetKernelSourceDir(*kernelSourceDirPath); err != nil { + klog.Warningf("failed to set kernel source dir to %q: %v", *kernelSourceDirPath, err) + } } // the ebpf batch deletion operation was introduced in linux kernel 5.6, which provides better performance to delete keys. @@ -174,6 +178,11 @@ func main() { collector_metric.InitAvailableParamAndMetrics() + // set redfish credential file path + if *redfishCredFilePath != "" { + config.SetRedfishCredFilePath(*redfishCredFilePath) + } + // For local estimator, there is endpoint provided, thus we should let // model component decide whether/how to init model.InitEstimateFunctions(collector_metric.ContainerMetricNames, collector_metric.NodeMetadataNames, collector_metric.NodeMetadataValues) diff --git a/pkg/collector/metric/node_metric.go b/pkg/collector/metric/node_metric.go index ddd439445f..4b230ef25d 100644 --- a/pkg/collector/metric/node_metric.go +++ b/pkg/collector/metric/node_metric.go @@ -37,6 +37,7 @@ const ( GPU = "gpu" OTHER = "other" PLATFORM = "platform" + REDFISH = "redfish" FREQUENCY = "frequency" ) @@ -61,6 +62,7 @@ type NodeMetrics struct { TotalEnergyInGPU *types.UInt64StatCollection TotalEnergyInOther *types.UInt64StatCollection TotalEnergyInPlatform *types.UInt64StatCollection + TotalEnergyInRedfish *types.UInt64StatCollection DynEnergyInCore *types.UInt64StatCollection DynEnergyInDRAM *types.UInt64StatCollection @@ -69,6 +71,7 @@ type NodeMetrics struct { DynEnergyInGPU *types.UInt64StatCollection DynEnergyInOther *types.UInt64StatCollection 
DynEnergyInPlatform *types.UInt64StatCollection + DynEnergyInRedfish *types.UInt64StatCollection IdleEnergyInCore *types.UInt64StatCollection IdleEnergyInDRAM *types.UInt64StatCollection @@ -77,6 +80,7 @@ type NodeMetrics struct { IdleEnergyInGPU *types.UInt64StatCollection IdleEnergyInOther *types.UInt64StatCollection IdleEnergyInPlatform *types.UInt64StatCollection + IdleEnergyInRedfish *types.UInt64StatCollection CPUFrequency map[int32]uint64 @@ -109,6 +113,9 @@ func NewNodeMetrics() *NodeMetrics { TotalEnergyInPlatform: &types.UInt64StatCollection{ Stat: make(map[string]*types.UInt64Stat), }, + TotalEnergyInRedfish: &types.UInt64StatCollection{ + Stat: make(map[string]*types.UInt64Stat), + }, DynEnergyInCore: &types.UInt64StatCollection{ Stat: make(map[string]*types.UInt64Stat), @@ -131,6 +138,9 @@ func NewNodeMetrics() *NodeMetrics { DynEnergyInPlatform: &types.UInt64StatCollection{ Stat: make(map[string]*types.UInt64Stat), }, + DynEnergyInRedfish: &types.UInt64StatCollection{ + Stat: make(map[string]*types.UInt64Stat), + }, IdleEnergyInCore: &types.UInt64StatCollection{ Stat: make(map[string]*types.UInt64Stat), @@ -153,6 +163,9 @@ func NewNodeMetrics() *NodeMetrics { IdleEnergyInPlatform: &types.UInt64StatCollection{ Stat: make(map[string]*types.UInt64Stat), }, + IdleEnergyInRedfish: &types.UInt64StatCollection{ + Stat: make(map[string]*types.UInt64Stat), + }, } } @@ -163,6 +176,7 @@ func (ne *NodeMetrics) ResetDeltaValues() { ne.TotalEnergyInPkg.ResetDeltaValues() ne.TotalEnergyInGPU.ResetDeltaValues() ne.TotalEnergyInPlatform.ResetDeltaValues() + ne.TotalEnergyInRedfish.ResetDeltaValues() ne.DynEnergyInCore.ResetDeltaValues() ne.DynEnergyInDRAM.ResetDeltaValues() ne.DynEnergyInUncore.ResetDeltaValues() @@ -207,6 +221,17 @@ func (ne *NodeMetrics) SetLastestPlatformEnergy(platformEnergy map[string]float6 } } +// SetLastestRedfishEnergy adds the lastest energy consumption from the node redfish BMC +func (ne *NodeMetrics) SetLastestRedfishEnergy(redfishEnergy 
map[string]float64, gauge bool) { + for system, energy := range redfishEnergy { + if gauge { + ne.TotalEnergyInRedfish.SetDeltaStat(system, uint64(math.Ceil(energy))) + } else { + ne.TotalEnergyInRedfish.SetAggrStat(system, uint64(math.Ceil(energy))) + } + } +} + // SetNodeComponentsEnergy adds the lastest energy consumption collected from the node's components (e.g., using RAPL) func (ne *NodeMetrics) SetNodeComponentsEnergy(componentsEnergy map[int]source.NodeComponentsEnergy, gauge bool) { for pkgID, energy := range componentsEnergy { @@ -244,6 +269,7 @@ func (ne *NodeMetrics) UpdateIdleEnergy() { ne.CalcIdleEnergy(GPU) } ne.CalcIdleEnergy(PLATFORM) + ne.CalcIdleEnergy(REDFISH) // reset ne.FoundNewIdleState = false } @@ -278,6 +304,9 @@ func (ne *NodeMetrics) UpdateDynEnergy() { for sensorID := range ne.TotalEnergyInPlatform.Stat { ne.CalcDynEnergy(PLATFORM, sensorID) } + for system := range ne.TotalEnergyInRedfish.Stat { + ne.CalcDynEnergy(REDFISH, system) + } // gpu metric if config.EnabledGPU && accelerator.IsGPUCollectionSupported() { for gpuID := range ne.TotalEnergyInGPU.Stat { @@ -306,12 +335,17 @@ func (ne *NodeMetrics) SetNodeOtherComponentsEnergy() { dynCPUComponentsEnergy := ne.DynEnergyInPkg.SumAllDeltaValues() + ne.DynEnergyInDRAM.SumAllDeltaValues() + ne.DynEnergyInGPU.SumAllDeltaValues() + // other component can be either platform or redfish dynPlatformEnergy := ne.DynEnergyInPlatform.SumAllDeltaValues() if dynPlatformEnergy > dynCPUComponentsEnergy { otherComponentEnergy := dynPlatformEnergy - dynCPUComponentsEnergy ne.DynEnergyInOther.SetDeltaStat(OTHER, otherComponentEnergy) } - + dynRedfishEnergy := ne.DynEnergyInRedfish.SumAllDeltaValues() + if dynRedfishEnergy > dynCPUComponentsEnergy { + otherComponentEnergy := dynRedfishEnergy - dynCPUComponentsEnergy + ne.DynEnergyInOther.SetDeltaStat(OTHER, otherComponentEnergy) + } idleCPUComponentsEnergy := ne.IdleEnergyInPkg.SumAllDeltaValues() + ne.IdleEnergyInDRAM.SumAllDeltaValues() + 
ne.IdleEnergyInGPU.SumAllDeltaValues() @@ -320,6 +354,11 @@ func (ne *NodeMetrics) SetNodeOtherComponentsEnergy() { otherComponentEnergy := idlePlatformEnergy - idleCPUComponentsEnergy ne.IdleEnergyInOther.SetDeltaStat(OTHER, otherComponentEnergy) } + idleRedfishEnergy := ne.IdleEnergyInRedfish.SumAllDeltaValues() + if idleRedfishEnergy > idleCPUComponentsEnergy { + otherComponentEnergy := idleRedfishEnergy - idleCPUComponentsEnergy + ne.IdleEnergyInOther.SetDeltaStat(OTHER, otherComponentEnergy) + } } func (ne *NodeMetrics) GetNodeResUsagePerResType(resource string) (float64, error) { @@ -424,6 +463,8 @@ func (ne *NodeMetrics) getTotalEnergyStatCollection(component string) (energySta return ne.TotalEnergyInOther case PLATFORM: return ne.TotalEnergyInPlatform + case REDFISH: + return ne.TotalEnergyInRedfish default: klog.Fatalf("TotalEnergy component type %s is unknown\n", component) } @@ -446,6 +487,8 @@ func (ne *NodeMetrics) getDynEnergyStatCollection(component string) (energyStat return ne.DynEnergyInOther case PLATFORM: return ne.DynEnergyInPlatform + case REDFISH: + return ne.DynEnergyInRedfish default: klog.Fatalf("DynEnergy component type %s is unknown\n", component) } @@ -468,6 +511,8 @@ func (ne *NodeMetrics) getIdleEnergyStatCollection(component string) (energyStat return ne.IdleEnergyInOther case PLATFORM: return ne.IdleEnergyInPlatform + case REDFISH: + return ne.IdleEnergyInRedfish default: klog.Fatalf("IdleEnergy component type %s is unknown\n", component) } diff --git a/pkg/collector/metric_collector.go b/pkg/collector/metric_collector.go index 51b4211489..52bb7550df 100644 --- a/pkg/collector/metric_collector.go +++ b/pkg/collector/metric_collector.go @@ -25,6 +25,7 @@ import ( "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/power/accelerator" "github.com/sustainable-computing-io/kepler/pkg/power/acpi" + "github.com/sustainable-computing-io/kepler/pkg/power/redfish" 
"github.com/sustainable-computing-io/kepler/pkg/utils" collector_metric "github.com/sustainable-computing-io/kepler/pkg/collector/metric" @@ -43,6 +44,8 @@ type Collector struct { bpfHCMeter *attacher.BpfModuleTables // instance that collects the node energy consumption acpiPowerMeter *acpi.ACPI + // instance that collects the node redfish power consumption + redfishClient *redfish.RedFishClient // NodeMetrics holds all node energy and resource usage metrics NodeMetrics collector_metric.NodeMetrics @@ -87,6 +90,12 @@ func (c *Collector) Initialize() error { c.updateNodeEnergyMetrics() c.acpiPowerMeter.Run(attacher.HardwareCountersEnabled) + if str := config.GetRedfishCredFilePath(); str != "" { + if err := c.initRedFishCollector(str); err != nil { + return fmt.Errorf("%s", fmt.Sprintf("failed to init redfish collector from %s: %v", str, err)) + } + } + return nil } @@ -94,6 +103,7 @@ func (c *Collector) Destroy() { if c.bpfHCMeter != nil { attacher.DetachBPFModules(c.bpfHCMeter) } + c.stopRedfishCollector() } // Update updates the node and container energy and resource usage metrics diff --git a/pkg/collector/node_energy_collector.go b/pkg/collector/node_energy_collector.go index 858a432441..1bb19b6eb2 100644 --- a/pkg/collector/node_energy_collector.go +++ b/pkg/collector/node_energy_collector.go @@ -48,6 +48,15 @@ func (c *Collector) updatePlatformEnergy(wg *sync.WaitGroup) { } } +// updateRedfishEnergy updates the node redfish power consumption, i.e, the BMC power consumption +func (c *Collector) updateRedfishEnergy(wg *sync.WaitGroup) { + defer wg.Done() + if c.redfishClient != nil { + redfishEnergy := c.redfishClient.GetPower() + c.NodeMetrics.SetLastestRedfishEnergy(redfishEnergy, true) + } +} + // updateMeasuredNodeEnergy updates each node component power consumption, i.e., the CPU core, uncore, package/socket and DRAM func (c *Collector) updateNodeComponentsEnergy(wg *sync.WaitGroup) { defer wg.Done() @@ -95,11 +104,12 @@ func (c *Collector) 
updateNodeAvgCPUFrequency(wg *sync.WaitGroup) { // updateNodeEnergyMetrics updates the node energy consumption of each component func (c *Collector) updateNodeEnergyMetrics() { var wgNode sync.WaitGroup - wgNode.Add(4) + wgNode.Add(5) go c.updatePlatformEnergy(&wgNode) go c.updateNodeComponentsEnergy(&wgNode) go c.updateNodeAvgCPUFrequency(&wgNode) go c.updateNodeGPUEnergy(&wgNode) + go c.updateRedfishEnergy(&wgNode) wgNode.Wait() // after updating the total energy we calculate the dynamic energy // the idle energy is only updated if we find the node using less resources than previously observed diff --git a/pkg/collector/prometheus_collector.go b/pkg/collector/prometheus_collector.go index 46927e57fc..9a19fc17b0 100644 --- a/pkg/collector/prometheus_collector.go +++ b/pkg/collector/prometheus_collector.go @@ -97,6 +97,7 @@ type NodeDesc struct { nodePlatformJoulesTotal *prometheus.Desc nodeOtherComponentsJoulesTotal *prometheus.Desc nodeGPUJoulesTotal *prometheus.Desc + nodeRedfishJoulesTotal *prometheus.Desc // Additional metrics (gauge) // TODO: review if we really need to expose this metric. 
@@ -210,6 +211,8 @@ func (p *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) { ch <- p.nodeDesc.nodePackageJoulesTotal ch <- p.nodeDesc.nodePlatformJoulesTotal ch <- p.nodeDesc.nodeOtherComponentsJoulesTotal + ch <- p.nodeDesc.nodeRedfishJoulesTotal + if config.EnabledGPU { ch <- p.nodeDesc.nodeGPUJoulesTotal } @@ -319,6 +322,11 @@ func (p *PrometheusCollector) newNodeMetrics() { "Current GPU value in joules", []string{"index", "instance", "source", "mode"}, nil, ) + nodeRedfishJoulesTotal := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "node", "redfish_joules_total"), + "Current Redfish power value in joules", + []string{"instance", "source", "mode"}, nil, + ) // Additional metrics (gauge) NodeCPUFrequency := prometheus.NewDesc( @@ -346,6 +354,7 @@ func (p *PrometheusCollector) newNodeMetrics() { nodeDramJoulesTotal: nodeDramJoulesTotal, nodePackageJoulesTotal: nodePackageJoulesTotal, nodePlatformJoulesTotal: nodePlatformJoulesTotal, + nodeRedfishJoulesTotal: nodeRedfishJoulesTotal, nodeOtherComponentsJoulesTotal: nodeOtherComponentsJoulesTotal, nodeGPUJoulesTotal: nodeGPUJoulesTotal, NodeCPUFrequency: NodeCPUFrequency, @@ -661,6 +670,20 @@ func (p *PrometheusCollector) updateNodeMetrics(wg *sync.WaitGroup, ch chan<- pr idlePower, collector_metric.NodeName, "acpi", "idle", ) + dynPower = (float64(p.NodeMetrics.GetSumAggrDynEnergyFromAllSources(collector_metric.REDFISH)) / miliJouleToJoule) + ch <- prometheus.MustNewConstMetric( + p.nodeDesc.nodeRedfishJoulesTotal, + prometheus.CounterValue, + dynPower, + collector_metric.NodeName, "redfish", "dynamic", + ) + idlePower = (float64(p.NodeMetrics.GetSumAggrIdleEnergyromAllSources(collector_metric.REDFISH)) / miliJouleToJoule) + ch <- prometheus.MustNewConstMetric( + p.nodeDesc.nodeRedfishJoulesTotal, + prometheus.CounterValue, + idlePower, + collector_metric.NodeName, "redfish", "idle", + ) if config.EnabledGPU { for gpuID := range p.NodeMetrics.TotalEnergyInGPU.Stat { diff --git 
a/pkg/collector/redfish_collector.go b/pkg/collector/redfish_collector.go new file mode 100644 index 0000000000..0761657b6f --- /dev/null +++ b/pkg/collector/redfish_collector.go @@ -0,0 +1,57 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package collector + +import ( + "fmt" + "os" + + "github.com/sustainable-computing-io/kepler/pkg/nodecred" + "github.com/sustainable-computing-io/kepler/pkg/power/redfish" + "k8s.io/klog/v2" +) + +func (c *Collector) initRedFishCollector(credPath string) error { + if err := nodecred.InitNodeCredImpl(map[string]string{"redfish_cred_file_path": credPath}); err != nil { + return fmt.Errorf("%s", fmt.Sprintf("failed to init node credential: %v", err)) + } else { + klog.V(5).Infof("Initialized node credential") + nodeName := os.Getenv("NODE_NAME") + if nodeName == "" { + nodeName = "localhost" + } + redfishCred, err := nodecred.GetNodeCredByNodeName(nodeName, "redfish") + if err == nil { + userName := redfishCred["redfish_username"] + password := redfishCred["redfish_password"] + host := redfishCred["redfish_host"] + if userName != "" && password != "" && host != "" { + klog.V(5).Infof("Initialized redfish credential") + c.redfishClient = redfish.NewRedfishClient(userName, password, host) + } + } else { + return fmt.Errorf("%s", fmt.Sprintf("failed to get node credential: %v", err)) + } + } + return nil +} + +func (c *Collector) stopRedfishCollector() { + if c.redfishClient != nil { + c.redfishClient.StopPower() + } +} diff 
--git a/pkg/config/config.go b/pkg/config/config.go index 831e1da605..0652865c9e 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -90,6 +90,9 @@ var ( // dir of kernel sources for bcc kernelSourceDirs = []string{} + // redfish cred file path + redfishCredFilePath string + //////////////////////////////////// ModelServerEnable = getBoolConfig("MODEL_SERVER_ENABLE", false) ModelServerEndpoint = SetModelServerReqEndpoint() @@ -183,6 +186,14 @@ func GetKernelSourceDirs() []string { return kernelSourceDirs } +func SetRedfishCredFilePath(credFilePath string) { + redfishCredFilePath = credFilePath +} + +func GetRedfishCredFilePath() string { + return redfishCredFilePath +} + func SetModelServerReqEndpoint() (modelServerReqEndpoint string) { modelServerURL := getConfig("MODEL_SERVER_URL", modelServerService) if modelServerURL == modelServerService { diff --git a/pkg/nodecred/csv_cred.go b/pkg/nodecred/csv_cred.go new file mode 100644 index 0000000000..a32b740e9f --- /dev/null +++ b/pkg/nodecred/csv_cred.go @@ -0,0 +1,116 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package nodecred + +import ( + "encoding/csv" + "fmt" + "os" + + "k8s.io/klog/v2" +) + +// csvNodeCred is the implementation of NodeCred using an on-disk CSV file +// each row of the file is nodeName,username,password,host, e.g. +// node1,admin,password,localhost +// node2,admin,password,localhost +// node3,admin,password,localhost +type csvNodeCred struct { +} + +var ( + credMap map[string]string +) + +func (c csvNodeCred) GetNodeCredByNodeName(nodeName, target string) (map[string]string, error) { + if credMap == nil { + return nil, fmt.Errorf("credential is not set") + } else if target == "redfish" { + cred := make(map[string]string) + cred["redfish_username"] = credMap["redfish_username"] + cred["redfish_password"] = credMap["redfish_password"] + cred["redfish_host"] = credMap["redfish_host"] + if cred["redfish_username"] == "" || cred["redfish_password"] == "" || cred["redfish_host"] == "" { + return nil, fmt.Errorf("no credential found") + } + return cred, nil + } + + return nil, fmt.Errorf("no credential found for target %s", target) +} + +func (c csvNodeCred) IsSupported(info map[string]string) bool { + // read redfish_cred_file_path from info + filePath := info["redfish_cred_file_path"] + if filePath == "" { + return false + } else { + nodeName := getNodeName() + // read file from filePath + userName, password, host, err := readCSVFile(filePath, nodeName) + if err != nil { + klog.V(5).Infof("failed to read csv file: %v", err) + return false + } + klog.V(5).Infof("read csv file successfully") + credMap = make(map[string]string) + credMap["redfish_username"] = userName + credMap["redfish_password"] = password + credMap["redfish_host"] = host + } + return true +} + +func getNodeName() string { + nodeName := os.Getenv("NODE_NAME") + if nodeName == "" { + nodeName = "localhost" + } + return nodeName +} + +func readCSVFile(filePath, nodeName string) (userName, password, host string, err error) { + // Open the CSV file + file, err := os.Open(filePath) + if err != nil { + fmt.Println("Error 
opening the file:", err) + return + } + defer file.Close() + + // Create a new CSV reader + reader := csv.NewReader(file) + + // Read all rows from the CSV file + rows, err := reader.ReadAll() + if err != nil { + fmt.Println("Error reading CSV:", err) + return + } + + // Iterate over each row and check if the node name matches + for _, row := range rows { + if row[0] == nodeName && len(row) >= 4 { + userName = row[1] + password = row[2] + host = row[3] + return userName, password, host, nil + } + } + err = fmt.Errorf("node name %s not found in file %s", nodeName, filePath) + return +} diff --git a/pkg/nodecred/csv_cred_test.go b/pkg/nodecred/csv_cred_test.go new file mode 100644 index 0000000000..55007b4666 --- /dev/null +++ b/pkg/nodecred/csv_cred_test.go @@ -0,0 +1,132 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package nodecred + +import ( + "os" + "testing" +) + +func TestGetNodeCredByNodeName(t *testing.T) { + credMap = map[string]string{ + "redfish_username": "admin", + "redfish_password": "password", + "redfish_host": "node1", + } + c := csvNodeCred{} + + // Test with target "redfish" + result, err := c.GetNodeCredByNodeName("node1", "redfish") + if err != nil { + t.Errorf("Expected nil error, got: %v", err) + } + expected := credMap + if !mapStringStringEqual(result, expected) { + t.Errorf("Expected credMap: %v, got: %v", expected, result) + } + + // Test with unsupported target + _, err = c.GetNodeCredByNodeName("node1", "unsupported") + if err == nil { + t.Errorf("Expected an error, got nil") + } + + // Test with nil credMap + credMap = nil + _, err = c.GetNodeCredByNodeName("node1", "redfish") + if err == nil { + t.Errorf("Expected an error, got nil") + } +} + +func TestIsSupported(t *testing.T) { + // Test when redfish_cred_file_path is missing + info := map[string]string{} + c := csvNodeCred{} + result := c.IsSupported(info) + if result { + t.Errorf("Expected false, got: %v", result) + } + + // Test when redfish_cred_file_path is empty + info = map[string]string{ + "redfish_cred_file_path": "", + } + result = c.IsSupported(info) + if result { + t.Errorf("Expected false, got: %v", result) + } + + // create a temp csv file with the following content: + // node1,admin,password,localhost + // node2,admin,password,localhost + // node3,admin,password,localhost + file, err := os.CreateTemp("", "test.csv") + if err != nil { + t.Errorf("Expected nil error, got: %v", err) + } + defer os.Remove(file.Name()) + _, err = file.WriteString("node1,admin,password,localhost\nnode2,admin,password,localhost\nnode3,admin,password,localhost\n") + if err != nil { + t.Errorf("Expected nil error, got: %v", err) + } + + // Test with valid redfish_cred_file_path + info = map[string]string{ + "redfish_cred_file_path": file.Name(), + } + + // set ENV variable NODE_NAME to "node1" + 
os.Setenv("NODE_NAME", "node1") + // check if getNodeName() returns "node1" + nodeName := getNodeName() + if nodeName != "node1" { + t.Errorf("Expected nodeName: node1, got: %v", nodeName) + } + // readCSVFile should return the credentials for node1 + userName, password, host, err := readCSVFile(file.Name(), nodeName) + if err != nil { + t.Errorf("Expected nil error, got: %v", err) + } + if host != "localhost" { + t.Errorf("Expected host: localhost, got: %v", host) + } + if userName != "admin" { + t.Errorf("Expected userName: admin, got: %v", userName) + } + if password != "password" { + t.Errorf("Expected password: password, got: %v", password) + } + result = c.IsSupported(info) + if !result { + t.Errorf("Expected true, got: %v", result) + } +} + +// Helper function to compare two maps of strings +func mapStringStringEqual(a, b map[string]string) bool { + if len(a) != len(b) { + return false + } + for key, valA := range a { + valB, ok := b[key] + if !ok || valA != valB { + return false + } + } + return true +} diff --git a/pkg/nodecred/node_cred.go b/pkg/nodecred/node_cred.go new file mode 100644 index 0000000000..b302307046 --- /dev/null +++ b/pkg/nodecred/node_cred.go @@ -0,0 +1,65 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// per node credential interface +// Kubernetes doesn't have per node API object, so we need to use node credential to access node specific information +// e.g. node specific metrics, node specific power consumption, etc. 
+// This interface is used to access node credential. +// Instances of node credentials can be ConfigMap, Secret, key value store, etc. +package nodecred + +import ( + "fmt" + + "k8s.io/klog/v2" +) + +type NodeCredInterface interface { + // GetNodeCredByNodeName returns map of per node credential for targets such as redfish + GetNodeCredByNodeName(nodeName string, target string) (map[string]string, error) + // IsSupported returns if this node credential is supported + IsSupported(info map[string]string) bool +} + +var ( + // secretNodeCredImpl is the implementation of NodeCred using Kubernetes Secret + secretNodeCredImpl secretNodeCred + // csvNodeCredImpl is the implementation of NodeCred using on disk csv file + csvNodeCredImpl csvNodeCred + // nodeCredImpl is the pointer to the runtime detected implementation of NodeCred + nodeCredImpl NodeCredInterface = nil +) + +func InitNodeCredImpl(param map[string]string) error { + if secretNodeCredImpl.IsSupported(param) { + klog.V(1).Infoln("use configmap to obtain node credential") + nodeCredImpl = secretNodeCredImpl + } else { + klog.V(1).Infoln("Not able to obtain node credential, use file method") + if csvNodeCredImpl.IsSupported(param) { + klog.V(1).Infoln("use csv file to obtain node credential") + nodeCredImpl = csvNodeCredImpl + } + } + if nodeCredImpl != nil { + return nil + } + return fmt.Errorf("no supported node credential implementation") +} + +func GetNodeCredByNodeName(nodeName, target string) (map[string]string, error) { + return nodeCredImpl.GetNodeCredByNodeName(nodeName, target) +} diff --git a/pkg/nodecred/secret_cred.go b/pkg/nodecred/secret_cred.go new file mode 100644 index 0000000000..9662b85a52 --- /dev/null +++ b/pkg/nodecred/secret_cred.go @@ -0,0 +1,29 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodecred + +// secretNodeCred is a placeholder implementation of NodeCred for Kubernetes Secrets; IsSupported always returns false +type secretNodeCred struct { +} + +func (s secretNodeCred) GetNodeCredByNodeName(nodeName, target string) (map[string]string, error) { + return nil, nil +} + +func (s secretNodeCred) IsSupported(info map[string]string) bool { + return false +} diff --git a/pkg/power/redfish/redfish.go b/pkg/power/redfish/redfish.go new file mode 100644 index 0000000000..4c074565b6 --- /dev/null +++ b/pkg/power/redfish/redfish.go @@ -0,0 +1,190 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package redfish + +import ( + "strings" + "sync" + "time" + + "k8s.io/klog/v2" +) + +// RedfishSystemModel is the struct for the system model +// this is generated via the following command: +// redfishtool Systems +type RedfishSystemModel struct { + OdataContext string `json:"@odata.context"` + OdataID string `json:"@odata.id"` + OdataType string `json:"@odata.type"` + Description string `json:"Description"` + Members []struct { + OdataID string `json:"@odata.id"` + } `json:"Members"` + MembersOdataCount int `json:"Members@odata.count"` + Name string `json:"Name"` +} + +// RedfishPowerModel is the struct for the power model +// this is generated via the following command: +// redfishtool raw GET /redfish/v1/Chassis/System.Embedded.1/Power/PowerControl, where "System.Embedded.1" is the system ID from the RedfishSystemModel +type RedfishPowerModel struct { + OdataContext string `json:"@odata.context"` + OdataID string `json:"@odata.id"` + OdataType string `json:"@odata.type"` + MemberID string `json:"MemberId"` + Name string `json:"Name"` + PowerAllocatedWatts int `json:"PowerAllocatedWatts"` + PowerAvailableWatts int `json:"PowerAvailableWatts"` + PowerCapacityWatts int `json:"PowerCapacityWatts"` + PowerConsumedWatts int `json:"PowerConsumedWatts"` + PowerLimit struct { + CorrectionInMs int `json:"CorrectionInMs"` + LimitException string `json:"LimitException"` + LimitInWatts int `json:"LimitInWatts"` + } `json:"PowerLimit"` + PowerMetrics struct { + AverageConsumedWatts int `json:"AverageConsumedWatts"` + IntervalInMin int `json:"IntervalInMin"` + MaxConsumedWatts int `json:"MaxConsumedWatts"` + MinConsumedWatts int `json:"MinConsumedWatts"` + } `json:"PowerMetrics"` + PowerRequestedWatts int `json:"PowerRequestedWatts"` + RelatedItem []struct { + OdataID string `json:"@odata.id"` + } `json:"RelatedItem"` + RelatedItemOdataCount int `json:"RelatedItem@odata.count"` +} + +// RedfishSystemPowerResult is the system power query result +type 
RedfishSystemPowerResult struct { + system string + consumedWatts int + timestamp time.Time +} + +// RedfishAccessInfo is the struct for the access model +type RedfishAccessInfo struct { + Username string `json:"username"` + Password string `json:"password"` + Host string `json:"host"` +} + +type RedFishClient struct { + // accessInfo holds the username, password and host used for the Redfish queries + accessInfo RedfishAccessInfo + systems []*RedfishSystemPowerResult + ticker *time.Ticker + mutex sync.Mutex +} + +func NewRedfishClient(username, password, host string) *RedFishClient { + redfish := &RedFishClient{ + accessInfo: RedfishAccessInfo{ + Username: username, + Password: password, + Host: host, + }, + } + if redfish.IsPowerSupported() { + klog.Infof("starting redfish power collection\n") + } + return redfish +} + +func (rf *RedFishClient) IsPowerSupported() bool { + system, err := getRedfishSystem(rf.accessInfo) + + if err != nil { + klog.V(5).Infof("failed to get system info: %v\n", err) + return false + } + + intervalInMin := 0 + // iterate each "Members" in the system and get the power info + for _, member := range system.Members { + // split the OdataID by delimiter "/" and get the system ID + split := strings.Split(member.OdataID, "/") + if len(split) < 2 { + continue + } + id := split[len(split)-1] + res := RedfishSystemPowerResult{} + power, err := getRedfishPower(rf.accessInfo, id) + if err == nil && power.PowerConsumedWatts > 0 { + res.system = id + res.consumedWatts = power.PowerConsumedWatts + res.timestamp = time.Now() + rf.systems = append(rf.systems, &res) + klog.V(5).Infof("power info: %+v\n", power) + if power.PowerMetrics.IntervalInMin > intervalInMin { + intervalInMin = power.PowerMetrics.IntervalInMin + } + } else { + klog.V(5).Infof("failed to get power info: %v\n", err) + } + } + + // set a timer to check the power info every intervalInMin minutes + if intervalInMin > 0 { + rf.ticker = time.NewTicker(time.Duration(intervalInMin) * time.Minute) + 
go func() { + for { + <-rf.ticker.C + for _, system := range rf.systems { + power, err := getRedfishPower(rf.accessInfo, system.system) + if err == nil && power.PowerConsumedWatts > 0 { + // mutex + rf.mutex.Lock() + klog.V(5).Infof("power info: %+v\n", power) + system.consumedWatts = power.PowerConsumedWatts + system.timestamp = time.Now() + rf.mutex.Unlock() + } else { + klog.V(5).Infof("failed to get power info: %v\n", err) + } + } + } + }() + } + return rf.systems != nil && len(rf.systems) > 0 +} + +// GetPower returns the power consumption in Watt +func (rf *RedFishClient) GetPower() map[string]float64 { + if rf.systems != nil { + power := make(map[string]float64) + for _, system := range rf.systems { + rf.mutex.Lock() + now := time.Now() + // calculate the elapsed time since the last power query in seconds + elapsed := now.Sub(system.timestamp).Seconds() + klog.V(5).Infof("power info: %+v\n", system) + power[system.system] = float64(system.consumedWatts * 1000 * int(elapsed)) // convert to mW + rf.mutex.Unlock() + } + return power + } + return nil +} + +// StopPower stops the power collection timer +func (rf *RedFishClient) StopPower() { + if rf.ticker != nil { + rf.ticker.Stop() + } +} diff --git a/pkg/power/redfish/redfish_test.go b/pkg/power/redfish/redfish_test.go new file mode 100644 index 0000000000..45d71f2799 --- /dev/null +++ b/pkg/power/redfish/redfish_test.go @@ -0,0 +1,78 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package redfish + +import ( + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "testing" +) + +func TestRedFishClient_IsPowerSupported(t *testing.T) { + // Create a mock HTTP server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/Systems" { + system := RedfishSystemModel{ + Name: "Test System", + Members: []struct { + OdataID string `json:"@odata.id"` + }{ + { + OdataID: "/redfish/v1/Chassis/1", + }, + { + OdataID: "/redfish/v1/Chassis/2", + }, + }, + } + if err := json.NewEncoder(w).Encode(system); err != nil { + fmt.Println(err) + } + } else if r.URL.Path == "/redfish/v1/Chassis/1/Power/PowerControl" || r.URL.Path == "/redfish/v1/Chassis/2/Power/PowerControl" { + power := RedfishPowerModel{ + Name: "Test Power", + PowerConsumedWatts: 100, + RelatedItemOdataCount: 0, + } + if err := json.NewEncoder(w).Encode(power); err != nil { + fmt.Println(err) + } + } else { + w.WriteHeader(http.StatusNotFound) + } + })) + + defer server.Close() + fmt.Println("Mock server listening on", server.URL) + // Configure the access details for the mock server + access := RedfishAccessInfo{ + Username: "testuser", + Password: "testpass", + Host: server.URL, + } + + // Create a new Redfish client + client := NewRedfishClient(access.Username, access.Password, access.Host) + + // Check if power is supported + isPowerSupported := client.IsPowerSupported() + if !isPowerSupported { + t.Error("Expected power support, but got false") + } +} diff --git a/pkg/power/redfish/util.go b/pkg/power/redfish/util.go new file mode 100644 index 0000000000..d93216642e --- /dev/null +++ b/pkg/power/redfish/util.go @@ -0,0 +1,84 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package redfish + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "time" +) + +func getRedfishModel(access RedfishAccessInfo, endpoint string, model interface{}) error { + username := access.Username + password := access.Password + host := access.Host + + // Create a HTTP client and set up the basic authentication header + client := &http.Client{} + url := host + endpoint + req, err := http.NewRequest("GET", url, http.NoBody) + if err != nil { + return err + } + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(time.Second*30)) + defer cancel() + req = req.WithContext(ctx) + + req.SetBasicAuth(username, password) + + // Send the request and check the response + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + // Check the response status code + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("server returned status: %v", resp.Status) + } + + // Decode the response body into the provided model struct + err = json.NewDecoder(resp.Body).Decode(model) + if err != nil { + return err + } + + return nil +} + +func getRedfishSystem(access RedfishAccessInfo) (*RedfishSystemModel, error) { + var system RedfishSystemModel + err := getRedfishModel(access, "/redfish/v1/Systems", &system) + if err != nil { + return nil, err + } + + return &system, nil +} + +func getRedfishPower(access RedfishAccessInfo, system string) (*RedfishPowerModel, error) { + var power RedfishPowerModel + err := getRedfishModel(access, "/redfish/v1/Chassis/"+system+"/Power/PowerControl", &power) + if err != nil { + 
return nil, err + } + + return &power, nil +}