Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update thresholds logic in the farmerbot and create independent checks for each cloud unit utilization #819

Merged
merged 12 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion farmerbot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,13 @@ never_shutdown_nodes:
- "<your node ID to be never shutdown, optional>"
power:
periodic_wake_up_start: "<daily time to wake up nodes for your farm, default is the time you run the command, format is 00:00AM or 00:00PM, optional>"
wake_up_threshold: "<the threshold number for resources usage that will need another node to be on, default is 80, optional>"
periodic_wake_up_limit: "<the number (limit) of nodes to be woken up every day, default is 1, optional>"
overprovision_cpu: "<how much the node allows overprovisioning the CPU, default is 1, range: [1;4], optional>"
wake_up_threshold:
cru: "<the cru usage percentage above which another node needs to be powered on, default is 80, optional>"
mru: "<the mru usage percentage above which another node needs to be powered on, default is 80, optional>"
sru: "<the sru usage percentage above which another node needs to be powered on, default is 80, optional>"
hru: "<the hru usage percentage above which another node needs to be powered on, default is 80, optional>"
```

## Supported commands
Expand Down
6 changes: 3 additions & 3 deletions farmerbot/internal/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ const defaultPeriodicWakeUPLimit uint8 = 1
const defaultCPUProvision int8 = 2

// defaultWakeUpThreshold default threshold to wake up a new node
const defaultWakeUpThreshold uint8 = 80
const defaultWakeUpThreshold float64 = 80

// minWakeUpThreshold min threshold to wake up a new node
const minWakeUpThreshold uint8 = 50
const minWakeUpThreshold float64 = 50

// maxWakeUpThreshold max threshold to wake up a new node
const maxWakeUpThreshold uint8 = 80
const maxWakeUpThreshold float64 = 80

// minBalanceToRun min balance the farmer should have to run the bot
const minBalanceToRun float64 = 5
Expand Down
7 changes: 6 additions & 1 deletion farmerbot/internal/farmerbot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@ func TestFarmerbot(t *testing.T) {
inputs := Config{
FarmID: 1,
IncludedNodes: []uint32{1, 2},
Power: power{WakeUpThreshold: 50},
Power: power{WakeUpThresholdPercentages: ThresholdPercentages{
CRU: 50,
SRU: 50,
MRU: 50,
HRU: 50,
}},
}

farmerbot, err := NewFarmerBot(ctx, inputs, "dev", aliceSeed, peer.KeyTypeSr25519)
Expand Down
7 changes: 6 additions & 1 deletion farmerbot/internal/find_node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ func TestFindNode(t *testing.T) {
inputs := Config{
FarmID: 1,
IncludedNodes: []uint32{1, 2},
Power: power{WakeUpThreshold: 30},
Power: power{WakeUpThresholdPercentages: ThresholdPercentages{
CRU: 30,
SRU: 30,
MRU: 30,
HRU: 30,
}},
}

farmerbot, err := NewFarmerBot(ctx, inputs, "dev", aliceSeed, peer.KeyTypeSr25519)
Expand Down
166 changes: 132 additions & 34 deletions farmerbot/internal/power.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package internal

import (
"fmt"
"math"
"time"

"github.com/rs/zerolog/log"
Expand Down Expand Up @@ -114,21 +115,47 @@ func (f *FarmerBot) manageNodesPower(sub Substrate) error {
nodes := f.filterNodesPower([]powerState{on, wakingUp})

usedResources, totalResources := calculateResourceUsage(nodes)
if totalResources == 0 {
return nil
}

resourceUsage := 100 * float32(usedResources) / float32(totalResources)
if resourceUsage >= float32(f.config.Power.WakeUpThreshold) {
log.Info().Msgf("Too high resource usage = %.1f%%, threshold = %d%%", resourceUsage, f.config.Power.WakeUpThreshold)
return f.resourceUsageTooHigh(sub)
demand := calculateDemandBasedOnThresholds(totalResources, usedResources, f.config.Power.WakeUpThresholdPercentages)
if demand.cru > 0 || demand.mru > 0 || demand.sru > 0 || demand.hru > 0 {
log.Info().Msgf("Too high resource usage, resources usage for online nodes: CRU:%v%%, SRU:%v%%, MRU:%v%%, HRU:%v%%",
math.Ceil(float64(usedResources.cru)/float64(totalResources.cru)*100),
math.Ceil(float64(usedResources.sru)/float64(totalResources.sru)*100),
math.Ceil(float64(usedResources.mru)/float64(totalResources.mru)*100),
math.Ceil(float64(usedResources.hru)/float64(totalResources.hru)*100),
)
return f.resourceUsageTooHigh(sub, demand)
}

log.Info().Msgf("Too low resource usage = %.1f%%, threshold = %d%%", resourceUsage, f.config.Power.WakeUpThreshold)
log.Info().Msgf("Too low resource usage, resources usage for online nodes: CRU:%v%%, SRU:%v%%, MRU:%v%%, HRU:%v%%",
math.Ceil(float64(usedResources.cru)/float64(totalResources.cru)*100),
math.Ceil(float64(usedResources.sru)/float64(totalResources.sru)*100),
math.Ceil(float64(usedResources.mru)/float64(totalResources.mru)*100),
math.Ceil(float64(usedResources.hru)/float64(totalResources.hru)*100),
)
return f.resourceUsageTooLow(sub, usedResources, totalResources)
}

func calculateResourceUsage(nodes map[uint32]node) (uint64, uint64) {
// calculateDemandBasedOnThresholds returns, for each resource type, how many
// units of the used capacity lie above the configured wake-up threshold.
//
// For CRU, MRU, SRU and HRU the usage percentage (used/total*100) is compared
// with the matching entry of thresholdPercentages. When the usage is strictly
// greater than the threshold, the excess percentage is converted back into
// resource units and rounded up; otherwise the demand for that resource is
// zero. A resource whose total is zero never produces demand.
func calculateDemandBasedOnThresholds(total, used capacity, thresholdPercentages ThresholdPercentages) capacity {
	var demand capacity

	demand.cru = demandAboveThreshold(total.cru, used.cru, thresholdPercentages.CRU)
	demand.mru = demandAboveThreshold(total.mru, used.mru, thresholdPercentages.MRU)
	demand.sru = demandAboveThreshold(total.sru, used.sru, thresholdPercentages.SRU)
	demand.hru = demandAboveThreshold(total.hru, used.hru, thresholdPercentages.HRU)

	return demand
}

// demandAboveThreshold computes the demand for a single resource type: the
// number of units, rounded up, by which used exceeds thresholdPercentage
// percent of total. It returns zero when usage does not exceed the threshold.
func demandAboveThreshold(total, used uint64, thresholdPercentage float64) uint64 {
	// Without this guard the division below yields NaN or +Inf, and converting
	// such a value to uint64 gives an implementation-dependent result.
	if total == 0 {
		return 0
	}

	usagePercentage := float64(used) / float64(total) * 100
	if usagePercentage > thresholdPercentage {
		return uint64(math.Ceil((usagePercentage - thresholdPercentage) / 100 * float64(total)))
	}

	return 0
}

func calculateResourceUsage(nodes map[uint32]node) (capacity, capacity) {
usedResources := capacity{}
totalResources := capacity{}

Expand All @@ -141,23 +168,72 @@ func calculateResourceUsage(nodes map[uint32]node) (uint64, uint64) {
totalResources.add(node.resources.total)
}

used := usedResources.cru + usedResources.hru + usedResources.mru + usedResources.sru
total := totalResources.cru + totalResources.hru + totalResources.mru + totalResources.sru
return usedResources, totalResources
}

func (f *FarmerBot) selectNodesToPowerOn(demand capacity) ([]node, error) {
var selectedNodes []node
remainingDemand := demand

for _, node := range f.nodes {
if node.powerState != off {
continue // Skip nodes that are already on or waking up
}

return used, total
// Check if this node can contribute to the remaining demand
contribute := false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it is a good idea to find the smallest number of nodes to wake up?
I am thinking that in some cases node1 may satisfy only the cru demand and be added to selectedNodes to power on; then node2 satisfies the rest of the resources and could also satisfy the initial cru demand, so instead of powering on both, node2 alone would be enough.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I discussed it with @muhamadazmy. I think it is ok (your suggestion is right) to power on the next node that fits requirements. Of course, we don't guarantee if it will be used or not.

// A node counts toward a resource only when its capacity covers the whole
// remaining demand for that resource, so a match leaves nothing outstanding.
// The remaining demand is therefore reset to zero instead of decremented:
// the counters are unsigned, and subtracting a larger node capacity would
// wrap around to a huge value that can never be reported as met.
if remainingDemand.cru > 0 && uint64(node.Resources.CRU) >= remainingDemand.cru {
	contribute = true
	remainingDemand.cru = 0
}
if remainingDemand.sru > 0 && uint64(node.Resources.SRU) >= remainingDemand.sru {
	contribute = true
	remainingDemand.sru = 0
}
if remainingDemand.mru > 0 && uint64(node.Resources.MRU) >= remainingDemand.mru {
	contribute = true
	remainingDemand.mru = 0
}
if remainingDemand.hru > 0 && uint64(node.Resources.HRU) >= remainingDemand.hru {
	contribute = true
	remainingDemand.hru = 0
}

if contribute {
selectedNodes = append(selectedNodes, node)
}

// Check if all demands have been met
if remainingDemand.cru <= 0 && remainingDemand.sru <= 0 && remainingDemand.mru <= 0 && remainingDemand.hru <= 0 {
break // All demands have been met, no need to check more nodes
}
}

if remainingDemand.cru > 0 || remainingDemand.sru > 0 || remainingDemand.mru > 0 || remainingDemand.hru > 0 {
return nil, fmt.Errorf("unable to meet resources demand with available nodes")
}

return selectedNodes, nil
}

func (f *FarmerBot) resourceUsageTooHigh(sub Substrate) error {
for nodeID, node := range f.nodes {
if node.powerState == off {
return f.powerOn(sub, nodeID)
func (f *FarmerBot) resourceUsageTooHigh(sub Substrate, demand capacity) error {
log.Info().Msg("Too high resource usage. Powering on some nodes")
nodes, err := f.selectNodesToPowerOn(demand)
if err != nil {
return err
}

for _, node := range nodes {
log.Info().Uint32("nodeID", uint32(node.ID)).Msg("Too much resource usage. Turning on node")
if err := f.powerOn(sub, uint32(node.ID)); err != nil {
log.Error().Err(err).Uint32("node ID", uint32(node.ID)).Msg("couldn't power on node")
}
}

return fmt.Errorf("no available node to wake up, resources usage is high")
return nil
}

func (f *FarmerBot) resourceUsageTooLow(sub Substrate, usedResources, totalResources uint64) error {
func (f *FarmerBot) resourceUsageTooLow(sub Substrate, usedResources, totalResources capacity) error {
onNodes := f.filterNodesPower([]powerState{on})

// nodes with public config can't be shutdown
Expand Down Expand Up @@ -186,35 +262,57 @@ func (f *FarmerBot) resourceUsageTooLow(sub Substrate, usedResources, totalResou
break
}
nodesLeftOnline -= 1
newUsedResources -= node.resources.used.hru + node.resources.used.sru +
node.resources.used.mru + node.resources.used.cru
newTotalResources -= node.resources.total.hru + node.resources.total.sru +
node.resources.total.mru + node.resources.total.cru

if newTotalResources == 0 {
cpNewUsedResources := newUsedResources
cpNewTotalResources := newTotalResources

rawdaGastan marked this conversation as resolved.
Show resolved Hide resolved
newUsedResources.cru -= node.resources.used.cru
newUsedResources.sru -= node.resources.used.sru
newUsedResources.mru -= node.resources.used.mru
newUsedResources.hru -= node.resources.used.hru

newTotalResources.cru -= node.resources.total.cru
newTotalResources.sru -= node.resources.total.sru
newTotalResources.mru -= node.resources.total.mru
newTotalResources.hru -= node.resources.total.hru

if newTotalResources.isEmpty() {
break
}

newResourceUsage := 100 * float32(newUsedResources) / float32(newTotalResources)
if newResourceUsage < float32(f.config.Power.WakeUpThreshold) {
// we need to keep the resource percentage lower then the threshold
log.Info().Uint32("nodeID", uint32(node.ID)).Msgf("Too low resource usage = %.1f%%. Turning off unused node", newResourceUsage)
currentDemand := calculateDemandBasedOnThresholds(newTotalResources, newUsedResources, f.config.Power.WakeUpThresholdPercentages)

if checkResourcesMeetDemand(newTotalResources, newUsedResources, currentDemand) {
log.Info().Uint32("nodeID", uint32(node.ID)).Msg("Resource usage too low. Turning off unused node")
err := f.powerOff(sub, uint32(node.ID))
if err != nil {
log.Error().Err(err).Uint32("nodeID", uint32(node.ID)).Msg("Failed to power off node")

if node.powerState == shuttingDown {
continue
}

// restore the newUsedResources and newTotalResources
newUsedResources = cpNewUsedResources
newTotalResources = cpNewTotalResources
nodesLeftOnline += 1
newUsedResources += node.resources.used.hru + node.resources.used.sru +
node.resources.used.mru + node.resources.used.cru
newTotalResources += node.resources.total.hru + node.resources.total.sru +
node.resources.total.mru + node.resources.total.cru
}
}
}

return nil
}

// checkResourcesMeetDemand reports whether the unused capacity (total minus
// used) is at least the demand for every resource type: CRU, SRU, MRU and HRU.
// It returns false as soon as one resource type falls short.
func checkResourcesMeetDemand(total, used, demand capacity) bool {
	return freeCapacityCovers(total.cru, used.cru, demand.cru) &&
		freeCapacityCovers(total.sru, used.sru, demand.sru) &&
		freeCapacityCovers(total.mru, used.mru, demand.mru) &&
		freeCapacityCovers(total.hru, used.hru, demand.hru)
}

// freeCapacityCovers reports whether the free amount of a single resource
// type (total minus used, never below zero) meets or exceeds demand.
func freeCapacityCovers(total, used, demand uint64) bool {
	// The values are unsigned: total-used would wrap around to a huge number
	// when used exceeds total, so free capacity is treated as zero then.
	var free uint64
	if total > used {
		free = total - used
	}

	return free >= demand
}
7 changes: 6 additions & 1 deletion farmerbot/internal/power_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ func TestPower(t *testing.T) {
inputs := Config{
FarmID: 1,
IncludedNodes: []uint32{1, 2},
Power: power{WakeUpThreshold: 30},
Power: power{WakeUpThresholdPercentages: ThresholdPercentages{
CRU: 30,
SRU: 30,
MRU: 30,
HRU: 30,
}},
}

farmerbot, err := NewFarmerBot(ctx, inputs, "dev", aliceSeed, peer.KeyTypeSr25519)
Expand Down
20 changes: 13 additions & 7 deletions farmerbot/internal/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ type NodeReport struct {
HasActiveContracts bool `json:"has_active_contracts"`
Dedicated bool `json:"dedicated"`
PublicConfig bool `json:"public_config"`
UsagePercentage float32 `json:"usage_percentage"`
CRUUsagePercentage uint8 `json:"cru_usage_percentage"`
SRUUsagePercentage uint8 `json:"sru_usage_percentage"`
MRUUsagePercentage uint8 `json:"mru_usage_percentage"`
HRUUsagePercentage uint8 `json:"hru_usage_percentage"`
TimesRandomWakeUps int `json:"random_wakeups"`
SincePowerStateChanged time.Duration `json:"since_power_state_changed"`
SinceLastTimeAwake time.Duration `json:"since_last_time_awake"`
Expand Down Expand Up @@ -55,10 +58,11 @@ func createNodeReport(n node) NodeReport {
sinceLastTimeAwake = time.Since(n.lastTimeAwake)
}

var usage float32
used, total := calculateResourceUsage(map[uint32]node{nodeID: n})
if total != 0 {
usage = 100 * float32(used) / float32(total)

var hruUsage uint8
if total.hru != 0 {
hruUsage += uint8(100 * used.hru / total.hru)
}

return NodeReport{
Expand All @@ -68,7 +72,10 @@ func createNodeReport(n node) NodeReport {
HasActiveContracts: n.hasActiveContracts,
Dedicated: n.dedicated,
PublicConfig: n.PublicConfig.HasValue,
UsagePercentage: usage,
CRUUsagePercentage: uint8(100 * used.cru / total.cru),
SRUUsagePercentage: uint8(100 * used.sru / total.sru),
MRUUsagePercentage: uint8(100 * used.mru / total.mru),
HRUUsagePercentage: hruUsage,
TimesRandomWakeUps: n.timesRandomWakeUps,
SincePowerStateChanged: sincePowerStateChanged,
SinceLastTimeAwake: sinceLastTimeAwake,
Expand Down Expand Up @@ -115,8 +122,7 @@ func (f *FarmerBot) report() string {
nodeReport.HasActiveRentContract,
nodeReport.Dedicated,
nodeReport.PublicConfig,
fmt.Sprintf("%.1f%%", nodeReport.UsagePercentage),
nodeReport.HasActiveContracts,
fmt.Sprintf("CRU:%d%%, SRU:%d%%,\nMRU:%d%%, HRU:%d%%", nodeReport.CRUUsagePercentage, nodeReport.SRUUsagePercentage, nodeReport.MRUUsagePercentage, nodeReport.HRUUsagePercentage),
nodeReport.TimesRandomWakeUps,
periodicWakeup,
nodeReport.SincePowerStateChanged,
Expand Down
Loading
Loading