Skip to content

Commit

Permalink
fix(hatchery): too many routines in same time (#2965)
Browse files (browse the repository at this point in the history)
closes #2962
  • Loading branch information
fsamin authored and sguiheux committed Jun 28, 2018
1 parent f3fd6e5 commit 58aae1c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 12 deletions.
9 changes: 7 additions & 2 deletions engine/hatchery/marathon/marathon.go
Expand Up @@ -198,8 +198,13 @@ func (h *HatcheryMarathon) CanSpawn(model *sdk.Model, jobID int64, requirements
log.Info("CanSpawn> Error on h.marathonClient.Deployments() : %s", errd)
return false
}
// Do not DOS marathon, if deployment queue is longer than MarathonMaxConcurrentSpawn (default 10)
if h.Config.MarathonMaxConcurrentSpawn > 0 && len(deployments) >= h.Config.MarathonMaxConcurrentSpawn {
// Do not DoS Marathon: refuse to spawn once the deployment queue holds at least MaxConcurrentProvisioning items (falls back to 10 when the setting is 0)
maxProvisionning := h.Configuration().Provision.MaxConcurrentProvisioning
if maxProvisionning == 0 {
maxProvisionning = 10
}

if len(deployments) >= maxProvisionning {
log.Info("CanSpawn> %d item in deployment queue, waiting", len(deployments))
return false
}
Expand Down
3 changes: 0 additions & 3 deletions engine/hatchery/marathon/types.go
Expand Up @@ -24,9 +24,6 @@ type HatcheryConfiguration struct {
// MarathonPassword "marathon-password"
MarathonPassword string `mapstructure:"password" toml:"password" default:"" commented:"false" comment:"Marathon Password, you need a marathon User to use it"`

// MarathonMaxConcurrentSpawn "marathon-max-deployments"
MarathonMaxConcurrentSpawn int `mapstructure:"maxConcurrentSpawn" toml:"maxConcurrentSpawn" default:"10" commented:"false" comment:"Max elements in deployment queue. The hatchery will wait less than x elements in deployment queue before spawning new CDS worker."`

// MarathonLabelsStr "marathon-labels"
MarathonLabels string `mapstructure:"labels" toml:"labels" default:"" commented:"false" comment:"Use this option if you want to add labels on workers spawned by this hatchery.\n Format: MarathonLabels = \"A_LABEL=value-of-label,B_LABEL=value-of-label-b\""`

Expand Down
20 changes: 13 additions & 7 deletions sdk/hatchery/hatchery.go
Expand Up @@ -37,12 +37,13 @@ type CommonConfiguration struct {
MaxHeartbeatFailures int `toml:"maxHeartbeatFailures" default:"10" comment:"Maximum allowed consecutives failures on heatbeat routine"`
} `toml:"api"`
Provision struct {
Disabled bool `toml:"disabled" default:"false" comment:"Disabled provisioning. Format:true or false"`
Frequency int `toml:"frequency" default:"30" comment:"Check provisioning each n Seconds"`
MaxWorker int `toml:"maxWorker" default:"10" comment:"Maximum allowed simultaneous workers"`
GraceTimeQueued int `toml:"graceTimeQueued" default:"4" comment:"if worker is queued less than this value (seconds), hatchery does not take care of it"`
RegisterFrequency int `toml:"registerFrequency" default:"60" comment:"Check if some worker model have to be registered each n Seconds"`
WorkerLogsOptions struct {
Disabled bool `toml:"disabled" default:"false" comment:"Disabled provisioning. Format:true or false"`
Frequency int `toml:"frequency" default:"30" comment:"Check provisioning each n Seconds"`
MaxWorker int `toml:"maxWorker" default:"10" comment:"Maximum allowed simultaneous workers"`
MaxConcurrentProvisioning int `toml:"maxConcurrentProvisioning" default:"10" comment:"Maximum allowed simultaneous workers provisioning"`
GraceTimeQueued int `toml:"graceTimeQueued" default:"4" comment:"if worker is queued less than this value (seconds), hatchery does not take care of it"`
RegisterFrequency int `toml:"registerFrequency" default:"60" comment:"Check if some worker model have to be registered each n Seconds"`
WorkerLogsOptions struct {
Graylog struct {
Host string `toml:"host" comment:"Example: thot.ovh.com"`
Port int `toml:"port" comment:"Example: 12202"`
Expand Down Expand Up @@ -130,8 +131,13 @@ func receiveJob(h Interface, isWorkflowJob bool, execGroups []sdk.Group, jobID i
return false, false, nil
}

maxProvisionning := h.Configuration().Provision.MaxConcurrentProvisioning
if maxProvisionning == 0 {
maxProvisionning = 10
}

n := atomic.LoadInt64(nRoutines)
if n > 10 {
if int(n) > maxProvisionning {
log.Info("too many routines in same time %d", n)
return false, false, nil
}
Expand Down

0 comments on commit 58aae1c

Please sign in to comment.