Skip to content

Commit

Permalink
Remove cpu limits from K8s pod spec builder (#3338)
Browse files Browse the repository at this point in the history
This commit changes the resource allocation made by nextflow with K8s 
so that CPUs are allocated via `resources.request` and not adding also the 
`resources.limit` constraint. 

This allows a better usage of CPUs resources and it's aligned with allocation 
made by other scheduler such as AWS Batch

Closed #3329 

Signed-off-by: Ben Sherman <bentshermann@gmail.com>
Signed-off-by: Paolo Di Tommaso <paolo.ditommaso@gmail.com>
Co-authored-by: Paolo Di Tommaso <paolo.ditommaso@gmail.com>
  • Loading branch information
bentsherman and pditommaso committed Nov 16, 2022
1 parent b38c388 commit dc7f78b
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 31 deletions.
Expand Up @@ -355,12 +355,6 @@ class PodSpecBuilder {
env.add(entry.toSpec())
}

final res = [:]
if( this.cpus )
res.cpu = this.cpus
if( this.memory )
res.memory = this.memory

final container = [ name: this.podName, image: this.imageName ]
if( this.command )
container.command = this.command
Expand Down Expand Up @@ -432,13 +426,16 @@ class PodSpecBuilder {
container.env = env

// add resources
if( res ) {
container.resources = [requests: res, limits: new HashMap<>(res)]
if( this.cpus ) {
container.resources = addCpuResources(this.cpus, container.resources as Map)
}

if( this.memory ) {
container.resources = addMemoryResources(this.memory, container.resources as Map)
}

// add gpu settings
if( accelerator ) {
container.resources = addAcceleratorResources(accelerator, container.resources as Map)
if( this.accelerator ) {
container.resources = addAcceleratorResources(this.accelerator, container.resources as Map)
}

// add storage definitions ie. volumes and mounts
Expand Down Expand Up @@ -530,7 +527,34 @@ class PodSpecBuilder {
}

@PackageScope
@CompileDynamic
Map addCpuResources(Integer cpus, Map res) {
if( res == null )
res = [:]

final req = res.requests as Map ?: new LinkedHashMap<>(10)
req.cpu = cpus
res.requests = req

return res
}

@PackageScope
Map addMemoryResources(String memory, Map res) {
if( res == null )
res = new LinkedHashMap(10)

final req = res.requests as Map ?: new LinkedHashMap(10)
req.memory = memory
res.requests = req

final lim = res.limits as Map ?: new LinkedHashMap(10)
lim.memory = memory
res.limits = lim

return res
}

@PackageScope
String getAcceleratorType(AcceleratorResource accelerator) {

def type = accelerator.type ?: 'nvidia.com'
Expand All @@ -548,7 +572,6 @@ class PodSpecBuilder {


@PackageScope
@CompileDynamic
Map addAcceleratorResources(AcceleratorResource accelerator, Map res) {

if( res == null )
Expand All @@ -557,12 +580,12 @@ class PodSpecBuilder {
def type = getAcceleratorType(accelerator)

if( accelerator.request ) {
final req = res.requests ?: new LinkedHashMap<>(2)
final req = res.requests as Map ?: new LinkedHashMap<>(2)
req.put(type, accelerator.request)
res.requests = req
}
if( accelerator.limit ) {
final lim = res.limits ?: new LinkedHashMap<>(2)
final lim = res.limits as Map ?: new LinkedHashMap<>(2)
lim.put(type, accelerator.limit)
res.limits = lim
}
Expand Down
Expand Up @@ -362,7 +362,7 @@ class K8sDriverLauncherTest extends Specification {
],
resources: [
requests: [cpu: 2, memory: '200Mi'],
limits: [cpu: 2, memory: '200Mi']
limits: [memory: '200Mi']
],
volumeMounts: [
[name:'vol-1', mountPath:'/mnt/path/data'],
Expand Down
Expand Up @@ -120,7 +120,7 @@ class K8sTaskHandlerTest extends Specification {
[name:'nf-foo',
image:'debian:latest',
command:['/bin/bash', '-ue','/some/work/dir/.command.run'],
resources:[ requests: [cpu:1], limits:[cpu:1] ],
resources:[ requests: [cpu:1] ],
env: [ [name:'NXF_OWNER', value:'501:502'] ]
]
]
Expand Down Expand Up @@ -154,7 +154,7 @@ class K8sTaskHandlerTest extends Specification {
[name:'nf-abc',
image:'user/alpine:1.0',
command:['/bin/bash', '-ue', '/some/work/dir/.command.run'],
resources:[ requests: [cpu:4, memory:'16384Mi'], limits:[cpu:4, memory:'16384Mi'] ]
resources:[ requests: [cpu:4, memory:'16384Mi'], limits:[memory:'16384Mi'] ]
]
]
]
Expand Down Expand Up @@ -234,7 +234,7 @@ class K8sTaskHandlerTest extends Specification {
[name:'nf-foo',
image:'debian:latest',
command:['/bin/bash', '-ue','/some/work/dir/.command.run'],
resources:[ requests: [cpu:1], limits:[cpu:1] ],
resources:[ requests: [cpu:1] ],
env: [ [name:'NXF_OWNER', value:'501:502'] ]
]
]
Expand Down Expand Up @@ -267,7 +267,7 @@ class K8sTaskHandlerTest extends Specification {
[name:'nf-abc',
image:'user/alpine:1.0',
command:['/bin/bash', '-ue', '/some/work/dir/.command.run'],
resources:[ requests: [cpu:4, memory:'16384Mi'], limits:[cpu:4, memory:'16384Mi'] ]
resources:[ requests: [cpu:4, memory:'16384Mi'], limits: [memory:'16384Mi'] ]
]
]
]
Expand Down Expand Up @@ -314,7 +314,7 @@ class K8sTaskHandlerTest extends Specification {
[name:'nf-123',
image:'debian:latest',
command:['/bin/bash', '-ue','/some/work/dir/.command.run'],
resources:[requests:[cpu:1], limits:[cpu:1]]
resources:[requests:[cpu:1]]
]
]
]
Expand Down
Expand Up @@ -271,7 +271,7 @@ class PodSpecBuilderTest extends Specification {
],
resources:[
requests: ['foo.org/gpu':5, cpu:8, memory:'100Gi'],
limits:['foo.org/gpu':10, cpu:8, memory:'100Gi'] ]
limits:['foo.org/gpu':10, memory:'100Gi'] ]
]
]
]
Expand Down Expand Up @@ -767,16 +767,16 @@ class PodSpecBuilderTest extends Specification {
res.limits == null

when:
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, type:'foo.org'), [limits: [cpus: 2]])
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, type: 'foo.org'), [requests: [cpu: 2]])
then:
res.requests == ['foo.org/gpu': 5]
res.limits == [cpus:2]
res.requests == [cpu: 2, 'foo.org/gpu': 5]
res.limits == null

when:
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, limit: 10, type:'foo.org'), [limits: [cpus: 2]])
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, limit: 10, type: 'foo.org'), [requests: [cpu: 2]])
then:
res.requests == ['foo.org/gpu': 5]
res.limits == [cpus:2, 'foo.org/gpu': 10]
res.requests == [cpu: 2, 'foo.org/gpu': 5]
res.limits == ['foo.org/gpu': 10]

when:
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, type:'example.com/fpga'), null)
Expand All @@ -785,10 +785,10 @@ class PodSpecBuilderTest extends Specification {
res.limits == null

when:
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, limit: 10, type:'example.com/fpga'), [limits: [cpus: 2]])
res = builder.addAcceleratorResources(new AcceleratorResource(request: 5, limit: 10, type: 'example.com/fpga'), [requests: [cpu: 2]])
then:
res.requests == ['example.com/fpga': 5]
res.limits == [cpus:2, 'example.com/fpga': 10]
res.requests == [cpu: 2, 'example.com/fpga': 5]
res.limits == ['example.com/fpga': 10]
}


Expand Down

0 comments on commit dc7f78b

Please sign in to comment.