Skip to content

Commit

Permalink
Add container memory limit (#1169)
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtoLord committed Apr 11, 2024
1 parent d6b99c4 commit 2c54615
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 7 deletions.
9 changes: 8 additions & 1 deletion lzy/execution-env/src/main/java/ai/lzy/env/Environment.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,15 @@ public void signal(int sigValue) {}

InputStream err();

int waitFor() throws InterruptedException;
int waitFor() throws InterruptedException, OomKilledException;

void signal(int sigValue);
}


class OomKilledException extends RuntimeException {
public OomKilledException(String msg) {
super(msg);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ public record DockerEnvDescription(
List<MountDescription> mounts,
boolean needGpu,
List<String> envVars, // In format <NAME>=<value>
@Nullable
String networkMode,
@Nullable String networkMode,
DockerClientConfig dockerClientConfig,
String user,
Set<String> allowedPlatforms // In format os/arch like "linux/amd64". Empty means all are allowed
Set<String> allowedPlatforms, // In format os/arch like "linux/amd64". Empty means all are allowed
@Nullable Long memLimitMb
) {

public static Builder newBuilder() {
Expand Down Expand Up @@ -57,6 +57,7 @@ public static class Builder {
DockerClientConfig dockerClientConfig;
String user = ROOT_USER_UID;
Set<String> allowedPlatforms = new HashSet<>();
Long memLimitMb = null;

public Builder withName(String name) {
this.name = name;
Expand Down Expand Up @@ -108,12 +109,17 @@ public Builder withAllowedPlatforms(Collection<String> allowedPlatforms) {
return this;
}

public Builder withMemLimitMb(Long memLimitMb) {
this.memLimitMb = memLimitMb;
return this;
}

public DockerEnvDescription build() {
if (StringUtils.isBlank(name)) {
name = "job-" + RandomStringUtils.randomAlphanumeric(5);
}
return new DockerEnvDescription(name, image, mounts, gpu, envVars, networkMode, dockerClientConfig, user,
allowedPlatforms);
allowedPlatforms, memLimitMb);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.io.OutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.lang.management.ManagementFactory;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -114,6 +115,10 @@ public void install(LogStream outStream, LogStream errStream) throws Environment
hostConfig.withNetworkMode(config.networkMode());
}

if (config.memLimitMb() != null) {
hostConfig.withMemory(config.memLimitMb() * 1024 * 1024);
}

AtomicInteger containerCreatingAttempt = new AtomicInteger(0);
final var container = retry.executeSupplier(() -> {
LOG.info("Creating container {}... (attempt {}); image: {}, config: {}",
Expand Down Expand Up @@ -243,11 +248,21 @@ public InputStream err() {
}

@Override
public int waitFor() throws InterruptedException {
public int waitFor() throws InterruptedException, OomKilledException {
try {
feature.get();
return Math.toIntExact(retry.executeSupplier(() -> client.inspectExecCmd(exec.getId()).exec())
var rc = Math.toIntExact(retry.executeSupplier(() -> client.inspectExecCmd(exec.getId()).exec())
.getExitCodeLong());

if (rc == 0) {
return 0;
}

if (isOomKilled()) {
throw new OomKilledException("Process exited with rc %s, and it was killed by OOM killer".formatted(rc));
}

return rc;
} catch (InterruptedException e) {
try {
startCmd.close();
Expand Down Expand Up @@ -361,4 +376,21 @@ private void checkPlatform(InspectImageResponse inspectImageResponse, LogStream
throw new RuntimeException(msg);
}
}

private boolean isOomKilled() {
if (containerId == null) {
return false;
}

var killed = client.inspectContainerCmd(containerId)
.exec()
.getState()
.getOOMKilled();

if (killed != null) {
return killed;
}

return false;
}
}
6 changes: 6 additions & 0 deletions lzy/execution-env/src/main/java/ai/lzy/env/logs/Logs.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package ai.lzy.env.logs;

import com.google.common.annotations.VisibleForTesting;
import jakarta.annotation.Nullable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -67,4 +68,9 @@ public void close() {
LOG.debug("Stream {} closed", stream.name());
}
}

@VisibleForTesting
public LogStream empty() {
return stream("empty");
}
}
33 changes: 33 additions & 0 deletions lzy/execution-env/src/test/java/ai/lzy/env/DockerOomTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package ai.lzy.env;

import ai.lzy.env.base.DockerEnvDescription;
import ai.lzy.env.base.DockerEnvironment;
import ai.lzy.env.logs.LogStream;
import ai.lzy.env.logs.Logs;
import com.github.dockerjava.core.DefaultDockerClientConfig;
import org.junit.Assert;
import org.junit.Test;

import java.util.List;

public class DockerOomTest {
private static final Logs logs = new Logs();
private static final LogStream stream = logs.empty();
static {
logs.init(List.of());
}

@Test
public void testSimple() throws Exception {
var dockerEnvDesc = DockerEnvDescription.newBuilder()
.withImage("alpine:3.19.1")
.withMemLimitMb(6L)
.withDockerClientConfig(DefaultDockerClientConfig.createDefaultConfigBuilder().build())
.build();

try (var env = new DockerEnvironment(dockerEnvDesc)) {
env.install(stream, stream);
Assert.assertThrows(Environment.OomKilledException.class, () -> env.runProcess("/bin/sh", "-c", "tail /dev/zero").waitFor());
}
}
}

0 comments on commit 2c54615

Please sign in to comment.