diff --git a/src/linux/init/WSLCInit.cpp b/src/linux/init/WSLCInit.cpp index 5def741aa..3b3f2748b 100644 --- a/src/linux/init/WSLCInit.cpp +++ b/src/linux/init/WSLCInit.cpp @@ -32,6 +32,10 @@ Module Name: #include #include "mountutilcpp.h" #include +#include +#include "JsonUtils.h" +#include "cdi_schema.h" +#include "lxfsshares.h" extern int InitializeLogging(bool SetStderr, wil::LogFunction* ExceptionCallback) noexcept; @@ -63,10 +67,95 @@ struct WSLCState static WSLCState g_state; -int CreateCaptureCrashSymlink() +void WriteWslcCdiSpec() try { - THROW_LAST_ERROR_IF(symlink("/init", "/" LX_INIT_WSL_CAPTURE_CRASH) < 0); + wsl::shared::cdi::DeviceNode dxg{}; + dxg.path = "/dev/dxg"; + dxg.permissions = "rwm"; + + wsl::shared::cdi::Mount libs{}; + libs.hostPath = LXSS_LIB_PATH; + libs.containerPath = LXSS_LIB_PATH; + libs.options = {"ro", "rbind"}; + + wsl::shared::cdi::Mount drivers{}; + drivers.hostPath = LXSS_GPU_DRIVERS_PATH; + drivers.containerPath = LXSS_GPU_DRIVERS_PATH; + drivers.options = {"ro", "rbind"}; + + wsl::shared::cdi::Hook hook{}; + hook.hookName = "createContainer"; + hook.path = "/" LX_INIT_WSLC_GPU_HOOK; + hook.args = {LX_INIT_WSLC_GPU_HOOK}; + + wsl::shared::cdi::Device gpu{}; + gpu.name = "gpu"; + gpu.containerEdits.deviceNodes.push_back(std::move(dxg)); + gpu.containerEdits.mounts.push_back(std::move(libs)); + gpu.containerEdits.mounts.push_back(std::move(drivers)); + gpu.containerEdits.hooks.push_back(std::move(hook)); + + wsl::shared::cdi::Spec spec{}; + spec.cdiVersion = "0.6.0"; + spec.kind = LX_WSLC_CDI_KIND; + spec.devices.push_back(std::move(gpu)); + + THROW_LAST_ERROR_IF(UtilMkdirPath("/etc/cdi", 0755) < 0); + THROW_LAST_ERROR_IF( + WriteToFile("/etc/cdi/microsoft.com-wslc.json", nlohmann::json(spec).dump().c_str(), O_WRONLY | O_CLOEXEC | O_CREAT | O_TRUNC) < 0); +} +CATCH_LOG() + +void WriteDockerDaemonConfig() +try +{ + constexpr auto c_daemonConfigPath = "/etc/docker/daemon.json"; + + THROW_ERRNO_IF(EEXIST, 
std::filesystem::exists(c_daemonConfigPath)); + + nlohmann::json config = nlohmann::json::object(); + config["features"]["cdi"] = true; + + THROW_LAST_ERROR_IF(UtilMkdirPath("/etc/docker", 0755) < 0); + THROW_LAST_ERROR_IF(WriteToFile(c_daemonConfigPath, config.dump().c_str(), O_WRONLY | O_CLOEXEC | O_CREAT | O_TRUNC) < 0); +} +CATCH_LOG() + +int WslcGpuHookEntry() +try +{ + // OCI runtime hooks receive the container state as JSON on stdin. + const std::string stateJson{std::istreambuf_iterator(std::cin), {}}; + THROW_ERRNO_IF(EINVAL, stateJson.empty()); + + const auto state = nlohmann::json::parse(stateJson); + const std::filesystem::path bundle = state.at("bundle").get(); + THROW_ERRNO_IF(EINVAL, !bundle.is_absolute()); + + // Read the OCI spec's root.path from the bundle's config.json. This is either an absolute path to + // the overlay-merged rootfs or a path relative to the bundle directory. + const auto spec = nlohmann::json::parse(UtilReadFileContent((bundle / "config.json").native())); + std::filesystem::path rootfsPath = spec.at("root").at("path").get(); + if (rootfsPath.is_relative()) + { + rootfsPath = bundle / rootfsPath; + } + + rootfsPath = std::filesystem::canonical(rootfsPath); + THROW_ERRNO_IF(EINVAL, rootfsPath == "/"); + THROW_ERRNO_IF(ENOTDIR, !std::filesystem::is_directory(rootfsPath)); + + const auto confDir = rootfsPath / "etc/ld.so.conf.d"; + const auto confPath = confDir / "ld.wsl.conf"; + + THROW_LAST_ERROR_IF(UtilMkdirPath(confDir.c_str(), 0755) < 0); + THROW_LAST_ERROR_IF(WriteToFile(confPath.c_str(), LXSS_LIB_PATH "\n", O_WRONLY | O_CLOEXEC | O_CREAT | O_TRUNC | O_NOFOLLOW) < 0); + + // The -r invocation below chroots into the rootfs and uses the container's own /etc/ld.so.conf chain, + // writing /etc/ld.so.cache inside the container. 
+ const char* const ldArgv[] = {LDCONFIG_COMMAND, "-r", rootfsPath.c_str(), nullptr}; + THROW_LAST_ERROR_IF(UtilCreateProcessAndWait(ldArgv[0], ldArgv) < 0); return 0; } @@ -74,8 +163,9 @@ CATCH_RETURN_ERRNO() void WSLCEnableCrashDumpCollection() { - if (CreateCaptureCrashSymlink() < 0) + if (symlink("/init", "/" LX_INIT_WSL_CAPTURE_CRASH) < 0 && errno != EEXIST) { + LOG_ERROR("symlink(/init, /" LX_INIT_WSL_CAPTURE_CRASH ") failed {}", errno); return; } @@ -619,8 +709,12 @@ void HandleMessageImpl( { THROW_LAST_ERROR_IF(Chroot(target) < 0); - // Recreate the crash dump symlink inside the new root. - CreateCaptureCrashSymlink(); + // Recreate the /init symlinks inside the new root. + THROW_LAST_ERROR_IF(symlink("/init", "/" LX_INIT_WSL_CAPTURE_CRASH) < 0 && errno != EEXIST); + THROW_LAST_ERROR_IF(symlink("/init", "/" LX_INIT_WSLC_GPU_HOOK) < 0 && errno != EEXIST); + + WriteWslcCdiSpec(); + WriteDockerDaemonConfig(); } response.Result = 0; diff --git a/src/linux/init/init.cpp b/src/linux/init/init.cpp index 6b95e98bc..da1ad4e34 100644 --- a/src/linux/init/init.cpp +++ b/src/linux/init/init.cpp @@ -162,6 +162,8 @@ wil::unique_fd UnmarshalConsoleFromServer(int MessageFd, LXBUS_IPC_CONSOLE_ID Co int WslInitWatcher(int Argc, char** Argv); +int WslcGpuHookEntry(); + int WslEntryPoint(int Argc, char* Argv[]) { // @@ -228,6 +230,10 @@ int WslEntryPoint(int Argc, char* Argv[]) { ExitCode = WslInitWatcher(Argc, Argv); } + else if (strcmp(BaseName, LX_INIT_WSLC_GPU_HOOK) == 0) + { + ExitCode = WslcGpuHookEntry(); + } else { // Handle the special case for import result messages, everything else is sent to the binfmt interpreter. 
diff --git a/src/linux/init/util.cpp b/src/linux/init/util.cpp index 2c6f3ddae..a7dfdd7d3 100644 --- a/src/linux/init/util.cpp +++ b/src/linux/init/util.cpp @@ -3338,7 +3338,7 @@ uint16_t UtilWinAfToLinuxAf(uint16_t WinAddressFamily) return LinuxAddressFamily; } -int WriteToFile(const char* Path, const char* Content, int permissions) +int WriteToFile(const char* Path, const char* Content, int OpenFlags, int Permissions) /*++ @@ -3352,6 +3352,10 @@ Routine Description: Content - Supplies the content to be written to the file. + OpenFlags - Supplies the flags passed to open(). Defaults to O_WRONLY | O_CLOEXEC | O_CREAT. NOTE(review): this parameter now precedes Permissions, so any existing caller that passed a mode as the third argument must be updated - confirm. + + Permissions - Supplies the file mode used when O_CREAT causes the file to be created. Defaults to 0644. + Return Value: 0 on success, -1 on failure. @@ -3359,7 +3363,7 @@ Return Value: --*/ { - wil::unique_fd Fd{open(Path, (O_WRONLY | O_CLOEXEC | O_CREAT), permissions)}; + wil::unique_fd Fd{open(Path, OpenFlags, Permissions)}; if (!Fd) { int errnoPrev = errno; diff --git a/src/linux/init/util.h b/src/linux/init/util.h index 1d287edd0..0a180cbe4 100644 --- a/src/linux/init/util.h +++ b/src/linux/init/util.h @@ -313,6 +313,6 @@ std::string UtilReadFileContent(std::string_view path); uint16_t UtilWinAfToLinuxAf(uint16_t AddressFamily); -int WriteToFile(const char* Path, const char* Content, int permissions = 0644); +int WriteToFile(const char* Path, const char* Content, int OpenFlags = O_WRONLY | O_CLOEXEC | O_CREAT, int Permissions = 0644); int ProcessCreateProcessMessage(wsl::shared::Transaction& Transaction, gsl::span Buffer); \ No newline at end of file diff --git a/src/shared/inc/cdi_schema.h b/src/shared/inc/cdi_schema.h new file mode 100644 index 000000000..150d6448a --- /dev/null +++ b/src/shared/inc/cdi_schema.h @@ -0,0 +1,74 @@ +/*++ + +Copyright (c) Microsoft. All rights reserved. + +Module Name: + + cdi_schema.h + +Abstract: + + Schema for Container Device Interface (CDI) specs. 
+ See https://github.com/cncf-tags/container-device-interface/blob/main/SPEC.md + +--*/ + +#pragma once + +#include "JsonUtils.h" + +namespace wsl::shared::cdi { + +struct DeviceNode +{ + std::string path; + std::string permissions; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(DeviceNode, path, permissions); +}; + +struct Mount +{ + std::string hostPath; + std::string containerPath; + std::vector options; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Mount, hostPath, containerPath, options); +}; + +struct Hook +{ + std::string hookName; + std::string path; + std::vector args; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Hook, hookName, path, args); +}; + +struct ContainerEdits +{ + std::vector deviceNodes; + std::vector mounts; + std::vector hooks; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(ContainerEdits, deviceNodes, mounts, hooks); +}; + +struct Device +{ + std::string name; + ContainerEdits containerEdits; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Device, name, containerEdits); +}; + +struct Spec +{ + std::string cdiVersion; + std::string kind; + std::vector devices; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Spec, cdiVersion, kind, devices); +}; + +} // namespace wsl::shared::cdi diff --git a/src/shared/inc/lxfsshares.h b/src/shared/inc/lxfsshares.h index e7be3d0a9..23fd21922 100644 --- a/src/shared/inc/lxfsshares.h +++ b/src/shared/inc/lxfsshares.h @@ -23,6 +23,7 @@ typedef struct _LXSS_SHARED_DIRECTORY #define LXSS_LIB_PREFIX "/usr/lib/wsl" #define LXSS_LIB_PATH LXSS_LIB_PREFIX "/lib" +#define LXSS_GPU_DRIVERS_PATH LXSS_LIB_PREFIX "/drivers" #define LXSS_GPU_DRIVERS_SHARE "drivers" #define LXSS_GPU_LIB_SHARE "lib" #define LXSS_GPU_INBOX_LIB_SHARE LXSS_GPU_LIB_SHARE "_inbox" @@ -32,4 +33,4 @@ typedef struct _LXSS_SHARED_DIRECTORY // Shared directories for GPU compute support. 
// -constexpr LXSS_SHARED_DIRECTORY g_gpuShares[] = {{LXSS_GPU_DRIVERS_SHARE, LXSS_LIB_PREFIX "/drivers"}, {LXSS_GPU_LIB_SHARE, LXSS_LIB_PATH}}; +constexpr LXSS_SHARED_DIRECTORY g_gpuShares[] = {{LXSS_GPU_DRIVERS_SHARE, LXSS_GPU_DRIVERS_PATH}, {LXSS_GPU_LIB_SHARE, LXSS_LIB_PATH}}; diff --git a/src/shared/inc/lxinitshared.h b/src/shared/inc/lxinitshared.h index eef6fc8a9..e892a73d4 100644 --- a/src/shared/inc/lxinitshared.h +++ b/src/shared/inc/lxinitshared.h @@ -247,6 +247,11 @@ Module Name: #define LX_INIT_WSL_INIT_WATCHER "init-watcher" +#define LX_INIT_WSLC_GPU_HOOK "wsl-gpu-hook" + +#define LX_WSLC_CDI_KIND "microsoft.com/wslc" +#define LX_WSLC_GPU_CDI_DEVICE LX_WSLC_CDI_KIND "=gpu" + // // WSL2-specific environment variables. // diff --git a/src/windows/inc/docker_schema.h b/src/windows/inc/docker_schema.h index 2cadfd8d5..ef010a6ce 100644 --- a/src/windows/inc/docker_schema.h +++ b/src/windows/inc/docker_schema.h @@ -215,6 +215,14 @@ struct Ulimit NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Ulimit, Name, Soft, Hard); }; +struct DeviceRequest +{ + std::string Driver; + std::vector DeviceIDs; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(DeviceRequest, Driver, DeviceIDs); +}; + struct HostConfig { std::vector Mounts; @@ -230,6 +238,7 @@ struct HostConfig // the field — so we don't bother with std::optional here. std::int64_t ShmSize{}; std::optional> Devices; + std::optional> DeviceRequests; // Per-container resource limits. 0 means "no limit" (Docker default). 
std::int64_t Memory{}; @@ -237,7 +246,7 @@ struct HostConfig std::optional> Ulimits; NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT( - HostConfig, Mounts, PortBindings, NetworkMode, Init, Dns, DnsSearch, DnsOptions, Binds, Tmpfs, Devices, ShmSize, Memory, NanoCpus, Ulimits); + HostConfig, Mounts, PortBindings, NetworkMode, Init, Dns, DnsSearch, DnsOptions, Binds, Tmpfs, Devices, DeviceRequests, ShmSize, Memory, NanoCpus, Ulimits); }; struct EndpointSettings diff --git a/src/windows/wslcsession/WSLCContainer.cpp b/src/windows/wslcsession/WSLCContainer.cpp index d1c073eb1..704b5c6d4 100644 --- a/src/windows/wslcsession/WSLCContainer.cpp +++ b/src/windows/wslcsession/WSLCContainer.cpp @@ -494,29 +494,6 @@ void ProcessAdditionalNetworks( } } -void ConfigureLdPathForGpu(std::vector& Env) -{ - static constexpr std::string_view ldLibraryPathPrefix = "LD_LIBRARY_PATH="; - auto it = std::ranges::find_if(Env, [](const std::string& e) { return e.starts_with(ldLibraryPathPrefix); }); - - if (it != Env.end()) - { - // If the user already has an LD_LIBRARY_PATH, append the GPU library paths to it. - auto ldPath = it->substr(ldLibraryPathPrefix.size()); - if (!ldPath.empty() && !ldPath.ends_with(":")) - { - it->append(":"); - } - - it->append(WSLCVirtualMachine::c_gpuLibrariesPath); - } - else - { - // Otherwise create a new entry. 
- Env.emplace_back(std::format("LD_LIBRARY_PATH={}", WSLCVirtualMachine::c_gpuLibrariesPath)); - } -} - } // namespace ContainerPortMapping::ContainerPortMapping(VMPortMapping&& VmMapping, uint16_t ContainerPort) : @@ -1203,11 +1180,6 @@ void WSLCContainerImpl::Exec(const WSLCProcessOptions* Options, LPCSTR DetachKey request.DetachKeys = DetachKeys; } - if (WI_IsFlagSet(m_containerFlags, WSLCContainerFlagsGpu)) - { - ConfigureLdPathForGpu(request.Env); - } - try { auto result = m_dockerClient.CreateExec(m_id, request); @@ -1595,17 +1567,8 @@ std::unique_ptr WSLCContainerImpl::Create( !virtualMachine.FeatureEnabled(WslcFeatureFlagsGPU), "WSLCContainerFlagsGpu requires GPU support enabled on the session"); - if (!request.HostConfig.Binds.has_value()) - { - request.HostConfig.Binds = std::vector{}; - } - - request.HostConfig.Binds->push_back(std::format("{0}:{0}:ro", WSLCVirtualMachine::c_gpuLibrariesPath)); - request.HostConfig.Binds->push_back(std::format("{0}:{0}:ro", WSLCVirtualMachine::c_gpuDriversPath)); - - request.HostConfig.Devices = {{"/dev/dxg", "/dev/dxg", "rwm"}}; - - ConfigureLdPathForGpu(request.Env); + // Request the WSL GPU device via CDI. + request.HostConfig.DeviceRequests = std::vector{{"cdi", {LX_WSLC_GPU_CDI_DEVICE}}}; } // Prepare port mappings from container options. diff --git a/test/windows/WSLCTests.cpp b/test/windows/WSLCTests.cpp index b0da4a00f..1464a6427 100644 --- a/test/windows/WSLCTests.cpp +++ b/test/windows/WSLCTests.cpp @@ -3379,17 +3379,6 @@ class WSLCTests auto session = CreateSession(settings); - // Validate that the GPU is correctly configured for containers init process. 
- { - WSLCContainerLauncher launcher( - "debian:latest", "test-container-init-gpu", {"/bin/sh", "-c", "test -c /dev/dxg && echo $LD_LIBRARY_PATH"}); - launcher.SetContainerFlags(WSLCContainerFlagsGpu); - - auto container = launcher.Launch(*session); - - ValidateContainerOutput(container, {{1, "/usr/lib/wsl/lib\n"}}, 0); - } - // Validate that GPU resources are available inside a container when WSLCContainerFlagsGpu is set. { WSLCContainerLauncher launcher("debian:latest", "test-container-gpu", {"sleep", "99999"}); @@ -3405,8 +3394,9 @@ class WSLCTests ValidateProcessOutput(process, expectedOutput, exitCode); }; - // Validate that /dev/dxg is available as a character device. - expect({"/bin/sh", "-c", "test -c /dev/dxg"}, 0); + // Validate that /dev/dxg is available as a character device that is both + // readable and writable (mknod access is not exercised by this check). + expect({"/bin/sh", "-c", "test -c /dev/dxg && test -r /dev/dxg && test -w /dev/dxg"}, 0); // Validate that the GPU library directory is mounted and contains libraries. expect({"/bin/sh", "-c", "test -d /usr/lib/wsl/lib && ls /usr/lib/wsl/lib | grep -q ."}, 0); @@ -3418,15 +3408,9 @@ class WSLCTests expect({"/usr/bin/touch", "/usr/lib/wsl/lib/test"}, 1); expect({"/usr/bin/touch", "/usr/lib/wsl/drivers/test"}, 1); - // Validate that LD_LIBRARY_PATH is set to include the GPU library path. - expect({"/bin/sh", "-c", "echo $LD_LIBRARY_PATH"}, 0, {{1, "/usr/lib/wsl/lib\n"}}); - - // Validate that exec with a pre-existing LD_LIBRARY_PATH appends the GPU path. - expect({"/bin/sh", "-c", "echo $LD_LIBRARY_PATH"}, 0, {{1, "/custom/path:/usr/lib/wsl/lib\n"}}, {"LD_LIBRARY_PATH=/custom/path"}); - - // Validate that exec with a trailing colon in LD_LIBRARY_PATH doesn't produce a double colon. 
- expect({"/bin/sh", "-c", "echo $LD_LIBRARY_PATH"}, 0, {{1, "/custom/path:/usr/lib/wsl/lib\n"}}, {"LD_LIBRARY_PATH=/custom/path:"}); - expect({"/bin/sh", "-c", "echo $LD_LIBRARY_PATH"}, 0, {{1, "/usr/lib/wsl/lib\n"}}, {"LD_LIBRARY_PATH="}); + // Validate that the dynamic linker is configured to resolve the WSL GPU libraries. + expect({"/bin/sh", "-c", "cat /etc/ld.so.conf.d/ld.wsl.conf"}, 0, {{1, "/usr/lib/wsl/lib\n"}}); + expect({"/bin/sh", "-c", "ldconfig -p | grep -q ' => /usr/lib/wsl/lib/'"}, 0); } // Validate that containers without the GPU flag do not have GPU resources. diff --git a/test/windows/WslcSdkTests.cpp b/test/windows/WslcSdkTests.cpp index 5644c68d6..4305d12e2 100644 --- a/test/windows/WslcSdkTests.cpp +++ b/test/windows/WslcSdkTests.cpp @@ -2509,9 +2509,11 @@ class WslcSdkTests VERIFY_SUCCEEDED(WslcCreateSession(&sessionSettings, &gpuSession, nullptr)); THROW_IF_FAILED(WslcLoadSessionImageFromFile(gpuSession.get(), GetTestImagePath("debian:latest").c_str(), nullptr, nullptr)); - // Validate /dev/dxg is available and LD_LIBRARY_PATH is set via the container init command. + // Validate /dev/dxg is available and the dynamic linker is configured to resolve the WSL + // GPU libraries. 
{ - const char* initArgv[] = {"/bin/sh", "-c", "test -c /dev/dxg && echo $LD_LIBRARY_PATH"}; + const char* initArgv[] = { + "/bin/sh", "-c", "test -c /dev/dxg && test -r /dev/dxg && test -w /dev/dxg && cat /etc/ld.so.conf.d/ld.wsl.conf"}; auto output = RunContainerAndCapture( gpuSession.get(), "debian:latest", {initArgv[0], initArgv[1], initArgv[2]}, WSLC_CONTAINER_FLAG_ENABLE_GPU); diff --git a/test/windows/WslcSdkWinRTTests.cpp b/test/windows/WslcSdkWinRTTests.cpp index 171a9395e..2eb679625 100644 --- a/test/windows/WslcSdkWinRTTests.cpp +++ b/test/windows/WslcSdkWinRTTests.cpp @@ -1656,11 +1656,14 @@ class WslcSdkWinRtTests const auto debianTar = GetTestImagePath("debian:latest"); gpuSession.LoadImageAsync(debianTar.wstring()).get(); - // Positive: /dev/dxg must be available and LD_LIBRARY_PATH set in a GPU container. + // Positive: /dev/dxg must be available with rwm permissions and the dynamic linker must be + // configured to resolve the WSL GPU libraries inside a GPU container. { auto procSettings = WSLCSDK::ProcessSettings(); - procSettings.CmdLine( - winrt::single_threaded_vector({L"/bin/sh", L"-c", L"test -c /dev/dxg && echo $LD_LIBRARY_PATH"})); + procSettings.CmdLine(winrt::single_threaded_vector( + {L"/bin/sh", + L"-c", + L"test -c /dev/dxg && test -r /dev/dxg && test -w /dev/dxg && cat /etc/ld.so.conf.d/ld.wsl.conf"})); procSettings.OutputMode(WSLCSDK::ProcessOutputMode::Stream); auto containerSettings = WSLCSDK::ContainerSettings(L"debian:latest");