From ed8c5d82dc3b44cc706a1928098d38ef22a73215 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sun, 23 Jan 2022 11:07:33 -0800 Subject: [PATCH] TSCBasic: change the behaviour of `exec` on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simultaneously makes `exec` less like `exec` and more like `exec` on POSIX platforms. Replace the use of the ucrt `_execve` in favour of spelling out the implementation inline with alterations. There are multiple reasons that this needs to be done. The concept of `exec` is impossible to map to the process management structure on Windows (just as `fork` is). Fundamentally, `exec` is a replacement of the process image which will retain the process id and parts of the libc state (e.g. non-`cloexec` fds in their current state). However, the process model on Windows does not have the ability to do such an operation. Each process is immutable. `_execve` is a wrapper for `_spawnl` with `_P_OVERLAY` - it simply will create a new process and terminate the existing one. This implicitly breaks the façade - the PID is not inherited - `GetCurrentProcessId()` would return a different value (which would require to be passed from the parent to the child as the parent state will be demolished and there is no lineage that is preserved). Additionally, the new process will only inherit `HANDLE`s which have marked `bInheritable` as `TRUE` at construction time via `CreateFileW`. More importantly, when the `_execve` is used, it firstly inherit the ASCII traits which will further limit the use of this already less-than-useful portability utility. It will limit the file paths even more than the unicode variant, which is already limited by the Win32 subsystem and requires explicit escape via use of NT style paths to work around the Win32 path limitations. Secondly, and more user visible, is the fact that the implementation does not properly hand off the console. The new process is launched in the background and the current process is unceremoniously terminated, restoring control to the command interpreter (cmd.com). The order in which this occurs is unspecified and uncontrollable (i.e. the new process may start before or after the termination). More problematically, this results in two processes with access to the console stdin/stdout/stderr handles, which now creates a problem of who acquires the input. Most often, this is manifested as read by the command interpreter rather than the application, followed by the application rather than the command interpreter. We have effectively re-implemented `_execve` in place here, with a few exceptions: - we do not explicitly enumerate the inheritable handles and pass them via an undocumented handoff to ucrt (if for no other reason than we do not have a good solution to accessing the FD table) - we do not explicitly create the environment block, the normal process inheritance rules apply to the environment. - we do not pass in the first parameter to `CreateProcessW`, which would influence how the process is created (requires that the program suffix is a well-known suffix - .exe, .com, etc). Note that this will prevent the execution of a batch file as that requires that `lpApplicationName` is explicitly set to `cmd.exe` and that the first parameter of the argument string is `/c` rather than the executable path. - we use the unicode variant of the operations to allow us to access the filesystem properly - we create a job object to monitor the subsequent process hierarchy with a silent breakaway, kill-on-close Job Object to ensure that the subprocesses of the "exec"-ed image are treated as part of the same process tree - we now reliably create the process (suspended) and assign it to the job object, and then wait for the process termination before the process exit, preventing the problem of the interrupted execution. While this has limitations in the precise emulation of `exec` as defined by POSIX, it is sufficient to allow execution of subprocesses as desired. This has user-visible differences, e.g. PID and file descriptor states are lost. It has been opined by many others that `fork` and `exec` are a mistake, and it may be a better approach to replace the `exec` call with a `invoke_tool` operation which more precisely matches the usecase and retains the behavioural differences from `exec`. Resolves SR-13806! --- Sources/TSCBasic/misc.swift | 141 ++++++++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 8 deletions(-) diff --git a/Sources/TSCBasic/misc.swift b/Sources/TSCBasic/misc.swift index d39cbc11..c12f3d65 100644 --- a/Sources/TSCBasic/misc.swift +++ b/Sources/TSCBasic/misc.swift @@ -10,6 +10,9 @@ import TSCLibc import Foundation +#if os(Windows) +import WinSDK +#endif #if os(Windows) public let executableFileSuffix = ".exe" @@ -17,6 +20,70 @@ public let executableFileSuffix = ".exe" public let executableFileSuffix = "" #endif +#if os(Windows) +private func quote(_ arguments: [String]) -> String { + func quote(argument: String) -> String { + if !argument.contains(where: { " \t\n\"".contains($0) }) { + return argument + } + + // To escape the command line, we surround the argument with quotes. + // However, the complication comes due to how the Windows command line + // parser treats backslashes (\) and quotes ("). + // + // - \ is normally treated as a literal backslash + // e.g. alpha\beta\gamma => alpha\beta\gamma + // - The sequence \" is treated as a literal " + // e.g. alpha\"beta => alpha"beta + // + // But then what if we are given a path that ends with a \? + // + // Surrounding alpha\beta\ with " would be "alpha\beta\" which would be + // an unterminated string since it ends on a literal quote. To allow + // this case the parser treats: + // + // - \\" as \ followed by the " metacharacter + // - \\\" as \ followed by a literal " + // + // In general: + // - 2n \ followed by " => n \ followed by the " metacharacter + // - 2n + 1 \ followed by " => n \ followed by a literal " + + var quoted = "\"" + var unquoted = argument.unicodeScalars + + while !unquoted.isEmpty { + guard let firstNonBS = unquoted.firstIndex(where: { $0 != "\\" }) else { + // String ends with a backslash (e.g. first\second\), escape all + // the backslashes then add the metacharacter ". + let count = unquoted.count + quoted.append(String(repeating: "\\", count: 2 * count)) + break + } + + let count = unquoted.distance(from: unquoted.startIndex, to: firstNonBS) + if unquoted[firstNonBS] == "\"" { + // This is a string of \ followed by a " (e.g. first\"second). + // Escape the backslashes and the quote. + quoted.append(String(repeating: "\\", count: 2 * count + 1)) + } else { + // These are just literal backslashes + quoted.append(String(repeating: "\\", count: count)) + } + + quoted.append(String(unquoted[firstNonBS])) + + // Drop the backslashes and the following character + unquoted.removeFirst(count + 1) + } + quoted.append("\"") + + return quoted + } + return arguments.map(quote(argument:)).joined(separator: " ") +} +#endif + /// Replace the current process image with a new process image. /// /// - Parameters: @@ -25,20 +92,78 @@ public let executableFileSuffix = "" public func exec(path: String, args: [String]) throws -> Never { let cArgs = CStringArray(args) #if os(Windows) - guard cArgs.cArray.withUnsafeBufferPointer({ - $0.withMemoryRebound(to: UnsafePointer?.self, { - _execv(path, $0.baseAddress) != -1 - }) - }) - else { - throw SystemError.exec(errno, path: path, args: args) + var hJob: HANDLE + + hJob = CreateJobObjectA(nil, nil) + if hJob == HANDLE(bitPattern: 0) { + throw SystemError.exec(Int32(GetLastError()), path: path, args: args) } + defer { CloseHandle(hJob) } + + let hPort = CreateIoCompletionPort(INVALID_HANDLE_VALUE, nil, 0, 1) + if hPort == HANDLE(bitPattern: 0) { + throw SystemError.exec(Int32(GetLastError()), path: path, args: args) + } + + var acpAssociation: JOBOBJECT_ASSOCIATE_COMPLETION_PORT = JOBOBJECT_ASSOCIATE_COMPLETION_PORT() + acpAssociation.CompletionKey = hJob + acpAssociation.CompletionPort = hPort + if !SetInformationJobObject(hJob, JobObjectAssociateCompletionPortInformation, + &acpAssociation, DWORD(MemoryLayout.size)) { + throw SystemError.exec(Int32(GetLastError()), path: path, args: args) + } + + var eliLimits: JOBOBJECT_EXTENDED_LIMIT_INFORMATION = JOBOBJECT_EXTENDED_LIMIT_INFORMATION() + eliLimits.BasicLimitInformation.LimitFlags = + DWORD(JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE) | DWORD(JOB_OBJECT_LIMIT_SILENT_BREAKAWAY_OK) + if !SetInformationJobObject(hJob, JobObjectExtendedLimitInformation, &eliLimits, + DWORD(MemoryLayout.size)) { + throw SystemError.exec(Int32(GetLastError()), path: path, args: args) + } + + + var siInfo: STARTUPINFOW = STARTUPINFOW() + siInfo.cb = DWORD(MemoryLayout.size) + + var piInfo: PROCESS_INFORMATION = PROCESS_INFORMATION() + + try quote(args).withCString(encodedAs: UTF16.self) { pwszCommandLine in + if !CreateProcessW(nil, + UnsafeMutablePointer(mutating: pwszCommandLine), + nil, nil, false, + DWORD(CREATE_SUSPENDED) | DWORD(CREATE_NEW_PROCESS_GROUP), + nil, nil, &siInfo, &piInfo) { + throw SystemError.exec(Int32(GetLastError()), path: path, args: args) + } + } + + defer { CloseHandle(piInfo.hThread) } + defer { CloseHandle(piInfo.hProcess) } + + if !AssignProcessToJobObject(hJob, piInfo.hProcess) { + throw SystemError.exec(Int32(GetLastError()), path: path, args: args) + } + + _ = ResumeThread(piInfo.hThread) + + var dwCompletionCode: DWORD = 0 + var ulCompletionKey: ULONG_PTR = 0 + var lpOverlapped: LPOVERLAPPED? + repeat { + } while GetQueuedCompletionStatus(hPort, &dwCompletionCode, &ulCompletionKey, + &lpOverlapped, INFINITE) && + !(ulCompletionKey == ULONG_PTR(UInt(bitPattern: hJob)) && + dwCompletionCode == JOB_OBJECT_MSG_ACTIVE_PROCESS_ZERO) + + var dwExitCode: DWORD = DWORD(bitPattern: -1) + _ = GetExitCodeProcess(piInfo.hProcess, &dwExitCode) + _exit(Int32(bitPattern: dwExitCode)) #elseif (!canImport(Darwin) || os(macOS)) guard execv(path, cArgs.cArray) != -1 else { throw SystemError.exec(errno, path: path, args: args) } - #endif fatalError("unreachable") + #endif } @_disfavoredOverload