Skip to content

Commit 07328ee

Browse files
committed
add diagnosability for plain-Windows path
Logging additions: surface the decisions Lima makes that previously went unmarked, so a `--debug` run on a failing host shows enough to diagnose without source-diving. sshutil.IsSSHCygwin: log the toolchain detection result once per process at Debug level, including the full ssh.exe path and whether cygpath.exe was found alongside. Caches the result in a sync.Once, so the log fires exactly once even when many call sites consult it. ioutilx.WindowsSubsystemPath: when the native fallback is taken (cygpath unavailable), log the input -> output mapping at Debug, so unexpected drive-letter conversions are visible in the trace. downloader.decompressLocal: change "decompressing X with gzip" to either "with in-process gzip" or "with external <cmd>" depending on which path was taken. The previous message was misleading after the in-process gzip change because it still said "with gzip" for both. copytool.scp / rsync: log Debug when ControlMaster options are stripped on Windows, so a copy-failure trace makes the mux decision visible without reading the source. hostagent.setupMount: log the resolved sftp-server LocalPath at Debug, so reverse-sshfs failures surface what was actually passed. CI: the windows-plain job now produces a tool inventory so failures have actionable context. Print PATH before and after the Cygwin/MSYS2/Git-for-Windows scrub. Enumerate every external binary Lima might shell out to on Windows, classified into required (must resolve and must come from C:\Windows\...), forbidden (must not resolve at all -- cygpath, pacman), and optional (logged for context, e.g. rsync, gzip, qemu-img). Fail with an actionable message if a required tool is missing or a forbidden one is found, so the CI failure points at the scrub regex rather than the smoke test that follows. Pass --debug to every limactl invocation in the smoke test, and add an "if: failure()" step that dumps ha.stderr.log, ha.stdout.log, serial.log, lima.yaml, and ssh.config from the instance directory. Persist the scrubbed PATH into $GITHUB_ENV so the smoke test and the failure-dump step run against the same environment as the verification step. Signed-off-by: Jan Dubois <jan.dubois@suse.com>
1 parent 9858c83 commit 07328ee

7 files changed

Lines changed: 144 additions & 25 deletions

File tree

.github/workflows/test.yml

Lines changed: 104 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -301,45 +301,135 @@ jobs:
301301
- name: Build limactl
302302
# Avoid `make` so the build does not require MSYS2 make / bash.
303303
run: go build -o _output\bin\limactl.exe .\cmd\limactl
304-
- name: Smoke test (create / start / shell / copy / stop / delete)
304+
- name: Scrub PATH and verify only native Windows toolchain is reachable
305305
run: |
306306
$ErrorActionPreference = 'Stop'
307+
Write-Host "PATH before scrub:"
308+
$env:PATH -split ';' | ForEach-Object { Write-Host " $_" }
307309
# Scrub PATH of Cygwin/MSYS2/Git-for-Windows toolchains so that
308310
# limactl is forced to use native Windows binaries only. The
309311
# GitHub runner image has these pre-installed and on PATH by
310312
# default; we want to simulate a vanilla Windows host where
311313
# users have not installed Git for Windows or MSYS2.
312-
$env:PATH = ($env:PATH -split ';' | Where-Object {
314+
$scrubbed = ($env:PATH -split ';' | Where-Object {
313315
$_ -notmatch '\\msys(64|2)\\' -and
314316
$_ -notmatch '\\Git\\(usr|mingw\d+)\\bin' -and
315317
$_ -notmatch '\\Git\\cmd' -and
316-
$_ -notmatch '\\Git\\bin'
318+
$_ -notmatch '\\Git\\bin' -and
319+
$_ -notmatch '\\cygwin\d*\\'
317320
}) -join ';'
321+
# Persist the scrubbed PATH into subsequent steps so the smoke test
322+
# and any post-step diagnostics see the same environment.
323+
Add-Content -Path $env:GITHUB_ENV -Value "PATH=$scrubbed"
324+
$env:PATH = $scrubbed
325+
Write-Host ""
326+
Write-Host "PATH after scrub:"
327+
$env:PATH -split ';' | ForEach-Object { Write-Host " $_" }
328+
Write-Host ""
329+
# Required: ssh must be the native Windows OpenSSH binary, since
330+
# that is the toolchain we are exercising.
318331
$sshSrc = (Get-Command ssh).Source
319332
Write-Host "ssh resolves to: $sshSrc"
320333
if ($sshSrc -notlike 'C:\Windows\System32\OpenSSH\ssh.exe') {
321334
Write-Error "Expected native Windows OpenSSH; got $sshSrc"
322335
exit 1
323336
}
324-
if (Get-Command cygpath -ErrorAction SilentlyContinue) {
325-
$cyg = (Get-Command cygpath).Source
326-
Write-Error "cygpath is still on PATH at $cyg; PATH scrub did not catch it"
337+
# Tool inventory: print the resolved location of every external binary
338+
# Lima might shell out to on Windows, so the workflow log captures the
339+
# exact toolchain layout the test ran against. Tools split into:
340+
# required-native: must come from C:\Windows or be a built-in like wsl.
341+
# forbidden: must NOT resolve at all on a "plain Windows" host.
342+
# optional: may or may not be present; logged for context. Lima has
343+
# graceful fallbacks (id, rsync) or only needs them for
344+
# non-default code paths (qemu-img, decompressors).
345+
$required = @('ssh','scp','ssh-keygen','sftp-server','wsl','tar')
346+
$forbidden = @{
347+
'cygpath' = 'cygpath comes from MSYS2/Git-for-Windows/Cygwin only'
348+
'pacman' = 'pacman comes from MSYS2 only'
349+
}
350+
# bash.exe is interesting but ambiguous: Windows 10/11 ship a WSL
351+
# launcher named bash.exe in System32, which is fine. Git for Windows
352+
# also ships a bash.exe in usr/bin. Log it but do not fail on it.
353+
$optional = @('bash','sh','gzip','bzip2','xz','zstd','rsync','id','make','awk','sed','qemu-img')
354+
355+
Write-Host ""
356+
Write-Host "=== Required tools (must resolve) ==="
357+
$missingRequired = @()
358+
foreach ($cmd in $required) {
359+
$found = Get-Command $cmd -ErrorAction SilentlyContinue
360+
if ($found) { Write-Host (" {0,-15} -> {1}" -f $cmd, $found.Source) }
361+
else { Write-Host (" {0,-15} -> NOT FOUND" -f $cmd); $missingRequired += $cmd }
362+
}
363+
Write-Host ""
364+
Write-Host "=== Forbidden tools (must not resolve on plain Windows) ==="
365+
$foundForbidden = @()
366+
foreach ($cmd in $forbidden.Keys) {
367+
$found = Get-Command $cmd -ErrorAction SilentlyContinue
368+
if ($found) {
369+
Write-Host (" {0,-15} -> {1} (FORBIDDEN: {2})" -f $cmd, $found.Source, $forbidden[$cmd])
370+
$foundForbidden += $cmd
371+
} else {
372+
Write-Host (" {0,-15} -> not found (good)" -f $cmd)
373+
}
374+
}
375+
Write-Host ""
376+
Write-Host "=== Optional tools (logged for context, not asserted) ==="
377+
foreach ($cmd in $optional) {
378+
$found = Get-Command $cmd -ErrorAction SilentlyContinue
379+
if ($found) { Write-Host (" {0,-15} -> {1}" -f $cmd, $found.Source) }
380+
else { Write-Host (" {0,-15} -> not found" -f $cmd) }
381+
}
382+
Write-Host ""
383+
if ($missingRequired.Count -gt 0) {
384+
Write-Error ("Required tools not on PATH: " + ($missingRequired -join ', '))
385+
exit 1
386+
}
387+
if ($foundForbidden.Count -gt 0) {
388+
Write-Error ("Forbidden tools still on PATH: " + ($foundForbidden -join ', ') + ". Update the scrub regex in this step.")
327389
exit 1
328390
}
329-
$env:LIMA_HOME = "$env:TEMP\lima-plain"
391+
- name: Smoke test (create / start / shell / copy / stop / delete)
392+
env:
393+
# Surface decisions in path-translation, ssh-args, mux-stripping, etc.
394+
# The Debug-level logging is only useful when this job fails, but the
395+
# success-case noise is small and CI logs are kept for a long time.
396+
LIMA_HOME: ${{ runner.temp }}\lima-plain
397+
run: |
398+
$ErrorActionPreference = 'Stop'
330399
if (Test-Path $env:LIMA_HOME) { Remove-Item $env:LIMA_HOME -Recurse -Force }
331400
New-Item -ItemType Directory -Path $env:LIMA_HOME | Out-Null
332401
# Use the same WSL2 rootfs template that windows-wsl2 uses, so this
333402
# job exercises a real boot rather than the smallest synthetic config.
334-
& .\_output\bin\limactl.exe create --tty=false --name=plain `
403+
& .\_output\bin\limactl.exe --debug create --tty=false --name=plain `
335404
.\templates\experimental\wsl2.yaml
336-
& .\_output\bin\limactl.exe start plain
337-
& .\_output\bin\limactl.exe shell --tty=false plain -- uname -srm
405+
& .\_output\bin\limactl.exe --debug start plain
406+
& .\_output\bin\limactl.exe --debug shell --tty=false plain -- uname -srm
338407
'roundtrip' | Out-File -Encoding ascii "$env:LIMA_HOME\rt.txt"
339-
& .\_output\bin\limactl.exe copy "$env:LIMA_HOME\rt.txt" plain:/tmp/rt.txt
340-
& .\_output\bin\limactl.exe shell --tty=false plain -- cat /tmp/rt.txt
341-
& .\_output\bin\limactl.exe stop plain
342-
& .\_output\bin\limactl.exe delete --force plain
408+
& .\_output\bin\limactl.exe --debug copy "$env:LIMA_HOME\rt.txt" plain:/tmp/rt.txt
409+
& .\_output\bin\limactl.exe --debug shell --tty=false plain -- cat /tmp/rt.txt
410+
& .\_output\bin\limactl.exe --debug stop plain
411+
& .\_output\bin\limactl.exe --debug delete --force plain
412+
- name: Dump Lima logs on failure
413+
if: failure()
414+
env:
415+
LIMA_HOME: ${{ runner.temp }}\lima-plain
416+
run: |
417+
# Hostagent logs are the most useful artefact when start fails.
418+
# Print everything Lima wrote about the instance so failures show up
419+
# in the workflow run instead of being lost when the runner is gone.
420+
$instDir = "$env:LIMA_HOME\plain"
421+
if (-not (Test-Path $instDir)) {
422+
Write-Host "No instance directory at $instDir, nothing to dump."
423+
exit 0
424+
}
425+
Get-ChildItem $instDir | Format-Table Name, Length, LastWriteTime
426+
foreach ($name in 'ha.stdout.log','ha.stderr.log','serial.log','lima.yaml','ssh.config') {
427+
$p = Join-Path $instDir $name
428+
if (Test-Path $p) {
429+
Write-Host ("===== {0} =====" -f $p)
430+
Get-Content $p
431+
}
432+
}
343433
344434
qemu:
345435
name: "Integration tests (QEMU, macOS host)"

pkg/copytool/rsync.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ func (t *rsyncTool) Command(ctx context.Context, paths []string, opts *Options)
133133
// so rsync's ssh transport does not try to use a
134134
// ControlMaster socket that is unavailable or unreliable
135135
// on Windows.
136+
logrus.Debug("rsync: stripping ControlMaster/ControlPath/ControlPersist (Windows)")
136137
sshOpts = sshutil.SSHOptsRemovingControlPath(sshOpts)
137138
}
138139

pkg/copytool/scp.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"runtime"
1212

1313
"github.com/coreos/go-semver/semver"
14+
"github.com/sirupsen/logrus"
1415

1516
"github.com/lima-vm/lima/v2/pkg/limatype"
1617
"github.com/lima-vm/lima/v2/pkg/sshutil"
@@ -128,6 +129,7 @@ func (t *scpTool) Command(ctx context.Context, paths []string, opts *Options) (*
128129
// socket), and Cygwin-based ssh has known reliability issues with
129130
// sftp over a mux socket. See also the equivalent handling in
130131
// hostagent and limactl shell.
132+
logrus.Debug("scp: stripping ControlMaster/ControlPath/ControlPersist (Windows)")
131133
sshOpts = sshutil.SSHOptsRemovingControlPath(sshOpts)
132134
}
133135
sshArgs := sshutil.SSHArgsFromOpts(sshOpts)

pkg/downloader/downloader.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,11 @@ func decompressorByMagic(file string) string {
526526
}
527527

528528
func decompressLocal(ctx context.Context, decompressCmd, dst, src, ext, description string) error {
529-
logrus.Infof("decompressing %s with %v", ext, decompressCmd)
529+
if decompressCmd == "gzip" {
530+
logrus.Infof("decompressing %s with in-process gzip", ext)
531+
} else {
532+
logrus.Infof("decompressing %s with external %q", ext, decompressCmd)
533+
}
530534

531535
st, err := os.Stat(src)
532536
if err != nil {

pkg/hostagent/mount.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ func (a *HostAgent) setupMount(ctx context.Context, m limatype.Mount) (*mount, e
6666
if err != nil {
6767
return nil, err
6868
}
69+
logrus.Debugf("reverse-sshfs: host path %q resolved to %q for sftp-server", m.Location, resolvedLocation)
6970
}
7071

7172
sshAddress, sshPort := a.sshAddressPort()

pkg/ioutilx/ioutilx.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ func WindowsSubsystemPath(ctx context.Context, orig string) (string, error) {
6464
logrus.WithError(err).Debugf("cygpath unavailable for %q, attempting native conversion", orig)
6565
}
6666
if vol := filepath.VolumeName(orig); len(vol) == 2 && vol[1] == ':' {
67-
return "/" + strings.ToLower(vol[:1]) + filepath.ToSlash(orig[2:]), nil
67+
out := "/" + strings.ToLower(vol[:1]) + filepath.ToSlash(orig[2:])
68+
logrus.Debugf("native cygpath fallback: %q -> %q", orig, out)
69+
return out, nil
6870
}
6971
return "", fmt.Errorf("cannot convert %q to a Cygwin-style path: cygpath unavailable and input is not a drive-letter path", orig)
7072
}

pkg/sshutil/sshutil.go

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,25 +69,44 @@ func NewSSHExe() (SSHExe, error) {
6969
return sshExe, nil
7070
}
7171

72+
var cygwinDetect struct {
73+
sync.Once
74+
isCygwin bool
75+
}
76+
7277
// IsSSHCygwin reports whether sshExe is a Cygwin-based build of OpenSSH
7378
// (e.g. Git for Windows, MSYS2, or Cygwin itself), as opposed to native
7479
// Windows OpenSSH. The detection is based on whether cygpath.exe lives in
7580
// the same directory as ssh.exe, which is the layout used by Git for Windows
7681
// and MSYS2. Always returns false on non-Windows.
82+
//
83+
// The first call's result is cached and logged at Debug level. Subsequent
84+
// calls return the same answer regardless of the sshExe passed; in practice
85+
// limactl uses a single ssh binary throughout a process, so this is fine
86+
// and avoids spamming the log when toolchain-aware code paths run repeatedly.
7787
func IsSSHCygwin(sshExe SSHExe) bool {
7888
if runtime.GOOS != "windows" || sshExe.Exe == "" {
7989
return false
8090
}
81-
path := sshExe.Exe
82-
if !filepath.IsAbs(path) {
83-
resolved, err := exec.LookPath(path)
84-
if err != nil {
85-
return false
91+
cygwinDetect.Do(func() {
92+
path := sshExe.Exe
93+
if !filepath.IsAbs(path) {
94+
resolved, err := exec.LookPath(path)
95+
if err != nil {
96+
logrus.WithError(err).Debugf("IsSSHCygwin: cannot resolve %q via PATH; assuming native", sshExe.Exe)
97+
return
98+
}
99+
path = resolved
86100
}
87-
path = resolved
88-
}
89-
_, err := os.Stat(filepath.Join(filepath.Dir(path), "cygpath.exe"))
90-
return err == nil
101+
cygpathPath := filepath.Join(filepath.Dir(path), "cygpath.exe")
102+
if _, err := os.Stat(cygpathPath); err == nil {
103+
cygwinDetect.isCygwin = true
104+
logrus.Debugf("ssh at %q detected as Cygwin-based (found %q alongside)", path, cygpathPath)
105+
} else {
106+
logrus.Debugf("ssh at %q detected as native Windows OpenSSH (no cygpath.exe alongside)", path)
107+
}
108+
})
109+
return cygwinDetect.isCygwin
91110
}
92111

93112
// PathForSSH converts orig to the path form expected by the ssh family of

0 commit comments

Comments
 (0)