diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index da3ee770dba..565eb996b29 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -220,6 +220,14 @@ jobs: timeout-minutes: 120 steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Fetch homebrew-core commit messages + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + # needed by ./hack/brew-install-version.sh + repository: homebrew/homebrew-core + path: homebrew-core + fetch-depth: 0 + filter: tree:0 - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 with: go-version: 1.25.x @@ -227,6 +235,11 @@ jobs: run: go test -v ./... - name: Make run: make + - name: "Inject `no_timer_check` to kernel cmdline" + # workaround to https://github.com/lima-vm/lima/issues/84 + run: | + export PATH="$PWD/_output/bin:$PATH" + ./hack/inject-cmdline-to-template.sh _output/share/lima/templates/_images/ubuntu.yaml no_timer_check - name: Install run: sudo make install - name: Validate jsonschema @@ -234,18 +247,23 @@ jobs: - name: Validate templates # Can't validate base templates in `_default` because they have no images run: find -L templates -name '*.yaml' ! 
-path '*/_default/*' | xargs limactl validate + - name: Install test dependencies (QEMU 10.1.1) + run: | + brew install bash coreutils + # QEMU 10.1.2 seems to break on GitHub runners + # We revert back to 10.1.1, which seems to work fine + git config --global user.name "GitHub Actions Bot" + git config --global user.email "nobody@localhost" + ./hack/brew-install-version.sh qemu 10.1.1 - name: Install test dependencies # QEMU: required by Lima itself # bash: required by test-templates.sh (OS version of bash is too old) # coreutils: required by test-templates.sh for the "timeout" command # w3m : required by test-templates.sh for port forwarding tests # socat: required by test-templates.sh for port forwarding tests - run: brew install qemu bash coreutils w3m socat + run: brew install bash coreutils w3m socat - name: "Adjust LIMACTL_CREATE_ARGS" run: echo "LIMACTL_CREATE_ARGS=${LIMACTL_CREATE_ARGS} --vm-type=qemu" >>$GITHUB_ENV - - name: "Inject `no_timer_check` to kernel cmdline" - # workaround to https://github.com/lima-vm/lima/issues/84 - run: ./hack/inject-cmdline-to-template.sh templates/_images/ubuntu.yaml no_timer_check - name: Cache image used by default.yaml uses: ./.github/actions/setup_cache_for_template with: @@ -421,24 +439,42 @@ jobs: timeout-minutes: 120 steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Fetch homebrew-core commit messages + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + # needed by ./hack/brew-install-version.sh + repository: homebrew/homebrew-core + path: homebrew-core + fetch-depth: 0 + filter: tree:0 - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 with: go-version: 1.25.x - name: Make run: make + - name: "Inject `no_timer_check` to kernel cmdline" + # workaround to https://github.com/lima-vm/lima/issues/84 + run: | + export PATH="$PWD/_output/bin:$PATH" + ./hack/inject-cmdline-to-template.sh 
_output/share/lima/templates/_images/ubuntu.yaml no_timer_check - name: Install run: sudo make install - name: "Adjust LIMACTL_CREATE_ARGS" run: echo "LIMACTL_CREATE_ARGS=${LIMACTL_CREATE_ARGS} --vm-type=qemu --network=lima:shared" >>$GITHUB_ENV - - name: "Inject `no_timer_check` to kernel cmdline" - # workaround to https://github.com/lima-vm/lima/issues/84 - run: ./hack/inject-cmdline-to-template.sh templates/_images/ubuntu.yaml no_timer_check - name: Cache image used by default .yaml uses: ./.github/actions/setup_cache_for_template with: template: templates/default.yaml + - name: Install test dependencies (QEMU 10.1.1) + run: | + brew install bash coreutils + # QEMU 10.1.2 seems to break on GitHub runners + # We revert back to 10.1.1, which seems to work fine + git config --global user.name "GitHub Actions Bot" + git config --global user.email "nobody@localhost" + ./hack/brew-install-version.sh qemu 10.1.1 - name: Install test dependencies - run: brew install qemu bash coreutils w3m socat + run: brew install bash coreutils w3m socat - name: Install socket_vmnet env: SOCKET_VMNET_VERSION: v1.2.0 diff --git a/go.mod b/go.mod index 7158ea1dfd0..c168b942a83 100644 --- a/go.mod +++ b/go.mod @@ -117,7 +117,7 @@ require ( github.com/x448/float16 v0.8.4 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect github.com/yuin/gopher-lua v1.1.1 // indirect - golang.org/x/crypto v0.43.0 // indirect + golang.org/x/crypto v0.43.0 golang.org/x/mod v0.29.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/term v0.36.0 // indirect diff --git a/pkg/cidata/cidata.TEMPLATE.d/user-data b/pkg/cidata/cidata.TEMPLATE.d/user-data index e2e13045396..9054127261f 100644 --- a/pkg/cidata/cidata.TEMPLATE.d/user-data +++ b/pkg/cidata/cidata.TEMPLATE.d/user-data @@ -104,3 +104,11 @@ bootcmd: {{- end }} {{- end }} {{- end }} + +{{- if .SSHHostKeys }} +ssh_keys: + {{- range $type, $key := .SSHHostKeys }} + {{ $type }}: | +{{ indent 4 $key }} + {{- end }} +{{- end }} diff 
--git a/pkg/cidata/cidata.go b/pkg/cidata/cidata.go index 6eac7496f24..4cc4d72e28d 100644 --- a/pkg/cidata/cidata.go +++ b/pkg/cidata/cidata.go @@ -118,7 +118,7 @@ func setupEnv(instConfigEnv map[string]string, propagateProxyEnv bool, slirpGate return env, nil } -func templateArgs(ctx context.Context, bootScripts bool, instDir, name string, instConfig *limatype.LimaYAML, udpDNSLocalPort, tcpDNSLocalPort, vsockPort int, virtioPort string, noCloudInit, rosettaEnabled, rosettaBinFmt bool) (*TemplateArgs, error) { +func templateArgs(ctx context.Context, bootScripts bool, instDir, name string, instConfig *limatype.LimaYAML, udpDNSLocalPort, tcpDNSLocalPort, vsockPort int, virtioPort string, noCloudInit, rosettaEnabled, rosettaBinFmt, hostKeys bool) (*TemplateArgs, error) { if err := limayaml.Validate(instConfig, false); err != nil { return nil, err } @@ -342,11 +342,19 @@ func templateArgs(ctx context.Context, bootScripts bool, instDir, name string, i } } + if hostKeys { + sshHostKeys, err := sshutil.GenerateSSHHostKeys(instDir, args.Hostname) + if err != nil { + return nil, fmt.Errorf("failed to generate SSH host keys: %w", err) + } + args.SSHHostKeys = sshHostKeys + } + return &args, nil } func GenerateCloudConfig(ctx context.Context, instDir, name string, instConfig *limatype.LimaYAML) error { - args, err := templateArgs(ctx, false, instDir, name, instConfig, 0, 0, 0, "", false, false, false) + args, err := templateArgs(ctx, false, instDir, name, instConfig, 0, 0, 0, "", false, false, false, false) if err != nil { return err } @@ -369,7 +377,7 @@ func GenerateCloudConfig(ctx context.Context, instDir, name string, instConfig * } func GenerateISO9660(ctx context.Context, drv driver.Driver, instDir, name string, instConfig *limatype.LimaYAML, udpDNSLocalPort, tcpDNSLocalPort int, guestAgentBinary, nerdctlArchive string, vsockPort int, virtioPort string, noCloudInit, rosettaEnabled, rosettaBinFmt bool) error { - args, err := templateArgs(ctx, true, instDir, name, 
instConfig, udpDNSLocalPort, tcpDNSLocalPort, vsockPort, virtioPort, noCloudInit, rosettaEnabled, rosettaBinFmt) + args, err := templateArgs(ctx, true, instDir, name, instConfig, udpDNSLocalPort, tcpDNSLocalPort, vsockPort, virtioPort, noCloudInit, rosettaEnabled, rosettaBinFmt, true) if err != nil { return err } @@ -467,6 +475,13 @@ func GenerateISO9660(ctx context.Context, drv driver.Driver, instDir, name strin Path: "ssh_authorized_keys", Reader: strings.NewReader(strings.Join(args.SSHPubKeys, "\n")), }) + for keyType, keyContent := range args.SSHHostKeys { + suffix := strings.Replace(strings.Replace(keyType, "_public", "_key.pub", 1), "_private", "_key", 1) + layout = append(layout, iso9660util.Entry{ + Path: "ssh_host_" + suffix, + Reader: strings.NewReader(keyContent), + }) + } return writeCIDataDir(filepath.Join(instDir, filenames.CIDataISODir), layout) } diff --git a/pkg/cidata/template.go b/pkg/cidata/template.go index 84dfafce86d..db5a259b82c 100644 --- a/pkg/cidata/template.go +++ b/pkg/cidata/template.go @@ -115,6 +115,7 @@ type TemplateArgs struct { Plain bool TimeZone string NoCloudInit bool + SSHHostKeys map[string]string // `ssh_keys` field in cloud-init SSH module } func ValidateTemplateArgs(args *TemplateArgs) error { diff --git a/pkg/driver/vz/vm_darwin.go b/pkg/driver/vz/vm_darwin.go index 15cff0444e3..11fbce55595 100644 --- a/pkg/driver/vz/vm_darwin.go +++ b/pkg/driver/vz/vm_darwin.go @@ -113,18 +113,18 @@ func startVM(ctx context.Context, inst *limatype.Instance, sshLocalPort int) (vm useSSHOverVsock = b } } + hostAddress := net.JoinHostPort(inst.SSHAddress, strconv.Itoa(usernetSSHLocalPort)) if !useSSHOverVsock { logrus.Info("LIMA_SSH_OVER_VSOCK is false, skipping detection of SSH server on vsock port") - } else if err := usernetClient.WaitOpeningSSHPort(ctx, inst); err == nil { - hostAddress := net.JoinHostPort(inst.SSHAddress, strconv.Itoa(usernetSSHLocalPort)) - if err := wrapper.startVsockForwarder(ctx, 22, hostAddress); err == nil { - 
logrus.Infof("Detected SSH server is listening on the vsock port; changed %s to proxy for the vsock port", hostAddress) - usernetSSHLocalPort = 0 // disable gvisor ssh port forwarding - } else { - logrus.WithError(err).Warn("Failed to detect SSH server on vsock port, falling back to usernet forwarder") - } + } else if err := usernetClient.WaitOpeningSSHPort(ctx, inst); err != nil { + logrus.WithError(err).Info("Failed to wait for the guest SSH server to become available, falling back to usernet forwarder") + } else if err := wrapper.checkSSHOverVsockAvailable(ctx, inst); err != nil { + logrus.WithError(err).Info("Failed to detect SSH server on vsock port, falling back to usernet forwarder") + } else if err := wrapper.startVsockForwarder(ctx, 22, hostAddress); err != nil { + logrus.WithError(err).Info("Failed to start SSH server forwarder on vsock port, falling back to usernet forwarder") } else { - logrus.WithError(err).Warn("Failed to wait for the guest SSH server to become available, falling back to usernet forwarder") + logrus.Infof("Detected SSH server is listening on the vsock port; changed %s to proxy for the vsock port", hostAddress) + usernetSSHLocalPort = 0 // disable gvisor ssh port forwarding } err := usernetClient.ConfigureDriver(ctx, inst, usernetSSHLocalPort) if err != nil { diff --git a/pkg/driver/vz/vsock_forwarder.go b/pkg/driver/vz/vsock_forwarder.go index 044c3d5105a..58aa596f0aa 100644 --- a/pkg/driver/vz/vsock_forwarder.go +++ b/pkg/driver/vz/vsock_forwarder.go @@ -12,17 +12,14 @@ import ( "github.com/containers/gvisor-tap-vsock/pkg/tcpproxy" "github.com/sirupsen/logrus" + + "github.com/lima-vm/lima/v2/pkg/limatype" + "github.com/lima-vm/lima/v2/pkg/sshutil" ) func (m *virtualMachineWrapper) startVsockForwarder(ctx context.Context, vsockPort uint32, hostAddress string) error { - // Test if the vsock port is open - conn, err := m.dialVsock(ctx, vsockPort) - if err != nil { - return err - } - conn.Close() // Start listening on localhost:hostPort 
and forward to vsock:vsockPort - _, _, err = net.SplitHostPort(hostAddress) + _, _, err := net.SplitHostPort(hostAddress) if err != nil { return err } @@ -73,3 +70,9 @@ func (m *virtualMachineWrapper) dialVsock(_ context.Context, port uint32) (conn } return nil, err } + +func (m *virtualMachineWrapper) checkSSHOverVsockAvailable(ctx context.Context, inst *limatype.Instance) error { + return sshutil.WaitSSHReady(ctx, func(ctx context.Context) (net.Conn, error) { + return m.dialVsock(ctx, uint32(22)) + }, "vsock:22", *inst.Config.User.Name, inst.Name, 1) +} diff --git a/pkg/driver/wsl2/boot/02-no-cloud-init-setup.sh b/pkg/driver/wsl2/boot/02-no-cloud-init-setup.sh index ebfe351cd3e..fd86a67cb5d 100755 --- a/pkg/driver/wsl2/boot/02-no-cloud-init-setup.sh +++ b/pkg/driver/wsl2/boot/02-no-cloud-init-setup.sh @@ -17,6 +17,11 @@ chmod 700 "${LIMA_CIDATA_HOME}"/.ssh/ cp "${LIMA_CIDATA_MNT}"/ssh_authorized_keys "${LIMA_CIDATA_HOME}"/.ssh/authorized_keys chown "${LIMA_CIDATA_UID}:${LIMA_CIDATA_GID}" "${LIMA_CIDATA_HOME}"/.ssh/authorized_keys chmod 600 "${LIMA_CIDATA_HOME}"/.ssh/authorized_keys +# copy SSH host keys +mkdir -p /etc/ssh/ +cp "${LIMA_CIDATA_MNT}"/ssh_host_* /etc/ssh/ +chmod 600 /etc/ssh/ssh_host_* +chmod 644 /etc/ssh/ssh_host_*.pub # add $LIMA_CIDATA_USER to sudoers echo "${LIMA_CIDATA_USER} ALL=(ALL) NOPASSWD:ALL" | tee -a /etc/sudoers.d/99_lima_sudoers diff --git a/pkg/hostagent/requirements.go b/pkg/hostagent/requirements.go index 2873cebd786..d753142f44e 100644 --- a/pkg/hostagent/requirements.go +++ b/pkg/hostagent/requirements.go @@ -6,8 +6,11 @@ package hostagent import ( "errors" "fmt" + "os" "runtime" + "strconv" "strings" + "sync" "time" "github.com/lima-vm/sshocker/pkg/ssh" @@ -103,26 +106,31 @@ func (a *HostAgent) waitForRequirement(r requirement) error { if err != nil { return err } + var stdout, stderr string sshConfig := a.sshConfig - if r.noMaster || runtime.GOOS == "windows" { - // Remove ControlMaster, ControlPath, and ControlPersist options, - 
// because Cygwin-based SSH clients do not support multiplexing when executing commands. - // References: - // https://inbox.sourceware.org/cygwin/c98988a5-7e65-4282-b2a1-bb8e350d5fab@acm.org/T/ - // https://stackoverflow.com/questions/20959792/is-ssh-controlmaster-with-cygwin-on-windows-actually-possible - // By removing these options: - // - Avoids execution failures when the control master is not yet available. - // - Prevents error messages such as: - // > mux_client_request_session: read from master failed: Connection reset by peer - // > ControlSocket ....sock already exists, disabling multiplexing - // > mm_send_fd: sendmsg(2): Connection reset by peer\\r\\nmux_client_request_session: send fds failed\\r\\n - sshConfig = &ssh.SSHConfig{ - ConfigFile: sshConfig.ConfigFile, - Persist: false, - AdditionalArgs: sshutil.DisableControlMasterOptsFromSSHArgs(sshConfig.AdditionalArgs), + if r.external || determineUseExternalSSH() { + if r.noMaster || runtime.GOOS == "windows" { + // Remove ControlMaster, ControlPath, and ControlPersist options, + // because Cygwin-based SSH clients do not support multiplexing when executing commands. + // References: + // https://inbox.sourceware.org/cygwin/c98988a5-7e65-4282-b2a1-bb8e350d5fab@acm.org/T/ + // https://stackoverflow.com/questions/20959792/is-ssh-controlmaster-with-cygwin-on-windows-actually-possible + // By removing these options: + // - Avoids execution failures when the control master is not yet available. 
+ // - Prevents error messages such as: + // > mux_client_request_session: read from master failed: Connection reset by peer + // > ControlSocket ....sock already exists, disabling multiplexing + // > mm_send_fd: sendmsg(2): Connection reset by peer\\r\\nmux_client_request_session: send fds failed\\r\\n + sshConfig = &ssh.SSHConfig{ + ConfigFile: sshConfig.ConfigFile, + Persist: false, + AdditionalArgs: sshutil.DisableControlMasterOptsFromSSHArgs(sshConfig.AdditionalArgs), + } } + stdout, stderr, err = ssh.ExecuteScript(a.instSSHAddress, a.sshLocalPort, sshConfig, script, r.description) + } else { + stdout, stderr, err = sshutil.ExecuteScriptViaInProcessClient(a.instSSHAddress, a.sshLocalPort, *a.instConfig.User.Name, a.instName, script, r.description) } - stdout, stderr, err := ssh.ExecuteScript(a.instSSHAddress, a.sshLocalPort, sshConfig, script, r.description) logrus.Debugf("stdout=%q, stderr=%q, err=%v", stdout, stderr, err) if err != nil { return fmt.Errorf("stdout=%q, stderr=%q: %w", stdout, stderr, err) @@ -130,12 +138,33 @@ func (a *HostAgent) waitForRequirement(r requirement) error { return nil } +var determineUseExternalSSH = sync.OnceValue(func() bool { + var useExternalSSH bool + // allow overriding via LIMA_EXTERNAL_SSH_REQUIREMENT environment variable + if envVar := os.Getenv("LIMA_EXTERNAL_SSH_REQUIREMENT"); envVar != "" { + if b, err := strconv.ParseBool(envVar); err != nil { + logrus.WithError(err).Warnf("invalid LIMA_EXTERNAL_SSH_REQUIREMENT value %q", envVar) + } else { + useExternalSSH = b + } + } + if useExternalSSH { + logrus.Info("using external ssh command for executing requirement scripts") + } else { + logrus.Info("using in-process ssh client for executing requirement scripts") + } + return useExternalSSH +}) + type requirement struct { description string script string debugHint string fatal bool noMaster bool + // Execute the script externally via the ssh command instead of using the in-process client. 
+ // noMaster will be ignored if external is false. + external bool } func (a *HostAgent) essentialRequirements() []requirement { @@ -158,6 +187,7 @@ If any private key under ~/.ssh is protected with a passphrase, you need to have true `, debugHint: `The persistent ssh ControlMaster should be started immediately.`, + external: true, } if *a.instConfig.Plain { req = append(req, startControlMasterReq) diff --git a/pkg/limatype/filenames/filenames.go b/pkg/limatype/filenames/filenames.go index a8459d77888..4b5fd316228 100644 --- a/pkg/limatype/filenames/filenames.go +++ b/pkg/limatype/filenames/filenames.go @@ -50,6 +50,7 @@ const ( SerialVirtioSock = "serialv.sock" SSHSock = "ssh.sock" SSHConfig = "ssh.config" + SSHKnownHosts = "ssh_known_hosts" VhostSock = "virtiofsd-%d.sock" VNCDisplayFile = "vncdisplay" VNCPasswordFile = "vncpassword" diff --git a/pkg/networks/usernet/client.go b/pkg/networks/usernet/client.go index 6a2437c3bd7..018f65504a0 100644 --- a/pkg/networks/usernet/client.go +++ b/pkg/networks/usernet/client.go @@ -140,8 +140,10 @@ func (c *Client) WaitOpeningSSHPort(ctx context.Context, inst *limatype.Instance if err != nil { return err } + user := *inst.Config.User.Name + instanceName := inst.Name // -1 avoids both sides timing out simultaneously. 
- u := fmt.Sprintf("%s/extension/wait_port?ip=%s&port=22&timeout=%d", c.base, ipAddr, timeoutSeconds-1) + u := fmt.Sprintf("%s/extension/wait-ssh-server?ip=%s&port=22&timeout=%d&user=%s&instance-name=%s", c.base, ipAddr, timeoutSeconds-1, user, instanceName) res, err := httpclientutil.Get(ctx, c.client, u) if err != nil { return err diff --git a/pkg/networks/usernet/gvproxy.go b/pkg/networks/usernet/gvproxy.go index 89b05286013..60d5a4f26eb 100644 --- a/pkg/networks/usernet/gvproxy.go +++ b/pkg/networks/usernet/gvproxy.go @@ -22,6 +22,8 @@ import ( "github.com/containers/gvisor-tap-vsock/pkg/virtualnetwork" "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" + + "github.com/lima-vm/lima/v2/pkg/sshutil" ) type GVisorNetstackOpts struct { @@ -243,7 +245,7 @@ func httpServe(ctx context.Context, g *errgroup.Group, ln net.Listener, mux http func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux { m := n.Mux() - m.HandleFunc("/extension/wait_port", func(w http.ResponseWriter, r *http.Request) { + m.HandleFunc("/extension/wait-ssh-server", func(w http.ResponseWriter, r *http.Request) { ip := r.URL.Query().Get("ip") if net.ParseIP(ip) == nil { msg := fmt.Sprintf("invalid ip address: %s", ip) @@ -255,8 +257,15 @@ func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux { http.Error(w, err.Error(), http.StatusBadRequest) return } - port := uint16(port16) - addr := fmt.Sprintf("%s:%d", ip, port) + addr := net.JoinHostPort(ip, fmt.Sprintf("%d", uint16(port16))) + + user := r.URL.Query().Get("user") + instanceName := r.URL.Query().Get("instance-name") + if user == "" || instanceName == "" { + msg := "user and instanceName query parameters are required" + http.Error(w, msg, http.StatusBadRequest) + return + } timeoutSeconds := 10 if timeoutString := r.URL.Query().Get("timeout"); timeoutString != "" { @@ -267,27 +276,14 @@ func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux { } timeoutSeconds = int(timeout16) } - ctx, cancel 
:= context.WithTimeout(context.Background(), time.Duration(timeoutSeconds)*time.Second) - defer cancel() + dialContext := func(ctx context.Context) (net.Conn, error) { + return n.DialContextTCP(ctx, addr) + } // Wait until the port is available. - for { - conn, err := n.DialContextTCP(ctx, addr) - if err == nil { - conn.Close() - logrus.Debugf("Port is available on %s", addr) - w.WriteHeader(http.StatusOK) - break - } - select { - case <-ctx.Done(): - msg := fmt.Sprintf("timed out waiting for port to become available on %s", addr) - logrus.Warn(msg) - http.Error(w, msg, http.StatusRequestTimeout) - return - default: - } - logrus.Debugf("Waiting for port to become available on %s", addr) - time.Sleep(1 * time.Second) + if err = sshutil.WaitSSHReady(r.Context(), dialContext, addr, user, instanceName, timeoutSeconds); err != nil { + http.Error(w, err.Error(), http.StatusRequestTimeout) + } else { + w.WriteHeader(http.StatusOK) } }) return m diff --git a/pkg/osutil/osutil_unix.go b/pkg/osutil/osutil_unix.go index cf00ff69237..285014e1f2d 100644 --- a/pkg/osutil/osutil_unix.go +++ b/pkg/osutil/osutil_unix.go @@ -8,6 +8,7 @@ package osutil import ( "bytes" "context" + "errors" "fmt" "os" "os/exec" @@ -36,3 +37,7 @@ func Sysctl(ctx context.Context, name string) (string, error) { } return strings.TrimSuffix(string(stdout), "\n"), nil } + +func IsConnectionResetError(err error) bool { + return errors.Is(err, syscall.ECONNRESET) +} diff --git a/pkg/osutil/osutil_windows.go b/pkg/osutil/osutil_windows.go index a5ed533d988..ac27bce1e92 100644 --- a/pkg/osutil/osutil_windows.go +++ b/pkg/osutil/osutil_windows.go @@ -57,3 +57,7 @@ func SignalName(sig os.Signal) string { func Sysctl(_ context.Context, _ string) (string, error) { return "", errors.New("sysctl: unimplemented on Windows") } + +func IsConnectionResetError(err error) bool { + return errors.Is(err, syscall.WSAECONNRESET) +} diff --git a/pkg/sshutil/sshutil.go b/pkg/sshutil/sshutil.go index b60491b10e7..658bff0301c 
100644 --- a/pkg/sshutil/sshutil.go +++ b/pkg/sshutil/sshutil.go @@ -6,11 +6,20 @@ package sshutil import ( "bytes" "context" + "crypto" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/elliptic" + "crypto/rand" + "crypto/rsa" "encoding/base64" "encoding/binary" + "encoding/pem" "errors" "fmt" + "io" "io/fs" + "net" "os" "os/exec" "path/filepath" @@ -22,10 +31,14 @@ import ( "time" "github.com/coreos/go-semver/semver" + sshocker "github.com/lima-vm/sshocker/pkg/ssh" "github.com/mattn/go-shellwords" "github.com/sirupsen/logrus" + "golang.org/x/crypto/ssh" + "golang.org/x/crypto/ssh/knownhosts" "golang.org/x/sys/cpu" + "github.com/lima-vm/lima/v2/pkg/instance/hostname" "github.com/lima-vm/lima/v2/pkg/ioutilx" "github.com/lima-vm/lima/v2/pkg/limatype/dirnames" "github.com/lima-vm/lima/v2/pkg/limatype/filenames" @@ -242,7 +255,6 @@ func CommonOpts(ctx context.Context, sshExe SSHExe, useDotSSH bool) ([]string, e opts = append(opts, "StrictHostKeyChecking=no", - "UserKnownHostsFile=/dev/null", "NoHostAuthenticationForLocalhost=yes", "PreferredAuthentications=publickey", "Compression=no", @@ -343,18 +355,28 @@ func SSHOpts(ctx context.Context, sshExe SSHExe, instDir, username string, useDo return nil, err } controlPath := fmt.Sprintf(`ControlPath="%s"`, controlSock) + userKnownHostsPath := filepath.Join(instDir, filenames.SSHKnownHosts) + userKnownHosts := fmt.Sprintf(`UserKnownHostsFile="%s"`, userKnownHostsPath) if runtime.GOOS == "windows" { controlSock, err = ioutilx.WindowsSubsystemPath(ctx, controlSock) if err != nil { return nil, err } controlPath = fmt.Sprintf(`ControlPath='%s'`, controlSock) + userKnownHostsPath, err = ioutilx.WindowsSubsystemPath(ctx, userKnownHostsPath) + if err != nil { + return nil, err + } + userKnownHosts = fmt.Sprintf(`UserKnownHostsFile='%s'`, userKnownHostsPath) } + hostKeyAlias := fmt.Sprintf("HostKeyAlias=%s", hostname.FromInstName(filepath.Base(instDir))) opts = append(opts, fmt.Sprintf("User=%s", username), // guest and host have the 
same username, but we should specify the username explicitly (#85) "ControlMaster=auto", controlPath, "ControlPersist=yes", + userKnownHosts, + hostKeyAlias, ) if forwardAgent { opts = append(opts, "ForwardAgent=yes") @@ -509,3 +531,226 @@ func detectAESAcceleration() bool { } return cpu.ARM.HasAES || cpu.ARM64.HasAES || cpu.PPC64.IsPOWER8 || cpu.S390X.HasAES || cpu.X86.HasAES } + +// WaitSSHReady waits until the SSH server is ready to accept connections. +// The dialContext function is used to create a connection to the SSH server. +// The addr and user parameters are used for ssh.ClientConn creation; instanceName selects the known_hosts file used to verify the host key. +// The timeoutSeconds parameter specifies the maximum number of seconds to wait. +func WaitSSHReady(ctx context.Context, dialContext func(context.Context) (net.Conn, error), addr, user, instanceName string, timeoutSeconds int) error { + ctx, cancel := context.WithTimeout(ctx, time.Duration(timeoutSeconds)*time.Second) + defer cancel() + + // Prepare signer + signer, err := UserPrivateKey() + if err != nil { + return err + } + // Prepare HostKeyCallback + hostKeyChecker, err := HostKeyCheckerWithKeysInKnownHosts(instanceName) + if err != nil { + return err + } + // Prepare ssh client config + sshConfig := &ssh.ClientConfig{ + User: user, + Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, + HostKeyCallback: hostKeyChecker, + Timeout: 10 * time.Second, + } + // Wait until the SSH server is available. 
+ for { + conn, err := dialContext(ctx) + if err == nil { + sshConn, chans, reqs, err := ssh.NewClientConn(conn, addr, sshConfig) + if err == nil { + sshClient := ssh.NewClient(sshConn, chans, reqs) + return sshClient.Close() + } + conn.Close() + if !isRetryableError(err) { + return fmt.Errorf("failed to create ssh.Conn to %q: %w", addr, err) + } + } + logrus.Debugf("Waiting for SSH port to accept connections on %s", addr) + select { + case <-ctx.Done(): + return fmt.Errorf("failed to waiting for SSH port to become available on %s: %w", addr, ctx.Err()) + case <-time.After(1 * time.Second): + continue + } + } +} + +func isRetryableError(err error) bool { + // Port forwarder accepted the connection, but the destination is not ready yet. + return osutil.IsConnectionResetError(err) || + // SSH server not ready yet (e.g. host key not generated on initial boot). + strings.HasSuffix(err.Error(), "no supported methods remain") +} + +// UserPrivateKey returns the user's private key signer. +// The public key is always installed in the VM. 
+func UserPrivateKey() (ssh.Signer, error) { + configDir, err := dirnames.LimaConfigDir() + if err != nil { + return nil, err + } + privateKeyPath := filepath.Join(configDir, filenames.UserPrivateKey) + key, err := os.ReadFile(privateKeyPath) + if err != nil { + return nil, fmt.Errorf("failed to read private key %q: %w", privateKeyPath, err) + } + signer, err := ssh.ParsePrivateKey(key) + if err != nil { + return nil, fmt.Errorf("failed to parse private key %q: %w", privateKeyPath, err) + } + return signer, nil +} + +func HostKeyCheckerWithKeysInKnownHosts(instanceName string) (ssh.HostKeyCallback, error) { + publicKeys, err := PublicKeysFromKnownHosts(instanceName) + if err != nil { + return nil, err + } + return func(_ string, _ net.Addr, key ssh.PublicKey) error { + keyBytes := key.Marshal() + for _, pk := range publicKeys { + if bytes.Equal(keyBytes, pk.Marshal()) { + return nil + } + } + return errors.New("ssh: host key mismatch") + }, nil +} + +// PublicKeysFromKnownHosts returns the public keys from the known_hosts file located in the instance directory. 
+func PublicKeysFromKnownHosts(instanceName string) ([]ssh.PublicKey, error) { + // Load known_hosts from the instance directory + instanceDir, err := dirnames.InstanceDir(instanceName) + if err != nil { + return nil, fmt.Errorf("failed to get instance dir for instance %q: %w", instanceName, err) + } + knownHostsPath := filepath.Join(instanceDir, filenames.SSHKnownHosts) + knownHostsBytes, err := os.ReadFile(knownHostsPath) + if err != nil { + return nil, fmt.Errorf("failed to read known_hosts file at %s: %w", knownHostsPath, err) + } + var publicKeys []ssh.PublicKey + rest := knownHostsBytes + for len(rest) > 0 { + var publicKey ssh.PublicKey + publicKey, _, _, rest, err = ssh.ParseAuthorizedKey(rest) + if err != nil { + return nil, fmt.Errorf("failed to parse public key from known_hosts file %s: %w", knownHostsPath, err) + } + publicKeys = append(publicKeys, publicKey) + } + return publicKeys, nil +} + +// GenerateSSHHostKeys generates ECDSA, Ed25519, and RSA host key pairs for the SSH server and writes their public keys to the known_hosts file in instDir. +// For each key type, the returned map holds the private key in PEM format and the public key in authorized_keys format. 
+func GenerateSSHHostKeys(instDir, hostname string) (map[string]string, error) { + generators := map[string]func(io.Reader) (crypto.PrivateKey, error){ + "ecdsa": func(rand io.Reader) (crypto.PrivateKey, error) { + return ecdsa.GenerateKey(elliptic.P256(), rand) + }, + "ed25519": func(rand io.Reader) (crypto.PrivateKey, error) { + _, priv, err := ed25519.GenerateKey(rand) + return priv, err + }, + "rsa": func(rand io.Reader) (crypto.PrivateKey, error) { + return rsa.GenerateKey(rand, 3072) + }, + } + res := make(map[string]string, len(generators)) + var sshKnownHosts []byte + for keyType, generator := range generators { + priv, err := generator(rand.Reader) + if err != nil { + return nil, err + } + privPem, err := ssh.MarshalPrivateKey(priv, hostname) + if err != nil { + return nil, fmt.Errorf("failed to marshal %s private key to PEM format: %w", keyType, err) + } + pub, err := ssh.NewPublicKey(priv.(crypto.Signer).Public()) + if err != nil { + return nil, fmt.Errorf("failed to create ssh %s public key: %w", keyType, err) + } + res[keyType+"_private"] = string(pem.EncodeToMemory(privPem)) + res[keyType+"_public"] = string(ssh.MarshalAuthorizedKey(pub)) + sshKnownHosts = append(sshKnownHosts, knownhosts.Line([]string{hostname}, pub)...) + sshKnownHosts = append(sshKnownHosts, '\n') + } + knownHostsPath := filepath.Join(instDir, filenames.SSHKnownHosts) + if err := os.WriteFile(knownHostsPath, sshKnownHosts, 0o644); err != nil { + return nil, fmt.Errorf("failed to write known_hosts file at %s: %w", knownHostsPath, err) + } + return res, nil +} + +// ExecuteScriptViaInProcessClient executes the given script on the remote host via in-process SSH client. 
+func ExecuteScriptViaInProcessClient(host string, port int, user, instanceName, script, scriptName string) (stdout, stderr string, err error) { + // Prepare signer + signer, err := UserPrivateKey() + if err != nil { + return "", "", err + } + // Prepare HostKeyCallback + hostKeyChecker, err := HostKeyCheckerWithKeysInKnownHosts(instanceName) + if err != nil { + return "", "", err + } + + // Prepare ssh client config + sshConfig := &ssh.ClientConfig{ + User: user, + Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, + HostKeyCallback: hostKeyChecker, + Timeout: 10 * time.Second, + } + + // Connect to SSH server + addr := net.JoinHostPort(host, fmt.Sprintf("%d", port)) + var dialer net.Dialer + dialer.Timeout = sshConfig.Timeout + conn, err := dialer.DialContext(context.Background(), "tcp", addr) + if err != nil { + return "", "", fmt.Errorf("failed to dial %q: %w", addr, err) + } + sshConn, chans, reqs, err := ssh.NewClientConn(conn, addr, sshConfig) + if err != nil { + return "", "", fmt.Errorf("failed to create ssh.Conn to %q: %w", addr, err) + } + client := ssh.NewClient(sshConn, chans, reqs) + if err != nil { + return "", "", fmt.Errorf("failed to create SSH client to %q: %w", addr, err) + } + defer client.Close() + + // Create session + session, err := client.NewSession() + if err != nil { + return "", "", fmt.Errorf("failed to create SSH session to %q: %w", addr, err) + } + defer session.Close() + + // Execute script + interpreter, err := sshocker.ParseScriptInterpreter(script) + if err != nil { + return "", "", err + } + // Provide the script via stdin + session.Stdin = strings.NewReader(strings.TrimPrefix(script, "#!"+interpreter+"\n")) + // Capture stdout and stderr + var stdoutBuf, stderrBuf bytes.Buffer + session.Stdout = &stdoutBuf + session.Stderr = &stderrBuf + logrus.Debugf("executing ssh for script %q", scriptName) + err = session.Run(interpreter) + if err != nil { + return stdoutBuf.String(), stderrBuf.String(), fmt.Errorf("failed to execute script 
%q: stdout=%q, stderr=%q: %w", scriptName, stdoutBuf.String(), stderrBuf.String(), err) + } + return stdoutBuf.String(), stderrBuf.String(), nil +} diff --git a/pkg/textutil/textutil.go b/pkg/textutil/textutil.go index 29d4d1bda76..831ceec4d3b 100644 --- a/pkg/textutil/textutil.go +++ b/pkg/textutil/textutil.go @@ -16,7 +16,7 @@ import ( // ExecuteTemplate executes a text/template template. func ExecuteTemplate(tmpl string, args any) ([]byte, error) { - x, err := template.New("").Parse(tmpl) + x, err := template.New("").Funcs(TemplateFuncMap).Parse(tmpl) if err != nil { return nil, err } diff --git a/website/content/en/docs/config/environment-variables.md b/website/content/en/docs/config/environment-variables.md index 7881d5eddf4..64079240c62 100644 --- a/website/content/en/docs/config/environment-variables.md +++ b/website/content/en/docs/config/environment-variables.md @@ -106,6 +106,14 @@ This page documents the environment variables used in Lima. lima ``` +### `LIMA_EXTERNAL_SSH_REQUIREMENT` +- **Description**: Specifies whether to use an external SSH client for checking requirements instead of the built-in SSH client. +- **Default**: `false` +- **Usage**: + ```sh + export LIMA_EXTERNAL_SSH_REQUIREMENT=true + ``` + ### `LIMA_SSH_OVER_VSOCK` - **Description**: Specifies to use vsock for SSH connection instead of port forwarding. - **Default**: `true` (since v2.0.0)