Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ require (
github.com/x448/float16 v0.8.4 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
github.com/yuin/gopher-lua v1.1.1 // indirect
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/crypto v0.45.0
golang.org/x/mod v0.29.0 // indirect
golang.org/x/oauth2 v0.32.0 // indirect
golang.org/x/term v0.37.0 // indirect
Expand Down
18 changes: 9 additions & 9 deletions pkg/driver/vz/vm_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,18 @@ func startVM(ctx context.Context, inst *limatype.Instance, sshLocalPort int) (vm
useSSHOverVsock = b
}
}
hostAddress := net.JoinHostPort(inst.SSHAddress, strconv.Itoa(usernetSSHLocalPort))
if !useSSHOverVsock {
logrus.Info("LIMA_SSH_OVER_VSOCK is false, skipping detection of SSH server on vsock port")
} else if err := usernetClient.WaitOpeningSSHPort(ctx, inst); err == nil {
hostAddress := net.JoinHostPort(inst.SSHAddress, strconv.Itoa(usernetSSHLocalPort))
if err := wrapper.startVsockForwarder(ctx, 22, hostAddress); err == nil {
logrus.Infof("Detected SSH server is listening on the vsock port; changed %s to proxy for the vsock port", hostAddress)
usernetSSHLocalPort = 0 // disable gvisor ssh port forwarding
} else {
logrus.WithError(err).Warn("Failed to detect SSH server on vsock port, falling back to usernet forwarder")
}
} else if err := usernetClient.WaitOpeningSSHPort(ctx, inst); err != nil {
logrus.WithError(err).Info("Failed to wait for the guest SSH server to become available, falling back to usernet forwarder")
} else if err := wrapper.checkSSHOverVsockAvailable(ctx, inst); err != nil {
logrus.WithError(err).Info("Failed to detect SSH server on vsock port, falling back to usernet forwarder")
} else if err := wrapper.startVsockForwarder(ctx, 22, hostAddress); err != nil {
logrus.WithError(err).Info("Failed to start SSH server forwarder on vsock port, falling back to usernet forwarder")
} else {
logrus.WithError(err).Warn("Failed to wait for the guest SSH server to become available, falling back to usernet forwarder")
logrus.Infof("Detected SSH server is listening on the vsock port; changed %s to proxy for the vsock port", hostAddress)
usernetSSHLocalPort = 0 // disable gvisor ssh port forwarding
}
err := usernetClient.ConfigureDriver(ctx, inst, usernetSSHLocalPort)
if err != nil {
Expand Down
17 changes: 10 additions & 7 deletions pkg/driver/vz/vsock_forwarder.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,14 @@ import (

"github.com/containers/gvisor-tap-vsock/pkg/tcpproxy"
"github.com/sirupsen/logrus"

"github.com/lima-vm/lima/v2/pkg/limatype"
"github.com/lima-vm/lima/v2/pkg/sshutil"
)

func (m *virtualMachineWrapper) startVsockForwarder(ctx context.Context, vsockPort uint32, hostAddress string) error {
// Test if the vsock port is open
conn, err := m.dialVsock(ctx, vsockPort)
if err != nil {
return err
}
conn.Close()
// Start listening on localhost:hostPort and forward to vsock:vsockPort
_, _, err = net.SplitHostPort(hostAddress)
_, _, err := net.SplitHostPort(hostAddress)
if err != nil {
return err
}
Expand Down Expand Up @@ -73,3 +70,9 @@ func (m *virtualMachineWrapper) dialVsock(_ context.Context, port uint32) (conn
}
return nil, err
}

func (m *virtualMachineWrapper) checkSSHOverVsockAvailable(ctx context.Context, inst *limatype.Instance) error {
return sshutil.WaitSSHReady(ctx, func(ctx context.Context) (net.Conn, error) {
return m.dialVsock(ctx, uint32(22))
}, "vsock:22", *inst.Config.User.Name, 1)
}
3 changes: 2 additions & 1 deletion pkg/networks/usernet/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,9 @@ func (c *Client) WaitOpeningSSHPort(ctx context.Context, inst *limatype.Instance
if err != nil {
return err
}
user := *inst.Config.User.Name
// -1 avoids both sides timing out simultaneously.
u := fmt.Sprintf("%s/extension/wait_port?ip=%s&port=22&timeout=%d", c.base, ipAddr, timeoutSeconds-1)
u := fmt.Sprintf("%s/extension/wait-ssh-server?ip=%s&port=22&timeout=%d&user=%s", c.base, ipAddr, timeoutSeconds-1, user)
res, err := httpclientutil.Get(ctx, c.client, u)
if err != nil {
return err
Expand Down
41 changes: 18 additions & 23 deletions pkg/networks/usernet/gvproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"github.com/containers/gvisor-tap-vsock/pkg/virtualnetwork"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"

"github.com/lima-vm/lima/v2/pkg/sshutil"
)

type GVisorNetstackOpts struct {
Expand Down Expand Up @@ -243,7 +245,7 @@ func httpServe(ctx context.Context, g *errgroup.Group, ln net.Listener, mux http

func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux {
m := n.Mux()
m.HandleFunc("/extension/wait_port", func(w http.ResponseWriter, r *http.Request) {
m.HandleFunc("/extension/wait-ssh-server", func(w http.ResponseWriter, r *http.Request) {
ip := r.URL.Query().Get("ip")
if net.ParseIP(ip) == nil {
msg := fmt.Sprintf("invalid ip address: %s", ip)
Expand All @@ -255,8 +257,14 @@ func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
port := uint16(port16)
addr := fmt.Sprintf("%s:%d", ip, port)
addr := net.JoinHostPort(ip, fmt.Sprintf("%d", uint16(port16)))

user := r.URL.Query().Get("user")
if user == "" {
msg := "user query parameter is required"
http.Error(w, msg, http.StatusBadRequest)
return
}

timeoutSeconds := 10
if timeoutString := r.URL.Query().Get("timeout"); timeoutString != "" {
Expand All @@ -267,27 +275,14 @@ func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux {
}
timeoutSeconds = int(timeout16)
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSeconds)*time.Second)
defer cancel()
dialContext := func(ctx context.Context) (net.Conn, error) {
return n.DialContextTCP(ctx, addr)
}
// Wait until the port is available.
for {
conn, err := n.DialContextTCP(ctx, addr)
if err == nil {
conn.Close()
logrus.Debugf("Port is available on %s", addr)
w.WriteHeader(http.StatusOK)
break
}
select {
case <-ctx.Done():
msg := fmt.Sprintf("timed out waiting for port to become available on %s", addr)
logrus.Warn(msg)
http.Error(w, msg, http.StatusRequestTimeout)
return
default:
}
logrus.Debugf("Waiting for port to become available on %s", addr)
time.Sleep(1 * time.Second)
if err = sshutil.WaitSSHReady(r.Context(), dialContext, addr, user, timeoutSeconds); err != nil {
http.Error(w, err.Error(), http.StatusRequestTimeout)
} else {
w.WriteHeader(http.StatusOK)
}
})
return m
Expand Down
5 changes: 5 additions & 0 deletions pkg/osutil/osutil_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package osutil
import (
"bytes"
"context"
"errors"
"fmt"
"os"
"os/exec"
Expand Down Expand Up @@ -36,3 +37,7 @@ func Sysctl(ctx context.Context, name string) (string, error) {
}
return strings.TrimSuffix(string(stdout), "\n"), nil
}

func IsConnectionResetError(err error) bool {
return errors.Is(err, syscall.ECONNRESET)
}
4 changes: 4 additions & 0 deletions pkg/osutil/osutil_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@ func SignalName(sig os.Signal) string {
func Sysctl(_ context.Context, _ string) (string, error) {
return "", errors.New("sysctl: unimplemented on Windows")
}

func IsConnectionResetError(err error) bool {
return errors.Is(err, syscall.WSAECONNRESET)
}
126 changes: 126 additions & 0 deletions pkg/sshutil/sshutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"io/fs"
"net"
"os"
"os/exec"
"path/filepath"
Expand All @@ -24,6 +25,7 @@ import (
"github.com/coreos/go-semver/semver"
"github.com/mattn/go-shellwords"
"github.com/sirupsen/logrus"
"golang.org/x/crypto/ssh"
"golang.org/x/sys/cpu"

"github.com/lima-vm/lima/v2/pkg/ioutilx"
Expand Down Expand Up @@ -509,3 +511,127 @@ func detectAESAcceleration() bool {
}
return cpu.ARM.HasAES || cpu.ARM64.HasAES || cpu.PPC64.IsPOWER8 || cpu.S390X.HasAES || cpu.X86.HasAES
}

// WaitSSHReady waits until the SSH server is ready to accept connections.
// The dialContext function is used to create a connection to the SSH server.
// The addr, user, parameter is used for ssh.ClientConn creation.
// The timeoutSeconds parameter specifies the maximum number of seconds to wait.
func WaitSSHReady(ctx context.Context, dialContext func(context.Context) (net.Conn, error), addr, user string, timeoutSeconds int) error {
ctx, cancel := context.WithTimeout(ctx, time.Duration(timeoutSeconds)*time.Second)
defer cancel()

// Prepare signer
signer, err := userPrivateKeySigner()
if err != nil {
return err
}
// Prepare ssh client config
sshConfig := &ssh.ClientConfig{
User: user,
Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)},
HostKeyCallback: hostKeyCollector().checker(),
Timeout: 10 * time.Second,
}
// Wait until the SSH server is available.
for {
conn, err := dialContext(ctx)
if err == nil {
sshConn, chans, reqs, err := ssh.NewClientConn(conn, addr, sshConfig)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is quite hard to maintain the code that mixes up /usr/bin/ssh with golang.org/x/crypto/ssh

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR looks overengineering.

Can we consider another way to fix the issue #4334 ?

e.g., by writing vsock availability to a serial port

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I really want to do is #4333, and this is like a grounding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that #4333 and this can also be implemented without using host key generation and known_hosts.
If the host key generation and the known_hosts area disappear, I think the implementation will be simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g., by writing vsock availability to a serial port

I tried to reconsider this, but I think it's difficult.
On Fedora 43's case:

  • Like ubuntu, where normal SSH over VSOCK can be used, systemd listens to VSOCK:22.
[norio@lima-fedora ~]$ sudo ss -l --vsock -p
Netid State  Recv-Q Send-Q Local Address:Port Peer Address:Port Process                                     
v_str LISTEN 0      0                  *:22              *:*     users:(("systemd",pid=1,fd=228))           
v_str LISTEN 0      0                  3:2222            *:*     users:(("lima-guestagent",pid=1443,fd=13)) 
  • Accepted connection is denied by some mechanism(audit?).
    port availability detection log:

    [norio@lima-fedora ~]$ sudo journalctl|grep -E '[(1030|1037)]'
    Nov 20 08:32:47 lima-fedora audit[1030]: AVC avc: denied { getattr } for pid=1030 comm="sshd-session" scontext=system_u:system_r:sshd_session_t:s0-s0:c0.c1023 tcontext=system_u:system_r:sshd_t:s0-s0:c0.c1023 tclass=vsock_socket permissive=1
    Nov 20 08:32:47 lima-fedora sshd-session[1030]: banner exchange: Connection from UNKNOWN port 65535: Broken pipe

    Actual log on SSH connection via forwarded to VSOCK is denied:

    Nov 20 08:32:48 lima-fedora sshd-session[1037]: ssh_dispatch_run_fatal: Connection from UNKNOWN port 65535: Permission denied [preauth]
    Nov 20 08:32:48 lima-fedora audit[1037]: AUDIT1112 pid=1037 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:sshd_session_t:s0-s0:c0.c1023 msg='op=login acct="(unknown)" exe="/usr/libexec/openssh/sshd-session" hostname=? addr=UNKNOWN terminal=ssh res=failed'

To writing availability via serial:

  • Detecting listener on VSOCK:22 is not sufficient, because Fedora 43 is listening.
  • It requires detecting configuration of audit? or another refusing mechanism on other distros. And it must be detected whether they are set to interrupt the connection.

So, I think, it is more reliable to actually try the connection with the SSH protocol, and the portability is also high.
And using ssh installed on the host to test the connection to VSOCK will increase the same uncertainty as the execution of requirements that cause CI failure.

if err == nil {
sshClient := ssh.NewClient(sshConn, chans, reqs)
return sshClient.Close()
}
conn.Close()
if !isRetryableError(err) {
return fmt.Errorf("failed to create ssh.Conn to %q: %w", addr, err)
}
}
logrus.Debugf("Waiting for SSH port to accept connections on %s", addr)
select {
case <-ctx.Done():
return fmt.Errorf("failed to waiting for SSH port to become available on %s: %w", addr, ctx.Err())
case <-time.After(1 * time.Second):
continue
}
}
}

// errHostKeyMismatch is returned when the SSH host key does not match known hosts.
var errHostKeyMismatch = errors.New("ssh: host key mismatch")

func isRetryableError(err error) bool {
// Port forwarder accepted the connection, but the destination is not ready yet.
return osutil.IsConnectionResetError(err) ||
// SSH server not ready yet (e.g. host key not generated on initial boot).
strings.HasSuffix(err.Error(), "no supported methods remain") ||
// Host key is not yet in known_hosts, but will be collected, so we can retry.
errors.Is(err, errHostKeyMismatch)
}

// userPrivateKeySigner returns the user's private key signer.
// The public key is always installed in the VM.
func userPrivateKeySigner() (ssh.Signer, error) {
configDir, err := dirnames.LimaConfigDir()
if err != nil {
return nil, err
}
privateKeyPath := filepath.Join(configDir, filenames.UserPrivateKey)
key, err := os.ReadFile(privateKeyPath)
if err != nil {
return nil, fmt.Errorf("failed to read private key %q: %w", privateKeyPath, err)
}
signer, err := ssh.ParsePrivateKey(key)
if err != nil {
return nil, fmt.Errorf("failed to parse private key %q: %w", privateKeyPath, err)
}
return signer, nil
}

// hostKeyCollector is a singleton host key collector.
var hostKeyCollector = sync.OnceValue(func() *_hostKeyCollector {
return &_hostKeyCollector{
hostKeys: make(map[string]ssh.PublicKey),
}
})

type _hostKeyCollector struct {
hostKeys map[string]ssh.PublicKey
mu sync.Mutex
}

// checker returns a HostKeyCallback that either checks and collects the host key,
// or only checks the host key, depending on whether any host keys have been collected.
// It is expected to pass host key checks by retrying after the first collection.
// On second invocation, it will only check the host key.
func (h *_hostKeyCollector) checker() ssh.HostKeyCallback {
if len(h.hostKeys) == 0 {
return h.checkAndCollect
}
return h.checkOnly
}

// checkAndCollect is a HostKeyCallback that records the host key provided by the SSH server.
func (h *_hostKeyCollector) checkAndCollect(_ string, _ net.Addr, key ssh.PublicKey) error {
marshaledKey := string(key.Marshal())
h.mu.Lock()
defer h.mu.Unlock()
if _, ok := h.hostKeys[marshaledKey]; ok {
return nil
}
h.hostKeys[marshaledKey] = key
// If always returning nil here, GitHub Advanced Security may report "Use of insecure HostKeyCallback implementation".
// So, we return an error here to make the SSH client report the host key mismatch.
return errHostKeyMismatch
}

// check is a HostKeyCallback that checks whether the host key has been collected.
func (h *_hostKeyCollector) checkOnly(_ string, _ net.Addr, key ssh.PublicKey) error {
h.mu.Lock()
defer h.mu.Unlock()
if _, ok := h.hostKeys[string(key.Marshal())]; ok {
return nil
}
// If always returning nil here, GitHub Advanced Security may report "Use of insecure HostKeyCallback implementation".
// So, we return an error here to make the SSH client report the host key mismatch.
return errHostKeyMismatch
}
Loading