From 0a02f2121cd6e2b9e61d23091c7238d4f573334d Mon Sep 17 00:00:00 2001 From: Ivan Kozlovic Date: Mon, 1 May 2023 16:41:51 -0600 Subject: [PATCH] [ADDED] LeafNode: TLSHandhsakeFirst option A new field in `tls{}` blocks force the server to do TLS handshake before sending the INFO protocol. ``` leafnodes { port: 7422 tls { cert_file: ... ... handshake_first: true } remotes [ { url: tls://host:7423 tls { ... handshake_first: true } } ] } ``` Note that if `handshake_first` is set in the "accept" side, the first `tls{}` block in the example above, a server trying to create a LeafNode connection to this server would need to have `handshake_first` set to true inside the `tls{}` block of the corresponding remote. Configuration reload of leafnodes is generally not supported, but TLS certificates can be reloaded and the support for this new field was also added. Signed-off-by: Ivan Kozlovic --- server/client.go | 6 +- server/leafnode.go | 71 ++++++++++++++++----- server/leafnode_test.go | 134 ++++++++++++++++++++++++++++++++++++++++ server/opts.go | 29 +++++---- server/reload.go | 37 +++++++++++ 5 files changed, 250 insertions(+), 27 deletions(-) diff --git a/server/client.go b/server/client.go index faef128fac..a538512ced 100644 --- a/server/client.go +++ b/server/client.go @@ -1752,7 +1752,11 @@ func (c *client) markConnAsClosed(reason ClosedState) { // we use Noticef on create, so use that too for delete. if c.srv != nil { if c.kind == LEAF { - c.Noticef("%s connection closed: %s account: %s", c.kindString(), reason, c.acc.traceLabel()) + if c.acc != nil { + c.Noticef("%s connection closed: %s - Account: %s", c.kindString(), reason, c.acc.traceLabel()) + } else { + c.Noticef("%s connection closed: %s", c.kindString(), reason) + } } else if c.kind == ROUTER || c.kind == GATEWAY { c.Noticef("%s connection closed: %s", c.kindString(), reason) } else { // Client, System, Jetstream, and Account connections. diff --git a/server/leafnode.go b/server/leafnode.go index b7076db109..7d303d05c5 100644 --- a/server/leafnode.go +++ b/server/leafnode.go @@ -339,6 +339,7 @@ func (s *Server) updateRemoteLeafNodesTLSConfig(opts *Options) { if ro.TLSConfig != nil { cfg.Lock() cfg.TLSConfig = ro.TLSConfig.Clone() + cfg.TLSHandshakeFirst = ro.TLSHandshakeFirst cfg.Unlock() } } @@ -938,6 +939,7 @@ func (s *Server) createLeafNode(conn net.Conn, rURL *url.URL, remote *leafNodeCf c.initClient() c.Noticef("Leafnode connection created%s %s", remoteSuffix, c.opts.Name) + var tlsFirst bool if remote != nil { solicited = true remote.Lock() @@ -946,6 +948,7 @@ func (s *Server) createLeafNode(conn net.Conn, rURL *url.URL, remote *leafNodeCf if !c.leaf.remote.Hub { c.leaf.isSpoke = true } + tlsFirst = remote.TLSHandshakeFirst remote.Unlock() c.acc = acc } else { @@ -990,6 +993,30 @@ func (s *Server) createLeafNode(conn net.Conn, rURL *url.URL, remote *leafNodeCf return nil } } else { + // If configured to do TLS handshake first + if tlsFirst { + // Still check if there is really need for TLS in case user set + // this boolean but nothing else... + tlsRequired, tlsConfig, tlsName, tlsTimeout := c.leafNodeGetTLSConfigForSolicit(remote, true) + + // If TLS required, peform handshake. + if tlsRequired { + // Get the URL that was used to connect to the remote server. + rURL := remote.getCurrentURL() + + // Perform the client-side TLS handshake. + if resetTLSName, err := c.doTLSClientHandshake("leafnode", rURL, tlsConfig, tlsName, tlsTimeout, opts.LeafNode.TLSPinnedCerts); err != nil { + // Check if we need to reset the remote's TLS name. + if resetTLSName { + remote.Lock() + remote.tlsName = _EMPTY_ + remote.Unlock() + } + c.mu.Unlock() + return nil + } + } + } // We need to wait for the info, but not for too long. c.nc.SetReadDeadline(time.Now().Add(DEFAULT_LEAFNODE_INFO_WAIT)) } @@ -1004,17 +1031,19 @@ func (s *Server) createLeafNode(conn net.Conn, rURL *url.URL, remote *leafNodeCf info.Nonce = string(c.nonce) info.CID = c.cid proto := generateInfoJSON(info) - // We have to send from this go routine because we may - // have to block for TLS handshake before we start our - // writeLoop go routine. The other side needs to receive - // this before it can initiate the TLS handshake.. - c.sendProtoNow(proto) - - // The above call could have marked the connection as closed (due to TCP error). - if c.isClosed() { - c.mu.Unlock() - c.closeConnection(WriteError) - return nil + if !opts.LeafNode.TLSHandshakeFirst { + // We have to send from this go routine because we may + // have to block for TLS handshake before we start our + // writeLoop go routine. The other side needs to receive + // this before it can initiate the TLS handshake.. + c.sendProtoNow(proto) + + // The above call could have marked the connection as closed (due to TCP error). + if c.isClosed() { + c.mu.Unlock() + c.closeConnection(WriteError) + return nil + } } // Check to see if we need to spin up TLS. @@ -1026,6 +1055,17 @@ func (s *Server) createLeafNode(conn net.Conn, rURL *url.URL, remote *leafNodeCf } } + // If the user wants the TLS handshake to occur first, now that it is + // done, send the INFO protocol. + if opts.LeafNode.TLSHandshakeFirst { + c.sendProtoNow(proto) + if c.isClosed() { + c.mu.Unlock() + c.closeConnection(WriteError) + return nil + } + } + // Leaf nodes will always require a CONNECT to let us know // when we are properly bound to an account. c.setAuthTimer(secondsToDuration(opts.LeafNode.AuthTimeout)) @@ -1042,7 +1082,7 @@ func (s *Server) createLeafNode(conn net.Conn, rURL *url.URL, remote *leafNodeCf // Spin up the read loop. s.startGoRoutine(func() { c.readLoop(preBuf) }) - // We will sping the write loop for solicited connections only + // We will spin the write loop for solicited connections only // when processing the INFO and after switching to TLS if needed. if !solicited { s.startGoRoutine(func() { c.writeLoop() }) @@ -2611,7 +2651,7 @@ func (c *client) leafNodeSolicitWSConnection(opts *Options, rURL *url.URL, remot const connectProcessTimeout = 2 * time.Second // This is invoked for remote LEAF remote connections after processing the INFO -// protocol. This will do the TLS handshake (if needed be) +// protocol. This will do the TLS handshake (if need be) func (s *Server) leafNodeResumeConnectProcess(c *client) { clusterName := s.ClusterName() @@ -2625,8 +2665,9 @@ func (s *Server) leafNodeResumeConnectProcess(c *client) { var tlsRequired bool // In case of websocket, the TLS handshake has been already done. - // So check only for non websocket connections. - if !c.isWebsocket() { + // So check only for non websocket connections and for configurations + // where the TLS Handshake was not done first. + if !c.isWebsocket() && !remote.TLSHandshakeFirst { var tlsConfig *tls.Config var tlsName string var tlsTimeout float64 diff --git a/server/leafnode_test.go b/server/leafnode_test.go index fd211beaf1..ad3f52f575 100644 --- a/server/leafnode_test.go +++ b/server/leafnode_test.go @@ -4846,3 +4846,137 @@ func TestLeafNodeDuplicateMsg(t *testing.T) { t.Run("sub_b2_pub_a1", func(t *testing.T) { check(t, b2, a1) }) t.Run("sub_b2_pub_a2", func(t *testing.T) { check(t, b2, a2) }) } + +func TestLeafNodeTLSHandshakeFirstVerifyNoInfoSent(t *testing.T) { + confHub := createConfFile(t, []byte(` + port : -1 + leafnodes : { + port : -1 + tls { + cert_file: "../test/configs/certs/server-cert.pem" + key_file: "../test/configs/certs/server-key.pem" + ca_file: "../test/configs/certs/ca.pem" + timeout: 2 + handshake_first: true + } + } + `)) + s1, o1 := RunServerWithConfig(confHub) + defer s1.Shutdown() + + c, err := net.DialTimeout("tcp", fmt.Sprintf("127.0.0.1:%d", o1.LeafNode.Port), 2*time.Second) + require_NoError(t, err) + defer c.Close() + + buf := make([]byte, 1024) + // We will wait for up to 500ms to see if the server is sending (incorrectly) + // the INFO. + c.SetReadDeadline(time.Now().Add(500 * time.Millisecond)) + n, err := c.Read(buf) + c.SetReadDeadline(time.Time{}) + // If we did not get an error, this is an issue... + if err == nil { + t.Fatalf("Should not have received anything, got n=%v buf=%s", n, buf[:n]) + } + // We expect a timeout error + if ne, ok := err.(net.Error); !ok || !ne.Timeout() { + t.Fatalf("Expected a timeout error, got %v", err) + } +} + +func TestLeafNodeTLSHandshakeFirst(t *testing.T) { + tmpl1 := ` + port : -1 + leafnodes : { + port : -1 + tls { + cert_file: "../test/configs/certs/server-cert.pem" + key_file: "../test/configs/certs/server-key.pem" + ca_file: "../test/configs/certs/ca.pem" + timeout: 2 + handshake_first: %s + } + } + ` + confHub := createConfFile(t, []byte(fmt.Sprintf(tmpl1, "true"))) + s1, o1 := RunServerWithConfig(confHub) + defer s1.Shutdown() + + tmpl2 := ` + port: -1 + leafnodes : { + port : -1 + remotes : [ + { + urls : [tls://127.0.0.1:%d] + tls { + cert_file: "../test/configs/certs/client-cert.pem" + key_file: "../test/configs/certs/client-key.pem" + ca_file: "../test/configs/certs/ca.pem" + timeout: 2 + first: %s + } + } + ] + } + ` + confSpoke := createConfFile(t, []byte(fmt.Sprintf(tmpl2, o1.LeafNode.Port, "true"))) + s2, _ := RunServerWithConfig(confSpoke) + defer s2.Shutdown() + + checkLeafNodeConnected(t, s2) + + s2.Shutdown() + + // Now check that there will be a failure if the remote does not ask for + // handshake first since the hub is configured that way. + // Set a logger on s1 to capture errors + l := &captureErrorLogger{errCh: make(chan string, 10)} + s1.SetLogger(l, false, false) + + confSpoke = createConfFile(t, []byte(fmt.Sprintf(tmpl2, o1.LeafNode.Port, "false"))) + s2, _ = RunServerWithConfig(confSpoke) + defer s2.Shutdown() + + select { + case err := <-l.errCh: + if !strings.Contains(err, "handshake error") { + t.Fatalf("Unexpected error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("Did not get TLS handshake failure") + } + + // Check configuration reload for this remote + reloadUpdateConfig(t, s2, confSpoke, fmt.Sprintf(tmpl2, o1.LeafNode.Port, "true")) + checkLeafNodeConnected(t, s2) + s2.Shutdown() + + // Drain the logger error channel + for done := false; !done; { + select { + case <-l.errCh: + default: + done = true + } + } + + // Now change the config on the hub + reloadUpdateConfig(t, s1, confHub, fmt.Sprintf(tmpl1, "false")) + // Restart s2 + s2, _ = RunServerWithConfig(confSpoke) + defer s2.Shutdown() + + select { + case err := <-l.errCh: + if !strings.Contains(err, "handshake error") { + t.Fatalf("Unexpected error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("Did not get TLS handshake failure") + } + + // Reload again with "true" + reloadUpdateConfig(t, s1, confHub, fmt.Sprintf(tmpl1, "true")) + checkLeafNodeConnected(t, s2) +} diff --git a/server/opts.go b/server/opts.go index 02656d7ce1..b445a22e45 100644 --- a/server/opts.go +++ b/server/opts.go @@ -153,6 +153,7 @@ type LeafNodeOpts struct { TLSTimeout float64 `json:"tls_timeout,omitempty"` TLSMap bool `json:"-"` TLSPinnedCerts PinnedCertSet `json:"-"` + TLSHandshakeFirst bool `json:"-"` Advertise string `json:"-"` NoAdvertise bool `json:"-"` ReconnectInterval time.Duration `json:"-"` @@ -183,17 +184,18 @@ type SignatureHandler func([]byte) (string, []byte, error) // RemoteLeafOpts are options for connecting to a remote server as a leaf node. type RemoteLeafOpts struct { - LocalAccount string `json:"local_account,omitempty"` - NoRandomize bool `json:"-"` - URLs []*url.URL `json:"urls,omitempty"` - Credentials string `json:"-"` - SignatureCB SignatureHandler `json:"-"` - TLS bool `json:"-"` - TLSConfig *tls.Config `json:"-"` - TLSTimeout float64 `json:"tls_timeout,omitempty"` - Hub bool `json:"hub,omitempty"` - DenyImports []string `json:"-"` - DenyExports []string `json:"-"` + LocalAccount string `json:"local_account,omitempty"` + NoRandomize bool `json:"-"` + URLs []*url.URL `json:"urls,omitempty"` + Credentials string `json:"-"` + SignatureCB SignatureHandler `json:"-"` + TLS bool `json:"-"` + TLSConfig *tls.Config `json:"-"` + TLSTimeout float64 `json:"tls_timeout,omitempty"` + TLSHandshakeFirst bool `json:"-"` + Hub bool `json:"hub,omitempty"` + DenyImports []string `json:"-"` + DenyExports []string `json:"-"` // When an URL has the "ws" (or "wss") scheme, then the server will initiate the // connection as a websocket connection. By default, the websocket frames will be @@ -604,6 +606,7 @@ type TLSConfigOpts struct { Insecure bool Map bool TLSCheckKnownURLs bool + HandshakeFirst bool // Indicate that the TLS handshake should occur first, before sending the INFO protocol Timeout float64 RateLimit int64 Ciphers []uint16 @@ -2173,6 +2176,7 @@ func parseLeafNodes(v interface{}, opts *Options, errors *[]error, warnings *[]e opts.LeafNode.TLSTimeout = tc.Timeout opts.LeafNode.TLSMap = tc.Map opts.LeafNode.TLSPinnedCerts = tc.PinnedCerts + opts.LeafNode.TLSHandshakeFirst = tc.HandshakeFirst opts.LeafNode.tlsConfigOpts = tc case "leafnode_advertise", "advertise": opts.LeafNode.Advertise = mv.(string) @@ -2388,6 +2392,7 @@ func parseRemoteLeafNodes(v interface{}, errors *[]error, warnings *[]error) ([] } else { remote.TLSTimeout = float64(DEFAULT_LEAF_TLS_TIMEOUT) / float64(time.Second) } + remote.TLSHandshakeFirst = tc.HandshakeFirst remote.tlsConfigOpts = tc case "hub": remote.Hub = v.(bool) @@ -4205,6 +4210,8 @@ func parseTLS(v interface{}, isClientCtx bool) (t *TLSConfigOpts, retErr error) return nil, &configErr{tk, certstore.ErrBadCertMatchField.Error()} } tc.CertMatch = certMatch + case "handshake_first", "first", "immediate": + tc.HandshakeFirst = mv.(bool) default: return nil, &configErr{tk, fmt.Sprintf("error parsing tls config, unknown field [%q]", mk)} } diff --git a/server/reload.go b/server/reload.go index 880f4ad69e..ba3a7c7e07 100644 --- a/server/reload.go +++ b/server/reload.go @@ -789,6 +789,18 @@ func (o *mqttInactiveThresholdReload) Apply(s *Server) { s.Noticef("Reloaded: MQTT consumer_inactive_threshold = %v", o.newValue) } +type leafNodeOption struct { + noopOption +} + +func (l *leafNodeOption) Apply(s *Server) { + opts := s.getOpts() + s.Noticef("Reloaded: LeafNode TLS HandshakeFirst value is: %v", opts.LeafNode.TLSHandshakeFirst) + for _, r := range opts.LeafNode.Remotes { + s.Noticef("Reloaded: LeafNode Remote to %v TLS HandshakeFirst value is: %v", r.URLs, r.TLSHandshakeFirst) + } +} + // Compares options and disconnects clients that are no longer listed in pinned certs. Lock must not be held. func (s *Server) recheckPinnedCerts(curOpts *Options, newOpts *Options) { s.mu.Lock() @@ -1232,6 +1244,24 @@ func (s *Server) diffOptions(newOpts *Options) ([]option, error) { tmpNew.TLSConfig = nil tmpOld.tlsConfigOpts = nil tmpNew.tlsConfigOpts = nil + // We will allow TLSHandshakeFirst to me config reloaded. First, + // we just want to detect if there was a change in the leafnodes{} + // block, and if not, we will check the remotes. + handshakeFirstChanged := tmpOld.TLSHandshakeFirst != tmpNew.TLSHandshakeFirst + // If changed, set them (in the temporary variables) to false so that the + // rest of the comparison does not fail. + if handshakeFirstChanged { + tmpOld.TLSHandshakeFirst, tmpNew.TLSHandshakeFirst = false, false + } else if len(tmpOld.Remotes) == len(tmpNew.Remotes) { + // Since we don't support changes in the remotes, we will do a + // simple pass to see if there was a change of this field. + for i := 0; i < len(tmpOld.Remotes); i++ { + if tmpOld.Remotes[i].TLSHandshakeFirst != tmpNew.Remotes[i].TLSHandshakeFirst { + handshakeFirstChanged = true + break + } + } + } // Need to do the same for remote leafnodes' TLS configs. // But we can't just set remotes' TLSConfig to nil otherwise this @@ -1320,6 +1350,12 @@ func (s *Server) diffOptions(newOpts *Options) ([]option, error) { return nil, fmt.Errorf("config reload not supported for %s: old=%v, new=%v", field.Name, oldValue, newValue) } + + // If we detected a change in TLSHandshakeFirst, then let's add to + // the diffOpts so that we can print that we change that. + if handshakeFirstChanged { + diffOpts = append(diffOpts, &leafNodeOption{}) + } case "jetstream": new := newValue.(bool) old := oldValue.(bool) @@ -1506,6 +1542,7 @@ func copyRemoteLNConfigForReloadCompare(current []*RemoteLeafOpts) []*RemoteLeaf cp := *rcfg cp.TLSConfig = nil cp.tlsConfigOpts = nil + cp.TLSHandshakeFirst = false // This is set only when processing a CONNECT, so reset here so that we // don't fail the DeepEqual comparison. cp.TLS = false