diff --git a/go.mod b/go.mod index eba0777a..f21a2bf1 100644 --- a/go.mod +++ b/go.mod @@ -6,10 +6,10 @@ require ( github.com/hashicorp/go-msgpack v0.5.5 github.com/hashicorp/raft v1.1.1 github.com/lib/pq v1.2.0 - github.com/nats-io/nats-server/v2 v2.0.4 - github.com/nats-io/nats.go v1.8.1 + github.com/nats-io/nats-server/v2 v2.1.2 + github.com/nats-io/nats.go v1.9.1 github.com/nats-io/nuid v1.0.1 - github.com/nats-io/stan.go v0.5.0 + github.com/nats-io/stan.go v0.5.2 github.com/prometheus/procfs v0.0.3 go.etcd.io/bbolt v1.3.3 golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 diff --git a/go.sum b/go.sum index 085d5002..fbc098f7 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,10 @@ github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZp github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM= @@ -37,19 +37,20 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/nats-io/jwt v0.2.14 h1:wA50KvFz/JXGXMHRygTWsRGh/ixxgC5E3kHvmtGLNf4= -github.com/nats-io/jwt v0.2.14/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= -github.com/nats-io/nats-server/v2 v2.0.4 h1:XOMeQRbhl1lGNTIctPhih6pTa15NGif54Uas6ZW5q7g= -github.com/nats-io/nats-server/v2 v2.0.4/go.mod h1:AWdGEVbjKRS9ZIx4DSP5eKW48nfFm7q3uiSkP/1KD7M= -github.com/nats-io/nats.go v1.8.1 h1:6lF/f1/NN6kzUDBz6pyvQDEXO39jqXcWRLu/tKjtOUQ= -github.com/nats-io/nats.go v1.8.1/go.mod h1:BrFz9vVn0fU3AcH9Vn4Kd7W0NpJ651tD5omQ3M8LwxM= -github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= -github.com/nats-io/nkeys v0.1.0 h1:qMd4+pRHgdr1nAClu+2h/2a5F2TmKcCzjCDazVgRoX4= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/jwt v0.3.2 h1:+RB5hMpXUUA2dfxuhBTEkMOrYmM+gKIZYS1KjSostMI= +github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= +github.com/nats-io/nats-server/v2 v2.1.2 h1:i2Ly0B+1+rzNZHHWtD4ZwKi+OU5l+uQo1iDHZ2PmiIc= +github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= +github.com/nats-io/nats.go v1.9.1 h1:ik3HbLhZ0YABLto7iX80pZLPw/6dx3T+++MZJwLnMrQ= +github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nkeys v0.1.3 h1:6JrEfig+HzTH85yxzhSVbjHRJv9cn0p6n3IngIcM5/k= +github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -github.com/nats-io/stan.go v0.5.0 h1:ZaSPMb6jnDXsSlOACynJrUiB3Evleg3ZyyX+rnf3TlQ= -github.com/nats-io/stan.go v0.5.0/go.mod h1:dYqB+vMN3C2F9pT1FRQpg9eHbjPj6mP0yYuyBNuXHZE= +github.com/nats-io/stan.go v0.5.2 h1:/DfflNAztFQVjssQ7hW8d9gWl3hU+SJ3mWjokaQEsog= +github.com/nats-io/stan.go v0.5.2/go.mod h1:eIcD5bi3pqbHT/xIIvXMwvzXYElgouBvaVRftaE+eac= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -68,7 +69,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= @@ -78,14 +78,12 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190523142557-0e01d883c5c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e h1:D5TXcfTk7xF7hvieo4QErS3qqCB4teTffacDWr7CI+0= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456 h1:ng0gs1AKnRRuEMZoTLLlbOd+C17zUDepwGQBb/n+JVg= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/github.com/nats-io/jwt/.travis.yml b/vendor/github.com/nats-io/jwt/.travis.yml index f7dc9242..50e27a6b 100644 --- a/vendor/github.com/nats-io/jwt/.travis.yml +++ b/vendor/github.com/nats-io/jwt/.travis.yml @@ -1,8 +1,8 @@ language: go sudo: false go: +- 1.13.x - 1.12.x -- 1.11.x install: - go get -t ./... @@ -18,4 +18,5 @@ before_script: - staticcheck ./... script: -- if [[ "$TRAVIS_GO_VERSION" == 1.10.* ]] ; then ./scripts/cov.sh TRAVIS; else go test -v -race ./...; fi +- go test -v -race ./... +- if [[ "$TRAVIS_GO_VERSION" =~ 1.12 ]]; then ./scripts/cov.sh TRAVIS; fi diff --git a/vendor/github.com/nats-io/jwt/account_claims.go b/vendor/github.com/nats-io/jwt/account_claims.go index ec7132d0..945bd987 100644 --- a/vendor/github.com/nats-io/jwt/account_claims.go +++ b/vendor/github.com/nats-io/jwt/account_claims.go @@ -17,6 +17,7 @@ package jwt import ( "errors" + "sort" "time" "github.com/nats-io/nkeys" @@ -127,7 +128,8 @@ func (a *AccountClaims) Encode(pair nkeys.KeyPair) (string, error) { if !nkeys.IsValidPublicAccountKey(a.Subject) { return "", errors.New("expected subject to be account public key") } - + sort.Sort(a.Exports) + sort.Sort(a.Imports) a.ClaimsData.Type = AccountClaim return a.ClaimsData.Encode(pair, a) } diff --git a/vendor/github.com/nats-io/jwt/creds_utils.go b/vendor/github.com/nats-io/jwt/creds_utils.go index 75fcab6d..bb913dc1 100644 --- a/vendor/github.com/nats-io/jwt/creds_utils.go +++ b/vendor/github.com/nats-io/jwt/creds_utils.go @@ -23,6 +23,7 @@ func formatJwt(kind string, jwtString string) ([]byte, error) { templ := `-----BEGIN NATS %s JWT----- %s ------END NATS %s JWT------ + ` w := bytes.NewBuffer(nil) kind = strings.ToUpper(kind) @@ -60,6 +61,7 @@ func DecorateSeed(seed []byte) ([]byte, error) { header := `************************* IMPORTANT ************************* NKEY Seed printed below can be used to sign and prove identity. NKEYs are sensitive and should be treated as secrets. + -----BEGIN %s NKEY SEED----- ` _, err := fmt.Fprintf(w, header, kind) @@ -70,6 +72,7 @@ NKEYs are sensitive and should be treated as secrets. footer := ` ------END %s NKEY SEED------ + ************************************************************* ` _, err = fmt.Fprintf(w, footer, kind) @@ -132,8 +135,6 @@ func FormatUserConfig(jwtString string, seed []byte) ([]byte, error) { // ParseDecoratedJWT takes a creds file and returns the JWT portion. func ParseDecoratedJWT(contents []byte) (string, error) { - defer wipeSlice(contents) - items := userConfigRE.FindAllSubmatch(contents, -1) if len(items) == 0 { return string(contents), nil @@ -150,7 +151,6 @@ func ParseDecoratedJWT(contents []byte) (string, error) { // key pair from it. func ParseDecoratedNKey(contents []byte) (nkeys.KeyPair, error) { var seed []byte - defer wipeSlice(contents) items := userConfigRE.FindAllSubmatch(contents, -1) if len(items) > 1 { @@ -201,10 +201,3 @@ func ParseDecoratedUserNKey(contents []byte) (nkeys.KeyPair, error) { } return kp, nil } - -// Just wipe slice with 'x', for clearing contents of nkey seed file. -func wipeSlice(buf []byte) { - for i := range buf { - buf[i] = 'x' - } -} diff --git a/vendor/github.com/nats-io/jwt/exports.go b/vendor/github.com/nats-io/jwt/exports.go index 720abf6e..5578f988 100644 --- a/vendor/github.com/nats-io/jwt/exports.go +++ b/vendor/github.com/nats-io/jwt/exports.go @@ -1,5 +1,5 @@ /* - * Copyright 2018 The NATS Authors + * Copyright 2018-2019 The NATS Authors * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -34,14 +34,50 @@ const ( ResponseTypeChunked = "Chunked" ) +// ServiceLatency is used when observing and exported service for +// latency measurements. +// Sampling 1-100, represents sampling rate, defaults to 100. +// Results is the subject where the latency metrics are published. +// A metric will be defined by the nats-server's ServiceLatency. Time durations +// are in nanoseconds. +// see https://github.com/nats-io/nats-server/blob/master/server/accounts.go#L524 +// e.g. +// { +// "app": "dlc22", +// "start": "2019-09-16T21:46:23.636869585-07:00", +// "svc": 219732, +// "nats": { +// "req": 320415, +// "resp": 228268, +// "sys": 0 +// }, +// "total": 768415 +// } +// +type ServiceLatency struct { + Sampling int `json:"sampling,omitempty"` + Results Subject `json:"results"` +} + +func (sl *ServiceLatency) Validate(vr *ValidationResults) { + if sl.Sampling < 1 || sl.Sampling > 100 { + vr.AddError("sampling percentage needs to be between 1-100") + } + sl.Results.Validate(vr) + if sl.Results.HasWildCards() { + vr.AddError("results subject can not contain wildcards") + } +} + // Export represents a single export type Export struct { - Name string `json:"name,omitempty"` - Subject Subject `json:"subject,omitempty"` - Type ExportType `json:"type,omitempty"` - TokenReq bool `json:"token_req,omitempty"` - Revocations RevocationList `json:"revocations,omitempty"` - ResponseType ResponseType `json:"response_type,omitempty"` + Name string `json:"name,omitempty"` + Subject Subject `json:"subject,omitempty"` + Type ExportType `json:"type,omitempty"` + TokenReq bool `json:"token_req,omitempty"` + Revocations RevocationList `json:"revocations,omitempty"` + ResponseType ResponseType `json:"response_type,omitempty"` + Latency *ServiceLatency `json:"service_latency,omitempty"` } // IsService returns true if an export is for a service @@ -81,6 +117,12 @@ func (e *Export) Validate(vr *ValidationResults) { if e.IsStream() && e.ResponseType != "" { vr.AddError("invalid response type for stream: %q", e.ResponseType) } + if e.Latency != nil { + if !e.IsService() { + vr.AddError("latency tracking only permitted for services") + } + e.Latency.Validate(vr) + } e.Subject.Validate(vr) } @@ -116,7 +158,7 @@ func (e *Export) IsRevoked(pubKey string) bool { return e.Revocations.IsRevoked(pubKey, time.Now()) } -// Exports is an array of exports +// Exports is a slice of exports type Exports []*Export // Add appends exports to the list @@ -180,3 +222,15 @@ func (e *Exports) HasExportContainingSubject(subject Subject) bool { } return false } + +func (e Exports) Len() int { + return len(e) +} + +func (e Exports) Swap(i, j int) { + e[i], e[j] = e[j], e[i] +} + +func (e Exports) Less(i, j int) bool { + return e[i].Subject < e[j].Subject +} diff --git a/vendor/github.com/nats-io/jwt/go.mod b/vendor/github.com/nats-io/jwt/go.mod index b1ad180a..a780dde9 100644 --- a/vendor/github.com/nats-io/jwt/go.mod +++ b/vendor/github.com/nats-io/jwt/go.mod @@ -1,3 +1,3 @@ module github.com/nats-io/jwt -require github.com/nats-io/nkeys v0.1.0 +require github.com/nats-io/nkeys v0.1.3 diff --git a/vendor/github.com/nats-io/jwt/go.sum b/vendor/github.com/nats-io/jwt/go.sum index ea493eca..9baf67f5 100644 --- a/vendor/github.com/nats-io/jwt/go.sum +++ b/vendor/github.com/nats-io/jwt/go.sum @@ -1,5 +1,5 @@ -github.com/nats-io/nkeys v0.1.0 h1:qMd4+pRHgdr1nAClu+2h/2a5F2TmKcCzjCDazVgRoX4= -github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nkeys v0.1.3 h1:6JrEfig+HzTH85yxzhSVbjHRJv9cn0p6n3IngIcM5/k= +github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= diff --git a/vendor/github.com/nats-io/jwt/header.go b/vendor/github.com/nats-io/jwt/header.go index 2625856b..27c65811 100644 --- a/vendor/github.com/nats-io/jwt/header.go +++ b/vendor/github.com/nats-io/jwt/header.go @@ -23,7 +23,7 @@ import ( const ( // Version is semantic version. - Version = "0.0.5" + Version = "0.3.2" // TokenTypeJwt is the JWT token type supported JWT tokens // encoded and decoded by this library diff --git a/vendor/github.com/nats-io/jwt/imports.go b/vendor/github.com/nats-io/jwt/imports.go index b41f7000..8cd97479 100644 --- a/vendor/github.com/nats-io/jwt/imports.go +++ b/vendor/github.com/nats-io/jwt/imports.go @@ -63,10 +63,11 @@ func (i *Import) Validate(actPubKey string, vr *ValidationResults) { i.Subject.Validate(vr) - if i.IsService() { - if i.Subject.HasWildCards() { - vr.AddWarning("services cannot have wildcard subject: %q", i.Subject) - } + if i.IsService() && i.Subject.HasWildCards() { + vr.AddError("services cannot have wildcard subject: %q", i.Subject) + } + if i.IsStream() && i.To.HasWildCards() { + vr.AddError("streams cannot have wildcard to subject: %q", i.Subject) } var act *ActivationClaims @@ -120,7 +121,14 @@ type Imports []*Import // Validate checks if an import is valid for the wrapping account func (i *Imports) Validate(acctPubKey string, vr *ValidationResults) { + toSet := make(map[Subject]bool, len(*i)) for _, v := range *i { + if v.Type == Service { + if _, ok := toSet[v.To]; ok { + vr.AddError("Duplicate To subjects for %q", v.To) + } + toSet[v.To] = true + } v.Validate(acctPubKey, vr) } } @@ -129,3 +137,15 @@ func (i *Imports) Validate(acctPubKey string, vr *ValidationResults) { func (i *Imports) Add(a ...*Import) { *i = append(*i, a...) } + +func (i Imports) Len() int { + return len(i) +} + +func (i Imports) Swap(j, k int) { + i[j], i[k] = i[k], i[j] +} + +func (i Imports) Less(j, k int) bool { + return i[j].Subject < i[k].Subject +} diff --git a/vendor/github.com/nats-io/jwt/operator_claims.go b/vendor/github.com/nats-io/jwt/operator_claims.go index 1383b7ad..6a99597b 100644 --- a/vendor/github.com/nats-io/jwt/operator_claims.go +++ b/vendor/github.com/nats-io/jwt/operator_claims.go @@ -26,9 +26,19 @@ import ( // Operator specific claims type Operator struct { - Identities []Identity `json:"identity,omitempty"` - SigningKeys StringList `json:"signing_keys,omitempty"` - AccountServerURL string `json:"account_server_url,omitempty"` + // Slice of real identies (like websites) that can be used to identify the operator. + Identities []Identity `json:"identity,omitempty"` + // Slice of other operator NKeys that can be used to sign on behalf of the main + // operator identity. + SigningKeys StringList `json:"signing_keys,omitempty"` + // AccountServerURL is a partial URL like "https://host.domain.org:/jwt/v1" + // tools will use the prefix and build queries by appending /accounts/ + // or /operator to the path provided. Note this assumes that the account server + // can handle requests in a nats-account-server compatible way. See + // https://github.com/nats-io/nats-account-server. + AccountServerURL string `json:"account_server_url,omitempty"` + // A list of NATS urls (tls://host:port) where tools can connect to the server + // using proper credentials. OperatorServiceURLs StringList `json:"operator_service_urls,omitempty"` } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/accounts.go b/vendor/github.com/nats-io/nats-server/v2/server/accounts.go index 75fd479c..357614e7 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/accounts.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/accounts.go @@ -16,6 +16,7 @@ package server import ( "fmt" "io/ioutil" + "math/rand" "net/http" "net/url" "reflect" @@ -40,6 +41,7 @@ type Account struct { claimJWT string updated time.Time mu sync.RWMutex + sqmu sync.Mutex sl *Sublist etmr *time.Timer ctmr *time.Timer @@ -58,12 +60,16 @@ type Account struct { imports importMap exports exportMap limits - nae int32 - pruning bool - rmPruning bool - expired bool - signingKeys []string - srv *Server // server this account is registered with (possibly nil) + nae int32 + pruning bool + rmPruning bool + expired bool + signingKeys []string + srv *Server // server this account is registered with (possibly nil) + lds string // loop detection subject for leaf nodes + siReply []byte // service reply prefix, will form wildcard subscription. + siReplyClient *client + prand *rand.Rand } // Account based limits. @@ -98,11 +104,14 @@ type serviceImport struct { claim *jwt.Import from string to string - rt ServiceRespType ts int64 + rt ServiceRespType + latency *serviceLatency + m1 *ServiceLatency ae bool internal bool invalid bool + tracking bool } // This is used to record when we create a mapping for implicit service @@ -142,19 +151,36 @@ func (rt ServiceRespType) String() string { type exportAuth struct { tokenReq bool approved map[string]*Account +} + +// streamExport +type streamExport struct { + exportAuth +} + +// serviceExport holds additional information for exported services. +type serviceExport struct { + exportAuth respType ServiceRespType + latency *serviceLatency } -// importMap tracks the imported streams and services. -type importMap struct { - streams map[string]*streamImport - services map[string]*serviceImport // TODO(dlc) sync.Map may be better. +// Used to track service latency. +type serviceLatency struct { + sampling int8 + subject string } // exportMap tracks the exported streams and services. type exportMap struct { - streams map[string]*exportAuth - services map[string]*exportAuth + streams map[string]*streamExport + services map[string]*serviceExport +} + +// importMap tracks the imported streams and services. +type importMap struct { + streams map[string]*streamImport + services map[string]*serviceImport // TODO(dlc) sync.Map may be better. } // NewAccount creates a new unlimited account with the given name. @@ -177,6 +203,60 @@ func (a *Account) shallowCopy() *Account { return na } +// Called to track a remote server and connections and leafnodes it +// has for this account. +func (a *Account) updateRemoteServer(m *AccountNumConns) { + a.mu.Lock() + if a.strack == nil { + a.strack = make(map[string]sconns) + } + // This does not depend on receiving all updates since each one is idempotent. + // FIXME(dlc) - We should cleanup when these both go to zero. + prev := a.strack[m.Server.ID] + a.strack[m.Server.ID] = sconns{conns: int32(m.Conns), leafs: int32(m.LeafNodes)} + a.nrclients += int32(m.Conns) - prev.conns + a.nrleafs += int32(m.LeafNodes) - prev.leafs + a.mu.Unlock() +} + +// Removes tracking for a remote server that has shutdown. +func (a *Account) removeRemoteServer(sid string) { + a.mu.Lock() + if a.strack != nil { + prev := a.strack[sid] + delete(a.strack, sid) + a.nrclients -= prev.conns + a.nrleafs -= prev.leafs + } + a.mu.Unlock() +} + +// When querying for subject interest this is the number of +// expected responses. We need to actually check that the entry +// has active connections. +func (a *Account) expectedRemoteResponses() (expected int32) { + a.mu.RLock() + for _, sc := range a.strack { + if sc.conns > 0 || sc.leafs > 0 { + expected++ + } + } + a.mu.RUnlock() + return +} + +// Clears eventing and tracking for this account. +func (a *Account) clearEventing() { + a.mu.Lock() + a.nrclients = 0 + // Now clear state + clearTimer(&a.etmr) + clearTimer(&a.ctmr) + a.clients = nil + a.strack = nil + a.mu.Unlock() +} + // NumConnections returns active number of clients for this account for // all known servers. func (a *Account) NumConnections() int { @@ -186,6 +266,15 @@ func (a *Account) NumConnections() int { return nc } +// NumRemoteConnections returns the number of client or leaf connections that +// are not on this server. +func (a *Account) NumRemoteConnections() int { + a.mu.RLock() + nc := int(a.nrclients + a.nrleafs) + a.mu.RUnlock() + return nc +} + // NumLocalConnections returns active number of clients for this account // on this server. func (a *Account) NumLocalConnections() int { @@ -200,23 +289,28 @@ func (a *Account) numLocalConnections() int { return len(a.clients) - int(a.sysclients) - int(a.nleafs) } +// This is for extended local interest. +// Lock should not be held. +func (a *Account) numLocalAndLeafConnections() int { + a.mu.RLock() + nlc := len(a.clients) - int(a.sysclients) + a.mu.RUnlock() + return nlc +} + func (a *Account) numLocalLeafNodes() int { return int(a.nleafs) } // MaxTotalConnectionsReached returns if we have reached our limit for number of connections. func (a *Account) MaxTotalConnectionsReached() bool { + var mtc bool a.mu.RLock() - mtc := a.maxTotalConnectionsReached() - a.mu.RUnlock() - return mtc -} - -func (a *Account) maxTotalConnectionsReached() bool { if a.mconns != jwt.NoLimit { - return len(a.clients)-int(a.sysclients)+int(a.nrclients) >= int(a.mconns) + mtc = len(a.clients)-int(a.sysclients)+int(a.nrclients) >= int(a.mconns) } - return false + a.mu.RUnlock() + return mtc } // MaxActiveConnections return the set limit for the account system @@ -352,6 +446,9 @@ func (a *Account) removeClient(c *client) int { func (a *Account) randomClient() *client { var c *client + if a.siReplyClient != nil { + return a.siReplyClient + } for _, c = range a.clients { break } @@ -365,27 +462,27 @@ func (a *Account) AddServiceExport(subject string, accounts []*Account) error { // AddServiceExportWithresponse will configure the account with the defined export and response type. func (a *Account) AddServiceExportWithResponse(subject string, respType ServiceRespType, accounts []*Account) error { - a.mu.Lock() - defer a.mu.Unlock() if a == nil { return ErrMissingAccount } + a.mu.Lock() + defer a.mu.Unlock() if a.exports.services == nil { - a.exports.services = make(map[string]*exportAuth) + a.exports.services = make(map[string]*serviceExport) } ea := a.exports.services[subject] if respType != Singleton { if ea == nil { - ea = &exportAuth{} + ea = &serviceExport{} } ea.respType = respType } if accounts != nil { if ea == nil { - ea = &exportAuth{} + ea = &serviceExport{} } // empty means auth required but will be import token. if len(accounts) == 0 { @@ -403,11 +500,286 @@ func (a *Account) AddServiceExportWithResponse(subject string, respType ServiceR return nil } +// TrackServiceExport will enable latency tracking of the named service. +// Results will be published in this account to the given results subject. +func (a *Account) TrackServiceExport(service, results string) error { + return a.TrackServiceExportWithSampling(service, results, DEFAULT_SERVICE_LATENCY_SAMPLING) +} + +// TrackServiceExportWithSampling will enable latency tracking of the named service for the given +// sampling rate (1-100). Results will be published in this account to the given results subject. +func (a *Account) TrackServiceExportWithSampling(service, results string, sampling int) error { + if a == nil { + return ErrMissingAccount + } + + if sampling < 1 || sampling > 100 { + return ErrBadSampling + } + if !IsValidPublishSubject(results) { + return ErrBadPublishSubject + } + // Don't loop back on outselves. + if a.IsExportService(results) { + return ErrBadPublishSubject + } + + if a.srv != nil && !a.srv.EventsEnabled() { + return ErrNoSysAccount + } + + a.mu.Lock() + if a.exports.services == nil { + a.mu.Unlock() + return ErrMissingService + } + ea, ok := a.exports.services[service] + if !ok { + a.mu.Unlock() + return ErrMissingService + } + if ea == nil { + ea = &serviceExport{} + a.exports.services[service] = ea + } else if ea.respType != Singleton { + a.mu.Unlock() + return ErrBadServiceType + } + ea.latency = &serviceLatency{ + sampling: int8(sampling), + subject: results, + } + s := a.srv + a.mu.Unlock() + + if s == nil { + return nil + } + + // Now track down the imports and add in latency as needed to enable. + s.accounts.Range(func(k, v interface{}) bool { + acc := v.(*Account) + acc.mu.Lock() + for _, im := range acc.imports.services { + if im != nil && im.acc.Name == a.Name && subjectIsSubsetMatch(im.to, service) { + im.latency = ea.latency + } + } + acc.mu.Unlock() + return true + }) + + return nil +} + +// UnTrackServiceExport will disable latency tracking of the named service. +func (a *Account) UnTrackServiceExport(service string) { + if a == nil || (a.srv != nil && !a.srv.EventsEnabled()) { + return + } + + a.mu.Lock() + if a == nil || a.exports.services == nil { + a.mu.Unlock() + return + } + ea, ok := a.exports.services[service] + if !ok || ea == nil || ea.latency == nil { + a.mu.Unlock() + return + } + // We have latency here. + ea.latency = nil + s := a.srv + a.mu.Unlock() + + if s == nil { + return + } + + // Now track down the imports and clean them up. + s.accounts.Range(func(k, v interface{}) bool { + acc := v.(*Account) + acc.mu.Lock() + for _, im := range acc.imports.services { + if im != nil && im.acc.Name == a.Name { + if subjectIsSubsetMatch(im.to, service) { + im.latency, im.m1 = nil, nil + } + } + } + acc.mu.Unlock() + return true + }) +} + +// IsExportService will indicate if this service exists. Will check wildcard scenarios. +func (a *Account) IsExportService(service string) bool { + a.mu.RLock() + defer a.mu.RUnlock() + _, ok := a.exports.services[service] + if ok { + return true + } + tokens := strings.Split(service, tsep) + for subj := range a.exports.services { + if isSubsetMatch(tokens, subj) { + return true + } + } + return false +} + +// IsExportServiceTracking will indicate if given publish subject is an export service with tracking enabled. +func (a *Account) IsExportServiceTracking(service string) bool { + a.mu.RLock() + ea, ok := a.exports.services[service] + if ok && ea == nil { + a.mu.RUnlock() + return false + } + if ok && ea != nil && ea.latency != nil { + a.mu.RUnlock() + return true + } + // FIXME(dlc) - Might want to cache this is in the hot path checking for + // latency tracking. + tokens := strings.Split(service, tsep) + for subj, ea := range a.exports.services { + if isSubsetMatch(tokens, subj) && ea != nil && ea.latency != nil { + a.mu.RUnlock() + return true + } + } + a.mu.RUnlock() + return false +} + +// NATSLatency represents the internal NATS latencies, including RTTs to clients. +type NATSLatency struct { + Requestor time.Duration `json:"req"` + Responder time.Duration `json:"resp"` + System time.Duration `json:"sys"` +} + +// TotalTime is a helper function that totals the NATS latencies. +func (nl *NATSLatency) TotalTime() time.Duration { + return nl.Requestor + nl.Responder + nl.System +} + +// ServiceLatency is the JSON message sent out in response to latency tracking for +// exported services. +type ServiceLatency struct { + AppName string `json:"app,omitempty"` + RequestStart time.Time `json:"start"` + ServiceLatency time.Duration `json:"svc"` + NATSLatency NATSLatency `json:"nats"` + TotalLatency time.Duration `json:"total"` +} + +// Merge function to merge m1 and m2 (requestor and responder) measurements +// when there are two samples. This happens when the requestor and responder +// are on different servers. +// +// m2 ServiceLatency is correct, so use that. +// m1 TotalLatency is correct, so use that. +// Will use those to back into NATS latency. +func (m1 *ServiceLatency) merge(m2 *ServiceLatency) { + m1.AppName = m2.AppName + m1.NATSLatency.System = m1.ServiceLatency - (m2.ServiceLatency + m2.NATSLatency.Responder) + m1.ServiceLatency = m2.ServiceLatency + m1.NATSLatency.Responder = m2.NATSLatency.Responder + sanitizeLatencyMetric(m1) +} + +// sanitizeLatencyMetric adjusts latency metric values that could go +// negative in some edge conditions since we estimate client RTT +// for both requestor and responder. +// These numbers are never meant to be negative, it just could be +// how we back into the values based on estimated RTT. +func sanitizeLatencyMetric(sl *ServiceLatency) { + if sl.ServiceLatency < 0 { + sl.ServiceLatency = 0 + } + if sl.NATSLatency.System < 0 { + sl.NATSLatency.System = 0 + } +} + +// Used for transporting remote latency measurements. +type remoteLatency struct { + Account string `json:"account"` + ReqId string `json:"req_id"` + M2 ServiceLatency `json:"m2"` +} + +// sendTrackingMessage will send out the appropriate tracking information for the +// service request/response latency. This is called when the requestor's server has +// received the response. +// TODO(dlc) - holding locks for RTTs may be too much long term. Should revisit. +func (a *Account) sendTrackingLatency(si *serviceImport, requestor, responder *client) bool { + ts := time.Now() + serviceRTT := time.Duration(ts.UnixNano() - si.ts) + + var reqClientRTT = requestor.getRTTValue() + var respClientRTT time.Duration + var appName string + + if responder != nil && responder.kind == CLIENT { + respClientRTT = responder.getRTTValue() + appName = responder.GetName() + } + + // We will estimate time when request left the requestor by time we received + // and the client RTT for the requestor. + reqStart := time.Unix(0, si.ts-int64(reqClientRTT)) + sl := &ServiceLatency{ + AppName: appName, + RequestStart: reqStart, + ServiceLatency: serviceRTT - respClientRTT, + NATSLatency: NATSLatency{ + Requestor: reqClientRTT, + Responder: respClientRTT, + System: 0, + }, + TotalLatency: reqClientRTT + serviceRTT, + } + if respClientRTT > 0 { + sl.NATSLatency.System = time.Since(ts) + sl.TotalLatency += sl.NATSLatency.System + } + + sanitizeLatencyMetric(sl) + + // If we are expecting a remote measurement, store our sl here. + // We need to account for the race between this and us receiving the + // remote measurement. + // FIXME(dlc) - We need to clean these up but this should happen + // already with the auto-expire logic. + if responder != nil && responder.kind != CLIENT { + si.acc.mu.Lock() + if si.m1 != nil { + m1, m2 := sl, si.m1 + m1.merge(m2) + si.acc.mu.Unlock() + a.srv.sendInternalAccountMsg(a, si.latency.subject, m1) + return true + } + si.m1 = sl + si.acc.mu.Unlock() + return false + } else { + a.srv.sendInternalAccountMsg(a, si.latency.subject, sl) + } + return true +} + // numServiceRoutes returns the number of service routes on this account. func (a *Account) numServiceRoutes() int { a.mu.RLock() - defer a.mu.RUnlock() - return len(a.imports.services) + num := len(a.imports.services) + a.mu.RUnlock() + return num } // AddServiceImportWithClaim will add in the service import via the jwt claim. @@ -427,8 +799,8 @@ func (a *Account) AddServiceImportWithClaim(destination *Account, from, to strin return ErrServiceImportAuthorization } - a.addServiceImport(destination, from, to, imClaim) - return nil + _, err := a.addServiceImport(destination, from, to, imClaim) + return err } // AddServiceImport will add a route to an account to send published messages / requests @@ -439,6 +811,13 @@ func (a *Account) AddServiceImport(destination *Account, from, to string) error return a.AddServiceImportWithClaim(destination, from, to, nil) } +// NumServiceImports return number of service imports we have. +func (a *Account) NumServiceImports() int { + a.mu.RLock() + defer a.mu.RUnlock() + return len(a.imports.services) +} + // removeServiceImport will remove the route by subject. func (a *Account) removeServiceImport(subject string) { a.mu.Lock() @@ -448,9 +827,6 @@ func (a *Account) removeServiceImport(subject string) { } delete(a.imports.services, subject) a.mu.Unlock() - if a.srv != nil && a.srv.gateway.enabled { - a.srv.gatewayHandleServiceImport(a, []byte(subject), nil, -1) - } } // This tracks responses to service requests mappings. This is used for cleanup. @@ -580,37 +956,156 @@ func (a *Account) SetMaxResponseMaps(max int) { // Add a route to connect from an implicit route created for a response to a request. // This does no checks and should be only called by the msg processing code. Use // AddServiceImport from above if responding to user input or config changes, etc. -func (a *Account) addServiceImport(dest *Account, from, to string, claim *jwt.Import) *serviceImport { +func (a *Account) addServiceImport(dest *Account, from, to string, claim *jwt.Import) (*serviceImport, error) { rt := Singleton + var lat *serviceLatency + dest.mu.Lock() - if ae := dest.exports.services[to]; ae != nil { - rt = ae.respType + if ea := dest.getServiceExport(to); ea != nil { + rt = ea.respType + lat = ea.latency } dest.mu.Unlock() a.mu.Lock() if a.imports.services == nil { a.imports.services = make(map[string]*serviceImport) + } else if dup := a.imports.services[from]; dup != nil { + a.mu.Unlock() + return nil, fmt.Errorf("duplicate service import subject %q, previously used in import for account %q, subject %q", + from, dup.acc.Name, dup.to) } - si := &serviceImport{dest, claim, from, to, rt, 0, false, false, false} + si := &serviceImport{dest, claim, from, to, 0, rt, lat, nil, false, false, false, false} a.imports.services[from] = si a.mu.Unlock() - return si + return si, nil +} + +// Helper to detrmine when to sample. +func shouldSample(l *serviceLatency) bool { + if l == nil || l.sampling <= 0 { + return false + } + if l.sampling >= 100 { + return true + } + return rand.Int31n(100) <= int32(l.sampling) +} + +// Used to mimic client like replies. +const ( + replyPrefix = "_R_." + trackSuffix = ".T" + replyPrefixLen = len(replyPrefix) + baseServerLen = 10 + replyLen = 6 + minReplyLen = 15 + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + base = 62 +) + +// Will create a wildcard subscription to handle interest graph propagation for all +// service replies. +// Lock should not be held. +func (a *Account) createRespWildcard() []byte { + a.mu.Lock() + if a.prand == nil { + a.prand = rand.New(rand.NewSource(time.Now().UnixNano())) + } + var b = [baseServerLen]byte{'_', 'R', '_', '.'} + rn := a.prand.Int63() + for i, l := replyPrefixLen, rn; i < len(b); i++ { + b[i] = digits[l%base] + l /= base + } + a.siReply = append(b[:], '.') + s := a.srv + aName := a.Name + pre := a.siReply + wcsub := append(a.siReply, '>') + a.mu.Unlock() + + // Check to see if we need to propagate interest. + if s != nil { + now := time.Now() + c := &client{srv: a.srv, acc: a, kind: SYSTEM, opts: internalOpts, msubs: -1, mpay: -1, start: now, last: now} + sub := &subscription{client: c, subject: wcsub} + s.updateRouteSubscriptionMap(a, sub, 1) + if s.gateway.enabled { + s.gatewayUpdateSubInterest(aName, sub, 1) + a.mu.Lock() + a.siReplyClient = c + a.mu.Unlock() + } + } + + return pre +} + +func (a *Account) replyClient() *client { + a.mu.RLock() + c := a.siReplyClient + a.mu.RUnlock() + return c +} + +// Test whether this is a tracked reply. +func isTrackedReply(reply []byte) bool { + lreply := len(reply) - 1 + return lreply > 3 && reply[lreply-1] == '.' && reply[lreply] == 'T' +} + +// Generate a new service reply from the wildcard prefix. +// FIXME(dlc) - probably do not have to use rand here. about 25ns per. +func (a *Account) newServiceReply(tracking bool) []byte { + a.mu.RLock() + replyPre := a.siReply + s := a.srv + a.mu.RUnlock() + + if replyPre == nil { + replyPre = a.createRespWildcard() + } + + var b [replyLen]byte + rn := a.prand.Int63() + for i, l := 0, rn; i < len(b); i++ { + b[i] = digits[l%base] + l /= base + } + // Make sure to copy. + reply := make([]byte, 0, len(replyPre)+len(b)) + reply = append(reply, replyPre...) + reply = append(reply, b[:]...) + + if tracking && s.sys != nil { + // Add in our tracking identifier. This allows the metrics to get back to only + // this server without needless SUBS/UNSUBS. + reply = append(reply, '.') + reply = append(reply, s.sys.shash...) + reply = append(reply, '.', 'T') + } + return reply } // This is for internal responses. -func (a *Account) addResponseServiceImport(dest *Account, from, to string, rt ServiceRespType) *serviceImport { +func (a *Account) addRespServiceImport(dest *Account, from, to string, rt ServiceRespType, lat *serviceLatency) *serviceImport { a.mu.Lock() if a.imports.services == nil { a.imports.services = make(map[string]*serviceImport) } + // dest is the requestor's account. a is the service responder with the export. ae := rt == Singleton - si := &serviceImport{dest, nil, from, to, rt, 0, ae, true, false} + si := &serviceImport{dest, nil, from, to, 0, rt, nil, nil, ae, true, false, false} a.imports.services[from] = si if ae { a.nae++ - si.ts = time.Now().Unix() + si.ts = time.Now().UnixNano() + if lat != nil { + si.latency = lat + si.tracking = true + } if a.nae > a.maxnae && !a.pruning { a.pruning = true go a.pruneAutoExpireResponseMaps() @@ -648,14 +1143,14 @@ func (a *Account) pruneAutoExpireResponseMaps() { }() a.mu.RLock() - ttl := int64(a.maxaettl/time.Second) + 1 + ttl := int64(a.maxaettl) a.mu.RUnlock() for { sis := a.autoExpireResponseMaps() // Check ttl items. - now := time.Now().Unix() + now := time.Now().UnixNano() for i, si := range sis { if now-si.ts >= ttl { a.removeServiceImport(si.from) @@ -692,16 +1187,25 @@ func (a *Account) AddStreamImportWithClaim(account *Account, from, prefix string return ErrStreamImportAuthorization } + // Check prefix if it exists and make sure its a literal. + // Append token separator if not already present. + if prefix != "" { + // Make sure there are no wildcards here, this prefix needs to be a literal + // since it will be prepended to a publish subject. + if !subjectIsLiteral(prefix) { + return ErrStreamImportBadPrefix + } + if prefix[len(prefix)-1] != btsep { + prefix = prefix + string(btsep) + } + } a.mu.Lock() - defer a.mu.Unlock() if a.imports.streams == nil { a.imports.streams = make(map[string]*streamImport) } - if prefix != "" && prefix[len(prefix)-1] != btsep { - prefix = prefix + string(btsep) - } // TODO(dlc) - collisions, etc. a.imports.streams[from] = &streamImport{account, from, prefix, imClaim, false} + a.mu.Unlock() return nil } @@ -716,18 +1220,18 @@ var IsPublicExport = []*Account(nil) // AddStreamExport will add an export to the account. If accounts is nil // it will signify a public export, meaning anyone can impoort. func (a *Account) AddStreamExport(subject string, accounts []*Account) error { - a.mu.Lock() - defer a.mu.Unlock() if a == nil { return ErrMissingAccount } + a.mu.Lock() + defer a.mu.Unlock() if a.exports.streams == nil { - a.exports.streams = make(map[string]*exportAuth) + a.exports.streams = make(map[string]*streamExport) } ea := a.exports.streams[subject] if accounts != nil { if ea == nil { - ea = &exportAuth{} + ea = &streamExport{} } // empty means auth required but will be import token. if len(accounts) == 0 { @@ -758,12 +1262,51 @@ func (a *Account) checkStreamImportAuthorizedNoLock(account *Account, subject st if a.exports.streams == nil || !IsValidSubject(subject) { return false } - return a.checkExportApproved(account, subject, imClaim, a.exports.streams) + return a.checkStreamExportApproved(account, subject, imClaim) } -func (a *Account) checkExportApproved(account *Account, subject string, imClaim *jwt.Import, m map[string]*exportAuth) bool { +func (a *Account) checkAuth(ea *exportAuth, account *Account, imClaim *jwt.Import) bool { + // if ea is nil or ea.approved is nil, that denotes a public export + if ea == nil || (ea.approved == nil && !ea.tokenReq) { + return true + } + // Check if token required + if ea.tokenReq { + return a.checkActivation(account, imClaim, true) + } + // If we have a matching account we are authorized + _, ok := ea.approved[account.Name] + return ok +} + +func (a *Account) checkStreamExportApproved(account *Account, subject string, imClaim *jwt.Import) bool { + // Check direct match of subject first + ea, ok := a.exports.streams[subject] + if ok { + if ea == nil { + return true + } + return a.checkAuth(&ea.exportAuth, account, imClaim) + } + // ok if we are here we did not match directly so we need to test each one. + // The import subject arg has to take precedence, meaning the export + // has to be a true subset of the import claim. We already checked for + // exact matches above. + tokens := strings.Split(subject, tsep) + for subj, ea := range a.exports.streams { + if isSubsetMatch(tokens, subj) { + if ea == nil { + return true + } + return a.checkAuth(&ea.exportAuth, account, imClaim) + } + } + return false +} + +func (a *Account) checkServiceExportApproved(account *Account, subject string, imClaim *jwt.Import) bool { // Check direct match of subject first - ea, ok := m[subject] + ea, ok := a.exports.services[subject] if ok { // if ea is nil or eq.approved is nil, that denotes a public export if ea == nil || (ea.approved == nil && !ea.tokenReq) { @@ -782,7 +1325,7 @@ func (a *Account) checkExportApproved(account *Account, subject string, imClaim // has to be a true subset of the import claim. We already checked for // exact matches above. tokens := strings.Split(subject, tsep) - for subj, ea := range m { + for subj, ea := range a.exports.services { if isSubsetMatch(tokens, subj) { if ea == nil || ea.approved == nil && !ea.tokenReq { return true @@ -798,6 +1341,30 @@ func (a *Account) checkExportApproved(account *Account, subject string, imClaim return false } +// Helper function to get a serviceExport. +// Lock should be held on entry. +func (a *Account) getServiceExport(subj string) *serviceExport { + ea, ok := a.exports.services[subj] + // The export probably has a wildcard, so lookup that up. + if !ok { + ea = a.getWildcardServiceExport(subj) + } + return ea +} + +// This helper is used when trying to match a serviceExport record that is +// represented by a wildcard. +// Lock should be held on entry. +func (a *Account) getWildcardServiceExport(to string) *serviceExport { + tokens := strings.Split(to, tsep) + for subj, ea := range a.exports.services { + if isSubsetMatch(tokens, subj) { + return ea + } + } + return nil +} + // Will fetch the activation token for an import. func fetchActivation(url string) string { // FIXME(dlc) - Make configurable. @@ -939,16 +1506,15 @@ func (a *Account) isIssuerClaimTrusted(claims *jwt.ActivationClaims) bool { if claims.IssuerAccount == "" { return true } - // get the referenced account - if a.srv != nil { - ia, err := a.srv.lookupAccount(claims.IssuerAccount) - if err != nil { - return false + // If the IssuerAccount is not us, then this is considered an error. + if a.Name != claims.IssuerAccount { + if a.srv != nil { + a.srv.Errorf("Invalid issuer account %q in activation claim (subject: %q - type: %q) for account %q", + claims.IssuerAccount, claims.Activation.ImportSubject, claims.Activation.ImportType, a.Name) } - return ia.hasIssuer(claims.Issuer) + return false } - // couldn't verify - return false + return a.hasIssuerNoLock(claims.Issuer) } // Returns true if `a` and `b` stream imports are the same. Note that the @@ -1005,10 +1571,12 @@ func (a *Account) checkServiceExportsEqual(b *Account) bool { return true } +// Check if another account is authorized to route requests to this service. func (a *Account) checkServiceImportAuthorized(account *Account, subject string, imClaim *jwt.Import) bool { a.mu.RLock() - defer a.mu.RUnlock() - return a.checkServiceImportAuthorizedNoLock(account, subject, imClaim) + authorized := a.checkServiceImportAuthorizedNoLock(account, subject, imClaim) + a.mu.RUnlock() + return authorized } // Check if another account is authorized to route requests to this service. @@ -1017,7 +1585,7 @@ func (a *Account) checkServiceImportAuthorizedNoLock(account *Account, subject s if a.exports.services == nil || !IsValidLiteralSubject(subject) { return false } - return a.checkExportApproved(account, subject, imClaim, a.exports.services) + return a.checkServiceExportApproved(account, subject, imClaim) } // IsExpired returns expiration status. @@ -1100,7 +1668,13 @@ func (a *Account) checkExpiration(claims *jwt.ClaimsData) { // issuer or it is a signing key for the account. func (a *Account) hasIssuer(issuer string) bool { a.mu.RLock() - defer a.mu.RUnlock() + hi := a.hasIssuerNoLock(issuer) + a.mu.RUnlock() + return hi +} + +// hasIssuerNoLock is the unlocked version of hasIssuer +func (a *Account) hasIssuerNoLock(issuer string) bool { // same issuer if a.Issuer == issuer { return true @@ -1113,6 +1687,14 @@ func (a *Account) hasIssuer(issuer string) bool { return false } +// Returns the loop detection subject used for leafnodes +func (a *Account) getLds() string { + a.mu.RLock() + lds := a.lds + a.mu.RUnlock() + return lds +} + // Placeholder for signaling token auth required. var tokenAuthReq = []*Account{} @@ -1133,8 +1715,9 @@ func (s *Server) SetAccountResolver(ar AccountResolver) { // AccountResolver returns the registered account resolver. func (s *Server) AccountResolver() AccountResolver { s.mu.Lock() - defer s.mu.Unlock() - return s.accResolver + ar := s.accResolver + s.mu.Unlock() + return ar } // UpdateAccountClaims will call updateAccountClaims. @@ -1144,6 +1727,7 @@ func (s *Server) UpdateAccountClaims(a *Account, ac *jwt.AccountClaims) { // updateAccountClaims will update an existing account with new claims. // This will replace any exports or imports previously defined. +// Lock MUST NOT be held upon entry. func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { if a == nil { return @@ -1153,11 +1737,26 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { a.mu.Lock() // Clone to update, only select certain fields. - old := &Account{Name: a.Name, imports: a.imports, exports: a.exports, limits: a.limits, signingKeys: a.signingKeys} + old := &Account{Name: a.Name, exports: a.exports, limits: a.limits, signingKeys: a.signingKeys} // Reset exports and imports here. a.exports = exportMap{} - a.imports = importMap{} + + // Imports are checked unlocked in processInbound, so we can't change out the struct here. Need to process inline. + if a.imports.streams != nil { + old.imports.streams = make(map[string]*streamImport, len(a.imports.streams)) + } + if a.imports.services != nil { + old.imports.services = make(map[string]*serviceImport, len(a.imports.services)) + } + for k, v := range a.imports.streams { + old.imports.streams[k] = v + delete(a.imports.streams, k) + } + for k, v := range a.imports.services { + old.imports.services[k] = v + delete(a.imports.services, k) + } // Reset any notion of export revocations. a.actsRevoked = nil @@ -1208,7 +1807,12 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { rt = Chunked } if err := a.AddServiceExportWithResponse(string(e.Subject), rt, authAccounts(e.TokenReq)); err != nil { - s.Debugf("Error adding service export to account [%s]: %v", a.Name, err.Error()) + s.Debugf("Error adding service export to account [%s]: %v", a.Name, err) + } + if e.Latency != nil { + if err := a.TrackServiceExportWithSampling(string(e.Subject), string(e.Latency.Results), e.Latency.Sampling); err != nil { + s.Debugf("Error adding latency tracking for service export to account [%s]: %v", a.Name, err) + } } } // We will track these at the account level. Should not have any collisions. @@ -1224,15 +1828,10 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { } } for _, i := range ac.Imports { - var acc *Account - if v, ok := s.accounts.Load(i.Account); ok { - acc = v.(*Account) - } - if acc == nil { - if acc, _ = s.fetchAccount(i.Account); acc == nil { - s.Debugf("Can't locate account [%s] for import of [%v] %s", i.Account, i.Subject, i.Type) - continue - } + acc, err := s.lookupAccount(i.Account) + if acc == nil || err != nil { + s.Errorf("Can't locate account [%s] for import of [%v] %s (err=%v)", i.Account, i.Subject, i.Type, err) + continue } switch i.Type { case jwt.Stream: @@ -1256,19 +1855,26 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { } // Now check if stream exports have changed. if !a.checkStreamExportsEqual(old) || signersChanged { - clients := make([]*client, 0, 16) + clients := map[*client]struct{}{} // We need to check all accounts that have an import claim from this account. awcsti := map[string]struct{}{} s.accounts.Range(func(k, v interface{}) bool { acc := v.(*Account) + // Move to the next if this account is actually account "a". + if acc.Name == a.Name { + return true + } + // TODO: checkStreamImportAuthorized() stack should not be trying + // to lock "acc". If we find that to be needed, we will need to + // rework this to ensure we don't lock acc. acc.mu.Lock() for _, im := range acc.imports.streams { if im != nil && im.acc.Name == a.Name { // Check for if we are still authorized for an import. - im.invalid = !a.checkStreamImportAuthorizedNoLock(im.acc, im.from, im.claim) + im.invalid = !a.checkStreamImportAuthorized(acc, im.from, im.claim) awcsti[acc.Name] = struct{}{} for _, c := range acc.clients { - clients = append(clients, c) + clients[c] = struct{}{} } } } @@ -1276,7 +1882,7 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { return true }) // Now walk clients. - for _, c := range clients { + for c := range clients { c.processSubsOnConfigReload(awcsti) } } @@ -1284,11 +1890,18 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { if !a.checkServiceExportsEqual(old) || signersChanged { s.accounts.Range(func(k, v interface{}) bool { acc := v.(*Account) + // Move to the next if this account is actually account "a". + if acc.Name == a.Name { + return true + } + // TODO: checkServiceImportAuthorized() stack should not be trying + // to lock "acc". If we find that to be needed, we will need to + // rework this to ensure we don't lock acc. acc.mu.Lock() for _, im := range acc.imports.services { if im != nil && im.acc.Name == a.Name { // Check for if we are still authorized for an import. - im.invalid = !a.checkServiceImportAuthorizedNoLock(a, im.to, im.claim) + im.invalid = !a.checkServiceImportAuthorized(acc, im.to, im.claim) } } acc.mu.Unlock() @@ -1315,14 +1928,17 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { clients := gatherClients() // Sort if we are over the limit. - if a.maxTotalConnectionsReached() { + if a.MaxTotalConnectionsReached() { sort.Slice(clients, func(i, j int) bool { return clients[i].start.After(clients[j].start) }) } now := time.Now().Unix() for i, c := range clients { - if a.mconns != jwt.NoLimit && i >= int(a.mconns) { + a.mu.RLock() + exceeded := a.mconns != jwt.NoLimit && i >= int(a.mconns) + a.mu.RUnlock() + if exceeded { c.maxAccountConnExceeded() continue } @@ -1360,9 +1976,18 @@ func (s *Server) updateAccountClaims(a *Account, ac *jwt.AccountClaims) { } // Helper to build an internal account structure from a jwt.AccountClaims. +// Lock MUST NOT be held upon entry. func (s *Server) buildInternalAccount(ac *jwt.AccountClaims) *Account { acc := NewAccount(ac.Subject) acc.Issuer = ac.Issuer + // Set this here since we are placing in s.tmpAccounts below and may be + // referenced by an route RS+, etc. + s.setAccountSublist(acc) + + // We don't want to register an account that is in the process of + // being built, however, to solve circular import dependencies, we + // need to store it here. + s.tmpAccounts.Store(ac.Subject, acc) s.updateAccountClaims(acc, ac) return acc } @@ -1395,16 +2020,13 @@ func buildInternalNkeyUser(uc *jwt.UserClaims, acc *Account) *NkeyUser { } if uc.Resp != nil { if p == nil { - p = &Permissions{Publish: &SubjectPermission{}} - } - if p.Publish.Allow == nil { - // We turn off the blanket allow statement. - p.Publish.Allow = []string{} + p = &Permissions{} } p.Response = &ResponsePermission{ MaxMsgs: uc.Resp.MaxMsgs, Expires: uc.Resp.Expires, } + validateResponsePermissions(p) } nu.Permissions = p return nu @@ -1447,12 +2069,18 @@ func NewURLAccResolver(url string) (*URLAccResolver, error) { if !strings.HasSuffix(url, "/") { url += "/" } - // Do basic test to see if anyone is home. - // FIXME(dlc) - Make timeout configurable post MVP. + + // FIXME(dlc) - Make timeout and others configurable. + // We create our own transport to amortize TLS. + tr := &http.Transport{ + MaxIdleConns: 10, + IdleConnTimeout: 30 * time.Second, + } ur := &URLAccResolver{ url: url, - c: &http.Client{Timeout: 2 * time.Second}, + c: &http.Client{Timeout: 2 * time.Second, Transport: tr}, } + // Do basic test to see if anyone is home. if _, err := ur.Fetch(""); err != nil { return nil, err } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/auth.go b/vendor/github.com/nats-io/nats-server/v2/server/auth.go index 2d8f8ea1..c5f081da 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/auth.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/auth.go @@ -193,6 +193,31 @@ func (s *Server) assignGlobalAccountToOrphanUsers() { } } +// If the given permissions has a ResponsePermission +// set, ensure that defaults are set (if values are 0) +// and that a Publish permission is set, and Allow +// is disabled if not explicitly set. +func validateResponsePermissions(p *Permissions) { + if p == nil || p.Response == nil { + return + } + if p.Publish == nil { + p.Publish = &SubjectPermission{} + } + if p.Publish.Allow == nil { + // We turn off the blanket allow statement. + p.Publish.Allow = []string{} + } + // If there is a response permission, ensure + // that if value is 0, we set the default value. + if p.Response.MaxMsgs == 0 { + p.Response.MaxMsgs = DEFAULT_ALLOW_RESPONSE_MAX_MSGS + } + if p.Response.Expires == 0 { + p.Response.Expires = DEFAULT_ALLOW_RESPONSE_EXPIRATION + } +} + // configureAuthorization will do any setup needed for authorization. // Lock is assumed held. func (s *Server) configureAuthorization() { @@ -220,6 +245,9 @@ func (s *Server) configureAuthorization() { copy.Account = v.(*Account) } } + if copy.Permissions != nil { + validateResponsePermissions(copy.Permissions) + } s.nkeys[u.Nkey] = copy } } @@ -232,6 +260,9 @@ func (s *Server) configureAuthorization() { copy.Account = v.(*Account) } } + if copy.Permissions != nil { + validateResponsePermissions(copy.Permissions) + } s.users[u.Username] = copy } } @@ -265,23 +296,19 @@ func (s *Server) checkAuthentication(c *client) bool { // isClientAuthorized will check the client against the proper authorization method and data. // This could be nkey, token, or username/password based. func (s *Server) isClientAuthorized(c *client) bool { - // Snapshot server options by hand and only grab what we really need. - s.optsMu.RLock() - customClientAuthentication := s.opts.CustomClientAuthentication - authorization := s.opts.Authorization - username := s.opts.Username - password := s.opts.Password - tlsMap := s.opts.TLSMap - s.optsMu.RUnlock() + opts := s.getOpts() // Check custom auth first, then jwts, then nkeys, then // multiple users with TLS map if enabled, then token, // then single user/pass. - if customClientAuthentication != nil { - return customClientAuthentication.Check(c) + if opts.CustomClientAuthentication != nil { + return opts.CustomClientAuthentication.Check(c) } - // Grab under lock but process after. + return s.processClientOrLeafAuthentication(c) +} + +func (s *Server) processClientOrLeafAuthentication(c *client) bool { var ( nkey *NkeyUser juc *jwt.UserClaims @@ -289,6 +316,7 @@ func (s *Server) isClientAuthorized(c *client) bool { user *User ok bool err error + opts = s.getOpts() ) s.mu.Lock() @@ -333,7 +361,7 @@ func (s *Server) isClientAuthorized(c *client) bool { } } else if hasUsers { // Check if we are tls verify and are mapping users from the client_certificate - if tlsMap { + if opts.TLSMap { var euser string authorized := checkClientTLSCertSubject(c, func(u string) bool { var ok bool @@ -417,7 +445,9 @@ func (s *Server) isClientAuthorized(c *client) bool { } nkey = buildInternalNkeyUser(juc, acc) - c.RegisterNkeyUser(nkey) + if err := c.RegisterNkeyUser(nkey); err != nil { + return false + } // Generate an event if we have a system account. s.accountConnectEvent(c) @@ -450,7 +480,9 @@ func (s *Server) isClientAuthorized(c *client) bool { c.Debugf("Signature not verified") return false } - c.RegisterNkeyUser(nkey) + if err := c.RegisterNkeyUser(nkey); err != nil { + return false + } return true } @@ -460,17 +492,27 @@ func (s *Server) isClientAuthorized(c *client) bool { // for pub/sub authorizations. if ok { c.RegisterUser(user) + // Generate an event if we have a system account and this is not the $G account. + s.accountConnectEvent(c) } return ok } - if authorization != "" { - return comparePasswords(authorization, c.opts.Authorization) - } else if username != "" { - if username != c.opts.Username { - return false + if c.kind == CLIENT { + if opts.Authorization != "" { + return comparePasswords(opts.Authorization, c.opts.Authorization) + } else if opts.Username != "" { + if opts.Username != c.opts.Username { + return false + } + return comparePasswords(opts.Password, c.opts.Password) } - return comparePasswords(password, c.opts.Password) + } else if c.kind == LEAF { + // There is no required username/password to connect and + // there was no u/p in the CONNECT or none that matches the + // know users. Register the leaf connection with global account + // or the one specified in config (if provided). + return s.registerLeafWithAccount(c, opts.LeafNode.Account) } return false @@ -574,120 +616,62 @@ func (s *Server) isGatewayAuthorized(c *client) bool { return comparePasswords(opts.Gateway.Password, c.opts.Password) } -// isLeafNodeAuthorized will check for auth for an inbound leaf node connection. -func (s *Server) isLeafNodeAuthorized(c *client) bool { - // FIXME(dlc) - This is duplicated from client auth, should be able to combine - // and not fail so bad on DRY. - - // Grab under lock but process after. - var ( - juc *jwt.UserClaims - acc *Account - err error - ) - - s.mu.Lock() - - // Check if we have trustedKeys defined in the server. If so we require a user jwt. - if s.trustedKeys != nil { - if c.opts.JWT == "" { - s.mu.Unlock() - c.Debugf("Authentication requires a user JWT") - return false - } - // So we have a valid user jwt here. - juc, err = jwt.DecodeUserClaims(c.opts.JWT) +func (s *Server) registerLeafWithAccount(c *client, account string) bool { + var err error + acc := s.globalAccount() + if account != _EMPTY_ { + acc, err = s.lookupAccount(account) if err != nil { - s.mu.Unlock() - c.Debugf("User JWT not valid: %v", err) - return false - } - vr := jwt.CreateValidationResults() - juc.Validate(vr) - if vr.IsBlocking(true) { - s.mu.Unlock() - c.Debugf("User JWT no longer valid: %+v", vr) + s.Errorf("authentication of user %q failed, unable to lookup account %q: %v", + c.opts.Username, account, err) return false } } - s.mu.Unlock() + if err = c.registerWithAccount(acc); err != nil { + return false + } + return true +} - // If we have a jwt and a userClaim, make sure we have the Account, etc associated. - // We need to look up the account. This will use an account resolver if one is present. - if juc != nil { - issuer := juc.Issuer - if juc.IssuerAccount != "" { - issuer = juc.IssuerAccount - } - if acc, err = s.LookupAccount(issuer); acc == nil { - c.Debugf("Account JWT lookup error: %v", err) - return false - } - if !s.isTrustedIssuer(acc.Issuer) { - c.Debugf("Account JWT not signed by trusted operator") - return false - } - if juc.IssuerAccount != "" && !acc.hasIssuer(juc.Issuer) { - c.Debugf("User JWT issuer is not known") - return false - } - if acc.IsExpired() { - c.Debugf("Account JWT has expired") - return false - } - // Verify the signature against the nonce. - if c.opts.Sig == "" { - c.Debugf("Signature missing") - return false - } - sig, err := base64.RawURLEncoding.DecodeString(c.opts.Sig) - if err != nil { - // Allow fallback to normal base64. - sig, err = base64.StdEncoding.DecodeString(c.opts.Sig) - if err != nil { - c.Debugf("Signature not valid base64") - return false - } - } - pub, err := nkeys.FromPublicKey(juc.Subject) - if err != nil { - c.Debugf("User nkey not valid: %v", err) +// isLeafNodeAuthorized will check for auth for an inbound leaf node connection. +func (s *Server) isLeafNodeAuthorized(c *client) bool { + opts := s.getOpts() + + isAuthorized := func(username, password, account string) bool { + if username != c.opts.Username { return false } - if err := pub.Verify(c.nonce, sig); err != nil { - c.Debugf("Signature not verified") + if !comparePasswords(password, c.opts.Password) { return false } + return s.registerLeafWithAccount(c, account) + } - nkey := buildInternalNkeyUser(juc, acc) - if err := c.RegisterNkeyUser(nkey); err != nil { - return false + // If leafnodes config has an authorization{} stanza, this takes precedence. + // The user in CONNECT mutch match. We will bind to the account associated + // with that user (from the leafnode's authorization{} config). + if opts.LeafNode.Username != _EMPTY_ { + return isAuthorized(opts.LeafNode.Username, opts.LeafNode.Password, opts.LeafNode.Account) + } else if len(opts.LeafNode.Users) > 0 { + // This is expected to be a very small array. + for _, u := range opts.LeafNode.Users { + if u.Username == c.opts.Username { + var accName string + if u.Account != nil { + accName = u.Account.Name + } + return isAuthorized(u.Username, u.Password, accName) + } } - - // Generate an event if we have a system account. - s.accountConnectEvent(c) - - // Check if we need to set an auth timer if the user jwt expires. - c.checkExpiration(juc.Claims()) - return true + return false } - // FIXME(dlc) - Add ability to support remote account bindings via - // other auth like user or nkey and tlsMapping. - - // For now this means we are binding the leafnode to the global account. - c.registerWithAccount(s.globalAccount()) + // We are here if we accept leafnode connections without any credential. - // Snapshot server options. - opts := s.getOpts() - - if opts.LeafNode.Username == "" { - return true - } - if opts.LeafNode.Username != c.opts.Username { - return false - } - return comparePasswords(opts.LeafNode.Password, c.opts.Password) + // Still, if the CONNECT has some user info, we will bind to the + // user's account or to the specified default account (if provided) + // or to the global account. + return s.processClientOrLeafAuthentication(c) } // Support for bcrypt stored passwords and tokens. diff --git a/vendor/github.com/nats-io/nats-server/v2/server/client.go b/vendor/github.com/nats-io/nats-server/v2/server/client.go index 830d6237..5c25ad29 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/client.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/client.go @@ -22,6 +22,7 @@ import ( "math/rand" "net" "regexp" + "runtime" "strings" "sync" "sync/atomic" @@ -70,12 +71,18 @@ const ( shortsToShrink = 2 // Trigger to shrink dynamic buffers maxFlushPending = 10 // Max fsps to have in order to wait for writeLoop readLoopReport = 2 * time.Second + + // Server should not send a PING (for RTT) before the first PONG has + // been sent to the client. However, in case some client libs don't + // send CONNECT+PING, cap the maximum time before server can send + // the RTT PING. + maxNoRTTPingBeforeFirstPong = 2 * time.Second ) var readLoopReportThreshold = readLoopReport // Represent client booleans with a bitmask -type clientFlag byte +type clientFlag uint16 // Some client state represented as flags const ( @@ -86,6 +93,8 @@ const ( clearConnection // Marks that clearConnection has already been called. flushOutbound // Marks client as having a flushOutbound call in progress. noReconnect // Indicate that on close, this connection should not attempt a reconnect + closeConnection // Marks that closeConnection has already been called. + leafAllSubsSent // Indicates that a leaf node has sent the subscription list ) // set the flag (would be equivalent to set the boolean to true) @@ -151,12 +160,18 @@ const ( const pmrNoFlag int = 0 const ( pmrCollectQueueNames int = 1 << iota - pmrTreatGatewayAsClient + pmrIgnoreEmptyQueueFilter + pmrAllowSendFromRouteToRoute ) type client struct { // Here first because of use of atomics, and memory alignment. stats + // Indicate if we should check gwrm or not. Since checking gwrm is done + // when processing inbound messages and requires the lock we want to + // check only when needed. This is set/get using atomic, so needs to + // be memory aligned. + cgwrt int32 mpay int32 msubs int32 mcl int32 @@ -187,18 +202,23 @@ type client struct { last time.Time parseState - rtt time.Duration - rttStart time.Time + rtt time.Duration + rttStart time.Time + rrTracking map[string]*remoteLatency + rrMax int route *route gw *gateway leaf *leaf + // To keep track of gateway replies mapping + gwrm map[string]*gwReplyMap + + flags clientFlag // Compact booleans into a single field. Size will be increased when needed. + debug bool trace bool echo bool - - flags clientFlag // Compact booleans into a single field. Size will be increased when needed. } // Struct for PING initiation from the server. @@ -215,15 +235,15 @@ type outbound struct { nb net.Buffers // net.Buffers for writev IO sz int32 // limit size per []byte, uses variable BufSize constants, start, min, max. sws int32 // Number of short writes, used for dynamic resizing. - pb int32 // Total pending/queued bytes. + pb int64 // Total pending/queued bytes. pm int32 // Total pending/queued messages. + fsp int32 // Flush signals that are pending per producer from readLoop's pcd. sg *sync.Cond // Flusher conditional for signaling to writeLoop. wdl time.Duration // Snapshot of write deadline. - mp int32 // Snapshot of max pending for client. - fsp int32 // Flush signals that are pending per producer from readLoop's pcd. + mp int64 // Snapshot of max pending for client. lft time.Duration // Last flush time for Write. - lwb int32 // Last byte size of Write. stc chan struct{} // Stall chan we create to slow down producers on overrun, e.g. fan-in. + lwb int32 // Last byte size of Write. sgw bool // Indicate flusher is waiting on condition wait. } @@ -280,7 +300,6 @@ type readCache struct { // This is for routes and gateways to have their own L1 as well that is account aware. pacache map[string]*perAccountCache - losc int64 // last orphan subs check // This is for when we deliver messages across a route. We use this structure // to make sure to only send one message and properly scope to queues as needed. @@ -300,13 +319,13 @@ type readCache struct { const ( defaultMaxPerAccountCacheSize = 4096 defaultPrunePerAccountCacheSize = 256 - defaultOrphanSubsCheckInterval = int64(5 * 60) //5 min in number of seconds + defaultClosedSubsCheckInterval = 5 * time.Minute ) var ( maxPerAccountCacheSize = defaultMaxPerAccountCacheSize prunePerAccountCacheSize = defaultPrunePerAccountCacheSize - orphanSubsCheckInterval = defaultOrphanSubsCheckInterval + closedSubsCheckInterval = defaultClosedSubsCheckInterval ) // perAccountCache is for L1 semantics for inbound messages from a route or gateway to mimic the performance of clients. @@ -320,6 +339,15 @@ func (c *client) String() (id string) { return c.ncs } +// GetName returns the application supplied name for the connection. +func (c *client) GetName() string { + c.mu.Lock() + name := c.opts.Name + c.mu.Unlock() + return name +} + +// GetOpts returns the client options provided by the application. func (c *client) GetOpts() *clientOpts { return &c.opts } @@ -340,27 +368,28 @@ func (c *client) GetTLSConnectionState() *tls.ConnectionState { // FIXME(dlc) - This is getting bloated for normal subs, need // to optionally have an opts section for non-normal stuff. type subscription struct { - nm int64 // Will atomically be set to -1 on unsub or connection close client *client im *streamImport // This is for import stream support. shadow []*subscription // This is to track shadowed accounts. subject []byte queue []byte sid []byte + nm int64 max int64 qw int32 + closed int32 } // Indicate that this subscription is closed. // This is used in pruning of route and gateway cache items. func (s *subscription) close() { - atomic.StoreInt64(&s.nm, -1) + atomic.StoreInt32(&s.closed, 1) } // Return true if this subscription was unsubscribed // or its connection has been closed. func (s *subscription) isClosed() bool { - return atomic.LoadInt64(&s.nm) == -1 + return atomic.LoadInt32(&s.closed) == 1 } type clientOpts struct { @@ -404,7 +433,7 @@ func (c *client) initClient() { opts := s.getOpts() // Snapshots to avoid mutex access in fast paths. c.out.wdl = opts.WriteDeadline - c.out.mp = int32(opts.MaxPending) + c.out.mp = opts.MaxPending c.subs = make(map[string]*subscription) c.echo = true @@ -599,6 +628,18 @@ func (c *client) RegisterNkeyUser(user *NkeyUser) error { return nil } +func splitSubjectQueue(sq string) ([]byte, []byte, error) { + vals := strings.Fields(strings.TrimSpace(sq)) + s := []byte(vals[0]) + var q []byte + if len(vals) == 2 { + q = []byte(vals[1]) + } else if len(vals) > 2 { + return nil, nil, fmt.Errorf("invalid subject-queue %q", sq) + } + return s, q, nil +} + // Initializes client.perms structure. // Lock is held on entry. func (c *client) setPermissions(perms *Permissions) { @@ -635,11 +676,17 @@ func (c *client) setPermissions(perms *Permissions) { // Loop over subscribe permissions if perms.Subscribe != nil { + var err error if len(perms.Subscribe.Allow) > 0 { c.perms.sub.allow = NewSublistWithCache() } for _, subSubject := range perms.Subscribe.Allow { - sub := &subscription{subject: []byte(subSubject)} + sub := &subscription{} + sub.subject, sub.queue, err = splitSubjectQueue(subSubject) + if err != nil { + c.Errorf("%s", err.Error()) + continue + } c.perms.sub.allow.Insert(sub) } if len(perms.Subscribe.Deny) > 0 { @@ -648,7 +695,12 @@ func (c *client) setPermissions(perms *Permissions) { c.darray = perms.Subscribe.Deny } for _, subSubject := range perms.Subscribe.Deny { - sub := &subscription{subject: []byte(subSubject)} + sub := &subscription{} + sub.subject, sub.queue, err = splitSubjectQueue(subSubject) + if err != nil { + c.Errorf("%s", err.Error()) + continue + } c.perms.sub.deny.Insert(sub) } } @@ -757,7 +809,6 @@ func (c *client) readLoop() { nc := c.nc s := c.srv c.in.rsz = startBufSize - c.in.losc = time.Now().Unix() // Snapshot max control line since currently can not be changed on reload and we // were checking it on each call to parse. If this changes and we allow MaxControlLine // to be reloaded without restart, this code will need to change. @@ -768,6 +819,10 @@ func (c *client) readLoop() { } } defer s.grWG.Done() + // Check the per-account-cache for closed subscriptions + cpacc := c.kind == ROUTER || c.kind == GATEWAY + // Last per-account-cache check for closed subscriptions + lpacc := time.Now() c.mu.Unlock() if nc == nil { @@ -781,7 +836,6 @@ func (c *client) readLoop() { }() // Start read buffer. - b := make([]byte, c.in.rsz) for { @@ -875,6 +929,11 @@ func (c *client) readLoop() { c.closeConnection(closedStateForErr(err)) return } + + if cpacc && start.Sub(lpacc) >= closedSubsCheckInterval { + c.pruneClosedSubFromPerAccountCache() + lpacc = time.Now() + } } } @@ -910,6 +969,13 @@ func (c *client) handlePartialWrite(pnb net.Buffers) { // Lock must be held func (c *client) flushOutbound() bool { if c.flags.isSet(flushOutbound) { + // Another go-routine has set this and is either + // doing the write or waiting to re-acquire the + // lock post write. Release lock to give it a + // chance to complete. + c.mu.Unlock() + runtime.Gosched() + c.mu.Lock() return false } c.flags.set(flushOutbound) @@ -957,12 +1023,12 @@ func (c *client) flushOutbound() bool { c.out.lwb = int32(n) // Subtract from pending bytes and messages. - c.out.pb -= c.out.lwb + c.out.pb -= int64(c.out.lwb) c.out.pm -= apm // FIXME(dlc) - this will not be totally accurate on partials. // Check for partial writes // TODO(dlc) - zero write with no error will cause lost message and the writeloop to spin. - if c.out.lwb != attempted && n > 0 { + if int64(c.out.lwb) != attempted && n > 0 { c.handlePartialWrite(nb) } else if c.out.lwb >= c.out.sz { c.out.sws = 0 @@ -1006,18 +1072,18 @@ func (c *client) flushOutbound() bool { } // Adjust based on what we wrote plus any pending. - pt := c.out.lwb + c.out.pb + pt := int64(c.out.lwb) + c.out.pb // Adjust sz as needed downward, keeping power of 2. // We do this at a slower rate. - if pt < c.out.sz && c.out.sz > minBufSize { + if pt < int64(c.out.sz) && c.out.sz > minBufSize { c.out.sws++ if c.out.sws > shortsToShrink { c.out.sz >>= 1 } } // Adjust sz as needed upward, keeping power of 2. - if pt > c.out.sz && c.out.sz < maxBufSize { + if pt > int64(c.out.sz) && c.out.sz < maxBufSize { c.out.sz <<= 1 } @@ -1042,7 +1108,7 @@ func (c *client) flushOutbound() bool { // Check if we have a stalled gate and if so and we are recovering release // any stalled producers. Only kind==CLIENT will stall. - if c.out.stc != nil && (c.out.lwb == attempted || c.out.pb < c.out.mp/2) { + if c.out.stc != nil && (int64(c.out.lwb) == attempted || c.out.pb < c.out.mp/2) { close(c.out.stc) c.out.stc = nil } @@ -1179,7 +1245,10 @@ func (c *client) processConnect(arg []byte) error { return nil } c.last = time.Now() - + // Estimate RTT to start. + if c.kind == CLIENT { + c.rtt = c.last.Sub(c.start) + } kind := c.kind srv := c.srv @@ -1378,7 +1447,7 @@ func (c *client) queueOutbound(data []byte) bool { // Assume data will not be referenced referenced := false // Add to pending bytes total. - c.out.pb += int32(len(data)) + c.out.pb += int64(len(data)) // Check for slow consumer via pending bytes limit. // ok to return here, client is going away. @@ -1472,6 +1541,32 @@ func (c *client) sendPong() { c.sendProto([]byte("PONG\r\n"), true) } +// Used to kick off a RTT measurement for latency tracking. +func (c *client) sendRTTPing() bool { + c.mu.Lock() + sent := c.sendRTTPingLocked() + c.mu.Unlock() + return sent +} + +// Used to kick off a RTT measurement for latency tracking. +// This is normally called only when the caller has checked that +// the c.rtt is 0 and wants to force an update by sending a PING. +// Client lock held on entry. +func (c *client) sendRTTPingLocked() bool { + // Most client libs send a CONNECT+PING and wait for a PONG from the + // server. So if firstPongSent flag is set, it is ok for server to + // send the PING. But in case we have client libs that don't do that, + // allow the send of the PING if more than 2 secs have elapsed since + // the client TCP connection was accepted. + if !c.flags.isSet(clearConnection) && + (c.flags.isSet(firstPongSent) || time.Since(c.start) > maxNoRTTPingBeforeFirstPong) { + c.sendPing() + return true + } + return false +} + // Assume the lock is held upon entry. func (c *client) sendPing() { c.rttStart = time.Now() @@ -1667,7 +1762,7 @@ func splitArg(arg []byte) [][]byte { return args } -func (c *client) processSub(argo []byte) (err error) { +func (c *client) processSub(argo []byte, noForward bool) (*subscription, error) { c.traceInOp("SUB", argo) // Indicate activity. @@ -1689,7 +1784,7 @@ func (c *client) processSub(argo []byte) (err error) { sub.queue = args[1] sub.sid = args[2] default: - return fmt.Errorf("processSub Parse Error: '%s'", arg) + return nil, fmt.Errorf("processSub Parse Error: '%s'", arg) } c.mu.Lock() @@ -1703,26 +1798,39 @@ func (c *client) processSub(argo []byte) (err error) { if c.nc == nil && kind != SYSTEM { c.mu.Unlock() - return nil + return sub, nil } // Check permissions if applicable. - if kind == CLIENT && !c.canSubscribe(string(sub.subject)) { - c.mu.Unlock() - c.sendErr(fmt.Sprintf("Permissions Violation for Subscription to %q", sub.subject)) - c.Errorf("Subscription Violation - %s, Subject %q, SID %s", - c.getAuthUser(), sub.subject, sub.sid) - return nil + if kind == CLIENT { + // First do a pass whether queue subscription is valid. This does not necessarily + // mean that it will not be able to plain subscribe. + // + // allow = ["foo"] -> can subscribe or queue subscribe to foo using any queue + // allow = ["foo v1"] -> can only queue subscribe to 'foo v1', no plain subs allowed. + // allow = ["foo", "foo v1"] -> can subscribe to 'foo' but can only queue subscribe to 'foo v1' + // + if sub.queue != nil { + if !c.canQueueSubscribe(string(sub.subject), string(sub.queue)) { + c.mu.Unlock() + c.subPermissionViolation(sub) + return nil, nil + } + } else if !c.canSubscribe(string(sub.subject)) { + c.mu.Unlock() + c.subPermissionViolation(sub) + return nil, nil + } } - // Check if we have a maximum on the number of subscriptions. if c.subsAtLimit() { c.mu.Unlock() c.maxSubsExceeded() - return nil + return nil, nil } - updateGWs := false + var updateGWs bool + var err error // Subscribe here. if c.subs[sid] == nil { @@ -1741,19 +1849,24 @@ func (c *client) processSub(argo []byte) (err error) { if err != nil { c.sendErr("Invalid Subject") - return nil + return nil, nil } else if c.opts.Verbose && kind != SYSTEM { c.sendOK() } // No account just return. if acc == nil { - return nil + return sub, nil } if err := c.addShadowSubscriptions(acc, sub); err != nil { c.Errorf(err.Error()) } + + if noForward { + return sub, nil + } + // If we are routing and this is a local sub, add to the route map for the associated account. if kind == CLIENT || kind == SYSTEM { srv.updateRouteSubscriptionMap(acc, sub, 1) @@ -1763,7 +1876,7 @@ func (c *client) processSub(argo []byte) (err error) { } // Now check on leafnode updates. srv.updateLeafNodes(acc, sub, 1) - return nil + return sub, nil } // If the client's account has stream imports and there are matches for @@ -1923,6 +2036,60 @@ func (c *client) canSubscribe(subject string) bool { return allowed } +func queueMatches(queue string, qsubs [][]*subscription) bool { + if len(qsubs) == 0 { + return true + } + for _, qsub := range qsubs { + qs := qsub[0] + qname := string(qs.queue) + + // NOTE: '*' and '>' tokens can also be valid + // queue names so we first check against the + // literal name. e.g. v1.* == v1.* + if queue == qname || (subjectHasWildcard(qname) && subjectIsSubsetMatch(queue, qname)) { + return true + } + } + return false +} + +func (c *client) canQueueSubscribe(subject, queue string) bool { + if c.perms == nil { + return true + } + + allowed := true + + if c.perms.sub.allow != nil { + r := c.perms.sub.allow.Match(subject) + + // If perms DO NOT have queue name, then psubs will be greater than + // zero. If perms DO have queue name, then qsubs will be greater than + // zero. + allowed = len(r.psubs) > 0 + if len(r.qsubs) > 0 { + // If the queue appears in the allow list, then DO allow. + allowed = queueMatches(queue, r.qsubs) + } + } + + if allowed && c.perms.sub.deny != nil { + r := c.perms.sub.deny.Match(subject) + + // If perms DO NOT have queue name, then psubs will be greater than + // zero. If perms DO have queue name, then qsubs will be greater than + // zero. + allowed = len(r.psubs) == 0 + if len(r.qsubs) > 0 { + // If the queue appears in the deny list, then DO NOT allow. + allowed = !queueMatches(queue, r.qsubs) + } + } + + return allowed +} + // Low level unsubscribe for a given client. func (c *client) unsubscribe(acc *Account, sub *subscription, force, remove bool) { c.mu.Lock() @@ -2087,7 +2254,7 @@ var needFlush = struct{}{} // deliverMsg will deliver a message to a matching subscription and its underlying client. // We process all connection/client types. mh is the part that will be protocol/client specific. -func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { +func (c *client) deliverMsg(sub *subscription, subject, mh, msg []byte, gwrply bool) bool { if sub.client == nil { return false } @@ -2102,7 +2269,15 @@ func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { // Check if we have a subscribe deny clause. This will trigger us to check the subject // for a match against the denied subjects. - if client.mperms != nil && client.checkDenySub(string(c.pa.subject)) { + if client.mperms != nil && client.checkDenySub(string(subject)) { + client.mu.Unlock() + return false + } + + // This is set under the client lock using atomic because it can be + // checked with atomic without the client lock. Here, we don't need + // the atomic operation since we are under the lock. + if sub.closed == 1 { client.mu.Unlock() return false } @@ -2127,6 +2302,9 @@ func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { // Due to defer, reverse the code order so that execution // is consistent with other cases where we unsubscribe. if shouldForward { + if srv.gateway.enabled { + defer srv.gatewayUpdateSubInterest(client.acc.Name, sub, -1) + } defer srv.updateRouteSubscriptionMap(client.acc, sub, -1) } defer client.unsubscribe(client.acc, sub, true, true) @@ -2136,6 +2314,9 @@ func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { client.unsubscribe(client.acc, sub, true, true) if shouldForward { srv.updateRouteSubscriptionMap(client.acc, sub, -1) + if srv.gateway.enabled { + srv.gatewayUpdateSubInterest(client.acc.Name, sub, -1) + } } return false } @@ -2159,7 +2340,7 @@ func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { if client.kind == SYSTEM { s := client.srv client.mu.Unlock() - s.deliverInternalMsg(sub, c.pa.subject, c.pa.reply, msg[:msgSize]) + s.deliverInternalMsg(sub, c, subject, c.pa.reply, msg[:msgSize]) return true } @@ -2176,6 +2357,35 @@ func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { return false } + // Do a fast check here to see if we should be tracking this from a latency + // perspective. This will be for a request being received for an exported service. + // This needs to be from a non-client (otherwise tracking happens at requestor). + // + // Also this check captures if the original reply (c.pa.reply) is a GW routed + // reply (since it is known to be > minReplyLen). If that is the case, we need to + // track the binding between the routed reply and the reply set in the message + // header (which is c.pa.reply without the GNR routing prefix). + if client.kind == CLIENT && len(c.pa.reply) > minReplyLen { + + if gwrply { + // Note we keep track "in" the destination client (`client`) but the + // routed reply subject is in `c.pa.reply`. Should that change, we + // would have to pass the "reply" in deliverMsg(). + srv.trackGWReply(client, c.pa.reply) + } + + // If we do not have a registered RTT queue that up now. + if client.rtt == 0 { + client.sendRTTPingLocked() + } + // FIXME(dlc) - We may need to optimize this. + // We will have tagged this with a suffix ('.T') if we are tracking. This is + // needed from sampling. Not all will be tracked. + if c.kind != CLIENT && client.acc.IsExportServiceTracking(string(subject)) && isTrackedReply(c.pa.reply) { + client.trackRemoteReply(string(c.pa.reply)) + } + } + // Queue to outbound buffer client.queueOutbound(mh) client.queueOutbound(msg) @@ -2216,6 +2426,25 @@ func (c *client) deliverMsg(sub *subscription, mh, msg []byte) bool { return true } +// This will track a remote reply for an exported service that has requested +// latency tracking. +// Lock assumed to be held. +func (c *client) trackRemoteReply(reply string) { + if c.rrTracking == nil { + c.rrTracking = make(map[string]*remoteLatency) + c.rrMax = c.acc.MaxAutoExpireResponseMaps() + } + rl := remoteLatency{ + Account: c.acc.Name, + ReqId: reply, + } + rl.M2.RequestStart = time.Now() + c.rrTracking[reply] = &rl + if len(c.rrTracking) >= c.rrMax { + c.pruneRemoteTracking() + } +} + // pruneReplyPerms will remove any stale or expired entries // in our reply cache. We make sure to not check too often. func (c *client) pruneReplyPerms() { @@ -2263,6 +2492,20 @@ func (c *client) prunePubPermsCache() { } } +// pruneRemoteTracking will prune any remote tracking objects +// that are too old. These are orphaned when a service is not +// sending reponses etc. +// Lock should be held upon entry. +func (c *client) pruneRemoteTracking() { + ttl := c.acc.AutoExpireTTL() + now := time.Now() + for reply, rl := range c.rrTracking { + if now.Sub(rl.M2.RequestStart) > ttl { + delete(c.rrTracking, reply) + } + } +} + // pubAllowed checks on publish permissioning. // Lock should not be held. func (c *client) pubAllowed(subject string) bool { @@ -2295,7 +2538,7 @@ func (c *client) pubAllowedFullCheck(subject string, fullCheck bool) bool { } // If we are currently not allowed but we are tracking reply subjects - // dynamically, check to see if we are allowed here Avoid pcache. + // dynamically, check to see if we are allowed here but avoid pcache. // We need to acquire the lock though. if !allowed && fullCheck && c.perms.resp != nil { c.mu.Lock() @@ -2322,37 +2565,25 @@ func (c *client) pubAllowedFullCheck(subject string, fullCheck bool) bool { return allowed } -// Used to mimic client like replies. -const ( - replyPrefix = "_R_." - replyPrefixLen = len(replyPrefix) - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - base = 62 -) - -// newServiceReply is used when rewriting replies that cross account boundaries. -// These will look like _R_.XXXXXXXX. -func (c *client) newServiceReply() []byte { - // Check to see if we have our own rand yet. Global rand - // has contention with lots of clients, etc. - if c.in.prand == nil { - c.in.prand = rand.New(rand.NewSource(time.Now().UnixNano())) - } - - var b = [15]byte{'_', 'R', '_', '.'} - rn := c.in.prand.Int63() - for i, l := replyPrefixLen, rn; i < len(b); i++ { - b[i] = digits[l%base] - l /= base - } - return b[:] -} - // Test whether a reply subject is a service import reply. func isServiceReply(reply []byte) bool { + // This function is inlined and checking this way is actually faster + // than byte-by-byte comparison. return len(reply) > 3 && string(reply[:4]) == replyPrefix } +// Test whether a reply subject is a service import or a gateway routed reply. +func isReservedReply(reply []byte) bool { + if isServiceReply(reply) { + return true + } + // Faster to check with string([:]) than byte-by-byte + if len(reply) > gwReplyPrefixLen && string(reply[:gwReplyPrefixLen]) == gwReplyPrefix { + return true + } + return false +} + // This will decide to call the client code or router code. func (c *client) processInboundMsg(msg []byte) { switch c.kind { @@ -2378,6 +2609,12 @@ func (c *client) processInboundClientMsg(msg []byte) { c.traceMsg(msg) } + // Check that client (could be here with SYSTEM) is not publishing on reserved "$GNR" prefix. + if c.kind == CLIENT && hasGWRoutedReplyPrefix(c.pa.subject) { + c.pubPermissionViolation(c.pa.subject) + return + } + // Check pub permissions if c.perms != nil && (c.perms.pub.allow != nil || c.perms.pub.deny != nil) && !c.pubAllowed(string(c.pa.subject)) { c.pubPermissionViolation(c.pa.subject) @@ -2385,7 +2622,7 @@ func (c *client) processInboundClientMsg(msg []byte) { } // Now check for reserved replies. These are used for service imports. - if isServiceReply(c.pa.reply) { + if len(c.pa.reply) > 0 && isReservedReply(c.pa.reply) { c.replySubjectViolation(c.pa.reply) return } @@ -2399,11 +2636,40 @@ func (c *client) processInboundClientMsg(msg []byte) { return } + // Check if this client's gateway replies map is not empty + if atomic.LoadInt32(&c.cgwrt) > 0 && c.handleGWReplyMap(msg) { + return + } + // Check to see if we need to map/route to another account. if c.acc.imports.services != nil { c.checkForImportServices(c.acc, msg) } + // If we have an exported service and we are doing remote tracking, check this subject + // to see if we need to report the latency. + if c.rrTracking != nil { + c.mu.Lock() + rl := c.rrTracking[string(c.pa.subject)] + if rl != nil { + delete(c.rrTracking, string(c.pa.subject)) + } + rtt := c.rtt + c.mu.Unlock() + if rl != nil { + sl := &rl.M2 + // Fill this in and send it off to the other side. + sl.AppName = c.opts.Name + sl.ServiceLatency = time.Since(sl.RequestStart) - rtt + sl.NATSLatency.Responder = rtt + sl.TotalLatency = sl.ServiceLatency + rtt + sanitizeLatencyMetric(sl) + + lsub := remoteLatencySubjectForResponse(c.pa.subject) + c.srv.sendInternalAccountMsg(nil, lsub, &rl) // Send to SYS account + } + } + // Match the subscriptions. We will use our own L1 map if // it's still valid, avoiding contention on the shared sublist. var r *SublistResult @@ -2440,13 +2706,14 @@ func (c *client) processInboundClientMsg(msg []byte) { // This is the fanout scale. if len(r.psubs)+len(r.qsubs) > 0 { flag := pmrNoFlag - // If we have queue subs in this cluster, then if we run in gateway - // mode and the remote gateways have queue subs, then we need to - // collect the queue groups this message was sent to so that we - // exclude them when sending to gateways. + // If there are matching queue subs and we are in gateway mode, + // we need to keep track of the queue names the messages are + // delivered to. When sending to the GWs, the RMSG will include + // those names so that the remote clusters do not deliver messages + // to their queue subs of the same names. if len(r.qsubs) > 0 && c.srv.gateway.enabled && atomic.LoadInt64(&c.srv.gateway.totalQSubs) > 0 { - flag = pmrCollectQueueNames + flag |= pmrCollectQueueNames } qnames = c.processMsgResults(c.acc, r, msg, c.pa.subject, c.pa.reply, flag) } @@ -2457,6 +2724,59 @@ func (c *client) processInboundClientMsg(msg []byte) { } } +// This is invoked knowing that this client has some GW replies +// in its map. It will check if one is find for the c.pa.subject +// and if so will process it directly (send to GWs and LEAF) and +// return true to notify the caller that the message was handled. +// If there is no mapping for the subject, false is returned. +func (c *client) handleGWReplyMap(msg []byte) bool { + c.mu.Lock() + rm, ok := c.gwrm[string(c.pa.subject)] + if !ok { + c.mu.Unlock() + return false + } + // Set subject to the mapped reply subject + c.pa.subject = []byte(rm.ms) + + var rl *remoteLatency + var rtt time.Duration + + if c.rrTracking != nil { + rl = c.rrTracking[string(c.pa.subject)] + if rl != nil { + delete(c.rrTracking, string(c.pa.subject)) + } + rtt = c.rtt + } + c.mu.Unlock() + + if rl != nil { + sl := &rl.M2 + // Fill this in and send it off to the other side. + sl.AppName = c.opts.Name + sl.ServiceLatency = time.Since(sl.RequestStart) - rtt + sl.NATSLatency.Responder = rtt + sl.TotalLatency = sl.ServiceLatency + rtt + sanitizeLatencyMetric(sl) + + lsub := remoteLatencySubjectForResponse(c.pa.subject) + c.srv.sendInternalAccountMsg(nil, lsub, &rl) // Send to SYS account + } + + // Check for leaf nodes + if c.srv.gwLeafSubs.Count() > 0 { + if r := c.srv.gwLeafSubs.Match(string(c.pa.subject)); len(r.psubs) > 0 { + c.processMsgResults(c.acc, r, msg, c.pa.subject, c.pa.reply, pmrNoFlag) + } + } + if c.srv.gateway.enabled { + c.sendMsgToGateways(c.acc, msg, c.pa.subject, c.pa.reply, nil) + } + + return true +} + // This checks and process import services by doing the mapping and sending the // message onward if applicable. func (c *client) checkForImportServices(acc *Account, msg []byte) { @@ -2473,24 +2793,22 @@ func (c *client) checkForImportServices(acc *Account, msg []byte) { // If we have been marked invalid simply return here. if si != nil && !invalid && si.acc != nil && si.acc.sl != nil { var nrr []byte - if si.ae { - acc.removeServiceImport(si.from) - } if c.pa.reply != nil { + var latency *serviceLatency + var tracking bool + if tracking = shouldSample(si.latency); tracking { + latency = si.latency + } // We want to remap this to provide anonymity. - nrr = c.newServiceReply() - si.acc.addResponseServiceImport(acc, string(nrr), string(c.pa.reply), si.rt) + nrr = si.acc.newServiceReply(tracking) + si.acc.addRespServiceImport(acc, string(nrr), string(c.pa.reply), si.rt, latency) // Track our responses for cleanup if not auto-expire. if si.rt != Singleton { acc.addRespMapEntry(si.acc, string(c.pa.reply), string(nrr)) - } - - // If this is a client or leaf connection and we are in gateway mode, - // we need to send RS+ to our local cluster and possibly to inbound - // GW connections for which we are in interest-only mode. - if c.srv.gateway.enabled && (c.kind == CLIENT || c.kind == LEAF) { - c.srv.gatewayHandleServiceImport(si.acc, nrr, c, 1) + } else if si.latency != nil && c.rtt == 0 { + // We have a service import that we are tracking but have not established RTT. + c.sendRTTPing() } } // FIXME(dlc) - Do L1 cache trick from above. @@ -2503,19 +2821,31 @@ func (c *client) checkForImportServices(acc *Account, msg []byte) { si.acc.checkForRespEntry(si.to) } + flags := pmrNoFlag // If we are a route or gateway or leafnode and this message is flipped to a queue subscriber we // need to handle that since the processMsgResults will want a queue filter. - if len(rr.qsubs) > 0 && c.pa.queues == nil && (c.kind == ROUTER || c.kind == GATEWAY || c.kind == LEAF) { - c.makeQFilter(rr.qsubs) + if c.kind == GATEWAY || c.kind == ROUTER || c.kind == LEAF { + flags |= pmrIgnoreEmptyQueueFilter } - - // If this is not a gateway connection but gateway is enabled, - // try to send this converted message to all gateways. - if c.srv.gateway.enabled && (c.kind == CLIENT || c.kind == SYSTEM || c.kind == LEAF) { - queues := c.processMsgResults(si.acc, rr, msg, []byte(si.to), nrr, pmrCollectQueueNames) + if c.srv.gateway.enabled { + flags |= pmrCollectQueueNames + queues := c.processMsgResults(si.acc, rr, msg, []byte(si.to), nrr, flags) c.sendMsgToGateways(si.acc, msg, []byte(si.to), nrr, queues) } else { - c.processMsgResults(si.acc, rr, msg, []byte(si.to), nrr, pmrNoFlag) + c.processMsgResults(si.acc, rr, msg, []byte(si.to), nrr, flags) + } + + shouldRemove := si.ae + + // Calculate tracking info here if we are tracking this request/response. + if si.tracking { + if requesting := firstSubFromResult(rr); requesting != nil { + shouldRemove = acc.sendTrackingLatency(si, requesting.client, c) + } + } + + if shouldRemove { + acc.removeServiceImport(si.from) } } } @@ -2570,13 +2900,24 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, subject, c.in.rts = c.in.rts[:0] } + var rplyHasGWPrefix bool + var creply = reply + + // If the reply subject is a GW routed reply, we will perform some + // tracking in deliverMsg(). We also want to send to the user the + // reply without the prefix. `creply` will be set to that and be + // used to create the message header for client connections. + if rplyHasGWPrefix = isGWRoutedReply(reply); rplyHasGWPrefix { + creply = reply[gwSubjectOffset:] + } + // Loop over all normal subscriptions that match. for _, sub := range r.psubs { // Check if this is a send to a ROUTER. We now process // these after everything else. switch sub.client.kind { case ROUTER: - if c.kind != ROUTER && !c.isSolicitedLeafNode() { + if (c.kind != ROUTER && !c.isSolicitedLeafNode()) || (flags&pmrAllowSendFromRouteToRoute != 0) { c.addSubToRouteTargets(sub) } continue @@ -2603,8 +2944,8 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, subject, si = len(msgh) } // Normal delivery - mh := c.msgHeader(msgh[:si], sub, reply) - c.deliverMsg(sub, mh, msg) + mh := c.msgHeader(msgh[:si], sub, creply) + c.deliverMsg(sub, subject, mh, msg, rplyHasGWPrefix) } // Set these up to optionally filter based on the queue lists. @@ -2615,13 +2956,9 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, subject, // For all non-client connections, we may still want to send messages to // leaf nodes or routes even if there are no queue filters since we collect // them above and do not process inline like normal clients. - if c.kind != CLIENT && qf == nil { - // However, if this is a gateway connection which should be treated - // as a client, still go and pick queue subscriptions, otherwise - // jump to sendToRoutesOrLeafs. - if !(c.kind == GATEWAY && (flags&pmrTreatGatewayAsClient != 0)) { - goto sendToRoutesOrLeafs - } + // However, do select queue subs if asked to ignore empty queue filter. + if c.kind != CLIENT && qf == nil && flags&pmrIgnoreEmptyQueueFilter == 0 { + goto sendToRoutesOrLeafs } // Check to see if we have our own rand yet. Global rand @@ -2690,8 +3027,14 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, subject, si = len(msgh) } - mh := c.msgHeader(msgh[:si], sub, reply) - if c.deliverMsg(sub, mh, msg) { + var rreply = reply + if rplyHasGWPrefix && sub.client.kind == CLIENT { + rreply = creply + } + // "rreply" will be stripped of the $GNR prefix (if present) + // for client connections only. + mh := c.msgHeader(msgh[:si], sub, rreply) + if c.deliverMsg(sub, subject, mh, msg, rplyHasGWPrefix) { // Clear rsub rsub = nil if flags&pmrCollectQueueNames != 0 { @@ -2755,7 +3098,7 @@ sendToRoutesOrLeafs: } mh = append(mh, c.pa.szb...) mh = append(mh, _CRLF_...) - c.deliverMsg(rt.sub, mh, msg) + c.deliverMsg(rt.sub, subject, mh, msg, false) } return queues } @@ -2765,6 +3108,21 @@ func (c *client) pubPermissionViolation(subject []byte) { c.Errorf("Publish Violation - %s, Subject %q", c.getAuthUser(), subject) } +func (c *client) subPermissionViolation(sub *subscription) { + errTxt := fmt.Sprintf("Permissions Violation for Subscription to %q", sub.subject) + logTxt := fmt.Sprintf("Subscription Violation - %s, Subject %q, SID %s", + c.getAuthUser(), sub.subject, sub.sid) + + if sub.queue != nil { + errTxt = fmt.Sprintf("Permissions Violation for Subscription to %q using queue %q", sub.subject, sub.queue) + logTxt = fmt.Sprintf("Subscription Violation - %s, Subject %q, Queue: %q, SID %s", + c.getAuthUser(), sub.subject, sub.queue, sub.sid) + } + + c.sendErr(errTxt) + c.Errorf(logTxt) +} + func (c *client) replySubjectViolation(reply []byte) { c.sendErr(fmt.Sprintf("Permissions Violation for Publish with Reply of %q", reply)) c.Errorf("Publish Violation - %s, Reply %q", c.getAuthUser(), reply) @@ -2808,20 +3166,6 @@ func (c *client) processPingTimer() { c.setPingTimer() } -// Lock should be held -// We randomize the first one by an offset up to 20%, e.g. 2m ~= max 24s. -// This is because the clients by default are usually setting same interval -// and we have alot of cross ping/pongs between clients and servers. -// We will now suppress the server ping/pong if we have received a client ping. -func (c *client) setFirstPingTimer(pingInterval time.Duration) { - if c.srv == nil { - return - } - addDelay := rand.Int63n(int64(pingInterval / 5)) - d := pingInterval + time.Duration(addDelay) - c.ping.tmr = time.AfterFunc(d, c.processPingTimer) -} - // Lock should be held func (c *client) setPingTimer() { if c.srv == nil { @@ -2960,7 +3304,10 @@ func (c *client) processSubsOnConfigReload(awcsti map[string]struct{}) { for _, sub := range c.subs { // Just checking to rebuild mperms under the lock, will collect removed though here. // Only collect under subs array of canSubscribe and checkAcc true. - if !c.canSubscribe(string(sub.subject)) { + canSub := c.canSubscribe(string(sub.subject)) + canQSub := sub.queue != nil && c.canQueueSubscribe(string(sub.subject), string(sub.queue)) + + if !canSub && !canQSub { removed = append(removed, sub) } else if checkAcc { subs = append(subs, sub) @@ -2998,10 +3345,11 @@ type qsub struct { func (c *client) closeConnection(reason ClosedState) { c.mu.Lock() - if c.nc == nil { + if c.nc == nil || c.flags.isSet(closeConnection) { c.mu.Unlock() return } + c.flags.set(closeConnection) // Be consistent with the creation: for routes and gateways, // we use Noticef on create, so use that too for delete. @@ -3241,35 +3589,33 @@ func (c *client) Account() *Account { // prunePerAccountCache will prune off a random number of cache entries. func (c *client) prunePerAccountCache() { n := 0 - now := time.Now().Unix() - if now-c.in.losc >= orphanSubsCheckInterval { - for cacheKey, pac := range c.in.pacache { - for _, sub := range pac.results.psubs { + for cacheKey := range c.in.pacache { + delete(c.in.pacache, cacheKey) + if n++; n > prunePerAccountCacheSize { + break + } + } +} + +// pruneClosedSubFromPerAccountCache remove entries that contain subscriptions +// that have been closed. +func (c *client) pruneClosedSubFromPerAccountCache() { + for cacheKey, pac := range c.in.pacache { + for _, sub := range pac.results.psubs { + if sub.isClosed() { + goto REMOVE + } + } + for _, qsub := range pac.results.qsubs { + for _, sub := range qsub { if sub.isClosed() { goto REMOVE } } - for _, qsub := range pac.results.qsubs { - for _, sub := range qsub { - if sub.isClosed() { - goto REMOVE - } - } - } - continue - REMOVE: - delete(c.in.pacache, cacheKey) - n++ - } - c.in.losc = now - } - if n < prunePerAccountCacheSize { - for cacheKey := range c.in.pacache { - delete(c.in.pacache, cacheKey) - if n++; n > prunePerAccountCacheSize { - break - } } + continue + REMOVE: + delete(c.in.pacache, cacheKey) } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/const.go b/vendor/github.com/nats-io/nats-server/v2/server/const.go index e8d1d472..50cbd31a 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/const.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/const.go @@ -40,7 +40,7 @@ var ( const ( // VERSION is the current version for the server. - VERSION = "2.0.4" + VERSION = "2.1.2" // PROTO is the currently supported protocol. // 0 was the original @@ -175,4 +175,8 @@ const ( // DEFAULT_ALLOW_RESPONSE_EXPIRATION is the default time allowed for a given // dynamic response permission. DEFAULT_ALLOW_RESPONSE_EXPIRATION = 2 * time.Minute + + // DEFAULT_SERVICE_LATENCY_SAMPLING is the default sampling rate for service + // latency metrics + DEFAULT_SERVICE_LATENCY_SAMPLING = 100 ) diff --git a/vendor/github.com/nats-io/nats-server/v2/server/errors.go b/vendor/github.com/nats-io/nats-server/v2/server/errors.go index f12903e6..87efbf81 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/errors.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/errors.go @@ -40,6 +40,9 @@ var ( // ErrReservedPublishSubject represents an error condition when sending to a reserved subject, e.g. _SYS.> ErrReservedPublishSubject = errors.New("reserved internal subject") + // ErrBadPublishSubject represents an error condition for an invalid publish subject. + ErrBadPublishSubject = errors.New("invalid publish subject") + // ErrBadClientProtocol signals a client requested an invalid client protocol. ErrBadClientProtocol = errors.New("invalid client protocol") @@ -76,6 +79,15 @@ var ( // ErrMissingAccount is returned when an account does not exist. ErrMissingAccount = errors.New("account missing") + // ErrMissingService is returned when an account does not have an exported service. + ErrMissingService = errors.New("service missing") + + // ErrBadServiceType is returned when latency tracking is being applied to non-singleton response types. + ErrBadServiceType = errors.New("bad service response type") + + // ErrBadSampling is returned when the sampling for latency tracking is not 1 >= sample <= 100. + ErrBadSampling = errors.New("bad sampling percentage, should be 1-100") + // ErrAccountValidation is returned when an account has failed validation. ErrAccountValidation = errors.New("account validation failed") @@ -94,6 +106,9 @@ var ( // ErrStreamImportAuthorization is returned when a stream import is not authorized. ErrStreamImportAuthorization = errors.New("stream import not authorized") + // ErrStreamImportBadPrefix is returned when a stream import prefix contains wildcards. + ErrStreamImportBadPrefix = errors.New("stream import prefix can not contain wildcard tokens") + // ErrServiceImportAuthorization is returned when a service import is not authorized. ErrServiceImportAuthorization = errors.New("service import not authorized") @@ -112,6 +127,9 @@ var ( // ErrRevocation is returned when a credential has been revoked. ErrRevocation = errors.New("credentials have been revoked") + + // Used to signal an error that a server is not running. + ErrServerNotRunning = errors.New("server is not running") ) // configErr is a configuration error. diff --git a/vendor/github.com/nats-io/nats-server/v2/server/events.go b/vendor/github.com/nats-io/nats-server/v2/server/events.go index 67f3ad16..abd78d29 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/events.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/events.go @@ -14,15 +14,18 @@ package server import ( + "bytes" + "crypto/sha256" + "encoding/base64" "encoding/json" "fmt" + "math/rand" "strconv" "strings" "sync" "sync/atomic" "time" - "github.com/nats-io/jwt" "github.com/nats-io/nats-server/v2/server/pse" ) @@ -39,32 +42,43 @@ const ( serverStatsReqSubj = "$SYS.REQ.SERVER.%s.STATSZ" serverStatsPingReqSubj = "$SYS.REQ.SERVER.PING" leafNodeConnectEventSubj = "$SYS.ACCOUNT.%s.LEAFNODE.CONNECT" + remoteLatencyEventSubj = "$SYS.LATENCY.M2.%s" + inboxRespSubj = "$SYS._INBOX.%s.%s" + + // FIXME(dlc) - Should account scope, even with wc for now, but later on + // we can then shard as needed. + accNumSubsReqSubj = "$SYS.REQ.ACCOUNT.NSUBS" + + // These are for exported debug services. These are local to this server only. + accSubsSubj = "$SYS.DEBUG.SUBSCRIBERS" shutdownEventTokens = 4 serverSubjectIndex = 2 accUpdateTokens = 5 accUpdateAccIndex = 2 - defaultEventsHBItvl = 30 * time.Second ) // FIXME(dlc) - make configurable. -var eventsHBInterval = defaultEventsHBItvl +var eventsHBInterval = 30 * time.Second // Used to send and receive messages from inside the server. type internal struct { - account *Account - client *client - seq uint64 - sid uint64 - servers map[string]*serverUpdate - sweeper *time.Timer - stmr *time.Timer - subs map[string]msgHandler - sendq chan *pubMsg - wg sync.WaitGroup - orphMax time.Duration - chkOrph time.Duration - statsz time.Duration + account *Account + client *client + seq uint64 + sid uint64 + servers map[string]*serverUpdate + sweeper *time.Timer + stmr *time.Timer + subs map[string]msgHandler + replies map[string]msgHandler + sendq chan *pubMsg + wg sync.WaitGroup + orphMax time.Duration + chkOrph time.Duration + statsz time.Duration + shash string + inboxPre string } // ServerStatsMsg is sent periodically with stats updates. @@ -109,6 +123,7 @@ type accNumConnsReq struct { // ServerInfo identifies remote servers. type ServerInfo struct { + Name string `json:"name"` Host string `json:"host"` ID string `json:"id"` Cluster string `json:"cluster,omitempty"` @@ -151,6 +166,7 @@ type ServerStats struct { // RouteStat holds route statistics. type RouteStat struct { ID uint64 `json:"rid"` + Name string `json:"name,omitempty"` Sent DataStats `json:"sent"` Received DataStats `json:"received"` Pending int `json:"pending"` @@ -173,6 +189,7 @@ type DataStats struct { // Used for internally queueing up messages that the server wants to send. type pubMsg struct { + acc *Account sub string rply string si *ServerInfo @@ -197,9 +214,11 @@ func (s *Server) internalSendLoop(wg *sync.WaitGroup) { return } c := s.sys.client + sysacc := s.sys.account sendq := s.sys.sendq id := s.info.ID host := s.info.Host + servername := s.info.Name seqp := &s.sys.seq var cluster string if s.gateway.enabled { @@ -207,16 +226,26 @@ func (s *Server) internalSendLoop(wg *sync.WaitGroup) { } s.mu.Unlock() + // Warn when internal send queue is backed up past 75% + warnThresh := 3 * internalSendQLen / 4 + warnFreq := time.Second + last := time.Now().Add(-warnFreq) + for s.eventsRunning() { // Setup information for next message - seq := atomic.AddUint64(seqp, 1) + if len(sendq) > warnThresh && time.Since(last) >= warnFreq { + s.Warnf("Internal system send queue > 75%%") + last = time.Now() + } + select { case pm := <-sendq: if pm.si != nil { + pm.si.Name = servername pm.si.Host = host pm.si.Cluster = cluster pm.si.ID = id - pm.si.Seq = seq + pm.si.Seq = atomic.AddUint64(seqp, 1) pm.si.Version = VERSION pm.si.Time = time.Now() } @@ -224,11 +253,20 @@ func (s *Server) internalSendLoop(wg *sync.WaitGroup) { if pm.msg != nil { b, _ = json.MarshalIndent(pm.msg, _EMPTY_, " ") } + c.mu.Lock() + // We can have an override for account here. + if pm.acc != nil { + c.acc = pm.acc + } else { + c.acc = sysacc + } // Prep internal structures needed to send message. c.pa.subject = []byte(pm.sub) c.pa.size = len(b) c.pa.szb = []byte(strconv.FormatInt(int64(len(b)), 10)) c.pa.reply = []byte(pm.rply) + c.mu.Unlock() + // Add in NL b = append(b, _CRLF_...) c.processInboundClientMsg(b) @@ -256,9 +294,32 @@ func (s *Server) sendShutdownEvent() { s.sys.sendq = nil // Unhook all msgHandlers. Normal client cleanup will deal with subs, etc. s.sys.subs = nil + s.sys.replies = nil s.mu.Unlock() // Send to the internal queue and mark as last. - sendq <- &pubMsg{subj, _EMPTY_, nil, nil, true} + sendq <- &pubMsg{nil, subj, _EMPTY_, nil, nil, true} +} + +// Used to send an internal message to an arbitrary account. +func (s *Server) sendInternalAccountMsg(a *Account, subject string, msg interface{}) error { + s.mu.Lock() + if s.sys == nil || s.sys.sendq == nil { + s.mu.Unlock() + return ErrNoSysAccount + } + sendq := s.sys.sendq + // Don't hold lock while placing on the channel. + s.mu.Unlock() + sendq <- &pubMsg{a, subject, "", nil, msg, false} + return nil +} + +// This will queue up a message to be sent. +// Lock should not be held. +func (s *Server) sendInternalMsgLocked(sub, rply string, si *ServerInfo, msg interface{}) { + s.mu.Lock() + s.sendInternalMsg(sub, rply, si, msg) + s.mu.Unlock() } // This will queue up a message to be sent. @@ -270,7 +331,7 @@ func (s *Server) sendInternalMsg(sub, rply string, si *ServerInfo, msg interface sendq := s.sys.sendq // Don't hold lock while placing on the channel. s.mu.Unlock() - sendq <- &pubMsg{sub, rply, si, msg, false} + sendq <- &pubMsg{nil, sub, rply, si, msg, false} s.mu.Lock() } @@ -286,8 +347,9 @@ func (s *Server) eventsRunning() bool { // a defined system account. func (s *Server) EventsEnabled() bool { s.mu.Lock() - defer s.mu.Unlock() - return s.eventsEnabled() + ee := s.eventsEnabled() + s.mu.Unlock() + return ee } // eventsEnabled will report if events are enabled. @@ -296,6 +358,18 @@ func (s *Server) eventsEnabled() bool { return s.sys != nil && s.sys.client != nil && s.sys.account != nil } +// TrackedRemoteServers returns how many remote servers we are tracking +// from a system events perspective. +func (s *Server) TrackedRemoteServers() int { + s.mu.Lock() + if !s.running || !s.eventsEnabled() { + return -1 + } + ns := len(s.sys.servers) + s.mu.Unlock() + return ns +} + // Check for orphan servers who may have gone away without notification. // This should be wrapChk() to setup common locking. func (s *Server) checkRemoteServers() { @@ -341,6 +415,9 @@ func routeStat(r *client) *RouteStat { }, Pending: int(r.out.pb), } + if r.route != nil { + rs.Name = r.route.remoteName + } r.mu.Unlock() return rs } @@ -414,6 +491,9 @@ func (s *Server) startRemoteServerSweepTimer() { s.sys.sweeper = time.AfterFunc(s.sys.chkOrph, s.wrapChk(s.checkRemoteServers)) } +// Length of our system hash used for server targeted messages. +const sysHashLen = 6 + // This will setup our system wide tracking subs. // For now we will setup one wildcard subscription to // monitor all accounts for changes in number of connections. @@ -424,7 +504,19 @@ func (s *Server) initEventTracking() { if !s.eventsEnabled() { return } - subject := fmt.Sprintf(accConnsEventSubj, "*") + // Create a system hash which we use for other servers to target us specifically. + sha := sha256.New() + sha.Write([]byte(s.info.ID)) + s.sys.shash = base64.RawURLEncoding.EncodeToString(sha.Sum(nil))[:sysHashLen] + + // This will be for all inbox responses. + subject := fmt.Sprintf(inboxRespSubj, s.sys.shash, "*") + if _, err := s.sysSubscribe(subject, s.inboxReply); err != nil { + s.Errorf("Error setting up internal tracking: %v", err) + } + s.sys.inboxPre = subject + // This is for remote updates for connection accounting. + subject = fmt.Sprintf(accConnsEventSubj, "*") if _, err := s.sysSubscribe(subject, s.remoteConnsUpdate); err != nil { s.Errorf("Error setting up internal tracking: %v", err) } @@ -438,6 +530,10 @@ func (s *Server) initEventTracking() { if _, err := s.sysSubscribe(subject, s.connsRequest); err != nil { s.Errorf("Error setting up internal tracking: %v", err) } + // Listen for broad requests to respond with number of subscriptions for a given subject. + if _, err := s.sysSubscribe(accNumSubsReqSubj, s.nsubsRequest); err != nil { + s.Errorf("Error setting up internal tracking: %v", err) + } // Listen for all server shutdowns. subject = fmt.Sprintf(shutdownEventSubj, "*") if _, err := s.sysSubscribe(subject, s.remoteServerShutdown); err != nil { @@ -463,13 +559,27 @@ func (s *Server) initEventTracking() { if _, err := s.sysSubscribe(subject, s.leafNodeConnected); err != nil { s.Errorf("Error setting up internal tracking: %v", err) } + // For tracking remote latency measurements. + subject = fmt.Sprintf(remoteLatencyEventSubj, s.sys.shash) + if _, err := s.sysSubscribe(subject, s.remoteLatencyUpdate); err != nil { + s.Errorf("Error setting up internal latency tracking: %v", err) + } + + // These are for system account exports for debugging from client applications. + sacc := s.sys.account + + // This is for simple debugging of number of subscribers that exist in the system. + if _, err := s.sysSubscribeInternal(accSubsSubj, s.debugSubscribers); err != nil { + s.Errorf("Error setting up internal debug service for subscribers: %v", err) + } + if err := sacc.AddServiceExport(accSubsSubj, nil); err != nil { + s.Errorf("Error adding system service export for %q: %v", accSubsSubj, err) + } } // accountClaimUpdate will receive claim updates for accounts. -func (s *Server) accountClaimUpdate(sub *subscription, subject, reply string, msg []byte) { - s.mu.Lock() - defer s.mu.Unlock() - if !s.eventsEnabled() { +func (s *Server) accountClaimUpdate(sub *subscription, _ *client, subject, reply string, msg []byte) { + if !s.EventsEnabled() { return } toks := strings.Split(subject, tsep) @@ -487,19 +597,13 @@ func (s *Server) accountClaimUpdate(sub *subscription, subject, reply string, ms // Lock assume held. func (s *Server) processRemoteServerShutdown(sid string) { s.accounts.Range(func(k, v interface{}) bool { - a := v.(*Account) - a.mu.Lock() - prev := a.strack[sid] - delete(a.strack, sid) - a.nrclients -= prev.conns - a.nrleafs -= prev.leafs - a.mu.Unlock() + v.(*Account).removeRemoteServer(sid) return true }) } // remoteServerShutdownEvent is called when we get an event from another server shutting down. -func (s *Server) remoteServerShutdown(sub *subscription, subject, reply string, msg []byte) { +func (s *Server) remoteServerShutdown(sub *subscription, _ *client, subject, reply string, msg []byte) { s.mu.Lock() defer s.mu.Unlock() if !s.eventsEnabled() { @@ -526,7 +630,7 @@ func (s *Server) updateRemoteServer(ms *ServerInfo) { if su == nil { s.sys.servers[ms.ID] = &serverUpdate{ms.Seq, time.Now()} } else { - // Should alwqys be going up. + // Should always be going up. if ms.Seq <= su.seq { s.Errorf("Received out of order remote server update from: %q", ms.ID) return @@ -557,15 +661,7 @@ func (s *Server) shutdownEventing() { // Whip through all accounts. s.accounts.Range(func(k, v interface{}) bool { - a := v.(*Account) - a.mu.Lock() - a.nrclients = 0 - // Now clear state - clearTimer(&a.etmr) - clearTimer(&a.ctmr) - a.clients = nil - a.strack = nil - a.mu.Unlock() + v.(*Account).clearEventing() return true }) // Turn everything off here. @@ -573,7 +669,7 @@ func (s *Server) shutdownEventing() { } // Request for our local connection count. -func (s *Server) connsRequest(sub *subscription, subject, reply string, msg []byte) { +func (s *Server) connsRequest(sub *subscription, _ *client, subject, reply string, msg []byte) { if !s.eventsRunning() { return } @@ -582,10 +678,16 @@ func (s *Server) connsRequest(sub *subscription, subject, reply string, msg []by s.sys.client.Errorf("Error unmarshalling account connections request message: %v", err) return } - acc, _ := s.lookupAccount(m.Account) + // Here we really only want to lookup the account if its local. We do not want to fetch this + // account if we have no interest in it. + var acc *Account + if v, ok := s.accounts.Load(m.Account); ok { + acc = v.(*Account) + } if acc == nil { return } + // We know this is a local connection. if nlc := acc.NumLocalConnections(); nlc > 0 { s.mu.Lock() s.sendAccConnsUpdate(acc, reply) @@ -595,7 +697,7 @@ func (s *Server) connsRequest(sub *subscription, subject, reply string, msg []by // leafNodeConnected is an event we will receive when a leaf node for a given account // connects. -func (s *Server) leafNodeConnected(sub *subscription, subject, reply string, msg []byte) { +func (s *Server) leafNodeConnected(sub *subscription, _ *client, subject, reply string, msg []byte) { m := accNumConnsReq{} if err := json.Unmarshal(msg, &m); err != nil { s.sys.client.Errorf("Error unmarshalling account connections request message: %v", err) @@ -616,7 +718,7 @@ func (s *Server) leafNodeConnected(sub *subscription, subject, reply string, msg } // statszReq is a request for us to respond with current statz. -func (s *Server) statszReq(sub *subscription, subject, reply string, msg []byte) { +func (s *Server) statszReq(sub *subscription, _ *client, subject, reply string, msg []byte) { s.mu.Lock() defer s.mu.Unlock() if !s.eventsEnabled() || reply == _EMPTY_ { @@ -626,7 +728,7 @@ func (s *Server) statszReq(sub *subscription, subject, reply string, msg []byte) } // remoteConnsUpdate gets called when we receive a remote update from another server. -func (s *Server) remoteConnsUpdate(sub *subscription, subject, reply string, msg []byte) { +func (s *Server) remoteConnsUpdate(sub *subscription, _ *client, subject, reply string, msg []byte) { if !s.eventsRunning() { return } @@ -637,7 +739,15 @@ func (s *Server) remoteConnsUpdate(sub *subscription, subject, reply string, msg } // See if we have the account registered, if not drop it. - acc, _ := s.lookupAccount(m.Account) + // Make sure this does not force us to load this account here. + var acc *Account + if v, ok := s.accounts.Load(m.Account); ok { + acc = v.(*Account) + } + // Silently ignore these if we do not have local interest in the account. + if acc == nil { + return + } s.mu.Lock() defer s.mu.Unlock() @@ -646,33 +756,17 @@ func (s *Server) remoteConnsUpdate(sub *subscription, subject, reply string, msg if !s.running || !s.eventsEnabled() { return } - // Double check that this is not us, should never happen, so error if it does. if m.Server.ID == s.info.ID { s.sys.client.Errorf("Processing our own account connection event message: ignored") return } - if acc == nil { - s.sys.client.Debugf("Received account connection event for unknown account: %s", m.Account) - return - } // If we are here we have interest in tracking this account. Update our accounting. - acc.mu.Lock() - if acc.strack == nil { - acc.strack = make(map[string]sconns) - } - // This does not depend on receiving all updates since each one is idempotent. - prev := acc.strack[m.Server.ID] - acc.strack[m.Server.ID] = sconns{conns: int32(m.Conns), leafs: int32(m.LeafNodes)} - acc.nrclients += int32(m.Conns) - prev.conns - acc.nrleafs += int32(m.LeafNodes) - prev.leafs - acc.mu.Unlock() - + acc.updateRemoteServer(&m) s.updateRemoteServer(&m.Server) } -// Setup tracking for this account. This allows us to track globally -// account activity. +// Setup tracking for this account. This allows us to track global account activity. // Lock should be held on entry. func (s *Server) enableAccountTracking(a *Account) { if a == nil || !s.eventsEnabled() { @@ -715,12 +809,6 @@ func (s *Server) sendAccConnsUpdate(a *Account, subj string) { } a.mu.RLock() - // If no limits set, don't update, no need to. - if a.mconns == jwt.NoLimit && a.mleafs == jwt.NoLimit { - a.mu.RUnlock() - return - } - // Build event with account name and number of local clients and leafnodes. m := AccountNumConns{ Account: a.Name, @@ -763,15 +851,20 @@ func (s *Server) accConnsUpdate(a *Account) { // This is a billing event. func (s *Server) accountConnectEvent(c *client) { s.mu.Lock() + gacc := s.gacc if !s.eventsEnabled() { s.mu.Unlock() return } s.mu.Unlock() - subj := fmt.Sprintf(connectEventSubj, c.acc.Name) - c.mu.Lock() + // Ignore global account activity + if c.acc == nil || c.acc == gacc { + c.mu.Unlock() + return + } + m := ConnectEventMsg{ Client: ClientInfo{ Start: c.start, @@ -786,9 +879,8 @@ func (s *Server) accountConnectEvent(c *client) { } c.mu.Unlock() - s.mu.Lock() - s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) - s.mu.Unlock() + subj := fmt.Sprintf(connectEventSubj, c.acc.Name) + s.sendInternalMsgLocked(subj, _EMPTY_, &m.Server, &m) } // accountDisconnectEvent will send an account client disconnect event if there is interest. @@ -824,8 +916,8 @@ func (s *Server) accountDisconnectEvent(c *client, now time.Time, reason string) RTT: c.getRTT(), }, Sent: DataStats{ - Msgs: c.inMsgs, - Bytes: c.inBytes, + Msgs: atomic.LoadInt64(&c.inMsgs), + Bytes: atomic.LoadInt64(&c.inBytes), }, Received: DataStats{ Msgs: c.outMsgs, @@ -836,10 +928,7 @@ func (s *Server) accountDisconnectEvent(c *client, now time.Time, reason string) c.mu.Unlock() subj := fmt.Sprintf(disconnectEventSubj, c.acc.Name) - - s.mu.Lock() - s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) - s.mu.Unlock() + s.sendInternalMsgLocked(subj, _EMPTY_, &m.Server, &m) } func (s *Server) sendAuthErrorEvent(c *client) { @@ -880,14 +969,13 @@ func (s *Server) sendAuthErrorEvent(c *client) { subj := fmt.Sprintf(authErrorEventSubj, s.info.ID) s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) s.mu.Unlock() - } // Internal message callback. If the msg is needed past the callback it is // required to be copied. -type msgHandler func(sub *subscription, subject, reply string, msg []byte) +type msgHandler func(sub *subscription, client *client, subject, reply string, msg []byte) -func (s *Server) deliverInternalMsg(sub *subscription, subject, reply, msg []byte) { +func (s *Server) deliverInternalMsg(sub *subscription, c *client, subject, reply, msg []byte) { s.mu.Lock() if !s.eventsEnabled() || s.sys.subs == nil { s.mu.Unlock() @@ -896,12 +984,21 @@ func (s *Server) deliverInternalMsg(sub *subscription, subject, reply, msg []byt cb := s.sys.subs[string(sub.sid)] s.mu.Unlock() if cb != nil { - cb(sub, string(subject), string(reply), msg) + cb(sub, c, string(subject), string(reply), msg) } } // Create an internal subscription. No support for queue groups atm. func (s *Server) sysSubscribe(subject string, cb msgHandler) (*subscription, error) { + return s.systemSubscribe(subject, false, cb) +} + +// Create an internal subscription but do not forward interest. +func (s *Server) sysSubscribeInternal(subject string, cb msgHandler) (*subscription, error) { + return s.systemSubscribe(subject, true, cb) +} + +func (s *Server) systemSubscribe(subject string, internalOnly bool, cb msgHandler) (*subscription, error) { if !s.eventsEnabled() { return nil, ErrNoSysAccount } @@ -916,13 +1013,7 @@ func (s *Server) sysSubscribe(subject string, cb msgHandler) (*subscription, err s.mu.Unlock() // Now create the subscription - if err := c.processSub([]byte(subject + " " + sid)); err != nil { - return nil, err - } - c.mu.Lock() - sub := c.subs[sid] - c.mu.Unlock() - return sub, nil + return c.processSub([]byte(subject+" "+sid), internalOnly) } func (s *Server) sysUnsubscribe(sub *subscription) { @@ -937,6 +1028,286 @@ func (s *Server) sysUnsubscribe(sub *subscription) { c.unsubscribe(acc, sub, true, true) } +// This will generate the tracking subject for remote latency from the response subject. +func remoteLatencySubjectForResponse(subject []byte) string { + if !isTrackedReply(subject) { + return "" + } + toks := bytes.Split(subject, []byte(tsep)) + // FIXME(dlc) - Sprintf may become a performance concern at some point. + return fmt.Sprintf(remoteLatencyEventSubj, toks[len(toks)-2]) +} + +// remoteLatencyUpdate is used to track remote latency measurements for tracking on exported services. +func (s *Server) remoteLatencyUpdate(sub *subscription, _ *client, subject, _ string, msg []byte) { + if !s.eventsRunning() { + return + } + rl := remoteLatency{} + if err := json.Unmarshal(msg, &rl); err != nil { + s.Errorf("Error unmarshalling remot elatency measurement: %v", err) + return + } + // Now we need to look up the responseServiceImport associated with this measurement. + acc, err := s.LookupAccount(rl.Account) + if err != nil { + s.Warnf("Could not lookup account %q for latency measurement", rl.Account) + return + } + // Now get the request id / reply. We need to see if we have a GW prefix and if so strip that off. + reply := rl.ReqId + if gwPrefix, old := isGWRoutedSubjectAndIsOldPrefix([]byte(reply)); gwPrefix { + reply = string(getSubjectFromGWRoutedReply([]byte(reply), old)) + } + acc.mu.RLock() + si := acc.imports.services[reply] + if si == nil { + acc.mu.RUnlock() + return + } + m1 := si.m1 + m2 := rl.M2 + lsub := si.latency.subject + acc.mu.RUnlock() + + // So we have not processed the response tracking measurement yet. + if m1 == nil { + si.acc.mu.Lock() + // Double check since could have slipped in. + m1 = si.m1 + if m1 == nil { + // Store our value there for them to pick up. + si.m1 = &m2 + } + si.acc.mu.Unlock() + if m1 == nil { + return + } + } + + // Calculate the correct latency given M1 and M2. + // M2 ServiceLatency is correct, so use that. + // M1 TotalLatency is correct, so use that. + // Will use those to back into NATS latency. + m1.merge(&m2) + + // Make sure we remove the entry here. + acc.removeServiceImport(si.from) + // Send the metrics + s.sendInternalAccountMsg(acc, lsub, &m1) +} + +// This is used for all inbox replies so that we do not send supercluster wide interest +// updates for every request. Same trick used in modern NATS clients. +func (s *Server) inboxReply(sub *subscription, c *client, subject, reply string, msg []byte) { + s.mu.Lock() + if !s.eventsEnabled() || s.sys.replies == nil { + s.mu.Unlock() + return + } + cb, ok := s.sys.replies[subject] + s.mu.Unlock() + + if ok && cb != nil { + cb(sub, c, subject, reply, msg) + } +} + +// Copied from go client. +// We could use serviceReply here instead to save some code. +// I prefer these semantics for the moment, when tracing you know +// what this is. +const ( + InboxPrefix = "$SYS._INBOX." + inboxPrefixLen = len(InboxPrefix) + respInboxPrefixLen = inboxPrefixLen + sysHashLen + 1 + replySuffixLen = 8 // Gives us 62^8 +) + +// Creates an internal inbox used for replies that will be processed by the global wc handler. +func (s *Server) newRespInbox() string { + var b [respInboxPrefixLen + replySuffixLen]byte + pres := b[:respInboxPrefixLen] + copy(pres, s.sys.inboxPre) + rn := rand.Int63() + for i, l := respInboxPrefixLen, rn; i < len(b); i++ { + b[i] = digits[l%base] + l /= base + } + return string(b[:]) +} + +// accNumSubsReq is sent when we need to gather remote info on subs. +type accNumSubsReq struct { + Account string `json:"acc"` + Subject string `json:"subject"` + Queue []byte `json:"queue,omitempty"` +} + +// helper function to total information from results to count subs. +func totalSubs(rr *SublistResult, qg []byte) (nsubs int32) { + if rr == nil { + return + } + checkSub := func(sub *subscription) { + // TODO(dlc) - This could be smarter. + if qg != nil && !bytes.Equal(qg, sub.queue) { + return + } + if sub.client.kind == CLIENT || sub.client.isUnsolicitedLeafNode() { + nsubs++ + } + } + if qg == nil { + for _, sub := range rr.psubs { + checkSub(sub) + } + } + for _, qsub := range rr.qsubs { + for _, sub := range qsub { + checkSub(sub) + } + } + return +} + +// Allows users of large systems to debug active subscribers for a given subject. +// Payload should be the subject of interest. +func (s *Server) debugSubscribers(sub *subscription, c *client, subject, reply string, msg []byte) { + // Even though this is an internal only subscription, meaning interest was not forwarded, we could + // get one here from a GW in optimistic mode. Ignore for now. + // FIXME(dlc) - Should we send no interest here back to the GW? + if c.kind != CLIENT { + return + } + + var nsubs int32 + + // We could have a single subject or we could have a subject and a wildcard separated by whitespace. + args := strings.Split(strings.TrimSpace(string(msg)), " ") + if len(args) == 0 { + s.sendInternalAccountMsg(c.acc, reply, 0) + return + } + + tsubj := args[0] + var qgroup []byte + if len(args) > 1 { + qgroup = []byte(args[1]) + } + + if subjectIsLiteral(tsubj) { + // We will look up subscribers locally first then determine if we need to solicit other servers. + rr := c.acc.sl.Match(tsubj) + nsubs = totalSubs(rr, qgroup) + } else { + // We have a wildcard, so this is a bit slower path. + var _subs [32]*subscription + subs := _subs[:0] + c.acc.sl.All(&subs) + for _, sub := range subs { + if subjectIsSubsetMatch(string(sub.subject), tsubj) { + if qgroup != nil && !bytes.Equal(qgroup, sub.queue) { + continue + } + if sub.client.kind == CLIENT || sub.client.isUnsolicitedLeafNode() { + nsubs++ + } + } + } + } + + // We should have an idea of how many responses to expect from remote servers. + var expected = c.acc.expectedRemoteResponses() + + // If we are only local, go ahead and return. + if expected == 0 { + s.sendInternalAccountMsg(c.acc, reply, nsubs) + return + } + + // We need to solicit from others. + // To track status. + responses := int32(0) + done := make(chan (bool)) + + s.mu.Lock() + // Create direct reply inbox that we multiplex under the WC replies. + replySubj := s.newRespInbox() + // Store our handler. + s.sys.replies[replySubj] = func(sub *subscription, _ *client, subject, _ string, msg []byte) { + if n, err := strconv.Atoi(string(msg)); err == nil { + atomic.AddInt32(&nsubs, int32(n)) + } + if atomic.AddInt32(&responses, 1) >= expected { + select { + case done <- true: + default: + } + } + } + // Send the request to the other servers. + request := &accNumSubsReq{ + Account: c.acc.Name, + Subject: tsubj, + Queue: qgroup, + } + s.sendInternalMsg(accNumSubsReqSubj, replySubj, nil, request) + s.mu.Unlock() + + // FIXME(dlc) - We should rate limit here instead of blind Go routine. + go func() { + select { + case <-done: + case <-time.After(500 * time.Millisecond): + } + // Cleanup the WC entry. + s.mu.Lock() + delete(s.sys.replies, replySubj) + s.mu.Unlock() + // Send the response. + s.sendInternalAccountMsg(c.acc, reply, atomic.LoadInt32(&nsubs)) + }() +} + +// Request for our local subscription count. This will come from a remote origin server +// that received the initial request. +func (s *Server) nsubsRequest(sub *subscription, _ *client, subject, reply string, msg []byte) { + if !s.eventsRunning() { + return + } + m := accNumSubsReq{} + if err := json.Unmarshal(msg, &m); err != nil { + s.sys.client.Errorf("Error unmarshalling account nsubs request message: %v", err) + return + } + // Grab account. + acc, _ := s.lookupAccount(m.Account) + if acc == nil || acc.numLocalAndLeafConnections() == 0 { + return + } + // We will look up subscribers locally first then determine if we need to solicit other servers. + var nsubs int32 + if subjectIsLiteral(m.Subject) { + rr := acc.sl.Match(m.Subject) + nsubs = totalSubs(rr, m.Queue) + } else { + // We have a wildcard, so this is a bit slower path. + var _subs [32]*subscription + subs := _subs[:0] + acc.sl.All(&subs) + for _, sub := range subs { + if (sub.client.kind == CLIENT || sub.client.isUnsolicitedLeafNode()) && subjectIsSubsetMatch(string(sub.subject), m.Subject) { + if m.Queue != nil && !bytes.Equal(m.Queue, sub.queue) { + continue + } + nsubs++ + } + } + } + s.sendInternalMsgLocked(reply, _EMPTY_, nil, nsubs) +} + // Helper to grab name for a client. func nameForClient(c *client) string { if c.user != nil { @@ -966,10 +1337,11 @@ func clearTimer(tp **time.Timer) { func (s *Server) wrapChk(f func()) func() { return func() { s.mu.Lock() - defer s.mu.Unlock() if !s.eventsEnabled() { + s.mu.Unlock() return } f() + s.mu.Unlock() } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/gateway.go b/vendor/github.com/nats-io/nats-server/v2/server/gateway.go index 9a816393..ce23ac91 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/gateway.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/gateway.go @@ -33,10 +33,21 @@ const ( defaultSolicitGatewaysDelay = time.Second defaultGatewayConnectDelay = time.Second defaultGatewayReconnectDelay = time.Second - defaultGatewayRecentSubExpiration = 5 * time.Second + defaultGatewayRecentSubExpiration = 250 * time.Millisecond defaultGatewayMaxRUnsubBeforeSwitch = 1000 - gwReplyPrefix = "$GR." - gwReplyStart = len(gwReplyPrefix) + 5 // len of prefix above + len of hash (4) + "." + + oldGWReplyPrefix = "$GR." + oldGWReplyPrefixLen = len(oldGWReplyPrefix) + oldGWReplyStart = oldGWReplyPrefixLen + 5 // len of prefix above + len of hash (4) + "." + + // The new prefix is "_GR_..." where is 6 characters + // hash of origin cluster name and is 6 characters hash of origin server pub key. + gwReplyPrefix = "_GR_." + gwReplyPrefixLen = len(gwReplyPrefix) + gwHashLen = 6 + gwClusterOffset = gwReplyPrefixLen + gwServerOffset = gwClusterOffset + gwHashLen + 1 + gwSubjectOffset = gwServerOffset + gwHashLen + 1 ) var ( @@ -116,7 +127,11 @@ type srvGateway struct { info *Info // Gateway Info protocol infoJSON []byte // Marshal'ed Info protocol runknown bool // Rejects unknown (not configured) gateway connections - replyPfx []byte // Will be "$GR.." + replyPfx []byte // Will be "$GNR.<1:reserved>.<8:cluster hash>.<8:server hash>." + + // For backward compatibility + oldReplyPfx []byte + oldHash []byte // We maintain the interest of subjects and queues per account. // For a given account, entries in the map could be something like this: @@ -152,7 +167,8 @@ type sitally struct { type gatewayCfg struct { sync.RWMutex *RemoteGatewayOpts - replyPfx []byte + hash []byte + oldHash []byte urls map[string]*url.URL connAttempts int tlsName string @@ -172,6 +188,8 @@ type gateway struct { // Set/check in readLoop without lock. This is to know that an inbound has sent the CONNECT protocol first connected bool + // Set to true if outbound is to a server that only knows about $GR, not $GNR + useOldPrefix bool } // Outbound subject interest entry. @@ -207,6 +225,11 @@ type insie struct { mode GatewayInterestMode } +type gwReplyMap struct { + ms string + exp int64 +} + // clone returns a deep copy of the RemoteGatewayOpts object func (r *RemoteGatewayOpts) clone() *RemoteGatewayOpts { if r == nil { @@ -245,24 +268,30 @@ func validateGatewayOptions(o *Options) error { return nil } -// Computes a hash of 4 characters for the given gateway name. +// Computes a hash of 8 characters for the name. // This will be used for routing of replies. -func getReplyPrefixForGateway(name string) []byte { +func getHash(name string) []byte { + sha := sha256.New() + sha.Write([]byte(name)) + b := sha.Sum(nil) + for i := 0; i < gwHashLen; i++ { + b[i] = digits[int(b[i]%base)] + } + return b[:gwHashLen] +} + +func getOldHash(name string) []byte { sha := sha256.New() sha.Write([]byte(name)) fullHash := []byte(fmt.Sprintf("%x", sha.Sum(nil))) - prefix := make([]byte, 0, len(gwReplyPrefix)+5) - prefix = append(prefix, gwReplyPrefix...) - prefix = append(prefix, fullHash[:4]...) - prefix = append(prefix, '.') - return prefix + return fullHash[:4] } // Initialize the s.gateway structure. We do this even if the server // does not have a gateway configured. In some part of the code, the // server will check the number of outbound gateways, etc.. and so // we don't have to check if s.gateway is nil or not. -func newGateway(opts *Options) (*srvGateway, error) { +func (s *Server) newGateway(opts *Options) error { gateway := &srvGateway{ name: opts.Gateway.Name, out: make(map[string]*client), @@ -272,11 +301,27 @@ func newGateway(opts *Options) (*srvGateway, error) { URLs: make(map[string]struct{}), resolver: opts.Gateway.resolver, runknown: opts.Gateway.RejectUnknown, - replyPfx: getReplyPrefixForGateway(opts.Gateway.Name), + oldHash: getOldHash(opts.Gateway.Name), } gateway.Lock() defer gateway.Unlock() + s.hash = getHash(s.info.ID) + clusterHash := getHash(opts.Gateway.Name) + prefix := make([]byte, 0, gwSubjectOffset) + prefix = append(prefix, gwReplyPrefix...) + prefix = append(prefix, clusterHash...) + prefix = append(prefix, '.') + prefix = append(prefix, s.hash...) + prefix = append(prefix, '.') + gateway.replyPfx = prefix + + prefix = make([]byte, 0, oldGWReplyStart) + prefix = append(prefix, oldGWReplyPrefix...) + prefix = append(prefix, gateway.oldHash...) + prefix = append(prefix, '.') + gateway.oldReplyPfx = prefix + gateway.pasi.m = make(map[string]map[string]*sitally) if gateway.resolver == nil { @@ -291,7 +336,8 @@ func newGateway(opts *Options) (*srvGateway, error) { } cfg := &gatewayCfg{ RemoteGatewayOpts: rgo.clone(), - replyPfx: getReplyPrefixForGateway(rgo.Name), + hash: getHash(rgo.Name), + oldHash: getOldHash(rgo.Name), urls: make(map[string]*url.URL, len(rgo.URLs)), } if opts.Gateway.TLSConfig != nil && cfg.TLSConfig == nil { @@ -315,7 +361,8 @@ func newGateway(opts *Options) (*srvGateway, error) { gateway.recSubExp = defaultGatewayRecentSubExpiration gateway.enabled = opts.Gateway.Name != "" && opts.Gateway.Port != 0 - return gateway, nil + s.gateway = gateway + return nil } // Returns the Gateway's name of this server. @@ -416,6 +463,7 @@ func (s *Server) gatewayAcceptLoop(ch chan struct{}) { TLSVerify: tlsReq, MaxPayload: s.info.MaxPayload, Gateway: opts.Gateway.Name, + GatewayNRP: true, } // If we have selected a random port... if port == 0 { @@ -580,7 +628,6 @@ func (s *Server) solicitGateway(cfg *gatewayCfg, firstConnect bool) { var ( opts = s.getOpts() isImplicit = cfg.isImplicit() - urls = cfg.getURLs() attempts int typeStr string ) @@ -593,7 +640,11 @@ func (s *Server) solicitGateway(cfg *gatewayCfg, firstConnect bool) { const connFmt = "Connecting to %s gateway %q (%s) at %s (attempt %v)" const connErrFmt = "Error connecting to %s gateway %q (%s) at %s (attempt %v): %v" - for s.isRunning() && len(urls) > 0 { + for s.isRunning() { + urls := cfg.getURLs() + if len(urls) == 0 { + break + } attempts++ report := s.shouldReportConnectErr(firstConnect, attempts) // Iteration is random @@ -782,7 +833,7 @@ func (s *Server) createGateway(cfg *gatewayCfg, url *url.URL, conn net.Conn) { } // Set the Ping timer after sending connect and info. - c.setPingTimer() + s.setFirstPingTimer(c) c.mu.Unlock() } @@ -949,6 +1000,7 @@ func (c *client) processGatewayInfo(info *Info) { // Send INFO too c.sendInfo(c.gw.infoJSON) c.gw.infoJSON = nil + c.gw.useOldPrefix = !info.GatewayNRP c.mu.Unlock() // Register as an outbound gateway.. if we had a protocol to ack our connect, @@ -1270,7 +1322,8 @@ func (s *Server) processImplicitGateway(info *Info) { opts := s.getOpts() cfg = &gatewayCfg{ RemoteGatewayOpts: &RemoteGatewayOpts{Name: gwName}, - replyPfx: getReplyPrefixForGateway(gwName), + hash: getHash(gwName), + oldHash: getOldHash(gwName), urls: make(map[string]*url.URL, len(info.GatewayURLs)), implicit: true, } @@ -1710,6 +1763,12 @@ func (c *client) processGatewayRUnsub(arg []byte) error { }() c.mu.Lock() + if c.gw.outsim == nil { + c.Errorf("Received RS- from gateway on inbound connection") + c.mu.Unlock() + c.closeConnection(ProtocolViolation) + return nil + } defer c.mu.Unlock() ei, _ := c.gw.outsim.Load(accName) @@ -1812,6 +1871,12 @@ func (c *client) processGatewayRSub(arg []byte) error { }() c.mu.Lock() + if c.gw.outsim == nil { + c.Errorf("Received RS+ from gateway on inbound connection") + c.mu.Unlock() + c.closeConnection(ProtocolViolation) + return nil + } defer c.mu.Unlock() ei, _ := c.gw.outsim.Load(string(accName)) @@ -2170,31 +2235,31 @@ func (s *Server) gatewayUpdateSubInterest(accName string, sub *subscription, cha } } } - if first || last { - if sub.client != nil { - rsubs := &s.gateway.rsubs - c := sub.client - sli, _ := rsubs.Load(c) - if first { - var sl *Sublist - if sli == nil { - sl = NewSublistNoCache() - rsubs.Store(c, sl) - } else { - sl = sli.(*Sublist) - } - sl.Insert(sub) - time.AfterFunc(s.gateway.recSubExp, func() { - sl.Remove(sub) - }) - } else if sli != nil { - sl := sli.(*Sublist) + if sub.client != nil { + rsubs := &s.gateway.rsubs + c := sub.client + sli, _ := rsubs.Load(c) + if change > 0 { + var sl *Sublist + if sli == nil { + sl = NewSublistNoCache() + rsubs.Store(c, sl) + } else { + sl = sli.(*Sublist) + } + sl.Insert(sub) + time.AfterFunc(s.gateway.recSubExp, func() { sl.Remove(sub) - if sl.Count() == 0 { - rsubs.Delete(c) - } + }) + } else if sli != nil { + sl := sli.(*Sublist) + sl.Remove(sub) + if sl.Count() == 0 { + rsubs.Delete(c) } } + } + if first || last { if entry.q { s.sendQueueSubOrUnsubToGateways(accName, sub, first) } else { @@ -2203,27 +2268,52 @@ func (s *Server) gatewayUpdateSubInterest(accName string, sub *subscription, cha } } -// Returns true if the given subject starts with `$GR.` -func subjectStartsWithGatewayReplyPrefix(subj []byte) bool { - return len(subj) > gwReplyStart && string(subj[:len(gwReplyPrefix)]) == gwReplyPrefix +// Returns true if the given subject is a GW routed reply subject, +// that is, starts with $GNR and is long enough to contain cluster/server hash +// and subject. +func isGWRoutedReply(subj []byte) bool { + return len(subj) > gwSubjectOffset && string(subj[:gwReplyPrefixLen]) == gwReplyPrefix } -// Evaluates if the given reply should be mapped (adding the origin cluster -// hash as a prefix) or not. -func (g *srvGateway) shouldMapReplyForGatewaySend(c *client, reply []byte) bool { +// Same than isGWRoutedReply but accepts the old prefix $GR and returns +// a boolean indicating if this is the old prefix +func isGWRoutedSubjectAndIsOldPrefix(subj []byte) (bool, bool) { + if isGWRoutedReply(subj) { + return true, false + } + if len(subj) > oldGWReplyStart && string(subj[:oldGWReplyPrefixLen]) == oldGWReplyPrefix { + return true, true + } + return false, false +} + +// Returns true if subject starts with "$GNR.". This is to check that +// clients can't publish on this subject. +func hasGWRoutedReplyPrefix(subj []byte) bool { + return len(subj) > gwReplyPrefixLen && string(subj[:gwReplyPrefixLen]) == gwReplyPrefix +} + +// Evaluates if the given reply should be mapped or not. +func (g *srvGateway) shouldMapReplyForGatewaySend(c *client, acc *Account, reply []byte) bool { + // If the reply is a service reply (_R_), we will use the replyClient + // instead of the client handed to us. This client holds the wildcard + // for all service replies. + if isServiceReply(reply) { + c = acc.replyClient() + } + // If for this client there is a recent matching subscription interest + // then we will map. sli, _ := g.rsubs.Load(c) if sli == nil { return false } sl := sli.(*Sublist) - if sl.Count() == 0 { - return false - } - if subjectStartsWithGatewayReplyPrefix(reply) { - return false + if sl.Count() > 0 { + if r := sl.Match(string(reply)); len(r.psubs)+len(r.qsubs) > 0 { + return true + } } - r := sl.Match(string(reply)) - return len(r.psubs)+len(r.qsubs) > 0 + return false } var subPool = &sync.Pool{ @@ -2249,6 +2339,7 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr gws = append(gws, gw.outo[i]) } thisClusterReplyPrefix := gw.replyPfx + thisClusterOldReplyPrefix := gw.oldReplyPfx gw.RUnlock() if len(gws) == 0 { return @@ -2260,7 +2351,7 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr accName = acc.Name mreplya [256]byte mreply []byte - dstPfx []byte + dstHash []byte checkReply = len(reply) > 0 ) @@ -2270,18 +2361,28 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr // Make sure we are an 'R' proto c.msgb[0] = 'R' - // Check if the subject is on "$GR..", - // and if so, send to that GW regardless of its - // interest on the real subject (that is, skip the - // check of subject interest). - if subjectStartsWithGatewayReplyPrefix(subject) { - dstPfx = subject[:gwReplyStart] + // Check if the subject is on the reply prefix, if so, we + // need to send that message directly to the origin cluster. + directSend, old := isGWRoutedSubjectAndIsOldPrefix(subject) + if directSend { + if old { + dstHash = subject[oldGWReplyPrefixLen : oldGWReplyStart-1] + } else { + dstHash = subject[gwClusterOffset : gwClusterOffset+gwHashLen] + } } for i := 0; i < len(gws); i++ { gwc := gws[i] - if dstPfx != nil { + if directSend { gwc.mu.Lock() - ok := gwc.gw.cfg != nil && bytes.Equal(dstPfx, gwc.gw.cfg.replyPfx) + var ok bool + if gwc.gw.cfg != nil { + if old { + ok = bytes.Equal(dstHash, gwc.gw.cfg.oldHash) + } else { + ok = bytes.Equal(dstHash, gwc.gw.cfg.hash) + } + } gwc.mu.Unlock() if !ok { continue @@ -2322,11 +2423,17 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr checkReply = false // Assume we will use original mreply = reply - // If there was a recent matching subscription on that connection - // and the reply is not already mapped, then map (add prefix). - if gw.shouldMapReplyForGatewaySend(c, reply) { + // Decide if we should map. + if gw.shouldMapReplyForGatewaySend(c, acc, reply) { mreply = mreplya[:0] - mreply = append(mreply, thisClusterReplyPrefix...) + gwc.mu.Lock() + useOldPrefix := gwc.gw.useOldPrefix + gwc.mu.Unlock() + if useOldPrefix { + mreply = append(mreply, thisClusterOldReplyPrefix...) + } else { + mreply = append(mreply, thisClusterReplyPrefix...) + } mreply = append(mreply, reply...) } } @@ -2355,46 +2462,14 @@ func (c *client) sendMsgToGateways(acc *Account, msg, subject, reply []byte, qgr // So set/reset important fields. sub.nm, sub.max = 0, 0 sub.client = gwc - sub.subject = c.pa.subject - c.deliverMsg(sub, mh, msg) + sub.subject = subject + c.deliverMsg(sub, subject, mh, msg, false) } // Done with subscription, put back to pool. We don't need // to reset content since we explicitly set when using it. subPool.Put(sub) } -func (s *Server) gatewayHandleServiceImport(acc *Account, subject []byte, c *client, change int32) { - sid := make([]byte, 0, len(acc.Name)+len(subject)+1) - sid = append(sid, acc.Name...) - sid = append(sid, ' ') - sid = append(sid, subject...) - sub := &subscription{client: c, subject: subject, sid: sid} - - var rspa [1024]byte - rsproto := rspa[:0] - if change > 0 { - rsproto = append(rsproto, rSubBytes...) - } else { - rsproto = append(rsproto, rUnsubBytes...) - } - rsproto = append(rsproto, ' ') - rsproto = append(rsproto, sid...) - rsproto = append(rsproto, CR_LF...) - - s.mu.Lock() - for _, r := range s.routes { - r.mu.Lock() - r.sendProto(rsproto, false) - if r.trace { - r.traceOutOp("", rsproto[:len(rsproto)-LEN_CR_LF]) - } - r.mu.Unlock() - } - s.mu.Unlock() - // Possibly send RS+ to gateways too. - s.gatewayUpdateSubInterest(acc.Name, sub, change) -} - // Possibly sends an A- to the remote gateway `c`. // Invoked when processing an inbound message and the account is not found. // A check under a lock that protects processing of SUBs and UNSUBs is @@ -2498,15 +2573,159 @@ func (s *Server) gatewayHandleSubjectNoInterest(c *client, acc *Account, accName } } -func (g *srvGateway) getReplyPrefix() []byte { +// Returns the cluster hash from the gateway reply prefix +func (g *srvGateway) getClusterHash() []byte { g.RLock() - replyPfx := g.replyPfx + clusterHash := g.replyPfx[gwClusterOffset : gwClusterOffset+gwHashLen] g.RUnlock() - return replyPfx + return clusterHash } -func (s *Server) isGatewayReplyForThisCluster(subj []byte) bool { - return string(s.gateway.getReplyPrefix()) == string(subj[:gwReplyStart]) +// Returns the route with given hash or nil if not found. +func (s *Server) getRouteByHash(srvHash []byte) *client { + var route *client + if v, ok := s.routesByHash.Load(string(srvHash)); ok { + route = v.(*client) + } + return route +} + +// Returns the subject from the routed reply +func getSubjectFromGWRoutedReply(reply []byte, isOldPrefix bool) []byte { + if isOldPrefix { + return reply[oldGWReplyStart:] + } + return reply[gwSubjectOffset:] +} + +// This should be invoked only from processInboundGatewayMsg() or +// processInboundRoutedMsg() and is checking if the subject +// (c.pa.subject) has the $GNR prefix. If so, this is processed +// as a GW reply and `true` is returned to indicate to the caller +// that it should stop processing. +// If gateway is not enabled on this server or if the subject +// does not start with $GR, `false` is returned and caller should +// process message as usual. +func (c *client) handleGatewayReply(msg []byte) (processed bool) { + // Do not handle GW prefixed messages if this server does not have + // gateway enabled or if the subject does not start with the previx. + if !c.srv.gateway.enabled { + return false + } + isGWPrefix, oldPrefix := isGWRoutedSubjectAndIsOldPrefix(c.pa.subject) + if !isGWPrefix { + return false + } + // Save original subject (in case we have to forward) + orgSubject := c.pa.subject + + var clusterHash []byte + var srvHash []byte + var subject []byte + + if oldPrefix { + clusterHash = c.pa.subject[oldGWReplyPrefixLen : oldGWReplyStart-1] + // Check if this reply is intended for our cluster. + if !bytes.Equal(clusterHash, c.srv.gateway.oldHash) { + // We could report, for now, just drop. + return true + } + subject = c.pa.subject[oldGWReplyStart:] + } else { + clusterHash = c.pa.subject[gwClusterOffset : gwClusterOffset+gwHashLen] + // Check if this reply is intended for our cluster. + if !bytes.Equal(clusterHash, c.srv.gateway.getClusterHash()) { + // We could report, for now, just drop. + return true + } + srvHash = c.pa.subject[gwServerOffset : gwServerOffset+gwHashLen] + subject = c.pa.subject[gwSubjectOffset:] + } + + var route *client + + // If the origin is not this server, get the route this should be sent to. + if c.kind == GATEWAY && srvHash != nil && !bytes.Equal(srvHash, c.srv.hash) { + route = c.srv.getRouteByHash(srvHash) + // This will be possibly nil, and in this case we will try to process + // the interest from this server. + } + + // Adjust the subject + c.pa.subject = subject + + // Use a stack buffer to rewrite c.pa.cache since we only need it for + // getAccAndResultFromCache() + var _pacache [256]byte + pacache := _pacache[:0] + pacache = append(pacache, c.pa.account...) + pacache = append(pacache, ' ') + pacache = append(pacache, c.pa.subject...) + c.pa.pacache = pacache + + acc, r := c.getAccAndResultFromCache() + if acc == nil { + typeConn := "routed" + if c.kind == GATEWAY { + typeConn = "gateway" + } + c.Debugf("Unknown account %q for %s message on subject: %q", c.pa.account, typeConn, c.pa.subject) + if c.kind == GATEWAY { + c.srv.gatewayHandleAccountNoInterest(c, c.pa.account) + } + return true + } + // If route is nil, we will process the incoming message locally. + if route == nil { + // Check if this is a service reply subject (_R_) + if acc.imports.services != nil && isServiceReply(c.pa.subject) { + // This will map the _R_ back to a real subject and get + // the interest for that subject and process the message. + c.checkForImportServices(acc, msg) + return true + } + var queues [][]byte + if len(r.psubs)+len(r.qsubs) > 0 { + flags := pmrCollectQueueNames | pmrIgnoreEmptyQueueFilter + // If this message came from a ROUTE, allow to pick queue subs + // only if the message was directly sent by the "gateway" server + // in our cluster that received it. + if c.kind == ROUTER { + flags |= pmrAllowSendFromRouteToRoute + } + queues = c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, flags) + } + // Since this was a reply that made it to the origin cluster, + // we now need to send the message with the real subject to + // gateways in case they have interest on that reply subject. + c.sendMsgToGateways(acc, msg, c.pa.subject, c.pa.reply, queues) + } else if c.kind == GATEWAY { + // Only if we are a gateway connection should we try to route + // to the server where the request originated. + var bufa [256]byte + var buf = bufa[:0] + buf = append(buf, msgHeadProto...) + buf = append(buf, acc.Name...) + buf = append(buf, ' ') + buf = append(buf, orgSubject...) + buf = append(buf, ' ') + if len(c.pa.reply) > 0 { + buf = append(buf, c.pa.reply...) + buf = append(buf, ' ') + } + buf = append(buf, c.pa.szb...) + mhEnd := len(buf) + buf = append(buf, _CRLF_...) + buf = append(buf, msg...) + + route.mu.Lock() + route.sendProto(buf, true) + if route.trace { + route.traceOutOp("", buf[:mhEnd]) + } + route.mu.Unlock() + } + return true } // Process a message coming from a remote gateway. Send to any sub/qsub @@ -2533,32 +2752,11 @@ func (c *client) processInboundGatewayMsg(msg []byte) { return } - // If we receive a message on $GR.. - // we will drop the prefix before processing interest - // in this cluster, but we also need to resend to - // other gateways. - sendBackToGateways := false - - // First thing to do is to check if the subject starts - // with "$GR..". - if subjectStartsWithGatewayReplyPrefix(c.pa.subject) { - // If it does, then is this server/cluster the actual - // destination for this message? - if !c.srv.isGatewayReplyForThisCluster(c.pa.subject) { - // We could report, for now, just drop. - return - } - // Adjust the subject to past the prefix - c.pa.subject = c.pa.subject[gwReplyStart:] - // Use a stack buffer to rewrite c.pa.cache since we - // only need it for getAccAndResultFromCache() - var _pacache [256]byte - pacache := _pacache[:0] - pacache = append(pacache, c.pa.account...) - pacache = append(pacache, ' ') - pacache = append(pacache, c.pa.subject...) - c.pa.pacache = pacache - sendBackToGateways = true + // If the subject (c.pa.subject) has the gateway prefix, this function will + // handle it. + if c.handleGatewayReply(msg) { + // We are done here. + return } acc, r := c.getAccAndResultFromCache() @@ -2568,36 +2766,24 @@ func (c *client) processInboundGatewayMsg(msg []byte) { return } - // Check to see if we need to map/route to another account. + // Check if this is a service reply subject (_R_) + checkNoInterest := true if acc.imports.services != nil && isServiceReply(c.pa.subject) { - // We are handling a response to a request that we mapped - // via service imports, so if we are here we are the - // origin server c.checkForImportServices(acc, msg) + checkNoInterest = false } - - if !sendBackToGateways { + if checkNoInterest && len(r.psubs) == 0 { // If there is no interest on plain subs, possibly send an RS-, // even if there is qsubs interest. - if len(r.psubs) == 0 { - c.srv.gatewayHandleSubjectNoInterest(c, acc, c.pa.account, c.pa.subject) + c.srv.gatewayHandleSubjectNoInterest(c, acc, c.pa.account, c.pa.subject) - // If there is also no queue filter, then no point in continuing - // (even if r.qsubs i > 0). - if len(c.pa.queues) == 0 { - return - } + // If there is also no queue filter, then no point in continuing + // (even if r.qsubs i > 0). + if len(c.pa.queues) == 0 { + return } - c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, pmrNoFlag) - } else { - // We normally would not allow sending to a queue unless the - // RMSG contains the queue groups, however, if the incoming - // message was a "$GR." then we need to act as if this was - // a CLIENT connection.. - qnames := c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, - pmrCollectQueueNames|pmrTreatGatewayAsClient) - c.sendMsgToGateways(c.acc, msg, c.pa.subject, c.pa.reply, qnames) } + c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, pmrNoFlag) } // Indicates that the remote which we are sending messages to @@ -2734,3 +2920,91 @@ func (c *client) gatewaySwitchAccountToSendAllSubs(e *insie, accName string) { remoteGWName, accName, InterestOnly) }) } + +// Keeps track of the routed reply to be used when/if application +// sends back a message on the reply without the prefix. +// Client lock held on entry. This is a server receiver because +// we use a timer interval that is avail in Server.gateway object. +func (s *Server) trackGWReply(c *client, reply []byte) { + ttl := s.gateway.recSubExp + rm := c.gwrm + var we bool // will be true if map was empty on entry + if rm == nil { + rm = make(map[string]*gwReplyMap) + c.gwrm = rm + we = true + } else { + we = len(rm) == 0 + } + // We need to make a copy so that we don't reference the underlying + // read buffer. + ms := string(reply) + grm := &gwReplyMap{ms: ms, exp: time.Now().Add(ttl).UnixNano()} + // If we are here with the same key but different mapped replies + // (say $GNR._.A.srv1.bar and then $GNR._.B.srv2.bar), we need to + // store it otherwise we would take the risk of the reply not + // making it back. + rm[ms[gwSubjectOffset:]] = grm + if we { + atomic.StoreInt32(&c.cgwrt, 1) + s.gwrm.m.Store(c, nil) + if atomic.CompareAndSwapInt32(&s.gwrm.w, 0, 1) { + select { + case s.gwrm.ch <- ttl: + default: + } + } + } +} + +// Starts a long lived go routine that is responsible to +// remove GW reply mapping that have expired. +func (s *Server) startGWReplyMapExpiration() { + s.mu.Lock() + s.gwrm.ch = make(chan time.Duration, 1) + s.mu.Unlock() + s.startGoRoutine(func() { + defer s.grWG.Done() + + t := time.NewTimer(time.Hour) + var ttl time.Duration + for { + select { + case <-t.C: + if ttl == 0 { + t.Reset(time.Hour) + continue + } + now := time.Now().UnixNano() + mapEmpty := true + s.gwrm.m.Range(func(k, _ interface{}) bool { + c := k.(*client) + c.mu.Lock() + for k, grm := range c.gwrm { + if grm.exp <= now { + delete(c.gwrm, k) + if len(c.gwrm) == 0 { + atomic.StoreInt32(&c.cgwrt, 0) + s.gwrm.m.Delete(c) + } + } + } + c.mu.Unlock() + mapEmpty = false + return true + }) + if mapEmpty && atomic.CompareAndSwapInt32(&s.gwrm.w, 1, 0) { + ttl = 0 + t.Reset(time.Hour) + } else { + t.Reset(ttl) + } + case cttl := <-s.gwrm.ch: + ttl = cttl + t.Reset(ttl) + case <-s.quitCh: + return + } + } + }) +} diff --git a/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go b/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go index cb7e9753..fc954a54 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go @@ -34,11 +34,18 @@ import ( "time" "github.com/nats-io/nkeys" + "github.com/nats-io/nuid" ) // Warning when user configures leafnode TLS insecure const leafnodeTLSInsecureWarning = "TLS certificate chain and hostname of solicited leafnodes will not be verified. DO NOT USE IN PRODUCTION!" +// When a loop is detected, delay the reconnect of solicited connection. +const leafNodeReconnectDelayAfterLoopDetected = 30 * time.Second + +// Prefix for loop detection subject +const leafNodeLoopDetectionSubjectPrefix = "lds." + type leaf struct { // Used to suppress sub and unsub interest. Same as routes but our audience // here is tied to this leaf node. This will hold all subscriptions except this @@ -52,9 +59,12 @@ type leaf struct { type leafNodeCfg struct { sync.RWMutex *RemoteLeafOpts - urls []*url.URL - curURL *url.URL - tlsName string + urls []*url.URL + curURL *url.URL + tlsName string + username string + password string + loopDelay time.Duration // A loop condition was detected } // Check to see if this is a solicited leafnode. We do special processing for solicited. @@ -62,6 +72,10 @@ func (c *client) isSolicitedLeafNode() bool { return c.kind == LEAF && c.leaf.remote != nil } +func (c *client) isUnsolicitedLeafNode() bool { + return c.kind == LEAF && c.leaf.remote == nil +} + // This will spin up go routines to solicit the remote leaf node connections. func (s *Server) solicitLeafNodeRemotes(remotes []*RemoteLeafOpts) { for _, r := range remotes { @@ -82,6 +96,9 @@ func (s *Server) remoteLeafNodeStillValid(remote *leafNodeCfg) bool { // Ensure that leafnode is properly configured. func validateLeafNode(o *Options) error { + if err := validateLeafNodeAuthOptions(o); err != nil { + return err + } if o.LeafNode.Port == 0 { return nil } @@ -96,6 +113,26 @@ func validateLeafNode(o *Options) error { return nil } +// Used to validate user names in LeafNode configuration. +// - rejects mix of single and multiple users. +// - rejects duplicate user names. +func validateLeafNodeAuthOptions(o *Options) error { + if len(o.LeafNode.Users) == 0 { + return nil + } + if o.LeafNode.Username != _EMPTY_ { + return fmt.Errorf("can not have a single user/pass and a users array") + } + users := map[string]struct{}{} + for _, u := range o.LeafNode.Users { + if _, exists := users[u.Username]; exists { + return fmt.Errorf("duplicate user %q detected in leafnode authorization", u.Username) + } + users[u.Username] = struct{}{} + } + return nil +} + func (s *Server) reConnectToRemoteLeafNode(remote *leafNodeCfg) { delay := s.getOpts().LeafNode.ReconnectInterval select { @@ -117,8 +154,11 @@ func newLeafNodeCfg(remote *RemoteLeafOpts) *leafNodeCfg { // array when receiving async leafnode INFOs. cfg.urls = append(cfg.urls, cfg.URLs...) // If we are TLS make sure we save off a proper servername if possible. + // Do same for user/password since we may need them to connect to + // a bare URL that we get from INFO protocol. for _, u := range cfg.urls { cfg.saveTLSHostname(u) + cfg.saveUserPassword(u) } return cfg } @@ -145,6 +185,24 @@ func (cfg *leafNodeCfg) getCurrentURL() *url.URL { return cfg.curURL } +// Returns how long the server should wait before attempting +// to solicit a remote leafnode connection following the +// detection of a loop. +// Returns 0 if no loop was detected. +func (cfg *leafNodeCfg) getLoopDelay() time.Duration { + cfg.RLock() + delay := cfg.loopDelay + cfg.RUnlock() + return delay +} + +// Reset the loop delay. +func (cfg *leafNodeCfg) resetLoopDelay() { + cfg.Lock() + cfg.loopDelay = 0 + cfg.Unlock() +} + // Ensure that non-exported options (used in tests) have // been properly set. func (s *Server) setLeafNodeNonExportedOptions() { @@ -175,6 +233,15 @@ func (s *Server) connectToRemoteLeafNode(remote *leafNodeCfg, firstConnect bool) resolver := s.leafNodeOpts.resolver s.mu.Unlock() + if loopDelay := remote.getLoopDelay(); loopDelay > 0 { + select { + case <-time.After(loopDelay): + case <-s.quitCh: + return + } + remote.resetLoopDelay() + } + var conn net.Conn const connErrFmt = "Error trying to connect as leafnode to remote server %q (attempt %v): %v" @@ -225,10 +292,19 @@ func (s *Server) connectToRemoteLeafNode(remote *leafNodeCfg, firstConnect bool) // Save off the tlsName for when we use TLS and mix hostnames and IPs. IPs usually // come from the server we connect to. -func (lcfg *leafNodeCfg) saveTLSHostname(u *url.URL) { - isTLS := lcfg.TLSConfig != nil || u.Scheme == "tls" - if isTLS && lcfg.tlsName == "" && net.ParseIP(u.Hostname()) == nil { - lcfg.tlsName = u.Hostname() +func (cfg *leafNodeCfg) saveTLSHostname(u *url.URL) { + isTLS := cfg.TLSConfig != nil || u.Scheme == "tls" + if isTLS && cfg.tlsName == "" && net.ParseIP(u.Hostname()) == nil { + cfg.tlsName = u.Hostname() + } +} + +// Save off the username/password for when we connect using a bare URL +// that we get from the INFO protocol. +func (cfg *leafNodeCfg) saveUserPassword(u *url.URL) { + if cfg.username == _EMPTY_ && u.User != nil { + cfg.username = u.User.Username() + cfg.password, _ = u.User.Password() } } @@ -384,10 +460,11 @@ func (c *client) sendLeafConnect(tlsRequired bool) { cinfo.Sig = sig } else if userInfo := c.leaf.remote.curURL.User; userInfo != nil { cinfo.User = userInfo.Username() - pass, _ := userInfo.Password() - cinfo.Pass = pass + cinfo.Pass, _ = userInfo.Password() + } else if c.leaf.remote.username != _EMPTY_ { + cinfo.User = c.leaf.remote.username + cinfo.Pass = c.leaf.remote.password } - b, err := json.Marshal(cinfo) if err != nil { c.Errorf("Error marshaling CONNECT to route: %v\n", err) @@ -489,6 +566,8 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { // Determines if we are soliciting the connection or not. var solicited bool + c.mu.Lock() + c.initClient() if remote != nil { solicited = true // Users can bind to any local account, if its empty @@ -496,16 +575,21 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { if remote.LocalAccount == "" { remote.LocalAccount = globalAccountName } - // FIXME(dlc) - Make this resolve at startup. + c.leaf.remote = remote + c.mu.Unlock() + // TODO: Decide what should be the optimal behavior here. + // For now, if lookup fails, we will constantly try + // to recreate this LN connection. acc, err := s.LookupAccount(remote.LocalAccount) if err != nil { - c.Debugf("No local account %q for leafnode", remote.LocalAccount) + c.Errorf("No local account %q for leafnode: %v", remote.LocalAccount, err) c.closeConnection(MissingAccount) return nil } + c.mu.Lock() c.acc = acc - c.leaf.remote = remote } + c.mu.Unlock() var nonce [nonceLen]byte @@ -520,8 +604,6 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { // Grab lock c.mu.Lock() - c.initClient() - if solicited { // We need to wait here for the info, but not for too long. c.nc.SetReadDeadline(time.Now().Add(DEFAULT_LEAFNODE_INFO_WAIT)) @@ -661,7 +743,7 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { s.startGoRoutine(func() { c.writeLoop() }) // Set the Ping timer - c.setPingTimer() + s.setFirstPingTimer(c) c.mu.Unlock() @@ -725,7 +807,10 @@ func (c *client) updateLeafNodeURLs(info *Info) { // Do not add if it's the same as what we already have configured. var dup bool for _, u := range cfg.URLs { - if urlsAreEqual(url, u) { + // URLs that we receive never have user info, but the + // ones that were configured may have. Simply compare + // host and port to decide if they are equal or not. + if url.Host == u.Host && url.Port() == u.Port() { dup = true break } @@ -841,13 +926,16 @@ func (c *client) processLeafNodeConnect(s *Server, arg []byte, lang string) erro c.opts.Pedantic = false // Create and initialize the smap since we know our bound account now. - s.initLeafNodeSmap(c) - + lm := s.initLeafNodeSmap(c) // We are good to go, send over all the bound account subscriptions. - s.startGoRoutine(func() { + if lm <= 128 { c.sendAllLeafSubs() - s.grWG.Done() - }) + } else { + s.startGoRoutine(func() { + c.sendAllLeafSubs() + s.grWG.Done() + }) + } // Add in the leafnode here since we passed through auth at this point. s.addLeafNodeConnection(c) @@ -861,17 +949,17 @@ func (c *client) processLeafNodeConnect(s *Server, arg []byte, lang string) erro // Snapshot the current subscriptions from the sublist into our smap which // we will keep updated from now on. -func (s *Server) initLeafNodeSmap(c *client) { +func (s *Server) initLeafNodeSmap(c *client) int { acc := c.acc if acc == nil { c.Debugf("Leafnode does not have an account bound") - return + return 0 } // Collect all account subs here. _subs := [32]*subscription{} subs := _subs[:0] ims := []string{} - acc.mu.RLock() + acc.mu.Lock() accName := acc.Name // If we are solicited we only send interest for local clients. if c.isSolicitedLeafNode() { @@ -884,7 +972,13 @@ func (s *Server) initLeafNodeSmap(c *client) { for isubj := range acc.imports.services { ims = append(ims, isubj) } - acc.mu.RUnlock() + // Create a unique subject that will be used for loop detection. + lds := acc.lds + if lds == _EMPTY_ { + lds = leafNodeLoopDetectionSubjectPrefix + nuid.Next() + acc.lds = lds + } + acc.mu.Unlock() // Now check for gateway interest. Leafnodes will put this into // the proper mode to propagate, but they are not held in the account. @@ -905,6 +999,11 @@ func (s *Server) initLeafNodeSmap(c *client) { } applyGlobalRouting := s.gateway.enabled + if c.isSolicitedLeafNode() { + // Add a fake subscription for this solicited leafnode connection + // so that we can send back directly for mapped GW replies. + c.srv.gwLeafSubs.Insert(&subscription{client: c, subject: []byte(gwReplyPrefix + ">")}) + } // Now walk the results and add them to our smap c.mu.Lock() @@ -922,9 +1021,17 @@ func (s *Server) initLeafNodeSmap(c *client) { // that have been augmented from the original subscription. // TODO(dlc) - Should we lock this down more? if applyGlobalRouting { - c.leaf.smap[gwReplyPrefix+"*.>"]++ + c.leaf.smap[oldGWReplyPrefix+"*.>"]++ + c.leaf.smap[gwReplyPrefix+">"]++ + } + // Detect loop by subscribing to a specific subject and checking + // if this is coming back to us. + if c.leaf.remote == nil { + c.leaf.smap[lds]++ } + lenMap := len(c.leaf.smap) c.mu.Unlock() + return lenMap } // updateInterestForAccountOnGateway called from gateway code when processing RS+ and RS-. @@ -968,8 +1075,9 @@ func (c *client) updateSmap(sub *subscription, delta int32) { c.mu.Lock() - // If we are solicited make sure this is a local client. - if c.isSolicitedLeafNode() && sub.client.kind != CLIENT { + // If we are solicited make sure this is a local client or a non-solicited leaf node + skind := sub.client.kind + if c.isSolicitedLeafNode() && !(skind == CLIENT || (skind == LEAF && !sub.client.isSolicitedLeafNode())) { c.mu.Unlock() return } @@ -977,13 +1085,14 @@ func (c *client) updateSmap(sub *subscription, delta int32) { n := c.leaf.smap[key] // We will update if its a queue, if count is zero (or negative), or we were 0 and are N > 0. update := sub.queue != nil || n == 0 || n+delta <= 0 + n += delta if n > 0 { c.leaf.smap[key] = n } else { delete(c.leaf.smap, key) } - if update { + if update && c.flags.isSet(leafAllSubsSent) { c.sendLeafNodeSubUpdate(key, n) } c.mu.Unlock() @@ -1022,6 +1131,10 @@ func (c *client) sendAllLeafSubs() { var b bytes.Buffer c.mu.Lock() + // Set the flag here before first call to flushOutbound() since that + // releases the lock and so an update could sneak in. + c.flags.set(leafAllSubsSent) + for key, n := range c.leaf.smap { c.writeLeafSub(&b, key, n) } @@ -1103,6 +1216,14 @@ func (c *client) processLeafSub(argo []byte) (err error) { return nil } + acc := c.acc + // Check if we have a loop. + if string(sub.subject) == acc.getLds() { + c.mu.Unlock() + srv.reportLeafNodeLoop(c) + return nil + } + // Check permissions if applicable. if !c.canExport(string(sub.subject)) { c.mu.Unlock() @@ -1124,7 +1245,6 @@ func (c *client) processLeafSub(argo []byte) (err error) { } else { sub.sid = arg } - acc := c.acc key := string(sub.sid) osub := c.subs[key] updateGWs := false @@ -1160,12 +1280,32 @@ func (c *client) processLeafSub(argo []byte) (err error) { if updateGWs { srv.gatewayUpdateSubInterest(acc.Name, sub, 1) } - // Now check on leafnode updates for other leaf nodes. - srv.updateLeafNodes(acc, sub, 1) } + // Now check on leafnode updates for other leaf nodes. We understand solicited + // and non-solicited state in this call so we will do the right thing. + srv.updateLeafNodes(acc, sub, 1) + return nil } +func (s *Server) reportLeafNodeLoop(c *client) { + delay := leafNodeReconnectDelayAfterLoopDetected + opts := s.getOpts() + if opts.LeafNode.loopDelay != 0 { + delay = opts.LeafNode.loopDelay + } + c.mu.Lock() + if c.leaf.remote != nil { + c.leaf.remote.Lock() + c.leaf.remote.loopDelay = delay + c.leaf.remote.Unlock() + } + accName := c.acc.Name + c.mu.Unlock() + c.sendErrAndErr(fmt.Sprintf("Loop detected for leafnode account=%q. Delaying attempt to reconnect for %v", + accName, delay)) +} + // processLeafUnsub will process an inbound unsub request for the remote leaf node. func (c *client) processLeafUnsub(arg []byte) error { c.traceInOp("LS-", arg) @@ -1351,7 +1491,7 @@ func (c *client) processInboundLeafMsg(msg []byte) { // exclude them when sending to gateways. if len(r.qsubs) > 0 && c.srv.gateway.enabled && atomic.LoadInt64(&c.srv.gateway.totalQSubs) > 0 { - flag = pmrCollectQueueNames + flag |= pmrCollectQueueNames } qnames = c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, flag) } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/monitor.go b/vendor/github.com/nats-io/nats-server/v2/server/monitor.go index 63848116..bc0a4f4b 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/monitor.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/monitor.go @@ -72,6 +72,14 @@ type ConnzOptions struct { // Filter by connection state. State ConnState `json:"state"` + + // The below options only apply if auth is true. + + // Filter by username. + User string `json:"user"` + + // Filter by account. + Account string `json:"acc"` } // ConnState is for filtering states of connections. We will only have two, open and closed. @@ -110,6 +118,7 @@ type ConnInfo struct { TLSVersion string `json:"tls_version,omitempty"` TLSCipher string `json:"tls_cipher_suite,omitempty"` AuthorizedUser string `json:"authorized_user,omitempty"` + Account string `json:"account,omitempty"` Subs []string `json:"subscriptions_list,omitempty"` } @@ -121,7 +130,7 @@ const DefaultSubListSize = 1024 const defaultStackBufSize = 10000 -// Connz returns a Connz struct containing inormation about connections. +// Connz returns a Connz struct containing information about connections. func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { var ( sortOpt = ByCid @@ -131,6 +140,8 @@ func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { limit = DefaultConnListSize cid = uint64(0) state = ConnOpen + user string + acc string ) if opts != nil { @@ -143,7 +154,15 @@ func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { return nil, fmt.Errorf("invalid sorting option: %s", sortOpt) } } + + // Auth specifics. auth = opts.Username + if !auth && (user != "" || acc != "") { + return nil, fmt.Errorf("filter by user or account only allowed with auth option") + } + user = opts.User + acc = opts.Account + subs = opts.Subscriptions offset = opts.Offset if offset < 0 { @@ -247,6 +266,14 @@ func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { // Gather all open clients. if state == ConnOpen || state == ConnAll { for _, client := range s.clients { + // If we have an account specified we need to filter. + if acc != "" && (client.acc == nil || client.acc.Name != acc) { + continue + } + // Do user filtering second + if user != "" && client.opts.Username != user { + continue + } openClients = append(openClients, client) } } @@ -277,6 +304,10 @@ func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { // Fill in user if auth requested. if auth { ci.AuthorizedUser = client.opts.Username + // Add in account iff not the global account. + if client.acc != nil && (client.acc.Name != globalAccountName) { + ci.Account = client.acc.Name + } } client.mu.Unlock() pconns[i] = ci @@ -288,6 +319,15 @@ func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { needCopy = true } for _, cc := range closedClients { + // If we have an account specified we need to filter. + if acc != "" && cc.acc != acc { + continue + } + // Do user filtering second + if user != "" && cc.user != user { + continue + } + // Copy if needed for any changes to the ConnInfo if needCopy { cx := *cc @@ -300,11 +340,21 @@ func (s *Server) Connz(opts *ConnzOptions) (*Connz, error) { // Fill in user if auth requested. if auth { cc.AuthorizedUser = cc.user + // Add in account iff not the global account. + if cc.acc != "" && (cc.acc != globalAccountName) { + cc.Account = cc.acc + } } pconns[i] = &cc.ConnInfo i++ } + // This will trip if we have filtered out client connections. + if len(pconns) != i { + pconns = pconns[:i] + totalClients = i + } + switch sortOpt { case ByCid, ByStart: sort.Sort(byCid{pconns}) @@ -398,8 +448,8 @@ func (c *client) getRTT() string { if c.rtt == 0 { // If a real client, go ahead and send ping now to get a value // for RTT. For tests and telnet, or if client is closing, etc skip. - if !c.flags.isSet(clearConnection) && c.flags.isSet(connectReceived) && c.opts.Lang != "" { - c.sendPing() + if c.opts.Lang != "" { + c.sendRTTPingLocked() } return "" } @@ -502,6 +552,9 @@ func (s *Server) HandleConnz(w http.ResponseWriter, r *http.Request) { return } + user := r.URL.Query().Get("user") + acc := r.URL.Query().Get("acc") + connzOpts := &ConnzOptions{ Sort: sortOpt, Username: auth, @@ -510,6 +563,8 @@ func (s *Server) HandleConnz(w http.ResponseWriter, r *http.Request) { Limit: limit, CID: cid, State: state, + User: user, + Account: acc, } s.mu.Lock() @@ -558,6 +613,7 @@ type RouteInfo struct { Import *SubjectPermission `json:"import,omitempty"` Export *SubjectPermission `json:"export,omitempty"` Pending int `json:"pending_size"` + RTT string `json:"rtt,omitempty"` InMsgs int64 `json:"in_msgs"` OutMsgs int64 `json:"out_msgs"` InBytes int64 `json:"in_bytes"` @@ -566,7 +622,7 @@ type RouteInfo struct { Subs []string `json:"subscriptions_list,omitempty"` } -// Routez returns a Routez struct containing inormation about routes. +// Routez returns a Routez struct containing information about routes. func (s *Server) Routez(routezOpts *RoutezOptions) (*Routez, error) { rs := &Routez{Routes: []*RouteInfo{}} rs.Now = time.Now() @@ -600,6 +656,7 @@ func (s *Server) Routez(routezOpts *RoutezOptions) (*Routez, error) { NumSubs: uint32(len(r.subs)), Import: r.opts.Import, Export: r.opts.Export, + RTT: r.getRTT(), } if subs && len(r.subs) > 0 { @@ -836,6 +893,7 @@ func (s *Server) HandleStacksz(w http.ResponseWriter, r *http.Request) { // Varz will output server information on the monitoring port at /varz. type Varz struct { ID string `json:"server_id"` + Name string `json:"server_name"` Version string `json:"version"` Proto int `json:"proto"` GitCommit string `json:"git_commit,omitempty"` @@ -980,9 +1038,10 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) { connz
routez
gatewayz
+ leafz
subsz

- help + help `) } @@ -1019,6 +1078,7 @@ func (s *Server) createVarz(pcpu float64, rss int64) *Varz { Proto: info.Proto, GitCommit: info.GitCommit, GoVersion: info.GoVersion, + Name: info.Name, Host: info.Host, Port: info.Port, IP: info.IP, @@ -1532,6 +1592,111 @@ func (s *Server) HandleGatewayz(w http.ResponseWriter, r *http.Request) { ResponseHandler(w, r, b) } +// Leafz represents detailed information on Leafnodes. +type Leafz struct { + ID string `json:"server_id"` + Now time.Time `json:"now"` + NumLeafs int `json:"leafnodes"` + Leafs []*LeafInfo `json:"leafs"` +} + +// LeafzOptions are options passed to Leafz +type LeafzOptions struct { + // Subscriptions indicates that Leafz will return a leafnode's subscriptions + Subscriptions bool `json:"subscriptions"` +} + +// LeafInfo has detailed information on each remote leafnode connection. +type LeafInfo struct { + Account string `json:"account"` + IP string `json:"ip"` + Port int `json:"port"` + RTT string `json:"rtt,omitempty"` + InMsgs int64 `json:"in_msgs"` + OutMsgs int64 `json:"out_msgs"` + InBytes int64 `json:"in_bytes"` + OutBytes int64 `json:"out_bytes"` + NumSubs uint32 `json:"subscriptions"` + Subs []string `json:"subscriptions_list,omitempty"` +} + +// Leafz returns a Leafz structure containing information about leafnodes. +func (s *Server) Leafz(opts *LeafzOptions) (*Leafz, error) { + // Grab leafnodes + var lconns []*client + s.mu.Lock() + if len(s.leafs) > 0 { + lconns = make([]*client, 0, len(s.leafs)) + for _, ln := range s.leafs { + lconns = append(lconns, ln) + } + } + s.mu.Unlock() + + var leafnodes []*LeafInfo + if len(lconns) > 0 { + leafnodes = make([]*LeafInfo, 0, len(lconns)) + for _, ln := range lconns { + ln.mu.Lock() + lni := &LeafInfo{ + Account: ln.acc.Name, + IP: ln.host, + Port: int(ln.port), + RTT: ln.getRTT(), + InMsgs: atomic.LoadInt64(&ln.inMsgs), + OutMsgs: ln.outMsgs, + InBytes: atomic.LoadInt64(&ln.inBytes), + OutBytes: ln.outBytes, + NumSubs: uint32(len(ln.subs)), + } + if opts != nil && opts.Subscriptions { + lni.Subs = make([]string, 0, len(ln.subs)) + for _, sub := range ln.subs { + lni.Subs = append(lni.Subs, string(sub.subject)) + } + } + ln.mu.Unlock() + leafnodes = append(leafnodes, lni) + } + } + return &Leafz{ + ID: s.ID(), + Now: time.Now(), + NumLeafs: len(leafnodes), + Leafs: leafnodes, + }, nil +} + +// HandleLeafz process HTTP requests for leafnode information. +func (s *Server) HandleLeafz(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + s.httpReqStats[LeafzPath]++ + s.mu.Unlock() + + subs, err := decodeBool(w, r, "subs") + if err != nil { + return + } + var opts *LeafzOptions + if subs { + opts = &LeafzOptions{Subscriptions: true} + } + + l, err := s.Leafz(opts) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(err.Error())) + return + } + b, err := json.MarshalIndent(l, "", " ") + if err != nil { + s.Errorf("Error marshaling response to /leafz request: %v", err) + } + + // Handle response + ResponseHandler(w, r, b) +} + // ResponseHandler handles responses for monitoring routes func ResponseHandler(w http.ResponseWriter, r *http.Request, data []byte) { // Get callback from request diff --git a/vendor/github.com/nats-io/nats-server/v2/server/opts.go b/vendor/github.com/nats-io/nats-server/v2/server/opts.go index 5be2aa4a..c8830218 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/opts.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/opts.go @@ -24,7 +24,9 @@ import ( "net" "net/url" "os" + "path/filepath" "regexp" + "runtime" "strconv" "strings" "sync/atomic" @@ -107,6 +109,8 @@ type LeafNodeOpts struct { Port int `json:"port,omitempty"` Username string `json:"-"` Password string `json:"-"` + Account string `json:"-"` + Users []*User `json:"-"` AuthTimeout float64 `json:"auth_timeout,omitempty"` TLSConfig *tls.Config `json:"-"` TLSTimeout float64 `json:"tls_timeout,omitempty"` @@ -121,6 +125,7 @@ type LeafNodeOpts struct { // Not exported, for tests. resolver netResolver dialTimeout time.Duration + loopDelay time.Duration } // RemoteLeafOpts are options for connecting to a remote server as a leaf node. @@ -137,57 +142,59 @@ type RemoteLeafOpts struct { // NOTE: This structure is no longer used for monitoring endpoints // and json tags are deprecated and may be removed in the future. type Options struct { - ConfigFile string `json:"-"` - Host string `json:"addr"` - Port int `json:"port"` - ClientAdvertise string `json:"-"` - Trace bool `json:"-"` - Debug bool `json:"-"` - NoLog bool `json:"-"` - NoSigs bool `json:"-"` - NoSublistCache bool `json:"-"` - Logtime bool `json:"-"` - MaxConn int `json:"max_connections"` - MaxSubs int `json:"max_subscriptions,omitempty"` - Nkeys []*NkeyUser `json:"-"` - Users []*User `json:"-"` - Accounts []*Account `json:"-"` - SystemAccount string `json:"-"` - AllowNewAccounts bool `json:"-"` - Username string `json:"-"` - Password string `json:"-"` - Authorization string `json:"-"` - PingInterval time.Duration `json:"ping_interval"` - MaxPingsOut int `json:"ping_max"` - HTTPHost string `json:"http_host"` - HTTPPort int `json:"http_port"` - HTTPSPort int `json:"https_port"` - AuthTimeout float64 `json:"auth_timeout"` - MaxControlLine int32 `json:"max_control_line"` - MaxPayload int32 `json:"max_payload"` - MaxPending int64 `json:"max_pending"` - Cluster ClusterOpts `json:"cluster,omitempty"` - Gateway GatewayOpts `json:"gateway,omitempty"` - LeafNode LeafNodeOpts `json:"leaf,omitempty"` - ProfPort int `json:"-"` - PidFile string `json:"-"` - PortsFileDir string `json:"-"` - LogFile string `json:"-"` - Syslog bool `json:"-"` - RemoteSyslog string `json:"-"` - Routes []*url.URL `json:"-"` - RoutesStr string `json:"-"` - TLSTimeout float64 `json:"tls_timeout"` - TLS bool `json:"-"` - TLSVerify bool `json:"-"` - TLSMap bool `json:"-"` - TLSCert string `json:"-"` - TLSKey string `json:"-"` - TLSCaCert string `json:"-"` - TLSConfig *tls.Config `json:"-"` - WriteDeadline time.Duration `json:"-"` - MaxClosedClients int `json:"-"` - LameDuckDuration time.Duration `json:"-"` + ConfigFile string `json:"-"` + ServerName string `json:"server_name"` + Host string `json:"addr"` + Port int `json:"port"` + ClientAdvertise string `json:"-"` + Trace bool `json:"-"` + Debug bool `json:"-"` + NoLog bool `json:"-"` + NoSigs bool `json:"-"` + NoSublistCache bool `json:"-"` + DisableShortFirstPing bool `json:"-"` + Logtime bool `json:"-"` + MaxConn int `json:"max_connections"` + MaxSubs int `json:"max_subscriptions,omitempty"` + Nkeys []*NkeyUser `json:"-"` + Users []*User `json:"-"` + Accounts []*Account `json:"-"` + SystemAccount string `json:"-"` + AllowNewAccounts bool `json:"-"` + Username string `json:"-"` + Password string `json:"-"` + Authorization string `json:"-"` + PingInterval time.Duration `json:"ping_interval"` + MaxPingsOut int `json:"ping_max"` + HTTPHost string `json:"http_host"` + HTTPPort int `json:"http_port"` + HTTPSPort int `json:"https_port"` + AuthTimeout float64 `json:"auth_timeout"` + MaxControlLine int32 `json:"max_control_line"` + MaxPayload int32 `json:"max_payload"` + MaxPending int64 `json:"max_pending"` + Cluster ClusterOpts `json:"cluster,omitempty"` + Gateway GatewayOpts `json:"gateway,omitempty"` + LeafNode LeafNodeOpts `json:"leaf,omitempty"` + ProfPort int `json:"-"` + PidFile string `json:"-"` + PortsFileDir string `json:"-"` + LogFile string `json:"-"` + Syslog bool `json:"-"` + RemoteSyslog string `json:"-"` + Routes []*url.URL `json:"-"` + RoutesStr string `json:"-"` + TLSTimeout float64 `json:"tls_timeout"` + TLS bool `json:"-"` + TLSVerify bool `json:"-"` + TLSMap bool `json:"-"` + TLSCert string `json:"-"` + TLSKey string `json:"-"` + TLSCaCert string `json:"-"` + TLSConfig *tls.Config `json:"-"` + WriteDeadline time.Duration `json:"-"` + MaxClosedClients int `json:"-"` + LameDuckDuration time.Duration `json:"-"` // MaxTracedMsgLen is the maximum printable length for traced messages. MaxTracedMsgLen int `json:"-"` @@ -220,6 +227,7 @@ type Options struct { // private fields, used for testing gatewaysSolicitDelay time.Duration + routeProto int } type netResolver interface { @@ -288,6 +296,7 @@ type authorization struct { user string pass string token string + acc string // Multiple Nkeys/Users nkeys []*NkeyUser users []*User @@ -371,6 +380,28 @@ func unwrapValue(v interface{}) (token, interface{}) { } } +// configureSystemAccount configures a system account +// if present in the configuration. +func configureSystemAccount(o *Options, m map[string]interface{}) error { + configure := func(v interface{}) error { + tk, v := unwrapValue(v) + sa, ok := v.(string) + if !ok { + return &configErr{tk, fmt.Sprintf("system account name must be a string")} + } + o.SystemAccount = sa + return nil + } + + if v, ok := m["system_account"]; ok { + return configure(v) + } else if v, ok := m["system"]; ok { + return configure(v) + } + + return nil +} + // ProcessConfigFile updates the Options structure with options // present in the given configuration file. // This version is convenient if one wants to set some default @@ -398,6 +429,12 @@ func (o *Options) ProcessConfigFile(configFile string) error { errors := make([]error, 0) warnings := make([]error, 0) + // First check whether a system account has been defined, + // as that is a condition for other features to be enabled. + if err := configureSystemAccount(o, m); err != nil { + errors = append(errors, err) + } + for k, v := range m { tk, v := unwrapValue(v) switch strings.ToLower(k) { @@ -413,6 +450,8 @@ func (o *Options) ProcessConfigFile(configFile string) error { o.ClientAdvertise = v.(string) case "port": o.Port = int(v.(int64)) + case "server_name": + o.ServerName = v.(string) case "host", "net": o.Host = v.(string) case "debug": @@ -677,12 +716,9 @@ func (o *Options) ProcessConfigFile(configFile string) error { } } case "system_account", "system": - if sa, ok := v.(string); !ok { - err := &configErr{tk, fmt.Sprintf("system account name must be a string")} - errors = append(errors, err) - } else { - o.SystemAccount = sa - } + // Already processed at the beginning so we just skip them + // to not treat them as unknown values. + continue case "trusted", "trusted_keys": switch v := v.(type) { case string: @@ -1014,20 +1050,21 @@ func parseLeafNodes(v interface{}, opts *Options, errors *[]error, warnings *[]e case "host", "net": opts.LeafNode.Host = mv.(string) case "authorization": - auth, err := parseAuthorization(tk, opts, errors, warnings) + auth, err := parseLeafAuthorization(tk, errors, warnings) if err != nil { *errors = append(*errors, err) continue } - if auth.users != nil { - err := &configErr{tk, fmt.Sprintf("Leafnode authorization does not allow multiple users")} - *errors = append(*errors, err) - continue - } opts.LeafNode.Username = auth.user opts.LeafNode.Password = auth.pass opts.LeafNode.AuthTimeout = auth.timeout - + opts.LeafNode.Account = auth.acc + opts.LeafNode.Users = auth.users + // Validate user info config for leafnode authorization + if err := validateLeafNodeAuthOptions(opts); err != nil { + *errors = append(*errors, &configErr{tk, err.Error()}) + continue + } case "remotes": // Parse the remote options here. remotes, err := parseRemoteLeafNodes(mv, errors, warnings) @@ -1070,6 +1107,114 @@ func parseLeafNodes(v interface{}, opts *Options, errors *[]error, warnings *[]e return nil } +// This is the authorization parser adapter for the leafnode's +// authorization config. +func parseLeafAuthorization(v interface{}, errors *[]error, warnings *[]error) (*authorization, error) { + var ( + am map[string]interface{} + tk token + auth = &authorization{} + ) + _, v = unwrapValue(v) + am = v.(map[string]interface{}) + for mk, mv := range am { + tk, mv = unwrapValue(mv) + switch strings.ToLower(mk) { + case "user", "username": + auth.user = mv.(string) + case "pass", "password": + auth.pass = mv.(string) + case "timeout": + at := float64(1) + switch mv := mv.(type) { + case int64: + at = float64(mv) + case float64: + at = mv + } + auth.timeout = at + case "users": + users, err := parseLeafUsers(tk, errors, warnings) + if err != nil { + *errors = append(*errors, err) + continue + } + auth.users = users + case "account": + auth.acc = mv.(string) + default: + if !tk.IsUsedVariable() { + err := &unknownConfigFieldErr{ + field: mk, + configErr: configErr{ + token: tk, + }, + } + *errors = append(*errors, err) + } + continue + } + } + return auth, nil +} + +// This is a trimmed down version of parseUsers that is adapted +// for the users possibly defined in the authorization{} section +// of leafnodes {}. +func parseLeafUsers(mv interface{}, errors *[]error, warnings *[]error) ([]*User, error) { + var ( + tk token + users = []*User{} + ) + tk, mv = unwrapValue(mv) + // Make sure we have an array + uv, ok := mv.([]interface{}) + if !ok { + return nil, &configErr{tk, fmt.Sprintf("Expected users field to be an array, got %v", mv)} + } + for _, u := range uv { + tk, u = unwrapValue(u) + // Check its a map/struct + um, ok := u.(map[string]interface{}) + if !ok { + err := &configErr{tk, fmt.Sprintf("Expected user entry to be a map/struct, got %v", u)} + *errors = append(*errors, err) + continue + } + user := &User{} + for k, v := range um { + tk, v = unwrapValue(v) + switch strings.ToLower(k) { + case "user", "username": + user.Username = v.(string) + case "pass", "password": + user.Password = v.(string) + case "account": + // We really want to save just the account name here, but + // the User object is *Account. So we create an account object + // but it won't be registered anywhere. The server will just + // use opts.LeafNode.Users[].Account.Name. Alternatively + // we need to create internal objects to store u/p and account + // name and have a server structure to hold that. + user.Account = NewAccount(v.(string)) + default: + if !tk.IsUsedVariable() { + err := &unknownConfigFieldErr{ + field: k, + configErr: configErr{ + token: tk, + }, + } + *errors = append(*errors, err) + continue + } + } + } + users = append(users, user) + } + return users, nil +} + func parseRemoteLeafNodes(v interface{}, errors *[]error, warnings *[]error) ([]*RemoteLeafOpts, error) { tk, v := unwrapValue(v) ra, ok := v.([]interface{}) @@ -1109,7 +1254,12 @@ func parseRemoteLeafNodes(v interface{}, errors *[]error, warnings *[]error) ([] case "account", "local": remote.LocalAccount = v.(string) case "creds", "credentials": - remote.Credentials = v.(string) + p, err := expandPath(v.(string)) + if err != nil { + *errors = append(*errors, &configErr{tk, err.Error()}) + continue + } + remote.Credentials = p case "tls": tc, err := parseTLS(tk) if err != nil { @@ -1250,6 +1400,7 @@ type export struct { sub string accs []string rt ServiceRespType + lat *serviceLatency } type importStream struct { @@ -1447,6 +1598,20 @@ func parseAccounts(v interface{}, opts *Options, errors *[]error, warnings *[]er *errors = append(*errors, &configErr{tk, msg}) continue } + + if service.lat != nil { + if opts.SystemAccount == "" { + msg := fmt.Sprintf("Error adding service latency sampling for %q: %v", service.sub, ErrNoSysAccount.Error()) + *errors = append(*errors, &configErr{tk, msg}) + continue + } + + if err := service.acc.TrackServiceExportWithSampling(service.sub, service.lat.subject, int(service.lat.sampling)); err != nil { + msg := fmt.Sprintf("Error adding service latency sampling for %q on subject %q: %v", service.sub, service.lat.subject, err) + *errors = append(*errors, &configErr{tk, msg}) + continue + } + } } for _, stream := range importStreams { ta := am[stream.an] @@ -1481,7 +1646,7 @@ func parseAccounts(v interface{}, opts *Options, errors *[]error, warnings *[]er return nil } -// Parse the account imports +// Parse the account exports func parseAccountExports(v interface{}, acc *Account, errors, warnings *[]error) ([]*export, []*export, error) { // This should be an array of objects/maps. tk, v := unwrapValue(v) @@ -1523,6 +1688,7 @@ func parseAccountImports(v interface{}, acc *Account, errors, warnings *[]error) var services []*importService var streams []*importStream + svcSubjects := map[string]*importService{} for _, v := range ims { // Should have stream or service @@ -1532,6 +1698,15 @@ func parseAccountImports(v interface{}, acc *Account, errors, warnings *[]error) continue } if service != nil { + if dup := svcSubjects[service.to]; dup != nil { + tk, _ := unwrapValue(v) + err := &configErr{tk, + fmt.Sprintf("Duplicate service import subject %q, previously used in import for account %q, subject %q", + service.to, dup.an, dup.sub)} + *errors = append(*errors, err) + continue + } + svcSubjects[service.to] = service service.acc = acc services = append(services, service) } @@ -1568,7 +1743,7 @@ func parseAccount(v map[string]interface{}, errors, warnings *[]error) (string, return accountName, subject, nil } -// Parse an import stream or service. +// Parse an export stream or service. // e.g. // {stream: "public.>"} # No accounts means public. // {stream: "synadia.private.>", accounts: [cncf, natsio]} @@ -1581,6 +1756,9 @@ func parseExportStreamOrService(v interface{}, errors, warnings *[]error) (*expo accounts []string rt ServiceRespType rtSeen bool + rtToken token + lat *serviceLatency + latToken token ) tk, v := unwrapValue(v) vv, ok := v.(map[string]interface{}) @@ -1596,8 +1774,13 @@ func parseExportStreamOrService(v interface{}, errors, warnings *[]error) (*expo *errors = append(*errors, err) continue } - if rtSeen { - err := &configErr{tk, "Detected response directive on non-service"} + if rtToken != nil { + err := &configErr{rtToken, "Detected response directive on non-service"} + *errors = append(*errors, err) + continue + } + if latToken != nil { + err := &configErr{latToken, "Detected latency directive on non-service"} *errors = append(*errors, err) continue } @@ -1613,6 +1796,7 @@ func parseExportStreamOrService(v interface{}, errors, warnings *[]error) (*expo } case "response", "response_type": rtSeen = true + rtToken = tk mvs, ok := mv.(string) if !ok { err := &configErr{tk, fmt.Sprintf("Expected response type to be string, got %T", mv)} @@ -1657,6 +1841,9 @@ func parseExportStreamOrService(v interface{}, errors, warnings *[]error) (*expo if rtSeen { curService.rt = rt } + if lat != nil { + curService.lat = lat + } case "accounts": for _, iv := range mv.([]interface{}) { _, mv := unwrapValue(iv) @@ -1667,6 +1854,22 @@ func parseExportStreamOrService(v interface{}, errors, warnings *[]error) (*expo } else if curService != nil { curService.accs = accounts } + case "latency": + latToken = tk + var err error + lat, err = parseServiceLatency(tk, mv) + if err != nil { + *errors = append(*errors, err) + continue + } + if curStream != nil { + err = &configErr{tk, "Detected latency directive on non-service"} + *errors = append(*errors, err) + continue + } + if curService != nil { + curService.lat = lat + } default: if !tk.IsUsedVariable() { err := &unknownConfigFieldErr{ @@ -1678,11 +1881,77 @@ func parseExportStreamOrService(v interface{}, errors, warnings *[]error) (*expo *errors = append(*errors, err) } } - } return curStream, curService, nil } +// parseServiceLatency returns a latency config block. +func parseServiceLatency(root token, v interface{}) (*serviceLatency, error) { + if subject, ok := v.(string); ok { + return &serviceLatency{ + subject: subject, + sampling: DEFAULT_SERVICE_LATENCY_SAMPLING, + }, nil + } + + latency, ok := v.(map[string]interface{}) + if !ok { + return nil, &configErr{token: root, + reason: fmt.Sprintf("Expected latency entry to be a map/struct or string, got %T", v)} + } + + sl := serviceLatency{ + sampling: DEFAULT_SERVICE_LATENCY_SAMPLING, + } + + // Read sampling value. + if v, ok := latency["sampling"]; ok { + tk, v := unwrapValue(v) + + var sample int64 + switch vv := v.(type) { + case int64: + // Sample is an int, like 50. + sample = vv + case string: + // Sample is a string, like "50%". + s := strings.TrimSuffix(vv, "%") + n, err := strconv.Atoi(s) + if err != nil { + return nil, &configErr{token: tk, + reason: fmt.Sprintf("Failed to parse latency sample: %v", err)} + } + sample = int64(n) + default: + return nil, &configErr{token: tk, + reason: fmt.Sprintf("Expected latency sample to be a string or map/struct, got %T", v)} + } + if sample < 1 || sample > 100 { + return nil, &configErr{token: tk, + reason: ErrBadSampling.Error()} + } + + sl.sampling = int8(sample) + } + + // Read subject value. + v, ok = latency["subject"] + if !ok { + return nil, &configErr{token: root, + reason: "Latency subject required, but missing"} + } + + tk, v := unwrapValue(v) + subject, ok := v.(string) + if !ok { + return nil, &configErr{token: tk, + reason: fmt.Sprintf("Expected latency subject to be a string, got %T", subject)} + } + sl.subject = subject + + return &sl, nil +} + // Parse an import stream or service. // e.g. // {stream: {account: "synadia", subject:"public.synadia"}, prefix: "imports.synadia"} @@ -2068,7 +2337,12 @@ func parseAllowResponses(v interface{}, errors, warnings *[]error) *ResponsePerm tk, v = unwrapValue(v) switch strings.ToLower(k) { case "max", "max_msgs", "max_messages", "max_responses": - rp.MaxMsgs = int(v.(int64)) + max := int(v.(int64)) + // Negative values are accepted (mean infinite), and 0 + // means default value (set above). + if max != 0 { + rp.MaxMsgs = max + } case "expires", "expiration", "ttl": wd, ok := v.(string) if ok { @@ -2078,7 +2352,11 @@ func parseAllowResponses(v interface{}, errors, warnings *[]error) *ResponsePerm *errors = append(*errors, err) return nil } - rp.Expires = ttl + // Negative values are accepted (mean infinite), and 0 + // means default value (set above). + if ttl != 0 { + rp.Expires = ttl + } } else { err := &configErr{tk, "error parsing expires, not a duration string"} *errors = append(*errors, err) @@ -2885,6 +3163,15 @@ func overrideCluster(opts *Options) error { opts.Cluster.Port = 0 return nil } + // -1 will fail url.Parse, so if we have -1, change it to + // 0, and then after parse, replace the port with -1 so we get + // automatic port allocation + wantsRandom := false + if strings.HasSuffix(opts.Cluster.ListenStr, ":-1") { + wantsRandom = true + cls := fmt.Sprintf("%s:0", opts.Cluster.ListenStr[0:len(opts.Cluster.ListenStr)-3]) + opts.Cluster.ListenStr = cls + } clusterURL, err := url.Parse(opts.Cluster.ListenStr) if err != nil { return err @@ -2893,6 +3180,9 @@ func overrideCluster(opts *Options) error { if err != nil { return err } + if wantsRandom { + p = "-1" + } opts.Cluster.Host = h _, err = fmt.Sscan(p, &opts.Cluster.Port) if err != nil { @@ -2945,3 +3235,41 @@ func maybeReadPidFile(pidStr string) string { } return pidStr } + +func homeDir() (string, error) { + if runtime.GOOS == "windows" { + homeDrive, homePath := os.Getenv("HOMEDRIVE"), os.Getenv("HOMEPATH") + userProfile := os.Getenv("USERPROFILE") + + home := filepath.Join(homeDrive, homePath) + if homeDrive == "" || homePath == "" { + if userProfile == "" { + return "", errors.New("nats: failed to get home dir, require %HOMEDRIVE% and %HOMEPATH% or %USERPROFILE%") + } + home = userProfile + } + + return home, nil + } + + home := os.Getenv("HOME") + if home == "" { + return "", errors.New("failed to get home dir, require $HOME") + } + return home, nil +} + +func expandPath(p string) (string, error) { + p = os.ExpandEnv(p) + + if !strings.HasPrefix(p, "~") { + return p, nil + } + + home, err := homeDir() + if err != nil { + return "", err + } + + return filepath.Join(home, p[1:]), nil +} diff --git a/vendor/github.com/nats-io/nats-server/v2/server/parser.go b/vendor/github.com/nats-io/nats-server/v2/server/parser.go index f03826b6..72eb293f 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/parser.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/parser.go @@ -282,7 +282,7 @@ func (c *client) parse(buf []byte) error { c.drop, c.as, c.state = 0, i+1, OP_START // Drop all pub args c.pa.arg, c.pa.pacache, c.pa.account, c.pa.subject = nil, nil, nil, nil - c.pa.reply, c.pa.szb, c.pa.queues = nil, nil, nil + c.pa.reply, c.pa.size, c.pa.szb, c.pa.queues = nil, 0, nil, nil case OP_A: switch b { case '+': @@ -407,7 +407,7 @@ func (c *client) parse(buf []byte) error { switch c.kind { case CLIENT: - err = c.processSub(arg) + _, err = c.processSub(arg, false) case ROUTER: err = c.processRemoteSub(arg) case GATEWAY: @@ -867,7 +867,9 @@ func (c *client) parse(buf []byte) error { // read buffer and we are not able to process the msg. if c.argBuf == nil { // Works also for MSG_ARG, when message comes from ROUTE. - c.clonePubArg() + if err := c.clonePubArg(); err != nil { + goto parseErr + } } // If we will overflow the scratch buffer, just create a @@ -917,15 +919,17 @@ func protoSnippet(start int, buf []byte) string { // clonePubArg is used when the split buffer scenario has the pubArg in the existing read buffer, but // we need to hold onto it into the next read. -func (c *client) clonePubArg() { +func (c *client) clonePubArg() error { // Just copy and re-process original arg buffer. c.argBuf = c.scratch[:0] c.argBuf = append(c.argBuf, c.pa.arg...) - // This is a routed msg - if c.pa.account != nil { - c.processRoutedMsgArgs(false, c.argBuf) - } else { - c.processPub(false, c.argBuf) + switch c.kind { + case ROUTER, GATEWAY: + return c.processRoutedMsgArgs(false, c.argBuf) + case LEAF: + return c.processLeafMsgArgs(false, c.argBuf) + default: + return c.processPub(false, c.argBuf) } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/pse/pse_windows.go b/vendor/github.com/nats-io/nats-server/v2/server/pse/pse_windows.go index a8b11070..e159ebb7 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/pse/pse_windows.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/pse/pse_windows.go @@ -160,7 +160,7 @@ func getCounterArrayData(counter PDH_HCOUNTER) ([]float64, error) { // the performance counter API. func getProcessImageName() (name string) { name = filepath.Base(os.Args[0]) - name = strings.TrimRight(name, ".exe") + name = strings.TrimSuffix(name, ".exe") return } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/reload.go b/vendor/github.com/nats-io/nats-server/v2/server/reload.go index 9260d3de..4ae0d547 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/reload.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/reload.go @@ -785,6 +785,9 @@ func (s *Server) diffOptions(newOpts *Options) ([]option, error) { // Ignore NoLog and NoSigs options since they are not parsed and only used in // testing. continue + case "disableshortfirstping": + newOpts.DisableShortFirstPing = oldValue.(bool) + continue case "maxtracedmsglen": diffOpts = append(diffOpts, &maxTracedMsgLenOption{newValue: newValue.(int)}) case "port": @@ -912,6 +915,8 @@ func (s *Server) reloadAuthorization() { acc.mu.RLock() accName := acc.Name acc.mu.RUnlock() + // Release server lock for following actions + s.mu.Unlock() accClaims, claimJWT, _ := s.fetchAccountClaims(accName) if accClaims != nil { err := s.updateAccountWithClaimJWT(acc, claimJWT) @@ -923,9 +928,10 @@ func (s *Server) reloadAuthorization() { s.Noticef("Reloaded: deleting account [removed]: %q", accName) s.accounts.Delete(k) } + // Regrab server lock. + s.mu.Lock() return true }) - } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/ring.go b/vendor/github.com/nats-io/nats-server/v2/server/ring.go index e95623b0..194c7481 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/ring.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/ring.go @@ -18,6 +18,7 @@ type closedClient struct { ConnInfo subs []string user string + acc string } // Fixed sized ringbuffer for closed connections. diff --git a/vendor/github.com/nats-io/nats-server/v2/server/route.go b/vendor/github.com/nats-io/nats-server/v2/server/route.go index be79d0e9..ef3a2478 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/route.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/route.go @@ -59,10 +59,13 @@ var ( ) // Used by tests -var testRouteProto = RouteProtoV2 +func setRouteProtoForTest(wantedProto int) int { + return (wantedProto + 1) * -1 +} type route struct { remoteID string + remoteName string didSolicit bool retry bool routeType RouteType @@ -73,6 +76,7 @@ type route struct { replySubs map[*subscription]*time.Timer gatewayURL string leafnodeURL string + hash string } type connectInfo struct { @@ -92,34 +96,18 @@ const ( InfoProto = "INFO %s" + _CRLF_ ) -// Used to decide if the sending of the route SUBs list should be -// done in place or in separate go routine. -const sendRouteSubsInGoRoutineThreshold = 1024 * 1024 // 1MB +const ( + // Used to decide if the sending of the route SUBs list should be + // done in place or in separate go routine. + sendRouteSubsInGoRoutineThreshold = 1024 * 1024 // 1MB -// Warning when user configures cluster TLS insecure -const clusterTLSInsecureWarning = "TLS certificate chain and hostname of solicited routes will not be verified. DO NOT USE IN PRODUCTION!" + // Warning when user configures cluster TLS insecure + clusterTLSInsecureWarning = "TLS certificate chain and hostname of solicited routes will not be verified. DO NOT USE IN PRODUCTION!" +) // Can be changed for tests var routeConnectDelay = DEFAULT_ROUTE_CONNECT -// This will add a timer to watch over remote reply subjects in case -// they fail to receive a response. The duration will be taken from the -// accounts map timeout to match. -// Lock should be held upon entering. -func (c *client) addReplySubTimeout(acc *Account, sub *subscription, d time.Duration) { - if c.route.replySubs == nil { - c.route.replySubs = make(map[*subscription]*time.Timer) - } - rs := c.route.replySubs - rs[sub] = time.AfterFunc(d, func() { - c.mu.Lock() - delete(rs, sub) - sub.max = 0 - c.mu.Unlock() - c.unsubscribe(acc, sub, true, true) - }) -} - // removeReplySub is called when we trip the max on remoteReply subs. func (c *client) removeReplySub(sub *subscription) { if sub == nil { @@ -263,6 +251,12 @@ func (c *client) processInboundRoutedMsg(msg []byte) { return } + // If the subject (c.pa.subject) has the gateway prefix, this function will handle it. + if c.handleGatewayReply(msg) { + // We are done here. + return + } + acc, r := c.getAccAndResultFromCache() if acc == nil { c.Debugf("Unknown account %q for routed message on subject: %q", c.pa.account, c.pa.subject) @@ -276,45 +270,9 @@ func (c *client) processInboundRoutedMsg(msg []byte) { // Check for no interest, short circuit if so. // This is the fanout scale. - if len(r.psubs)+len(r.qsubs) == 0 { - return + if len(r.psubs)+len(r.qsubs) > 0 { + c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, pmrNoFlag) } - - // Check to see if we have a routed message with a service reply. - if isServiceReply(c.pa.reply) && acc != nil { - // Need to add a sub here for local interest to send a response back - // to the originating server/requestor where it will be re-mapped. - sid := make([]byte, 0, len(acc.Name)+len(c.pa.reply)+1) - sid = append(sid, acc.Name...) - sid = append(sid, ' ') - sid = append(sid, c.pa.reply...) - // Copy off the reply since otherwise we are referencing a buffer that will be reused. - reply := make([]byte, len(c.pa.reply)) - copy(reply, c.pa.reply) - sub := &subscription{client: c, subject: reply, sid: sid, max: 1} - if err := acc.sl.Insert(sub); err != nil { - c.Errorf("Could not insert subscription: %v", err) - } else { - ttl := acc.AutoExpireTTL() - c.mu.Lock() - c.subs[string(sid)] = sub - c.addReplySubTimeout(acc, sub, ttl) - c.mu.Unlock() - } - } - c.processMsgResults(acc, r, msg, c.pa.subject, c.pa.reply, pmrNoFlag) -} - -// Helper function for routes and gateways and leafnodes to create qfilters -// needed for converted subs from imports, etc. -func (c *client) makeQFilter(qsubs [][]*subscription) { - qs := make([][]byte, 0, len(qsubs)) - for _, qsub := range qsubs { - if len(qsub) > 0 { - qs = append(qs, qsub[0].queue) - } - } - c.pa.queues = qs } // Lock should be held entering here. @@ -409,10 +367,13 @@ func (c *client) processRouteInfo(info *Info) { c.route.authRequired = info.AuthRequired c.route.tlsRequired = info.TLSRequired c.route.gatewayURL = info.GatewayURL + c.route.remoteName = info.Name // When sent through route INFO, if the field is set, it should be of size 1. if len(info.LeafNodeURLs) == 1 { c.route.leafnodeURL = info.LeafNodeURLs[0] } + // Compute the hash of this route based on remoteID + c.route.hash = string(getHash(info.ID)) // If this is an update due to config reload on the remote server, // need to possibly send local subs to the remote server. @@ -1039,15 +1000,7 @@ func (c *client) sendRouteSubOrUnSubProtos(subs []*subscription, isSubProto, tra // the lock, which could cause pingTimer to think that this // connection is stale otherwise. c.last = time.Now() - if !c.flushOutbound() { - // Another go-routine has set this and is either - // doing the write or waiting to re-acquire the - // lock post write. Release lock to give it a - // chance to complete. - c.mu.Unlock() - runtime.Gosched() - c.mu.Lock() - } + c.flushOutbound() if closed = c.flags.isSet(clearConnection); closed { break } @@ -1178,7 +1131,7 @@ func (s *Server) createRoute(conn net.Conn, rURL *url.URL) *client { } // Set the Ping timer - c.setPingTimer() + s.setFirstPingTimer(c) // For routes, the "client" is added to s.routes only when processing // the INFO protocol, that is much later. @@ -1248,8 +1201,12 @@ func (s *Server) addRoute(c *client, info *Info) (bool, bool) { c.mu.Lock() c.route.connectURLs = info.ClientConnectURLs cid := c.cid + hash := string(c.route.hash) c.mu.Unlock() + // Store this route using the hash as the key + s.routesByHash.Store(hash, c) + // Now that we have registered the route, we can remove from the temp map. s.removeFromTempClients(cid) @@ -1285,6 +1242,8 @@ func (s *Server) addRoute(c *client, info *Info) (bool, bool) { // would cause the leafnode URL for that remote server to be removed // from our list. c.route.leafnodeURL = _EMPTY_ + // Same for the route hash otherwise it would be removed from s.routesByHash. + c.route.hash = _EMPTY_ c.mu.Unlock() remote.mu.Lock() @@ -1326,18 +1285,36 @@ func (s *Server) updateRouteSubscriptionMap(acc *Account, sub *subscription, del var n int32 var ok bool - acc.mu.Lock() + isq := len(sub.queue) > 0 + + accLock := func() { + // Not required for code correctness, but helps reduce the number of + // updates sent to the routes when processing high number of concurrent + // queue subscriptions updates (sub/unsub). + // See https://github.com/nats-io/nats-server/pull/1126 ffor more details. + if isq { + acc.sqmu.Lock() + } + acc.mu.Lock() + } + accUnlock := func() { + acc.mu.Unlock() + if isq { + acc.sqmu.Unlock() + } + } + + accLock() // This is non-nil when we know we are in cluster mode. rm, lqws := acc.rm, acc.lqws if rm == nil { - acc.mu.Unlock() + accUnlock() return } // Create the fast key which will use the subject or 'subjectqueue' for queue subscribers. key := keyFromSub(sub) - isq := len(sub.queue) > 0 // Decide whether we need to send an update out to all the routes. update := isq @@ -1362,7 +1339,7 @@ func (s *Server) updateRouteSubscriptionMap(acc *Account, sub *subscription, del update = true // Adding a new entry for normal sub means update (0->1) } - acc.mu.Unlock() + accUnlock() if !update { return @@ -1394,17 +1371,23 @@ func (s *Server) updateRouteSubscriptionMap(acc *Account, sub *subscription, del // here but not necessarily all updates need to be sent. We need to block and recheck the // n count with the lock held through sending here. We will suppress duplicate sends of same qw. if isq { + // However, we can't hold the acc.mu lock since we allow client.mu.Lock -> acc.mu.Lock + // but not the opposite. So use a dedicated lock while holding the route's lock. + acc.sqmu.Lock() + defer acc.sqmu.Unlock() + acc.mu.Lock() - defer acc.mu.Unlock() n = rm[key] sub.qw = n // Check the last sent weight here. If same, then someone // beat us to it and we can just return here. Otherwise update if ls, ok := lqws[key]; ok && ls == n { + acc.mu.Unlock() return } else { lqws[key] = n } + acc.mu.Unlock() } // Snapshot into array @@ -1447,13 +1430,22 @@ func (s *Server) routeAcceptLoop(ch chan struct{}) { net.JoinHostPort(opts.Cluster.Host, strconv.Itoa(l.Addr().(*net.TCPAddr).Port))) s.mu.Lock() + proto := RouteProtoV2 // For tests, we want to be able to make this server behave - // as an older server so we use the variable which we can override. - proto := testRouteProto + // as an older server so check this option to see if we should override + if opts.routeProto < 0 { + // We have a private option that allows test to override the route + // protocol. We want this option initial value to be 0, however, + // since original proto is RouteProtoZero, tests call setRouteProtoForTest(), + // which sets as negative value the (desired proto + 1) * -1. + // Here we compute back the real value. + proto = (opts.routeProto * -1) - 1 + } // Check for TLSConfig tlsReq := opts.Cluster.TLSConfig != nil info := Info{ ID: s.info.ID, + Name: s.info.Name, Version: s.info.Version, GoVersion: runtime.Version(), AuthRequired: false, @@ -1695,12 +1687,14 @@ func (c *client) processRouteConnect(srv *Server, arg []byte, lang string) error func (s *Server) removeRoute(c *client) { var rID string var lnURL string + var hash string c.mu.Lock() cid := c.cid r := c.route if r != nil { rID = r.remoteID lnURL = r.leafnodeURL + hash = r.hash } c.mu.Unlock() s.mu.Lock() @@ -1717,6 +1711,7 @@ func (s *Server) removeRoute(c *client) { if lnURL != _EMPTY_ && s.removeLeafNodeURL(lnURL) { s.sendAsyncLeafNodeInfo() } + s.routesByHash.Delete(hash) } s.removeFromTempClients(cid) s.mu.Unlock() diff --git a/vendor/github.com/nats-io/nats-server/v2/server/server.go b/vendor/github.com/nats-io/nats-server/v2/server/server.go index 085c7c92..dc65421f 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/server.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/server.go @@ -41,8 +41,16 @@ import ( "github.com/nats-io/nkeys" ) -// Time to wait before starting closing clients when in LD mode. -const lameDuckModeDefaultInitialDelay = int64(10 * time.Second) +const ( + // Time to wait before starting closing clients when in LD mode. + lameDuckModeDefaultInitialDelay = int64(10 * time.Second) + + // Interval for the first PING for non client connections. + firstPingInterval = time.Second + + // This is for the first ping for client connections. + firstClientPingInterval = 2 * time.Second +) // Make this a variable so that we can change during tests var lameDuckModeInitialDelay = int64(lameDuckModeDefaultInitialDelay) @@ -51,6 +59,7 @@ var lameDuckModeInitialDelay = int64(lameDuckModeDefaultInitialDelay) // to help them understand information about this server. type Info struct { ID string `json:"server_id"` + Name string `json:"server_name"` Version string `json:"version"` Proto int `json:"proto"` GitCommit string `json:"git_commit,omitempty"` @@ -77,6 +86,7 @@ type Info struct { GatewayURL string `json:"gateway_url,omitempty"` // Gateway URL on that server (sent by route's INFO) GatewayCmd byte `json:"gateway_cmd,omitempty"` // Command code for the receiving server to know what to do GatewayCmdPayload []byte `json:"gateway_cmd_payload,omitempty"` // Command payload when needed + GatewayNRP bool `json:"gateway_nrp,omitempty"` // Uses new $GNR. prefix for mapped replies // LeafNode Specific LeafNodeURLs []string `json:"leafnode_urls,omitempty"` // LeafNode URLs that the server can reconnect to. @@ -99,10 +109,13 @@ type Server struct { gacc *Account sys *internal accounts sync.Map + tmpAccounts sync.Map // Temporarily stores accounts that are being built activeAccounts int32 accResolver AccountResolver clients map[uint64]*client routes map[uint64]*client + routesByHash sync.Map + hash []byte remotes map[string]*client leafs map[uint64]*client users map[string]*User @@ -175,6 +188,18 @@ type Server struct { // added/removed routes. The monitoring code then check that // to know if it should update the cluster's URLs array. varzUpdateRouteURLs bool + + // Keeps a sublist of of subscriptions attached to leafnode connections + // for the $GNR.*.*.*.> subject so that a server can send back a mapped + // gateway reply. + gwLeafSubs *Sublist + + // Used for expiration of mapped GW replies + gwrm struct { + w int32 + ch chan time.Duration + m sync.Map + } } // Make sure all are 64bits for atomic use @@ -206,6 +231,11 @@ func NewServer(opts *Options) (*Server, error) { kp, _ := nkeys.CreateServer() pub, _ := kp.PublicKey() + serverName := pub + if opts.ServerName != "" { + serverName = opts.ServerName + } + // Validate some options. This is here because we cannot assume that // server will always be started with configuration parsing (that could // report issues). Its options can be (incorrectly) set by hand when @@ -220,6 +250,7 @@ func NewServer(opts *Options) (*Server, error) { Proto: PROTO, GitCommit: gitCommit, GoVersion: runtime.Version(), + Name: serverName, Host: opts.Host, Port: opts.Port, AuthRequired: false, @@ -238,6 +269,7 @@ func NewServer(opts *Options) (*Server, error) { done: make(chan bool, 1), start: now, configTime: now, + gwLeafSubs: NewSublistWithCache(), } // Trusted root operator keys. @@ -257,11 +289,9 @@ func NewServer(opts *Options) (*Server, error) { // Call this even if there is no gateway defined. It will // initialize the structure so we don't have to check for // it to be nil or not in various places in the code. - gws, err := newGateway(opts) - if err != nil { + if err := s.newGateway(opts); err != nil { return nil, err } - s.gateway = gws if s.gateway.enabled { s.info.Cluster = s.getGatewayName() @@ -299,6 +329,39 @@ func NewServer(opts *Options) (*Server, error) { return nil, err } + // In local config mode, check that leafnode configuration + // refers to account that exist. + if len(opts.TrustedOperators) == 0 { + checkAccountExists := func(accName string) error { + if accName == _EMPTY_ { + return nil + } + if _, ok := s.accounts.Load(accName); !ok { + return fmt.Errorf("cannot find account %q specified in leafnode authorization", accName) + } + return nil + } + if err := checkAccountExists(opts.LeafNode.Account); err != nil { + return nil, err + } + for _, lu := range opts.LeafNode.Users { + if lu.Account == nil { + continue + } + if err := checkAccountExists(lu.Account.Name); err != nil { + return nil, err + } + } + for _, r := range opts.LeafNode.Remotes { + if r.LocalAccount == _EMPTY_ { + continue + } + if _, ok := s.accounts.Load(r.LocalAccount); !ok { + return nil, fmt.Errorf("no local account %q for remote leafnode", r.LocalAccount) + } + } + } + // Used to setup Authorization. s.configureAuthorization() @@ -308,6 +371,18 @@ func NewServer(opts *Options) (*Server, error) { return s, nil } +// ClientURL returns the URL used to connect clients. Helpful in testing +// when we designate a random client port (-1). +func (s *Server) ClientURL() string { + // FIXME(dlc) - should we add in user and pass if defined single? + opts := s.getOpts() + scheme := "nats://" + if opts.TLSConfig != nil { + scheme = "tls://" + } + return fmt.Sprintf("%s%s:%d", scheme, opts.Host, opts.Port) +} + func validateOptions(o *Options) error { // Check that the trust configuration is correct. if err := validateTrustedOperators(o); err != nil { @@ -344,11 +419,12 @@ func (s *Server) globalAccount() *Account { } // Used to setup Accounts. +// Lock is held upon entry. func (s *Server) configureAccounts() error { // Create global account. if s.gacc == nil { s.gacc = NewAccount(globalAccountName) - s.registerAccount(s.gacc) + s.registerAccountNoLock(s.gacc) } opts := s.opts @@ -359,15 +435,12 @@ func (s *Server) configureAccounts() error { a := acc.shallowCopy() acc.sl = nil acc.clients = nil - s.registerAccount(a) + s.registerAccountNoLock(a) } // Now that we have this we need to remap any referenced accounts in // import or export maps to the new ones. swapApproved := func(ea *exportAuth) { - if ea == nil { - return - } for sub, a := range ea.approved { var acc *Account if v, ok := s.accounts.Load(a.Name); ok { @@ -376,14 +449,19 @@ func (s *Server) configureAccounts() error { ea.approved[sub] = acc } } + s.accounts.Range(func(k, v interface{}) bool { acc := v.(*Account) // Exports for _, ea := range acc.exports.streams { - swapApproved(ea) + if ea != nil { + swapApproved(&ea.exportAuth) + } } for _, ea := range acc.exports.services { - swapApproved(ea) + if ea != nil { + swapApproved(&ea.exportAuth) + } } // Imports for _, si := range acc.imports.streams { @@ -406,7 +484,7 @@ func (s *Server) configureAccounts() error { // Set the system account if it was configured. if opts.SystemAccount != _EMPTY_ { - // Lock is held entering this function, so release to call lookupAccount. + // Lock may be acquired in lookupAccount, so release to call lookupAccount. s.mu.Unlock() _, err := s.lookupAccount(opts.SystemAccount) s.mu.Lock() @@ -621,51 +699,57 @@ func (s *Server) numAccounts() int { return count } +// NumLoadedAccounts returns the number of loaded accounts. +func (s *Server) NumLoadedAccounts() int { + return s.numAccounts() +} + // LookupOrRegisterAccount will return the given account if known or create a new entry. func (s *Server) LookupOrRegisterAccount(name string) (account *Account, isNew bool) { + s.mu.Lock() + defer s.mu.Unlock() if v, ok := s.accounts.Load(name); ok { return v.(*Account), false } - s.mu.Lock() acc := NewAccount(name) - s.registerAccount(acc) - s.mu.Unlock() + s.registerAccountNoLock(acc) return acc, true } // RegisterAccount will register an account. The account must be new // or this call will fail. func (s *Server) RegisterAccount(name string) (*Account, error) { + s.mu.Lock() + defer s.mu.Unlock() if _, ok := s.accounts.Load(name); ok { return nil, ErrAccountExists } - s.mu.Lock() acc := NewAccount(name) - s.registerAccount(acc) - s.mu.Unlock() + s.registerAccountNoLock(acc) return acc, nil } // SetSystemAccount will set the internal system account. // If root operators are present it will also check validity. func (s *Server) SetSystemAccount(accName string) error { + // Lookup from sync.Map first. if v, ok := s.accounts.Load(accName); ok { return s.setSystemAccount(v.(*Account)) } - s.mu.Lock() // If we are here we do not have local knowledge of this account. // Do this one by hand to return more useful error. ac, jwt, err := s.fetchAccountClaims(accName) if err != nil { - s.mu.Unlock() return err } acc := s.buildInternalAccount(ac) acc.claimJWT = jwt - s.registerAccount(acc) - s.mu.Unlock() - + // Due to race, we need to make sure that we are not + // registering twice. + if racc := s.registerAccount(acc); racc != nil { + return nil + } return s.setSystemAccount(acc) } @@ -679,9 +763,12 @@ func (s *Server) SystemAccount() *Account { return nil } +// For internal sends. +const internalSendQLen = 4096 + // Assign a system account. Should only be called once. -// This sets up a server to send and receive messages from inside -// the server itself. +// This sets up a server to send and receive messages from +// inside the server itself. func (s *Server) setSystemAccount(acc *Account) error { if acc == nil { return ErrMissingAccount @@ -703,14 +790,24 @@ func (s *Server) setSystemAccount(acc *Account) error { return ErrAccountExists } + // This is here in an attempt to quiet the race detector and not have to place + // locks on fast path for inbound messages and checking service imports. + acc.mu.Lock() + if acc.imports.services == nil { + acc.imports.services = make(map[string]*serviceImport) + } + acc.mu.Unlock() + + now := time.Now() s.sys = &internal{ account: acc, - client: &client{srv: s, kind: SYSTEM, opts: internalOpts, msubs: -1, mpay: -1, start: time.Now(), last: time.Now()}, + client: &client{srv: s, kind: SYSTEM, opts: internalOpts, msubs: -1, mpay: -1, start: now, last: now}, seq: 1, sid: 1, servers: make(map[string]*serverUpdate), subs: make(map[string]msgHandler), - sendq: make(chan *pubMsg, 128), + replies: make(map[string]msgHandler), + sendq: make(chan *pubMsg, internalSendQLen), statsz: eventsHBInterval, orphMax: 5 * eventsHBInterval, chkOrph: 3 * eventsHBInterval, @@ -756,10 +853,19 @@ func (s *Server) shouldTrackSubscriptions() bool { return (s.opts.Cluster.Port != 0 || s.opts.Gateway.Port != 0) } -// Place common account setup here. -// Lock should be held on entry. -func (s *Server) registerAccount(acc *Account) { - if acc.sl == nil { +// Invokes registerAccountNoLock under the protection of the server lock. +// That is, server lock is acquired/released in this function. +// See registerAccountNoLock for comment on returned value. +func (s *Server) registerAccount(acc *Account) *Account { + s.mu.Lock() + racc := s.registerAccountNoLock(acc) + s.mu.Unlock() + return racc +} + +// Helper to set the sublist based on preferences. +func (s *Server) setAccountSublist(acc *Account) { + if acc != nil && acc.sl == nil { opts := s.getOpts() if opts != nil && opts.NoSublistCache { acc.sl = NewSublistNoCache() @@ -767,6 +873,22 @@ func (s *Server) registerAccount(acc *Account) { acc.sl = NewSublistWithCache() } } +} + +// Registers an account in the server. +// Due to some locking considerations, we may end-up trying +// to register the same account twice. This function will +// then return the already registered account. +// Lock should be held on entry. +func (s *Server) registerAccountNoLock(acc *Account) *Account { + // We are under the server lock. Lookup from map, if present + // return existing account. + if a, _ := s.accounts.Load(acc.Name); a != nil { + s.tmpAccounts.Delete(acc.Name) + return a.(*Account) + } + // Finish account setup and store. + s.setAccountSublist(acc) if acc.maxnae == 0 { acc.maxnae = DEFAULT_MAX_ACCOUNT_AE_RESPONSE_MAPS } @@ -793,39 +915,42 @@ func (s *Server) registerAccount(acc *Account) { acc.srv = s acc.mu.Unlock() s.accounts.Store(acc.Name, acc) + s.tmpAccounts.Delete(acc.Name) s.enableAccountTracking(acc) + return nil } // lookupAccount is a function to return the account structure // associated with an account name. +// Lock MUST NOT be held upon entry. func (s *Server) lookupAccount(name string) (*Account, error) { + var acc *Account if v, ok := s.accounts.Load(name); ok { - acc := v.(*Account) + acc = v.(*Account) + } else if v, ok := s.tmpAccounts.Load(name); ok { + acc = v.(*Account) + } + if acc != nil { // If we are expired and we have a resolver, then // return the latest information from the resolver. if acc.IsExpired() { s.Debugf("Requested account [%s] has expired", name) - var err error - s.mu.Lock() - if s.accResolver != nil { - err = s.updateAccount(acc) - } - s.mu.Unlock() - if err != nil { - // This error could mask expired, so just return expired here. + if s.AccountResolver() != nil { + if err := s.updateAccount(acc); err != nil { + // This error could mask expired, so just return expired here. + return nil, ErrAccountExpired + } + } else { return nil, ErrAccountExpired } } return acc, nil } // If we have a resolver see if it can fetch the account. - if s.accResolver == nil { + if s.AccountResolver() == nil { return nil, ErrMissingAccount } - s.mu.Lock() - acc, err := s.fetchAccount(name) - s.mu.Unlock() - return acc, err + return s.fetchAccount(name) } // LookupAccount is a public function to return the account structure @@ -835,7 +960,7 @@ func (s *Server) LookupAccount(name string) (*Account, error) { } // This will fetch new claims and if found update the account with new claims. -// Lock should be held upon entry. +// Lock MUST NOT be held upon entry. func (s *Server) updateAccount(acc *Account) error { // TODO(dlc) - Make configurable if time.Since(acc.updated) < time.Second { @@ -850,6 +975,7 @@ func (s *Server) updateAccount(acc *Account) error { } // updateAccountWithClaimJWT will check and apply the claim update. +// Lock MUST NOT be held upon entry. func (s *Server) updateAccountWithClaimJWT(acc *Account, claimJWT string) error { if acc == nil { return ErrMissingAccount @@ -869,32 +995,30 @@ func (s *Server) updateAccountWithClaimJWT(acc *Account, claimJWT string) error } // fetchRawAccountClaims will grab raw account claims iff we have a resolver. -// Lock is held upon entry. +// Lock is NOT held upon entry. func (s *Server) fetchRawAccountClaims(name string) (string, error) { - accResolver := s.accResolver + accResolver := s.AccountResolver() if accResolver == nil { return "", ErrNoAccountResolver } - // Need to do actual Fetch without the lock. - s.mu.Unlock() + // Need to do actual Fetch start := time.Now() claimJWT, err := accResolver.Fetch(name) fetchTime := time.Since(start) - s.mu.Lock() if fetchTime > time.Second { - s.Warnf("Account [%s] fetch took %v\n", name, fetchTime) + s.Warnf("Account [%s] fetch took %v", name, fetchTime) } else { - s.Debugf("Account [%s] fetch took %v\n", name, fetchTime) + s.Debugf("Account [%s] fetch took %v", name, fetchTime) } if err != nil { - s.Warnf("Account fetch failed: %v\n", err) + s.Warnf("Account fetch failed: %v", err) return "", err } return claimJWT, nil } // fetchAccountClaims will attempt to fetch new claims if a resolver is present. -// Lock is held upon entry. +// Lock is NOT held upon entry. func (s *Server) fetchAccountClaims(name string) (*jwt.AccountClaims, string, error) { claimJWT, err := s.fetchRawAccountClaims(name) if err != nil { @@ -918,27 +1042,25 @@ func (s *Server) verifyAccountClaims(claimJWT string) (*jwt.AccountClaims, strin } // This will fetch an account from a resolver if defined. -// Lock should be held upon entry. +// Lock is NOT held upon entry. func (s *Server) fetchAccount(name string) (*Account, error) { accClaims, claimJWT, err := s.fetchAccountClaims(name) if accClaims != nil { - // We have released the lock during the low level fetch. - // Now that we are back under lock, check again if account - // is in the map or not. If it is, simply return it. - if v, ok := s.accounts.Load(name); ok { - acc := v.(*Account) + acc := s.buildInternalAccount(accClaims) + acc.claimJWT = claimJWT + // Due to possible race, if registerAccount() returns a non + // nil account, it means the same account was already + // registered and we should use this one. + if racc := s.registerAccount(acc); racc != nil { // Update with the new claims in case they are new. // Following call will return ErrAccountResolverSameClaims // if claims are the same. - err = s.updateAccountWithClaimJWT(acc, claimJWT) + err = s.updateAccountWithClaimJWT(racc, claimJWT) if err != nil && err != ErrAccountResolverSameClaims { return nil, err } - return acc, nil + return racc, nil } - acc := s.buildInternalAccount(accClaims) - acc.claimJWT = claimJWT - s.registerAccount(acc) return acc, nil } return nil, err @@ -1011,6 +1133,10 @@ func (s *Server) Start() { } } + // Start expiration of mapped GW replies, regardless if + // this server is configured with gateway or not. + s.startGWReplyMapExpiration() + // Start up gateway if needed. Do this before starting the routes, because // we want to resolve the gateway host:port so that this information can // be sent to other routes. @@ -1072,7 +1198,7 @@ func (s *Server) Shutdown() { s.mu.Unlock() return } - s.Noticef("Server Exiting..") + s.Noticef("Initiating Shutdown...") opts := s.getOpts() @@ -1175,6 +1301,7 @@ func (s *Server) Shutdown() { s.deletePortsFile(opts.PortsFileDir) } + s.Noticef("Server Exiting..") // Close logger if applicable. It allows tests on Windows // to be able to do proper cleanup (delete log file). s.logging.RLock() @@ -1381,6 +1508,7 @@ const ( ConnzPath = "/connz" RoutezPath = "/routez" GatewayzPath = "/gatewayz" + LeafzPath = "/leafz" SubszPath = "/subsz" StackszPath = "/stacksz" ) @@ -1448,6 +1576,8 @@ func (s *Server) startMonitoring(secure bool) error { mux.HandleFunc(RoutezPath, s.HandleRoutez) // Gatewayz mux.HandleFunc(GatewayzPath, s.HandleGatewayz) + // Leafz + mux.HandleFunc(LeafzPath, s.HandleLeafz) // Subz mux.HandleFunc(SubszPath, s.HandleSubsz) // Subz alias for backwards compatibility @@ -1621,7 +1751,7 @@ func (s *Server) createClient(conn net.Conn) *client { // Do final client initialization // Set the First Ping timer. - c.setFirstPingTimer(opts.PingInterval) + s.setFirstPingTimer(c) // Spin up the read loop. s.startGoRoutine(func() { c.readLoop() }) @@ -1663,6 +1793,10 @@ func (s *Server) saveClosedClient(c *client, nc net.Conn, reason ClosedState) { } // Hold user as well. cc.user = c.opts.Username + // Hold account name if not the global account. + if c.acc != nil && c.acc.Name != globalAccountName { + cc.acc = c.acc.Name + } c.mu.Unlock() // Place in the ring buffer @@ -2438,3 +2572,26 @@ func (s *Server) shouldReportConnectErr(firstConnect bool, attempts int) bool { } return false } + +// Invoked for route, leaf and gateway connections. Set the very first +// PING to a lower interval to capture the initial RTT. +// After that the PING interval will be set to the user defined value. +// Client lock should be held. +func (s *Server) setFirstPingTimer(c *client) { + opts := s.getOpts() + d := opts.PingInterval + + if !opts.DisableShortFirstPing { + if c.kind != CLIENT { + if d > firstPingInterval { + d = firstPingInterval + } + } else if d > firstClientPingInterval { + d = firstClientPingInterval + } + } + // We randomize the first one by an offset up to 20%, e.g. 2m ~= max 24s. + addDelay := rand.Int63n(int64(d / 5)) + d += time.Duration(addDelay) + c.ping.tmr = time.AfterFunc(d, c.processPingTimer) +} diff --git a/vendor/github.com/nats-io/nats-server/v2/server/service_windows.go b/vendor/github.com/nats-io/nats-server/v2/server/service_windows.go index 56450413..d24f6414 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/service_windows.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/service_windows.go @@ -18,7 +18,6 @@ import ( "time" "golang.org/x/sys/windows/svc" - "golang.org/x/sys/windows/svc/debug" ) const ( @@ -110,15 +109,15 @@ func Run(server *Server) error { server.Start() return nil } - run := svc.Run isInteractive, err := svc.IsAnInteractiveSession() if err != nil { return err } if isInteractive { - run = debug.Run + server.Start() + return nil } - return run(serviceName, &winServiceWrapper{server}) + return svc.Run(serviceName, &winServiceWrapper{server}) } // isWindowsService indicates if NATS is running as a Windows service. diff --git a/vendor/github.com/nats-io/nats-server/v2/server/signal.go b/vendor/github.com/nats-io/nats-server/v2/server/signal.go index a6533044..254e3ecb 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/signal.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/signal.go @@ -49,7 +49,7 @@ func (s *Server) handleSignals() { s.Debugf("Trapped %q signal", sig) switch sig { case syscall.SIGINT: - s.Noticef("Server Exiting..") + s.Shutdown() os.Exit(0) case syscall.SIGUSR1: // File log re-open for rotating file logs. diff --git a/vendor/github.com/nats-io/nats-server/v2/server/signal_windows.go b/vendor/github.com/nats-io/nats-server/v2/server/signal_windows.go index b940202f..a3667c80 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/signal_windows.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/signal_windows.go @@ -35,7 +35,7 @@ func (s *Server) handleSignals() { go func() { for sig := range c { s.Debugf("Trapped %q signal", sig) - s.Noticef("Server Exiting..") + s.Shutdown() os.Exit(0) } }() diff --git a/vendor/github.com/nats-io/nats-server/v2/server/sublist.go b/vendor/github.com/nats-io/nats-server/v2/server/sublist.go index b9c49bde..6b75c3fc 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/sublist.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/sublist.go @@ -802,6 +802,11 @@ func subjectIsLiteral(subject string) bool { return true } +// IsValidPublishSubject returns true if a subject is valid and a literal, false otherwise +func IsValidPublishSubject(subject string) bool { + return IsValidSubject(subject) && subjectIsLiteral(subject) +} + // IsValidSubject returns true if a subject is valid, false otherwise func IsValidSubject(subject string) bool { if subject == "" { @@ -974,7 +979,7 @@ func matchLiteral(literal, subject string) bool { } func addLocalSub(sub *subscription, subs *[]*subscription) { - if sub != nil && sub.client != nil && sub.client.kind == CLIENT && sub.im == nil { + if sub != nil && sub.client != nil && (sub.client.kind == CLIENT || sub.client.kind == SYSTEM) && sub.im == nil { *subs = append(*subs, sub) } } @@ -1058,3 +1063,17 @@ func (s *Sublist) collectAllSubs(l *level, subs *[]*subscription) { s.collectAllSubs(l.fwc.next, subs) } } + +// Helper to get the first result sub. +func firstSubFromResult(rr *SublistResult) *subscription { + if rr == nil { + return nil + } + if len(rr.psubs) > 0 { + return rr.psubs[0] + } + if len(rr.qsubs) > 0 { + return rr.qsubs[0][0] + } + return nil +} diff --git a/vendor/github.com/nats-io/nats.go/.travis.yml b/vendor/github.com/nats-io/nats.go/.travis.yml index 9e73bb2d..2594b74e 100644 --- a/vendor/github.com/nats-io/nats.go/.travis.yml +++ b/vendor/github.com/nats-io/nats.go/.travis.yml @@ -16,8 +16,8 @@ install: before_script: - $(exit $(go fmt ./... | wc -l)) - go vet ./... -- misspell -error -locale US . +- find . -type f -name "*.go" | xargs misspell -error -locale US - staticcheck ./... script: - go test -i -race ./... -- if [[ "$TRAVIS_GO_VERSION" =~ 1.12 ]]; then ./scripts/cov.sh TRAVIS; else go test -race ./...; fi +- if [[ "$TRAVIS_GO_VERSION" =~ 1.12 ]]; then ./scripts/cov.sh TRAVIS; else go test -race -v -p=1 ./... --failfast; fi diff --git a/vendor/github.com/nats-io/nats.go/README.md b/vendor/github.com/nats-io/nats.go/README.md index f9b1b644..83cbbd27 100644 --- a/vendor/github.com/nats-io/nats.go/README.md +++ b/vendor/github.com/nats-io/nats.go/README.md @@ -15,6 +15,20 @@ go get github.com/nats-io/nats.go/ go get github.com/nats-io/nats-server ``` +When using or transitioning to Go modules support: + +```bash +# Go client latest or explicit version +go get github.com/nats-io/nats.go/@latest +go get github.com/nats-io/nats.go/@v1.9.1 + +# For latest NATS Server, add /v2 at the end +go get github.com/nats-io/nats-server/v2 + +# NATS Server v1 is installed otherwise +# go get github.com/nats-io/nats-server +``` + ## Basic Usage ```go @@ -33,7 +47,7 @@ nc.Subscribe("foo", func(m *nats.Msg) { // Responding to a request message nc.Subscribe("request", func(m *nats.Msg) { - m.Respond([]byte("answer is 42") + m.Respond([]byte("answer is 42")) }) // Simple Sync Subscriber @@ -55,7 +69,7 @@ sub.Drain() msg, err := nc.Request("help", []byte("help me"), 10*time.Millisecond) // Replies -nc.Subscribe("help", func(m *Msg) { +nc.Subscribe("help", func(m *nats.Msg) { nc.Publish(m.Reply, []byte("I can help!")) }) @@ -102,12 +116,12 @@ c.Publish("hello", me) // Unsubscribe sub, err := c.Subscribe("foo", nil) -... +// ... sub.Unsubscribe() // Requests var response string -err := c.Request("help", "help me", &response, 10*time.Millisecond) +err = c.Request("help", "help me", &response, 10*time.Millisecond) if err != nil { fmt.Printf("Request failed: %v\n", err) } @@ -127,7 +141,7 @@ This requires server with version >= 2.0.0 NATS servers have a new security and authentication mechanism to authenticate with user credentials and Nkeys. The simplest form is to use the helper method UserCredentials(credsFilepath). ```go -nc, err := nats.Connect(url, UserCredentials("user.creds")) +nc, err := nats.Connect(url, nats.UserCredentials("user.creds")) ``` The helper methods creates two callback handlers to present the user JWT and sign the nonce challenge from the server. @@ -136,12 +150,12 @@ The helper will load and wipe and erase memory it uses for each connect or recon The helper also can take two entries, one for the JWT and one for the NKey seed file. ```go -nc, err := nats.Connect(url, UserCredentials("user.jwt", "user.nk")) +nc, err := nats.Connect(url, nats.UserCredentials("user.jwt", "user.nk")) ``` You can also set the callback handlers directly and manage challenge signing directly. ```go -nc, err := nats.Connect(url, UserJWT(jwtCB, sigCB)) +nc, err := nats.Connect(url, nats.UserJWT(jwtCB, sigCB)) ``` Bare Nkeys are also supported. The nkey seed should be in a read only file, e.g. seed.txt @@ -160,7 +174,7 @@ opt, err := nats.NkeyOptionFromSeed("seed.txt") nc, err := nats.Connect(serverUrl, opt) // Direct -nc, err := nats.Connect(serverUrl, Nkey(pubNkey, sigCB)) +nc, err := nats.Connect(serverUrl, nats.Nkey(pubNkey, sigCB)) ``` ## TLS diff --git a/vendor/github.com/nats-io/nats.go/context.go b/vendor/github.com/nats-io/nats.go/context.go index 14aa355e..c921d6be 100644 --- a/vendor/github.com/nats-io/nats.go/context.go +++ b/vendor/github.com/nats-io/nats.go/context.go @@ -43,29 +43,7 @@ func (nc *Conn) RequestWithContext(ctx context.Context, subj string, data []byte return nc.oldRequestWithContext(ctx, subj, data) } - // Do setup for the new style. - if nc.respMap == nil { - nc.initNewResp() - } - // Create literal Inbox and map to a chan msg. - mch := make(chan *Msg, RequestChanLen) - respInbox := nc.newRespInbox() - token := respToken(respInbox) - nc.respMap[token] = mch - createSub := nc.respMux == nil - ginbox := nc.respSub - nc.mu.Unlock() - - if createSub { - // Make sure scoped subscription is setup only once. - var err error - nc.respSetup.Do(func() { err = nc.createRespMux(ginbox) }) - if err != nil { - return nil, err - } - } - - err := nc.PublishRequest(subj, respInbox, data) + mch, token, err := nc.createNewRequestAndSend(subj, data) if err != nil { return nil, err } @@ -140,7 +118,7 @@ func (s *Subscription) NextMsgWithContext(ctx context.Context) (*Msg, error) { select { case msg, ok = <-mch: if !ok { - return nil, ErrConnectionClosed + return nil, s.getNextMsgErr() } if err := s.processNextMsgDelivered(msg); err != nil { return nil, err @@ -153,7 +131,7 @@ func (s *Subscription) NextMsgWithContext(ctx context.Context) (*Msg, error) { select { case msg, ok = <-mch: if !ok { - return nil, ErrConnectionClosed + return nil, s.getNextMsgErr() } if err := s.processNextMsgDelivered(msg); err != nil { return nil, err diff --git a/vendor/github.com/nats-io/nats.go/enc.go b/vendor/github.com/nats-io/nats.go/enc.go index 6d5c2790..0ed71a2c 100644 --- a/vendor/github.com/nats-io/nats.go/enc.go +++ b/vendor/github.com/nats-io/nats.go/enc.go @@ -33,7 +33,7 @@ type Encoder interface { var encMap map[string]Encoder var encLock sync.Mutex -// Indexe names into the Registered Encoders. +// Indexed names into the Registered Encoders. const ( JSON_ENCODER = "json" GOB_ENCODER = "gob" @@ -109,7 +109,7 @@ func (c *EncodedConn) PublishRequest(subject, reply string, v interface{}) error // Request will create an Inbox and perform a Request() call // with the Inbox reply for the data v. A response will be -// decoded into the vPtrResponse. +// decoded into the vPtr Response. func (c *EncodedConn) Request(subject string, v interface{}, vPtr interface{}, timeout time.Duration) error { b, err := c.Enc.Encode(subject, v) if err != nil { diff --git a/vendor/github.com/nats-io/nats.go/go.mod b/vendor/github.com/nats-io/nats.go/go.mod index 7cfd5639..f82ceee6 100644 --- a/vendor/github.com/nats-io/nats.go/go.mod +++ b/vendor/github.com/nats-io/nats.go/go.mod @@ -1,6 +1,7 @@ module github.com/nats-io/nats.go require ( - github.com/nats-io/nkeys v0.0.2 + github.com/nats-io/jwt v0.3.0 + github.com/nats-io/nkeys v0.1.0 github.com/nats-io/nuid v1.0.1 ) diff --git a/vendor/github.com/nats-io/nats.go/go.sum b/vendor/github.com/nats-io/nats.go/go.sum index 7f95527f..0cd4f648 100644 --- a/vendor/github.com/nats-io/nats.go/go.sum +++ b/vendor/github.com/nats-io/nats.go/go.sum @@ -1,6 +1,13 @@ -github.com/nats-io/nkeys v0.0.2 h1:+qM7QpgXnvDDixitZtQUBDY9w/s9mu1ghS+JIbsrx6M= -github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= +github.com/nats-io/jwt v0.3.0 h1:xdnzwFETV++jNc4W1mw//qFyJGb2ABOombmZJQS4+Qo= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/nkeys v0.1.0 h1:qMd4+pRHgdr1nAClu+2h/2a5F2TmKcCzjCDazVgRoX4= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0= -golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/vendor/github.com/nats-io/nats.go/nats.go b/vendor/github.com/nats-io/nats.go/nats.go index 8785d6da..911df590 100644 --- a/vendor/github.com/nats-io/nats.go/nats.go +++ b/vendor/github.com/nats-io/nats.go/nats.go @@ -28,7 +28,8 @@ import ( "math/rand" "net" "net/url" - "regexp" + "os" + "path/filepath" "runtime" "strconv" "strings" @@ -36,6 +37,7 @@ import ( "sync/atomic" "time" + "github.com/nats-io/jwt" "github.com/nats-io/nats.go/util" "github.com/nats-io/nkeys" "github.com/nats-io/nuid" @@ -43,7 +45,7 @@ import ( // Default Constants const ( - Version = "1.8.1" + Version = "1.9.1" DefaultURL = "nats://127.0.0.1:4222" DefaultPort = 4222 DefaultMaxReconnect = 60 @@ -67,6 +69,9 @@ const ( // AUTHORIZATION_ERR is for when nats server user authorization has failed. AUTHORIZATION_ERR = "authorization violation" + + // AUTHENTICATION_EXPIRED_ERR is for when nats server user authorization has expired. + AUTHENTICATION_EXPIRED_ERR = "user authentication expired" ) // Errors @@ -80,10 +85,12 @@ var ( ErrBadSubscription = errors.New("nats: invalid subscription") ErrTypeSubscription = errors.New("nats: invalid subscription type") ErrBadSubject = errors.New("nats: invalid subject") + ErrBadQueueName = errors.New("nats: invalid queue name") ErrSlowConsumer = errors.New("nats: slow consumer, messages dropped") ErrTimeout = errors.New("nats: timeout") ErrBadTimeout = errors.New("nats: timeout invalid") ErrAuthorization = errors.New("nats: authorization violation") + ErrAuthExpired = errors.New("nats: authentication expired") ErrNoServers = errors.New("nats: no servers available for connection") ErrJsonParse = errors.New("nats: connect message, json parse error") ErrChanArg = errors.New("nats: argument needs to be a channel type") @@ -166,7 +173,8 @@ type UserJWTHandler func() (string, error) // SignatureHandler is used to sign a nonce from the server while // authenticating with nkeys. The user should sign the nonce and -// return the base64 encoded signature. +// return the raw signature. The client will base64 encode this to +// send to the server. type SignatureHandler func([]byte) ([]byte, error) // AuthTokenHandler is used to generate a new token. @@ -339,6 +347,11 @@ type Options struct { // UseOldRequestStyle forces the old method of Requests that utilize // a new Inbox and a new Subscription for each request. UseOldRequestStyle bool + + // NoCallbacksAfterClientClose allows preventing the invocation of + // callbacks after Close() is called. Client won't receive notifications + // when Close is invoked by user code. Default is to invoke the callbacks. + NoCallbacksAfterClientClose bool } const ( @@ -363,6 +376,7 @@ const ( // A Conn represents a bare connection to a nats-server. // It can send and receive []byte payloads. +// The connection is safe to use in multiple Go routines concurrently. type Conn struct { // Keep all members for which we use atomic at the beginning of the // struct and make sure they are all 64bits (or use padding if necessary). @@ -394,13 +408,15 @@ type Conn struct { ps *parseState ptmr *time.Timer pout int + ar bool // abort reconnect // New style response handler respSub string // The wildcard subject + respScanf string // The scanf template to extract mux token respMux *Subscription // A single response subscription respMap map[string]chan *Msg // Request map for the response msg channels respSetup sync.Once // Ensures response subscription occurs once - respRand *rand.Rand // Used for generating suffix. + respRand *rand.Rand // Used for generating suffix } // A Subscription represents interest in a given subject. @@ -475,6 +491,7 @@ type srv struct { didConnect bool reconnects int lastAttempt time.Time + lastErr error isImplicit bool tlsName string } @@ -872,6 +889,16 @@ func UseOldRequestStyle() Option { } } +// NoCallbacksAfterClientClose is an Option to disable callbacks when user code +// calls Close(). If close is initiated by any other condition, callbacks +// if any will be invoked. +func NoCallbacksAfterClientClose() Option { + return func(o *Options) error { + o.NoCallbacksAfterClientClose = true + return nil + } +} + // Handler processing // SetDisconnectHandler will set the disconnect event handler. @@ -1445,6 +1472,7 @@ func (nc *Conn) connect() error { if err == nil { nc.srvPool[i].didConnect = true nc.srvPool[i].reconnects = 0 + nc.current.lastErr = nil returnedErr = nil break } else { @@ -1614,7 +1642,6 @@ func normalizeErr(line string) string { // applicable. Will wait for a flush to return from the server for error // processing. func (nc *Conn) sendConnect() error { - // Construct the CONNECT protocol string cProto, err := nc.connectProto() if err != nil { @@ -1668,6 +1695,17 @@ func (nc *Conn) sendConnect() error { if strings.HasPrefix(proto, _ERR_OP_) { // Remove -ERR, trim spaces and quotes, and convert to lower case. proto = normalizeErr(proto) + + // Check if this is an auth error + if authErr := checkAuthError(strings.ToLower(proto)); authErr != nil { + // This will schedule an async error if we are in reconnect, + // and keep track of the auth error for the current server. + // If we have got the same error twice, this sets nc.ar to true to + // indicate that the reconnect should be aborted (will be checked + // in doReconnect()). + nc.processAuthError(authErr) + } + return errors.New("nats: " + proto) } @@ -1841,6 +1879,11 @@ func (nc *Conn) doReconnect(err error) { // Process connect logic if nc.err = nc.processConnectInit(); nc.err != nil { + // Check if we should abort reconnect. If so, break out + // of the loop and connection will be closed. + if nc.ar { + break + } nc.status = RECONNECTING // Reset the buffered writer to the pending buffer // (was set to a buffered writer on nc.conn in createConn) @@ -1848,6 +1891,10 @@ func (nc *Conn) doReconnect(err error) { continue } + // Clear possible lastErr under the connection lock after + // a successful processConnectInit(). + nc.current.lastErr = nil + // Clear out server stats for the server we connected to.. cur.didConnect = true cur.reconnects = 0 @@ -1883,6 +1930,7 @@ func (nc *Conn) doReconnect(err error) { if nc.Opts.ReconnectedCB != nil { nc.ach.push(func() { nc.Opts.ReconnectedCB(nc) }) } + // Release lock here, we will return below. nc.mu.Unlock() @@ -1897,7 +1945,7 @@ func (nc *Conn) doReconnect(err error) { nc.err = ErrNoServers } nc.mu.Unlock() - nc.Close() + nc.close(CLOSED, true, nil) } // processOpErr handles errors from reading or parsing the protocol. @@ -1932,7 +1980,7 @@ func (nc *Conn) processOpErr(err error) { nc.status = DISCONNECTED nc.err = err nc.mu.Unlock() - nc.Close() + nc.close(CLOSED, true, nil) } // dispatch is responsible for calling any async callbacks @@ -2132,8 +2180,8 @@ func (nc *Conn) processMsg(data []byte) { nc.subsMu.RLock() // Stats - nc.InMsgs++ - nc.InBytes += uint64(len(data)) + atomic.AddUint64(&nc.InMsgs, 1) + atomic.AddUint64(&nc.InBytes, uint64(len(data))) sub := nc.subs[nc.ps.ma.sid] if sub == nil { @@ -2239,15 +2287,24 @@ func (nc *Conn) processPermissionsViolation(err string) { nc.mu.Unlock() } -// processAuthorizationViolation is called when the server signals a user -// authorization violation. -func (nc *Conn) processAuthorizationViolation(err string) { - nc.mu.Lock() - nc.err = ErrAuthorization - if nc.Opts.AsyncErrorCB != nil { - nc.ach.push(func() { nc.Opts.AsyncErrorCB(nc, nil, ErrAuthorization) }) +// processAuthError generally processing for auth errors. We want to do retries +// unless we get the same error again. This allows us for instance to swap credentials +// and have the app reconnect, but if nothing is changing we should bail. +// This function will return true if the connection should be closed, false otherwise. +// Connection lock is held on entry +func (nc *Conn) processAuthError(err error) bool { + nc.err = err + if !nc.initc && nc.Opts.AsyncErrorCB != nil { + nc.ach.push(func() { nc.Opts.AsyncErrorCB(nc, nil, err) }) } - nc.mu.Unlock() + // We should give up if we tried twice on this server and got the + // same error. + if nc.current.lastErr == err { + nc.ar = true + } else { + nc.current.lastErr = err + } + return nc.ar } // flusher is a separate Go routine that will process flush requests for the write @@ -2417,6 +2474,18 @@ func (nc *Conn) LastError() error { return err } +// Check if the given error string is an auth error, and if so returns +// the corresponding ErrXXX error, nil otherwise +func checkAuthError(e string) error { + if strings.HasPrefix(e, AUTHORIZATION_ERR) { + return ErrAuthorization + } + if strings.HasPrefix(e, AUTHENTICATION_EXPIRED_ERR) { + return ErrAuthExpired + } + return nil +} + // processErr processes any error messages from the server and // sets the connection's lastError. func (nc *Conn) processErr(ie string) { @@ -2425,18 +2494,25 @@ func (nc *Conn) processErr(ie string) { // convert to lower case. e := strings.ToLower(ne) + close := false + // FIXME(dlc) - process Slow Consumer signals special. if e == STALE_CONNECTION { nc.processOpErr(ErrStaleConnection) } else if strings.HasPrefix(e, PERMISSIONS_ERR) { nc.processPermissionsViolation(ne) - } else if strings.HasPrefix(e, AUTHORIZATION_ERR) { - nc.processAuthorizationViolation(ne) + } else if authErr := checkAuthError(e); authErr != nil { + nc.mu.Lock() + close = nc.processAuthError(authErr) + nc.mu.Unlock() } else { + close = true nc.mu.Lock() nc.err = errors.New("nats: " + ne) nc.mu.Unlock() - nc.Close() + } + if close { + nc.close(CLOSED, true, nil) } } @@ -2572,21 +2648,32 @@ func (nc *Conn) publish(subj, reply string, data []byte) error { // the appropriate channel based on the last token and place // the message on the channel if possible. func (nc *Conn) respHandler(m *Msg) { - rt := respToken(m.Subject) - nc.mu.Lock() + // Just return if closed. if nc.isClosed() { nc.mu.Unlock() return } + var mch chan *Msg + // Grab mch - mch := nc.respMap[rt] - // Delete the key regardless, one response only. - // FIXME(dlc) - should we track responses past 1 - // just statistics wise? - delete(nc.respMap, rt) + rt := nc.respToken(m.Subject) + if rt != _EMPTY_ { + mch = nc.respMap[rt] + // Delete the key regardless, one response only. + delete(nc.respMap, rt) + } else if len(nc.respMap) == 1 { + // If the server has rewritten the subject, the response token (rt) + // will not match (could be the case with JetStream). If that is the + // case and there is a single entry, use that. + for k, v := range nc.respMap { + mch = v + delete(nc.respMap, k) + break + } + } nc.mu.Unlock() // Don't block, let Request timeout instead, mch is @@ -2610,33 +2697,22 @@ func (nc *Conn) createRespMux(respSub string) error { return err } nc.mu.Lock() + nc.respScanf = strings.Replace(respSub, "*", "%s", -1) nc.respMux = s nc.mu.Unlock() return nil } -// Request will send a request payload and deliver the response message, -// or an error, including a timeout if no message was received properly. -func (nc *Conn) Request(subj string, data []byte, timeout time.Duration) (*Msg, error) { - if nc == nil { - return nil, ErrInvalidConnection - } - - nc.mu.Lock() - // If user wants the old style. - if nc.Opts.UseOldRequestStyle { - nc.mu.Unlock() - return nc.oldRequest(subj, data, timeout) - } - - // Do setup for the new style. +// Helper to setup and send new request style requests. Return the chan to receive the response. +func (nc *Conn) createNewRequestAndSend(subj string, data []byte) (chan *Msg, string, error) { + // Do setup for the new style if needed. if nc.respMap == nil { nc.initNewResp() } - // Create literal Inbox and map to a chan msg. + // Create new literal Inbox and map to a chan msg. mch := make(chan *Msg, RequestChanLen) respInbox := nc.newRespInbox() - token := respToken(respInbox) + token := respInbox[respInboxPrefixLen:] nc.respMap[token] = mch createSub := nc.respMux == nil ginbox := nc.respSub @@ -2647,11 +2723,33 @@ func (nc *Conn) Request(subj string, data []byte, timeout time.Duration) (*Msg, var err error nc.respSetup.Do(func() { err = nc.createRespMux(ginbox) }) if err != nil { - return nil, err + return nil, token, err } } if err := nc.PublishRequest(subj, respInbox, data); err != nil { + return nil, token, err + } + + return mch, token, nil +} + +// Request will send a request payload and deliver the response message, +// or an error, including a timeout if no message was received properly. +func (nc *Conn) Request(subj string, data []byte, timeout time.Duration) (*Msg, error) { + if nc == nil { + return nil, ErrInvalidConnection + } + + nc.mu.Lock() + // If user wants the old style. + if nc.Opts.UseOldRequestStyle { + nc.mu.Unlock() + return nc.oldRequest(subj, data, timeout) + } + + mch, token, err := nc.createNewRequestAndSend(subj, data) + if err != nil { return nil, err } @@ -2754,9 +2852,16 @@ func (nc *Conn) NewRespInbox() string { } // respToken will return the last token of a literal response inbox -// which we use for the message channel lookup. -func respToken(respInbox string) string { - return respInbox[respInboxPrefixLen:] +// which we use for the message channel lookup. This needs to do a +// scan to protect itself against the server changing the subject. +// Lock should be held. +func (nc *Conn) respToken(respInbox string) string { + var token string + n, err := fmt.Sscanf(respInbox, nc.respScanf, &token) + if err != nil || n != 1 { + return "" + } + return token } // Subscribe will express interest in the given subject. The subject @@ -2822,11 +2927,37 @@ func (nc *Conn) QueueSubscribeSyncWithChan(subj, queue string, ch chan *Msg) (*S return nc.subscribe(subj, queue, nil, ch, false) } +// badSubject will do quick test on whether a subject is acceptable. +// Spaces are not allowed and all tokens should be > 0 in len. +func badSubject(subj string) bool { + if strings.ContainsAny(subj, " \t\r\n") { + return true + } + tokens := strings.Split(subj, ".") + for _, t := range tokens { + if len(t) == 0 { + return true + } + } + return false +} + +// badQueue will check a queue name for whitespace. +func badQueue(qname string) bool { + return strings.ContainsAny(qname, " \t\r\n") +} + // subscribe is the internal subscribe function that indicates interest in a subject. func (nc *Conn) subscribe(subj, queue string, cb MsgHandler, ch chan *Msg, isSync bool) (*Subscription, error) { if nc == nil { return nil, ErrInvalidConnection } + if badSubject(subj) { + return nil, ErrBadSubject + } + if queue != "" && badQueue(queue) { + return nil, ErrBadQueueName + } nc.mu.Lock() // ok here, but defer is generally expensive defer nc.mu.Unlock() @@ -2902,7 +3033,6 @@ func (nc *Conn) removeSub(s *Subscription) { s.mch = nil // Mark as invalid - s.conn = nil s.closed = true if s.pCond != nil { s.pCond.Broadcast() @@ -2939,7 +3069,7 @@ func (s *Subscription) IsValid() bool { } s.mu.Lock() defer s.mu.Unlock() - return s.conn != nil + return s.conn != nil && !s.closed } // Drain will remove interest but continue callbacks until all messages @@ -2964,8 +3094,12 @@ func (s *Subscription) Unsubscribe() error { } s.mu.Lock() conn := s.conn + closed := s.closed s.mu.Unlock() - if conn == nil { + if conn == nil || conn.IsClosed() { + return ErrConnectionClosed + } + if closed { return ErrBadSubscription } if conn.IsDraining() { @@ -3021,8 +3155,9 @@ func (s *Subscription) AutoUnsubscribe(max int) error { } s.mu.Lock() conn := s.conn + closed := s.closed s.mu.Unlock() - if conn == nil { + if conn == nil || closed { return ErrBadSubscription } return conn.unsubscribe(s, max, false) @@ -3069,8 +3204,8 @@ func (nc *Conn) unsubscribe(sub *Subscription, max int, drainMode bool) error { } // NextMsg will return the next message available to a synchronous subscriber -// or block until one is available. A timeout can be used to return when no -// message has been delivered. +// or block until one is available. An error is returned if the subscription is invalid (ErrBadSubscription), +// the connection is closed (ErrConnectionClosed), or the timeout is reached (ErrTimeout). func (s *Subscription) NextMsg(timeout time.Duration) (*Msg, error) { if s == nil { return nil, ErrBadSubscription @@ -3094,7 +3229,7 @@ func (s *Subscription) NextMsg(timeout time.Duration) (*Msg, error) { select { case msg, ok = <-mch: if !ok { - return nil, ErrConnectionClosed + return nil, s.getNextMsgErr() } if err := s.processNextMsgDelivered(msg); err != nil { return nil, err @@ -3113,7 +3248,7 @@ func (s *Subscription) NextMsg(timeout time.Duration) (*Msg, error) { select { case msg, ok = <-mch: if !ok { - return nil, ErrConnectionClosed + return nil, s.getNextMsgErr() } if err := s.processNextMsgDelivered(msg); err != nil { return nil, err @@ -3150,6 +3285,18 @@ func (s *Subscription) validateNextMsgState() error { return nil } +// This is called when the sync channel has been closed. +// The error returned will be either connection or subscription +// closed depending on what caused NextMsg() to fail. +func (s *Subscription) getNextMsgErr() error { + s.mu.Lock() + defer s.mu.Unlock() + if s.connClosed { + return ErrConnectionClosed + } + return ErrBadSubscription +} + // processNextMsgDelivered takes a message and applies the needed // accounting to the stats from the subscription, returning an // error in case we have the maximum number of messages have been @@ -3197,7 +3344,7 @@ func (s *Subscription) Pending() (int, int, error) { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return -1, -1, ErrBadSubscription } if s.typ == ChanSubscription { @@ -3213,7 +3360,7 @@ func (s *Subscription) MaxPending() (int, int, error) { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return -1, -1, ErrBadSubscription } if s.typ == ChanSubscription { @@ -3229,7 +3376,7 @@ func (s *Subscription) ClearMaxPending() error { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return ErrBadSubscription } if s.typ == ChanSubscription { @@ -3254,7 +3401,7 @@ func (s *Subscription) PendingLimits() (int, int, error) { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return -1, -1, ErrBadSubscription } if s.typ == ChanSubscription { @@ -3271,7 +3418,7 @@ func (s *Subscription) SetPendingLimits(msgLimit, bytesLimit int) error { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return ErrBadSubscription } if s.typ == ChanSubscription { @@ -3291,7 +3438,7 @@ func (s *Subscription) Delivered() (int64, error) { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return -1, ErrBadSubscription } return int64(s.delivered), nil @@ -3307,7 +3454,7 @@ func (s *Subscription) Dropped() (int, error) { } s.mu.Lock() defer s.mu.Unlock() - if s.conn == nil { + if s.conn == nil || s.closed { return -1, ErrBadSubscription } return s.dropped, nil @@ -3529,8 +3676,14 @@ func (nc *Conn) close(status Status, doCBs bool, err error) { nc.stopPingTimer() nc.ptmr = nil - // Go ahead and make sure we have flushed the outbound - if nc.conn != nil { + // Need to close and set tcp conn to nil if reconnect loop has stopped, + // otherwise we would incorrectly invoke Disconnect handler (if set) + // down below. + if nc.ar && nc.conn != nil { + nc.conn.Close() + nc.conn = nil + } else if nc.conn != nil { + // Go ahead and make sure we have flushed the outbound nc.bw.Flush() defer nc.conn.Close() } @@ -3583,7 +3736,7 @@ func (nc *Conn) close(status Status, doCBs bool, err error) { // all blocking calls, such as Flush() and NextMsg() func (nc *Conn) Close() { if nc != nil { - nc.close(CLOSED, true, nil) + nc.close(CLOSED, !nc.Opts.NoCallbacksAfterClientClose, nil) } } @@ -3662,12 +3815,12 @@ func (nc *Conn) drainConnection() { err := nc.Flush() if err != nil { pushErr(err) - nc.Close() + nc.close(CLOSED, true, nil) return } // Move to closed state. - nc.Close() + nc.close(CLOSED, true, nil) } // Drain will put a connection into a drain state. All subscriptions will @@ -3773,18 +3926,16 @@ func (nc *Conn) isDrainingPubs() bool { // Stats will return a race safe copy of the Statistics section for the connection. func (nc *Conn) Stats() Statistics { - // Stats are updated either under connection's mu or subsMu mutexes. - // Lock both to safely get them. + // Stats are updated either under connection's mu or with atomic operations + // for inbound stats in processMsg(). nc.mu.Lock() - nc.subsMu.RLock() stats := Statistics{ - InMsgs: nc.InMsgs, - InBytes: nc.InBytes, + InMsgs: atomic.LoadUint64(&nc.InMsgs), + InBytes: atomic.LoadUint64(&nc.InBytes), OutMsgs: nc.OutMsgs, OutBytes: nc.OutBytes, Reconnects: nc.Reconnects, } - nc.subsMu.RUnlock() nc.mu.Unlock() return stats } @@ -3902,70 +4053,74 @@ func NkeyOptionFromSeed(seedFile string) (Option, error) { return Nkey(string(pub), sigCB), nil } -// This is a regex to match decorated jwts in keys/seeds. -// .e.g. -// -----BEGIN NATS USER JWT----- -// eyJ0eXAiOiJqd3QiLCJhbGciOiJlZDI1NTE5... -// ------END NATS USER JWT------ -// -// ************************* IMPORTANT ************************* -// NKEY Seed printed below can be used sign and prove identity. -// NKEYs are sensitive and should be treated as secrets. -// -// -----BEGIN USER NKEY SEED----- -// SUAIO3FHUX5PNV2LQIIP7TZ3N4L7TX3W53MQGEIVYFIGA635OZCKEYHFLM -// ------END USER NKEY SEED------ - -var nscDecoratedRe = regexp.MustCompile(`\s*(?:(?:[-]{3,}[^\n]*[-]{3,}\n)(.+)(?:\n\s*[-]{3,}[^\n]*[-]{3,}\n))`) +// Just wipe slice with 'x', for clearing contents of creds or nkey seed file. +func wipeSlice(buf []byte) { + for i := range buf { + buf[i] = 'x' + } +} func userFromFile(userFile string) (string, error) { - contents, err := ioutil.ReadFile(userFile) + path, err := expandPath(userFile) if err != nil { return _EMPTY_, fmt.Errorf("nats: %v", err) } - defer wipeSlice(contents) - - items := nscDecoratedRe.FindAllSubmatch(contents, -1) - if len(items) == 0 { - return string(contents), nil - } - // First result should be the user JWT. - // We copy here so that if the file contained a seed file too we wipe appropriately. - raw := items[0][1] - tmp := make([]byte, len(raw)) - copy(tmp, raw) - return string(tmp), nil -} -func nkeyPairFromSeedFile(seedFile string) (nkeys.KeyPair, error) { - var seed []byte - contents, err := ioutil.ReadFile(seedFile) + contents, err := ioutil.ReadFile(path) if err != nil { - return nil, fmt.Errorf("nats: %v", err) + return _EMPTY_, fmt.Errorf("nats: %v", err) } defer wipeSlice(contents) + return jwt.ParseDecoratedJWT(contents) +} - items := nscDecoratedRe.FindAllSubmatch(contents, -1) - if len(items) > 1 { - seed = items[1][1] - } else { - lines := bytes.Split(contents, []byte("\n")) - for _, line := range lines { - if bytes.HasPrefix(bytes.TrimSpace(line), []byte("SU")) { - seed = line - break +func homeDir() (string, error) { + if runtime.GOOS == "windows" { + homeDrive, homePath := os.Getenv("HOMEDRIVE"), os.Getenv("HOMEPATH") + userProfile := os.Getenv("USERPROFILE") + + var home string + if homeDrive == "" || homePath == "" { + if userProfile == "" { + return _EMPTY_, errors.New("nats: failed to get home dir, require %HOMEDRIVE% and %HOMEPATH% or %USERPROFILE%") } + home = userProfile + } else { + home = filepath.Join(homeDrive, homePath) } + + return home, nil + } + + home := os.Getenv("HOME") + if home == "" { + return _EMPTY_, errors.New("nats: failed to get home dir, require $HOME") + } + return home, nil +} + +func expandPath(p string) (string, error) { + p = os.ExpandEnv(p) + + if !strings.HasPrefix(p, "~") { + return p, nil } - if seed == nil { - return nil, fmt.Errorf("nats: No nkey user seed found in %q", seedFile) + home, err := homeDir() + if err != nil { + return _EMPTY_, err } - kp, err := nkeys.FromSeed(seed) + + return filepath.Join(home, p[1:]), nil +} + +func nkeyPairFromSeedFile(seedFile string) (nkeys.KeyPair, error) { + contents, err := ioutil.ReadFile(seedFile) if err != nil { - return nil, err + return nil, fmt.Errorf("nats: %v", err) } - return kp, nil + defer wipeSlice(contents) + return jwt.ParseDecoratedNKey(contents) } // Sign authentication challenges from the server. @@ -3982,13 +4137,6 @@ func sigHandler(nonce []byte, seedFile string) ([]byte, error) { return sig, nil } -// Just wipe slice with 'x', for clearing contents of nkey seed file. -func wipeSlice(buf []byte) { - for i := range buf { - buf[i] = 'x' - } -} - type timeoutWriter struct { timeout time.Duration conn net.Conn diff --git a/vendor/github.com/nats-io/nkeys/README.md b/vendor/github.com/nats-io/nkeys/README.md index 5cb87861..8b787cc3 100644 --- a/vendor/github.com/nats-io/nkeys/README.md +++ b/vendor/github.com/nats-io/nkeys/README.md @@ -17,7 +17,7 @@ Ed25519 is fast and resistant to side channel attacks. Generation of a seed key The NATS system will utilize Ed25519 keys, meaning that NATS systems will never store or even have access to any private keys. Authentication will utilize a random challenge response mechanism. -Dealing with 32 byte and 64 byte raw keys can be challenging. NKEYS is designed to formulate keys in a much friendlier fashion and references work done in cryptocurrencies, specifically [Stellar](https://www.stellar.org/). Bitcoin and others used a form of Base58 (or Base58Check) to endode raw keys. Stellar utilized a more traditonal Base32 with a CRC16 and a version or prefix byte. NKEYS utilizes a similar format where the prefix will be 1 byte for public and private keys and will be 2 bytes for seeds. The base32 encoding of these prefixes will yield friendly human readbable prefixes, e.g. '**N**' = server, '**C**' = cluster, '**O**' = operator, '**A**' = account, and '**U**' = user. '**P**' is used for private keys. For seeds, the first encoded prefix is '**S**', and the second character will be the type for the public key, e.g. "**SU**" is a seed for a user key pair, "**SA**" is a seed for an account key pair. +Dealing with 32 byte and 64 byte raw keys can be challenging. NKEYS is designed to formulate keys in a much friendlier fashion and references work done in cryptocurrencies, specifically [Stellar](https://www.stellar.org/). Bitcoin and others used a form of Base58 (or Base58Check) to encode raw keys. Stellar utilized a more traditional Base32 with a CRC16 and a version or prefix byte. NKEYS utilizes a similar format where the prefix will be 1 byte for public and private keys and will be 2 bytes for seeds. The base32 encoding of these prefixes will yield friendly human readable prefixes, e.g. '**N**' = server, '**C**' = cluster, '**O**' = operator, '**A**' = account, and '**U**' = user. '**P**' is used for private keys. For seeds, the first encoded prefix is '**S**', and the second character will be the type for the public key, e.g. "**SU**" is a seed for a user key pair, "**SA**" is a seed for an account key pair. ## Installation @@ -69,4 +69,4 @@ Unless otherwise noted, the NATS source files are distributed under the Apache Version 2.0 license found in the LICENSE file. -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fnats-io%2Fnkeys.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fnats-io%2Fnkeys?ref=badge_large) \ No newline at end of file +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fnats-io%2Fnkeys.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fnats-io%2Fnkeys?ref=badge_large) diff --git a/vendor/github.com/nats-io/nkeys/main.go b/vendor/github.com/nats-io/nkeys/main.go index 71b56336..2ea3f904 100644 --- a/vendor/github.com/nats-io/nkeys/main.go +++ b/vendor/github.com/nats-io/nkeys/main.go @@ -19,8 +19,8 @@ import ( "errors" ) -// Version -const Version = "0.1.0" +// Version is our current version +const Version = "0.1.3" // Errors var ( @@ -33,6 +33,7 @@ var ( ErrInvalidSignature = errors.New("nkeys: signature verification failed") ErrCannotSign = errors.New("nkeys: can not sign, no private key available") ErrPublicKeyOnly = errors.New("nkeys: no seed or private key available") + ErrIncompatibleKey = errors.New("nkeys: incompatible key") ) // KeyPair provides the central interface to nkeys. @@ -93,7 +94,7 @@ func FromSeed(seed []byte) (KeyPair, error) { return &kp{copy}, nil } -// Create a KeyPair from the raw 32 byte seed for a given type. +// FromRawSeed will create a KeyPair from the raw 32 byte seed for a given type. func FromRawSeed(prefix PrefixByte, rawSeed []byte) (KeyPair, error) { seed, err := EncodeSeed(prefix, rawSeed) if err != nil { diff --git a/vendor/github.com/nats-io/nkeys/strkey.go b/vendor/github.com/nats-io/nkeys/strkey.go index 36fd0536..324ea638 100644 --- a/vendor/github.com/nats-io/nkeys/strkey.go +++ b/vendor/github.com/nats-io/nkeys/strkey.go @@ -17,7 +17,6 @@ import ( "bytes" "encoding/base32" "encoding/binary" - "golang.org/x/crypto/ed25519" ) @@ -47,7 +46,7 @@ const ( PrefixByteUser PrefixByte = 20 << 3 // Base32-encodes to 'U...' // PrefixByteUnknown is for unknown prefixes. - PrefixByteUknown PrefixByte = 23 << 3 // Base32-encodes to 'X...' + PrefixByteUnknown PrefixByte = 23 << 3 // Base32-encodes to 'X...' ) // Set our encoding to not include padding '==' @@ -188,10 +187,11 @@ func DecodeSeed(src []byte) (PrefixByte, []byte, error) { return PrefixByte(b2), raw[2:], nil } +// Prefix returns PrefixBytes of its input func Prefix(src string) PrefixByte { b, err := decode([]byte(src)) if err != nil { - return PrefixByteUknown + return PrefixByteUnknown } prefix := PrefixByte(b[0]) err = checkValidPrefixByte(prefix) @@ -203,7 +203,7 @@ func Prefix(src string) PrefixByte { if PrefixByte(b1) == PrefixByteSeed { return PrefixByteSeed } - return PrefixByteUknown + return PrefixByteUnknown } // IsValidPublicKey will decode and verify that the string is a valid encoded public key. @@ -288,3 +288,19 @@ func (p PrefixByte) String() string { } return "unknown" } + +// CompatibleKeyPair returns an error if the KeyPair doesn't match expected PrefixByte(s) +func CompatibleKeyPair(kp KeyPair, expected ...PrefixByte) error { + pk, err := kp.PublicKey() + if err != nil { + return err + } + pkType := Prefix(pk) + for _, k := range expected { + if pkType == k { + return nil + } + } + + return ErrIncompatibleKey +} diff --git a/vendor/github.com/nats-io/stan.go/.travis.yml b/vendor/github.com/nats-io/stan.go/.travis.yml index 08915efa..4e66a2ce 100644 --- a/vendor/github.com/nats-io/stan.go/.travis.yml +++ b/vendor/github.com/nats-io/stan.go/.travis.yml @@ -3,21 +3,21 @@ sudo: false go: - 1.11.x - 1.12.x +env: +- GO111MODULE=off go_import_path: github.com/nats-io/stan.go install: -- go get -t ./... +- go get -t -v ./... - go get github.com/nats-io/nats-streaming-server -- go get github.com/mattn/goveralls -- go get github.com/wadey/gocovmerge - go get -u honnef.co/go/tools/cmd/staticcheck - go get -u github.com/client9/misspell/cmd/misspell before_script: - $(exit $(go fmt ./... | wc -l)) - go vet ./... -- misspell -error -locale US . +- find . -type f -name "*.go" | grep -v "/pb/" | xargs misspell -error -locale US - staticcheck ./... script: -- go test -i -race ./... +- go test -i -v ./... - go test -v -race ./... after_success: - if [[ "$TRAVIS_GO_VERSION" =~ 1.12 ]]; then ./scripts/cov.sh TRAVIS; fi diff --git a/vendor/github.com/nats-io/stan.go/README.md b/vendor/github.com/nats-io/stan.go/README.md index aa7d77ac..1dc3e0b1 100644 --- a/vendor/github.com/nats-io/stan.go/README.md +++ b/vendor/github.com/nats-io/stan.go/README.md @@ -25,6 +25,14 @@ NATS Streaming provides the following high-level feature set: go get github.com/nats-io/stan.go/ ``` +When using or transitioning to Go modules support: + +```bash +# Go client latest or explicit version +go get github.com/nats-io/stan.go/@latest +go get github.com/nats-io/stan.go/@v0.5.2 +``` + ## Basic Usage ```go diff --git a/vendor/github.com/nats-io/stan.go/go.mod b/vendor/github.com/nats-io/stan.go/go.mod index f924bd17..600561d3 100644 --- a/vendor/github.com/nats-io/stan.go/go.mod +++ b/vendor/github.com/nats-io/stan.go/go.mod @@ -2,6 +2,6 @@ module github.com/nats-io/stan.go require ( github.com/gogo/protobuf v1.2.1 - github.com/nats-io/nats.go v1.8.1 + github.com/nats-io/nats.go v1.9.1 github.com/nats-io/nuid v1.0.1 ) diff --git a/vendor/github.com/nats-io/stan.go/go.sum b/vendor/github.com/nats-io/stan.go/go.sum index bf57b3cd..296a7401 100644 --- a/vendor/github.com/nats-io/stan.go/go.sum +++ b/vendor/github.com/nats-io/stan.go/go.sum @@ -2,12 +2,19 @@ github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/nats-io/nats.go v1.8.1 h1:6lF/f1/NN6kzUDBz6pyvQDEXO39jqXcWRLu/tKjtOUQ= -github.com/nats-io/nats.go v1.8.1/go.mod h1:BrFz9vVn0fU3AcH9Vn4Kd7W0NpJ651tD5omQ3M8LwxM= -github.com/nats-io/nkeys v0.0.2 h1:+qM7QpgXnvDDixitZtQUBDY9w/s9mu1ghS+JIbsrx6M= -github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= +github.com/nats-io/jwt v0.3.0 h1:xdnzwFETV++jNc4W1mw//qFyJGb2ABOombmZJQS4+Qo= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/nats.go v1.9.1 h1:ik3HbLhZ0YABLto7iX80pZLPw/6dx3T+++MZJwLnMrQ= +github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= +github.com/nats-io/nkeys v0.1.0 h1:qMd4+pRHgdr1nAClu+2h/2a5F2TmKcCzjCDazVgRoX4= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0= -golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/vendor/github.com/nats-io/stan.go/stan.go b/vendor/github.com/nats-io/stan.go/stan.go index 5a716eb9..7af21d5e 100644 --- a/vendor/github.com/nats-io/stan.go/stan.go +++ b/vendor/github.com/nats-io/stan.go/stan.go @@ -26,11 +26,11 @@ import ( ) // Version is the NATS Streaming Go Client version -const Version = "0.5.0" +const Version = "0.5.2" const ( // DefaultNatsURL is the default URL the client connects to - DefaultNatsURL = "nats://localhost:4222" + DefaultNatsURL = "nats://127.0.0.1:4222" // DefaultConnectWait is the default timeout used for the connect operation DefaultConnectWait = 2 * time.Second // DefaultDiscoverPrefix is the prefix subject used to connect to the NATS Streaming server @@ -48,6 +48,7 @@ const ( // Conn represents a connection to the NATS Streaming subsystem. It can Publish and // Subscribe to messages within the NATS Streaming cluster. +// The connection is safe to use in multiple Go routines concurrently. type Conn interface { // Publish will publish to the cluster and wait for an ACK. Publish(subject string, data []byte) error @@ -99,7 +100,7 @@ const ( // Errors var ( - ErrConnectReqTimeout = errors.New("stan: connect request timeout") + ErrConnectReqTimeout = errors.New("stan: connect request timeout (possibly wrong cluster ID?)") ErrCloseReqTimeout = errors.New("stan: close request timeout") ErrSubReqTimeout = errors.New("stan: subscribe request timeout") ErrUnsubReqTimeout = errors.New("stan: unsubscribe request timeout") @@ -253,7 +254,7 @@ func Pings(interval, maxOut int) Option { // by the library as milliseconds. If this test boolean is set, // do not check values. if !testAllowMillisecInPings { - if interval < 1 || maxOut <= 2 { + if interval < 1 || maxOut < 2 { return fmt.Errorf("invalid ping values: interval=%v (min>0) maxOut=%v (min=2)", interval, maxOut) } } @@ -410,7 +411,7 @@ func Connect(stanClusterID, clientID string, options ...Option) (Conn, error) { c.Close() return nil, err } - c.ackSubscription.SetPendingLimits(1024*1024, 32*1024*1024) + c.ackSubscription.SetPendingLimits(-1, -1) c.pubAckMap = make(map[string]*ack) // Create Subscription map diff --git a/vendor/github.com/nats-io/stan.go/sub.go b/vendor/github.com/nats-io/stan.go/sub.go index a58e1982..84e8897a 100644 --- a/vendor/github.com/nats-io/stan.go/sub.go +++ b/vendor/github.com/nats-io/stan.go/sub.go @@ -11,7 +11,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package stan is a Go client for the NATS Streaming messaging system (https://nats.io). package stan import ( @@ -40,6 +39,7 @@ type Msg struct { // Subscription represents a subscription within the NATS Streaming cluster. Subscriptions // will be rate matched and follow at-least once delivery semantics. +// The subscription is safe to use in multiple Go routines concurrently. type Subscription interface { // Unsubscribe removes interest in the subscription. // For durables, it means that the durable interest is also removed from @@ -257,6 +257,7 @@ func (sc *conn) subscribe(subject, qgroup string, cb MsgHandler, options ...Subs if err != nil { return nil, err } + nsub.SetPendingLimits(-1, -1) sub.inboxSub = nsub // Create a subscription request diff --git a/vendor/modules.txt b/vendor/modules.txt index f8868b7a..dbed0b28 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -20,23 +20,23 @@ github.com/hashicorp/raft github.com/lib/pq github.com/lib/pq/oid github.com/lib/pq/scram -# github.com/nats-io/jwt v0.2.14 +# github.com/nats-io/jwt v0.3.2 github.com/nats-io/jwt -# github.com/nats-io/nats-server/v2 v2.0.4 +# github.com/nats-io/nats-server/v2 v2.1.2 github.com/nats-io/nats-server/v2/server github.com/nats-io/nats-server/v2/logger github.com/nats-io/nats-server/v2/conf github.com/nats-io/nats-server/v2/server/pse github.com/nats-io/nats-server/v2/test -# github.com/nats-io/nats.go v1.8.1 +# github.com/nats-io/nats.go v1.9.1 github.com/nats-io/nats.go github.com/nats-io/nats.go/encoders/builtin github.com/nats-io/nats.go/util -# github.com/nats-io/nkeys v0.1.0 +# github.com/nats-io/nkeys v0.1.3 github.com/nats-io/nkeys # github.com/nats-io/nuid v1.0.1 github.com/nats-io/nuid -# github.com/nats-io/stan.go v0.5.0 +# github.com/nats-io/stan.go v0.5.2 github.com/nats-io/stan.go/pb github.com/nats-io/stan.go # github.com/prometheus/procfs v0.0.3