Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/environment/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type Config struct {
RolloutUpgrade struct {
TestFarms []uint32 `json:"test_farms"`
} `json:"rollout_upgrade"`
RelaysURLs []string `json:"relays_urls"`
}

// Merge, updates current config with cfg merging and override config
Expand Down
35 changes: 25 additions & 10 deletions pkg/environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"sync"

"github.com/pkg/errors"
"github.com/rs/zerolog/log"
substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go"
"github.com/threefoldtech/zosbase/pkg"

Expand Down Expand Up @@ -47,7 +48,7 @@ type Environment struct {
// IMPORTANT NOTICE:
// SINCE RELAYS FOR A NODE IS STORED ON THE CHAIN IN A LIMITED SPACE
// PLEASE MAKE SURE THAT ANY ENV HAS NO MORE THAN FOUR RELAYS CONFIGURED
RelayURL []string
relaysURLs []string
ActivationURL []string
GraphQL []string
KycURL string
Expand Down Expand Up @@ -112,7 +113,7 @@ var (
"wss://tfchain.dev.grid.tf/",
"wss://tfchain.02.dev.grid.tf",
},
RelayURL: []string{
relaysURLs: []string{
"wss://relay.dev.grid.tf",
"wss://relay.02.dev.grid.tf",
},
Expand All @@ -136,7 +137,7 @@ var (
"wss://tfchain.test.grid.tf/",
"wss://tfchain.02.test.grid.tf",
},
RelayURL: []string{
relaysURLs: []string{
"wss://relay.test.grid.tf",
"wss://relay.02.test.grid.tf",
},
Expand All @@ -160,7 +161,7 @@ var (
"wss://tfchain.qa.grid.tf/",
"wss://tfchain.02.qa.grid.tf/",
},
RelayURL: []string{
relaysURLs: []string{
"wss://relay.qa.grid.tf",
"wss://relay.02.qa.grid.tf",
},
Expand All @@ -187,9 +188,9 @@ var (
"wss://03.tfchain.grid.tf/",
"wss://04.tfchain.grid.tf/",
},
RelayURL: []string{
relaysURLs: []string{
"wss://relay.grid.tf",
"wss://relay.02.grid.tf",
// "wss://relay.02.grid.tf",
},
ActivationURL: []string{
"https://activation.grid.tf/activation/activate",
Expand Down Expand Up @@ -224,13 +225,22 @@ func Get() (Environment, error) {
if err != nil {
return Environment{}, err
}
if params.IsV4() {
env.FlistURL = "redis://v4.hub.grid.tf:9940"
}

return env, nil
}

// GetRelaysURLs returns the relay URLs the node should use.
//
// URLs listed in zos-config take precedence when the config loads
// successfully and contains at least one relay. Otherwise the relays
// configured for the running environment (obtained via MustGet) are returned.
func GetRelaysURLs() []string {
	config, err := GetConfig()
	if err != nil {
		// Falling back is deliberate, but record why zos-config was skipped
		// instead of silently dropping the error.
		log.Debug().Err(err).Msg("failed to load zos-config, falling back to environment relays")
	} else if len(config.RelaysURLs) > 0 {
		log.Debug().Msg("using relays urls from zos-config")
		return config.RelaysURLs
	}

	log.Debug().Msg("using relays urls from environment")
	return MustGet().relaysURLs
}

// GetSubstrate gets a client to the substrate blockchain
func GetSubstrate() (substrate.Manager, error) {
env, err := Get()
Expand Down Expand Up @@ -281,7 +291,7 @@ func getEnvironmentFromParams(params kernel.Params) (Environment, error) {

if relay, ok := params.Get("relay"); ok {
if len(relay) > 0 {
env.RelayURL = relay
env.relaysURLs = relay
}
}

Expand Down Expand Up @@ -368,5 +378,10 @@ func getEnvironmentFromParams(params kernel.Params) (Environment, error) {
env.BinRepo = e
}

// if the node is running v4, change FlistURL to use v4.hub.grid.tf
if params.IsV4() {
env.FlistURL = "redis://v4.hub.grid.tf:9940"
}

return env, nil
}
2 changes: 1 addition & 1 deletion pkg/perf/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (h *healthcheckTask) Run(ctx context.Context) (interface{}, error) {
}

if len(errors) != 0 {
return fmt.Errorf("failed health check")
return fmt.Errorf("failed health check %s", errorsToStrings(errors))
}

return nil
Expand Down
93 changes: 65 additions & 28 deletions pkg/perf/healthcheck/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net"
"net/url"
"strings"
"sync"
"time"

Expand All @@ -13,64 +14,100 @@ import (
"github.com/threefoldtech/zosbase/pkg/environment"
)

const defaultRequestTimeout = 5 * time.Second
const defaultRequestTimeout = 10 * time.Second

// networkCheck verifies that at least one instance of each service is reachable.
// It returns the collected errors as a report for the perf healthcheck.
// Side effect: it sets or deletes the not-reachable flag.
func networkCheck(ctx context.Context) []error {
env := environment.MustGet()
servicesUrl := []string{env.FlistURL}

servicesUrl = append(append(servicesUrl, env.SubstrateURL...), env.RelayURL...)
servicesUrl = append(append(servicesUrl, env.ActivationURL...), env.GraphQL...)
var (
wg sync.WaitGroup
errMu sync.Mutex
errors []error
)

var errors []error
env := environment.MustGet()
services := map[string][]string{
"substrate": env.SubstrateURL,
"activation": env.ActivationURL,
"relay": environment.GetRelaysURLs(),
"graphql": env.GraphQL,
"hub": {env.FlistURL},
"kyc": {env.KycURL},
}

var wg sync.WaitGroup
var mut sync.Mutex
for _, serviceUrl := range servicesUrl {
for service, instances := range services {
wg.Add(1)
go func(serviceUrl string) {
go func(service string, instances []string) {
defer wg.Done()

err := checkService(ctx, serviceUrl)
if err != nil {
mut.Lock()
defer mut.Unlock()

if err := verifyAtLeastOneIsReachable(ctx, service, instances); err != nil {
errMu.Lock()
errors = append(errors, err)
errMu.Unlock()
}
}(serviceUrl)

}(service, instances)
}

wg.Wait()

if len(errors) == 0 {
log.Debug().Msg("all network checks passed")
if err := app.DeleteFlag(app.NotReachable); err != nil {
log.Error().Err(err).Msg("failed to delete readonly flag")
log.Error().Err(err).Msg("failed to delete not-reachable flag")
}
} else {
log.Warn().Int("failed_checks", len(errors)).Msg("some network checks failed")
if err := app.SetFlag(app.NotReachable); err != nil {
log.Error().Err(err).Msg("failed to set not-reachable flag")
}
}

return errors
}

// verifyAtLeastOneIsReachable probes the instances of a service in order and
// succeeds as soon as one of them passes checkService.
//
// It returns an error when instances is empty, or when every instance fails;
// in the latter case the error lists each instance's failure joined by "; ".
func verifyAtLeastOneIsReachable(ctx context.Context, service string, instances []string) error {
	if len(instances) == 0 {
		return fmt.Errorf("no instances provided for service %s", service)
	}

	failures := make([]string, 0, len(instances))
	for i := range instances {
		err := checkService(ctx, instances[i])
		if err == nil {
			// One reachable instance is enough for the service to pass.
			return nil
		}
		failures = append(failures, err.Error())
	}

	details := strings.Join(failures, "; ")
	return fmt.Errorf("all %s instances are unreachable: %s", service, details)
}

func checkService(ctx context.Context, serviceUrl string) error {
ctx, cancel := context.WithTimeout(ctx, defaultRequestTimeout)
timeoutCtx, cancel := context.WithTimeout(ctx, defaultRequestTimeout)
defer cancel()

address := parseUrl(serviceUrl)
err := isReachable(ctx, address)
address, err := parseUrl(serviceUrl)
if err != nil {
if err := app.SetFlag(app.NotReachable); err != nil {
log.Error().Err(err).Msg("failed to set not reachable flag")
}
return fmt.Errorf("invalid URL %s: %w", serviceUrl, err)
}

if err := isReachable(timeoutCtx, address); err != nil {
return fmt.Errorf("%s is not reachable: %w", serviceUrl, err)
}

return nil
}

func parseUrl(serviceUrl string) string {
func parseUrl(serviceUrl string) (string, error) {
u, err := url.Parse(serviceUrl)
if err != nil {
return ""
return "", fmt.Errorf("failed to parse URL: %w", err)
}

if u.Host == "" {
return "", fmt.Errorf("missing hostname in URL")
}

port := ":80"
Expand All @@ -82,11 +119,11 @@ func parseUrl(serviceUrl string) string {
u.Host += port
}

return u.Host
return u.Host, nil
}

func isReachable(ctx context.Context, address string) error {
d := net.Dialer{Timeout: defaultRequestTimeout}
var d net.Dialer
conn, err := d.DialContext(ctx, "tcp", address)
if err != nil {
return fmt.Errorf("failed to connect: %w", err)
Expand Down