diff --git a/cmds/capacityd/main.go b/cmds/capacityd/main.go index 354732421..597343c31 100644 --- a/cmds/capacityd/main.go +++ b/cmds/capacityd/main.go @@ -26,6 +26,7 @@ const module = "monitor" func cap(ctx context.Context, client zbus.Client) { storage := stubs.NewStorageModuleStub(client) identity := stubs.NewIdentityManagerStub(client) + network := stubs.NewNetworkerStub(client) cl, err := bcdbClient() if err != nil { log.Fatal().Err(err).Msg("failed to connect to bcdb backend") @@ -34,6 +35,17 @@ func cap(ctx context.Context, client zbus.Client) { // call this now so we block here until identityd is ready to serve us nodeID := identity.NodeID().Identity() + // block until networkd is ready to serve requests from zbus + // this is used to prevent reporting uptime and online status to the explorer if the node is not in a fully ready state + // https://github.com/threefoldtech/zos/issues/632 + bo := backoff.NewExponentialBackOff() + bo.MaxElapsedTime = 0 + backoff.RetryNotify(func() error { + return network.Ready() + }, bo, func(err error, d time.Duration) { + log.Error().Err(err).Msgf("networkd is not ready yet") + }) + r := capacity.NewResourceOracle(storage) log.Info().Msg("inspect hardware resources") @@ -75,7 +87,7 @@ func cap(ctx context.Context, client zbus.Client) { log.Info().Msg("sends capacity detail to BCDB") return cl.NodeSetCapacity(nodeID, ru, *dmi, disks, hypervisor) } - bo := backoff.NewExponentialBackOff() + bo = backoff.NewExponentialBackOff() bo.MaxElapsedTime = 0 // retry forever backoff.RetryNotify(setCapacity, bo, func(err error, d time.Duration) { log.Error(). 
diff --git a/cmds/provisiond/main.go b/cmds/provisiond/main.go index ff6a87c10..31bd12402 100644 --- a/cmds/provisiond/main.go +++ b/cmds/provisiond/main.go @@ -7,6 +7,7 @@ import ( "path/filepath" "time" + "github.com/cenkalti/backoff/v3" "github.com/threefoldtech/zos/pkg" "github.com/threefoldtech/zos/pkg/app" "github.com/threefoldtech/zos/pkg/environment" @@ -92,6 +93,18 @@ func main() { identity := stubs.NewIdentityManagerStub(zbusCl) nodeID := identity.NodeID() + // block until networkd is ready to serve requests from zbus + // this is used to prevent reporting uptime and online status to the explorer if the node is not in a fully ready state + // https://github.com/threefoldtech/zos/issues/632 + network := stubs.NewNetworkerStub(zbusCl) + bo := backoff.NewExponentialBackOff() + bo.MaxElapsedTime = 0 + backoff.RetryNotify(func() error { + return network.Ready() + }, bo, func(err error, d time.Duration) { + log.Error().Err(err).Msgf("networkd is not ready yet") + }) + // to get reservation from tnodb e, err := app.ExplorerClient() if err != nil { diff --git a/pkg/network.go b/pkg/network.go index a5eb22af4..31a169720 100644 --- a/pkg/network.go +++ b/pkg/network.go @@ -20,6 +20,10 @@ type Member struct { //Networker is the interface for the network module type Networker interface { + // Ready returns nil if networkd is ready to operate + // This function is used by other daemons to test if networkd is done booting + Ready() error + // Create a new network resource CreateNR(Network) (string, error) // Delete a network resource diff --git a/pkg/network/networker.go b/pkg/network/networker.go index 0e62bce09..6f35985a7 100644 --- a/pkg/network/networker.go +++ b/pkg/network/networker.go @@ -210,6 +210,10 @@ func validatePeer(p pkg.Peer) error { return nil } +func (n *networker) Ready() error { + return nil +} + func (n *networker) Join(networkdID pkg.NetID, containerID string, addrs []string, publicIP6 bool) (join pkg.Member, err error) { // TODO: // 1- Make sure this network id is 
actually deployed diff --git a/pkg/stubs/network_stub.go b/pkg/stubs/network_stub.go index a425a9c8a..ea4720536 100644 --- a/pkg/stubs/network_stub.go +++ b/pkg/stubs/network_stub.go @@ -168,6 +168,19 @@ func (s *NetworkerStub) PublicAddresses(ctx context.Context) (<-chan pkg.Netlink return ch, nil } +func (s *NetworkerStub) Ready() (ret0 error) { + args := []interface{}{} + result, err := s.client.Request(s.module, s.object, "Ready", args...) + if err != nil { + panic(err) + } + ret0 = new(zbus.RemoteError) + if err := result.Unmarshal(0, &ret0); err != nil { + panic(err) + } + return +} + func (s *NetworkerStub) RemoveTap(arg0 pkg.NetID) (ret0 error) { args := []interface{}{arg0} result, err := s.client.Request(s.module, s.object, "RemoveTap", args...)