Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore dns on unclean shutdown #1494

Merged
merged 35 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
5492ddb
Improve error handling
lixmal Jan 22, 2024
7a60f8a
Add DNS unclean shutdown linux file and resolvconf handlers
lixmal Jan 22, 2024
2b12336
Add scaffold for other platforms
lixmal Jan 23, 2024
0516116
Add systemd restore
lixmal Jan 23, 2024
9bcacc4
Log close error
lixmal Jan 23, 2024
ed04672
Remove duplicate const
lixmal Jan 23, 2024
3dc21a3
Handle scan error
lixmal Jan 23, 2024
0d8edf6
Return scan error
lixmal Jan 23, 2024
1698f4e
Add network-manager
lixmal Jan 23, 2024
7923c2e
Add tested nm version contraint
lixmal Jan 23, 2024
1cc9e62
Use plural and spaces
lixmal Jan 23, 2024
34fd193
Move consts
lixmal Jan 23, 2024
fc6f846
Make registry keys more readable
lixmal Jan 23, 2024
9154ecb
Fix network-manager failing on cleanup
lixmal Jan 24, 2024
f76c520
Fix systemd-resolved failing on cleanup
lixmal Jan 24, 2024
e812d7d
Wrap remaining linux errors
lixmal Jan 24, 2024
e8ce5c1
Add windows recovery logic
lixmal Jan 24, 2024
08254ec
Improve error handling
lixmal Jan 24, 2024
5bcb6bb
Remove double remove
lixmal Jan 24, 2024
7b26182
Move windows unclean shutdown backup file creation further up
lixmal Jan 24, 2024
0e270f6
Add macos logic
lixmal Jan 24, 2024
30d1987
Use more intuitive functions names
lixmal Jan 24, 2024
2f023ce
Improve systemd error messages
lixmal Jan 24, 2024
581eb49
Stop and restore DNS before removing wg interface
lixmal Jan 24, 2024
88373d4
Merge branch 'main' into restore-dns-on-unclean-shutdown
lixmal Jan 24, 2024
69dfd9f
Make linter happy
lixmal Jan 24, 2024
a397b97
Simplify error messages
lixmal Jan 24, 2024
c8eeb81
Fix formatting
lixmal Jan 24, 2024
3433490
Don't restore file manager backup if first NS address doesn't match c…
lixmal Jan 25, 2024
0909a25
Fix builds
lixmal Jan 25, 2024
1b6038e
Remove unused param placeholder
lixmal Jan 26, 2024
6b2a566
Move DNS recovery code to own files
lixmal Jan 26, 2024
2644718
Remove full wg interface dependency in host manager creation
lixmal Jan 26, 2024
5e6e539
Fix android build
lixmal Jan 26, 2024
c870c95
Merge branch 'main' into restore-dns-on-unclean-shutdown
lixmal Jan 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions client/cmd/service_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ import (
"github.com/kardianos/service"
log "github.com/sirupsen/logrus"

"github.com/spf13/cobra"
"google.golang.org/grpc"

"github.com/netbirdio/netbird/client/proto"
"github.com/netbirdio/netbird/client/server"
"github.com/netbirdio/netbird/util"
"github.com/spf13/cobra"
"google.golang.org/grpc"
)

func (p *program) Start(svc service.Service) error {
Expand Down Expand Up @@ -109,7 +110,6 @@ var runCmd = &cobra.Command{
if err != nil {
return err
}
cmd.Printf("Netbird service is running")
return nil
},
}
Expand Down
9 changes: 8 additions & 1 deletion client/internal/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package internal

import (
"context"
"errors"
"fmt"
"strings"
"time"
Expand Down Expand Up @@ -93,6 +94,12 @@ func runClient(
) error {
log.Infof("starting NetBird client version %s", version.NetbirdVersion())

// Check if client was not shut down in a clean way and restore DNS config if required.
// Otherwise, we might not be able to connect to the management server to retrieve new config.
if err := dns.CheckUncleanShutdown(config.WgIface); err != nil {
log.Errorf("checking unclean shutdown error: %s", err)
}

backOff := &backoff.ExponentialBackOff{
InitialInterval: time.Second,
RandomizationFactor: 1,
Expand Down Expand Up @@ -244,7 +251,7 @@ func runClient(

log.Info("stopped NetBird client")

if _, err := state.Status(); err == ErrResetConnection {
if _, err := state.Status(); errors.Is(err, ErrResetConnection) {
return err
}

Expand Down
15 changes: 11 additions & 4 deletions client/internal/dns/dbus_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,38 @@ package dns

import (
"context"
"fmt"
"time"

"github.com/godbus/dbus/v5"
log "github.com/sirupsen/logrus"
"time"
)

const dbusDefaultFlag = 0

func isDbusListenerRunning(dest string, path dbus.ObjectPath) bool {
obj, closeConn, err := getDbusObject(dest, path)
if err != nil {
log.Tracef("error getting dbus object: %s", err)
return false
}
defer closeConn()

ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
defer cancel()

err = obj.CallWithContext(ctx, "org.freedesktop.DBus.Peer.Ping", 0).Store()
return err == nil
if err = obj.CallWithContext(ctx, "org.freedesktop.DBus.Peer.Ping", 0).Store(); err != nil {
log.Tracef("error calling dbus: %s", err)
return false
}

return true
}

func getDbusObject(dest string, path dbus.ObjectPath) (dbus.BusObject, func(), error) {
conn, err := dbus.SystemBus()
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("get dbus: %w", err)
}
obj := conn.Object(dest, path)

Expand Down
75 changes: 64 additions & 11 deletions client/internal/dns/file_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package dns
import (
"bytes"
"fmt"
"net/netip"
"os"
"strings"

Expand Down Expand Up @@ -49,7 +50,7 @@ func (f *fileConfigurator) applyDNSConfig(config HostDNSConfig) error {
if backupFileExist {
err = f.restore()
if err != nil {
return fmt.Errorf("unable to configure DNS for this peer using file manager without a Primary nameserver group. Restoring the original file return err: %s", err)
return fmt.Errorf("unable to configure DNS for this peer using file manager without a Primary nameserver group. Restoring the original file return err: %w", err)
}
}
return fmt.Errorf("unable to configure DNS for this peer using file manager without a nameserver group with all domains configured")
Expand All @@ -58,7 +59,7 @@ func (f *fileConfigurator) applyDNSConfig(config HostDNSConfig) error {
if !backupFileExist {
err = f.backup()
if err != nil {
return fmt.Errorf("unable to backup the resolv.conf file")
return fmt.Errorf("unable to backup the resolv.conf file: %w", err)
}
}

Expand All @@ -67,7 +68,7 @@ func (f *fileConfigurator) applyDNSConfig(config HostDNSConfig) error {

resolvConf, err := parseBackupResolvConf()
if err != nil {
log.Error(err)
log.Errorf("could not read original search domains from %s: %s", fileDefaultResolvConfBackupLocation, err)
}

f.repair.stopWatchFileChanges()
Expand Down Expand Up @@ -96,10 +97,16 @@ func (f *fileConfigurator) updateConfig(nbSearchDomains []string, nbNameserverIP
if restoreErr != nil {
log.Errorf("attempt to restore default file failed with error: %s", err)
}
return fmt.Errorf("got an creating resolver file %s. Error: %s", defaultResolvConfPath, err)
return fmt.Errorf("creating resolver file %s. Error: %w", defaultResolvConfPath, err)
}

log.Infof("created a NetBird managed %s file with the DNS settings. Added %d search domains. Search list: %s", defaultResolvConfPath, len(searchDomainList), searchDomainList)

// create another backup for unclean shutdown detection right after overwriting the original resolv.conf
if err := createUncleanShutdownIndicator(fileDefaultResolvConfBackupLocation, fileManager, nbNameserverIP); err != nil {
log.Errorf("failed to create unclean shutdown resolv.conf backup: %s", err)
}

log.Infof("created a NetBird managed %s file with your DNS settings. Added %d search domains. Search list: %s", defaultResolvConfPath, len(searchDomainList), searchDomainList)
return nil
}

Expand All @@ -111,27 +118,73 @@ func (f *fileConfigurator) restoreHostDNS() error {
func (f *fileConfigurator) backup() error {
stats, err := os.Stat(defaultResolvConfPath)
if err != nil {
return fmt.Errorf("got an error while checking stats for %s file. Error: %s", defaultResolvConfPath, err)
return fmt.Errorf("checking stats for %s file. Error: %w", defaultResolvConfPath, err)
}

f.originalPerms = stats.Mode()

err = copyFile(defaultResolvConfPath, fileDefaultResolvConfBackupLocation)
if err != nil {
return fmt.Errorf("got error while backing up the %s file. Error: %s", defaultResolvConfPath, err)
return fmt.Errorf("backing up %s: %w", defaultResolvConfPath, err)
}
return nil
}

func (f *fileConfigurator) restore() error {
err := copyFile(fileDefaultResolvConfBackupLocation, defaultResolvConfPath)
if err != nil {
return fmt.Errorf("got error while restoring the %s file from %s. Error: %s", defaultResolvConfPath, fileDefaultResolvConfBackupLocation, err)
return fmt.Errorf("restoring %s from %s: %w", defaultResolvConfPath, fileDefaultResolvConfBackupLocation, err)
}

if err := removeUncleanShutdownIndicator(); err != nil {
log.Errorf("failed to remove unclean shutdown resolv.conf backup: %s", err)
}

return os.RemoveAll(fileDefaultResolvConfBackupLocation)
}

func (f *fileConfigurator) restoreUncleanShutdownDNS(storedDNSAddress *netip.Addr) error {
resolvConf, err := parseDefaultResolvConf()
if err != nil {
return fmt.Errorf("parse current resolv.conf: %w", err)
}

// no current nameservers set -> restore
if len(resolvConf.nameServers) == 0 {
return restoreResolvConfFile()
}

currentDNSAddress, err := netip.ParseAddr(resolvConf.nameServers[0])
// not a valid first nameserver -> restore
if err != nil {
log.Errorf("restoring unclean shutdown: parse dns address %s failed: %s", resolvConf.nameServers[1], err)
return restoreResolvConfFile()
}

// current address is still netbird's non-available dns address -> restore
// comparing parsed addresses only, to remove ambiguity
if currentDNSAddress.String() == storedDNSAddress.String() {
return restoreResolvConfFile()
}

log.Info("restoring unclean shutdown: first current nameserver differs from saved nameserver pre-netbird: not restoring")
return nil
}

func restoreResolvConfFile() error {
log.Debugf("restoring unclean shutdown: restoring %s from %s", defaultResolvConfPath, fileUncleanShutdownResolvConfLocation)

if err := copyFile(fileUncleanShutdownResolvConfLocation, defaultResolvConfPath); err != nil {
return fmt.Errorf("restoring %s from %s: %w", defaultResolvConfPath, fileUncleanShutdownResolvConfLocation, err)
}

if err := removeUncleanShutdownIndicator(); err != nil {
log.Errorf("failed to remove unclean shutdown resolv.conf file: %s", err)
}

return nil
}

// generateNsList generates a list of nameservers from the config and adds the primary nameserver to the beginning of the list
func generateNsList(nbNameserverIP string, cfg *resolvConf) []string {
ns := make([]string, 1, len(cfg.nameServers)+1)
Expand Down Expand Up @@ -231,17 +284,17 @@ func validateAndFillSearchDomains(initialLineChars int, s *[]string, vs []string
func copyFile(src, dest string) error {
stats, err := os.Stat(src)
if err != nil {
return fmt.Errorf("got an error while checking stats for %s file when copying it. Error: %s", src, err)
return fmt.Errorf("checking stats for %s file when copying it. Error: %s", src, err)
}

bytesRead, err := os.ReadFile(src)
if err != nil {
return fmt.Errorf("got an error while reading the file %s file for copy. Error: %s", src, err)
return fmt.Errorf("reading the file %s file for copy. Error: %s", src, err)
}

err = os.WriteFile(dest, bytesRead, stats.Mode())
if err != nil {
return fmt.Errorf("got an writing the destination file %s for copy. Error: %s", dest, err)
return fmt.Errorf("writing the destination file %s for copy. Error: %s", dest, err)
}
return nil
}
Expand Down
23 changes: 17 additions & 6 deletions client/internal/dns/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package dns

import (
"fmt"
"net/netip"
"strings"

nbdns "github.com/netbirdio/netbird/dns"
Expand All @@ -11,6 +12,7 @@ type hostManager interface {
applyDNSConfig(config HostDNSConfig) error
restoreHostDNS() error
supportCustomPort() bool
restoreUncleanShutdownDNS(storedDNSAddress *netip.Addr) error
}

type HostDNSConfig struct {
Expand All @@ -27,9 +29,10 @@ type DomainConfig struct {
}

type mockHostConfigurator struct {
applyDNSConfigFunc func(config HostDNSConfig) error
restoreHostDNSFunc func() error
supportCustomPortFunc func() bool
applyDNSConfigFunc func(config HostDNSConfig) error
restoreHostDNSFunc func() error
supportCustomPortFunc func() bool
restoreUncleanShutdownDNSFunc func(*netip.Addr) error
}

func (m *mockHostConfigurator) applyDNSConfig(config HostDNSConfig) error {
Expand All @@ -53,11 +56,19 @@ func (m *mockHostConfigurator) supportCustomPort() bool {
return false
}

func (m *mockHostConfigurator) restoreUncleanShutdownDNS(storedDNSAddress *netip.Addr) error {
if m.restoreUncleanShutdownDNSFunc != nil {
return m.restoreUncleanShutdownDNSFunc(storedDNSAddress)
}
return fmt.Errorf("method restoreUncleanShutdownDNS is not implemented")
}

func newNoopHostMocker() hostManager {
return &mockHostConfigurator{
applyDNSConfigFunc: func(config HostDNSConfig) error { return nil },
restoreHostDNSFunc: func() error { return nil },
supportCustomPortFunc: func() bool { return true },
applyDNSConfigFunc: func(config HostDNSConfig) error { return nil },
restoreHostDNSFunc: func() error { return nil },
supportCustomPortFunc: func() bool { return true },
restoreUncleanShutdownDNSFunc: func(*netip.Addr) error { return nil },
}
}

Expand Down
8 changes: 7 additions & 1 deletion client/internal/dns/host_android.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package dns

import "net/netip"

type androidHostManager struct {
}

func newHostManager(wgInterface WGIface) (hostManager, error) {
func newHostManager() (hostManager, error) {
return &androidHostManager{}, nil
}

Expand All @@ -18,3 +20,7 @@ func (a androidHostManager) restoreHostDNS() error {
func (a androidHostManager) supportCustomPort() bool {
return false
}

func (a androidHostManager) restoreUncleanShutdownDNS(*netip.Addr) error {
return nil
}