Skip to content

Commit

Permalink
Detect Network Instance IP subnet conflict
Browse files Browse the repository at this point in the history
Zedrouter is able to detect if NI IP subnet overlaps with the subnet
of another NI or with a device port network. When a conflict is detected
for a new NI, it is blocked from being created. If IP conflict arises
for an already created NI (e.g. a device port later received IP from
the same subnet), the NI is unconfigured and VIFs are set DOWN
(not deleted). In both cases an error is reported for the NI.

This is crucial because, previously, an NI with a subnet overlapping with
a device port network would result in the device permanently losing
controller connectivity (due to routing conflicts), with very limited
options to restore connectivity and make the device manageable again.
Instead, the device should remain online, inform the user about the issue,
and allow to correct the IP configuration.

When IP conflict is detected for an already created NI with an app
connected to it, we intentionally do not halt and undeploy the app.
Instead, the app is left running, just VIFs connected to the problematic
NI lose their connectivity. This allows the user to fix the network
config or at least backup the application data when IP conflict arises.

To enable the deletion of NI when an IP conflict is detected, followed
by its later recreation when the conflict is resolved, and to reconnect
already running apps, few enhancements had to be made to the NI Reconciler,
particularly in the area of VIF bridging.

Signed-off-by: Milan Lenco <milan@zededa.com>
  • Loading branch information
milan-zededa committed Mar 18, 2024
1 parent 1cd8f6a commit 1816ee2
Show file tree
Hide file tree
Showing 16 changed files with 481 additions and 280 deletions.
60 changes: 46 additions & 14 deletions pkg/pillar/cmd/zedrouter/networkinstance.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func (z *zedrouter) getNIBridgeConfig(
MACAddress: status.BridgeMac,
IPAddress: ipAddr,
Uplink: z.getNIUplinkConfig(status),
IPConflict: status.IPConflict,
}
}

Expand Down Expand Up @@ -297,26 +298,57 @@ func (z *zedrouter) delNetworkInstance(status *types.NetworkInstanceStatus) {
z.deleteNetworkInstanceMetrics(status.Key())
}

// Called when a NetworkInstance is deleted or modified to check if there are other
// NIs that previously could not be configured due to inter-NI config conflicts.
func (z *zedrouter) checkConflictingNetworkInstances() {
// Called when a NetworkInstance is deleted or modified, or when a device port IP is
// added or removed, to check if there are new IP conflicts or if some existing
// have been resolved.
func (z *zedrouter) checkAllNetworkInstanceIPConflicts() {
for _, item := range z.pubNetworkInstanceStatus.GetAll() {
niStatus := item.(types.NetworkInstanceStatus)
if !niStatus.NIConflict {
continue
}
niConfig := z.lookupNetworkInstanceConfig(niStatus.Key())
if niConfig == nil {
continue
}
niConflict, err := z.doNetworkInstanceSanityCheck(niConfig)
if err == nil && niConflict == false {
// Try to re-create the network instance now that the conflict is gone.
z.log.Noticef("Recreating NI %s (%s) now that inter-NI conflict "+
"is not present anymore", niConfig.UUID, niConfig.DisplayName)
// First release whatever has been already allocated for this NI.
z.delNetworkInstance(&niStatus)
z.handleNetworkInstanceCreate(nil, niConfig.Key(), *niConfig)
conflictErr := z.checkNetworkInstanceIPConflicts(niConfig)
if conflictErr == nil && niStatus.IPConflict {
// IP conflict was resolved.
if niStatus.Activated {
// Local NI was initially activated prior to the IP conflict.
// Subsequently, when the IP conflict arose, it was almost completely
// un-configured (only preserving app VIFs) to keep device connectivity
// unaffected. Now, it can be restored to full functionality.
z.log.Noticef("Updating NI %s (%s) now that IP conflict "+
"is not present anymore", niConfig.UUID, niConfig.DisplayName)
niStatus.IPConflict = false
niStatus.ClearError()
// This also publishes the new status.
z.doUpdateActivatedNetworkInstance(*niConfig, &niStatus)
} else {
// NI is not in an active state (nothing configured in the network stack).
// We can simply re-create the network instance now that the IP conflict
// is gone.
z.log.Noticef("Recreating NI %s (%s) now that IP conflict "+
"is not present anymore", niConfig.UUID, niConfig.DisplayName)
// First release whatever has been already allocated for this NI.
z.delNetworkInstance(&niStatus)
z.handleNetworkInstanceCreate(nil, niConfig.Key(), *niConfig)
}
}
if conflictErr != nil && !niStatus.IPConflict {
// New IP conflict arose.
z.log.Error(conflictErr)
niStatus.IPConflict = true
niStatus.SetErrorNow(conflictErr.Error())
z.publishNetworkInstanceStatus(&niStatus)
if niStatus.Activated {
// Local NI is already activated. Instead of removing it and halting
// all connected applications (which can lead to loss of data), we
// un-configure everything but app VIFs, which will be set DOWN
// on the host side. User has a chance to fix the configuration.
// When IP conflict is removed, NI will be automatically fully restored.
z.log.Noticef("Updating NI %s (%s) after detecting an IP conflict (%s)",
niConfig.UUID, niConfig.DisplayName, conflictErr)
z.doUpdateActivatedNetworkInstance(*niConfig, &niStatus)
}
}
}
}
38 changes: 30 additions & 8 deletions pkg/pillar/cmd/zedrouter/pubsubhandlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ func (z *zedrouter) handleDNSImpl(ctxArg interface{}, key string,
z.initReconcileDone = true
}

// A new IP address may have been assigned to a device port, or a previously existing
// one may have been removed, potentially creating or resolving an IP conflict.
z.checkAllNetworkInstanceIPConflicts()

// Update uplink config for network instances.
// Also handle (dis)appearance of uplink interfaces.
// Note that even if uplink interface disappears, we do not revert activated NI.
Expand Down Expand Up @@ -181,11 +185,19 @@ func (z *zedrouter) handleNetworkInstanceCreate(ctxArg interface{}, key string,
return
}

if niConflict, err := z.doNetworkInstanceSanityCheck(&config); err != nil {
if err := z.doNetworkInstanceSanityCheck(&config); err != nil {
z.log.Error(err)
status.SetErrorNow(err.Error())
status.ChangeInProgress = types.ChangeInProgressTypeNone
z.publishNetworkInstanceStatus(&status)
return
}

if err := z.checkNetworkInstanceIPConflicts(&config); err != nil {
z.log.Error(err)
status.SetErrorNow(err.Error())
status.ChangeInProgress = types.ChangeInProgressTypeNone
status.NIConflict = niConflict
status.IPConflict = true
z.publishNetworkInstanceStatus(&status)
return
}
Expand Down Expand Up @@ -301,15 +313,25 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string,

prevPortLL := status.PortLogicalLabel
status.NetworkInstanceConfig = config
if niConflict, err := z.doNetworkInstanceSanityCheck(&config); err != nil {
if err := z.doNetworkInstanceSanityCheck(&config); err != nil {
z.log.Error(err)
status.SetErrorNow(err.Error())
status.WaitingForUplink = false
status.ChangeInProgress = types.ChangeInProgressTypeNone
z.publishNetworkInstanceStatus(status)
return
}

if err := z.checkNetworkInstanceIPConflicts(&config); err != nil {
z.log.Error(err)
status.SetErrorNow(err.Error())
status.IPConflict = true
status.WaitingForUplink = false
status.ChangeInProgress = types.ChangeInProgressTypeNone
status.NIConflict = niConflict
z.publishNetworkInstanceStatus(status)
return
}
status.IPConflict = false

// Get or (less likely) allocate a bridge number.
bridgeNumKey := types.UuidToNumKey{UUID: status.UUID}
Expand Down Expand Up @@ -399,8 +421,8 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string,
z.doUpdateActivatedNetworkInstance(config, status)
}

// Check if some inter-NI conflicts were resolved by this modification.
z.checkConflictingNetworkInstances()
// Check if some IP conflicts were resolved by this modification.
z.checkAllNetworkInstanceIPConflicts()
z.log.Functionf("handleNetworkInstanceModify(%s) done", key)
}

Expand All @@ -417,8 +439,8 @@ func (z *zedrouter) handleNetworkInstanceDelete(ctxArg interface{}, key string,
z.publishNetworkInstanceStatus(status)

done := z.maybeDelOrInactivateNetworkInstance(status)
// Check if some inter-NI conflicts were resolved by this delete.
z.checkConflictingNetworkInstances()
// Check if some IP conflicts were resolved by this NI deletion.
z.checkAllNetworkInstanceIPConflicts()
z.log.Functionf("handleNetworkInstanceDelete(%s) done %t", key, done)
}

Expand Down
82 changes: 39 additions & 43 deletions pkg/pillar/cmd/zedrouter/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ import (
"time"

"github.com/lf-edge/eve/pkg/pillar/types"
"github.com/lf-edge/eve/pkg/pillar/utils/netutils"
uuid "github.com/satori/go.uuid"
)

func (z *zedrouter) doNetworkInstanceSanityCheck(
config *types.NetworkInstanceConfig) (niConflict bool, err error) {
func (z *zedrouter) doNetworkInstanceSanityCheck(config *types.NetworkInstanceConfig) error {
z.log.Functionf("Sanity Checking NetworkInstance(%s-%s): type:%d, IpType:%d",
config.DisplayName, config.UUID, config.Type, config.IpType)

Expand All @@ -24,7 +24,7 @@ func (z *zedrouter) doNetworkInstanceSanityCheck(
case types.NetworkInstanceTypeSwitch:
// Do nothing
default:
return false, fmt.Errorf("network instance type %d is not supported", config.Type)
return fmt.Errorf("network instance type %d is not supported", config.Type)
}

// IpType - Check for valid types
Expand All @@ -33,63 +33,32 @@ func (z *zedrouter) doNetworkInstanceSanityCheck(
// Do nothing
case types.AddressTypeIPV4, types.AddressTypeIPV6,
types.AddressTypeCryptoIPV4, types.AddressTypeCryptoIPV6:
niConflict, err = z.doNetworkInstanceSubnetSanityCheck(config)
err := z.doNetworkInstanceSubnetSanityCheck(config)
if err != nil {
return niConflict, err
return err
}
err = z.doNetworkInstanceDhcpRangeSanityCheck(config)
if err != nil {
return false, err
return err
}
err = z.doNetworkInstanceGatewaySanityCheck(config)
if err != nil {
return false, err
return err
}

default:
return false, fmt.Errorf("IpType %d not supported", config.IpType)
return fmt.Errorf("IpType %d not supported", config.IpType)
}
return false, nil
return nil
}

func (z *zedrouter) doNetworkInstanceSubnetSanityCheck(
config *types.NetworkInstanceConfig) (niConflict bool, err error) {
config *types.NetworkInstanceConfig) error {
if config.Subnet.IP == nil || config.Subnet.IP.IsUnspecified() {
return false, fmt.Errorf("subnet unspecified for %s-%s: %+v",
return fmt.Errorf("subnet unspecified for %s-%s: %+v",
config.Key(), config.DisplayName, config.Subnet)
}

items := z.subNetworkInstanceConfig.GetAll()
for key2, config2 := range items {
niConfig2 := config2.(types.NetworkInstanceConfig)
if config.Key() == key2 {
continue
}

// We check for overlapping subnets by checking the
// SubnetAddr ( first address ) is not contained in the subnet of
// any other NI and vice-versa ( Other NI Subnet addrs are not
// contained in the current NI subnet)

// Check if config.Subnet is contained in iterStatusEntry.Subnet
if niConfig2.Subnet.Contains(config.Subnet.IP) {
return true, fmt.Errorf("subnet(%s, IP:%s) overlaps with another "+
"network instance(%s-%s) Subnet(%s)",
config.Subnet.String(), config.Subnet.IP.String(),
niConfig2.DisplayName, niConfig2.UUID,
niConfig2.Subnet.String())
}

// Reverse check: check if iterStatusEntry.Subnet is contained in config.subnet
if config.Subnet.Contains(niConfig2.Subnet.IP) {
return true, fmt.Errorf("another network instance(%s-%s) Subnet(%s) "+
"overlaps with Subnet(%s)",
niConfig2.DisplayName, niConfig2.UUID,
niConfig2.Subnet.String(),
config.Subnet.String())
}
}
return false, nil
return nil
}

func (z *zedrouter) doNetworkInstanceDhcpRangeSanityCheck(
Expand Down Expand Up @@ -131,6 +100,33 @@ func (z *zedrouter) doNetworkInstanceGatewaySanityCheck(
return nil
}

func (z *zedrouter) checkNetworkInstanceIPConflicts(
config *types.NetworkInstanceConfig) error {
// Check for overlapping subnets between NIs.
items := z.subNetworkInstanceConfig.GetAll()
for key2, config2 := range items {
niConfig2 := config2.(types.NetworkInstanceConfig)
if config.Key() == key2 {
continue
}
if netutils.OverlappingSubnets(&config.Subnet, &niConfig2.Subnet) {
return fmt.Errorf("subnet (%s) overlaps with another "+
"network instance (%s-%s) subnet (%s)",
config.Subnet.String(), niConfig2.DisplayName, niConfig2.UUID,
niConfig2.Subnet.String())
}
}
// Check for overlapping subnets between the NI and device ports.
for _, port := range z.deviceNetworkStatus.Ports {
if netutils.OverlappingSubnets(&config.Subnet, &port.Subnet) {
return fmt.Errorf("subnet (%s) overlaps with device port %s "+
"subnet (%s)", config.Subnet.String(), port.Logicallabel,
port.Subnet.String())
}
}
return nil
}

func (z *zedrouter) validateAppNetworkConfig(appNetConfig types.AppNetworkConfig) error {
z.log.Functionf("AppNetwork(%s), check for duplicate port map acls",
appNetConfig.DisplayName)
Expand Down
8 changes: 4 additions & 4 deletions pkg/pillar/cmd/zedrouter/zedrouter.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,22 +244,22 @@ func (z *zedrouter) init() (err error) {

z.peUsagePersist, err = persistcache.New(types.PersistCachePatchEnvelopesUsage)
if err != nil {
log.Fatal(err)
return err
}

// restore cached patchEnvelopeUsage counters
for _, key := range z.peUsagePersist.Objects() {
cached, err := z.peUsagePersist.Get(key)
if err != nil {
log.Fatal(err)
return err
}
buf := bytes.NewBuffer(cached)
dec := gob.NewDecoder(buf)

var peUsage types.PatchEnvelopeUsage

if err := dec.Decode(&peUsage); err != nil {
log.Fatal(err)
return err
}

z.patchEnvelopesUsage.Store(key, peUsage)
Expand Down Expand Up @@ -963,7 +963,7 @@ func (z *zedrouter) processNIReconcileStatus(recStatus nireconciler.NIReconcileS
changed = true
}
} else {
if niStatus.HasError() {
if niStatus.HasError() && !niStatus.IPConflict {
niStatus.ClearError()
changed = true
}
Expand Down
Loading

0 comments on commit 1816ee2

Please sign in to comment.