Skip to content

Commit

Permalink
- Added a new channel named "deleted" to the EgressDNS struct, so tha…
Browse files Browse the repository at this point in the history
…t the domain name (if any) of a newly deleted EgressFirewall can be passed over to the Run() loop, which keeps track of DNS records and updates them as soon as they expire. This prevents inconsistencies between current EgressFirewalls and corresponding DNS records, which eventually led to a panic in a customer case.

- The aforementioned panic is now avoided by checking for the presence of the given DNS domain to delete in a map, before proceeding to update its (pointer) value.
(- Made error messages start with lowercase letters to comply with style guidelines, as a CI test was failing)

This patch fixes bug BZ 2000057.

Signed-off-by: Riccardo Ravaioli <rravaiol@redhat.com>
(cherry picked from commit 12ff248)
  • Loading branch information
ricky-rav committed Sep 15, 2021
1 parent 67cc966 commit 6a11b65
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 7 deletions.
29 changes: 23 additions & 6 deletions go-controller/pkg/ovn/egressfirewall_dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type EgressDNS struct {

// Report change when Add operation is done
added chan struct{}
deleted chan string
stopChan chan struct{}
controllerStop <-chan struct{}
}
Expand All @@ -51,6 +52,7 @@ func NewEgressDNS(addressSetFactory addressset.AddressSetFactory, controllerStop
addressSetFactory: addressSetFactory,

added: make(chan struct{}),
deleted: make(chan string, 1),
stopChan: make(chan struct{}),
controllerStop: controllerStop,
}
Expand Down Expand Up @@ -91,19 +93,23 @@ func (e *EgressDNS) Delete(namespace string) bool {
// delete the dnsEntry
delete(dnsEntry.namespaces, namespace)
if len(dnsEntry.namespaces) == 0 {
// the dnsEntry appears in no other namespace so delete the address_set
// the dnsEntry appears in no other namespace, so delete the address_set
err := dnsEntry.dnsAddressSet.Destroy()
if err != nil {
klog.Errorf("Error deleteing EgressFirewall AddressSet for dnsName: %s %v", dnsName, err)
klog.Errorf("Error deleting EgressFirewall AddressSet for dnsName: %s %v", dnsName, err)
}
// the dnsEntry is no longer needed because nothing references it delete it
// the dnsEntry is no longer needed because nothing references it, so delete it
delete(e.dnsEntries, dnsName)
dnsNamesToDelete = append(dnsNamesToDelete, dnsName)
}
}
e.lock.Unlock()
for _, name := range dnsNamesToDelete {
e.dns.Delete(name)
// send a message to the "deleted" buffered channel so that Run() stops using
// the deleted domain name. (channel is buffered so that sending values to it
// blocks only if Run() is busy updating its internal values)
e.deleted <- name
}
return len(e.dnsEntries) == 0
}
Expand All @@ -116,6 +122,10 @@ func (e *EgressDNS) updateEntryForName(dnsName string) error {
e.lock.Lock()
defer e.lock.Unlock()
ips := e.dns.GetIPs(dnsName)
if _, ok := e.dnsEntries[dnsName]; !ok {
return fmt.Errorf("cannot update DNS record for %s: no entry found. "+
"Was the EgressFirewall deleted?", dnsName)
}
e.dnsEntries[dnsName].dnsResolves = ips

if err := e.dnsEntries[dnsName].dnsAddressSet.SetIPs(ips); err != nil {
Expand Down Expand Up @@ -145,12 +155,13 @@ func (e *EgressDNS) addToDNS(dnsName string) {
}

// Run spawns a goroutine that handles updates to the dns entries for dnsNames used in
// EgressFirewalls. The loop runs after receiving one of two signals
// EgressFirewalls. The loop runs after receiving one of three signals:
// 1. time.After(durationTillNextQuery) times out and the dnsName with the lowest ttl is checked
// and the durationTillNextQuery is updated
// 2. e.added is recived and durationTillNextQuery is recomputed
// 2. e.added is received and durationTillNextQuery is recomputed
// 3. e.deleted is received and coincides with dnsName
func (e *EgressDNS) Run(defaultInterval time.Duration) {
var dnsName string
var dnsName, dnsNameDeleted string
var ttl time.Time
var timeSet bool
// initially the next DNS Query happens at the default interval
Expand All @@ -172,6 +183,12 @@ func (e *EgressDNS) Run(defaultInterval time.Duration) {
utilruntime.HandleError(err)
}
}
case dnsNameDeleted = <-e.deleted:
// Break from the select and update dnsName only if dnsName
// appears in an EgressFirewall that has just been deleted.
if dnsName != dnsNameDeleted {
continue
}
case <-e.stopChan:
return
case <-e.controllerStop:
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/unidling.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ var _ = ginkgo.Describe("Unidling", func() {
if pollErr := wait.PollImmediate(framework.Poll, e2eservice.KubeProxyEndpointLagTimeout, func() (bool, error) {
_, err := framework.RunHostCmd(execPod.Namespace, execPod.Name, cmd)
if err != nil && strings.Contains(err.Error(), nonExpectedErr) {
return false, fmt.Errorf("Service is rejecting packets")
return false, fmt.Errorf("service is rejecting packets")
}
// An event like this must be generated
// oc.recorder.Eventf(&serviceRef, kapi.EventTypeNormal, "NeedPods", "The service %s needs pods", serviceName.Name)
Expand Down

0 comments on commit 6a11b65

Please sign in to comment.