Skip to content

Commit

Permalink
Fix panic in NP deletion
Browse files Browse the repository at this point in the history
Fixes panic observed during NP deletion:

2022-05-27T15:48:27.957882179Z I0527 15:48:27.957839       1
policy.go:1251] Deleting network policy XXXXX in namespace XXXXX
2022-05-27T15:48:27.957950868Z E0527 15:48:27.957928       1
runtime.go:78] Observed a panic: "invalid memory address or nil pointer
dereference" (runtime error: invalid memory address or nil pointer
dereference)
2022-05-27T15:48:27.957950868Z goroutine 132 [running]:
2022-05-27T15:48:27.957950868Z
k8s.io/apimachinery/pkg/util/runtime.logPanic(0x1930dc0, 0x2907f50)
2022-05-27T15:48:27.957950868Z
/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:74
+0x95
2022-05-27T15:48:27.957950868Z
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
2022-05-27T15:48:27.957950868Z
/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:48
+0x86
2022-05-27T15:48:27.957950868Z panic(0x1930dc0, 0x2907f50)
2022-05-27T15:48:27.957950868Z 	/usr/lib/golang/src/runtime/panic.go:965
+0x1b9
2022-05-27T15:48:27.957950868Z
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn.(*Controller).destroyNetworkPolicy(0xc002fcc6c0,
0x0, 0xc000fc9901, 0x0, 0x0)
2022-05-27T15:48:27.957950868Z
/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/ovn/policy.go:1287
+0x55
2022-05-27T15:48:27.957950868Z
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn.(*Controller).deleteNetworkPolicy(0xc002fcc6c0,
0xc0006cc580, 0x0, 0x0, 0x0)
2022-05-27T15:48:27.957950868Z
/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/ovn/policy.go:1275
+0x451

Signed-off-by: Surya Seetharaman <suryaseetharaman.9@gmail.com>
  • Loading branch information
tssurya committed Jun 23, 2022
1 parent a04606b commit 93d4889
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 10 deletions.
2 changes: 1 addition & 1 deletion go-controller/pkg/libovsdbops/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func DeleteACLs(nbClient libovsdbclient.Client, acls ...*nbdb.ACL) error {
Model: acl,
ModelPredicate: func(item *nbdb.ACL) bool { return isEquivalentACL(item, acl) },
ErrNotFound: false,
BulkOp: false,
BulkOp: true,
}
opModels = append(opModels, opModel)
}
Expand Down
54 changes: 53 additions & 1 deletion go-controller/pkg/ovn/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,36 @@ func (oc *Controller) createDefaultDenyPGAndACLs(namespace, policy string, nsInf
return nil
}

// deleteDefaultDenyPGAndACLs deletes the default port groups and acls for a ns/policy
// must be called with a write lock on nsInfo
func (oc *Controller) deleteDefaultDenyPGAndACLs(namespace, policy string, nsInfo *namespaceInfo) error {
aclLogging := nsInfo.aclLogging.Deny
var aclsToBeDeleted []*nbdb.ACL

ingressPGName := defaultDenyPortGroup(namespace, ingressDefaultDenySuffix)
ingressDenyACL, ingressAllowACL := buildDenyACLs(namespace, policy, ingressPGName, aclLogging, knet.PolicyTypeIngress)
aclsToBeDeleted = append(aclsToBeDeleted, ingressDenyACL, ingressAllowACL)
egressPGName := defaultDenyPortGroup(namespace, egressDefaultDenySuffix)
egressDenyACL, egressAllowACL := buildDenyACLs(namespace, policy, egressPGName, aclLogging, knet.PolicyTypeEgress)
aclsToBeDeleted = append(aclsToBeDeleted, egressDenyACL, egressAllowACL)

err := libovsdbops.DeletePortGroups(oc.nbClient, ingressPGName, egressPGName)
if err != nil {
return err
}

nsInfo.portGroupEgressDenyName = ""
nsInfo.portGroupIngressDenyName = ""

// Manually remove the default ACLs instead of relying on ovsdb garbage collection to do so
err = libovsdbops.DeleteACLs(oc.nbClient, aclsToBeDeleted...)
if err != nil {
return err
}

return nil
}

func (oc *Controller) updateACLLoggingForPolicy(np *networkPolicy, logLevel string) error {
np.Lock()
defer np.Unlock()
Expand Down Expand Up @@ -1174,6 +1204,23 @@ func (oc *Controller) addNetworkPolicy(policy *knet.NetworkPolicy) error {

if len(nsInfo.networkPolicies) == 0 {
err = oc.createDefaultDenyPGAndACLs(policy.Namespace, policy.Name, nsInfo)
defer func() {
if err != nil {
nsInfo, nsUnlock, errDelete := oc.ensureNamespaceLocked(policy.Namespace, false, nil)
// rollback failed, best effort cleanup; won't add to retry mechanism since item doesn't exist in cache yet.
if errDelete != nil {
klog.Warningf("Rollback of default port groups and acls for policy: %s/%s failed, Unable to ensure namespace for network policy: error %v", policy.Namespace, policy.Name, errDelete)
return
}
// try rolling-back since creation of default acls/pgs failed
errDelete = oc.deleteDefaultDenyPGAndACLs(policy.Namespace, policy.Name, nsInfo)
nsUnlock()
if errDelete != nil {
// rollback failed, best effort cleanup; won't add to retry mechanism since item doesn't exist in cache yet.
klog.Warningf("Rollback of default port groups and acls for policy: %s/%s failed: error %v", policy.Namespace, policy.Name, errDelete)
}
}
}()
if err != nil {
nsUnlock()
return fmt.Errorf("failed to create default port groups and acls for policy: %s/%s, error: %v",
Expand Down Expand Up @@ -1242,8 +1289,9 @@ func (oc *Controller) deleteNetworkPolicy(policy *knet.NetworkPolicy, np *networ
if nsInfo == nil {
// if we didn't get nsInfo and np is nil, we cannot proceed
if np == nil {
return fmt.Errorf("failed to get namespace lock when deleting policy %s in namespace %s",
klog.Warningf("Failed to get namespace lock when deleting policy %s in namespace %s",
policy.Name, policy.Namespace)
return nil
}

if err := oc.destroyNetworkPolicy(np, false); err != nil {
Expand All @@ -1263,6 +1311,10 @@ func (oc *Controller) deleteNetworkPolicy(policy *knet.NetworkPolicy, np *networ
expectedLastPolicyNum = 1
np = foundNp
}
if np == nil {
klog.Warningf("Unable to delete network policy: %s/%s since its not found in cache", policy.Namespace, policy.Name)
return nil
}
isLastPolicyInNamespace := len(nsInfo.networkPolicies) == expectedLastPolicyNum
if err := oc.destroyNetworkPolicy(np, isLastPolicyInNamespace); err != nil {
return fmt.Errorf("failed to destroy network policy: %s/%s", policy.Namespace, policy.Name)
Expand Down
16 changes: 8 additions & 8 deletions go-controller/pkg/ovn/policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1609,7 +1609,7 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("deleting a network policy that was not added successfully panics", func() {
ginkgo.It("deleting a network policy that failed half-way through creation succeeds", func() {
app.Action = func(ctx *cli.Context) error {
namespace1 := *newNamespace(namespaceName1)
nPodTest := newTPod(
Expand Down Expand Up @@ -1718,17 +1718,17 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() {
gomega.Expect(err).To(gomega.HaveOccurred())
gomega.Expect(err.Error()).To(gomega.ContainSubstring("failed to create default port groups and acls for policy: namespace1/networkpolicy1, error: unexpectedly found multiple results for provided predicate"))

ginkgo.By("Deleting the network policy that failed to create and ensuring we panic")
gomega.Expect(func () {
fakeOvn.controller.deleteNetworkPolicy(networkPolicy, nil)
}).To(gomega.Panic())

//ensure the default PGs and ACLs were removed via rollback from add failure
expectedData := []libovsdb.TestData{}
expectedData = append(expectedData, getExpectedDataPodsAndSwitches([]testPod{nPodTest}, []string{"node1"})...)
expectedData = append(expectedData, leftOverACLFromUpgrade1)
expectedData = append(expectedData, leftOverACLFromUpgrade2)
gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData...))

ginkgo.By("Deleting the network policy that failed to create and ensuring we don't panic")
err = fakeOvn.controller.deleteNetworkPolicy(networkPolicy, nil)
// I0623 policy.go:1285] Deleting network policy networkpolicy1 in namespace namespace1, np is nil: true
// W0623 policy.go:1315] Unable to delete network policy: namespace1/networkpolicy1 since its not found in cache
gomega.Expect(err).NotTo(gomega.HaveOccurred())

return nil
}

Expand Down

0 comments on commit 93d4889

Please sign in to comment.