-
Notifications
You must be signed in to change notification settings - Fork 38.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add optional mutation checks for shared informer cache #27784
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/* | ||
Copyright 2016 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package cache | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"reflect" | ||
"strconv" | ||
"sync" | ||
"time" | ||
|
||
"k8s.io/kubernetes/pkg/api" | ||
"k8s.io/kubernetes/pkg/runtime" | ||
"k8s.io/kubernetes/pkg/util/diff" | ||
) | ||
|
||
// mutationDetectionEnabled records whether cache mutation detection was requested
// through the KUBE_CACHE_MUTATION_DETECTOR environment variable. It is assigned
// once, during package initialization.
var mutationDetectionEnabled bool

func init() {
	// The parse error is deliberately dropped: for an unset or unparsable value
	// strconv.ParseBool returns false, which leaves detection disabled.
	enabled, _ := strconv.ParseBool(os.Getenv("KUBE_CACHE_MUTATION_DETECTOR"))
	mutationDetectionEnabled = enabled
}
|
||
// CacheMutationDetector is given the objects that are placed in a cache so that
// later modifications of those objects can be noticed.
type CacheMutationDetector interface {
	// Run starts the detector; stopCh is the channel used to tell it to stop.
	Run(stopCh <-chan struct{})
	// AddObject hands the detector an object that is being put into the cache.
	AddObject(obj interface{})
}
|
||
func NewCacheMutationDetector(name string) CacheMutationDetector { | ||
if !mutationDetectionEnabled { | ||
return dummyMutationDetector{} | ||
} | ||
return &defaultCacheMutationDetector{name: name, period: 1 * time.Second} | ||
} | ||
|
||
// dummyMutationDetector is a detector whose methods do nothing.
type dummyMutationDetector struct{}

// AddObject ignores the object it is given.
func (dummyMutationDetector) AddObject(_ interface{}) {}

// Run returns immediately without starting anything.
func (dummyMutationDetector) Run(_ <-chan struct{}) {}
|
||
// defaultCacheMutationDetector gives a way to detect if a cached object has been mutated | ||
// It has a list of cached objects and their copies. I haven't thought of a way | ||
// to see WHO is mutating it, just that it's getting mutated. | ||
type defaultCacheMutationDetector struct { | ||
name string | ||
period time.Duration | ||
|
||
lock sync.Mutex | ||
cachedObjs []cacheObj | ||
|
||
// failureFunc is injectable for unit testing. If you don't have it, the process will panic. | ||
// This panic is intentional, since turning on this detection indicates you want a strong | ||
// failure signal. This failure is effectively a p0 bug and you can't trust process results | ||
// after a mutation anyway. | ||
failureFunc func(message string) | ||
} | ||
|
||
// cacheObj pairs an object held by the cache with a copy of that object.
type cacheObj struct {
	// cached is the actual object.
	cached interface{}
	// copied is the copy that cached is compared against.
	copied interface{}
}
|
||
func (d *defaultCacheMutationDetector) Run(stopCh <-chan struct{}) { | ||
// we DON'T want protection from panics. If we're running this code, we want to die | ||
go func() { | ||
for { | ||
d.CompareObjects() | ||
|
||
select { | ||
case <-stopCh: | ||
return | ||
case <-time.After(d.period): | ||
} | ||
} | ||
}() | ||
} | ||
|
||
// AddObject makes a deep copy of the object for later comparison. It only works on runtime.Object | ||
// but that covers the vast majority of our cached objects | ||
func (d *defaultCacheMutationDetector) AddObject(obj interface{}) { | ||
if _, ok := obj.(DeletedFinalStateUnknown); ok { | ||
return | ||
} | ||
if _, ok := obj.(runtime.Object); !ok { | ||
return | ||
} | ||
|
||
copiedObj, err := api.Scheme.Copy(obj.(runtime.Object)) | ||
if err != nil { | ||
return | ||
} | ||
|
||
d.lock.Lock() | ||
defer d.lock.Unlock() | ||
d.cachedObjs = append(d.cachedObjs, cacheObj{cached: obj, copied: copiedObj}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't we want to dedup? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think we want a reference to every reference that has existed since we don't know which reference any particular observer (and naughty mutator) may have gotten. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see |
||
} | ||
|
||
func (d *defaultCacheMutationDetector) CompareObjects() { | ||
d.lock.Lock() | ||
defer d.lock.Unlock() | ||
|
||
altered := false | ||
for i, obj := range d.cachedObjs { | ||
if !reflect.DeepEqual(obj.cached, obj.copied) { | ||
fmt.Printf("CACHE %s[%d] ALTERED!\n%v\n", d.name, i, diff.ObjectDiff(obj.cached, obj.copied)) | ||
altered = true | ||
} | ||
} | ||
|
||
if altered { | ||
msg := fmt.Sprintf("cache %s modified", d.name) | ||
if d.failureFunc != nil { | ||
d.failureFunc(msg) | ||
return | ||
} | ||
panic(msg) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// +build !race | ||
|
||
/* | ||
Copyright 2016 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package cache | ||
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"k8s.io/kubernetes/pkg/api" | ||
"k8s.io/kubernetes/pkg/runtime" | ||
"k8s.io/kubernetes/pkg/watch" | ||
) | ||
|
||
func TestMutationDetector(t *testing.T) { | ||
fakeWatch := watch.NewFake() | ||
lw := &testLW{ | ||
WatchFunc: func(options api.ListOptions) (watch.Interface, error) { | ||
return fakeWatch, nil | ||
}, | ||
ListFunc: func(options api.ListOptions) (runtime.Object, error) { | ||
return &api.PodList{}, nil | ||
}, | ||
} | ||
pod := &api.Pod{ | ||
ObjectMeta: api.ObjectMeta{ | ||
Name: "anything", | ||
Labels: map[string]string{"check": "foo"}, | ||
}, | ||
} | ||
stopCh := make(chan struct{}) | ||
defer close(stopCh) | ||
addReceived := make(chan bool) | ||
mutationFound := make(chan bool) | ||
|
||
informer := NewSharedInformer(lw, &api.Pod{}, 1*time.Second).(*sharedIndexInformer) | ||
informer.cacheMutationDetector = &defaultCacheMutationDetector{ | ||
name: "name", | ||
period: 1 * time.Second, | ||
failureFunc: func(message string) { | ||
mutationFound <- true | ||
}, | ||
} | ||
informer.AddEventHandler( | ||
ResourceEventHandlerFuncs{ | ||
AddFunc: func(obj interface{}) { | ||
addReceived <- true | ||
}, | ||
}, | ||
) | ||
go informer.Run(stopCh) | ||
|
||
fakeWatch.Add(pod) | ||
|
||
select { | ||
case <-addReceived: | ||
} | ||
|
||
pod.Labels["change"] = "true" | ||
|
||
select { | ||
case <-mutationFound: | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,11 +68,12 @@ func NewSharedInformer(lw ListerWatcher, objType runtime.Object, resyncPeriod ti | |
// be shared amongst all consumers. | ||
func NewSharedIndexInformer(lw ListerWatcher, objType runtime.Object, resyncPeriod time.Duration, indexers Indexers) SharedIndexInformer { | ||
sharedIndexInformer := &sharedIndexInformer{ | ||
processor: &sharedProcessor{}, | ||
indexer: NewIndexer(DeletionHandlingMetaNamespaceKeyFunc, indexers), | ||
listerWatcher: lw, | ||
objectType: objType, | ||
fullResyncPeriod: resyncPeriod, | ||
processor: &sharedProcessor{}, | ||
indexer: NewIndexer(DeletionHandlingMetaNamespaceKeyFunc, indexers), | ||
listerWatcher: lw, | ||
objectType: objType, | ||
fullResyncPeriod: resyncPeriod, | ||
cacheMutationDetector: NewCacheMutationDetector(fmt.Sprintf("%T", objType)), | ||
} | ||
return sharedIndexInformer | ||
} | ||
|
@@ -109,7 +110,8 @@ type sharedIndexInformer struct { | |
indexer Indexer | ||
controller *Controller | ||
|
||
processor *sharedProcessor | ||
processor *sharedProcessor | ||
cacheMutationDetector CacheMutationDetector | ||
|
||
// This block is tracked to handle late initialization of the controller | ||
listerWatcher ListerWatcher | ||
|
@@ -180,6 +182,7 @@ func (s *sharedIndexInformer) Run(stopCh <-chan struct{}) { | |
}() | ||
|
||
s.stopCh = stopCh | ||
s.cacheMutationDetector.Run(stopCh) | ||
s.processor.run(stopCh) | ||
s.controller.Run(stopCh) | ||
} | ||
|
@@ -273,6 +276,7 @@ func (s *sharedIndexInformer) HandleDeltas(obj interface{}) error { | |
for _, d := range obj.(Deltas) { | ||
switch d.Type { | ||
case Sync, Added, Updated: | ||
s.cacheMutationDetector.AddObject(d.Object) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we also want to remove on Deleted?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Deletion doesn't guarantee references are gone. If we end up with memory pressure problems during runs, we could consider that. |
||
if old, exists, err := s.indexer.Get(d.Object); err == nil && exists { | ||
if err := s.indexer.Update(d.Object); err != nil { | ||
return err | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add godoc that if this is not specified, we panic
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure though that's desired behavior. This func only exists for unit testing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then call that out