/
pod.go
417 lines (355 loc) · 14.4 KB
/
pod.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
package controlloop
import (
"context"
"encoding/json"
"fmt"
"net"
"os"
"strconv"
"strings"
"time"
"k8s.io/client-go/kubernetes"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/util/wait"
v1coreinformerfactory "k8s.io/client-go/informers"
v1corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
nadinformers "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions"
nadlister "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1"
"github.com/pkg/errors"
whereaboutsv1alpha1 "github.com/k8snetworkplumbingwg/whereabouts/pkg/api/whereabouts.cni.cncf.io/v1alpha1"
wbclientset "github.com/k8snetworkplumbingwg/whereabouts/pkg/client/clientset/versioned"
wbinformers "github.com/k8snetworkplumbingwg/whereabouts/pkg/client/informers/externalversions"
wblister "github.com/k8snetworkplumbingwg/whereabouts/pkg/client/listers/whereabouts.cni.cncf.io/v1alpha1"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/config"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/iphelpers"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/logging"
wbclient "github.com/k8snetworkplumbingwg/whereabouts/pkg/storage/kubernetes"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/types"
)
// Defaults and tuning values used by the pod controller.
const (
// defaultMountPath is used as the mount path when PodController.mountPath is
// empty; the mount path is prepended to the whereabouts configuration path
// and to the configured kubeconfig path.
defaultMountPath = "/host"
// ipReconcilerQueueName is the name given to the controller's rate-limited
// work queue.
ipReconcilerQueueName = "pod-updates"
// syncPeriod is the period handed to wait.Until when running the worker.
syncPeriod = time.Second
// whereaboutsConfigPath is the whereabouts configuration file path; it is
// appended to the mount path before being loaded.
whereaboutsConfigPath = "/etc/cni/net.d/whereabouts.d/whereabouts.conf"
// maxRetries bounds re-queuing in handleResult: a failed item is re-queued
// while its requeue count is <= maxRetries.
maxRetries = 2
)
// Event reasons recorded against the pod by the controller.
const (
// addressGarbageCollected is the reason of the normal event issued by
// PodController.addressGarbageCollected.
addressGarbageCollected = "IPAddressGarbageCollected"
// addressGarbageCollectionFailed is the reason of the warning event issued
// by PodController.addressGarbageCollectionFailed.
addressGarbageCollectionFailed = "IPAddressGarbageCollectionFailed"
)
// Settings used by PodInformerFactory to build the node-filtered pod informer.
const (
// podControllerFilterKey is the pod field used in the list field selector.
podControllerFilterKey = "spec.nodeName"
// podControllerNodeNameEnvVariable names the environment variable from
// which the node name is read.
podControllerNodeNameEnvVariable = "NODENAME"
// noResyncPeriod is the resync period passed to the shared informer factory.
noResyncPeriod = 0
)
// garbageCollector is the signature of the function the controller invokes to
// act on IP allocations. garbageCollectPodIPs calls it with types.Deallocate
// for every stale allocation it finds; NewPodController plugs in
// wbclient.IPManagement, while newPodController accepts any implementation.
type garbageCollector func(ctx context.Context, mode int, ipamConf types.IPAMConfig, client *wbclient.KubernetesIPAM) ([]net.IPNet, error)
// PodController reacts to pod deletions delivered by the pod informer and
// cleans up the whereabouts IP allocations that still reference the deleted pod.
type PodController struct {
// Clients: the core Kubernetes client and the whereabouts clientset.
k8sClient kubernetes.Interface
wbClient wbclientset.Interface
// HasSynced functions of the three informers; Start waits on all of them.
arePodsSynched cache.InformerSynced
areIPPoolsSynched cache.InformerSynced
areNetAttachDefsSynched cache.InformerSynced
// Informers for pods, IP pools and network-attachment-definitions.
podsInformer cache.SharedIndexInformer
ipPoolInformer cache.SharedIndexInformer
netAttachDefInformer cache.SharedIndexInformer
// Listers backed by the informers above.
podLister v1corelisters.PodLister
ipPoolLister wblister.IPPoolLister
netAttachDefLister nadlister.NetworkAttachmentDefinitionLister
// Event plumbing; recorder may be nil, in which case no events are issued.
broadcaster record.EventBroadcaster
recorder record.EventRecorder
// workqueue holds the (stripped) deleted pods awaiting garbage collection.
workqueue workqueue.RateLimitingInterface
// mountPath overrides defaultMountPath when non-empty. It is not set by
// newPodController.
mountPath string
// cleanupFunc is called with types.Deallocate for each stale allocation.
cleanupFunc garbageCollector
}
// NewPodController builds a PodController from the given clients, informer
// factories and event broadcaster/recorder. It delegates to newPodController,
// using wbclient.IPManagement as the function that cleans up stale allocations.
func NewPodController(
	k8sCoreClient kubernetes.Interface,
	wbClient wbclientset.Interface,
	k8sCoreInformerFactory v1coreinformerfactory.SharedInformerFactory,
	wbSharedInformerFactory wbinformers.SharedInformerFactory,
	netAttachDefInformerFactory nadinformers.SharedInformerFactory,
	broadcaster record.EventBroadcaster,
	recorder record.EventRecorder,
) *PodController {
	return newPodController(
		k8sCoreClient,
		wbClient,
		k8sCoreInformerFactory,
		wbSharedInformerFactory,
		netAttachDefInformerFactory,
		broadcaster,
		recorder,
		wbclient.IPManagement,
	)
}
// PodInformerFactory returns a shared informer factory whose list requests are
// restricted to the pods scheduled on the local node.
//
// The node name is read from the "NODENAME" environment variable and the node
// is looked up through the API first; if that lookup fails, the error is
// wrapped and returned. On success, the returned factory applies the field
// selector spec.nodeName == <value of env NODENAME> and uses no resync period.
func PodInformerFactory(k8sClientSet kubernetes.Interface) (v1coreinformerfactory.SharedInformerFactory, error) {
	nodeName := os.Getenv(podControllerNodeNameEnvVariable)
	logging.Debugf("Filtering pods with filter key '%s' and filter value '%s'", podControllerFilterKey, nodeName)

	_, err := k8sClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
	if err != nil {
		message := fmt.Sprintf("Could not find node with node name '%s'.", nodeName)
		return nil, errors.Wrap(err, message)
	}

	// Restrict every list/watch issued by the factory to pods on this node.
	onlyLocalPods := func(options *metav1.ListOptions) {
		options.FieldSelector = fields.OneTermEqualSelector(podControllerFilterKey, nodeName).String()
	}
	factory := v1coreinformerfactory.NewSharedInformerFactoryWithOptions(
		k8sClientSet,
		noResyncPeriod,
		v1coreinformerfactory.WithTweakListOptions(onlyLocalPods),
	)
	return factory, nil
}
// newPodController wires a PodController together: it obtains the pod, IP pool
// and network-attachment-definition informers from the given factories,
// creates the named rate-limited work queue, and registers a delete handler on
// the pod informer that enqueues deleted pods. cleanupFunc is stored as the
// function used to release stale allocations. The controller's mountPath is
// left empty.
func newPodController(
	k8sCoreClient kubernetes.Interface,
	wbClient wbclientset.Interface,
	k8sCoreInformerFactory v1coreinformerfactory.SharedInformerFactory,
	wbSharedInformerFactory wbinformers.SharedInformerFactory,
	netAttachDefInformerFactory nadinformers.SharedInformerFactory,
	broadcaster record.EventBroadcaster,
	recorder record.EventRecorder,
	cleanupFunc garbageCollector,
) *PodController {
	podAccess := k8sCoreInformerFactory.Core().V1().Pods()
	poolAccess := wbSharedInformerFactory.Whereabouts().V1alpha1().IPPools()
	nadAccess := netAttachDefInformerFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions()

	poolSharedInformer := poolAccess.Informer()
	nadSharedInformer := nadAccess.Informer()
	podSharedInformer := podAccess.Informer()

	deletions := workqueue.NewNamedRateLimitingQueue(
		workqueue.DefaultControllerRateLimiter(),
		ipReconcilerQueueName,
	)

	// Only pod deletions are of interest: they are what leaves allocations stale.
	deleteHandler := cache.ResourceEventHandlerFuncs{
		DeleteFunc: func(obj interface{}) {
			onPodDelete(deletions, obj)
		},
	}
	podSharedInformer.AddEventHandler(deleteHandler)

	return &PodController{
		k8sClient: k8sCoreClient,
		wbClient:  wbClient,

		arePodsSynched:          podSharedInformer.HasSynced,
		areIPPoolsSynched:       poolSharedInformer.HasSynced,
		areNetAttachDefsSynched: nadSharedInformer.HasSynced,

		broadcaster: broadcaster,
		recorder:    recorder,

		podsInformer:         podSharedInformer,
		ipPoolInformer:       poolSharedInformer,
		netAttachDefInformer: nadSharedInformer,

		podLister:          podAccess.Lister(),
		ipPoolLister:       poolAccess.Lister(),
		netAttachDefLister: nadAccess.Lister(),

		workqueue:   deletions,
		cleanupFunc: cleanupFunc,
	}
}
// Start waits for the pod, network-attachment-definition and IP pool caches to
// sync, then launches the worker in its own goroutine, re-running it every
// syncPeriod until stopChan is closed. A failed cache sync is only logged; the
// worker goroutine is launched regardless.
func (pc *PodController) Start(stopChan <-chan struct{}) {
	logging.Verbosef("starting network controller")

	cachesSynced := cache.WaitForCacheSync(
		stopChan,
		pc.arePodsSynched,
		pc.areNetAttachDefsSynched,
		pc.areIPPoolsSynched,
	)
	if !cachesSynced {
		logging.Verbosef("failed waiting for caches to sync")
	}

	go wait.Until(pc.worker, syncPeriod, stopChan)
}
// Shutdown shuts down the PodController's work queue. Once the queue reports
// that it is shutting down, processNextWorkItem returns false and the worker
// loop ends.
func (pc *PodController) Shutdown() {
pc.workqueue.ShutDown()
}
// worker drains the work queue, handling one item at a time until
// processNextWorkItem reports that processing should stop.
func (pc *PodController) worker() {
	for {
		if keepGoing := pc.processNextWorkItem(); !keepGoing {
			return
		}
	}
}
// processNextWorkItem takes the next item off the work queue, garbage collects
// the IP addresses of the pod it holds, and hands the outcome to handleResult
// (which decides between forgetting, re-queuing or giving up).
//
// It returns false only when the queue is shutting down, which tells the
// worker loop to stop; it returns true in every other case, including when
// the item could not be processed.
func (pc *PodController) processNextWorkItem() bool {
	queueItem, shouldQuit := pc.workqueue.Get()
	if shouldQuit {
		return false
	}
	// Done must always be called for an item obtained through Get.
	defer pc.workqueue.Done(queueItem)

	pod, isPod := queueItem.(*v1.Pod)
	if !isPod || pod == nil {
		// Retrying cannot repair an item of the wrong type (or a nil pod), and an
		// unchecked assertion would panic the worker: drop the item instead.
		logging.Errorf("dropping unexpected item of type %T from the work queue", queueItem)
		pc.workqueue.Forget(queueItem)
		return true
	}

	err := pc.garbageCollectPodIPs(pod)
	logging.Verbosef("result of garbage collecting pods: %+v", err)
	pc.handleResult(pod, err)

	return true
}
// garbageCollectPodIPs releases the IP allocations that still reference the
// given (deleted) pod.
//
// For every non-default interface listed in the pod's network-status
// annotation it resolves the network-attachment-definition, builds the IPAM
// configuration, fetches the IP pool of each configured range, and invokes
// cleanupFunc with types.Deallocate for each allocation whose PodRef matches
// the pod. Interfaces whose configuration yields an invalid-plugin-type error
// are skipped.
//
// An error is returned when the network status, the
// network-attachment-definition, the IPAM configuration or an IP pool cannot
// be obtained. Failures of cleanupFunc or of the event emission are only
// logged (best effort); the success event is issued only when cleanupFunc
// succeeded.
func (pc *PodController) garbageCollectPodIPs(pod *v1.Pod) error {
	podNamespace := pod.GetNamespace()
	podName := pod.GetName()
	podRef := podID(podNamespace, podName)

	ifaceStatuses, err := podNetworkStatus(pod)
	if err != nil {
		return fmt.Errorf("failed to access the network status for pod [%s]: %v", podRef, err)
	}

	// The mount path does not depend on the interface: resolve it once.
	mountPath := defaultMountPath
	if pc.mountPath != "" {
		mountPath = pc.mountPath
	}

	for _, ifaceStatus := range ifaceStatuses {
		if ifaceStatus.Default {
			logging.Verbosef("skipped net-attach-def for default network")
			continue
		}

		nad, err := pc.ifaceNetAttachDef(ifaceStatus)
		if err != nil {
			return fmt.Errorf("failed to get network-attachment-definition for iface %s: %+v", ifaceStatus.Name, err)
		}

		logging.Verbosef("the NAD's config: %s", nad.Spec)
		ipamConfig, err := ipamConfiguration(nad, podNamespace, podName, mountPath)
		if err != nil {
			if isInvalidPluginType(err) {
				logging.Debugf("error while computing something: %v", err)
				continue
			}
			return fmt.Errorf("failed to create an IPAM configuration for the pod %s iface %s: %+v", podRef, ifaceStatus.Name, err)
		}

		// Resolve every pool up front so that a missing pool aborts before any
		// allocation of this interface is touched.
		pools := make([]*whereaboutsv1alpha1.IPPool, 0, len(ipamConfig.IPRanges))
		for _, rangeConfig := range ipamConfig.IPRanges {
			pool, err := pc.ipPool(wbclient.PoolIdentifier{IpRange: rangeConfig.Range, NetworkName: ipamConfig.NetworkName})
			if err != nil {
				return fmt.Errorf("failed to get the IPPool data: %+v", err)
			}
			logging.Verbosef("pool range [%s]", pool.Spec.Range)
			pools = append(pools, pool)
		}

		for _, pool := range pools {
			for allocationIndex, allocation := range pool.Spec.Allocations {
				if allocation.PodRef != podRef {
					continue
				}
				logging.Verbosef("stale allocation to cleanup: %+v", allocation)

				// NOTE(review): the first argument of NewKubernetesClient is nil
				// here - confirm cleanupFunc does not depend on it.
				wbClient := &wbclient.KubernetesIPAM{
					Client: *wbclient.NewKubernetesClient(nil, pc.k8sClient, 0),
					Config: *ipamConfig,
				}

				if _, err := pc.cleanupFunc(context.TODO(), types.Deallocate, *ipamConfig, wbClient); err != nil {
					logging.Errorf("failed to cleanup allocation: %v", err)
					// Do not report a successful cleanup for an address that was
					// not released.
					continue
				}
				if err := pc.addressGarbageCollected(pod, nad.GetName(), pool.Spec.Range, allocationIndex); err != nil {
					logging.Errorf("failed to issue event for successful IP address cleanup: %v", err)
				}
			}
		}
	}

	return nil
}
// isInvalidPluginType reports whether err's dynamic type is
// *config.InvalidPluginError. The error is inspected directly, without
// unwrapping; a nil error yields false.
func isInvalidPluginType(err error) bool {
	switch err.(type) {
	case *config.InvalidPluginError:
		return true
	default:
		return false
	}
}
// handleResult decides what happens to a processed pod based on err:
//   - nil error: the pod is forgotten by the work queue;
//   - error and the pod's requeue count is <= maxRetries: the pod is re-queued
//     with rate limiting;
//   - error otherwise: the failure is reported through
//     addressGarbageCollectionFailed, which drops the pod.
func (pc *PodController) handleResult(pod *v1.Pod, err error) {
	if err == nil {
		pc.workqueue.Forget(pod)
		return
	}

	requeues := pc.workqueue.NumRequeues(pod)
	if requeues > maxRetries {
		pc.addressGarbageCollectionFailed(pod, err)
		return
	}

	logging.Verbosef(
		"re-queuing IP address reconciliation request for pod %s; retry #: %d",
		podID(pod.GetNamespace(), pod.GetName()),
		requeues)
	pc.workqueue.AddRateLimited(pod)
}
// ifaceNetAttachDef looks up the network-attachment-definition referenced by a
// network status entry. The entry's Name is split on "/": the first element is
// used as the namespace and the second as the definition's name. An error is
// returned when the name has fewer than two elements or when the lister
// lookup fails.
func (pc *PodController) ifaceNetAttachDef(ifaceStatus nadv1.NetworkStatus) (*nadv1.NetworkAttachmentDefinition, error) {
	logging.Debugf("pod's network status: %+v", ifaceStatus)

	nameParts := strings.Split(ifaceStatus.Name, "/")
	if len(nameParts) < 2 {
		return nil, fmt.Errorf("pod %s name does not feature namespace/pod name syntax", ifaceStatus.Name)
	}
	nadNamespace, nadName := nameParts[0], nameParts[1]

	nadsInNamespace := pc.netAttachDefLister.NetworkAttachmentDefinitions(nadNamespace)
	nad, err := nadsInNamespace.Get(nadName)
	if err != nil {
		return nil, err
	}
	return nad, nil
}
// ipPool fetches, through the IP pool lister, the pool named after the given
// identifier from the namespace returned by ipPoolsNamespace. Lookup errors
// are returned unchanged.
func (pc *PodController) ipPool(poolIdentifier wbclient.PoolIdentifier) (*whereaboutsv1alpha1.IPPool, error) {
	poolsInNamespace := pc.ipPoolLister.IPPools(ipPoolsNamespace())

	pool, err := poolsInNamespace.Get(wbclient.IPPoolName(poolIdentifier))
	if err != nil {
		return nil, err
	}
	return pool, nil
}
// addressGarbageCollected records a normal event on the pod announcing that an
// IP address was cleaned up. The address is computed by adding the numeric
// allocationIndex as an offset to the IP parsed from ipRange (a CIDR).
//
// Nothing happens when the controller has no recorder. An error is returned
// when ipRange is not a valid CIDR or allocationIndex is not an integer.
func (pc *PodController) addressGarbageCollected(pod *v1.Pod, networkName string, ipRange string, allocationIndex string) error {
	if pc.recorder == nil {
		return nil
	}

	rangeIP, _, err := net.ParseCIDR(ipRange)
	if err != nil {
		return err
	}

	offset, err := strconv.Atoi(allocationIndex)
	if err != nil {
		return err
	}

	releasedIP := iphelpers.IPAddOffset(rangeIP, uint64(offset))
	pc.recorder.Eventf(
		pod,
		v1.EventTypeNormal,
		addressGarbageCollected,
		"successful cleanup of IP address [%s] from network %s",
		releasedIP,
		networkName)
	return nil
}
// addressGarbageCollectionFailed gives up on a pod: it logs the error, makes
// the work queue forget the pod, and - when a recorder is configured - records
// a warning event on the pod.
func (pc *PodController) addressGarbageCollectionFailed(pod *v1.Pod, err error) {
	podRef := podID(pod.GetNamespace(), pod.GetName())

	logging.Errorf(
		"dropping pod [%s] deletion out of the queue - could not reconcile IP: %+v",
		podRef,
		err)
	pc.workqueue.Forget(pod)

	if pc.recorder == nil {
		return
	}
	pc.recorder.Eventf(
		pod,
		v1.EventTypeWarning,
		addressGarbageCollectionFailed,
		"failed to garbage collect addresses for pod %s",
		podRef)
}
// onPodDelete is the pod informer's delete handler. It extracts the pod from
// the delivered object (which may be a tombstone) and adds a stripped copy of
// it to the queue. Objects that cannot be turned into a pod are logged and
// dropped.
func onPodDelete(queue workqueue.RateLimitingInterface, obj interface{}) {
	deletedPod, err := podFromTombstone(obj)
	if err != nil {
		logging.Errorf("cannot create pod object from %v on pod delete: %v", obj, err)
		return
	}

	logging.Verbosef("deleted pod [%s]", podID(deletedPod.GetNamespace(), deletedPod.GetName()))

	// Only the pod's metadata (including its network-status annotation) is
	// needed later on, so the spec and status are stripped before queuing.
	queue.Add(stripPod(deletedPod))
}
// podID returns the pod reference "<namespace>/<name>"; allocations are
// matched against this value through their PodRef.
func podID(podNamespace string, podName string) string {
	// Sprint adds no separator between string operands.
	return fmt.Sprint(podNamespace, "/", podName)
}
// podNetworkStatus decodes the pod's network-status annotation (a JSON list)
// into network status entries. A pod without the annotation yields a nil
// slice and no error; malformed JSON yields the unmarshalling error.
func podNetworkStatus(pod *v1.Pod) ([]nadv1.NetworkStatus, error) {
	rawStatus, annotated := pod.Annotations[nadv1.NetworkStatusAnnot]
	if !annotated {
		return nil, nil
	}

	var statuses []nadv1.NetworkStatus
	if err := json.Unmarshal([]byte(rawStatus), &statuses); err != nil {
		return nil, err
	}
	return statuses, nil
}
// ipamConfiguration loads the IPAM configuration out of the
// network-attachment-definition's config, reading the whereabouts
// configuration file from below mountPath. On success the pod name and
// namespace are stored in the configuration and its kubeconfig path is
// prefixed with mountPath. Loading errors are returned unchanged.
func ipamConfiguration(nad *nadv1.NetworkAttachmentDefinition, podNamespace string, podName string, mountPath string) (*types.IPAMConfig, error) {
	ipamConfig, err := config.LoadIPAMConfiguration(
		[]byte(nad.Spec.Config),
		"",
		mountPath+whereaboutsConfigPath,
	)
	if err != nil {
		return nil, err
	}

	ipamConfig.PodName, ipamConfig.PodNamespace = podName, podNamespace
	// The kubeconfig must be accessed through the mount path as well.
	ipamConfig.Kubernetes.KubeConfigPath = mountPath + ipamConfig.Kubernetes.KubeConfigPath

	return ipamConfig, nil
}
// ipPoolsNamespace returns the namespace in which IP pools are looked up: the
// value of the WHEREABOUTS_NAMESPACE environment variable when it is set (even
// if empty), and "kube-system" otherwise.
func ipPoolsNamespace() string {
	const (
		wbNamespaceEnvVariableName = "WHEREABOUTS_NAMESPACE"
		wbDefaultNamespace         = "kube-system"
	)

	configuredNamespace, isSet := os.LookupEnv(wbNamespaceEnvVariableName)
	if !isSet {
		return wbDefaultNamespace
	}
	return configuredNamespace
}
// podFromTombstone extracts a pod from an object delivered to a delete
// handler. The object is either the pod itself or a
// cache.DeletedFinalStateUnknown wrapping it; anything else results in an error.
func podFromTombstone(obj interface{}) (*v1.Pod, error) {
	if pod, isPod := obj.(*v1.Pod); isPod {
		return pod, nil
	}

	tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
	if !isTombstone {
		return nil, fmt.Errorf("received unexpected object: %v", obj)
	}

	pod, isPod := tombstone.Obj.(*v1.Pod)
	if !isPod {
		return nil, fmt.Errorf("deletedFinalStateUnknown contained non-Pod object: %v", tombstone.Obj)
	}
	return pod, nil
}
// stripPod returns a deep copy of the pod whose Spec and Status are reset to
// their zero values; the input pod is left untouched.
func stripPod(pod *v1.Pod) *v1.Pod {
	stripped := pod.DeepCopy()
	stripped.Spec, stripped.Status = v1.PodSpec{}, v1.PodStatus{}
	return stripped
}