forked from kubernetes/kubernetes
-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dns.go
862 lines (773 loc) · 30.9 KB
/
dns.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dns
import (
"encoding/json"
"fmt"
"hash/fnv"
"net"
"strings"
"sync"
"time"
etcd "github.com/coreos/etcd/client"
"github.com/miekg/dns"
skymsg "github.com/skynetservices/skydns/msg"
kapi "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/endpoints"
"k8s.io/kubernetes/pkg/api/unversioned"
kcache "k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
kframework "k8s.io/kubernetes/pkg/controller/framework"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/validation"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/watch"
"github.com/golang/glog"
)
const (
kubernetesSvcName = "kubernetes"
// A subdomain added to the user specified domain for all services.
serviceSubdomain = "svc"
// A subdomain added to the user specified dmoain for all pods.
podSubdomain = "pod"
// arpaSuffix is the standard suffix for PTR IP reverse lookups.
arpaSuffix = ".in-addr.arpa."
// Resync period for the kube controller loop.
resyncPeriod = 5 * time.Minute
// Duration for which the TTL cache should hold the node resource to retrieve the zone
// annotation from it so that it could be added to federation CNAMEs. There is ideally
// no need to expire this cache, but we don't want to assume that node annotations
// never change. So we expire the cache and retrieve a node once every 180 seconds.
// The value is chosen to be neither too long nor too short.
nodeCacheTTL = 180 * time.Second
// default priority used for service records
defaultPriority = 10
// default weight used for service records
defaultWeight = 10
// default TTL used for service records
defaultTTL = 30
)
type KubeDNS struct {
// kubeClient makes calls to API Server and registers calls with API Server
// to get Endpoints and Service objects.
kubeClient clientset.Interface
// The domain for which this DNS Server is authoritative.
domain string
// A cache that contains all the endpoints in the system.
endpointsStore kcache.Store
// A cache that contains all the services in the system.
servicesStore kcache.Store
// stores DNS records for the domain.
// A Records and SRV Records for (regular) services and headless Services.
// CNAME Records for ExternalName Services.
cache *TreeCache
// TODO(nikhiljindal): Remove this. It can be recreated using clusterIPServiceMap.
reverseRecordMap map[string]*skymsg.Service
// Map of cluster IP to service object. Headless services are not part of this map.
// Used to get a service when given its cluster IP.
// Access to this is coordinated using cacheLock. We use the same lock for cache and this map
// to ensure that they don't get out of sync.
clusterIPServiceMap map[string]*kapi.Service
// caller is responsible for using the cacheLock before invoking methods on cache
// the cache is not thread-safe, and the caller can guarantee thread safety by using
// the cacheLock
cacheLock sync.RWMutex
// The domain for which this DNS Server is authoritative, in array format and reversed.
// e.g. if domain is "cluster.local", domainPath is []string{"local", "cluster"}
domainPath []string
// endpointsController invokes registered callbacks when endpoints change.
endpointsController *kframework.Controller
// serviceController invokes registered callbacks when services change.
serviceController *kframework.Controller
// Map of federation names that the cluster in which this kube-dns is running belongs to, to
// the corresponding domain names.
federations map[string]string
// A TTL cache that contains some subset of nodes in the system so that we can retrieve the
// cluster zone annotation from the cached node instead of getting it from the API server
// every time.
nodesStore kcache.Store
}
func NewKubeDNS(client clientset.Interface, domain string, federations map[string]string) (*KubeDNS, error) {
// Verify that federation names should not contain dots ('.')
// We can not allow dots since we use that as separator for path segments (svcname.nsname.fedname.svc.domain)
for key := range federations {
if strings.ContainsAny(key, ".") {
return nil, fmt.Errorf("invalid federation name: %s, cannot have '.'", key)
}
}
kd := &KubeDNS{
kubeClient: client,
domain: domain,
cache: NewTreeCache(),
cacheLock: sync.RWMutex{},
nodesStore: kcache.NewStore(kcache.MetaNamespaceKeyFunc),
reverseRecordMap: make(map[string]*skymsg.Service),
clusterIPServiceMap: make(map[string]*kapi.Service),
domainPath: reverseArray(strings.Split(strings.TrimRight(domain, "."), ".")),
federations: federations,
}
kd.setEndpointsStore()
kd.setServicesStore()
return kd, nil
}
func (kd *KubeDNS) Start() {
go kd.endpointsController.Run(wait.NeverStop)
go kd.serviceController.Run(wait.NeverStop)
// Wait synchronously for the Kubernetes service and add a DNS record for it.
// This ensures that the Start function returns only after having received Service objects
// from APIServer.
// TODO: we might not have to wait for kubernetes service specifically. We should just wait
// for a list operation to be complete from APIServer.
kd.waitForKubernetesService()
}
func (kd *KubeDNS) waitForKubernetesService() (svc *kapi.Service) {
name := fmt.Sprintf("%v/%v", kapi.NamespaceDefault, kubernetesSvcName)
glog.Infof("Waiting for service: %v", name)
var err error
servicePollInterval := 1 * time.Second
for {
svc, err = kd.kubeClient.Core().Services(kapi.NamespaceDefault).Get(kubernetesSvcName)
if err != nil || svc == nil {
glog.Infof("Ignoring error while waiting for service %v: %v. Sleeping %v before retrying.", name, err, servicePollInterval)
time.Sleep(servicePollInterval)
continue
}
break
}
return
}
func (kd *KubeDNS) GetCacheAsJSON() (string, error) {
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
json, err := kd.cache.Serialize()
return json, err
}
func (kd *KubeDNS) setServicesStore() {
// Returns a cache.ListWatch that gets all changes to services.
kd.servicesStore, kd.serviceController = kframework.NewInformer(
&kcache.ListWatch{
ListFunc: func(options kapi.ListOptions) (runtime.Object, error) {
return kd.kubeClient.Core().Services(kapi.NamespaceAll).List(options)
},
WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) {
return kd.kubeClient.Core().Services(kapi.NamespaceAll).Watch(options)
},
},
&kapi.Service{},
resyncPeriod,
kframework.ResourceEventHandlerFuncs{
AddFunc: kd.newService,
DeleteFunc: kd.removeService,
UpdateFunc: kd.updateService,
},
)
}
func (kd *KubeDNS) setEndpointsStore() {
// Returns a cache.ListWatch that gets all changes to endpoints.
kd.endpointsStore, kd.endpointsController = kframework.NewInformer(
&kcache.ListWatch{
ListFunc: func(options kapi.ListOptions) (runtime.Object, error) {
return kd.kubeClient.Core().Endpoints(kapi.NamespaceAll).List(options)
},
WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) {
return kd.kubeClient.Core().Endpoints(kapi.NamespaceAll).Watch(options)
},
},
&kapi.Endpoints{},
resyncPeriod,
kframework.ResourceEventHandlerFuncs{
AddFunc: kd.handleEndpointAdd,
UpdateFunc: func(oldObj, newObj interface{}) {
// TODO: Avoid unwanted updates.
kd.handleEndpointAdd(newObj)
},
},
)
}
func assertIsService(obj interface{}) (*kapi.Service, bool) {
if service, ok := obj.(*kapi.Service); ok {
return service, ok
} else {
glog.Errorf("Type assertion failed! Expected 'Service', got %T", service)
return nil, ok
}
}
func (kd *KubeDNS) newService(obj interface{}) {
if service, ok := assertIsService(obj); ok {
glog.V(4).Infof("Add/Updated for service %v", service.Name)
// ExternalName services are a special kind that return CNAME records
if service.Spec.Type == kapi.ServiceTypeExternalName {
kd.newExternalNameService(service)
return
}
// if ClusterIP is not set, a DNS entry should not be created
if !kapi.IsServiceIPSet(service) {
kd.newHeadlessService(service)
return
}
if len(service.Spec.Ports) == 0 {
glog.Warningf("Unexpected service with no ports, this should not have happened: %v", service)
}
kd.newPortalService(service)
}
}
func (kd *KubeDNS) removeService(obj interface{}) {
if s, ok := assertIsService(obj); ok {
subCachePath := append(kd.domainPath, serviceSubdomain, s.Namespace, s.Name)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
success := kd.cache.deletePath(subCachePath...)
glog.V(2).Infof("Removing service %v at path %v. Success: ", s.Name, subCachePath, success)
// ExternalName services have no IP
if kapi.IsServiceIPSet(s) {
delete(kd.reverseRecordMap, s.Spec.ClusterIP)
delete(kd.clusterIPServiceMap, s.Spec.ClusterIP)
}
}
}
func (kd *KubeDNS) updateService(oldObj, newObj interface{}) {
if new, ok := assertIsService(newObj); ok {
if old, ok := assertIsService(oldObj); ok {
// Remove old cache path only if changing type to/from ExternalName.
// In all other cases, we'll update records in place.
if (new.Spec.Type == kapi.ServiceTypeExternalName) != (old.Spec.Type == kapi.ServiceTypeExternalName) {
kd.removeService(oldObj)
}
kd.newService(newObj)
}
}
}
func (kd *KubeDNS) handleEndpointAdd(obj interface{}) {
if e, ok := obj.(*kapi.Endpoints); ok {
kd.addDNSUsingEndpoints(e)
}
}
func (kd *KubeDNS) addDNSUsingEndpoints(e *kapi.Endpoints) error {
svc, err := kd.getServiceFromEndpoints(e)
if err != nil {
return err
}
if svc == nil || kapi.IsServiceIPSet(svc) {
// No headless service found corresponding to endpoints object.
return nil
}
return kd.generateRecordsForHeadlessService(e, svc)
}
func (kd *KubeDNS) getServiceFromEndpoints(e *kapi.Endpoints) (*kapi.Service, error) {
key, err := kcache.MetaNamespaceKeyFunc(e)
if err != nil {
return nil, err
}
obj, exists, err := kd.servicesStore.GetByKey(key)
if err != nil {
return nil, fmt.Errorf("failed to get service object from services store - %v", err)
}
if !exists {
glog.V(1).Infof("could not find service for endpoint %q in namespace %q", e.Name, e.Namespace)
return nil, nil
}
if svc, ok := assertIsService(obj); ok {
return svc, nil
}
return nil, fmt.Errorf("got a non service object in services store %v", obj)
}
// fqdn constructs the fqdn for the given service. subpaths is a list of path
// elements rooted at the given service, ending at a service record.
func (kd *KubeDNS) fqdn(service *kapi.Service, subpaths ...string) string {
domainLabels := append(append(kd.domainPath, serviceSubdomain, service.Namespace, service.Name), subpaths...)
return dns.Fqdn(strings.Join(reverseArray(domainLabels), "."))
}
func (kd *KubeDNS) newPortalService(service *kapi.Service) {
subCache := NewTreeCache()
recordValue, recordLabel := getSkyMsg(service.Spec.ClusterIP, 0)
subCache.setEntry(recordLabel, recordValue, kd.fqdn(service, recordLabel))
// Generate SRV Records
for i := range service.Spec.Ports {
port := &service.Spec.Ports[i]
if port.Name != "" && port.Protocol != "" {
srvValue := kd.generateSRVRecordValue(service, int(port.Port))
l := []string{"_" + strings.ToLower(string(port.Protocol)), "_" + port.Name}
subCache.setEntry(recordLabel, srvValue, kd.fqdn(service, append(l, recordLabel)...), l...)
}
}
subCachePath := append(kd.domainPath, serviceSubdomain, service.Namespace)
host := kd.getServiceFQDN(service)
reverseRecord, _ := getSkyMsg(host, 0)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
kd.cache.setSubCache(service.Name, subCache, subCachePath...)
kd.reverseRecordMap[service.Spec.ClusterIP] = reverseRecord
kd.clusterIPServiceMap[service.Spec.ClusterIP] = service
}
func (kd *KubeDNS) generateRecordsForHeadlessService(e *kapi.Endpoints, svc *kapi.Service) error {
// TODO: remove this after v1.4 is released and the old annotations are EOL
podHostnames, err := getPodHostnamesFromAnnotation(e.Annotations)
if err != nil {
return err
}
subCache := NewTreeCache()
glog.V(4).Infof("Endpoints Annotations: %v", e.Annotations)
for idx := range e.Subsets {
for subIdx := range e.Subsets[idx].Addresses {
address := &e.Subsets[idx].Addresses[subIdx]
endpointIP := address.IP
recordValue, endpointName := getSkyMsg(endpointIP, 0)
if hostLabel, exists := getHostname(address, podHostnames); exists {
endpointName = hostLabel
}
subCache.setEntry(endpointName, recordValue, kd.fqdn(svc, endpointName))
for portIdx := range e.Subsets[idx].Ports {
endpointPort := &e.Subsets[idx].Ports[portIdx]
if endpointPort.Name != "" && endpointPort.Protocol != "" {
srvValue := kd.generateSRVRecordValue(svc, int(endpointPort.Port), endpointName)
l := []string{"_" + strings.ToLower(string(endpointPort.Protocol)), "_" + endpointPort.Name}
subCache.setEntry(endpointName, srvValue, kd.fqdn(svc, append(l, endpointName)...), l...)
}
}
}
}
subCachePath := append(kd.domainPath, serviceSubdomain, svc.Namespace)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
kd.cache.setSubCache(svc.Name, subCache, subCachePath...)
return nil
}
func getHostname(address *kapi.EndpointAddress, podHostnames map[string]endpoints.HostRecord) (string, bool) {
if len(address.Hostname) > 0 {
return address.Hostname, true
}
if hostRecord, exists := podHostnames[address.IP]; exists && len(validation.IsDNS1123Label(hostRecord.HostName)) == 0 {
return hostRecord.HostName, true
}
return "", false
}
func getPodHostnamesFromAnnotation(annotations map[string]string) (map[string]endpoints.HostRecord, error) {
hostnames := map[string]endpoints.HostRecord{}
if annotations != nil {
if serializedHostnames, exists := annotations[endpoints.PodHostnamesAnnotation]; exists && len(serializedHostnames) > 0 {
err := json.Unmarshal([]byte(serializedHostnames), &hostnames)
if err != nil {
return nil, err
}
}
}
return hostnames, nil
}
func (kd *KubeDNS) generateSRVRecordValue(svc *kapi.Service, portNumber int, labels ...string) *skymsg.Service {
host := strings.Join([]string{svc.Name, svc.Namespace, serviceSubdomain, kd.domain}, ".")
for _, cNameLabel := range labels {
host = cNameLabel + "." + host
}
recordValue, _ := getSkyMsg(host, portNumber)
return recordValue
}
// Generates skydns records for a headless service.
func (kd *KubeDNS) newHeadlessService(service *kapi.Service) error {
// Create an A record for every pod in the service.
// This record must be periodically updated.
// Format is as follows:
// For a service x, with pods a and b create DNS records,
// a.x.ns.domain. and, b.x.ns.domain.
key, err := kcache.MetaNamespaceKeyFunc(service)
if err != nil {
return err
}
e, exists, err := kd.endpointsStore.GetByKey(key)
if err != nil {
return fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
}
if !exists {
glog.V(1).Infof("Could not find endpoints for service %q in namespace %q. DNS records will be created once endpoints show up.", service.Name, service.Namespace)
return nil
}
if e, ok := e.(*kapi.Endpoints); ok {
return kd.generateRecordsForHeadlessService(e, service)
}
return nil
}
// Generates skydns records for an ExternalName service.
func (kd *KubeDNS) newExternalNameService(service *kapi.Service) {
// Create a CNAME record for the service's ExternalName.
// TODO: TTL?
recordValue, _ := getSkyMsg(service.Spec.ExternalName, 0)
cachePath := append(kd.domainPath, serviceSubdomain, service.Namespace)
fqdn := kd.fqdn(service)
glog.V(2).Infof("newExternalNameService: storing key %s with value %v as %s under %v", service.Name, recordValue, fqdn, cachePath)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
// Store the service name directly as the leaf key
kd.cache.setEntry(service.Name, recordValue, fqdn, cachePath...)
}
// Records responds with DNS records that match the given name, in a format
// understood by the skydns server. If "exact" is true, a single record
// matching the given name is returned, otherwise all records stored under
// the subtree matching the name are returned.
func (kd *KubeDNS) Records(name string, exact bool) (retval []skymsg.Service, err error) {
glog.V(2).Infof("Received DNS Request:%s, exact:%v", name, exact)
trimmed := strings.TrimRight(name, ".")
segments := strings.Split(trimmed, ".")
isFederationQuery := false
federationSegments := []string{}
if !exact && kd.isFederationQuery(segments) {
glog.V(2).Infof("federation service query: Received federation query. Going to try to find local service first")
// Try quering the non-federation (local) service first.
// Will try the federation one later, if this fails.
isFederationQuery = true
federationSegments = append(federationSegments, segments...)
// To try local service, remove federation name from segments.
// Federation name is 3rd in the segment (after service name and namespace).
segments = append(segments[:2], segments[3:]...)
}
path := reverseArray(segments)
records, err := kd.getRecordsForPath(path, exact)
if err != nil {
return nil, err
}
if !isFederationQuery {
if len(records) > 0 {
return records, nil
}
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
// For federation query, verify that the local service has endpoints.
validRecord := false
for _, val := range records {
// We know that a headless service has endpoints for sure if a record was returned for it.
// The record contains endpoint IPs. So nothing to check for headless services.
if !kd.isHeadlessServiceRecord(&val) {
ok, err := kd.serviceWithClusterIPHasEndpoints(&val)
if err != nil {
glog.V(2).Infof("federation service query: unexpected error while trying to find if service has endpoint: %v")
continue
}
if !ok {
glog.Infof("federation service query: skipping record since service has no endpoint: %v", val)
continue
}
}
validRecord = true
break
}
if validRecord {
// There is a local service with valid endpoints, return its CNAME.
name := strings.Join(reverseArray(path), ".")
// Ensure that this name that we are returning as a CNAME response is a fully qualified
// domain name so that the client's resolver library doesn't have to go through its
// search list all over again.
if !strings.HasSuffix(name, ".") {
name = name + "."
}
glog.Infof("federation service query: Returning CNAME for local service : %s", name)
return []skymsg.Service{{Host: name}}, nil
}
// If the name query is not an exact query and does not match any records in the local store,
// attempt to send a federation redirect (CNAME) response.
if !exact {
glog.V(2).Infof("federation service query: Did not find a local service. Trying federation redirect (CNAME) response")
return kd.federationRecords(reverseArray(federationSegments))
}
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
func (kd *KubeDNS) getRecordsForPath(path []string, exact bool) ([]skymsg.Service, error) {
retval := []skymsg.Service{}
if kd.isPodRecord(path) {
ip, err := kd.getPodIP(path)
if err == nil {
skyMsg, _ := getSkyMsg(ip, 0)
return []skymsg.Service{*skyMsg}, nil
}
return nil, err
}
if exact {
key := path[len(path)-1]
if key == "" {
return []skymsg.Service{}, nil
}
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
if record, ok := kd.cache.getEntry(key, path[:len(path)-1]...); ok {
glog.V(2).Infof("Exact match %v for %v received from cache", record, path[:len(path)-1])
return []skymsg.Service{*(record.(*skymsg.Service))}, nil
}
glog.V(2).Infof("Exact match for %v not found in cache", path)
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
records := kd.cache.getValuesForPathWithWildcards(path...)
glog.V(2).Infof("Received %d records for %v from cache", len(records), path)
for _, val := range records {
retval = append(retval, *val)
}
glog.V(2).Infof("records:%v, retval:%v, path:%v", records, retval, path)
return retval, nil
}
// Returns true if the given record corresponds to a headless service.
// Important: Assumes that we already have the cacheLock. Callers responsibility to acquire it.
// This is because the code will panic, if we try to acquire it again if we already have it.
func (kd *KubeDNS) isHeadlessServiceRecord(msg *skymsg.Service) bool {
// If it is not a headless service, then msg.Host will be the cluster IP.
// So we can check if msg.host exists in our clusterIPServiceMap.
_, ok := kd.clusterIPServiceMap[msg.Host]
// It is headless service if no record was found.
return !ok
}
// Returns true if the service corresponding to the given message has endpoints.
// Note: Works only for services with ClusterIP. Will return an error for headless service (service without a clusterIP).
// Important: Assumes that we already have the cacheLock. Callers responsibility to acquire it.
// This is because the code will panic, if we try to acquire it again if we already have it.
func (kd *KubeDNS) serviceWithClusterIPHasEndpoints(msg *skymsg.Service) (bool, error) {
svc, ok := kd.clusterIPServiceMap[msg.Host]
if !ok {
// It is a headless service.
return false, fmt.Errorf("method not expected to be called for headless service")
}
key, err := kcache.MetaNamespaceKeyFunc(svc)
if err != nil {
return false, err
}
e, exists, err := kd.endpointsStore.GetByKey(key)
if err != nil {
return false, fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
}
if !exists {
return false, nil
}
if e, ok := e.(*kapi.Endpoints); ok {
return len(e.Subsets) > 0, nil
}
return false, fmt.Errorf("unexpected: found non-endpoint object in endpoint store: %v", e)
}
// ReverseRecords performs a reverse lookup for the given name.
func (kd *KubeDNS) ReverseRecord(name string) (*skymsg.Service, error) {
glog.V(2).Infof("Received ReverseRecord Request:%s", name)
// if portalIP is not a valid IP, the reverseRecordMap lookup will fail
portalIP, ok := extractIP(name)
if !ok {
return nil, fmt.Errorf("does not support reverse lookup for %s", name)
}
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
if reverseRecord, ok := kd.reverseRecordMap[portalIP]; ok {
return reverseRecord, nil
}
return nil, fmt.Errorf("must be exactly one service record")
}
// extractIP turns a standard PTR reverse record lookup name
// into an IP address
func extractIP(reverseName string) (string, bool) {
if !strings.HasSuffix(reverseName, arpaSuffix) {
return "", false
}
search := strings.TrimSuffix(reverseName, arpaSuffix)
// reverse the segments and then combine them
segments := reverseArray(strings.Split(search, "."))
return strings.Join(segments, "."), true
}
// e.g {"local", "cluster", "pod", "default", "10-0-0-1"}
func (kd *KubeDNS) isPodRecord(path []string) bool {
if len(path) != len(kd.domainPath)+3 {
return false
}
if path[len(kd.domainPath)] != "pod" {
return false
}
for _, segment := range path {
if segment == "*" {
return false
}
}
return true
}
func (kd *KubeDNS) getPodIP(path []string) (string, error) {
ipStr := path[len(path)-1]
ip := strings.Replace(ipStr, "-", ".", -1)
if parsed := net.ParseIP(ip); parsed != nil {
return ip, nil
}
return "", fmt.Errorf("Invalid IP Address %v", ip)
}
func hashServiceRecord(msg *skymsg.Service) string {
s := fmt.Sprintf("%v", msg)
h := fnv.New32a()
h.Write([]byte(s))
return fmt.Sprintf("%x", h.Sum32())
}
func newServiceRecord(ip string, port int) *skymsg.Service {
return &skymsg.Service{
Host: ip,
Port: port,
Priority: defaultPriority,
Weight: defaultWeight,
Ttl: defaultTTL,
}
}
// Returns record in a format that SkyDNS understands.
// Also return the hash of the record.
func getSkyMsg(ip string, port int) (*skymsg.Service, string) {
msg := newServiceRecord(ip, port)
hash := hashServiceRecord(msg)
glog.V(2).Infof("DNS Record:%s, hash:%s", fmt.Sprintf("%v", msg), hash)
return msg, fmt.Sprintf("%x", hash)
}
// isFederationQuery checks if the given query `path` matches the federated service query pattern.
// The conjunction of the following conditions forms the test for the federated service query
// pattern:
// 1. `path` has exactly 4+len(domainPath) segments: mysvc.myns.myfederation.svc.domain.path.
// 2. Service name component must be a valid RFC 1035 name.
// 3. Namespace component must be a valid RFC 1123 name.
// 4. Federation component must also be a valid RFC 1123 name.
// 5. Fourth segment is exactly "svc"
// 6. The remaining segments match kd.domainPath.
// 7. And federation must be one of the listed federations in the config.
// Note: Because of the above conditions, this method will treat wildcard queries such as
// *.mysvc.myns.myfederation.svc.domain.path as non-federation queries.
// We can add support for wildcard queries later, if needed.
func (kd *KubeDNS) isFederationQuery(path []string) bool {
if len(path) != 4+len(kd.domainPath) {
glog.V(2).Infof("not a federation query: len(%q) != 4+len(%q)", path, kd.domainPath)
return false
}
if errs := validation.IsDNS1035Label(path[0]); len(errs) != 0 {
glog.V(2).Infof("not a federation query: %q is not an RFC 1035 label: %q", path[0], errs)
return false
}
if errs := validation.IsDNS1123Label(path[1]); len(errs) != 0 {
glog.V(2).Infof("not a federation query: %q is not an RFC 1123 label: %q", path[1], errs)
return false
}
if errs := validation.IsDNS1123Label(path[2]); len(errs) != 0 {
glog.V(2).Infof("not a federation query: %q is not an RFC 1123 label: %q", path[2], errs)
return false
}
if path[3] != serviceSubdomain {
glog.V(2).Infof("not a federation query: %q != %q (serviceSubdomain)", path[3], serviceSubdomain)
return false
}
for i, domComp := range kd.domainPath {
// kd.domainPath is reversed, so we need to look in the `path` in the reverse order.
if domComp != path[len(path)-i-1] {
glog.V(2).Infof("not a federation query: kd.domainPath[%d] != path[%d] (%q != %q)", i, len(path)-i-1, domComp, path[len(path)-i-1])
return false
}
}
if _, ok := kd.federations[path[2]]; !ok {
glog.V(2).Infof("not a federation query: kd.federations[%q] not found", path[2])
return false
}
return true
}
// federationRecords checks if the given `queryPath` is for a federated service and if it is,
// it returns a CNAME response containing the cluster zone name and federation domain name
// suffix.
func (kd *KubeDNS) federationRecords(queryPath []string) ([]skymsg.Service, error) {
// `queryPath` is a reversed-array of the queried name, reverse it back to make it easy
// to follow through this code and reduce confusion. There is no reason for it to be
// reversed here.
path := reverseArray(queryPath)
// Check if the name query matches the federation query pattern.
if !kd.isFederationQuery(path) {
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
// Now that we have already established that the query is a federation query, remove the local
// domain path components, i.e. kd.domainPath, from the query.
path = path[:len(path)-len(kd.domainPath)]
// Append the zone name (zone in the cloud provider terminology, not a DNS
// zone) and the region name.
zone, region, err := kd.getClusterZoneAndRegion()
if err != nil {
return nil, fmt.Errorf("failed to obtain the cluster zone and region: %v", err)
}
path = append(path, zone, region)
// We have already established that the map entry exists for the given federation,
// we just need to retrieve the domain name, validate it and append it to the path.
domain := kd.federations[path[2]]
// We accept valid subdomains as well, so just let all the valid subdomains.
if len(validation.IsDNS1123Subdomain(domain)) != 0 {
return nil, fmt.Errorf("%s is not a valid domain name for federation %s", domain, path[2])
}
name := strings.Join(append(path, domain), ".")
// Ensure that this name that we are returning as a CNAME response is a fully qualified
// domain name so that the client's resolver library doesn't have to go through its
// search list all over again.
if !strings.HasSuffix(name, ".") {
name = name + "."
}
return []skymsg.Service{{Host: name}}, nil
}
// getClusterZoneAndRegion returns the name of the zone and the region the
// cluster is running in. It arbitrarily selects a node and reads the failure
// domain label on the node. An alternative is to obtain this pod's
// (i.e. kube-dns pod's) name using the downward API, get the pod, get the
// node the pod is bound to and retrieve that node's labels. But even just by
// reading those steps, it looks complex and it is not entirely clear what
// that complexity is going to buy us. So taking a simpler approach here.
// Also note that zone here means the zone in cloud provider terminology, not
// the DNS zone.
func (kd *KubeDNS) getClusterZoneAndRegion() (string, string, error) {
var node *kapi.Node
objs := kd.nodesStore.List()
if len(objs) > 0 {
var ok bool
if node, ok = objs[0].(*kapi.Node); !ok {
return "", "", fmt.Errorf("expected node object, got: %T", objs[0])
}
} else {
// An alternative to listing nodes each time is to set a watch, but that is totally
// wasteful in case of non-federated independent Kubernetes clusters. So carefully
// proceeding here.
// TODO(madhusudancs): Move this to external/v1 API.
nodeList, err := kd.kubeClient.Core().Nodes().List(kapi.ListOptions{})
if err != nil || len(nodeList.Items) == 0 {
return "", "", fmt.Errorf("failed to retrieve the cluster nodes: %v", err)
}
// Select a node (arbitrarily the first node) that has
// `LabelZoneFailureDomain` and `LabelZoneRegion` set.
for _, nodeItem := range nodeList.Items {
_, zfound := nodeItem.Labels[unversioned.LabelZoneFailureDomain]
_, rfound := nodeItem.Labels[unversioned.LabelZoneRegion]
if !zfound || !rfound {
continue
}
// Make a copy of the node, don't rely on the loop variable.
node = &(*(&nodeItem))
if err := kd.nodesStore.Add(node); err != nil {
return "", "", fmt.Errorf("couldn't add the retrieved node to the cache: %v", err)
}
// Node is found, break out of the loop.
break
}
}
if node == nil {
return "", "", fmt.Errorf("Could not find any nodes")
}
zone, ok := node.Labels[unversioned.LabelZoneFailureDomain]
if !ok || zone == "" {
return "", "", fmt.Errorf("unknown cluster zone")
}
region, ok := node.Labels[unversioned.LabelZoneRegion]
if !ok || region == "" {
return "", "", fmt.Errorf("unknown cluster region")
}
return zone, region, nil
}
func (kd *KubeDNS) getServiceFQDN(service *kapi.Service) string {
return strings.Join([]string{service.Name, service.Namespace, serviceSubdomain, kd.domain}, ".")
}
func reverseArray(arr []string) []string {
for i := 0; i < len(arr)/2; i++ {
j := len(arr) - i - 1
arr[i], arr[j] = arr[j], arr[i]
}
return arr
}