Skip to content

Commit

Permalink
feat: [sc-103754] Be able to detect search domain misconfiguration #1391
Browse files Browse the repository at this point in the history
 (#1534)

* new collector dns

* implement DNS collector

* add dns service and endpoints check

* add nil check on retrieve endpoints
  • Loading branch information
nvanthao committed Apr 30, 2024
1 parent cb5db17 commit 6b368f2
Show file tree
Hide file tree
Showing 11 changed files with 414 additions and 0 deletions.
9 changes: 9 additions & 0 deletions config/crds/troubleshoot.sh_collectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,15 @@ spec:
required:
- data
type: object
dns:
properties:
collectorName:
type: string
exclude:
type: BoolString
timeout:
type: string
type: object
exec:
properties:
args:
Expand Down
9 changes: 9 additions & 0 deletions config/crds/troubleshoot.sh_preflights.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1972,6 +1972,15 @@ spec:
required:
- data
type: object
dns:
properties:
collectorName:
type: string
exclude:
type: BoolString
timeout:
type: string
type: object
exec:
properties:
args:
Expand Down
9 changes: 9 additions & 0 deletions config/crds/troubleshoot.sh_supportbundles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,15 @@ spec:
required:
- data
type: object
dns:
properties:
collectorName:
type: string
exclude:
type: BoolString
timeout:
type: string
type: object
exec:
properties:
args:
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/collector_shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,11 @@ type Sonobuoy struct {
Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
}

// DNS is the collector spec selected by the "dns" key of Collect. It carries
// the shared collector metadata plus an optional timeout setting.
type DNS struct {
// CollectorMeta is inlined, so its fields are serialized at the same level
// as the fields declared here.
CollectorMeta `json:",inline" yaml:",inline"`
// Timeout is an optional string serialized as "timeout".
// NOTE(review): presumably a Go duration string such as "30s" that bounds
// the collection — confirm against the collector implementation.
Timeout string `json:"timeout,omitempty" yaml:"timeout,omitempty"`
}

type Collect struct {
ClusterInfo *ClusterInfo `json:"clusterInfo,omitempty" yaml:"clusterInfo,omitempty"`
ClusterResources *ClusterResources `json:"clusterResources,omitempty" yaml:"clusterResources,omitempty"`
Expand Down Expand Up @@ -322,6 +327,7 @@ type Collect struct {
Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"`
NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
DNS *DNS `json:"dns,omitempty" yaml:"dns,omitempty"`
}

func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec {
Expand Down
21 changes: 21 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/collect/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
case collector.NodeMetrics != nil:
return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
case collector.DNS != nil:
return &CollectDNS{collector.DNS, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
default:
return nil, false
}
Expand Down Expand Up @@ -215,6 +217,8 @@ func getCollectorName(c interface{}) string {
collector = "sonobuoy"
case *CollectNodeMetrics:
collector = "node-metrics"
case *CollectDNS:
collector = "dns"
default:
collector = "<none>"
}
Expand Down
273 changes: 273 additions & 0 deletions pkg/collect/dns.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
package collect

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"path/filepath"
"strings"
"time"

"github.com/pkg/errors"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)

// dnsUtilsImage is the container image used for the short-lived pod that
// performs the in-cluster DNS lookups.
const dnsUtilsImage = "registry.k8s.io/e2e-test-images/jessie-dnsutils:1.3"

// CollectDNS is the collector that gathers DNS troubleshooting information
// from a cluster and stores it in the bundle.
//
// GetCollector constructs this type with an unkeyed composite literal, so the
// order of the fields below must not change.
type CollectDNS struct {
// Collector is the DNS collector spec (exclude flag, collector name, timeout).
Collector *troubleshootv1beta2.DNS
// BundlePath is passed to SaveResult when the report is written.
BundlePath string
// Namespace is supplied by GetCollector; the collection code in this file
// uses fixed namespaces ("default" and "kube-system") instead of this value.
Namespace string
// ClientConfig is the REST configuration supplied by GetCollector.
ClientConfig *rest.Config
// Client is the Kubernetes client used for every API call made by this collector.
Client kubernetes.Interface
// Context is the parent context from which the collection context is derived.
Context context.Context
RBACErrors
}

// Title returns the display name of this collector, as resolved by
// getCollectorName for the *CollectDNS type.
func (c *CollectDNS) Title() string {
	title := getCollectorName(c)
	return title
}

// IsExcluded reports whether the collector spec's Exclude setting disables
// this collector, delegating the evaluation to isExcluded.
func (c *CollectDNS) IsExcluded() (bool, error) {
	exclude := c.Collector.Exclude
	return isExcluded(exclude)
}

// Collect gathers DNS troubleshooting information and stores it as a single
// plain-text report in the bundle at "dns/<collectorName>".
//
// The report contains, in order: the cluster IP of the "kubernetes" service,
// the output of DNS lookups executed from a temporary pod, the names of the
// running kube-dns pods, the kube-dns service cluster IP, the kube-dns
// endpoints, and the CoreDNS / KubeDNS config maps when they can be read.
// Each step is best-effort: a failing step is recorded in the report (or
// omitted) and never aborts the remaining steps.
//
// The whole collection is bounded by the Timeout of the collector spec. When
// Timeout is empty, unparsable or not positive, a default of 60 seconds is
// used. progressChan is not used.
//
// An error is returned only when the report cannot be saved.
func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
	const defaultTimeout = 60 * time.Second

	// Honor the timeout from the spec; fall back to the default rather than
	// failing the collector on a bad value.
	timeout := defaultTimeout
	if c.Collector.Timeout != "" {
		parsed, err := time.ParseDuration(c.Collector.Timeout)
		switch {
		case err != nil:
			klog.V(2).Infof("Invalid DNS collector timeout %q, using %s: %v", c.Collector.Timeout, defaultTimeout, err)
		case parsed <= 0:
			klog.V(2).Infof("Non-positive DNS collector timeout %q, using %s", c.Collector.Timeout, defaultTimeout)
		default:
			timeout = parsed
		}
	}

	ctx, cancel := context.WithTimeout(c.Context, timeout)
	defer cancel()

	var sb strings.Builder

	// get kubernetes Cluster IP
	clusterIP, err := getKubernetesClusterIP(c.Client, ctx)
	if err == nil {
		fmt.Fprintf(&sb, "=== Kubernetes Cluster IP from API Server: %s\n", clusterIP)
	} else {
		fmt.Fprintf(&sb, "=== Failed to detect Kubernetes Cluster IP: %v\n", err)
	}

	// run a pod and perform DNS lookup
	podLog, err := troubleshootDNSFromPod(c.Client, ctx)
	if err == nil {
		fmt.Fprintf(&sb, "=== Test DNS resolution in pod %s: \n", dnsUtilsImage)
		sb.WriteString(podLog)
	} else {
		fmt.Fprintf(&sb, "=== Failed to run commands from pod: %v\n", err)
	}

	// are DNS pods running?
	fmt.Fprintf(&sb, "=== Running kube-dns pods: %s\n", getRunningKubeDNSPodNames(c.Client, ctx))

	// is DNS service up?
	fmt.Fprintf(&sb, "=== Running kube-dns service: %s\n", getKubeDNSServiceClusterIP(c.Client, ctx))

	// are DNS endpoints exposed?
	fmt.Fprintf(&sb, "=== kube-dns endpoints: %s\n", getKubeDNSEndpoints(c.Client, ctx))

	// get DNS server config; a cluster normally has only one of the two
	// config maps, so a lookup failure is not reported in the output
	if coreDNSConfig, err := getCoreDNSConfig(c.Client, ctx); err == nil {
		sb.WriteString("=== CoreDNS config: \n")
		sb.WriteString(coreDNSConfig)
	}
	if kubeDNSConfig, err := getKubeDNSConfig(c.Client, ctx); err == nil {
		sb.WriteString("=== KubeDNS config: \n")
		sb.WriteString(kubeDNSConfig)
	}

	output := NewResult()
	reportPath := filepath.Join("dns", c.Collector.CollectorName)
	if err := output.SaveResult(c.BundlePath, reportPath, bytes.NewBufferString(sb.String())); err != nil {
		return output, errors.Wrap(err, "failed to save DNS collector result")
	}

	return output, nil
}

// getKubernetesClusterIP fetches the "kubernetes" service from the "default"
// namespace and returns its Spec.ClusterIP. A lookup failure is logged at
// verbosity 2 and returned to the caller together with an empty string.
func getKubernetesClusterIP(client kubernetes.Interface, ctx context.Context) (string, error) {
	services := client.CoreV1().Services("default")

	svc, getErr := services.Get(ctx, "kubernetes", metav1.GetOptions{})
	if getErr != nil {
		klog.V(2).Infof("Failed to detect Kubernetes Cluster IP: %v", getErr)
		return "", getErr
	}

	ip := svc.Spec.ClusterIP
	return ip, nil
}

// troubleshootDNSFromPod runs a short-lived pod in the "default" namespace
// that prints /etc/resolv.conf and performs a debug nslookup of "kubernetes",
// waits for the pod to finish and returns its logs.
//
// The pod is always deleted before the function returns. Deletion uses its
// own short-lived context so that cleanup still happens when ctx has already
// expired or been cancelled.
//
// An error is returned when the pod cannot be created or watched, when it
// ends in the Failed phase, when the watch ends before the pod has succeeded
// (for example because ctx timed out), or when the logs cannot be read.
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (string, error) {
	namespace := "default"
	// The script always exits 0 so that a failing lookup still produces a
	// Succeeded pod whose logs can be collected.
	command := []string{"/bin/sh", "-c", `
set -x
cat /etc/resolv.conf
nslookup -debug kubernetes
exit 0
`}

	// TODO: image pull secret?
	podLabels := map[string]string{
		"troubleshoot-role": "dns-collector",
	}
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "troubleshoot-dns-",
			Namespace:    namespace,
			Labels:       podLabels,
		},
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{
				{
					Name:    "troubleshoot-dns",
					Image:   dnsUtilsImage,
					Command: command,
				},
			},
			RestartPolicy: corev1.RestartPolicyNever,
		},
	}

	created, err := client.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	if err != nil {
		return "", errors.Wrap(err, "failed to run troubleshoot DNS pod")
	}
	klog.V(2).Infof("Pod %s has been created", created.Name)

	defer func() {
		// ctx may be expired or cancelled by now; deleting with it would fail
		// and leak the pod, so cleanup gets a fresh, bounded context.
		deleteCtx, cancelDelete := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancelDelete()

		if err := client.CoreV1().Pods(namespace).Delete(deleteCtx, created.Name, metav1.DeleteOptions{}); err != nil {
			klog.Errorf("Failed to delete troubleshoot DNS pod %s: %v", created.Name, err)
			return
		}
		klog.V(2).Infof("Deleted pod %s", created.Name)
	}()

	// wait for pod to be completed; watch only the pod created above so that
	// other pods carrying the same label cannot be mistaken for it
	watcher, err := client.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
		FieldSelector: "metadata.name=" + created.Name,
	})
	if err != nil {
		return "", errors.Wrap(err, "failed to watch pod")
	}
	defer watcher.Stop()

	succeeded := false
waitLoop:
	for event := range watcher.ResultChan() {
		watched, ok := event.Object.(*corev1.Pod)
		if !ok {
			continue
		}
		switch watched.Status.Phase {
		case corev1.PodSucceeded:
			succeeded = true
			break waitLoop
		case corev1.PodFailed:
			return "", errors.New("troubleshoot DNS pod failed")
		}
	}
	if !succeeded {
		// The result channel was closed before the pod finished.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return "", errors.Wrap(ctxErr, "timed out waiting for troubleshoot DNS pod to complete")
		}
		return "", errors.New("watch ended before troubleshoot DNS pod completed")
	}

	// get pod logs
	req := client.CoreV1().Pods(namespace).GetLogs(created.Name, &corev1.PodLogOptions{})
	podLogs, err := req.Stream(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to get pod logs")
	}
	defer podLogs.Close()

	logBytes, err := io.ReadAll(podLogs)
	if err != nil {
		return "", errors.Wrap(err, "failed to read troubleshoot DNS pod logs")
	}

	return string(logBytes), nil
}

// getCoreDNSConfig reads the "coredns" config map from the "kube-system"
// namespace and returns the value stored under its "Corefile" key. A lookup
// failure is logged at verbosity 2 and returned with an empty string.
func getCoreDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
	configMaps := client.CoreV1().ConfigMaps("kube-system")

	cm, getErr := configMaps.Get(ctx, "coredns", metav1.GetOptions{})
	if getErr != nil {
		klog.V(2).Infof("Failed to detect CoreDNS config: %v", getErr)
		return "", getErr
	}

	corefile := cm.Data["Corefile"]
	return corefile, nil
}

// getKubeDNSConfig reads the "kube-dns" config map from the "kube-system"
// namespace and returns its Data map encoded as JSON. An empty string is
// returned when the config map has no data. A lookup failure is logged at
// verbosity 2 and returned; a JSON encoding failure is returned as is.
func getKubeDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
	configMaps := client.CoreV1().ConfigMaps("kube-system")

	cm, getErr := configMaps.Get(ctx, "kube-dns", metav1.GetOptions{})
	if getErr != nil {
		klog.V(2).Infof("Failed to detect KubeDNS config: %v", getErr)
		return "", getErr
	}

	settings := cm.Data
	if settings == nil {
		return "", nil
	}

	encoded, marshalErr := json.Marshal(settings)
	if marshalErr != nil {
		return "", marshalErr
	}

	return string(encoded), nil
}

// getRunningKubeDNSPodNames lists the pods labelled "k8s-app=kube-dns" in the
// "kube-system" namespace and returns the names of those in the Running
// phase, joined with ", ". When listing fails the error is logged at
// verbosity 2 and an empty string is returned.
func getRunningKubeDNSPodNames(client kubernetes.Interface, ctx context.Context) string {
	selector := metav1.ListOptions{LabelSelector: "k8s-app=kube-dns"}

	podList, listErr := client.CoreV1().Pods("kube-system").List(ctx, selector)
	if listErr != nil {
		klog.V(2).Infof("failed to list kube-dns pods: %v", listErr)
		return ""
	}

	var running []string
	for i := range podList.Items {
		candidate := &podList.Items[i]
		if candidate.Status.Phase != corev1.PodRunning {
			continue
		}
		running = append(running, candidate.Name)
	}

	return strings.Join(running, ", ")
}

// getKubeDNSServiceClusterIP fetches the "kube-dns" service from the
// "kube-system" namespace and returns its Spec.ClusterIP. When the lookup
// fails the error is logged at verbosity 2 and an empty string is returned.
func getKubeDNSServiceClusterIP(client kubernetes.Interface, ctx context.Context) string {
	services := client.CoreV1().Services("kube-system")

	svc, getErr := services.Get(ctx, "kube-dns", metav1.GetOptions{})
	if getErr != nil {
		klog.V(2).Infof("failed to get kube-dns service: %v", getErr)
		return ""
	}

	ip := svc.Spec.ClusterIP
	return ip
}

// getKubeDNSEndpoints fetches the "kube-dns" endpoints object from the
// "kube-system" namespace and returns its addresses formatted as "ip:port",
// joined with ", ". Only the first port of each subset is used, and subsets
// without ports contribute nothing. When the lookup fails the error is logged
// at verbosity 2 and an empty string is returned.
func getKubeDNSEndpoints(client kubernetes.Interface, ctx context.Context) string {
	ep, getErr := client.CoreV1().Endpoints("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
	if getErr != nil {
		klog.V(2).Infof("failed to get kube-dns endpoints: %v", getErr)
		return ""
	}

	var formatted []string
	for _, subset := range ep.Subsets {
		// Guard against subsets without ports before indexing Ports[0].
		if len(subset.Ports) == 0 {
			continue
		}
		firstPort := subset.Ports[0].Port
		for _, addr := range subset.Addresses {
			formatted = append(formatted, fmt.Sprintf("%s:%d", addr.IP, firstPort))
		}
	}

	return strings.Join(formatted, ", ")
}

0 comments on commit 6b368f2

Please sign in to comment.