Skip to content

Commit

Permalink
[hackathon] kubectl trace based testing (#2549)
Browse files Browse the repository at this point in the history
* add script and basic docs

* Adding k3d setup docs

* finish script

* Adding containerd tracing to our Kube Tracing example

* add kubelet script

* add example

* Updating docs

* Update files

* Fix rebase error

* Removing file

---------

Co-authored-by: Daniel Dias <danielbpdias@gmail.com>
  • Loading branch information
schoren and danielbdias committed May 18, 2023
1 parent 64c34a3 commit efb642f
Show file tree
Hide file tree
Showing 10 changed files with 609 additions and 0 deletions.
127 changes: 127 additions & 0 deletions examples/tracetesting-kubernetes/kubetracing/config.toml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# File generated by {{ .Program }}. DO NOT EDIT. Use config.toml.tmpl instead.
# k3s containerd config template, extended for this example with OTLP tracing
# (see the "Tracing additions" section at the bottom).
version = 2

# Location for containerd-managed extra binaries/plugins ("opt" plugin).
[plugins."io.containerd.internal.v1.opt"]
path = "{{ .NodeConfig.Containerd.Opt }}"
# CRI plugin — the interface the kubelet uses to run containers.
[plugins."io.containerd.grpc.v1.cri"]
stream_server_address = "127.0.0.1"
stream_server_port = "10010"
enable_selinux = {{ .NodeConfig.SELinux }}
enable_unprivileged_ports = {{ .EnableUnprivileged }}
enable_unprivileged_icmp = {{ .EnableUnprivileged }}

{{- if .DisableCgroup}}
disable_cgroup = true
{{end}}
# Relax security knobs when running inside a user namespace (rootless mode).
{{- if .IsRunningInUserNS }}
disable_apparmor = true
restrict_oom_score_adj = true
{{end}}

{{- if .NodeConfig.AgentConfig.PauseImage }}
sandbox_image = "{{ .NodeConfig.AgentConfig.PauseImage }}"
{{end}}

# Snapshotter selection; stargz additionally gets keychain and per-registry
# mirror/auth/TLS settings rendered from .PrivateRegistryConfig below.
{{- if .NodeConfig.AgentConfig.Snapshotter }}
[plugins."io.containerd.grpc.v1.cri".containerd]
snapshotter = "{{ .NodeConfig.AgentConfig.Snapshotter }}"
disable_snapshot_annotations = {{ if eq .NodeConfig.AgentConfig.Snapshotter "stargz" }}false{{else}}true{{end}}
{{ if eq .NodeConfig.AgentConfig.Snapshotter "stargz" }}
{{ if .NodeConfig.AgentConfig.ImageServiceSocket }}
[plugins."io.containerd.snapshotter.v1.stargz"]
cri_keychain_image_service_path = "{{ .NodeConfig.AgentConfig.ImageServiceSocket }}"
[plugins."io.containerd.snapshotter.v1.stargz".cri_keychain]
enable_keychain = true
{{end}}
{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins."io.containerd.snapshotter.v1.stargz".registry.mirrors]{{end}}
{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
[plugins."io.containerd.snapshotter.v1.stargz".registry.mirrors."{{$k}}"]
endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
{{if $v.Rewrites}}
[plugins."io.containerd.snapshotter.v1.stargz".registry.mirrors."{{$k}}".rewrite]
{{range $pattern, $replace := $v.Rewrites}}
"{{$pattern}}" = "{{$replace}}"
{{end}}
{{end}}
{{end}}
{{range $k, $v := .PrivateRegistryConfig.Configs }}
{{ if $v.Auth }}
[plugins."io.containerd.snapshotter.v1.stargz".registry.configs."{{$k}}".auth]
{{ if $v.Auth.Username }}username = {{ printf "%q" $v.Auth.Username }}{{end}}
{{ if $v.Auth.Password }}password = {{ printf "%q" $v.Auth.Password }}{{end}}
{{ if $v.Auth.Auth }}auth = {{ printf "%q" $v.Auth.Auth }}{{end}}
{{ if $v.Auth.IdentityToken }}identitytoken = {{ printf "%q" $v.Auth.IdentityToken }}{{end}}
{{end}}
{{ if $v.TLS }}
[plugins."io.containerd.snapshotter.v1.stargz".registry.configs."{{$k}}".tls]
{{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
{{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
{{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
{{ if $v.TLS.InsecureSkipVerify }}insecure_skip_verify = true{{end}}
{{end}}
{{end}}
{{end}}
{{end}}
{{end}}

# CNI binary/config paths, rendered unless flannel is disabled in the node config.
{{- if not .NodeConfig.NoFlannel }}
[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "{{ .NodeConfig.AgentConfig.CNIBinDir }}"
conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}"
{{end}}

# Default runtime: runc via the v2 shim.
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
# Whether runc uses the systemd cgroup driver (templated from node config).
SystemdCgroup = {{ .SystemdCgroup }}

# CRI-level registry mirrors and per-registry credentials/TLS, rendered only
# when a private registry configuration was supplied.
{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins."io.containerd.grpc.v1.cri".registry.mirrors]{{end}}
{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{$k}}"]
endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
{{if $v.Rewrites}}
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{$k}}".rewrite]
{{range $pattern, $replace := $v.Rewrites}}
"{{$pattern}}" = "{{$replace}}"
{{end}}
{{end}}
{{end}}

{{range $k, $v := .PrivateRegistryConfig.Configs }}
{{ if $v.Auth }}
[plugins."io.containerd.grpc.v1.cri".registry.configs."{{$k}}".auth]
{{ if $v.Auth.Username }}username = {{ printf "%q" $v.Auth.Username }}{{end}}
{{ if $v.Auth.Password }}password = {{ printf "%q" $v.Auth.Password }}{{end}}
{{ if $v.Auth.Auth }}auth = {{ printf "%q" $v.Auth.Auth }}{{end}}
{{ if $v.Auth.IdentityToken }}identitytoken = {{ printf "%q" $v.Auth.IdentityToken }}{{end}}
{{end}}
{{ if $v.TLS }}
[plugins."io.containerd.grpc.v1.cri".registry.configs."{{$k}}".tls]
{{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
{{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
{{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
{{ if $v.TLS.InsecureSkipVerify }}insecure_skip_verify = true{{end}}
{{end}}
{{end}}
{{end}}

# Additional container runtimes supplied via .ExtraRuntimes.
{{range $k, $v := .ExtraRuntimes}}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes."{{$k}}"]
runtime_type = "{{$v.RuntimeType}}"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes."{{$k}}".options]
BinaryName = "{{$v.BinaryName}}"
{{end}}

# --- Tracing additions for this example ---
# Export containerd spans over OTLP/gRPC to the collector on the docker host
# (host.k3d.internal resolves to the host from inside k3d nodes).
[plugins."io.containerd.tracing.processor.v1.otlp"]
endpoint = "host.k3d.internal:4317"
protocol = "grpc"
insecure = true

[plugins."io.containerd.internal.v1.tracing"]
# 1.0 = record every span (sampling effectively disabled for the demo).
sampling_ratio = 1.0
service_name = "containerd"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# kube-apiserver TracingConfiguration, consumed via --tracing-config-file
# (requires the APIServerTracing feature gate — see setup-k3s.md).
apiVersion: apiserver.config.k8s.io/v1beta1
kind: TracingConfiguration
# OTLP/gRPC collector; host.k3d.internal resolves to the docker host from
# inside k3d nodes.
endpoint: host.k3d.internal:4317
samplingRatePerMillion: 1000000 # 100%
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# KubeletConfiguration enabling the KubeletTracing feature gate and pointing
# the kubelet's OTLP exporter at the collector on the docker host.
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
featureGates:
  KubeletTracing: true
tracing:
  endpoint: host.k3d.internal:4317
  samplingRatePerMillion: 1000000 # 100% — every span is exported
27 changes: 27 additions & 0 deletions examples/tracetesting-kubernetes/kubetracing/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Runs Jaeger and an OpenTelemetry Collector on the host network so the k8s
# apiserver/kubelet/containerd (inside k3d) can reach them via host.k3d.internal.
services:
  jaeger:
    # Wait until the Jaeger UI answers before considering the service healthy.
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - localhost:16686
      timeout: 3s
      interval: 1s
      retries: 60
    image: jaegertracing/all-in-one:latest
    restart: unless-stopped
    ports:
      - 16686:16686 # Jaeger UI
  otel-collector:
    command:
      - --config
      - /otel-local-config.yaml
    depends_on:
      jaeger:
        condition: service_started
    image: otel/opentelemetry-collector:0.54.0
    ports:
      - 4317:4317 # OTLP/gRPC receiver
    volumes:
      # Mount the collector config from this directory.
      - ./otel-collector.yaml:/otel-local-config.yaml
25 changes: 25 additions & 0 deletions examples/tracetesting-kubernetes/kubetracing/otel-collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# OpenTelemetry Collector pipeline: receive OTLP, sample/batch, export to Jaeger.
receivers:
  otlp:
    protocols:
      grpc:
      http:
processors:
  # Data sources: traces
  probabilistic_sampler:
    hash_seed: 22
    sampling_percentage: 100 # keep everything; sampling is done upstream
  batch:
    timeout: 100ms
exporters:
  # logging is useful for confirming the collector is receiving traces
  logging:
    logLevel: debug
  jaeger:
    # service name resolvable inside the docker-compose network
    endpoint: jaeger:14250
    tls:
      insecure: true
service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [probabilistic_sampler, batch]
      exporters: [jaeger,logging]
182 changes: 182 additions & 0 deletions examples/tracetesting-kubernetes/setup-k3s.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Test Kubectl commands

This example shows how one can use Tracetest to do trace based testing for kubectl commands. This works by leveraging the `Audit-Id` header sent back from the kubernetes api server for every request.

The `kubectl` command can produce very verbose output that includes the response headers. This example script parses that info and maps it to the corresponding Trace ID.

## Requirements

This setup is a bit limited at the moment. It only works with the Jaeger backend, because it provides a search API that allows us to search for spans that include the correct `Audit-Id` tag.

For everything to work, we need the following environment configured:

1. A Kubernetes cluster with `APIServerTracing` enabled and configured
2. An OtelCollector that can be accessed by the k8s api server to send its tracing data
3. A Jaeger backend to store the traces


## Setup

k3s can be used to easily set up a cluster with the correct configuration. It's easy enough to be configured in any dev environment, even in CI/CD.

The otelcollector cannot be deployed in the target cluster because it complicates the networking setup needed to have the k8s apiserver communicating with the collector.
An easy solution for this is to have docker compose starting the otelcol/jaeger services, so their ports can be exposed directly to the host network.

### 1. Setup OtelCollector/Jaeger

create a `docker-compose.yaml` file that includes both services:

```yaml
# docker-compose.yaml
services:
  jaeger:
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - localhost:16686
      timeout: 3s
      interval: 1s
      retries: 60
    image: jaegertracing/all-in-one:latest
    restart: unless-stopped
    ports:
      - 16686:16686
  otel-collector:
    command:
      - --config
      - /otel-local-config.yaml
    depends_on:
      jaeger:
        condition: service_started
    image: otel/opentelemetry-collector:0.54.0
    ports:
      - 4317:4317
    volumes:
      - ./otel-collector.yaml:/otel-local-config.yaml
```

We need to create a config file in the same directory for the otel-collector so it can communicate with Jaeger:

```yaml
# otel-collector.yaml
receivers:
  otlp:
    protocols:
      grpc:
      http:

processors:
  # Data sources: traces
  probabilistic_sampler:
    hash_seed: 22
    sampling_percentage: 100

  batch:
    timeout: 100ms

exporters:
  # logging is optional, but useful for making sure the collector is receiving traces
  logging:
    logLevel: debug
  jaeger:
    # this url is valid within the `docker-compose` environment this collector is running in
    endpoint: jaeger:14250
    tls:
      insecure: true

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [probabilistic_sampler, batch]
      exporters: [jaeger,logging] # logging is optional
```

With both files created in the same directory, we can start them:

```
sudo docker compose up -d
```

### 2. Start a k3s cluster

We need to start a k3s cluster, but we need some special configurations:

1. the k8s version needs to be 1.27+ for traces to work
2. we need to enable the tracing feature flag
3. we need to pass the otel-collector address to the apiserver


First, we'll create the tracing config files, because it's required for the apiserver to start.

> Make sure that the directory exists by running: `sudo mkdir -p /etc/kube-tracing/`
```yaml
# /etc/kube-tracing/apiserver-tracing.yaml
apiVersion: apiserver.config.k8s.io/v1beta1
kind: TracingConfiguration
# default value
endpoint: localhost:4317
samplingRatePerMillion: 1000000 # 100%
```

```yaml
# /etc/kube-tracing/kubelet-tracing.yaml
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
featureGates:
  KubeletTracing: true
tracing:
  endpoint: host.k3d.internal:4317
  samplingRatePerMillion: 1000000
```


These settings are documented [here](https://kubernetes.io/docs/concepts/cluster-administration/system-traces/). The only difference is that we are setting the `samplingRatePerMillion` value to 1,000,000 (meaning, 100%) so that all traces are sent to the collector.
This effectively disables sampling. Use this setting with care; you probably shouldn't do this in a prod environment.

Now we can install the k3s cluster:

```sh
curl -sfL https://get.k3s.io | \
INSTALL_K3S_VERSION="v1.27.1+k3s1" \
INSTALL_K3S_EXEC="--kube-apiserver-arg=feature-gates=APIServerTracing=true --kube-apiserver-arg=tracing-config-file=/etc/kube-tracing/apiserver-tracing.yaml --kubelet-arg=config=/etc/kube-tracing/kubelet-tracing.yaml" \
sh -s - server --cluster-init
```

We use the `INSTALL_K3S_VERSION` env var to set the correct version, and the `INSTALL_K3S_EXEC` to pass flags and settings to the k8s apiserver.

## Running tests

That's it! We have a k8s apiserver ready to do trace based testing. With our [sample script](./test-kubectl.bash) it's very easy.

You only need to set the correct jaeger UI url and the command you want to run. For example:

```sh
JAEGER_UI_URL="http://127.0.0.1:16686" ./test-kubectl.bash ./test.yaml "kubectl get pods -A"
```

The `test.yaml` file can be named as you want, but it needs to look like this:

```yaml
type: Test
spec:
  id: ze90fyU4R # can be anything
  name: kubectl # can be anything
  trigger:
    type: traceid # MUST BE traceid
    traceid:
      id: ${env:asd} # MUST BE in line 8
  # the rest of the file can be modified as needed
  specs:
    - name: List span exists
      selector: span[tracetest.span.type="general" name="List"]
      assertions:
        - attr:tracetest.selected_spans.count = 1
```

You also need to have the `tracetest` CLI configured to talk to the correct Tracetest server.
The Tracetest server also needs to be configured to use the same Jaeger instance as the k8s cluster for its data store.
Loading

0 comments on commit efb642f

Please sign in to comment.