Skip to content

Commit

Permalink
test: add test-server to collect packet lost during upgrade (#2010)
Browse files Browse the repository at this point in the history
  • Loading branch information
oilbeater committed Nov 2, 2022
1 parent f89908e commit c4d8a2f
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Makefile
Expand Up @@ -26,6 +26,7 @@ build-go:
go mod tidy
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -ldflags $(GOLDFLAGS) -v ./cmd
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-webhook -ldflags $(GOLDFLAGS) -v ./cmd/webhook
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o $(CURDIR)/dist/images/test-server -ldflags $(GOLDFLAGS) -v ./test/server

.PHONY: build-go-windows
build-go-windows:
Expand Down Expand Up @@ -74,6 +75,10 @@ image-centos-compile:
docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/centos7-compile:$(RELEASE_TAG) -o type=docker -f dist/images/compile/centos7/Dockerfile fastpath/
# docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/centos8-compile:$(RELEASE_TAG) -o type=docker -f dist/images/compile/centos8/Dockerfile fastpath/

# Build the test image from dist/images/Dockerfile.test. Depends on build-go,
# which produces the dist/images/test-server binary copied into the image.
.PHONY: image-test
image-test: build-go
	docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/test:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile.test dist/images/

.PHONY: release
release: lint image-kube-ovn image-vpc-nat-gateway image-centos-compile

Expand Down
14 changes: 14 additions & 0 deletions dist/images/Dockerfile.test
@@ -0,0 +1,14 @@
# Test image: bundles the test-server binary and its launcher script together
# with the network tools installed below.
FROM alpine:edge

# Append the Alpine edge/testing repository to the repository list, refresh and
# upgrade the package index, then install the tooling without keeping an apk cache.
RUN set -ex \
&& echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories \
&& apk update \
&& apk upgrade \
&& apk add --no-cache \
bash iproute2 iptables iputils tcpdump conntrack-tools \
redis nginx qperf iperf3 net-tools curl

# Both files are copied from the build context into the working directory.
WORKDIR /kube-ovn
COPY test-server /kube-ovn/test-server
COPY test-server.sh /kube-ovn/test-server.sh
# Default command (shell form): run the launcher script with bash.
CMD bash test-server.sh
3 changes: 3 additions & 0 deletions dist/images/test-server.sh
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Container launcher: starts nginx, then runs iperf3 in server mode.
# NOTE(review): this relies on `nginx` returning (daemonizing) so that iperf3
# can start afterwards — confirm against the nginx configuration in the image.
nginx
# -s: run iperf3 as a server.
iperf3 -s
58 changes: 58 additions & 0 deletions test/server/README.md
@@ -0,0 +1,58 @@
# Test Server

This server mainly focuses on testing the network disruption caused by a kube-ovn upgrade or restart, but it can also be extended to test general network connectivity.

## How the test server measures network disruption

The test-server uses ping, iperf3 and curl to visit a specified address during an upgrade or reload. It then automatically collects metrics from
`/proc/net/snmp` and the commands' return codes to calculate ICMP packet loss, TCP retransmitted segments and TCP connection failures.

```bash
# Deploy a kubernetes cluster with kube-ovn
make kind-init kind-install

# Build and deploy test-server
make image-test
kind load docker-image --name kube-ovn kubeovn/test:v1.11.0
kubectl apply -f test/server/test-server.yaml
docker run -d --net=kind kubeovn/test:v1.11.0

# Run test-server analysis tool in one terminal and reload kube-ovn in another terminal
# terminal 1
kubectl exec -it test-client -- ./test-server 100.64.0.1

# terminal 2
kubectl ko reload

# Try different addresses to test different network paths.
```

## Test result

ICMP test result:

| Scenario | Lost |
|-----------------------------------|------|
| Pod address within same node | 0 |
| ovn0 address within same node | 13 |
| Node address the Pod runs on | 15 |
| Pod address in another node | 4 |
| ovn0 address within another node | 21 |
| Node address of another node | 16 |
| Address outside the cluster | 32 |

TCP test result:

| Scenario | Retransmit | Connection Failure | Note |
|---------------------------------|------------|--------------------|------------------|
| Pod address in another node | 38 | 1 | |
| Service address | 86 | 0 | |
| Address outside the cluster | 4 | 1 | |
| External visit NodePort address | | | Connection Reset |

## TODO

1. Long-lived NodePort connections are reset, which needs further investigation.
2. Traffic that goes through ovn0 suffers higher loss, which may be related to the internal-type port.
3. Replace curl with ab to test high connection concurrency.
4. Needs to be tested in a large-scale cluster, where a kube-ovn reload might take much longer.
101 changes: 101 additions & 0 deletions test/server/server.go
@@ -0,0 +1,101 @@
package main

import (
"fmt"
"k8s.io/klog/v2"
"os"
"os/exec"
"strconv"
"strings"
"time"
)

// ReadSnmp reads /proc/net/snmp and returns its counters as a two-level map:
// the outer key is the section name (the line prefix without its trailing
// colon, e.g. "Icmp" or "Tcp") and the inner key is the counter name.
//
// It returns an error if the file cannot be read or if its contents are not
// made of matching header/value line pairs.
func ReadSnmp() (map[string]map[string]int, error) {
	buf, err := os.ReadFile("/proc/net/snmp")
	if err != nil {
		return nil, fmt.Errorf("reading /proc/net/snmp: %w", err)
	}
	return parseSnmp(string(buf))
}

// parseSnmp converts snmp-formatted text into nested counter maps.
//
// The text is expected to consist of pairs of lines: a header line
// ("<Section>: name1 name2 ...") followed by a value line
// ("<Section>: v1 v2 ..."). Blank lines are ignored. A header without a value
// line, or a pair whose section prefix or field count differ, is reported as
// an error instead of being indexed out of range.
func parseSnmp(content string) (map[string]map[string]int, error) {
	var lines []string
	for _, line := range strings.Split(content, "\n") {
		if strings.TrimSpace(line) != "" {
			lines = append(lines, line)
		}
	}
	if len(lines)%2 != 0 {
		return nil, fmt.Errorf("snmp: header line %q has no value line", lines[len(lines)-1])
	}

	snmp := make(map[string]map[string]int, len(lines)/2)
	for i := 0; i < len(lines); i += 2 {
		// Fields tolerates repeated or trailing whitespace between columns.
		keys := strings.Fields(lines[i])
		values := strings.Fields(lines[i+1])
		if len(keys) != len(values) || keys[0] != values[0] {
			return nil, fmt.Errorf("snmp: header %q does not match values %q", lines[i], lines[i+1])
		}

		counters := make(map[string]int, len(keys)-1)
		for j := 1; j < len(keys); j++ {
			// Best effort: a counter that does not parse keeps whatever Atoi
			// returns (0 on syntax errors) rather than failing the whole read.
			v, _ := strconv.Atoi(values[j])
			counters[keys[j]] = v
		}
		snmp[strings.TrimSuffix(keys[0], ":")] = counters
	}
	return snmp, nil
}

// main measures network disruption towards the address given as the first
// command-line argument. It runs three probes concurrently:
//
//   - ping: 6000 echo requests at a 0.01s interval;
//   - curl: one request to <address>:80 roughly every 100ms for 60s, each with
//     a 1s timeout, counting the attempts that fail;
//   - iperf3: a 60s client run at 10M with 1K buffers.
//
// /proc/net/snmp is sampled before and after the probes. ICMP loss is reported
// as the change in (Icmp.OutEchos - Icmp.InEchoReps) and TCP retransmission as
// the change in Tcp.RetransSegs.
func main() {
	defer klog.Flush()
	klog.Infof("start")

	// The target address is mandatory; without this check the probes below
	// would panic with an index-out-of-range on os.Args.
	if len(os.Args) < 2 {
		klog.Error("usage: test-server <target-address>")
		return
	}
	target := os.Args[1]

	// Signal-only channels, closed by each probe when it finishes.
	icmpDone := make(chan struct{})
	tcpConnDone := make(chan struct{})
	tcpRetransDone := make(chan struct{})

	preSnmp, err := ReadSnmp()
	if err != nil {
		klog.Error(err)
		return
	}
	preDiff := preSnmp["Icmp"]["OutEchos"] - preSnmp["Icmp"]["InEchoReps"]
	preRetrans := preSnmp["Tcp"]["RetransSegs"]

	go func() {
		defer close(icmpDone)
		output, err := exec.Command("ping", "-D", "-O", "-c", "6000", "-i", "0.01", target).CombinedOutput()
		klog.Infof("%s, %v", output, err)
	}()

	// Written only by the curl goroutine and read only after tcpConnDone is
	// closed, so no additional locking is needed.
	failedConnection := 0
	totalConnection := 0
	go func() {
		defer close(tcpConnDone)
		startTime := time.Now()
		for time.Since(startTime) <= 60*time.Second {
			time.Sleep(100 * time.Millisecond)
			totalConnection++
			output, err := exec.Command("curl", "-m", "1", fmt.Sprintf("%s:80", target)).CombinedOutput()
			if err != nil {
				klog.Infof("%s, %v", output, err)
				failedConnection++
			}
		}
	}()

	go func() {
		defer close(tcpRetransDone)
		output, err := exec.Command("iperf3", "-c", target, "-b", "10M", "-t", "60", "-l", "1K").CombinedOutput()
		klog.Infof("%s, %v", output, err)
	}()

	// Wait for all three probes before taking the second snapshot.
	<-icmpDone
	<-tcpConnDone
	<-tcpRetransDone

	curSnmp, err := ReadSnmp()
	if err != nil {
		klog.Error(err)
		return
	}
	curDiff := curSnmp["Icmp"]["OutEchos"] - curSnmp["Icmp"]["InEchoReps"]
	curRetrans := curSnmp["Tcp"]["RetransSegs"]
	klog.Infof("lost %d icmp response", curDiff-preDiff)
	klog.Infof("retrans %d tcp segment", curRetrans-preRetrans)
	klog.Infof("%d failed connection, %d total connection", failedConnection, totalConnection)

	klog.Infof("Done")
}
52 changes: 52 additions & 0 deletions test/server/test-server.yaml
@@ -0,0 +1,52 @@
# Pod "test-server": runs the kubeovn/test:v1.11.0 image and carries the label
# env=test-server. The nodeSelector places it on node kube-ovn-control-plane.
apiVersion: v1
kind: Pod
metadata:
name: test-server
labels:
env: test-server
spec:
containers:
- name: test-server
image: kubeovn/test:v1.11.0
imagePullPolicy: IfNotPresent
nodeSelector:
kubernetes.io/hostname: kube-ovn-control-plane

---

# Pod "test-client": runs the same kubeovn/test:v1.11.0 image with the label
# env=test-client. The nodeSelector places it on node kube-ovn-worker.
apiVersion: v1
kind: Pod
metadata:
name: test-client
labels:
env: test-client
spec:
containers:
- name: test-client
image: kubeovn/test:v1.11.0
imagePullPolicy: IfNotPresent
nodeSelector:
kubernetes.io/hostname: kube-ovn-worker

---

# NodePort Service "test-server" in the default namespace. It selects Pods
# labelled env=test-server and exposes two TCP ports: 80 (named "nginx") and
# 5201 (named "iperf").
apiVersion: v1
kind: Service
metadata:
labels:
env: test-server
name: test-server
namespace: default
spec:
ports:
- port: 80
protocol: TCP
targetPort: 80
name: nginx
- port: 5201
protocol: TCP
targetPort: 5201
name: iperf
selector:
env: test-server
type: NodePort

0 comments on commit c4d8a2f

Please sign in to comment.