Skip to content

Commit

Permalink
add env-check (#1464)
Browse files Browse the repository at this point in the history
  • Loading branch information
hongzhen-ma committed Apr 24, 2022
1 parent 1f68e12 commit 35ecc68
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 2 deletions.
2 changes: 1 addition & 1 deletion dist/images/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ARG DEBIAN_FRONTEND=noninteractive
RUN apt update && apt upgrade -y && apt install ca-certificates python3 hostname libunwind8 netbase \
ethtool iproute2 ncat libunbound-dev procps libatomic1 kmod iptables \
tcpdump ipset curl uuid-runtime openssl inetutils-ping arping ndisc6 \
logrotate libjemalloc2 dnsutils -y --no-install-recommends && \
logrotate libjemalloc2 dnsutils net-tools nmap -y --no-install-recommends && \
rm -rf /var/lib/apt/lists/* && \
cd /usr/sbin && \
ln -sf /usr/sbin/iptables-legacy iptables && \
Expand Down
82 changes: 82 additions & 0 deletions dist/images/env-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash

# Best-effort diagnostic script: every check only prints hints on stdout, so a
# failing command must never abort the checks that follow.
set +e

#######################################
# Report problems with the CNI configuration directory.
# Arguments: $1 - directory to inspect (default: /etc/cni/net.d)
# Outputs:   one hint line per finding on stdout
# Returns:   0 always
#######################################
check_cni_config() {
  local conf_dir=${1:-/etc/cni/net.d}
  local path name

  if [[ ! -e "$conf_dir" ]]; then
    echo "Directory ${conf_dir} does not exist, please check kube-ovn-cni pod status"
    # Nothing to list; return instead of listing a missing directory.
    return 0
  fi

  # Glob instead of parsing `ls` so names containing whitespace stay intact.
  for path in "$conf_dir"/*; do
    # An empty directory leaves the pattern unexpanded; -L keeps dangling
    # symlinks, which `ls` would have listed as well.
    [[ -e "$path" || -L "$path" ]] || continue
    name=${path##*/}
    # Any entry whose name does not contain kube-ovn.conflist is suspicious.
    if [[ "$name" != *kube-ovn.conflist* ]]; then
      echo "Check files in ${conf_dir}, make sure if the config file ${name} should be deleted"
    fi
  done
  return 0
}

echo "1) check cni configuration"
check_cni_config

#######################################
# Report IPv4 sysctl values that are known to disturb traffic.
# Arguments: $1 - directory holding the sysctl files
#                 (default: /proc/sys/net/ipv4)
# Outputs:   one hint line per finding on stdout
# Returns:   0 always
#######################################
check_ipv4_sysctl() {
  local sysctl_dir=${1:-/proc/sys/net/ipv4}
  local mtu_file="${sysctl_dir}/tcp_mtu_probing"
  local recycle_file="${sysctl_dir}/tcp_tw_recycle"
  local probe_mtu="" recycle=""

  # Read only when the file is readable so a missing sysctl yields no
  # "unary operator expected" / cat errors.
  if [[ -r "$mtu_file" ]]; then
    probe_mtu=$(< "$mtu_file")
  fi
  if [[ "$probe_mtu" == 0 ]]; then
    echo "The 'tcp_mtu_probing' config may affect traffic, make sure if ${mtu_file} should be set to 1"
  fi

  # tcp_tw_recycle is only checked when the file exists.
  if [[ -r "$recycle_file" ]]; then
    recycle=$(< "$recycle_file")
    if [[ "$recycle" == 1 ]]; then
      echo "The 'tcp_tw_recycle' config affects nodeport service, make sure change ${recycle_file} to 0"
    fi
  fi
  return 0
}

echo "2) check system ipv4 config"
check_ipv4_sysctl

#######################################
# Look for the 'InCsumErrors' counter in the output of `netstat -s`.
# When netstat is unavailable, print instructions for a manual check instead.
# Outputs: hint lines on stdout
# Returns: 0 always
#######################################
check_checksum_errors() {
  local stats

  # `command -v` is a shell builtin, so the lookup itself cannot be missing
  # from a minimal image the way `which` can.
  if ! command -v netstat >/dev/null 2>&1; then
    echo "The netstat cmd not found, maybe can be installed mannully and exec 'netstat -s' to check if there is 'InCsumErrors'"
    echo "If there's 'InCsumErrors' and the value is increasing, should exec cmd 'ethtool -K ETH tx off' to disable checksum, where 'ETH' is the nic used for traffics"
    return 0
  fi

  stats=$(netstat -s)
  # Only the presence of the counter is detected; whether it is increasing
  # has to be verified by the operator.
  if [[ "$stats" == *InCsumErrors* ]]; then
    echo "Found 'InCsumErrors' para after exec 'netstat -s' cmd, check if the value is increasing, maybe should exec cmd 'ethtool -K ETH tx off' to disable checksum, where 'ETH' is the nic used for traffics"
  fi
  return 0
}

echo "3) check checksum value"
check_checksum_errors

#######################################
# Warn when the DNS search list contains a '.com' entry.
# Only 'search' and 'domain' directives are inspected, so comments,
# nameserver lines and options cannot trigger a false positive.
# Arguments: $1 - resolver configuration file (default: /etc/resolv.conf)
# Outputs:   at most one hint line on stdout
# Returns:   0 always
#######################################
check_dns_search() {
  local resolv_conf=${1:-/etc/resolv.conf}
  local line

  # A missing or unreadable file simply produces no hint.
  [[ -r "$resolv_conf" ]] || return 0

  # `|| [[ -n $line ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ ^[[:space:]]*(search|domain)[[:space:]] && "$line" == *.com* ]]; then
      echo "There's *.com in dns search name, make sure the config ${resolv_conf} is right"
      return 0
    fi
  done < "$resolv_conf"
  return 0
}

echo "4) check dns config"
check_dns_search

#######################################
# Print a hint when at least one running process matches a pattern.
# pgrep never reports itself, so there is no "count the grep too" race and
# no numeric-vs-string comparison of a line count.
# Arguments: $1 - pattern matched against the full command line (pgrep -f)
#            $2 - hint to print on a match
# Returns:   0 always; a missing pgrep is treated as "nothing found"
#######################################
warn_if_process_found() {
  local pattern=$1 hint=$2

  if pgrep -f "$pattern" >/dev/null 2>&1; then
    echo "$hint"
  fi
  return 0
}

#######################################
# Look for firewall / host-security agents that may interfere with traffic.
# Outputs: one hint line per matching keyword on stdout
# Returns: 0 always
#######################################
check_security_processes() {
  local keyword

  warn_if_process_found firewall \
    "The firewalld is running, make sure it has no effect on traffics across nodes"

  # Keywords commonly found in the names of host security products.
  for keyword in security qax safe defence vmsec; do
    warn_if_process_found "$keyword" \
      "Found pid with '*${keyword}*' name, make sure it has no effect on traffics"
  done
  return 0
}

echo "5) check firewall config"
check_security_processes

#######################################
# Probe UDP port 6081 (geneve encapsulation) on localhost with nmap.
# When nmap is unavailable, print the command for a manual check instead.
# Outputs: at most one hint line on stdout
# Returns: 0 always
#######################################
check_geneve_port() {
  local scan

  # `command -v` is a shell builtin, so the lookup does not depend on
  # `which` being installed.
  if ! command -v nmap >/dev/null 2>&1; then
    echo "The nmap cmd not found, maybe can be installed mannully and exec 'nmap -sU 127.0.0.1 -p 6081' to check port connection"
    return 0
  fi

  scan=$(nmap -sU 127.0.0.1 -p 6081)
  # Any state containing "open" (including "open|filtered") counts as
  # available, matching the substring test this check has always used.
  if [[ "$scan" != *open* ]]; then
    echo "The 6081 port for geneve encapsulation may be not available, if the number of nodes is more than 1, please check if ovs-ovn pod is healthy"
  fi
  return 0
}

echo "6) check geneve 6081 connection"
check_geneve_port
21 changes: 20 additions & 1 deletion dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2455,6 +2455,7 @@ showHelp(){
echo " trace {namespace/podname} {target ip address} {icmp|tcp|udp} [target tcp or udp port] trace ovn microflow of specific packet"
echo " diagnose {all|node} [nodename] diagnose connectivity of all nodes or a specific node"
echo " reload restart all kube-ovn components"
echo " env-check check the environment configuration"
}
tcpdump(){
Expand Down Expand Up @@ -2968,6 +2969,21 @@ reload(){
kubectl rollout status deployment/kube-ovn-monitor -n kube-system
}
#######################################
# Run /kube-ovn/env-check.sh inside the cni-server container of every
# kube-ovn-cni pod, printing a banner with the pod's node name first.
# Globals:   KUBE_OVN_NS (written: kube-system)
# Outputs:   banners and the remote script's output on stdout
# Returns:   status of the last command executed
#######################################
env-check(){
  # Keep going when one node fails so the remaining nodes are still checked.
  set +e

  KUBE_OVN_NS=kube-system
  local cni_pods cni_pod node_name

  # Match on the NAME column only, so other columns of the listing cannot
  # select an unrelated pod.
  cni_pods=$(kubectl get pod --no-headers -n "$KUBE_OVN_NS" \
    | awk '$1 ~ /kube-ovn-cni/ {print $1}')

  # Intentionally unquoted: one pod name per whitespace-separated word.
  for cni_pod in $cni_pods; do
    node_name=$(kubectl get pod "$cni_pod" -n "$KUBE_OVN_NS" -o jsonpath='{.spec.nodeName}')
    echo "************************************************"
    echo "Start environment check for Node $node_name"
    echo "************************************************"
    kubectl exec -it -n "$KUBE_OVN_NS" "$cni_pod" -c cni-server -- bash /kube-ovn/env-check.sh
  done
}
if [ $# -lt 1 ]; then
showHelp
exit 0
Expand Down Expand Up @@ -3002,8 +3018,11 @@ case $subcommand in
reload)
reload
;;
env-check)
env-check
;;
*)
showHelp
showHelp
;;
esac
Expand Down

0 comments on commit 35ecc68

Please sign in to comment.