Skip to content

Commit

Permalink
Enable set probe (#3145)
Browse files: browse the repository at this point in the history
* enable set ovs probe
Signed-off-by: bobz965 <zhangbingbing2_yewu@cmss.chinamobile.com>
  • Loading branch information
bobz965 committed Aug 17, 2023
1 parent a7af897 commit 377d56d
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 39 deletions.
7 changes: 6 additions & 1 deletion charts/templates/central-deploy.yaml
Expand Up @@ -44,7 +44,8 @@ spec:
- name: ovn-central
image: {{ .Values.global.registry.address }}/{{ .Values.global.images.kubeovn.repository }}:{{ .Values.global.images.kubeovn.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
command: ["/kube-ovn/start-db.sh"]
command:
- /kube-ovn/start-db.sh
securityContext:
capabilities:
add: ["SYS_NICE"]
Expand All @@ -71,6 +72,10 @@ spec:
fieldPath: status.podIPs
- name: ENABLE_BIND_LOCAL_IP
value: "{{- .Values.func.ENABLE_BIND_LOCAL_IP }}"
- name: PROBE_INTERVAL
value: "{{ .Values.networking.PROBE_INTERVAL }}"
- name: OVN_LEADER_PROBE_INTERVAL
value: "{{ .Values.networking.OVN_LEADER_PROBE_INTERVAL }}"
resources:
requests:
cpu: {{ index .Values "ovn-central" "requests" "cpu" }}
Expand Down
7 changes: 6 additions & 1 deletion charts/templates/ovsovn-ds.yaml
Expand Up @@ -44,7 +44,8 @@ spec:
{{- if .Values.DPDK }}
command: ["/kube-ovn/start-ovs-dpdk.sh"]
{{- else }}
command: ["/kube-ovn/start-ovs.sh"]
command:
- /kube-ovn/start-ovs.sh
{{- end }}
securityContext:
runAsUser: 0
Expand Down Expand Up @@ -74,6 +75,10 @@ spec:
fieldPath: spec.nodeName
- name: OVN_DB_IPS
value: "{{ .Values.MASTER_NODES }}"
- name: OVN_REMOTE_PROBE_INTERVAL
value: "{{ .Values.networking.OVN_REMOTE_PROBE_INTERVAL }}"
- name: OVN_REMOTE_OPENFLOW_INTERVAL
value: "{{ .Values.networking.OVN_REMOTE_OPENFLOW_INTERVAL }}"
volumeMounts:
- mountPath: /var/run/netns
name: host-ns
Expand Down
4 changes: 4 additions & 0 deletions charts/values.yaml
Expand Up @@ -47,6 +47,10 @@ networking:
ENABLE_ECMP: false
ENABLE_METRICS: true
NODE_LOCAL_DNS_IP: ""
PROBE_INTERVAL: 180000
OVN_LEADER_PROBE_INTERVAL: 5
OVN_REMOTE_PROBE_INTERVAL: 10000
OVN_REMOTE_OPENFLOW_INTERVAL: 180

func:
ENABLE_LB: true
Expand Down
26 changes: 17 additions & 9 deletions dist/images/install.sh
Expand Up @@ -80,9 +80,7 @@ POD_NIC_TYPE="veth-pair" # veth-pair or internal-port
POD_DEFAULT_FIP_TYPE="" # iptables, pod can set iptables fip automatically by enable fip annotation

# VLAN config only takes effect when NETWORK_TYPE is vlan
PROVIDER_NAME="provider"
VLAN_INTERFACE_NAME=""
VLAN_NAME="ovn-vlan"
VLAN_ID="100"

if [ "$ENABLE_VLAN" = "true" ]; then
Expand Down Expand Up @@ -134,7 +132,7 @@ then
--with-dpdk=*)
DPDK=true
DPDK_VERSION="${1#*=}"
if [[ ! "${DPDK_SUPPORTED_VERSIONS[@]}" = "${DPDK_VERSION}" ]] || [[ -z "${DPDK_VERSION}" ]]; then
if [[ ! "${DPDK_SUPPORTED_VERSIONS[*]}" = "${DPDK_VERSION}" ]] || [[ -z "${DPDK_VERSION}" ]]; then
echo "Unsupported DPDK version: ${DPDK_VERSION}"
echo "Supported DPDK versions: ${DPDK_SUPPORTED_VERSIONS[*]}"
exit 1
Expand Down Expand Up @@ -201,17 +199,17 @@ fi
echo "[Step 1/6] Label kube-ovn-master node and label datapath type"
count=$(kubectl get no -l$LABEL --no-headers | wc -l)
node_label="$LABEL"
if [ $count -eq 0 ]; then
if [ "${count}" -eq 0 ]; then
count=$(kubectl get no -l$DEPRECATED_LABEL --no-headers | wc -l)
node_label="$DEPRECATED_LABEL"
if [ $count -eq 0 ]; then
if [ "${count}" -eq 0 ]; then
echo "ERROR: No node with label $LABEL or $DEPRECATED_LABEL found"
exit 1
fi
fi
kubectl label no -l$node_label kube-ovn/role=master --overwrite

if [ "$DPDK" = "true" -o "$HYBRID_DPDK" = "true" ]; then
if [ "$DPDK" = "true" ] || [ "$HYBRID_DPDK" = "true" ]; then
kubectl label no -lovn.kubernetes.io/ovs_dp_type!=userspace ovn.kubernetes.io/ovs_dp_type=kernel --overwrite
fi

Expand Down Expand Up @@ -3204,7 +3202,8 @@ spec:
- name: ovn-central
image: "$REGISTRY/kube-ovn:$VERSION"
imagePullPolicy: $IMAGE_PULL_POLICY
command: ["/kube-ovn/start-db.sh"]
command:
- /kube-ovn/start-db.sh
securityContext:
capabilities:
add: ["SYS_NICE"]
Expand Down Expand Up @@ -3233,6 +3232,10 @@ spec:
value: "$ENABLE_BIND_LOCAL_IP"
- name: DEBUG_WRAPPER
value: "$DEBUG_WRAPPER"
- name: PROBE_INTERVAL
value: "180000"
- name: OVN_LEADER_PROBE_INTERVAL
value: "5"
resources:
requests:
cpu: 300m
Expand Down Expand Up @@ -3516,7 +3519,8 @@ spec:
- name: openvswitch
image: "$REGISTRY/kube-ovn:$VERSION"
imagePullPolicy: $IMAGE_PULL_POLICY
command: ["/kube-ovn/start-ovs.sh"]
command:
- /kube-ovn/start-ovs.sh
securityContext:
runAsUser: 0
privileged: true
Expand Down Expand Up @@ -3547,6 +3551,10 @@ spec:
value: $addresses
- name: DEBUG_WRAPPER
value: "$DEBUG_WRAPPER"
- name: OVN_REMOTE_PROBE_INTERVAL
value: "10000"
- name: OVN_REMOTE_OPENFLOW_INTERVAL
value: "180"
volumeMounts:
- mountPath: /var/run/netns
name: host-ns
Expand Down Expand Up @@ -4519,7 +4527,7 @@ if ! sh -c "echo \":$PATH:\" | grep -q \":/usr/local/bin:\""; then
fi

echo "[Step 6/6] Run network diagnose"
kubectl cp kube-system/$(kubectl -n kube-system get pods -o wide | grep cni | awk '{print $1}' | awk 'NR==1{print}'):/kube-ovn/kubectl-ko /usr/local/bin/kubectl-ko
kubectl cp kube-system/"$(kubectl -n kube-system get pods -o wide | grep cni | awk '{print $1}' | awk 'NR==1{print}')":/kube-ovn/kubectl-ko /usr/local/bin/kubectl-ko
chmod +x /usr/local/bin/kubectl-ko
kubectl ko diagnose all

Expand Down
21 changes: 13 additions & 8 deletions dist/images/start-db.sh
Expand Up @@ -4,6 +4,9 @@ set -eo pipefail
DEBUG_WRAPPER=${DEBUG_WRAPPER:-}
DEBUG_OPT="--ovn-northd-wrapper=$DEBUG_WRAPPER --ovsdb-nb-wrapper=$DEBUG_WRAPPER --ovsdb-sb-wrapper=$DEBUG_WRAPPER"

echo "PROBE_INTERVAL is set to $PROBE_INTERVAL"
echo "OVN_LEADER_PROBE_INTERVAL is set to $OVN_LEADER_PROBE_INTERVAL"

# https://bugs.launchpad.net/neutron/+bug/1776778
if grep -q "3.10.0-862" /proc/version
then
Expand Down Expand Up @@ -224,11 +227,12 @@ if [[ "$ENABLE_SSL" == "false" ]]; then
if [[ -z "$NODE_IPS" ]]; then
/usr/share/ovn/scripts/ovn-ctl restart_northd
ovn-nbctl --no-leader-only set-connection ptcp:"${NB_PORT}":["${DB_ADDR}"]
ovn-nbctl --no-leader-only set Connection . inactivity_probe=180000
ovn-nbctl --no-leader-only set Connection . inactivity_probe=${PROBE_INTERVAL}
ovn-nbctl --no-leader-only set NB_Global . options:northd_probe_interval=${PROBE_INTERVAL}
ovn-nbctl --no-leader-only set NB_Global . options:use_logical_dp_groups=true

ovn-sbctl --no-leader-only set-connection ptcp:"${SB_PORT}":["${DB_ADDR}"]
ovn-sbctl --no-leader-only set Connection . inactivity_probe=180000
ovn-sbctl --no-leader-only set Connection . inactivity_probe=${PROBE_INTERVAL}
else
if [[ ! "$NODE_IPS" =~ "$DB_CLUSTER_ADDR" ]]; then
echo "ERROR! host ip $DB_CLUSTER_ADDR not in env NODE_IPS $NODE_IPS"
Expand Down Expand Up @@ -272,7 +276,9 @@ if [[ "$ENABLE_SSL" == "false" ]]; then
/etc/ovn/ovnsb_local_config.db
/usr/share/ovn/scripts/ovn-ctl $ovn_ctl_args \
--ovn-manage-ovsdb=no start_northd
ovn-nbctl --no-leader-only set NB_Global . options:northd_probe_interval=180000
ovn-nbctl --no-leader-only set NB_Global . options:inactivity_probe=${PROBE_INTERVAL}
ovn-sbctl --no-leader-only set SB_Global . options:inactivity_probe=${PROBE_INTERVAL}
ovn-nbctl --no-leader-only set NB_Global . options:northd_probe_interval=${PROBE_INTERVAL}
ovn-nbctl --no-leader-only set NB_Global . options:use_logical_dp_groups=true
else
# known leader always first
Expand Down Expand Up @@ -352,11 +358,11 @@ else
--ovn-northd-ssl-ca-cert=/var/run/tls/cacert \
restart_northd
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set-connection pssl:"${NB_PORT}":["${DB_ADDR}"]
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=180000
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=${PROBE_INTERVAL}
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:use_logical_dp_groups=true

ovn-sbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set-connection pssl:"${SB_PORT}":["${DB_ADDR}"]
ovn-sbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=180000
ovn-sbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=${PROBE_INTERVAL}
else
if [[ ! "$NODE_IPS" =~ "$DB_CLUSTER_ADDR" ]]; then
echo "ERROR! host ip $DB_CLUSTER_ADDR not in env NODE_IPS $NODE_IPS"
Expand Down Expand Up @@ -408,7 +414,7 @@ else
/etc/ovn/ovnsb_local_config.db
/usr/share/ovn/scripts/ovn-ctl $ovn_ctl_args \
--ovn-manage-ovsdb=no start_northd
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:northd_probe_interval=180000
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:northd_probe_interval=${PROBE_INTERVAL}
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:use_logical_dp_groups=true
else
# get leader if cluster exists
Expand Down Expand Up @@ -486,5 +492,4 @@ ovs-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/memory-trim-on-compaction o
ovs-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/memory-trim-on-compaction on

chmod 600 /etc/ovn/*
/kube-ovn/kube-ovn-leader-checker

/kube-ovn/kube-ovn-leader-checker --probeInterval=${OVN_LEADER_PROBE_INTERVAL}
35 changes: 19 additions & 16 deletions dist/images/start-ovs.sh
@@ -1,6 +1,9 @@
#!/bin/bash
set -euo pipefail

echo "OVN_REMOTE_PROBE_INTERVAL is set to $OVN_REMOTE_PROBE_INTERVAL"
echo "OVN_REMOTE_OPENFLOW_INTERVAL is set to $OVN_REMOTE_OPENFLOW_INTERVAL"

HW_OFFLOAD=${HW_OFFLOAD:-false}
ENABLE_SSL=${ENABLE_SSL:-false}
OVN_DB_IPS=${OVN_DB_IPS:-}
Expand Down Expand Up @@ -36,21 +39,21 @@ cat /proc/cmdline"
fi

function cgroup_match {
hash1=$(md5sum /proc/$1/cgroup | awk '{print $1}')
hash2=$(md5sum /proc/$2/cgroup | awk '{print $1}')
hash1=$(md5sum /proc/"$1"/cgroup | awk '{print $1}')
hash2=$(md5sum /proc/"$2"/cgroup | awk '{print $1}')
test -n "$hash1" -a "x$hash1" = "x$hash2"
}

function quit {
gen_name=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.generateName}')
revision_hash=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.labels.controller-revision-hash}')
revision=$(kubectl -n $POD_NAMESPACE get controllerrevision $gen_name$revision_hash -o jsonpath='{.revision}')
gen_name=$(kubectl -n "${POD_NAMESPACE}" get pod "${POD_NAME}" -o jsonpath='{.metadata.generateName}')
revision_hash=$(kubectl -n "${POD_NAMESPACE}" get pod "${POD_NAME}" -o jsonpath='{.metadata.labels.controller-revision-hash}')
revision=$(kubectl -n "${POD_NAMESPACE}" get controllerrevision "${gen_name}${revision_hash}" -o jsonpath='{.revision}')
ds_name=${gen_name%-}
latest_revision=$(kubectl -n kube-system get controllerrevision --no-headers | awk '$2 == "daemonset.apps/'$ds_name'" {print $3}' | sort -nr | head -n1)
if [ "x$latest_revision" = "x$revision" ]; then
# stop ovn-controller/ovs only when the processes are in the same cgroup
pid=$(/usr/share/ovn/scripts/ovn-ctl status_controller | awk '{print $NF}')
if cgroup_match $pid self; then
if cgroup_match "${pid}" self; then
/usr/share/ovn/scripts/grace_stop_ovn_controller
/usr/share/openvswitch/scripts/ovs-ctl stop
fi
Expand All @@ -64,12 +67,12 @@ trap quit EXIT
iptables -V

# Start ovsdb
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd --system-id=random --ovsdb-server-wrapper=$DEBUG_WRAPPER
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd --system-id=random --ovsdb-server-wrapper="${DEBUG_WRAPPER}"
# Restrict the number of pthreads ovs-vswitchd creates to reduce the
# amount of RSS it uses on hosts with many cores
# https://bugzilla.redhat.com/show_bug.cgi?id=1571379
# https://bugzilla.redhat.com/show_bug.cgi?id=1572797
if [[ `nproc` -gt 12 ]]; then
if [[ $(nproc) -gt 12 ]]; then
ovs-vsctl --no-wait set Open_vSwitch . other_config:n-revalidator-threads=4
ovs-vsctl --no-wait set Open_vSwitch . other_config:n-handler-threads=10
fi
Expand All @@ -87,28 +90,28 @@ ovs-appctl -t "$ovsdb_server_ctl" vlog/set reconnect:file:err

function handle_underlay_bridges() {
bridges=($(ovs-vsctl --no-heading --columns=name find bridge external-ids:vendor=kube-ovn))
for br in ${bridges[@]}; do
if ! ip link show $br >/dev/null; then
for br in "${bridges[@]}"; do
if ! ip link show "$br" >/dev/null; then
# the bridge does not exist, leave it to be handled by kube-ovn-cni
echo "deleting ovs bridge $br"
ovs-vsctl --no-wait del-br $br
ovs-vsctl --no-wait del-br "$br"
fi
done

bridges=($(ovs-vsctl --no-heading --columns=name find bridge external-ids:vendor=kube-ovn external-ids:exchange-link-name=true))
for br in ${bridges[@]}; do
for br in "${bridges[@]}"; do
if [ -z $(ip link show $br type openvswitch 2>/dev/null || true) ]; then
# the bridge does not exist, leave it to be handled by kube-ovn-cni
echo "deleting ovs bridge $br"
ovs-vsctl --no-wait del-br $br
ovs-vsctl --no-wait del-br "$br"
fi
done
}

handle_underlay_bridges

# Start vswitchd. restart will automatically set/unset flow-restore-wait which is not what we want
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovsdb-server --system-id=random --no-mlockall --ovs-vswitchd-wrapper=$DEBUG_WRAPPER
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovsdb-server --system-id=random --no-mlockall --ovs-vswitchd-wrapper="$DEBUG_WRAPPER"
/usr/share/openvswitch/scripts/ovs-ctl --protocol=udp --dport=6081 enable-protocol

function gen_conn_str {
Expand Down Expand Up @@ -137,9 +140,9 @@ ovs-vsctl set open . external-ids:hostname="${KUBE_NODE_NAME}"

# Start ovn-controller
if [[ "$ENABLE_SSL" == "false" ]]; then
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-wrapper=$DEBUG_WRAPPER restart_controller
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-wrapper="$DEBUG_WRAPPER" restart_controller
else
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-ssl-key=/var/run/tls/key --ovn-controller-ssl-cert=/var/run/tls/cert --ovn-controller-ssl-ca-cert=/var/run/tls/cacert --ovn-controller-wrapper=$DEBUG_WRAPPER restart_controller
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-ssl-key=/var/run/tls/key --ovn-controller-ssl-cert=/var/run/tls/cert --ovn-controller-ssl-ca-cert=/var/run/tls/cacert --ovn-controller-wrapper="$DEBUG_WRAPPER" restart_controller
fi

chmod 600 /etc/openvswitch/*
Expand Down
9 changes: 7 additions & 2 deletions yamls/ovn-dpdk.yaml
Expand Up @@ -204,9 +204,10 @@ spec:
hostNetwork: true
containers:
- name: ovn-central
image: "kubeovn/kube-ovn:v1.10.0"
image: "kubeovn/kube-ovn:v1.12.0"
imagePullPolicy: IfNotPresent
command: ["/kube-ovn/start-db.sh"]
command:
- /kube-ovn/start-db.sh
securityContext:
capabilities:
add: ["SYS_NICE"]
Expand All @@ -231,6 +232,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.podIPs
- name: PROBE_INTERVAL
value: "180000"
- name: OVN_LEADER_PROBE_INTERVAL
value: "5"
resources:
requests:
cpu: 500m
Expand Down
14 changes: 12 additions & 2 deletions yamls/ovn-ha.yaml
Expand Up @@ -95,7 +95,8 @@ spec:
- name: ovn-central
image: "kubeovn/kube-ovn:v1.12.0"
imagePullPolicy: IfNotPresent
command: ["/kube-ovn/start-db.sh"]
command:
- /kube-ovn/start-db.sh
securityContext:
capabilities:
add: ["SYS_NICE"]
Expand All @@ -122,6 +123,10 @@ spec:
fieldPath: status.podIPs
- name: ENABLE_BIND_LOCAL_IP
value: "true"
- name: PROBE_INTERVAL
value: "180000"
- name: OVN_LEADER_PROBE_INTERVAL
value: "5"
resources:
requests:
cpu: 300m
Expand Down Expand Up @@ -238,7 +243,8 @@ spec:
- name: openvswitch
image: "kubeovn/kube-ovn:v1.12.0"
imagePullPolicy: IfNotPresent
command: ["/kube-ovn/start-ovs.sh"]
command:
- /kube-ovn/start-ovs.sh
securityContext:
runAsUser: 0
privileged: true
Expand All @@ -265,6 +271,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: OVN_REMOTE_PROBE_INTERVAL
value: "10000"
- name: OVN_REMOTE_OPENFLOW_INTERVAL
value: "180"
volumeMounts:
- mountPath: /var/run/netns
name: host-ns
Expand Down

0 comments on commit 377d56d

Please sign in to comment.