-
Notifications
You must be signed in to change notification settings - Fork 392
/
opt-libexec-openshift-azure-routes-sh.yaml
293 lines (253 loc) · 12.3 KB
/
opt-libexec-openshift-azure-routes-sh.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
mode: 0755
path: "/opt/libexec/openshift-azure-routes.sh"
contents:
inline: |
#!/bin/bash
# Work around Azure load-balancer hairpin traffic while the apiserver is up.
#
# Per the Azure documentation (https://docs.microsoft.com/en-us/azure/load-balancer/concepts#limitations),
# traffic that a backend sends to a load balancer it is itself a member of is
# dropped. The L3 LB performs DNAT: the outgoing packet is addressed to the
# VIP, but the load-balanced packet arrives back with the host's own IP as
# its destination. The host therefore "sees" a SYN whose source and
# destination are both itself and duly replies with a syn-ack to itself —
# while the client socket is waiting for a syn-ack sourced from the VIP, so
# the reply is discarded.
#
# The fix is to redirect traffic destined for the LB VIP back to ourselves.
#
# /run/cloud-routes/ is checked for files named $VIP.up and $VIP.down. When
# the .up file exists, traffic destined for that VIP is redirected to
# ourselves via iptables. A systemd unit watches the directory for changes.
#
# TODO: Address the potential issue where apiserver-watcher could create
# multiple files and openshift-azure-routes doesn't detect all of them,
# because file change events are not queued while the service is already
# running.
# https://github.com/openshift/machine-config-operator/pull/3643#issuecomment-1497234369
set -euo pipefail

# Load-balancer VIPs assigned to this node, keyed by address.
declare -A v4vips
declare -A v6vips

# iptables/ip6tables chain that holds the REDIRECT rules.
CHAIN_NAME="azure-vips"
# Directory holding the $VIP.up / $VIP.down marker files.
RUN_DIR="/run/cloud-routes"
# Create an iptables chain if it doesn't already exist.
# $1 - table name, $2 - chain name.
ensure_chain4() {
    local table="${1}"
    local chain="${2}"
    # Listing the chain (-S) fails when it does not exist yet.
    if iptables -w -t "${table}" -S "${chain}" &> /dev/null; then
        return 0
    fi
    echo "creating ${chain} chain in ${table} table"
    iptables -w -t "${table}" -N "${chain}"
}
# Create an ip6tables chain if it doesn't already exist.
# No-op on hosts without IPv6 support.
# $1 - table name, $2 - chain name.
ensure_chain6() {
    if [ ! -f /proc/net/if_inet6 ]; then
        return
    fi
    local table="${1}"
    local chain="${2}"
    # Listing the chain (-S) fails when it does not exist yet.
    if ip6tables -w -t "${table}" -S "${chain}" &> /dev/null; then
        return 0
    fi
    echo "creating ${chain} chain in ${table} table"
    ip6tables -w -t "${table}" -N "${chain}"
}
# Append a rule to an iptables chain unless an identical rule already exists.
# $1 - table name, $2 - chain name, remaining args - the rule specification.
ensure_rule4() {
    local table="${1}"
    local chain="${2}"
    shift 2
    # -C checks for an exact match; only append (-A) when the rule is absent.
    if ! iptables -w -t "${table}" -C "${chain}" "$@" &> /dev/null; then
        # Use $* rather than $@ inside the message string: we want one word,
        # not an array expansion embedded in a string (ShellCheck SC2145).
        echo "adding \"$*\" to ${chain} chain in ${table} table"
        iptables -w -t "${table}" -A "${chain}" "$@"
    fi
}
# Append a rule to an ip6tables chain unless an identical rule already exists.
# No-op on hosts without IPv6 support.
# $1 - table name, $2 - chain name, remaining args - the rule specification.
ensure_rule6() {
    if [ ! -f /proc/net/if_inet6 ]; then
        return
    fi
    local table="${1}"
    local chain="${2}"
    shift 2
    # -C checks for an exact match; only append (-A) when the rule is absent.
    if ! ip6tables -w -t "${table}" -C "${chain}" "$@" &> /dev/null; then
        # Use $* rather than $@ inside the message string: we want one word,
        # not an array expansion embedded in a string (ShellCheck SC2145).
        echo "adding \"$*\" to ${chain} chain in ${table} table"
        ip6tables -w -t "${table}" -A "${chain}" "$@"
    fi
}
# Set up the VIP chain, the jump rules into it, and the conntrack fast-path
# rules, for both IP families.
initialize() {
    local proto
    # Make sure the VIP chain exists in the nat table.
    for proto in 4 6; do
        "ensure_chain${proto}" nat "${CHAIN_NAME}"
    done
    # Jump into the VIP chain for forwarded traffic...
    for proto in 4 6; do
        "ensure_rule${proto}" nat PREROUTING -m comment --comment 'azure LB vip overriding for pods' -j "${CHAIN_NAME}"
    done
    # ...and for locally generated traffic.
    for proto in 4 6; do
        "ensure_rule${proto}" nat OUTPUT -m comment --comment 'azure LB vip overriding for local clients' -j "${CHAIN_NAME}"
    done
    # Need this so that existing flows (with an entry in conntrack) continue,
    # even if the iptables rule is removed
    for proto in 4 6; do
        "ensure_rule${proto}" filter FORWARD -m comment --comment 'azure LB vip existing' -m addrtype ! --dst-type LOCAL -m state --state ESTABLISHED,RELATED -j ACCEPT
    done
    for proto in 4 6; do
        "ensure_rule${proto}" filter OUTPUT -m comment --comment 'azure LB vip existing' -m addrtype ! --dst-type LOCAL -m state --state ESTABLISHED,RELATED -j ACCEPT
    done
}
# Delete REDIRECT rules from the VIP chain for any VIP that is no longer
# tracked as "up" in the v4vips/v6vips maps.
remove_stale() {
    ## find extra iptables rules
    # Parse the destination address out of `iptables -S ${CHAIN_NAME}` output;
    # the first awk picks field 4 ($4 non-empty skips the bare chain-header
    # line), the second strips the "/prefix" suffix.
    # NOTE(review): assumes the -d address is always field 4 of -S output —
    # holds for rules added by add_rules below; confirm for manual additions.
    for ipt_vip in $(iptables -w -t nat -S "${CHAIN_NAME}" | awk '$4{print $4}' | awk -F/ '{print $1}'); do
        # Remove the rule when the VIP is unknown (-v: subscript not set) or
        # explicitly marked "down".
        if [[ ! -v v4vips[${ipt_vip}] ]] || [[ "${v4vips[${ipt_vip}]}" = down ]]; then
            echo removing stale vip "${ipt_vip}" for local clients
            iptables -w -t nat -D "${CHAIN_NAME}" --dst "${ipt_vip}" -j REDIRECT
        fi
    done
    # Hosts without IPv6 support skip the v6 pass.
    if [ ! -f /proc/net/if_inet6 ]; then
        return
    fi
    for ipt_vip in $(ip6tables -w -t nat -S "${CHAIN_NAME}" | awk '$4{print $4}' | awk -F/ '{print $1}'); do
        if [[ ! -v v6vips[${ipt_vip}] ]] || [[ "${v6vips[${ipt_vip}]}" = down ]]; then
            echo removing stale vip "${ipt_vip}" for local clients
            ip6tables -w -t nat -D "${CHAIN_NAME}" --dst "${ipt_vip}" -j REDIRECT
        fi
    done
}
# Remove ovn_cluster_router priority-1010 reroute policies for VIPs that are
# no longer tracked as "up" on this node. Only relevant with OVN-Kubernetes;
# when no ovnkube-controller container exists (SDN) this is a no-op.
remove_stale_routes() {
    ## find extra ovn routes
    # First data row of `crictl ps` output, column 1 = container ID
    # (tail -n+2 drops the header line).
    local ovnkContainerID=$(crictl ps --name ovnkube-controller | awk '{ print $1 }' | tail -n+2)
    if [ -z "${ovnkContainerID}" ]; then
        echo "Plugin is SDN, nothing to do.. exiting"
        return
    fi
    echo "Found ovnkube-controller pod... ${ovnkContainerID}"
    # Collect v4 destination addresses from existing priority-1010 policies.
    # NOTE(review): assumes the address is field 8 of `ovn-nbctl
    # lr-policy-list` output — confirm against the ovn-nbctl version in use.
    local routeVIPsV4=$(crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-list ovn_cluster_router | grep "1010" | grep "ip4" | awk '$8{print $8}')
    echo "Found v4route vips: ${routeVIPsV4}"
    local host=$(hostname)
    echo ${host}
    for route_vip in ${routeVIPsV4}; do
        # Delete the policy when the VIP is unknown or marked "down".
        if [[ ! -v v4vips[${route_vip}] ]] || [[ "${v4vips[${route_vip}]}" = down ]]; then
            echo removing stale vip "${route_vip}" for local clients
            echo "ovn-nbctl lr-policy-del ovn_cluster_router 1010 inport == rtos-${host} && ip4.dst == ${route_vip}"
            crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-del ovn_cluster_router 1010 "inport == \"rtos-${host}\" && ip4.dst == ${route_vip}"
        fi
    done
    # Hosts without IPv6 support skip the v6 pass.
    if [ ! -f /proc/net/if_inet6 ]; then
        return
    fi
    local routeVIPsV6=$(crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-list ovn_cluster_router | grep "1010" | grep "ip6" | awk '$8{print $8}')
    echo "Found v6route vips: ${routeVIPsV6}"
    for route_vip in ${routeVIPsV6}; do
        if [[ ! -v v6vips[${route_vip}] ]] || [[ "${v6vips[${route_vip}]}" = down ]]; then
            echo removing stale vip "${route_vip}" for local clients
            echo "ovn-nbctl lr-policy-del ovn_cluster_router 1010 inport == rtos-${host} && ip6.dst == ${route_vip}"
            crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-del ovn_cluster_router 1010 "inport == \"rtos-${host}\" && ip6.dst == ${route_vip}"
        fi
    done
}
# Install a REDIRECT rule in the VIP chain for every VIP not marked "down".
add_rules() {
    for vip in "${!v4vips[@]}"; do
        if [[ "${v4vips[${vip}]}" = down ]]; then
            continue
        fi
        echo "ensuring rule for ${vip} for internal clients"
        ensure_rule4 nat "${CHAIN_NAME}" --dst "${vip}" -j REDIRECT
    done
    for vip in "${!v6vips[@]}"; do
        if [[ "${v6vips[${vip}]}" = down ]]; then
            continue
        fi
        echo "ensuring rule for ${vip} for internal clients"
        ensure_rule6 nat "${CHAIN_NAME}" --dst "${vip}" -j REDIRECT
    done
}
# Ensure an ovn_cluster_router priority-1010 reroute policy exists for every
# "up" VIP, steering traffic for the VIP to this node's ovn-k8s-mp0 address.
# Only relevant with OVN-Kubernetes (routingViaHost=false); when no
# ovnkube-controller container exists (SDN) this is a no-op.
add_routes() {
    # First data row of `crictl ps` output, column 1 = container ID
    # (tail -n+2 drops the header line).
    local ovnkContainerID=$(crictl ps --name ovnkube-controller | awk '{ print $1 }' | tail -n+2)
    if [ -z "${ovnkContainerID}" ]; then
        echo "Plugin is SDN, nothing to do.. exiting"
        return
    fi
    echo "Found ovnkube-controller pod... ${ovnkContainerID}"
    # Management-port address with its "/prefix" stripped.
    # NOTE(review): assumes `ip -brief` puts the v4 address in column 3 and
    # the v6 address in column 4 — confirm for single-stack IPv6 hosts.
    local ovnK8sMp0v4=$(ip -brief address show ovn-k8s-mp0 | awk '{print $3}' | awk -F/ '{print $1}')
    echo "Found ovn-k8s-mp0 interface IP ${ovnK8sMp0v4}"
    local host=$(hostname)
    echo ${host}
    for vip in "${!v4vips[@]}"; do
        if [[ "${v4vips[${vip}]}" != down ]]; then
            echo "ensuring route for ${vip} for internal clients"
            # Look for an existing 1010 policy matching this VIP and nexthop.
            local routes=$(crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-list ovn_cluster_router | grep "1010" | grep "${vip}" | grep "${ovnK8sMp0v4}")
            echo "OVNK Routes on ovn-cluster-router at 1010 priority: $routes"
            if [[ "${routes}" == *"${vip}"* ]]; then
                echo "Route exists"
            else
                echo "Route does not exist; creating it..."
                echo "ovn-nbctl lr-policy-add ovn_cluster_router 1010 inport == rtos-${host} && ip4.dst == ${vip} reroute ${ovnK8sMp0v4}"
                crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-add ovn_cluster_router 1010 "inport == \"rtos-${host}\" && ip4.dst == ${vip}" reroute "${ovnK8sMp0v4}"
            fi
        fi
    done
    # Hosts without IPv6 support skip the v6 pass.
    if [ ! -f /proc/net/if_inet6 ]; then
        return
    fi
    local ovnK8sMp0v6=$(ip -brief address show ovn-k8s-mp0 | awk '{print $4}' | awk -F/ '{print $1}')
    echo "Found ovn-k8s-mp0 interface IP ${ovnK8sMp0v6}"
    for vip in "${!v6vips[@]}"; do
        if [[ "${v6vips[${vip}]}" != down ]]; then
            echo "ensuring route for ${vip} for internal clients"
            local routes=$(crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-list ovn_cluster_router | grep "1010" | grep "${vip}" | grep "${ovnK8sMp0v6}")
            echo "OVNK Routes on ovn-cluster-router at 1010 priority: $routes"
            if [[ "${routes}" == *"${vip}"* ]]; then
                echo "Route exists"
            else
                echo "Route does not exist; creating it..."
                echo "ovn-nbctl lr-policy-add ovn_cluster_router 1010 inport == rtos-${host} && ip6.dst == ${vip} reroute ${ovnK8sMp0v6}"
                crictl exec -i ${ovnkContainerID} ovn-nbctl lr-policy-add ovn_cluster_router 1010 "inport == \"rtos-${host}\" && ip6.dst == ${vip}" reroute "${ovnK8sMp0v6}"
            fi
        fi
    done
}
# Flush every rule from the VIP chain.
# Fix: ensure_chain6/ensure_rule6 also program ip6tables on IPv6-capable
# hosts, so cleanup must flush that chain as well — previously only the
# IPv4 chain was cleared, leaving stale v6 REDIRECT rules behind.
clear_rules() {
    echo "clearing rules from ${CHAIN_NAME} chain in nat table"
    iptables -t nat -F "${CHAIN_NAME}" || true
    if [ -f /proc/net/if_inet6 ]; then
        ip6tables -t nat -F "${CHAIN_NAME}" || true
    fi
}
# Drop every priority-1010 policy from ovn_cluster_router. No-op when the
# ovnkube-controller container is absent (SDN plugin).
clear_routes() {
    # First data row of `crictl ps`, column 1 = container ID.
    local pod_id=$(crictl ps --name ovnkube-controller | awk '{ print $1 }' | tail -n+2)
    if [[ -z "${pod_id}" ]]; then
        echo "Plugin is SDN, nothing to do.. exiting"
        return
    fi
    echo "Found ovnkube-controller pod... ${pod_id}"
    echo "clearing all routes from ovn-cluster-router"
    crictl exec -i "${pod_id}" ovn-nbctl lr-policy-del ovn_cluster_router 1010
}
# out parameters: v4vips v6vips
# Rebuild the v4vips/v6vips maps from the marker files in ${RUN_DIR}.
# A VIP counts as up only when $VIP.up exists without a matching $VIP.down.
# Out parameters: v4vips, v6vips.
list_lb_ips() {
    # Start from empty maps so VIPs withdrawn since the last run disappear.
    local k
    for k in "${!v4vips[@]}"; do
        unset v4vips["${k}"]
    done
    for k in "${!v6vips[@]}"; do
        unset v6vips["${k}"]
    done
    # With nullglob, the loop simply does not run when no .up files exist.
    shopt -s nullglob
    local upfile vip
    for upfile in "${RUN_DIR}"/*.up ; do
        vip="$(basename "${upfile}" .up)"
        if [[ -e "${RUN_DIR}/${vip}.down" ]]; then
            echo "${vip} has upfile and downfile, marking as down"
            continue
        fi
        # Addresses containing a colon are IPv6.
        if [[ "${vip}" == *:* ]]; then
            echo "processing v6 vip ${vip}"
            v6vips[${vip}]="${vip}"
        else
            echo "processing v4 vip ${vip}"
            v4vips[${vip}]="${vip}"
        fi
    done
}
# Entry point: "start" installs VIP redirects/routes; "cleanup" removes them.
# Default $1 to empty so invoking the script with no arguments prints the
# usage message instead of dying with an unbound-variable error (set -u).
case "${1:-}" in
    start)
        initialize
        list_lb_ips
        remove_stale
        remove_stale_routes # needed for OVN-Kubernetes plugin's routingViaHost=false mode
        add_rules
        add_routes # needed for OVN-Kubernetes plugin's routingViaHost=false mode
        echo "done applying vip rules"
        ;;
    cleanup)
        clear_rules
        clear_routes # needed for OVN-Kubernetes plugin's routingViaHost=false mode
        ;;
    *)
        echo $"Usage: $0 {start|cleanup}"
        exit 1
        ;;
esac