Skip to content

Commit

Permalink
northd.c: Avoid sending ICMP time exceeded for multicast packets.
Browse files Browse the repository at this point in the history
In RFC1812 section 5.3.1, it is mentioned that:

   If the TTL is reduced to zero (or less), the packet MUST be
   discarded, and if the destination is not a multicast address the
   router MUST send an ICMP Time Exceeded message ...

So if the destionation is a multicast address the route shouldn't send
ICMP Time Exceeded, but the current OVN implementation didn't check
multicast and tries to send ICMP regardless. This patch fixes it.

The reason behind is that TTL has special meanings for multicast. For
example, TTL = 1 means restricted to the same subnet, not forwarded by
the router. So it is very common to see multicast packets with ttl = 1,
and generating ICMP for such packets is harmful from both slowpath
performance and functionality point of view.

Signed-off-by: Han Zhou <hzhou@ovn.org>
Acked-by: Dumitru Ceara <dceara@redhat.com>
  • Loading branch information
hzhou8 committed Apr 13, 2023
1 parent f39d541 commit 98e57ea
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 17 deletions.
22 changes: 20 additions & 2 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -12349,6 +12349,24 @@ build_misc_local_traffic_drop_flows_for_lrouter(
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
"eth.bcast", "drop;");

/* Avoid ICMP time exceeded for multicast, silent drop instead.
* See RFC1812 section 5.3.1:
* If the TTL is reduced to zero (or less), the packet MUST be
* discarded, and if the destination is NOT A MULTICAST address the
* router MUST send an ICMP Time Exceeded message ...
*
* The reason behind is that TTL has special meanings for multicast.
* For example, TTL = 1 means restricted to the same subnet, not
* forwarded by the router. So it is very common to see multicast
* packets with ttl = 1, and generating ICMP for such packets is
* harmful from both slowpath performance and functionality point of
* view.
*
* (priority-31 flows will send ICMP time exceeded) */
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 32,
"ip.ttl == {0, 1} && !ip.later_frag && "
"(ip4.mcast || ip6.mcast)", "drop;");

/* TTL discard */
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
"ip.ttl == {0, 1}", "drop;");
Expand Down Expand Up @@ -12535,7 +12553,7 @@ build_ipv6_input_flows_for_lrouter_port(
"outport = %s; flags.loopback = 1; output; };",
ds_cstr(&ip_ds), op->json_key);
ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT,
100, ds_cstr(match), ds_cstr(actions), NULL,
31, ds_cstr(match), ds_cstr(actions), NULL,
copp_meter_get(COPP_ICMP6_ERR, op->od->nbr->copp,
meter_groups),
&op->nbrp->header_);
Expand Down Expand Up @@ -12663,7 +12681,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
"outport = %s; flags.loopback = 1; output; };",
ds_cstr(&ip_ds), op->json_key);
ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT,
100, ds_cstr(match), ds_cstr(actions), NULL,
31, ds_cstr(match), ds_cstr(actions), NULL,
copp_meter_get(COPP_ICMP4_ERR, op->od->nbr->copp,
meter_groups),
&op->nbrp->header_);
Expand Down
10 changes: 9 additions & 1 deletion northd/ovn-northd.8.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2863,10 +2863,18 @@ nd.tll = <var>external_mac</var>;
broadcast address. By definition this traffic should not be forwarded.
</li>

<li>
Avoid ICMP time exceeded for multicast. A priority-32 flow with match
<code>ip.ttl == {0, 1} &amp;&amp; !ip.later_frag &amp;&amp;
(ip4.mcast || ip6.mcast)</code> and actions <code>drop;</code> drops
multicast packets whose TTL has expired without sending ICMP time
exceeded.
</li>

<li>
<p>
ICMP time exceeded. For each router port <var>P</var>, whose IP
address is <var>A</var>, a priority-100 flow with match <code>inport
address is <var>A</var>, a priority-31 flow with match <code>inport
== <var>P</var> &amp;&amp; ip.ttl == {0, 1} &amp;&amp;
!ip.later_frag</code> matches packets whose TTL has expired, with the
following actions to send an ICMP time exceeded reply for IPv4 and
Expand Down
47 changes: 33 additions & 14 deletions tests/ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -16256,7 +16256,7 @@ AT_SETUP([TTL exceeded])
AT_KEYWORDS([ttl-exceeded])
ovn_start

# test_ip_packet INPORT HV ETH_SRC ETH_DST IPV4_SRC IPV4_DST IPV4_ROUTER IP_CHKSUM EXP_IP_CHKSUM EXP_ICMP_CHKSUM
# test_ip_packet INPORT HV ETH_SRC ETH_DST IPV4_SRC IPV4_DST IPV4_ROUTER IP_CHKSUM EXP_IP_CHKSUM EXP_ICMP_CHKSUM SHOULD_REPLY
#
# Causes a packet to be received on INPORT of the hypervisor HV. The packet is an IPv4 packet with
# ETH_SRC, ETH_DST, IPV4_SRC, IPV4_DST, IP_CHKSUM as specified and TTL set to 1.
Expand All @@ -16272,35 +16272,40 @@ test_ip_packet() {
local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 ip_router=$7 ip_chksum=$8
local exp_ip_chksum=$9 exp_icmp_chksum=${10}
shift 10
local should_reply=$1

local ip_ttl=01
local packet=${eth_dst}${eth_src}08004500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}
local orig_pkt_in_reply=4500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}
local reply_icmp_ttl=fe
local icmp_type_code_response=0b00
local icmp_data=00000000
local reply_icmp_payload=${icmp_type_code_response}${exp_icmp_chksum}${icmp_data}
local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload}
echo $reply$orig_pkt_in_reply >> vif$inport.expected
if test $should_reply == yes; then
local orig_pkt_in_reply=4500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}
local reply_icmp_ttl=fe
local icmp_type_code_response=0b00
local icmp_data=00000000
local reply_icmp_payload=${icmp_type_code_response}${exp_icmp_chksum}${icmp_data}
local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload}
echo $reply$orig_pkt_in_reply >> vif$inport.expected
fi

as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet
}

# test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_ROUTER EXP_ICMP_CHKSUM
# test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_ROUTER EXP_ICMP_CHKSUM SHOULD_REPLY
#
# Causes a packet to be received on INPORT of the hypervisor HV. The packet is an IPv6
# packet with ETH_SRC, ETH_DST, IPV6_SRC and IPV6_DST as specified.
# IPV6_ROUTER and EXP_ICMP_CHKSUM are the source IP and checksum of the icmpv6 ttl exceeded
# packet sent by OVN logical router
test_ip6_packet() {
local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ipv6_router=$7 exp_icmp_chksum=$8
local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ipv6_router=$7 exp_icmp_chksum=$8 should_reply=$9
shift 8

local ip6_hdr=6000000000151101${ipv6_src}${ipv6_dst}
local packet=${eth_dst}${eth_src}86dd${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a

local reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a
echo $reply >> vif$inport.expected
if test $should_reply == yes; then
local reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a
echo $reply >> vif$inport.expected
fi

as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet
}
Expand All @@ -16323,6 +16328,8 @@ for i in 1 2; do
options:tx_pcap=hv$i/vif$i-tx.pcap \
options:rxq_pcap=hv$i/vif$i-rx.pcap \
ofport-request=$i

ovs-appctl -t ovn-controller vlog/set file:dbg:pinctrl
done

ovn-nbctl lr-add lr0
Expand All @@ -16338,10 +16345,22 @@ OVN_POPULATE_ARP
wait_for_ports_up
ovn-nbctl --wait=hv sync

test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96
test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000200000000000000000011 20010db8000100000000000000000001 1c22
test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96 yes
test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000200000000000000000011 20010db8000100000000000000000001 1c22 yes

# Should not send ICMP for multicast
test_ip_packet 1 1 000000000001 01005e7f0001 $(ip_to_hex 192 168 1 1) $(ip_to_hex 239 255 0 1) $(ip_to_hex 192 168 1 254) 0000 000000000 no
test_ip6_packet 1 1 000000000001 333300000001 20010db8000100000000000000000011 ff020000000000000000000000000001 20010db8000100000000000000000001 0000 no

OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [vif1.expected])

# Confirm from debug log that we only see 2 packet-ins (no packet-ins for
# multicasts). This is necessary because not seeing ICMP messages doesn't
# necessarily mean the packet-in didn't happen. It is possible that packet-in
# is processed but the ICMP message got dropped.
AT_CHECK([grep -c packet-in hv1/ovn-controller.log], [0], [2
])

OVN_CLEANUP([hv1], [hv2])
AT_CLEANUP
])
Expand Down

0 comments on commit 98e57ea

Please sign in to comment.