Skip to content

Commit

Permalink
controller: Avoid double controller action for ICMP errors.
Browse files Browse the repository at this point in the history
The fields that are not directly supported by OvS were encoded
via additional controller action that changed the required value.
This was most notably needed for ICMP need frag messages.

Encode the field value loads as note action instead. This allows
us to find the note and act accordingly in the first controller
action without the need to send it to pinctrl again, parse and
change the packet. This way we can also encode any future fields
that might be needed as this method should be flexible enough.

This change is completely transparent to the user and shouldn't
cause any disruptions.

Signed-off-by: Ales Musil <amusil@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Mark Michelson <mmichels@redhat.com>
  • Loading branch information
almusil authored and putnopvut committed Mar 15, 2024
1 parent 894ffe8 commit 4725ad1
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 34 deletions.
14 changes: 6 additions & 8 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -1277,7 +1277,7 @@ reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table,
ip_ttl->ttl = 255;

uint16_t frag_mtu = mtu - ETHERNET_OVERHEAD;
size_t frag_mtu_oc_offset;
size_t note_offset;
if (is_ipv6) {
/* icmp6.type = 2 (Packet Too Big) */
/* icmp6.code = 0 */
Expand All @@ -1289,9 +1289,8 @@ reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table,
&inner_ofpacts, mf_from_id(MFF_ICMPV6_CODE), &icmp_code, NULL);

/* icmp6.frag_mtu */
frag_mtu_oc_offset = encode_start_controller_op(
ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, true, NX_CTLR_NO_METER,
&inner_ofpacts);
note_offset = encode_start_ovn_field_note(OVN_ICMP6_FRAG_MTU,
&inner_ofpacts);
ovs_be32 frag_mtu_ovs = htonl(frag_mtu);
ofpbuf_put(&inner_ofpacts, &frag_mtu_ovs, sizeof(frag_mtu_ovs));
} else {
Expand All @@ -1305,13 +1304,12 @@ reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table,
&inner_ofpacts, mf_from_id(MFF_ICMPV4_CODE), &icmp_code, NULL);

/* icmp4.frag_mtu = */
frag_mtu_oc_offset = encode_start_controller_op(
ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true, NX_CTLR_NO_METER,
&inner_ofpacts);
note_offset = encode_start_ovn_field_note(OVN_ICMP4_FRAG_MTU,
&inner_ofpacts);
ovs_be16 frag_mtu_ovs = htons(frag_mtu);
ofpbuf_put(&inner_ofpacts, &frag_mtu_ovs, sizeof(frag_mtu_ovs));
}
encode_finish_controller_op(frag_mtu_oc_offset, &inner_ofpacts);
encode_finish_ovn_field_note(note_offset, &inner_ofpacts);

/* Finally, submit the ICMP error back to the ingress pipeline */
put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &inner_ofpacts);
Expand Down
104 changes: 93 additions & 11 deletions controller/pinctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,23 @@ set_switch_config(struct rconn *swconn,
queue_msg(swconn, request);
}

static void
enqueue_packet(struct rconn *swconn, enum ofp_version version,
const struct dp_packet *packet, const struct ofpbuf *ofpacts)
{
struct ofputil_packet_out po = (struct ofputil_packet_out) {
.packet = dp_packet_data(packet),
.packet_len = dp_packet_size(packet),
.buffer_id = UINT32_MAX,
.ofpacts = ofpacts->data,
.ofpacts_len = ofpacts->size,
};

match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
}

static void
set_actions_and_enqueue_msg(struct rconn *swconn,
const struct dp_packet *packet,
Expand All @@ -632,19 +649,59 @@ set_actions_and_enqueue_msg(struct rconn *swconn,
return;
}

struct ofputil_packet_out po = {
.packet = dp_packet_data(packet),
.packet_len = dp_packet_size(packet),
.buffer_id = UINT32_MAX,
.ofpacts = ofpacts.data,
.ofpacts_len = ofpacts.size,
};
match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
enqueue_packet(swconn, version, packet, &ofpacts);
ofpbuf_uninit(&ofpacts);
}

static bool
ofpacts_decode_and_reload_metadata(enum ofp_version version,
const struct match *md,
struct ofpbuf *userdata,
struct ofpbuf *ofpacts)
{
/* Copy metadata from 'md' into the packet-out via "set_field"
* actions, then add actions from 'userdata'.
*/
reload_metadata(ofpacts, md);
enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
version, NULL, NULL,
ofpacts);
if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "failed to parse actions from userdata (%s)",
ofperr_to_string(error));
return false;
}

return true;
}

static void *
ofpacts_get_ovn_field(const struct ofpbuf *ofpacts,
enum ovn_field_id id)
{
const struct ovn_field *field = ovn_field_from_id(id);

struct ofpact *ofpact;
OFPACT_FOR_EACH (ofpact, ofpacts->data, ofpacts->size) {
if (ofpact->type != OFPACT_NOTE) {
continue;
}

struct ofpact_note *note = ofpact_get_NOTE(ofpact);
struct ovn_field_note_header *hdr = (void *) note->data;
/* The data can contain padding bytes from NXAST_NOTE encode/decode. */
size_t data_len = note->length - sizeof *hdr;

if (!strncmp(hdr->magic, OVN_FIELD_NOTE_MAGIC, ARRAY_SIZE(hdr->magic))
&& field->id == ntohs(hdr->type) && data_len >= field->n_bytes) {
return hdr->data;
}
}

return NULL;
}

/* Forwards a packet to 'out_port_key' even if that's on a remote
* hypervisor, i.e., the packet is re-injected in table OFTABLE_OUTPUT_INIT.
*/
Expand Down Expand Up @@ -1558,6 +1615,8 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
const struct match *md, struct ofpbuf *userdata,
bool set_icmp_code, bool loopback)
{
enum ofp_version version = rconn_get_version(swconn);

/* This action only works for IP packets, and the switch should only send
* us IP packets this way, but check here just to be sure. */
if (ip_flow->dl_type != htons(ETH_TYPE_IP) &&
Expand All @@ -1569,6 +1628,14 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
return;
}

uint64_t ofpacts_stub[4096 / 8];
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);

if (!ofpacts_decode_and_reload_metadata(version, md, userdata, &ofpacts)) {
ofpbuf_uninit(&ofpacts);
return;
}

uint64_t packet_stub[128 / 8];
struct dp_packet packet;

Expand All @@ -1584,6 +1651,7 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
VLOG_WARN_RL(&rl,
"ICMP action on IP packet with invalid length (%u)",
in_ip_len);
ofpbuf_uninit(&ofpacts);
return;
}

Expand Down Expand Up @@ -1627,6 +1695,12 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
void *data = ih + 1;
memcpy(data, in_ip, in_ip_len);

ovs_be16 *mtu = ofpacts_get_ovn_field(&ofpacts, OVN_ICMP4_FRAG_MTU);
if (mtu) {
ih->icmp_fields.frag.mtu = *mtu;
ih->icmp_code = 4;
}

ih->icmp_csum = 0;
ih->icmp_csum = csum(ih, sizeof *ih + in_ip_len);
} else {
Expand Down Expand Up @@ -1674,6 +1748,12 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
}
ih->icmp6_base.icmp6_cksum = 0;

ovs_be32 *mtu = ofpacts_get_ovn_field(&ofpacts, OVN_ICMP6_FRAG_MTU);
if (mtu) {
put_16aligned_be32(ih->icmp6_data.be32, *mtu);
ih->icmp6_base.icmp6_type = ICMP6_PACKET_TOO_BIG;
}

void *data = ih + 1;
memcpy(data, in_ip, in_ip_len);
uint32_t icmpv6_csum =
Expand All @@ -1688,8 +1768,9 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
ip_flow->vlans[0].tci);
}

set_actions_and_enqueue_msg(swconn, &packet, md, userdata);
enqueue_packet(swconn, version, &packet, &ofpacts);
dp_packet_uninit(&packet);
ofpbuf_uninit(&ofpacts);
}

/* Called with in the pinctrl_handler thread context. */
Expand Down Expand Up @@ -6338,6 +6419,7 @@ pinctrl_handle_put_nd_ra_opts(
}

/* Called with in the pinctrl_handler thread context. */
/* XXX: This handler can be removed in next version (25.03). */
static void
pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
const struct flow *in_flow,
Expand Down
17 changes: 17 additions & 0 deletions include/ovn/actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "expr.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/hmap.h"
#include "openvswitch/ofp-actions.h"
#include "openvswitch/uuid.h"
#include "util.h"
#include "ovn/features.h"
Expand Down Expand Up @@ -522,6 +523,19 @@ struct ovnact_commit_lb_aff {
uint16_t timeout;
};

#define OVN_FIELD_NOTE_MAGIC "ovn"

struct ovn_field_note_header {
OFPACT_PADDED_MEMBERS(
char magic[4];
ovs_be16 type; /* The type of ovn field note, based
* on 'enum ovn_field_id'. */
);
uint8_t data[];
};

BUILD_ASSERT_DECL(sizeof(struct ovn_field_note_header) == 8);

/* Internal use by the helpers below. */
void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
Expand Down Expand Up @@ -909,4 +923,7 @@ void encode_finish_controller_op(size_t ofs, struct ofpbuf *ofpacts);
void encode_controller_op(enum action_opcode opcode, uint32_t meter_id,
struct ofpbuf *ofpacts);

size_t encode_start_ovn_field_note(enum ovn_field_id, struct ofpbuf *ofpacts);
void encode_finish_ovn_field_note(size_t offset, struct ofpbuf *ofpacts);

#endif /* ovn/actions.h */
42 changes: 32 additions & 10 deletions lib/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,32 @@ encode_controller_op(enum action_opcode opcode, uint32_t meter_id,
encode_finish_controller_op(ofs, ofpacts);
}

size_t
encode_start_ovn_field_note(enum ovn_field_id id, struct ofpbuf *ofpacts)
{
size_t offset = ofpacts->size;

ofpact_put_NOTE(ofpacts);
struct ovn_field_note_header *hdr = ofpbuf_put_zeros(ofpacts, sizeof *hdr);
*hdr = (struct ovn_field_note_header) {
.magic = OVN_FIELD_NOTE_MAGIC,
.type = htons(id),
};

return offset;
}

void
encode_finish_ovn_field_note(size_t offset, struct ofpbuf *ofpacts)
{
struct ofpact_note *note = ofpbuf_at_assert(ofpacts, offset, sizeof *note);

ofpacts->header = note;
note->length = ofpacts->size - (offset + sizeof *note);

ofpact_finish_NOTE(ofpacts, &note);
}

static void
init_stack(struct ofpact_stack *stack, enum mf_field_id field)
{
Expand Down Expand Up @@ -3808,31 +3834,27 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)

static void
encode_OVNFIELD_LOAD(const struct ovnact_load *load,
const struct ovnact_encode_params *ep,
struct ofpbuf *ofpacts)
const struct ovnact_encode_params *ep OVS_UNUSED,
struct ofpbuf *ofpacts)
{
const struct ovn_field *f = ovn_field_from_name(load->dst.symbol->name);
size_t offset = encode_start_ovn_field_note(f->id, ofpacts);

switch (f->id) {
case OVN_ICMP4_FRAG_MTU: {
size_t oc_offset = encode_start_controller_op(
ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true,
ep->ctrl_meter_id, ofpacts);
ofpbuf_put(ofpacts, &load->imm.value.be16_int, sizeof(ovs_be16));
encode_finish_controller_op(oc_offset, ofpacts);
break;
}
case OVN_ICMP6_FRAG_MTU: {
size_t oc_offset = encode_start_controller_op(
ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, true,
ep->ctrl_meter_id, ofpacts);
ofpbuf_put(ofpacts, &load->imm.value.be32_int, sizeof(ovs_be32));
encode_finish_controller_op(oc_offset, ofpacts);
break;
}
case OVN_FIELD_N_IDS:
default:
OVS_NOT_REACHED();
}

encode_finish_ovn_field_note(offset, ofpacts);
}

static void
Expand Down
10 changes: 5 additions & 5 deletions tests/ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,7 @@ icmp4 { };

# icmp4 with icmp4.frag_mtu
icmp4 { eth.dst = ff:ff:ff:ff:ff:ff; icmp4.frag_mtu = 1500; output; }; output;
encodes as controller(userdata=00.00.00.0a.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.0e.00.00.00.0d.00.00.00.00.05.dc.00.00.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
encodes as controller(userdata=00.00.00.0a.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.18.00.00.23.20.00.08.6f.76.6e.00.00.00.00.00.05.dc.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
has prereqs ip4

# icmp4_error
Expand All @@ -1903,11 +1903,11 @@ icmp4_error { };

# icmp4_error with icmp4.frag_mtu
icmp4_error { eth.dst = ff:ff:ff:ff:ff:ff; icmp4.frag_mtu = 1500; output; }; output;
encodes as controller(userdata=00.00.00.0e.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.0e.00.00.00.0d.00.00.00.00.05.dc.00.00.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
encodes as controller(userdata=00.00.00.0e.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.18.00.00.23.20.00.08.6f.76.6e.00.00.00.00.00.05.dc.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
has prereqs ip4

icmp4.frag_mtu = 1500;
encodes as controller(userdata=00.00.00.0d.00.00.00.00.05.dc,pause)
encodes as note:6f.76.6e.00.00.00.00.00.05.dc

# icmp6
icmp6 { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output;
Expand All @@ -1931,11 +1931,11 @@ icmp6_error { };

# icmp6_error with icmp6.frag_mtu
icmp6_error { eth.dst = ff:ff:ff:ff:ff:ff; icmp6.frag_mtu = 1500; output; }; output;
encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.10.00.00.00.15.00.00.00.00.00.00.05.dc.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.18.00.00.23.20.00.08.6f.76.6e.00.00.01.00.00.00.00.05.dc.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
has prereqs ip6

icmp6.frag_mtu = 1500;
encodes as controller(userdata=00.00.00.15.00.00.00.00.00.00.05.dc,pause)
encodes as note:6f.76.6e.00.00.01.00.00.00.00.05.dc

# tcp_reset
tcp_reset { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output;
Expand Down

0 comments on commit 4725ad1

Please sign in to comment.