Skip to content

Commit

Permalink
datapath: Add hash info to upcall.
Browse files Browse the repository at this point in the history
This patch backports below upstream patches, and add __skb_set_hash
to compat for older kernels.

commit b5ab1f1be6180a2e975eede18731804b5164a05d
Author: Jakub Kicinski <kuba@kernel.org>
Date:   Mon Mar 2 21:05:18 2020 -0800

    openvswitch: add missing attribute validation for hash

    Add missing attribute validation for OVS_PACKET_ATTR_HASH
    to the netlink policy.

    Fixes: bd1903b7c459 ("net: openvswitch: add hash info to upcall")
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>
    Reviewed-by: Greg Rose <gvrose8192@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

commit bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Author: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date:   Wed Nov 13 23:04:49 2019 +0800

    net: openvswitch: add hash info to upcall

    When using the kernel datapath, the upcall don't
    include skb hash info relatived. That will introduce
    some problem, because the hash of skb is important
    in kernel stack. For example, VXLAN module uses
    it to select UDP src port. The tx queue selection
    may also use the hash in stack.

    Hash is computed in different ways. Hash is random
    for a TCP socket, and hash may be computed in hardware,
    or software stack. Recalculation hash is not easy.

    Hash of TCP socket is computed:
    tcp_v4_connect
        -> sk_set_txhash (is random)

    __tcp_transmit_skb
        -> skb_set_hash_from_sk

    There will be one upcall, without information of skb
    hash, to ovs-vswitchd, for the first packet of a TCP
    session. The rest packets will be processed in Open vSwitch
    modules, hash kept. If this tcp session is forward to
    VXLAN module, then the UDP src port of first tcp packet
    is different from rest packets.

    TCP packets may come from the host or dockers, to Open vSwitch.
    To fix it, we store the hash info to upcall, and restore hash
    when packets sent back.

    +---------------+          +-------------------------+
    |   Docker/VMs  |          |     ovs-vswitchd        |
    +----+----------+          +-+--------------------+--+
         |                       ^                    |
         |                       |                    |
         |                       |  upcall            v restore packet hash
(not recalculate)
         |                     +-+--------------------+--+
         |  tap netdev         |                         |   vxlan module
         +--------------->     +-->  Open vSwitch ko     +-->
           or internal type    |                         |
                               +-------------------------+

    Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
    Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
    Acked-by: Pravin B Shelar <pshelar@ovn.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

Tested-by: Aliasgar Ginwala <aginwala@ebay.com>
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
  • Loading branch information
hzhou8 authored and igsilya committed May 28, 2020
1 parent 21ad008 commit 9ba57fc
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 1 deletion.
4 changes: 4 additions & 0 deletions acinclude.m4
Expand Up @@ -1103,6 +1103,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_FIND_OP_PARAM_IFELSE([$KSRC/include/net/rtnetlink.h],
[validate], [extack],
[OVS_DEFINE([HAVE_RTNLOP_VALIDATE_WITH_EXTACK])])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
[__skb_set_hash])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [sw_hash])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_get_hash_raw])
if cmp -s datapath/linux/kcompat.h.new \
datapath/linux/kcompat.h >/dev/null 2>&1; then
Expand Down
33 changes: 32 additions & 1 deletion datapath/datapath.c
Expand Up @@ -371,7 +371,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
+ nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
Expand Down Expand Up @@ -414,6 +415,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
size_t len;
unsigned int hlen;
int err, dp_ifindex;
u64 hash;

dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
Expand Down Expand Up @@ -523,6 +525,25 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
pad_packet(dp, user_skb);
}

/* Add OVS_PACKET_ATTR_HASH */
hash = skb_get_hash_raw(skb);
#ifdef HAVE_SW_HASH
if (skb->sw_hash)
hash |= OVS_PACKET_HASH_SW_BIT;
#endif

#ifdef HAVE_L4_RXHASH
if (skb->l4_rxhash)
#else
if (skb->l4_hash)
#endif
hash |= OVS_PACKET_HASH_L4_BIT;

if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
err = -ENOBUFS;
goto out;
}

/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy()
*/
Expand Down Expand Up @@ -563,6 +584,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
Expand All @@ -588,6 +610,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
}
OVS_CB(packet)->mru = mru;

if (a[OVS_PACKET_ATTR_HASH]) {
hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
!!(hash & OVS_PACKET_HASH_SW_BIT),
!!(hash & OVS_PACKET_HASH_L4_BIT));
}

/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
Expand Down Expand Up @@ -649,6 +679,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static struct genl_ops dp_packet_genl_ops[] = {
Expand Down
12 changes: 12 additions & 0 deletions datapath/datapath.h
Expand Up @@ -159,6 +159,18 @@ struct ovs_net {
#endif
};

/**
* enum ovs_pkt_hash_types - hash info to include with a packet
* to send to userspace.
* @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
* @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
* over transport ports.
*/
enum ovs_pkt_hash_types {
OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
};

extern unsigned int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);
Expand Down
31 changes: 31 additions & 0 deletions datapath/linux/compat/include/linux/skbuff.h
Expand Up @@ -456,4 +456,35 @@ static inline void skb_set_inner_ipproto(struct sk_buff *skb,
#define nf_reset_ct nf_reset
#endif

#ifndef HAVE___SKB_SET_HASH
static inline void
__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
{
#ifdef HAVE_RXHASH
skb->rxhash = hash;
#else
skb->hash = hash;
#endif
#if defined(HAVE_L4_RXHASH)
skb->l4_rxhash = is_l4;
#else
skb->l4_hash = is_l4;
#endif
#ifdef HAVE_SW_HASH
skb->sw_hash = is_sw;
#endif
}
#endif

#ifndef HAVE_SKB_GET_HASH_RAW
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
{
#ifdef HAVE_RXHASH
return skb->rxhash;
#else
return skb->hash;
#endif
}
#endif

#endif

0 comments on commit 9ba57fc

Please sign in to comment.