52 changes: 30 additions & 22 deletions lib/conntrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,18 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
nc->parent_key = alg_exp->parent_key;
}

ovs_mutex_init_adaptive(&nc->lock);
atomic_flag_clear(&nc->reclaimed);
fwd_key_node->dir = CT_DIR_FWD;
rev_key_node->dir = CT_DIR_REV;

if (zl) {
nc->admit_zone = zl->czl.zone;
nc->zone_limit_seq = zl->czl.zone_limit_seq;
} else {
nc->admit_zone = INVALID_ZONE;
}

if (nat_action_info) {
nc->nat_action = nat_action_info->nat_action;

Expand All @@ -965,21 +977,15 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
cmap_insert(&ct->conns, &rev_key_node->cm_node, rev_hash);
}

ovs_mutex_init_adaptive(&nc->lock);
atomic_flag_clear(&nc->reclaimed);
fwd_key_node->dir = CT_DIR_FWD;
rev_key_node->dir = CT_DIR_REV;
cmap_insert(&ct->conns, &fwd_key_node->cm_node, ctx->hash);
conn_expire_push_front(ct, nc);
atomic_count_inc(&ct->n_conn);
ctx->conn = nc; /* For completeness. */

if (zl) {
nc->admit_zone = zl->czl.zone;
nc->zone_limit_seq = zl->czl.zone_limit_seq;
atomic_count_inc(&zl->czl.count);
} else {
nc->admit_zone = INVALID_ZONE;
}

ctx->conn = nc; /* For completeness. */
}

return nc;
Expand Down Expand Up @@ -2290,7 +2296,9 @@ find_addr(const struct conn_key *key, union ct_addr *min,
uint32_t hash, bool ipv4,
const struct nat_action_info_t *nat_info)
{
const union ct_addr zero_ip = {0};
union ct_addr zero_ip;

memset(&zero_ip, 0, sizeof zero_ip);

/* All-zero case. */
if (!memcmp(min, &zero_ip, sizeof *min)) {
Expand Down Expand Up @@ -2382,14 +2390,18 @@ nat_get_unique_tuple(struct conntrack *ct, struct conn *conn,
{
struct conn_key *fwd_key = &conn->key_node[CT_DIR_FWD].key;
struct conn_key *rev_key = &conn->key_node[CT_DIR_REV].key;
union ct_addr min_addr = {0}, max_addr = {0}, addr = {0};
bool pat_proto = fwd_key->nw_proto == IPPROTO_TCP ||
fwd_key->nw_proto == IPPROTO_UDP ||
fwd_key->nw_proto == IPPROTO_SCTP;
uint16_t min_dport, max_dport, curr_dport;
uint16_t min_sport, max_sport, curr_sport;
union ct_addr min_addr, max_addr, addr;
uint32_t hash;

memset(&min_addr, 0, sizeof min_addr);
memset(&max_addr, 0, sizeof max_addr);
memset(&addr, 0, sizeof addr);

hash = nat_range_hash(fwd_key, ct->hash_basis, nat_info);
min_addr = nat_info->min_addr;
max_addr = nat_info->max_addr;
Expand Down Expand Up @@ -2572,7 +2584,9 @@ tuple_to_conn_key(const struct ct_dpif_tuple *tuple, uint16_t zone,
key->src.icmp_type = tuple->icmp_type;
key->src.icmp_code = tuple->icmp_code;
key->dst.icmp_id = tuple->icmp_id;
key->dst.icmp_type = reverse_icmp_type(tuple->icmp_type);
key->dst.icmp_type = (tuple->ip_proto == IPPROTO_ICMP)
? reverse_icmp_type(tuple->icmp_type)
: reverse_icmp6_type(tuple->icmp_type);
key->dst.icmp_code = tuple->icmp_code;
} else {
key->src.port = tuple->src_port;
Expand Down Expand Up @@ -2637,25 +2651,19 @@ conntrack_dump_start(struct conntrack *ct, struct conntrack_dump *dump,

dump->ct = ct;
*ptot_bkts = 1; /* Need to clean up the callers. */
dump->cursor = cmap_cursor_start(&ct->conns);
return 0;
}

int
conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
{
struct conntrack *ct = dump->ct;
long long now = time_msec();

for (;;) {
struct cmap_node *cm_node = cmap_next_position(&ct->conns,
&dump->cm_pos);
if (!cm_node) {
break;
}
struct conn_key_node *keyn;
struct conn *conn;
struct conn_key_node *keyn;
struct conn *conn;

INIT_CONTAINER(keyn, cm_node, cm_node);
CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, &dump->cursor) {
if (keyn->dir != CT_DIR_FWD) {
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ struct conntrack_dump {
struct conntrack *ct;
unsigned bucket;
union {
struct cmap_position cm_pos;
struct hmap_position hmap_pos;
struct cmap_cursor cursor;
};
bool filter_zone;
uint16_t zone;
Expand Down
12 changes: 12 additions & 0 deletions lib/dp-packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,18 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
if (dp_packet_hwol_is_tunnel_geneve(p) ||
dp_packet_hwol_is_tunnel_vxlan(p)) {
tnl_inner = true;

/* If the TX interface doesn't support UDP tunnel offload but does
* support inner checksum offload and an outer UDP checksum is
* required, then we can't offload inner checksum either. As that would
* invalidate the outer checksum. */
if (!(flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM) &&
dp_packet_hwol_is_outer_udp_cksum(p)) {
flags &= ~(NETDEV_TX_OFFLOAD_TCP_CKSUM |
NETDEV_TX_OFFLOAD_UDP_CKSUM |
NETDEV_TX_OFFLOAD_SCTP_CKSUM |
NETDEV_TX_OFFLOAD_IPV4_CKSUM);
}
}

if (dp_packet_hwol_tx_ip_csum(p)) {
Expand Down
45 changes: 8 additions & 37 deletions lib/dp-packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -604,25 +604,6 @@ dp_packet_get_nd_payload(const struct dp_packet *b)
}

#ifdef DPDK_NETDEV
static inline void
dp_packet_set_l2_len(struct dp_packet *b, size_t l2_len)
{
b->mbuf.l2_len = l2_len;
}

static inline void
dp_packet_set_l3_len(struct dp_packet *b, size_t l3_len)
{
b->mbuf.l3_len = l3_len;
}

static inline void
dp_packet_set_l4_len(struct dp_packet *b, size_t l4_len)
{
b->mbuf.l4_len = l4_len;
}


static inline uint64_t *
dp_packet_ol_flags_ptr(const struct dp_packet *b)
{
Expand All @@ -642,24 +623,6 @@ dp_packet_flow_mark_ptr(const struct dp_packet *b)
}

#else
static inline void
dp_packet_set_l2_len(struct dp_packet *b OVS_UNUSED, size_t l2_len OVS_UNUSED)
{
/* There is no implementation. */
}

static inline void
dp_packet_set_l3_len(struct dp_packet *b OVS_UNUSED, size_t l3_len OVS_UNUSED)
{
/* There is no implementation. */
}

static inline void
dp_packet_set_l4_len(struct dp_packet *b OVS_UNUSED, size_t l4_len OVS_UNUSED)
{
/* There is no implementation. */
}

static inline uint32_t *
dp_packet_ol_flags_ptr(const struct dp_packet *b)
{
Expand Down Expand Up @@ -1300,6 +1263,14 @@ dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN;
}

/* Clears tunnel offloading marks. */
static inline void
dp_packet_hwol_reset_tunnel(struct dp_packet *b)
{
*dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN |
DP_PACKET_OL_TX_TUNNEL_GENEVE);
}

/* Mark packet 'b' as a tunnel packet with outer IPv4 header. */
static inline void
dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b)
Expand Down
12 changes: 5 additions & 7 deletions lib/dpctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1359,19 +1359,17 @@ static int
dpctl_del_flow_dpif(struct dpif *dpif, const char *key_s,
struct dpctl_params *dpctl_p)
{
struct dpif_port_dump port_dump;
struct dpif_flow_stats stats;
bool ufid_generated = false;
struct dpif_port dpif_port;
struct dpif_port_dump port_dump;
struct ofpbuf key;
bool ufid_present = false;
struct simap port_names;
struct ofpbuf mask; /* To be ignored. */

struct ofpbuf key;
ovs_u128 ufid;
bool ufid_generated;
bool ufid_present;
struct simap port_names;
int n, error;

ufid_present = false;
n = odp_ufid_from_string(key_s, &ufid);
if (n < 0) {
dpctl_error(dpctl_p, -n, "parsing flow ufid");
Expand Down
29 changes: 29 additions & 0 deletions lib/dpif-netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ COVERAGE_DEFINE(datapath_drop_lock_error);
COVERAGE_DEFINE(datapath_drop_userspace_action_error);
COVERAGE_DEFINE(datapath_drop_tunnel_push_error);
COVERAGE_DEFINE(datapath_drop_tunnel_pop_error);
COVERAGE_DEFINE(datapath_drop_tunnel_tso_recirc);
COVERAGE_DEFINE(datapath_drop_recirc_error);
COVERAGE_DEFINE(datapath_drop_invalid_port);
COVERAGE_DEFINE(datapath_drop_invalid_bond);
Expand Down Expand Up @@ -8912,6 +8913,34 @@ static void
dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
size_t i, size = dp_packet_batch_size(packets);
struct dp_packet *packet;

DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) {
if (dp_packet_hwol_is_tunnel_geneve(packet) ||
dp_packet_hwol_is_tunnel_vxlan(packet)) {

if (dp_packet_hwol_is_tso(packet)) {
/* Can't perform GSO in the middle of a pipeline. */
COVERAGE_INC(datapath_drop_tunnel_tso_recirc);
dp_packet_delete(packet);
VLOG_WARN_RL(&rl, "Recirculating tunnel packets with "
"TSO is not supported");
continue;
}
/* Have to fix all the checksums before re-parsing, because the
* packet will be treated as having a single set of headers. */
dp_packet_ol_send_prepare(packet, 0);
/* This packet must not be marked with anything tunnel-related. */
dp_packet_hwol_reset_tunnel(packet);
/* Clear inner offsets. Other ones are collateral, but they will
* be re-initialized on re-parsing. */
dp_packet_reset_offsets(packet);
}
dp_packet_batch_refill(packets, packet, i);
}

dp_netdev_input__(pmd, packets, true, 0);
}

Expand Down
18 changes: 18 additions & 0 deletions lib/flow.c
Original file line number Diff line number Diff line change
Expand Up @@ -3420,6 +3420,24 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
arp->ar_sha = flow->arp_sha;
arp->ar_tha = flow->arp_tha;
}
} else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
struct nsh_hdr *nsh;

nsh = dp_packet_put_zeros(p, sizeof *nsh);
dp_packet_set_l3(p, nsh);

nsh_set_flags_ttl_len(nsh, flow->nsh.flags, flow->nsh.ttl,
flow->nsh.mdtype == NSH_M_TYPE1
? NSH_M_TYPE1_LEN : NSH_BASE_HDR_LEN);
nsh->next_proto = flow->nsh.np;
nsh->md_type = flow->nsh.mdtype;
put_16aligned_be32(&nsh->path_hdr, flow->nsh.path_hdr);

if (flow->nsh.mdtype == NSH_M_TYPE1) {
for (size_t i = 0; i < 4; i++) {
put_16aligned_be32(&nsh->md1.context[i], flow->nsh.context[i]);
}
}
}

if (eth_type_mpls(flow->dl_type)) {
Expand Down
7 changes: 4 additions & 3 deletions lib/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,16 @@ hash_3words(uint32_t a, uint32_t b, uint32_t c)
uint32_t
hash_bytes(const void *p_, size_t n, uint32_t basis)
{
const uint32_t *p = p_;
const uint8_t *p = p_;
size_t orig_n = n;
uint32_t hash;

hash = basis;
while (n >= 4) {
hash = hash_add(hash, get_unaligned_u32(p));
hash = hash_add(hash,
get_unaligned_u32(ALIGNED_CAST(const uint32_t *, p)));
n -= 4;
p += 1;
p += 4;
}

if (n) {
Expand Down
39 changes: 30 additions & 9 deletions lib/ipf.c
Original file line number Diff line number Diff line change
Expand Up @@ -506,13 +506,15 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
}

/* Called when a frag list state transitions to another state. This is
* triggered by new fragment for the list being received.*/
static void
* triggered by new fragment for the list being received. Returns a reassembled
* packet if this fragment has completed one. */
static struct reassembled_pkt *
ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list,
bool ff, bool lf, bool v6)
OVS_REQUIRES(ipf->ipf_lock)
{
enum ipf_list_state curr_state = ipf_list->state;
struct reassembled_pkt *ret = NULL;
enum ipf_list_state next_state;
switch (curr_state) {
case IPF_LIST_STATE_UNUSED:
Expand Down Expand Up @@ -562,12 +564,15 @@ ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list,
ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp);
ipf_expiry_list_remove(ipf_list);
next_state = IPF_LIST_STATE_COMPLETED;
ret = rp;
} else {
next_state = IPF_LIST_STATE_REASS_FAIL;
}
}
}
ipf_list->state = next_state;

return ret;
}

/* Some sanity checks are redundant, but prudent, in case code paths for
Expand Down Expand Up @@ -799,7 +804,8 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx,
static bool
ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list,
struct dp_packet *pkt, uint16_t start_data_byte,
uint16_t end_data_byte, bool ff, bool lf, bool v6)
uint16_t end_data_byte, bool ff, bool lf, bool v6,
struct reassembled_pkt **rp)
OVS_REQUIRES(ipf->ipf_lock)
{
bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list,
Expand All @@ -820,7 +826,7 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list,
ipf_list->last_inuse_idx++;
atomic_count_inc(&ipf->nfrag);
ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED);
ipf_list_state_transition(ipf, ipf_list, ff, lf, v6);
*rp = ipf_list_state_transition(ipf, ipf_list, ff, lf, v6);
} else {
OVS_NOT_REACHED();
}
Expand Down Expand Up @@ -853,7 +859,8 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key,
* to a list of fragemnts. */
static bool
ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type,
uint16_t zone, long long now, uint32_t hash_basis)
uint16_t zone, long long now, uint32_t hash_basis,
struct reassembled_pkt **rp)
OVS_REQUIRES(ipf->ipf_lock)
{
struct ipf_list_key key;
Expand Down Expand Up @@ -922,7 +929,7 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type,
}

return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte,
end_data_byte, ff, lf, v6);
end_data_byte, ff, lf, v6, rp);
}

/* Filters out fragments from a batch of fragments and adjust the batch. */
Expand All @@ -941,11 +948,17 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
||
(dl_type == htons(ETH_TYPE_IPV6) &&
ipf_is_valid_v6_frag(ipf, pkt)))) {
struct reassembled_pkt *rp = NULL;

ovs_mutex_lock(&ipf->ipf_lock);
if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) {
if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis,
&rp)) {
dp_packet_batch_refill(pb, pkt, pb_idx);
} else {
if (rp && !dp_packet_batch_is_full(pb)) {
dp_packet_batch_refill(pb, rp->pkt, pb_idx);
rp->list->reass_execute_ctx = rp->pkt;
}
dp_packet_delete(pkt);
}
ovs_mutex_unlock(&ipf->ipf_lock);
Expand Down Expand Up @@ -1063,6 +1076,9 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
struct ipf_list *ipf_list;

LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) {
if ((ipf_list->key.dl_type == htons(ETH_TYPE_IPV6)) != v6) {
continue;
}
if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_COMPLETED_LIST,
v6, now)) {
ipf_completed_list_clean(&ipf->frag_lists, ipf_list);
Expand Down Expand Up @@ -1096,6 +1112,9 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
size_t lists_removed = 0;

LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) {
if ((ipf_list->key.dl_type == htons(ETH_TYPE_IPV6)) != v6) {
continue;
}
if (now <= ipf_list->expiration ||
lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) {
break;
Expand All @@ -1116,7 +1135,8 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
/* Adds a reassmebled packet to a packet batch to be processed by the caller.
*/
static void
ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb,
ovs_be16 dl_type)
{
if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
return;
Expand All @@ -1127,6 +1147,7 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)

LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) {
if (!rp->list->reass_execute_ctx &&
rp->list->key.dl_type == dl_type &&
ipf_dp_packet_batch_add(pb, rp->pkt, false)) {
rp->list->reass_execute_ctx = rp->pkt;
}
Expand Down Expand Up @@ -1237,7 +1258,7 @@ ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
}

if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
ipf_execute_reass_pkts(ipf, pb);
ipf_execute_reass_pkts(ipf, pb, dl_type);
}
}

Expand Down
10 changes: 5 additions & 5 deletions lib/jhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,18 @@ jhash_words(const uint32_t *p, size_t n, uint32_t basis)
uint32_t
jhash_bytes(const void *p_, size_t n, uint32_t basis)
{
const uint32_t *p = p_;
const uint8_t *p = p_;
uint32_t a, b, c;

a = b = c = 0xdeadbeef + n + basis;

while (n >= 12) {
a += get_unaligned_u32(p);
b += get_unaligned_u32(p + 1);
c += get_unaligned_u32(p + 2);
a += get_unaligned_u32(ALIGNED_CAST(const uint32_t *, p));
b += get_unaligned_u32(ALIGNED_CAST(const uint32_t *, p + 4));
c += get_unaligned_u32(ALIGNED_CAST(const uint32_t *, p + 8));
jhash_mix(&a, &b, &c);
n -= 12;
p += 3;
p += 12;
}

if (n) {
Expand Down
225 changes: 166 additions & 59 deletions lib/netdev-dpdk.c
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,9 @@ int netdev_dpdk_get_vid(const struct netdev_dpdk *dev);
struct ingress_policer *
netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);

static void netdev_dpdk_mbuf_dump(const char *prefix, const char *message,
const struct rte_mbuf *);

static bool
is_dpdk_class(const struct netdev_class *class)
{
Expand Down Expand Up @@ -1351,6 +1354,20 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
}

if (!strcmp(info.driver_name, "net_ice")
|| !strcmp(info.driver_name, "net_i40e")
|| !strcmp(info.driver_name, "net_iavf")) {
/* FIXME: Driver advertises the capability but doesn't seem
* to actually support it correctly. Can remove this once
* the driver is fixed on DPDK side. */
VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
"net/ice, net/i40e or net/iavf port.",
netdev_get_name(&dev->up));
info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
}

if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
} else {
Expand Down Expand Up @@ -2364,17 +2381,16 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
struct eth_addr mac;

if (!dpdk_port_is_representor(dev)) {
VLOG_WARN_BUF(errp, "'%s' is trying to set the VF MAC '%s' "
"but 'options:dpdk-vf-mac' is only supported for "
"VF representors.",
netdev_get_name(netdev), vf_mac);
VLOG_WARN("'%s' is trying to set the VF MAC '%s' "
"but 'options:dpdk-vf-mac' is only supported for "
"VF representors.",
netdev_get_name(netdev), vf_mac);
} else if (!eth_addr_from_string(vf_mac, &mac)) {
VLOG_WARN_BUF(errp, "interface '%s': cannot parse VF MAC '%s'.",
netdev_get_name(netdev), vf_mac);
VLOG_WARN("interface '%s': cannot parse VF MAC '%s'.",
netdev_get_name(netdev), vf_mac);
} else if (eth_addr_is_multicast(mac)) {
VLOG_WARN_BUF(errp,
"interface '%s': cannot set VF MAC to multicast "
"address '%s'.", netdev_get_name(netdev), vf_mac);
VLOG_WARN("interface '%s': cannot set VF MAC to multicast "
"address '%s'.", netdev_get_name(netdev), vf_mac);
} else if (!eth_addr_equals(dev->requested_hwaddr, mac)) {
dev->requested_hwaddr = mac;
netdev_request_reconfigure(netdev);
Expand Down Expand Up @@ -2567,73 +2583,133 @@ static bool
netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
{
struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
struct tcp_header *th;

if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK
| RTE_MBUF_F_TX_TCP_SEG))) {
mbuf->ol_flags &= ~(RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6);
void *l2;
void *l3;
void *l4;

const uint64_t all_inner_requests = (RTE_MBUF_F_TX_IP_CKSUM |
RTE_MBUF_F_TX_L4_MASK |
RTE_MBUF_F_TX_TCP_SEG);
const uint64_t all_outer_requests = (RTE_MBUF_F_TX_OUTER_IP_CKSUM |
RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
const uint64_t all_requests = all_inner_requests | all_outer_requests;
const uint64_t all_inner_marks = (RTE_MBUF_F_TX_IPV4 |
RTE_MBUF_F_TX_IPV6);
const uint64_t all_outer_marks = (RTE_MBUF_F_TX_OUTER_IPV4 |
RTE_MBUF_F_TX_OUTER_IPV6 |
RTE_MBUF_F_TX_TUNNEL_MASK);
const uint64_t all_marks = all_inner_marks | all_outer_marks;

if (!(mbuf->ol_flags & all_requests)) {
/* No offloads requested, no marks should be set. */
mbuf->ol_flags &= ~all_marks;

uint64_t unexpected = mbuf->ol_flags & RTE_MBUF_F_TX_OFFLOAD_MASK;
if (OVS_UNLIKELY(unexpected)) {
VLOG_WARN_RL(&rl, "%s: Unexpected Tx offload flags: %#"PRIx64,
netdev_get_name(&dev->up), unexpected);
netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
"Packet with unexpected ol_flags", mbuf);
return false;
}
return true;
}

/* If packet is vxlan or geneve tunnel packet, calculate outer
* l2 len and outer l3 len. Inner l2/l3/l4 len are calculated
* before. */
if (mbuf->ol_flags &
(RTE_MBUF_F_TX_TUNNEL_GENEVE | RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) -
(char *) dp_packet_eth(pkt);
mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) -
(char *) dp_packet_l3(pkt);
const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
if (OVS_UNLIKELY(tunnel_type &&
tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE &&
tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
netdev_get_name(&dev->up), tunnel_type);
netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
"Packet with unexpected tunnel type", mbuf);
return false;
}

if (tunnel_type && (mbuf->ol_flags & all_inner_requests)) {
if (mbuf->ol_flags & all_outer_requests) {
mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) -
(char *) dp_packet_eth(pkt);
mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) -
(char *) dp_packet_l3(pkt);

/* Inner L2 length must account for the tunnel header length. */
l2 = dp_packet_l4(pkt);
l3 = dp_packet_inner_l3(pkt);
l4 = dp_packet_inner_l4(pkt);
} else {
/* If no outer offloading is requested, clear outer marks. */
mbuf->ol_flags &= ~all_outer_marks;
mbuf->outer_l2_len = 0;
mbuf->outer_l3_len = 0;

/* Skip outer headers. */
l2 = dp_packet_eth(pkt);
l3 = dp_packet_inner_l3(pkt);
l4 = dp_packet_inner_l4(pkt);
}
} else {
mbuf->l2_len = (char *) dp_packet_l3(pkt) -
(char *) dp_packet_eth(pkt);
mbuf->l3_len = (char *) dp_packet_l4(pkt) -
(char *) dp_packet_l3(pkt);
if (tunnel_type) {
/* No inner offload is requested, fallback to non tunnel
* checksum offloads. */
mbuf->ol_flags &= ~all_inner_marks;
if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) {
mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
mbuf->ol_flags |= RTE_MBUF_F_TX_IPV4;
}
if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM) {
mbuf->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
mbuf->ol_flags |= mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4
? RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
}
mbuf->ol_flags &= ~(all_outer_requests | all_outer_marks);
}
mbuf->outer_l2_len = 0;
mbuf->outer_l3_len = 0;
}
th = dp_packet_l4(pkt);

if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
if (!th) {
VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
" pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
return false;
}
l2 = dp_packet_eth(pkt);
l3 = dp_packet_l3(pkt);
l4 = dp_packet_l4(pkt);
}

if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) {
if (!th) {
VLOG_WARN_RL(&rl, "%s: TCP offloading without L4 header"
" pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
return false;
}
ovs_assert(l4);

if (mbuf->ol_flags & (RTE_MBUF_F_TX_TUNNEL_GENEVE |
RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
mbuf->tso_segsz = dev->mtu - mbuf->l2_len - mbuf->l3_len -
mbuf->l4_len - mbuf->outer_l3_len;
mbuf->l2_len = (char *) l3 - (char *) l2;
mbuf->l3_len = (char *) l4 - (char *) l3;

if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
struct tcp_header *th = l4;
uint16_t link_tso_segsz;
int hdr_len;

mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
if (tunnel_type) {
link_tso_segsz = dev->mtu - mbuf->l2_len - mbuf->l3_len -
mbuf->l4_len - mbuf->outer_l3_len;
} else {
mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
link_tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
}

if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
if (OVS_UNLIKELY((hdr_len +
mbuf->tso_segsz) > dev->max_packet_len)) {
VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: %"PRIu32", "
"gso: %"PRIu32", max len: %"PRIu32"",
dev->up.name, hdr_len, mbuf->tso_segsz,
dev->max_packet_len);
return false;
}
if (mbuf->tso_segsz > link_tso_segsz) {
mbuf->tso_segsz = link_tso_segsz;
}

if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) {
VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: %"PRIu32", "
"gso: %"PRIu32", max len: %"PRIu32"",
dev->up.name, hdr_len, mbuf->tso_segsz,
dev->max_packet_len);
return false;
}
}

/* If L4 checksum is requested, IPv4 should be requested as well. */
if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK
&& mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
}

return true;
}

Expand Down Expand Up @@ -2664,6 +2740,35 @@ netdev_dpdk_prep_hwol_batch(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
return cnt;
}

static void
netdev_dpdk_mbuf_dump(const char *prefix, const char *message,
const struct rte_mbuf *mbuf)
{
static struct vlog_rate_limit dump_rl = VLOG_RATE_LIMIT_INIT(5, 5);
char *response = NULL;
FILE *stream;
size_t size;

if (VLOG_DROP_DBG(&dump_rl)) {
return;
}

stream = open_memstream(&response, &size);
if (!stream) {
VLOG_ERR("Unable to open memstream for mbuf dump: %s.",
ovs_strerror(errno));
return;
}

rte_pktmbuf_dump(stream, mbuf, rte_pktmbuf_pkt_len(mbuf));

fclose(stream);

VLOG_DBG(prefix ? "%s: %s:\n%s" : "%s%s:\n%s",
prefix ? prefix : "", message, response);
free(response);
}

/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
* 'pkts', even in case of failure.
*
Expand All @@ -2680,6 +2785,8 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
"Only %u/%u are valid: %s", netdev_get_name(&dev->up),
nb_tx_prep, cnt, rte_strerror(rte_errno));
netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
"First invalid packet", pkts[nb_tx_prep]);
}

while (nb_tx != nb_tx_prep) {
Expand Down
17 changes: 16 additions & 1 deletion lib/netdev-dummy.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "pcap-file.h"
#include "openvswitch/poll-loop.h"
#include "openvswitch/shash.h"
#include "ovs-router.h"
#include "sset.h"
#include "stream.h"
#include "unaligned.h"
Expand Down Expand Up @@ -2084,11 +2085,20 @@ netdev_dummy_ip4addr(struct unixctl_conn *conn, int argc OVS_UNUSED,

if (netdev && is_dummy_class(netdev->netdev_class)) {
struct in_addr ip, mask;
struct in6_addr ip6;
uint32_t plen;
char *error;

error = ip_parse_masked(argv[2], &ip.s_addr, &mask.s_addr);
error = ip_parse_cidr(argv[2], &ip.s_addr, &plen);
if (!error) {
mask.s_addr = be32_prefix_mask(plen);
netdev_dummy_add_in4(netdev, ip, mask);

/* Insert local route entry for the new address. */
in6_addr_set_mapped_ipv4(&ip6, ip.s_addr);
ovs_router_force_insert(0, &ip6, plen + 96, true, argv[1],
&in6addr_any, &ip6);

unixctl_command_reply(conn, "OK");
} else {
unixctl_command_reply_error(conn, error);
Expand Down Expand Up @@ -2118,6 +2128,11 @@ netdev_dummy_ip6addr(struct unixctl_conn *conn, int argc OVS_UNUSED,

mask = ipv6_create_mask(plen);
netdev_dummy_add_in6(netdev, &ip6, &mask);

/* Insert local route entry for the new address. */
ovs_router_force_insert(0, &ip6, plen, true, argv[1],
&in6addr_any, &ip6);

unixctl_command_reply(conn, "OK");
} else {
unixctl_command_reply_error(conn, error);
Expand Down
9 changes: 8 additions & 1 deletion lib/netdev-linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -2403,6 +2403,7 @@ static int
netdev_linux_read_stringset_info(struct netdev_linux *netdev, uint32_t *len)
{
union {
struct ethtool_cmd ecmd;
struct ethtool_sset_info hdr;
struct {
uint64_t pad[2];
Expand Down Expand Up @@ -2440,9 +2441,12 @@ netdev_linux_read_definitions(struct netdev_linux *netdev,
int error = 0;

error = netdev_linux_read_stringset_info(netdev, &len);
if (error || !len) {
if (error) {
return error;
} else if (!len) {
return -EOPNOTSUPP;
}

strings = xzalloc(sizeof *strings + len * ETH_GSTRING_LEN);

strings->cmd = ETHTOOL_GSTRINGS;
Expand Down Expand Up @@ -2725,6 +2729,7 @@ netdev_linux_get_speed_locked(struct netdev_linux *netdev,
uint32_t *current, uint32_t *max)
{
if (netdev_linux_netnsid_is_remote(netdev)) {
*current = *max = 0;
return EOPNOTSUPP;
}

Expand All @@ -2734,6 +2739,8 @@ netdev_linux_get_speed_locked(struct netdev_linux *netdev,
? 0 : netdev->current_speed;
*max = MIN(UINT32_MAX,
netdev_features_to_bps(netdev->supported, 0) / 1000000ULL);
} else {
*current = *max = 0;
}
return netdev->get_features_error;
}
Expand Down
60 changes: 11 additions & 49 deletions lib/netdev-native-tnl.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,35 +240,15 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
return udp + 1;
}

/* Calculate inner l2 l3 l4 len as tunnel outer header is not
* encapsulated now. */
static void
dp_packet_tnl_ol_process(struct dp_packet *packet,
const struct ovs_action_push_tnl *data)
{
struct udp_header *udp = NULL;
uint8_t opt_len = 0;
struct eth_header *eth = NULL;
struct ip_header *ip = NULL;
struct genevehdr *gnh = NULL;

/* l2 l3 l4 len refer to inner len, tunnel outer
* header is not encapsulated here. */
if (dp_packet_hwol_l4_mask(packet)) {
ip = dp_packet_l3(packet);

if (ip->ip_proto == IPPROTO_TCP) {
struct tcp_header *th = dp_packet_l4(packet);
dp_packet_set_l4_len(packet, TCP_OFFSET(th->tcp_ctl) * 4);
} else if (ip->ip_proto == IPPROTO_UDP) {
dp_packet_set_l4_len(packet, UDP_HEADER_LEN);
} else if (ip->ip_proto == IPPROTO_SCTP) {
dp_packet_set_l4_len(packet, SCTP_HEADER_LEN);
}

dp_packet_set_l3_len(packet, (char *) dp_packet_l4(packet) -
(char *) dp_packet_l3(packet));

if (data->tnl_type == OVS_VPORT_TYPE_GENEVE ||
data->tnl_type == OVS_VPORT_TYPE_VXLAN) {

Expand All @@ -279,32 +259,12 @@ dp_packet_tnl_ol_process(struct dp_packet *packet,
dp_packet_hwol_set_tx_ipv6(packet);
}
}
}

/* Attention please, tunnel inner l2 len is consist of udp header
* len and tunnel header len and inner l2 len. */
if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
eth = (struct eth_header *)(data->header);
ip = (struct ip_header *)(eth + 1);
udp = (struct udp_header *)(ip + 1);
gnh = (struct genevehdr *)(udp + 1);
opt_len = gnh->opt_len * 4;
dp_packet_hwol_set_tunnel_geneve(packet);
dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) -
(char *) dp_packet_eth(packet) +
GENEVE_BASE_HLEN + opt_len);
} else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
dp_packet_hwol_set_tunnel_vxlan(packet);
dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) -
(char *) dp_packet_eth(packet) +
VXLAN_HLEN);
}
} else {
/* Mark non-l4 packets as tunneled. */
if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
dp_packet_hwol_set_tunnel_geneve(packet);
} else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
dp_packet_hwol_set_tunnel_vxlan(packet);
}
if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
dp_packet_hwol_set_tunnel_geneve(packet);
} else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
dp_packet_hwol_set_tunnel_vxlan(packet);
}
}

Expand Down Expand Up @@ -932,9 +892,9 @@ netdev_srv6_build_header(const struct netdev *netdev,
const struct netdev_tnl_build_header_params *params)
{
const struct netdev_tunnel_config *tnl_cfg;
union ovs_16aligned_in6_addr *s;
const struct in6_addr *segs;
struct srv6_base_hdr *srh;
struct in6_addr *s;
ovs_be16 dl_type;
int nr_segs;
int i;
Expand Down Expand Up @@ -978,8 +938,7 @@ netdev_srv6_build_header(const struct netdev *netdev,
return EOPNOTSUPP;
}

s = ALIGNED_CAST(struct in6_addr *,
(char *) srh + sizeof *srh);
s = (union ovs_16aligned_in6_addr *) (srh + 1);
for (i = 0; i < nr_segs; i++) {
/* Segment list is written to the header in reverse order. */
memcpy(s, &segs[nr_segs - i - 1], sizeof *s);
Expand Down Expand Up @@ -1068,7 +1027,10 @@ netdev_srv6_pop_header(struct dp_packet *packet)
}

pkt_metadata_init_tnl(md);
netdev_tnl_ip_extract_tnl_md(packet, tnl, &hlen);
if (!netdev_tnl_ip_extract_tnl_md(packet, tnl, &hlen)) {
goto err;
}

dp_packet_reset_packet(packet, hlen);

return packet;
Expand Down
10 changes: 10 additions & 0 deletions lib/netlink-protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ enum {
#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
#endif

/* Introduced in v4.4. */
#ifndef NLM_F_DUMP_FILTERED
#define NLM_F_DUMP_FILTERED 0x20
#endif

/* These were introduced all together in 2.6.14. (We want our programs to
* support the newer kernel features even if compiled with older headers.) */
#ifndef NETLINK_ADD_MEMBERSHIP
Expand All @@ -168,6 +173,11 @@ enum {
#define NETLINK_LISTEN_ALL_NSID 8
#endif

/* Strict checking of netlink arguments introduced in Linux kernel v4.20. */
#ifndef NETLINK_GET_STRICT_CHK
#define NETLINK_GET_STRICT_CHK 12
#endif

/* These were introduced all together in 2.6.23. (We want our programs to
* support the newer kernel features even if compiled with older headers.) */
#ifndef CTRL_ATTR_MCAST_GRP_MAX
Expand Down
9 changes: 9 additions & 0 deletions lib/netlink-socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,15 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
}
}

/* Strict checking only supported for NETLINK_ROUTE. */
if (protocol == NETLINK_ROUTE
&& setsockopt(sock->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
&one, sizeof one) < 0) {
VLOG_RL(&rl, errno == ENOPROTOOPT ? VLL_DBG : VLL_WARN,
"netlink: could not enable strict checking (%s)",
ovs_strerror(errno));
}

retval = get_socket_rcvbuf(sock->fd);
if (retval < 0) {
retval = -retval;
Expand Down
5 changes: 5 additions & 0 deletions lib/odp-execute-avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ avx512_get_delta(__m256i old_header, __m256i new_header)
0xF, 0xF, 0xF, 0xF);
v_delta = _mm256_permutexvar_epi32(v_swap32a, v_delta);

v_delta = _mm256_hadd_epi32(v_delta, v_zeros);
v_delta = _mm256_shuffle_epi8(v_delta, v_swap16a);
v_delta = _mm256_hadd_epi32(v_delta, v_zeros);
v_delta = _mm256_hadd_epi16(v_delta, v_zeros);

Expand Down Expand Up @@ -575,6 +577,9 @@ avx512_ipv6_sum_header(__m512i ip6_header)
0xF, 0xF, 0xF, 0xF);

v_delta = _mm256_permutexvar_epi32(v_swap32a, v_delta);

v_delta = _mm256_hadd_epi32(v_delta, v_zeros);
v_delta = _mm256_shuffle_epi8(v_delta, v_swap16a);
v_delta = _mm256_hadd_epi32(v_delta, v_zeros);
v_delta = _mm256_hadd_epi16(v_delta, v_zeros);

Expand Down
4 changes: 2 additions & 2 deletions lib/odp-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1797,8 +1797,8 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
} else if (ovs_scan_len(s, &n, "srv6(segments_left=%"SCNu8,
&segments_left)) {
struct srv6_base_hdr *srh = (struct srv6_base_hdr *) (ip6 + 1);
union ovs_16aligned_in6_addr *segs;
char seg_s[IPV6_SCAN_LEN + 1];
struct in6_addr *segs;
struct in6_addr seg;
uint8_t n_segs = 0;

Expand All @@ -1821,7 +1821,7 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
return -EINVAL;
}

segs = ALIGNED_CAST(struct in6_addr *, srh + 1);
segs = (union ovs_16aligned_in6_addr *) (srh + 1);
segs += segments_left;

while (ovs_scan_len(s, &n, IPV6_SCAN_FMT, seg_s)
Expand Down
4 changes: 2 additions & 2 deletions lib/ofpbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,12 @@ ofpbuf_clone_with_headroom(const struct ofpbuf *b, size_t headroom)
struct ofpbuf *new_buffer;

new_buffer = ofpbuf_clone_data_with_headroom(b->data, b->size, headroom);
if (b->header) {
if (new_buffer->data && b->header) {
ptrdiff_t header_offset = (char *) b->header - (char *) b->data;

new_buffer->header = (char *) new_buffer->data + header_offset;
}
if (b->msg) {
if (new_buffer->data && b->msg) {
ptrdiff_t msg_offset = (char *) b->msg - (char *) b->data;

new_buffer->msg = (char *) new_buffer->data + msg_offset;
Expand Down
2 changes: 1 addition & 1 deletion lib/ovs-rcu.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ ovsrcu_postpone__(void (*function)(void *aux), void *aux)
cb->aux = aux;
}

static bool
static bool OVS_NO_SANITIZE_FUNCTION
ovsrcu_call_postponed(void)
{
struct ovsrcu_cbset *cbset;
Expand Down
14 changes: 14 additions & 0 deletions lib/ovs-router.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,20 @@ ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t plen,
}
}

/* The same as 'ovs_router_insert', but it adds the route even if updates
* from the system routing table are disabled. Used for unit tests. */
void
ovs_router_force_insert(uint32_t mark, const struct in6_addr *ip_dst,
uint8_t plen, bool local, const char output_bridge[],
const struct in6_addr *gw,
const struct in6_addr *prefsrc)
{
uint8_t priority = local ? plen + 64 : plen;

ovs_router_insert__(mark, priority, local, ip_dst, plen,
output_bridge, gw, prefsrc);
}

static void
rt_entry_delete__(const struct cls_rule *cr)
{
Expand Down
5 changes: 5 additions & 0 deletions lib/ovs-router.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ void ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst,
uint8_t plen, bool local,
const char output_bridge[], const struct in6_addr *gw,
const struct in6_addr *prefsrc);
void ovs_router_force_insert(uint32_t mark, const struct in6_addr *ip_dst,
uint8_t plen, bool local,
const char output_bridge[],
const struct in6_addr *gw,
const struct in6_addr *prefsrc);
void ovs_router_flush(void);

void ovs_router_disable_system_routing_table(void);
Expand Down
80 changes: 64 additions & 16 deletions lib/route-table.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/rtnetlink.h>
#include <net/if.h>

#include "coverage.h"
#include "hash.h"
#include "netdev.h"
#include "netlink.h"
Expand All @@ -44,6 +45,8 @@

VLOG_DEFINE_THIS_MODULE(route_table);

COVERAGE_DEFINE(route_table_dump);

struct route_data {
/* Copied from struct rtmsg. */
unsigned char rtm_dst_len;
Expand Down Expand Up @@ -80,7 +83,7 @@ static struct nln_notifier *name_notifier = NULL;

static bool route_table_valid = false;

static int route_table_reset(void);
static void route_table_reset(void);
static void route_table_handle_msg(const struct route_table_msg *);
static int route_table_parse(struct ofpbuf *, struct route_table_msg *);
static void route_table_change(const struct route_table_msg *, void *);
Expand Down Expand Up @@ -153,26 +156,22 @@ route_table_wait(void)
ovs_mutex_unlock(&route_table_mutex);
}

static int
route_table_reset(void)
static bool
route_table_dump_one_table(unsigned char id)
{
struct nl_dump dump;
struct rtgenmsg *rtgenmsg;
uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
struct ofpbuf request, reply, buf;

route_map_clear();
netdev_get_addrs_list_flush();
route_table_valid = true;
rt_change_seq++;
struct rtmsg *rq_msg;
bool filtered = true;
struct nl_dump dump;

ofpbuf_init(&request, 0);

nl_msg_put_nlmsghdr(&request, sizeof *rtgenmsg, RTM_GETROUTE,
NLM_F_REQUEST);
nl_msg_put_nlmsghdr(&request, sizeof *rq_msg, RTM_GETROUTE, NLM_F_REQUEST);

rtgenmsg = ofpbuf_put_zeros(&request, sizeof *rtgenmsg);
rtgenmsg->rtgen_family = AF_UNSPEC;
rq_msg = ofpbuf_put_zeros(&request, sizeof *rq_msg);
rq_msg->rtm_family = AF_UNSPEC;
rq_msg->rtm_table = id;

nl_dump_start(&dump, NETLINK_ROUTE, &request);
ofpbuf_uninit(&request);
Expand All @@ -182,12 +181,43 @@ route_table_reset(void)
struct route_table_msg msg;

if (route_table_parse(&reply, &msg)) {
struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(&reply);

/* Older kernels do not support filtering. */
if (!(nlmsghdr->nlmsg_flags & NLM_F_DUMP_FILTERED)) {
filtered = false;
}
route_table_handle_msg(&msg);
}
}
ofpbuf_uninit(&buf);
nl_dump_done(&dump);

return filtered;
}

static void
route_table_reset(void)
{
unsigned char tables[] = {
RT_TABLE_DEFAULT,
RT_TABLE_MAIN,
RT_TABLE_LOCAL,
};

return nl_dump_done(&dump);
route_map_clear();
netdev_get_addrs_list_flush();
route_table_valid = true;
rt_change_seq++;

COVERAGE_INC(route_table_dump);

for (size_t i = 0; i < ARRAY_SIZE(tables); i++) {
if (!route_table_dump_one_table(tables[i])) {
/* Got unfiltered reply, no need to dump further. */
break;
}
}
}

/* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse
Expand All @@ -203,6 +233,7 @@ route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
[RTA_GATEWAY] = { .type = NL_A_U32, .optional = true },
[RTA_MARK] = { .type = NL_A_U32, .optional = true },
[RTA_PREFSRC] = { .type = NL_A_U32, .optional = true },
[RTA_TABLE] = { .type = NL_A_U32, .optional = true },
};

static const struct nl_policy policy6[] = {
Expand All @@ -211,6 +242,7 @@ route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
[RTA_MARK] = { .type = NL_A_U32, .optional = true },
[RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true },
[RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true },
[RTA_TABLE] = { .type = NL_A_U32, .optional = true },
};

struct nlattr *attrs[ARRAY_SIZE(policy)];
Expand All @@ -232,6 +264,7 @@ route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)

if (parsed) {
const struct nlmsghdr *nlmsg;
uint32_t table_id;
int rta_oif; /* Output interface index. */

nlmsg = buf->data;
Expand All @@ -247,6 +280,19 @@ route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
rtm->rtm_type != RTN_LOCAL) {
change->relevant = false;
}

table_id = rtm->rtm_table;
if (attrs[RTA_TABLE]) {
table_id = nl_attr_get_u32(attrs[RTA_TABLE]);
}
/* Do not consider changes in non-standard routing tables. */
if (table_id
&& table_id != RT_TABLE_DEFAULT
&& table_id != RT_TABLE_MAIN
&& table_id != RT_TABLE_LOCAL) {
change->relevant = false;
}

change->nlmsg_type = nlmsg->nlmsg_type;
change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? 96 : 0);
change->rd.local = rtm->rtm_type == RTN_LOCAL;
Expand Down Expand Up @@ -312,7 +358,9 @@ static void
route_table_change(const struct route_table_msg *change OVS_UNUSED,
void *aux OVS_UNUSED)
{
route_table_valid = false;
if (!change || change->relevant) {
route_table_valid = false;
}
}

static void
Expand Down
9 changes: 9 additions & 0 deletions lib/socket-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,15 @@ inet_parse_active(const char *target_, int default_port,
if (!host) {
VLOG_ERR("%s: host must be specified", target_);
ok = false;
if (dns_failure) {
*dns_failure = false;
}
} else if (!port && default_port < 0) {
VLOG_ERR("%s: port must be specified", target_);
ok = false;
if (dns_failure) {
*dns_failure = false;
}
} else {
ok = parse_sockaddr_components(ss, host, port, default_port,
target_, resolve_host, dns_failure);
Expand Down Expand Up @@ -671,6 +677,9 @@ inet_parse_passive(const char *target_, int default_port,
if (!port && default_port < 0) {
VLOG_ERR("%s: port must be specified", target_);
ok = false;
if (dns_failure) {
*dns_failure = false;
}
} else {
ok = parse_sockaddr_components(ss, host, port, default_port,
target_, resolve_host, dns_failure);
Expand Down
2 changes: 1 addition & 1 deletion lib/table.c
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ table_print_json__(const struct table *table, const struct table_style *style,
json_object_put_string(json, "caption", table->caption);
}
if (table->timestamp) {
json_object_put_nocopy(
json_object_put(
json, "time",
json_string_create_nocopy(table_format_timestamp__()));
}
Expand Down
22 changes: 11 additions & 11 deletions lib/tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3056,17 +3056,17 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
struct tc_action *action,
uint32_t action_pc)
{
struct {
union {
struct tc_pedit sel;
struct tc_pedit_key keys[MAX_PEDIT_OFFSETS];
struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
} sel = {
.sel = {
.nkeys = 0
}
};
uint8_t buffer[sizeof(struct tc_pedit)
+ MAX_PEDIT_OFFSETS * sizeof(struct tc_pedit_key)];
} sel;
struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
int i, j, err;

memset(&sel, 0, sizeof sel);
memset(keys_ex, 0, sizeof keys_ex);

for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
struct flower_key_to_pedit *m = &flower_pedit_map[i];
struct tc_pedit_key *pedit_key = NULL;
Expand Down Expand Up @@ -3100,8 +3100,8 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
return EOPNOTSUPP;
}

pedit_key = &sel.keys[sel.sel.nkeys];
pedit_key_ex = &sel.keys_ex[sel.sel.nkeys];
pedit_key = &sel.sel.keys[sel.sel.nkeys];
pedit_key_ex = &keys_ex[sel.sel.nkeys];
pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_SET;
pedit_key_ex->htype = m->htype;
pedit_key->off = cur_offset;
Expand All @@ -3121,7 +3121,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
}
}
}
nl_msg_put_act_pedit(request, &sel.sel, sel.keys_ex,
nl_msg_put_act_pedit(request, &sel.sel, keys_ex,
flower->csum_update_flags ? TC_ACT_PIPE : action_pc);

return 0;
Expand Down
12 changes: 9 additions & 3 deletions lib/vlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <time.h>
#include <unistd.h>
#include "async-append.h"
#include "backtrace.h"
#include "coverage.h"
#include "dirs.h"
#include "openvswitch/dynamic-string.h"
Expand Down Expand Up @@ -410,10 +411,10 @@ vlog_set_log_file__(char *new_log_file_name)

/* Close old log file, if any. */
ovs_mutex_lock(&log_file_mutex);
async_append_destroy(log_writer);
if (log_fd >= 0) {
close(log_fd);
}
async_append_destroy(log_writer);
free(log_file_name);

/* Install new log file. */
Expand Down Expand Up @@ -1274,8 +1275,9 @@ vlog_fatal(const struct vlog_module *module, const char *message, ...)
va_end(args);
}

/* Logs 'message' to 'module' at maximum verbosity, then calls abort(). Always
* writes the message to stderr, even if the console destination is disabled.
/* Attempts to log a stack trace, logs 'message' to 'module' at maximum
* verbosity, then calls abort(). Always writes the message to stderr, even
* if the console destination is disabled.
*
* Choose this function instead of vlog_fatal_valist() if the daemon monitoring
* facility should automatically restart the current daemon. */
Expand All @@ -1289,6 +1291,10 @@ vlog_abort_valist(const struct vlog_module *module_,
* message written by the later ovs_abort_valist(). */
module->levels[VLF_CONSOLE] = VLL_OFF;

/* Printing the stack trace before the 'message', because the 'message'
* will flush the async log queue (VLL_EMER). With a different order we
* would need to flush the queue manually again. */
log_backtrace();
vlog_valist(module, VLL_EMER, message, args);
ovs_abort_valist(0, message, args);
}
Expand Down
3 changes: 2 additions & 1 deletion m4/ax_check_openssl.m4
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ AC_DEFUN([AX_CHECK_OPENSSL], [
SSL_INCLUDES="-I$ssldir/include"
SSL_LDFLAGS="-L$ssldir/lib"
if test "$WIN32" = "yes"; then
SSL_LIBS="-lssleay32 -llibeay32"
SSL_LDFLAGS="$SSL_LDFLAGS -L$ssldir/lib/VC/x64/MT"
SSL_LIBS="-llibssl -llibcrypto"
SSL_DIR=/$(echo ${ssldir} | ${SED} -e 's/://')
else
SSL_LIBS="-lssl -lcrypto"
Expand Down
33 changes: 15 additions & 18 deletions ofproto/bond.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ static struct bond_member *choose_output_member(const struct bond *,
struct flow_wildcards *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
static void update_recirc_rules__(struct bond *);
static void update_recirc_rules(struct bond *) OVS_REQ_WRLOCK(rwlock);
static bool bond_may_recirc(const struct bond *);
static void bond_update_post_recirc_rules__(struct bond *, bool force)
OVS_REQ_WRLOCK(rwlock);
Expand Down Expand Up @@ -299,7 +299,10 @@ bond_unref(struct bond *bond)
}
free(bond->hash);
bond->hash = NULL;
update_recirc_rules__(bond);

ovs_rwlock_wrlock(&rwlock);
update_recirc_rules(bond);
ovs_rwlock_unlock(&rwlock);

hmap_destroy(&bond->pr_rule_ops);
free(bond->primary);
Expand Down Expand Up @@ -331,17 +334,8 @@ add_pr_rule(struct bond *bond, const struct match *match,
hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash);
}

/* This function should almost never be called directly.
* 'update_recirc_rules()' should be called instead. Since
* this function modifies 'bond->pr_rule_ops', it is only
* safe when 'rwlock' is held.
*
* However, when the 'bond' is the only reference in the system,
* calling this function avoid acquiring lock only to satisfy
* lock annotation. Currently, only 'bond_unref()' calls
* this function directly. */
static void
update_recirc_rules__(struct bond *bond)
update_recirc_rules(struct bond *bond) OVS_REQ_WRLOCK(rwlock)
{
struct match match;
struct bond_pr_rule_op *pr_op;
Expand Down Expand Up @@ -407,6 +401,15 @@ update_recirc_rules__(struct bond *bond)

VLOG_ERR("failed to remove post recirculation flow %s", err_s);
free(err_s);
} else if (bond->hash) {
/* If the flow deletion failed, a subsequent call to
* ofproto_dpif_add_internal_flow() would just modify the
* flow preserving its statistics. Therefore, only reset
* the entry's byte counter if it succeeds. */
uint32_t hash = pr_op->match.flow.dp_hash & BOND_MASK;
struct bond_entry *entry = &bond->hash[hash];

entry->pr_tx_bytes = 0;
}

hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
Expand All @@ -421,12 +424,6 @@ update_recirc_rules__(struct bond *bond)
ofpbuf_uninit(&ofpacts);
}

static void
update_recirc_rules(struct bond *bond)
OVS_REQ_RDLOCK(rwlock)
{
update_recirc_rules__(bond);
}

/* Updates 'bond''s overall configuration to 's'.
*
Expand Down
21 changes: 20 additions & 1 deletion ofproto/ofproto-dpif-trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ oftrace_add_recirc_node(struct ovs_list *recirc_queue,
node->flow = *flow;
node->flow.recirc_id = recirc_id;
node->flow.ct_zone = zone;
node->nat_act = ofn;
node->nat_act = ofn ? xmemdup(ofn, sizeof *ofn) : NULL;
node->packet = packet ? dp_packet_clone(packet) : NULL;

return true;
Expand All @@ -113,6 +113,7 @@ oftrace_recirc_node_destroy(struct oftrace_recirc_node *node)
{
if (node) {
recirc_free_id(node->recirc_id);
free(node->nat_act);
dp_packet_delete(node->packet);
free(node);
}
Expand Down Expand Up @@ -845,17 +846,35 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
bool names)
{
struct ovs_list recirc_queue = OVS_LIST_INITIALIZER(&recirc_queue);
int recirculations = 0;

ofproto_trace__(ofproto, flow, packet, &recirc_queue,
ofpacts, ofpacts_len, output, names);

struct oftrace_recirc_node *recirc_node;
LIST_FOR_EACH_POP (recirc_node, node, &recirc_queue) {
if (recirculations++ > 4096) {
ds_put_cstr(output, "\n\n");
ds_put_char_multiple(output, '=', 79);
ds_put_cstr(output, "\nTrace reached the recirculation limit."
" Sopping the trace here.");
ds_put_format(output,
"\nQueued but not processed: %"PRIuSIZE
" recirculations.",
ovs_list_size(&recirc_queue) + 1);
oftrace_recirc_node_destroy(recirc_node);
break;
}
ofproto_trace_recirc_node(recirc_node, next_ct_states, output);
ofproto_trace__(ofproto, &recirc_node->flow, recirc_node->packet,
&recirc_queue, ofpacts, ofpacts_len, output,
names);
oftrace_recirc_node_destroy(recirc_node);
}
/* Destroy remaining recirculation nodes, if any. */
LIST_FOR_EACH_POP (recirc_node, node, &recirc_queue) {
oftrace_recirc_node_destroy(recirc_node);
}
}

void
Expand Down
2 changes: 1 addition & 1 deletion ofproto/ofproto-dpif-trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ struct oftrace_recirc_node {
uint32_t recirc_id;
struct flow flow;
struct dp_packet *packet;
const struct ofpact_nat *nat_act;
struct ofpact_nat *nat_act;
};

/* A node within a next_ct_states list. */
Expand Down
14 changes: 7 additions & 7 deletions ofproto/ofproto-dpif-upcall.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ COVERAGE_DEFINE(handler_duplicate_upcall);
COVERAGE_DEFINE(revalidate_missed_dp_flow);
COVERAGE_DEFINE(ukey_dp_change);
COVERAGE_DEFINE(ukey_invalid_stat_reset);
COVERAGE_DEFINE(ukey_replace_contention);
COVERAGE_DEFINE(upcall_flow_limit_grew);
COVERAGE_DEFINE(upcall_flow_limit_hit);
COVERAGE_DEFINE(upcall_flow_limit_kill);
Expand Down Expand Up @@ -1428,8 +1429,6 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi
}

if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) {
static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 1);
VLOG_WARN_RL(&rll, "upcall_cb failure: ukey installation fails");
error = ENOSPC;
}
out:
Expand Down Expand Up @@ -1927,15 +1926,15 @@ try_ukey_replace(struct umap *umap, struct udpif_key *old_ukey,
transition_ukey(old_ukey, UKEY_DELETED);
transition_ukey(new_ukey, UKEY_VISIBLE);
replaced = true;
COVERAGE_INC(upcall_ukey_replace);
} else {
COVERAGE_INC(handler_duplicate_upcall);
}
ovs_mutex_unlock(&old_ukey->mutex);
}

if (replaced) {
COVERAGE_INC(upcall_ukey_replace);
} else {
COVERAGE_INC(handler_duplicate_upcall);
COVERAGE_INC(ukey_replace_contention);
}

return replaced;
}

Expand Down Expand Up @@ -2973,6 +2972,7 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge)
/* Handler threads could be holding a ukey lock while it installs a
* new flow, so don't hang around waiting for access to it. */
if (ovs_mutex_trylock(&ukey->mutex)) {
COVERAGE_INC(upcall_ukey_contention);
continue;
}
ukey_state = ukey->state;
Expand Down
68 changes: 36 additions & 32 deletions ofproto/ofproto-dpif-xlate.c
Original file line number Diff line number Diff line change
Expand Up @@ -3815,6 +3815,8 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,

if (flow->tunnel.ip_src) {
in6_addr_set_mapped_ipv4(&s_ip6, flow->tunnel.ip_src);
} else if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
s_ip6 = flow->tunnel.ipv6_src;
}

err = tnl_route_lookup_flow(ctx, flow, &d_ip6, &s_ip6, &out_dev);
Expand Down Expand Up @@ -5078,10 +5080,37 @@ put_controller_user_action(struct xlate_ctx *ctx,
bool dont_send, bool continuation,
uint32_t recirc_id, int len,
enum ofp_packet_in_reason reason,
uint32_t provider_meter_id,
uint16_t controller_id)
{
struct user_action_cookie cookie;

/* If the controller action didn't request a meter (indicated by a
* 'meter_id' argument other than NX_CTLR_NO_METER), see if one was
* configured through the "controller" virtual meter.
*
* Internally, ovs-vswitchd uses UINT32_MAX to indicate no meter is
* configured. */
uint32_t meter_id;
if (provider_meter_id == UINT32_MAX) {
meter_id = ctx->xbridge->ofproto->up.controller_meter_id;
} else {
meter_id = provider_meter_id;
}

size_t offset;
size_t ac_offset;
if (meter_id != UINT32_MAX) {
/* If controller meter is configured, generate
* clone(meter,userspace) action. */
offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_SAMPLE);
nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY,
UINT32_MAX);
ac_offset = nl_msg_start_nested(ctx->odp_actions,
OVS_SAMPLE_ATTR_ACTIONS);
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_METER, meter_id);
}

memset(&cookie, 0, sizeof cookie);
cookie.type = USER_ACTION_COOKIE_CONTROLLER;
cookie.ofp_in_port = OFPP_NONE,
Expand All @@ -5099,6 +5128,11 @@ put_controller_user_action(struct xlate_ctx *ctx,
uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
odp_put_userspace_action(pid, &cookie, sizeof cookie, ODPP_NONE,
false, ctx->odp_actions, NULL);

if (meter_id != UINT32_MAX) {
nl_msg_end_nested(ctx->odp_actions, ac_offset);
nl_msg_end_nested(ctx->odp_actions, offset);
}
}

static void
Expand Down Expand Up @@ -5143,45 +5177,14 @@ xlate_controller_action(struct xlate_ctx *ctx, int len,
}
recirc_refs_add(&ctx->xout->recircs, recirc_id);

/* If the controller action didn't request a meter (indicated by a
* 'meter_id' argument other than NX_CTLR_NO_METER), see if one was
* configured through the "controller" virtual meter.
*
* Internally, ovs-vswitchd uses UINT32_MAX to indicate no meter is
* configured. */
uint32_t meter_id;
if (provider_meter_id == UINT32_MAX) {
meter_id = ctx->xbridge->ofproto->up.controller_meter_id;
} else {
meter_id = provider_meter_id;
}

size_t offset;
size_t ac_offset;
if (meter_id != UINT32_MAX) {
/* If controller meter is configured, generate clone(meter, userspace)
* action. */
offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_SAMPLE);
nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY,
UINT32_MAX);
ac_offset = nl_msg_start_nested(ctx->odp_actions,
OVS_SAMPLE_ATTR_ACTIONS);
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_METER, meter_id);
}

/* Generate the datapath flows even if we don't send the packet-in
* so that debugging more closely represents normal state. */
bool dont_send = false;
if (!ctx->xin->allow_side_effects && !ctx->xin->xcache) {
dont_send = true;
}
put_controller_user_action(ctx, dont_send, false, recirc_id, len,
reason, controller_id);

if (meter_id != UINT32_MAX) {
nl_msg_end_nested(ctx->odp_actions, ac_offset);
nl_msg_end_nested(ctx->odp_actions, offset);
}
reason, provider_meter_id, controller_id);
}

/* Creates a frozen state, and allocates a unique recirc id for the given
Expand Down Expand Up @@ -5233,6 +5236,7 @@ finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
put_controller_user_action(ctx, false, true, recirc_id,
ctx->pause->max_len,
ctx->pause->reason,
ctx->pause->provider_meter_id,
ctx->pause->controller_id);
} else {
if (ctx->recirc_update_dp_hash) {
Expand Down
18 changes: 12 additions & 6 deletions ofproto/ofproto-dpif.c
Original file line number Diff line number Diff line change
Expand Up @@ -3904,15 +3904,21 @@ port_query_by_name(const struct ofproto *ofproto_, const char *devname,
int error;

if (sset_contains(&ofproto->ghost_ports, devname)) {
const char *type = netdev_get_type_from_name(devname);

/* We may be called before ofproto->up.port_by_name is populated with
* the appropriate ofport. For this reason, we must get the name and
* type from the netdev layer directly. */
if (type) {
const struct ofport *ofport;
* type from the netdev layer directly.
* However, when a port deleted, the corresponding netdev is also
* removed from netdev_shash. netdev_get_type_from_name returns NULL
* in such case and we should try to get type from ofport->netdev. */
const char *type = netdev_get_type_from_name(devname);
const struct ofport *ofport =
shash_find_data(&ofproto->up.port_by_name, devname);

ofport = shash_find_data(&ofproto->up.port_by_name, devname);
if (!type && ofport && ofport->netdev) {
type = netdev_get_type(ofport->netdev);
}

if (type) {
ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
ofproto_port->name = xstrdup(devname);
ofproto_port->type = xstrdup(type);
Expand Down
2 changes: 2 additions & 0 deletions ovsdb/automake.mk
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,13 @@ $(OVSIDL_BUILT): ovsdb/ovsdb-idlc.in python/ovs/dirs.py

# ovsdb-doc
EXTRA_DIST += ovsdb/ovsdb-doc
FLAKE8_PYFILES += ovsdb/ovsdb-doc
OVSDB_DOC = $(run_python) $(srcdir)/ovsdb/ovsdb-doc
ovsdb/ovsdb-doc: python/ovs/dirs.py

# ovsdb-dot
EXTRA_DIST += ovsdb/ovsdb-dot.in ovsdb/dot2pic
FLAKE8_PYFILES += ovsdb/ovsdb-dot.in ovsdb/dot2pic
noinst_SCRIPTS += ovsdb/ovsdb-dot
CLEANFILES += ovsdb/ovsdb-dot
OVSDB_DOT = $(run_python) $(srcdir)/ovsdb/ovsdb-dot.in
Expand Down
6 changes: 3 additions & 3 deletions ovsdb/dot2pic
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import getopt
import sys


def dot2pic(src, dst):
scale = 1.0
while True:
Expand Down Expand Up @@ -49,8 +50,8 @@ def dot2pic(src, dst):
dst.write("box at %f,%f wid %f height %f\n"
% (x, y, width, height))
elif command == 'edge':
tail = words[1]
head = words[2]
# tail = words[1]
# head = words[2]
n = int(words[3])

# Extract x,y coordinates.
Expand Down Expand Up @@ -114,4 +115,3 @@ else:
if font_scale:
print(".ps %+d" % font_scale)
print(".PE")

5 changes: 3 additions & 2 deletions ovsdb/ovsdb-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,9 @@ usage(void)
" wait until DATABASE reaches STATE "
"(\"added\" or \"connected\" or \"removed\")\n"
" in DATBASE on SERVER.\n"
"\n dump [SERVER] [DATABASE]\n"
" dump contents of DATABASE on SERVER to stdout\n"
"\n dump [SERVER] [DATABASE] [TABLE]\n"
" dump contents of TABLE (or all tables) in DATABASE on SERVER\n"
" to stdout\n"
"\n backup [SERVER] [DATABASE] > SNAPSHOT\n"
" dump database contents in the form of a database file\n"
"\n [--force] restore [SERVER] [DATABASE] < SNAPSHOT\n"
Expand Down
50 changes: 25 additions & 25 deletions ovsdb/ovsdb-doc
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import date
import getopt
import os
import sys
import xml.dom.minidom

import ovs.json
from ovs.db import error
import ovs.db.schema

from ovs_build_helpers.nroff import *
from ovs_build_helpers.nroff import block_xml_to_nroff
from ovs_build_helpers.nroff import escape_nroff_literal
from ovs_build_helpers.nroff import text_to_nroff

argv0 = sys.argv[0]


def typeAndConstraintsToNroff(column):
type = column.type.toEnglish(escape_nroff_literal)
constraints = column.type.constraintsToEnglish(escape_nroff_literal,
Expand All @@ -38,6 +39,7 @@ def typeAndConstraintsToNroff(column):
type += " (must be unique within table)"
return type


def columnGroupToNroff(table, groupXml, documented_columns):
introNodes = []
columnNodes = []
Expand All @@ -49,7 +51,10 @@ def columnGroupToNroff(table, groupXml, documented_columns):
if (columnNodes
and not (node.nodeType == node.TEXT_NODE
and node.data.isspace())):
raise error.Error("text follows <column> or <group> inside <group>: %s" % node)
raise error.Error(
"text follows <column> or <group> inside <group>: %s"
% node
)
introNodes += [node]

summary = []
Expand All @@ -65,15 +70,9 @@ def columnGroupToNroff(table, groupXml, documented_columns):
if node.hasAttribute('type'):
type_string = node.attributes['type'].nodeValue
type_json = ovs.json.from_string(str(type_string))
# py2 -> py3 means str -> bytes and unicode -> str
try:
if type(type_json) in (str, unicode):
raise error.Error("%s %s:%s has invalid 'type': %s"
% (table.name, name, key, type_json))
except:
if type(type_json) in (bytes, str):
raise error.Error("%s %s:%s has invalid 'type': %s"
% (table.name, name, key, type_json))
if type(type_json) in (bytes, str):
raise error.Error("%s %s:%s has invalid 'type': %s"
% (table.name, name, key, type_json))
type_ = ovs.db.types.BaseType.from_json(type_json)
else:
type_ = column.type.value
Expand All @@ -91,10 +90,11 @@ def columnGroupToNroff(table, groupXml, documented_columns):
else:
if type_.type != column.type.value.type:
type_english = type_.toEnglish()
typeNroff += ", containing "
if type_english[0] in 'aeiou':
typeNroff += ", containing an %s" % type_english
typeNroff += "an %s" % type_english
else:
typeNroff += ", containing a %s" % type_english
typeNroff += "a %s" % type_english
constraints = (
type_.constraintsToEnglish(escape_nroff_literal,
text_to_nroff))
Expand All @@ -121,6 +121,7 @@ def columnGroupToNroff(table, groupXml, documented_columns):
raise error.Error("unknown element %s in <table>" % node.tagName)
return summary, intro, body


def tableSummaryToNroff(summary, level=0):
s = ""
for type, name, arg in summary:
Expand All @@ -132,6 +133,7 @@ def tableSummaryToNroff(summary, level=0):
s += ".RE\n"
return s


def tableToNroff(schema, tableXml):
tableName = tableXml.attributes['name'].nodeValue
table = schema.tables[tableName]
Expand All @@ -156,20 +158,17 @@ def tableToNroff(schema, tableXml):

return s


def docsToNroff(schemaFile, xmlFile, erFile, version=None):
schema = ovs.db.schema.DbSchema.from_json(ovs.json.from_file(schemaFile))
doc = xml.dom.minidom.parse(xmlFile).documentElement

schemaDate = os.stat(schemaFile).st_mtime
xmlDate = os.stat(xmlFile).st_mtime
d = date.fromtimestamp(max(schemaDate, xmlDate))

if doc.hasAttribute('name'):
manpage = doc.attributes['name'].nodeValue
else:
manpage = schema.name

if version == None:
if version is None:
version = "UNKNOWN"

# Putting '\" p as the first line tells "man" that the manpage
Expand All @@ -194,7 +193,6 @@ def docsToNroff(schemaFile, xmlFile, erFile, version=None):
.PP
''' % (manpage, schema.version, version, text_to_nroff(manpage), schema.name)

tables = ""
introNodes = []
tableNodes = []
summary = []
Expand Down Expand Up @@ -237,8 +235,8 @@ Purpose
""" % (name, text_to_nroff(title))

if erFile:
s += """
.\\" check if in troff mode (TTY)
s += r"""
.\" check if in troff mode (TTY)
.if t \{
.bp
.SH "TABLE RELATIONSHIPS"
Expand All @@ -248,8 +246,8 @@ database. Each node represents a table. Tables that are part of the
``root set'' are shown with double borders. Each edge leads from the
table that contains it and points to the table that its value
represents. Edges are labeled with their column names, followed by a
constraint on the number of allowed values: \\fB?\\fR for zero or one,
\\fB*\\fR for zero or more, \\fB+\\fR for one or more. Thick lines
constraint on the number of allowed values: \fB?\fR for zero or one,
\fB*\fR for zero or more, \fB+\fR for one or more. Thick lines
represent strong references; thin lines represent weak references.
.RS -1in
"""
Expand All @@ -263,6 +261,7 @@ represent strong references; thin lines represent weak references.
s += tableToNroff(schema, node) + "\n"
return s


def usage():
print("""\
%(argv0)s: ovsdb schema documentation generator
Expand All @@ -278,6 +277,7 @@ The following options are also available:
""" % {'argv0': argv0})
sys.exit(0)


if __name__ == "__main__":
try:
try:
Expand Down
41 changes: 22 additions & 19 deletions ovsdb/ovsdb-dot.in
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#! @PYTHON3@

from datetime import date
import ovs.db.error
import ovs.db.schema
import getopt
import os
import re
import sys

argv0 = sys.argv[0]


def printEdge(tableName, type, baseType, label):
if baseType.ref_table_name:
if type.n_min == 0:
Expand All @@ -31,38 +29,42 @@ def printEdge(tableName, type, baseType, label):
options['label'] = '"%s%s"' % (label, arity)
if baseType.ref_type == 'weak':
options['style'] = 'dotted'
print ("\t%s -> %s [%s];" % (
print("\t%s -> %s [%s];" % (
tableName,
baseType.ref_table_name,
', '.join(['%s=%s' % (k,v) for k,v in options.items()])))
', '.join(['%s=%s' % (k, v) for k, v in options.items()])))


def schemaToDot(schemaFile, arrows):
schema = ovs.db.schema.DbSchema.from_json(ovs.json.from_file(schemaFile))

print ("digraph %s {" % schema.name)
print ('\trankdir=LR;')
print ('\tsize="6.5,4";')
print ('\tmargin="0";')
print ("\tnode [shape=box];")
print("digraph %s {" % schema.name)
print('\trankdir=LR;')
print('\tsize="6.5,4";')
print('\tmargin="0";')
print("\tnode [shape=box];")
if not arrows:
print ("\tedge [dir=none, arrowhead=none, arrowtail=none];")
print("\tedge [dir=none, arrowhead=none, arrowtail=none];")
for tableName, table in schema.tables.items():
options = {}
if table.is_root:
options['style'] = 'bold'
print ("\t%s [%s];" % (
print("\t%s [%s];" % (
tableName,
', '.join(['%s=%s' % (k,v) for k,v in options.items()])))
', '.join(['%s=%s' % (k, v) for k, v in options.items()])))
for columnName, column in table.columns.items():
if column.type.value:
printEdge(tableName, column.type, column.type.key, "%s key" % columnName)
printEdge(tableName, column.type, column.type.value, "%s value" % columnName)
printEdge(tableName, column.type, column.type.key,
"%s key" % columnName)
printEdge(tableName, column.type, column.type.value,
"%s value" % columnName)
else:
printEdge(tableName, column.type, column.type.key, columnName)
print ("}");
print("}")


def usage():
print ("""\
print("""\
%(argv0)s: compiles ovsdb schemas to graphviz format
Prints a .dot file that "dot" can render to an entity-relationship diagram
usage: %(argv0)s [OPTIONS] SCHEMA
Expand All @@ -75,12 +77,13 @@ The following options are also available:
""" % {'argv0': argv0})
sys.exit(0)


if __name__ == "__main__":
try:
try:
options, args = getopt.gnu_getopt(sys.argv[1:], 'hV',
['no-arrows',
'help', 'version',])
'help', 'version'])
except getopt.GetoptError as geo:
sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
sys.exit(1)
Expand All @@ -92,7 +95,7 @@ if __name__ == "__main__":
elif key in ['-h', '--help']:
usage()
elif key in ['-V', '--version']:
print ("ovsdb-dot (Open vSwitch) @VERSION@")
print("ovsdb-dot (Open vSwitch) @VERSION@")
else:
sys.exit(0)

Expand Down
140 changes: 126 additions & 14 deletions ovsdb/raft.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ enum raft_failure_test {
FT_STOP_RAFT_RPC,
FT_TRANSFER_LEADERSHIP,
FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ,
FT_TRANSFER_LEADERSHIP_AFTER_STARTING_TO_ADD,
};
static enum raft_failure_test failure_test;

Expand Down Expand Up @@ -280,6 +281,7 @@ struct raft {
/* Used for joining a cluster. */
bool joining; /* Attempting to join the cluster? */
struct sset remote_addresses; /* Addresses to try to find other servers. */
#define RAFT_JOIN_TIMEOUT_MS 1000
long long int join_timeout; /* Time to re-send add server request. */

/* Used for leaving a cluster. */
Expand Down Expand Up @@ -385,6 +387,7 @@ static void raft_get_servers_from_log(struct raft *, enum vlog_level);
static void raft_get_election_timer_from_log(struct raft *);

static bool raft_handle_write_error(struct raft *, struct ovsdb_error *);
static bool raft_has_uncommitted_configuration(const struct raft *);

static void raft_run_reconfigure(struct raft *);

Expand Down Expand Up @@ -1015,8 +1018,13 @@ raft_conn_update_probe_interval(struct raft *raft, struct raft_conn *r_conn)
* inactivity probe follower will just try to initiate election
* indefinitely staying in 'candidate' role. And the leader will continue
* to send heartbeats to the dead connection thinking that remote server
* is still part of the cluster. */
int probe_interval = raft->election_timer + ELECTION_RANGE_MSEC;
* is still part of the cluster.
*
* While joining, the real value of the election timeout is not known to
* this server, so using the maximum. */
int probe_interval = (raft->joining ? ELECTION_MAX_MSEC
: raft->election_timer)
+ ELECTION_RANGE_MSEC;

jsonrpc_session_set_probe_interval(r_conn->js, probe_interval);
}
Expand Down Expand Up @@ -1083,7 +1091,7 @@ raft_open(struct ovsdb_log *log, struct raft **raftp)
raft_start_election(raft, false, false);
}
} else {
raft->join_timeout = time_msec() + 1000;
raft->join_timeout = time_msec() + RAFT_JOIN_TIMEOUT_MS;
}

raft_reset_ping_timer(raft);
Expand Down Expand Up @@ -1261,10 +1269,30 @@ raft_transfer_leadership(struct raft *raft, const char *reason)
return;
}

struct raft_server *s;
struct raft_server **servers, *s;
uint64_t threshold = 0;
size_t n = 0, start, i;

servers = xmalloc(hmap_count(&raft->servers) * sizeof *servers);

HMAP_FOR_EACH (s, hmap_node, &raft->servers) {
if (!uuid_equals(&raft->sid, &s->sid)
&& s->phase == RAFT_PHASE_STABLE) {
if (uuid_equals(&raft->sid, &s->sid)
|| s->phase != RAFT_PHASE_STABLE) {
continue;
}
if (s->match_index > threshold) {
threshold = s->match_index;
}
servers[n++] = s;
}

start = n ? random_range(n) : 0;

retry:
for (i = 0; i < n; i++) {
s = servers[(start + i) % n];

if (s->match_index >= threshold) {
struct raft_conn *conn = raft_find_conn_by_sid(raft, &s->sid);
if (!conn) {
continue;
Expand All @@ -1280,14 +1308,34 @@ raft_transfer_leadership(struct raft *raft, const char *reason)
.term = raft->term,
}
};
raft_send_to_conn(raft, &rpc, conn);

if (!raft_send_to_conn(raft, &rpc, conn)) {
continue;
}

raft_record_note(raft, "transfer leadership",
"transferring leadership to %s because %s",
s->nickname, reason);
break;
}
}

if (n && i == n && threshold) {
if (threshold > raft->commit_index) {
/* Failed to transfer to servers with the highest 'match_index'.
* Try other servers that are not behind the majority. */
threshold = raft->commit_index;
} else {
/* Try any other server. It is safe, because they either have all
* the append requests queued up for them before the leadership
* transfer message or their connection is broken and we will not
* transfer anyway. */
threshold = 0;
}
goto retry;
}

free(servers);
}

/* Send a RemoveServerRequest to the rest of the servers in the cluster.
Expand Down Expand Up @@ -2078,7 +2126,7 @@ raft_run(struct raft *raft)
raft_start_election(raft, true, false);
}
} else {
raft_start_election(raft, true, false);
raft_start_election(raft, hmap_count(&raft->servers) > 1, false);
}

}
Expand All @@ -2088,7 +2136,7 @@ raft_run(struct raft *raft)
}

if (raft->joining && time_msec() >= raft->join_timeout) {
raft->join_timeout = time_msec() + 1000;
raft->join_timeout = time_msec() + RAFT_JOIN_TIMEOUT_MS;
LIST_FOR_EACH (conn, list_node, &raft->conns) {
raft_send_add_server_request(raft, conn);
}
Expand Down Expand Up @@ -2122,10 +2170,12 @@ raft_run(struct raft *raft)
raft_reset_ping_timer(raft);
}

uint64_t interval = raft->joining
? RAFT_JOIN_TIMEOUT_MS
: RAFT_TIMER_THRESHOLD(raft->election_timer);
cooperative_multitasking_set(
&raft_run_cb, (void *) raft, time_msec(),
RAFT_TIMER_THRESHOLD(raft->election_timer)
+ RAFT_TIMER_THRESHOLD(raft->election_timer) / 10, "raft_run");
interval + interval / 10, "raft_run");

/* Do this only at the end; if we did it as soon as we set raft->left or
* raft->failed in handling the RemoveServerReply, then it could easily
Expand Down Expand Up @@ -2696,15 +2746,22 @@ raft_become_follower(struct raft *raft)
* new configuration. Our AppendEntries processing will properly update
* the server configuration later, if necessary.
*
* However, since we're sending replies about a failure to add, those new
* servers has to be cleaned up. Otherwise, they will stuck in a 'CATCHUP'
* phase in case this server regains leadership before they join through
* the current new leader. They are not yet in 'raft->servers', so not
* part of the shared configuration.
*
* Also we do not complete commands here, as they can still be completed
* if their log entries have already been replicated to other servers.
* If the entries were actually committed according to the new leader, our
* AppendEntries processing will complete the corresponding commands.
*/
struct raft_server *s;
HMAP_FOR_EACH (s, hmap_node, &raft->add_servers) {
HMAP_FOR_EACH_POP (s, hmap_node, &raft->add_servers) {
raft_send_add_server_reply__(raft, &s->sid, s->address, false,
RAFT_SERVER_LOST_LEADERSHIP);
raft_server_destroy(s);
}
if (raft->remove_server) {
raft_send_remove_server_reply__(raft, &raft->remove_server->sid,
Expand Down Expand Up @@ -2768,6 +2825,13 @@ raft_send_heartbeats(struct raft *raft)
raft_reset_ping_timer(raft);
}

static void
raft_join_complete(struct raft *raft)
{
raft->joining = false;
raft_update_probe_intervals(raft);
}

/* Initializes the fields in 's' that represent the leader's view of the
* server. */
static void
Expand Down Expand Up @@ -2805,6 +2869,18 @@ raft_become_leader(struct raft *raft)
raft_reset_election_timer(raft);
raft_reset_ping_timer(raft);

if (raft->joining) {
/* It is possible that the server committing this one to the list of
* servers lost leadership before the entry is committed but after
* it was already replicated to majority of servers. In this case
* other servers will recognize this one as a valid cluster member
* and may transfer leadership to it and vote for it. This way
* we're becoming a cluster leader without receiving reply for a
* join request and will commit addition of this server ourselves. */
VLOG_INFO_RL(&rl, "elected as leader while joining");
raft_join_complete(raft);
}

struct raft_server *s;
HMAP_FOR_EACH (s, hmap_node, &raft->servers) {
raft_server_init_leader(raft, s);
Expand Down Expand Up @@ -2963,12 +3039,12 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
}

while (raft->commit_index < new_commit_index) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
uint64_t index = ++raft->commit_index;
const struct raft_entry *e = raft_get_entry(raft, index);

if (raft_entry_has_data(e)) {
struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);

if (cmd) {
if (!cmd->index && raft->role == RAFT_LEADER) {
Expand Down Expand Up @@ -3012,6 +3088,35 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
* reallocate raft->entries, which would invalidate 'e', so
* this case must be last, after the one for 'e->data'. */
raft_run_reconfigure(raft);
} else if (e->servers && !raft_has_uncommitted_configuration(raft)) {
struct ovsdb_error *error;
struct raft_server *s;
struct hmap servers;

error = raft_servers_from_json(e->servers, &servers);
ovs_assert(!error);
HMAP_FOR_EACH (s, hmap_node, &servers) {
struct raft_server *server = raft_find_server(raft, &s->sid);

if (server && server->phase == RAFT_PHASE_COMMITTING) {
/* This server lost leadership while committing
* server 's', but it was committed later by a
* new leader. */
server->phase = RAFT_PHASE_STABLE;
}

if (raft->joining && uuid_equals(&s->sid, &raft->sid)) {
/* Leadership change happened before previous leader
* could commit the change of a servers list, but it
* was replicated and a new leader committed it. */
VLOG_INFO_RL(&rl,
"added to configuration without reply "
"(eid: "UUID_FMT", commit index: %"PRIu64")",
UUID_ARGS(&e->eid), index);
raft_join_complete(raft);
}
}
raft_servers_destroy(&servers);
}
}

Expand Down Expand Up @@ -3938,6 +4043,10 @@ raft_handle_add_server_request(struct raft *raft,
"to cluster "CID_FMT, s->nickname, SID_ARGS(&s->sid),
rq->address, CID_ARGS(&raft->cid));
raft_send_append_request(raft, s, 0, "initialize new server");

if (failure_test == FT_TRANSFER_LEADERSHIP_AFTER_STARTING_TO_ADD) {
failure_test = FT_TRANSFER_LEADERSHIP;
}
}

static void
Expand All @@ -3952,7 +4061,7 @@ raft_handle_add_server_reply(struct raft *raft,
}

if (rpy->success) {
raft->joining = false;
raft_join_complete(raft);

/* It is tempting, at this point, to check that this server is part of
* the current configuration. However, this is not necessarily the
Expand Down Expand Up @@ -4926,6 +5035,7 @@ raft_get_election_timer_from_log(struct raft *raft)
break;
}
}
raft_update_probe_intervals(raft);
}

static void
Expand Down Expand Up @@ -5063,6 +5173,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
} else if (!strcmp(test,
"transfer-leadership-after-sending-append-request")) {
failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ;
} else if (!strcmp(test, "transfer-leadership-after-starting-to-add")) {
failure_test = FT_TRANSFER_LEADERSHIP_AFTER_STARTING_TO_ADD;
} else if (!strcmp(test, "transfer-leadership")) {
failure_test = FT_TRANSFER_LEADERSHIP;
} else if (!strcmp(test, "clear")) {
Expand Down
13 changes: 10 additions & 3 deletions python/ovs/db/custom_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,21 @@ def index_create(self, name):
index = self.indexes[name] = MultiColumnIndex(name)
return index

def __getitem__(self, key):
return self.data[key][-1]

def __setitem__(self, key, item):
self.data[key] = item
try:
self.data[key].append(item)
except KeyError:
self.data[key] = [item]
for index in self.indexes.values():
index.add(item)

def __delitem__(self, key):
val = self.data[key]
del self.data[key]
val = self.data[key].pop()
if len(self.data[key]) == 0:
del self.data[key]
for index in self.indexes.values():
index.remove(val)

Expand Down
43 changes: 24 additions & 19 deletions python/ovs/db/idl.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
ROW_UPDATE = "update"
ROW_DELETE = "delete"

OVSDB_UPDATE = 0
OVSDB_UPDATE2 = 1
OVSDB_UPDATE3 = 2
OVSDB_UPDATE = "update"
OVSDB_UPDATE2 = "update2"
OVSDB_UPDATE3 = "update3"

CLUSTERED = "clustered"
RELAY = "relay"
Expand Down Expand Up @@ -77,7 +77,7 @@ def __contains__(self, item):
return item in self.keys()


class Monitor(enum.IntEnum):
class Monitor(enum.Enum):
monitor = OVSDB_UPDATE
monitor_cond = OVSDB_UPDATE2
monitor_cond_since = OVSDB_UPDATE3
Expand Down Expand Up @@ -465,23 +465,18 @@ def run(self):
self.__parse_update(msg.params[2], OVSDB_UPDATE3)
self.last_id = msg.params[1]
elif (msg.type == ovs.jsonrpc.Message.T_NOTIFY
and msg.method == "update2"
and len(msg.params) == 2):
# Database contents changed.
self.__parse_update(msg.params[1], OVSDB_UPDATE2)
elif (msg.type == ovs.jsonrpc.Message.T_NOTIFY
and msg.method == "update"
and msg.method in (OVSDB_UPDATE, OVSDB_UPDATE2)
and len(msg.params) == 2):
# Database contents changed.
if msg.params[0] == str(self.server_monitor_uuid):
self.__parse_update(msg.params[1], OVSDB_UPDATE,
self.__parse_update(msg.params[1], msg.method,
tables=self.server_tables)
self.change_seqno = previous_change_seqno
if not self.__check_server_db():
self.force_reconnect()
break
else:
self.__parse_update(msg.params[1], OVSDB_UPDATE)
self.__parse_update(msg.params[1], msg.method)
elif self.handle_monitor_canceled(msg):
break
elif self.handle_monitor_cancel_reply(msg):
Expand Down Expand Up @@ -540,7 +535,7 @@ def run(self):
# Reply to our "monitor" of _Server request.
try:
self._server_monitor_request_id = None
self.__parse_update(msg.result, OVSDB_UPDATE,
self.__parse_update(msg.result, OVSDB_UPDATE2,
tables=self.server_tables)
self.change_seqno = previous_change_seqno
if self.__check_server_db():
Expand Down Expand Up @@ -579,6 +574,11 @@ def run(self):
elif msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.id == "echo":
# Reply to our echo request. Ignore it.
pass
elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
self.state == self.IDL_S_SERVER_MONITOR_REQUESTED and
msg.id == self._server_monitor_request_id):
self._server_monitor_request_id = None
self.__send_monitor_request()
elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
self.state == (
self.IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED) and
Expand Down Expand Up @@ -912,7 +912,7 @@ def __send_server_monitor_request(self):
monitor_request = {"columns": columns}
monitor_requests[table.name] = [monitor_request]
msg = ovs.jsonrpc.Message.create_request(
'monitor', [self._server_db.name,
'monitor_cond', [self._server_db.name,
str(self.server_monitor_uuid),
monitor_requests])
self._server_monitor_request_id = msg.id
Expand Down Expand Up @@ -1013,7 +1013,9 @@ def __process_update2(self, table, uuid, row_update):
if not row:
raise error.Error('Modify non-existing row')

del table.rows[uuid]
old_row = self.__apply_diff(table, row, row_update['modify'])
table.rows[uuid] = row
return Notice(ROW_UPDATE, row, Row(self, table, uuid, old_row))
else:
raise error.Error('<row-update> unknown operation',
Expand Down Expand Up @@ -1044,9 +1046,10 @@ def __process_update(self, table, uuid, old, new):
op = ROW_UPDATE
vlog.warn("cannot add existing row %s to table %s"
% (uuid, table.name))
del table.rows[uuid]

changed |= self.__row_update(table, row, new)
if op == ROW_CREATE:
table.rows[uuid] = row
table.rows[uuid] = row
if changed:
return Notice(ROW_CREATE, row)
else:
Expand All @@ -1058,9 +1061,11 @@ def __process_update(self, table, uuid, old, new):
# XXX rate-limit
vlog.warn("cannot modify missing row %s in table %s"
% (uuid, table.name))
else:
del table.rows[uuid]

changed |= self.__row_update(table, row, new)
if op == ROW_CREATE:
table.rows[uuid] = row
table.rows[uuid] = row
if changed:
return Notice(op, row, Row.from_json(self, table, uuid, old))
return False
Expand Down Expand Up @@ -1854,7 +1859,7 @@ def commit(self):
if row._data is None:
op["op"] = "insert"
if row._persist_uuid:
op["uuid"] = row.uuid
op["uuid"] = str(row.uuid)
else:
op["uuid-name"] = _uuid_name_from_uuid(row.uuid)

Expand Down
24 changes: 13 additions & 11 deletions python/ovs/fatal_signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os
import signal
import sys
import threading

import ovs.vlog

Expand Down Expand Up @@ -112,29 +113,29 @@ def _unlink(file_):
def _signal_handler(signr, _):
_call_hooks(signr)

# Re-raise the signal with the default handling so that the program
# termination status reflects that we were killed by this signal.
signal.signal(signr, signal.SIG_DFL)
os.kill(os.getpid(), signr)


def _atexit_handler():
_call_hooks(0)


recurse = False
mutex = threading.Lock()


def _call_hooks(signr):
global recurse
if recurse:
global mutex
if not mutex.acquire(blocking=False):
return
recurse = True

for hook, cancel, run_at_exit in _hooks:
if signr != 0 or run_at_exit:
hook()

if signr != 0:
# Re-raise the signal with the default handling so that the program
# termination status reflects that we were killed by this signal.
signal.signal(signr, signal.SIG_DFL)
os.kill(os.getpid(), signr)


_inited = False

Expand All @@ -150,7 +151,9 @@ def _init():
signal.SIGALRM]

for signr in signals:
if signal.getsignal(signr) == signal.SIG_DFL:
handler = signal.getsignal(signr)
if (handler == signal.SIG_DFL or
handler == signal.default_int_handler):
signal.signal(signr, _signal_handler)
atexit.register(_atexit_handler)

Expand All @@ -165,7 +168,6 @@ def signal_alarm(timeout):

if sys.platform == "win32":
import time
import threading

class Alarm (threading.Thread):
def __init__(self, timeout):
Expand Down
43 changes: 22 additions & 21 deletions python/ovs/flow/odp.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,29 +365,30 @@ def _action_decoders_args():
is_list=True,
)

return {
**_decoders,
"check_pkt_len": nested_kv_decoder(
KVDecoders(
{
"size": decode_int,
"gt": nested_kv_decoder(
KVDecoders(
decoders=_decoders,
default_free=decode_free_output,
),
is_list=True,
_decoders["check_pkt_len"] = nested_kv_decoder(
KVDecoders(
{
"size": decode_int,
"gt": nested_kv_decoder(
KVDecoders(
decoders=_decoders,
default_free=decode_free_output,
),
"le": nested_kv_decoder(
KVDecoders(
decoders=_decoders,
default_free=decode_free_output,
),
is_list=True,
is_list=True,
),
"le": nested_kv_decoder(
KVDecoders(
decoders=_decoders,
default_free=decode_free_output,
),
}
)
),
is_list=True,
),
}
)
)

return {
**_decoders,
}

@staticmethod
Expand Down
29 changes: 29 additions & 0 deletions python/ovs/tests/test_odp.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,35 @@ def test_odp_fields(input_string, expected):
),
],
),
(
"actions:check_pkt_len(size=200,gt(check_pkt_len(size=400,gt(4),le(2))),le(check_pkt_len(size=100,gt(1),le(drop))))", # noqa: E501
[
KeyValue(
"check_pkt_len",
{
"size": 200,
"gt": [
{
"check_pkt_len": {
"size": 400,
"gt": [{"output": {"port": 4}}],
"le": [{"output": {"port": 2}}],
}
}
],
"le": [
{
"check_pkt_len": {
"size": 100,
"gt": [{"output": {"port": 1}}],
"le": [{"drop": True}],
}
}
],
},
)
],
),
(
"actions:meter(1),hash(l4(0))",
[
Expand Down
1 change: 1 addition & 0 deletions python/test_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
netaddr
packaging
pyftpdlib
pyparsing
pytest
Expand Down
1 change: 1 addition & 0 deletions rhel/usr_lib_systemd_system_ovsdb-server.service
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ ExecStop=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd stop
ExecReload=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd \
${OVS_USER_OPT} \
--no-monitor restart $OPTIONS
TimeoutSec=300
Loading