forked from openwrt/openwrt
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
mac80211: add a few performance improvement patches
Signed-off-by: Felix Fietkau <nbd@nbd.name>
- Loading branch information
Showing
4 changed files
with
297 additions
and
0 deletions.
There are no files selected for viewing
96 changes: 96 additions & 0 deletions
96
...el/mac80211/patches/subsys/353-mac80211-mesh-drop-redundant-rcu_read_lock-unlock-ca.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
From: Felix Fietkau <nbd@nbd.name> | ||
Date: Sat, 16 Mar 2019 17:43:58 +0100 | ||
Subject: [PATCH] mac80211: mesh: drop redundant rcu_read_lock/unlock calls | ||
|
||
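All callers of these functions already run inside RCU read-side critical sections, so the nested rcu_read_lock()/rcu_read_unlock() pairs are redundant. For the same reason, mpath_lookup() can call rhashtable_lookup() directly instead of rhashtable_lookup_fast(), which takes the RCU read lock itself. | ||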
|
||
Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||
--- | ||
|
||
--- a/net/mac80211/mesh_hwmp.c | ||
+++ b/net/mac80211/mesh_hwmp.c | ||
@@ -1112,16 +1112,13 @@ int mesh_nexthop_resolve(struct ieee8021 | ||
struct mesh_path *mpath; | ||
struct sk_buff *skb_to_free = NULL; | ||
u8 *target_addr = hdr->addr3; | ||
- int err = 0; | ||
|
||
/* Nulls are only sent to peers for PS and should be pre-addressed */ | ||
if (ieee80211_is_qos_nullfunc(hdr->frame_control)) | ||
return 0; | ||
|
||
- rcu_read_lock(); | ||
- err = mesh_nexthop_lookup(sdata, skb); | ||
- if (!err) | ||
- goto endlookup; | ||
+ if (!mesh_nexthop_lookup(sdata, skb)) | ||
+ return 0; | ||
|
||
/* no nexthop found, start resolving */ | ||
mpath = mesh_path_lookup(sdata, target_addr); | ||
@@ -1129,8 +1126,7 @@ int mesh_nexthop_resolve(struct ieee8021 | ||
mpath = mesh_path_add(sdata, target_addr); | ||
if (IS_ERR(mpath)) { | ||
mesh_path_discard_frame(sdata, skb); | ||
- err = PTR_ERR(mpath); | ||
- goto endlookup; | ||
+ return PTR_ERR(mpath); | ||
} | ||
} | ||
|
||
@@ -1143,13 +1139,10 @@ int mesh_nexthop_resolve(struct ieee8021 | ||
info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; | ||
ieee80211_set_qos_hdr(sdata, skb); | ||
skb_queue_tail(&mpath->frame_queue, skb); | ||
- err = -ENOENT; | ||
if (skb_to_free) | ||
mesh_path_discard_frame(sdata, skb_to_free); | ||
|
||
-endlookup: | ||
- rcu_read_unlock(); | ||
- return err; | ||
+ return -ENOENT; | ||
} | ||
|
||
/** | ||
@@ -1169,13 +1162,10 @@ int mesh_nexthop_lookup(struct ieee80211 | ||
struct sta_info *next_hop; | ||
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; | ||
u8 *target_addr = hdr->addr3; | ||
- int err = -ENOENT; | ||
|
||
- rcu_read_lock(); | ||
mpath = mesh_path_lookup(sdata, target_addr); | ||
- | ||
if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) | ||
- goto endlookup; | ||
+ return -ENOENT; | ||
|
||
if (time_after(jiffies, | ||
mpath->exp_time - | ||
@@ -1190,12 +1180,10 @@ int mesh_nexthop_lookup(struct ieee80211 | ||
memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); | ||
memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); | ||
ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); | ||
- err = 0; | ||
+ return 0; | ||
} | ||
|
||
-endlookup: | ||
- rcu_read_unlock(); | ||
- return err; | ||
+ return -ENOENT; | ||
} | ||
|
||
void mesh_path_timer(struct timer_list *t) | ||
--- a/net/mac80211/mesh_pathtbl.c | ||
+++ b/net/mac80211/mesh_pathtbl.c | ||
@@ -217,7 +217,7 @@ static struct mesh_path *mpath_lookup(st | ||
{ | ||
struct mesh_path *mpath; | ||
|
||
- mpath = rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params); | ||
+ mpath = rhashtable_lookup(&tbl->rhead, dst, mesh_rht_params); | ||
|
||
if (mpath && mpath_expired(mpath)) { | ||
spin_lock_bh(&mpath->state_lock); |
124 changes: 124 additions & 0 deletions
124
...el/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
From: Felix Fietkau <nbd@nbd.name> | ||
Date: Sat, 16 Mar 2019 17:57:38 +0100 | ||
Subject: [PATCH] mac80211: calculate hash for fq without holding fq->lock | ||
in itxq enqueue | ||
|
||
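Compute the flow index before taking fq->lock, so the hash calculation no longer runs under the lock. This reduces lock contention on enqueue/dequeue of iTXQ packets. | ||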
|
||
Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||
--- | ||
|
||
--- a/include/net/fq_impl.h | ||
+++ b/include/net/fq_impl.h | ||
@@ -107,21 +107,23 @@ begin: | ||
return skb; | ||
} | ||
|
||
+static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) | ||
+{ | ||
+ u32 hash = skb_get_hash_perturb(skb, fq->perturbation); | ||
+ | ||
+ return reciprocal_scale(hash, fq->flows_cnt); | ||
+} | ||
+ | ||
static struct fq_flow *fq_flow_classify(struct fq *fq, | ||
- struct fq_tin *tin, | ||
+ struct fq_tin *tin, u32 idx, | ||
struct sk_buff *skb, | ||
fq_flow_get_default_t get_default_func) | ||
{ | ||
struct fq_flow *flow; | ||
- u32 hash; | ||
- u32 idx; | ||
|
||
lockdep_assert_held(&fq->lock); | ||
|
||
- hash = skb_get_hash_perturb(skb, fq->perturbation); | ||
- idx = reciprocal_scale(hash, fq->flows_cnt); | ||
flow = &fq->flows[idx]; | ||
- | ||
if (flow->tin && flow->tin != tin) { | ||
flow = get_default_func(fq, tin, idx, skb); | ||
tin->collisions++; | ||
@@ -153,7 +155,7 @@ static void fq_recalc_backlog(struct fq | ||
} | ||
|
||
static void fq_tin_enqueue(struct fq *fq, | ||
- struct fq_tin *tin, | ||
+ struct fq_tin *tin, u32 idx, | ||
struct sk_buff *skb, | ||
fq_skb_free_t free_func, | ||
fq_flow_get_default_t get_default_func) | ||
@@ -163,7 +165,7 @@ static void fq_tin_enqueue(struct fq *fq | ||
|
||
lockdep_assert_held(&fq->lock); | ||
|
||
- flow = fq_flow_classify(fq, tin, skb, get_default_func); | ||
+ flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); | ||
|
||
flow->tin = tin; | ||
flow->backlog += skb->len; | ||
--- a/net/mac80211/tx.c | ||
+++ b/net/mac80211/tx.c | ||
@@ -1390,11 +1390,15 @@ static void ieee80211_txq_enqueue(struct | ||
{ | ||
struct fq *fq = &local->fq; | ||
struct fq_tin *tin = &txqi->tin; | ||
+ u32 flow_idx = fq_flow_idx(fq, skb); | ||
|
||
ieee80211_set_skb_enqueue_time(skb); | ||
- fq_tin_enqueue(fq, tin, skb, | ||
+ | ||
+ spin_lock_bh(&fq->lock); | ||
+ fq_tin_enqueue(fq, tin, flow_idx, skb, | ||
fq_skb_free_func, | ||
fq_flow_get_default_func); | ||
+ spin_unlock_bh(&fq->lock); | ||
} | ||
|
||
static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin, | ||
@@ -1564,7 +1568,6 @@ static bool ieee80211_queue_skb(struct i | ||
struct sta_info *sta, | ||
struct sk_buff *skb) | ||
{ | ||
- struct fq *fq = &local->fq; | ||
struct ieee80211_vif *vif; | ||
struct txq_info *txqi; | ||
|
||
@@ -1582,9 +1585,7 @@ static bool ieee80211_queue_skb(struct i | ||
if (!txqi) | ||
return false; | ||
|
||
- spin_lock_bh(&fq->lock); | ||
ieee80211_txq_enqueue(local, txqi, skb); | ||
- spin_unlock_bh(&fq->lock); | ||
|
||
schedule_and_wake_txq(local, txqi); | ||
|
||
@@ -3198,6 +3199,7 @@ static bool ieee80211_amsdu_aggregate(st | ||
u8 max_subframes = sta->sta.max_amsdu_subframes; | ||
int max_frags = local->hw.max_tx_fragments; | ||
int max_amsdu_len = sta->sta.max_amsdu_len; | ||
+ u32 flow_idx; | ||
int orig_truesize; | ||
__be16 len; | ||
void *data; | ||
@@ -3220,6 +3222,8 @@ static bool ieee80211_amsdu_aggregate(st | ||
max_amsdu_len = min_t(int, max_amsdu_len, | ||
sta->sta.max_rc_amsdu_len); | ||
|
||
+ flow_idx = fq_flow_idx(fq, skb); | ||
+ | ||
spin_lock_bh(&fq->lock); | ||
|
||
/* TODO: Ideally aggregation should be done on dequeue to remain | ||
@@ -3227,7 +3231,8 @@ static bool ieee80211_amsdu_aggregate(st | ||
*/ | ||
|
||
tin = &txqi->tin; | ||
- flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func); | ||
+ flow = fq_flow_classify(fq, tin, flow_idx, skb, | ||
+ fq_flow_get_default_func); | ||
head = skb_peek_tail(&flow->queue); | ||
if (!head) | ||
goto unlock; |
55 changes: 55 additions & 0 deletions
55
...el/mac80211/patches/subsys/355-mac80211-run-late-dequeue-late-tx-handlers-without-h.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
From: Felix Fietkau <nbd@nbd.name> | ||
Date: Sat, 16 Mar 2019 18:00:12 +0100 | ||
Subject: [PATCH] mac80211: run late dequeue late tx handlers without | ||
holding fq->lock | ||
|
||
Reduces lock contention on enqueue/dequeue of iTXQ packets | ||
|
||
Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||
--- | ||
|
||
--- a/net/mac80211/tx.c | ||
+++ b/net/mac80211/tx.c | ||
@@ -3507,6 +3507,7 @@ struct sk_buff *ieee80211_tx_dequeue(str | ||
ieee80211_tx_result r; | ||
struct ieee80211_vif *vif = txq->vif; | ||
|
||
+begin: | ||
spin_lock_bh(&fq->lock); | ||
|
||
if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) || | ||
@@ -3523,11 +3524,12 @@ struct sk_buff *ieee80211_tx_dequeue(str | ||
if (skb) | ||
goto out; | ||
|
||
-begin: | ||
skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); | ||
if (!skb) | ||
goto out; | ||
|
||
+ spin_unlock_bh(&fq->lock); | ||
+ | ||
hdr = (struct ieee80211_hdr *)skb->data; | ||
info = IEEE80211_SKB_CB(skb); | ||
|
||
@@ -3573,8 +3575,11 @@ begin: | ||
|
||
skb = __skb_dequeue(&tx.skbs); | ||
|
||
- if (!skb_queue_empty(&tx.skbs)) | ||
+ if (!skb_queue_empty(&tx.skbs)) { | ||
+ spin_lock_bh(&fq->lock); | ||
skb_queue_splice_tail(&tx.skbs, &txqi->frags); | ||
+ spin_unlock_bh(&fq->lock); | ||
+ } | ||
} | ||
|
||
if (skb && skb_has_frag_list(skb) && | ||
@@ -3613,6 +3618,7 @@ begin: | ||
} | ||
|
||
IEEE80211_SKB_CB(skb)->control.vif = vif; | ||
+ return skb; | ||
|
||
out: | ||
spin_unlock_bh(&fq->lock); |
22 changes: 22 additions & 0 deletions
22
...el/mac80211/patches/subsys/356-mac80211-set-NETIF_F_LLTX-when-using-intermediate-tx.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
From: Felix Fietkau <nbd@nbd.name> | ||
Date: Sat, 16 Mar 2019 18:01:53 +0100 | ||
Subject: [PATCH] mac80211: set NETIF_F_LLTX when using intermediate tx | ||
queues | ||
|
||
When using iTXQ, tx sequence number allocation and statistics are run at | ||
dequeue time. Because of that, it is safe to enable NETIF_F_LLTX, which | ||
allows tx handlers to run on multiple CPUs in parallel. | ||
|
||
Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||
--- | ||
|
||
--- a/net/mac80211/iface.c | ||
+++ b/net/mac80211/iface.c | ||
@@ -1301,6 +1301,7 @@ static void ieee80211_if_setup(struct ne | ||
static void ieee80211_if_setup_no_queue(struct net_device *dev) | ||
{ | ||
ieee80211_if_setup(dev); | ||
+ dev->features |= NETIF_F_LLTX; | ||
#if LINUX_VERSION_IS_GEQ(4,3,0) | ||
dev->priv_flags |= IFF_NO_QUEUE; | ||
#else |