forked from openwrt/openwrt
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
lantiq: backport latest upstream patches
This patch includes a series of performance improvements. All patches were accepted upstream and should land in 5.17. NAT performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500): before: 539 Mbps down / 599 Mbps up; after: 624 Mbps down / 695 Mbps up. Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
- Loading branch information
Showing
3 changed files
with
266 additions
and
0 deletions.
There are no files selected for viewing
28 changes: 28 additions & 0 deletions
28
target/linux/lantiq/patches-5.10/0713-MIPS-lantiq-dma-increase-descritor-count.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
From 5112e9234bbb89f8dd15c983206bd9107b8436d5 Mon Sep 17 00:00:00 2001 | ||
From: Aleksander Jan Bajkowski <olek2@wp.pl> | ||
Date: Tue, 4 Jan 2022 16:11:42 +0100 | ||
Subject: [PATCH 713/715] MIPS: lantiq: dma: increase descritor count | ||
|
||
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500): | ||
|
||
Down Up | ||
Before 539 Mbps 599 Mbps | ||
After 545 Mbps 625 Mbps | ||
|
||
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl> | ||
Signed-off-by: Jakub Kicinski <kuba@kernel.org> | ||
--- | ||
arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | 2 +- | ||
1 file changed, 1 insertion(+), 1 deletion(-) | ||
|
||
--- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | ||
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | ||
@@ -8,7 +8,7 @@ | ||
#define LTQ_DMA_H__ | ||
|
||
#define LTQ_DESC_SIZE 0x08 /* each descriptor is 64bit */ | ||
-#define LTQ_DESC_NUM 0x40 /* 64 descriptors / channel */ | ||
+#define LTQ_DESC_NUM 0xC0 /* 192 descriptors / channel */ | ||
|
||
#define LTQ_DMA_OWN BIT(31) /* owner bit */ | ||
#define LTQ_DMA_C BIT(30) /* complete bit */ |
32 changes: 32 additions & 0 deletions
32
target/linux/lantiq/patches-5.10/0714-net-lantiq_xrx200-increase-napi-poll-weigth.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
From 768818d772d5d4ddc0c7eb2e62848929270ab7a3 Mon Sep 17 00:00:00 2001 | ||
From: Aleksander Jan Bajkowski <olek2@wp.pl> | ||
Date: Tue, 4 Jan 2022 16:11:43 +0100 | ||
Subject: [PATCH 714/715] net: lantiq_xrx200: increase napi poll weigth | ||
|
||
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500): | ||
|
||
Down Up | ||
Before 545 Mbps 625 Mbps | ||
After 577 Mbps 648 Mbps | ||
|
||
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl> | ||
Signed-off-by: Jakub Kicinski <kuba@kernel.org> | ||
--- | ||
drivers/net/ethernet/lantiq_xrx200.c | 6 ++++-- | ||
1 file changed, 4 insertions(+), 2 deletions(-) | ||
|
||
--- a/drivers/net/ethernet/lantiq_xrx200.c | ||
+++ b/drivers/net/ethernet/lantiq_xrx200.c | ||
@@ -606,8 +606,10 @@ static int xrx200_probe(struct platform_ | ||
PMAC_HD_CTL); | ||
|
||
/* setup NAPI */ | ||
- netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32); | ||
- netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); | ||
+ netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, | ||
+ NAPI_POLL_WEIGHT); | ||
+ netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, | ||
+ NAPI_POLL_WEIGHT); | ||
|
||
platform_set_drvdata(pdev, priv); | ||
|
206 changes: 206 additions & 0 deletions
206
target/linux/lantiq/patches-5.10/0715-net-lantiq_xrx200-convert-to-build_skb.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
From e015593573b3e3f74bd8a63c05fa92902194a354 Mon Sep 17 00:00:00 2001 | ||
From: Aleksander Jan Bajkowski <olek2@wp.pl> | ||
Date: Tue, 4 Jan 2022 16:11:44 +0100 | ||
Subject: [PATCH 715/715] net: lantiq_xrx200: convert to build_skb | ||
|
||
We can increase the efficiency of rx path by using buffers to receive | ||
packets then build SKBs around them just before passing into the network | ||
stack. In contrast, preallocating SKBs too early reduces CPU cache | ||
efficiency. | ||
|
||
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500): | ||
|
||
Down Up | ||
Before 577 Mbps 648 Mbps | ||
After 624 Mbps 695 Mbps | ||
|
||
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl> | ||
Signed-off-by: Jakub Kicinski <kuba@kernel.org> | ||
--- | ||
drivers/net/ethernet/lantiq_xrx200.c | 56 ++++++++++++++++++---------- | ||
1 file changed, 36 insertions(+), 20 deletions(-) | ||
|
||
--- a/drivers/net/ethernet/lantiq_xrx200.c | ||
+++ b/drivers/net/ethernet/lantiq_xrx200.c | ||
@@ -63,7 +63,11 @@ struct xrx200_chan { | ||
|
||
struct napi_struct napi; | ||
struct ltq_dma_channel dma; | ||
- struct sk_buff *skb[LTQ_DESC_NUM]; | ||
+ | ||
+ union { | ||
+ struct sk_buff *skb[LTQ_DESC_NUM]; | ||
+ void *rx_buff[LTQ_DESC_NUM]; | ||
+ }; | ||
|
||
struct sk_buff *skb_head; | ||
struct sk_buff *skb_tail; | ||
@@ -78,6 +82,7 @@ struct xrx200_priv { | ||
struct xrx200_chan chan_rx; | ||
|
||
u16 rx_buf_size; | ||
+ u16 rx_skb_size; | ||
|
||
struct net_device *net_dev; | ||
struct device *dev; | ||
@@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu) | ||
return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN); | ||
} | ||
|
||
+static int xrx200_skb_size(u16 buf_size) | ||
+{ | ||
+ return SKB_DATA_ALIGN(buf_size + NET_SKB_PAD + NET_IP_ALIGN) + | ||
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | ||
+} | ||
+ | ||
/* drop all the packets from the DMA ring */ | ||
static void xrx200_flush_dma(struct xrx200_chan *ch) | ||
{ | ||
@@ -173,30 +184,29 @@ static int xrx200_close(struct net_devic | ||
return 0; | ||
} | ||
|
||
-static int xrx200_alloc_skb(struct xrx200_chan *ch) | ||
+static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int size)) | ||
{ | ||
- struct sk_buff *skb = ch->skb[ch->dma.desc]; | ||
+ void *buf = ch->rx_buff[ch->dma.desc]; | ||
struct xrx200_priv *priv = ch->priv; | ||
dma_addr_t mapping; | ||
int ret = 0; | ||
|
||
- ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev, | ||
- priv->rx_buf_size); | ||
- if (!ch->skb[ch->dma.desc]) { | ||
+ ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size); | ||
+ if (!ch->rx_buff[ch->dma.desc]) { | ||
ret = -ENOMEM; | ||
goto skip; | ||
} | ||
|
||
- mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data, | ||
+ mapping = dma_map_single(priv->dev, ch->rx_buff[ch->dma.desc], | ||
priv->rx_buf_size, DMA_FROM_DEVICE); | ||
if (unlikely(dma_mapping_error(priv->dev, mapping))) { | ||
- dev_kfree_skb_any(ch->skb[ch->dma.desc]); | ||
- ch->skb[ch->dma.desc] = skb; | ||
+ skb_free_frag(ch->rx_buff[ch->dma.desc]); | ||
+ ch->rx_buff[ch->dma.desc] = buf; | ||
ret = -ENOMEM; | ||
goto skip; | ||
} | ||
|
||
- ch->dma.desc_base[ch->dma.desc].addr = mapping; | ||
+ ch->dma.desc_base[ch->dma.desc].addr = mapping + NET_SKB_PAD + NET_IP_ALIGN; | ||
/* Make sure the address is written before we give it to HW */ | ||
wmb(); | ||
skip: | ||
@@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx2 | ||
{ | ||
struct xrx200_priv *priv = ch->priv; | ||
struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc]; | ||
- struct sk_buff *skb = ch->skb[ch->dma.desc]; | ||
+ void *buf = ch->rx_buff[ch->dma.desc]; | ||
u32 ctl = desc->ctl; | ||
int len = (ctl & LTQ_DMA_SIZE_MASK); | ||
struct net_device *net_dev = priv->net_dev; | ||
+ struct sk_buff *skb; | ||
int ret; | ||
|
||
- ret = xrx200_alloc_skb(ch); | ||
+ ret = xrx200_alloc_buf(ch, napi_alloc_frag); | ||
|
||
ch->dma.desc++; | ||
ch->dma.desc %= LTQ_DESC_NUM; | ||
@@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx2 | ||
return ret; | ||
} | ||
|
||
+ skb = build_skb(buf, priv->rx_skb_size); | ||
+ skb_reserve(skb, NET_SKB_PAD); | ||
skb_put(skb, len); | ||
|
||
/* add buffers to skb via skb->frag_list */ | ||
if (ctl & LTQ_DMA_SOP) { | ||
ch->skb_head = skb; | ||
ch->skb_tail = skb; | ||
+ skb_reserve(skb, NET_IP_ALIGN); | ||
} else if (ch->skb_head) { | ||
if (ch->skb_head == ch->skb_tail) | ||
skb_shinfo(ch->skb_tail)->frag_list = skb; | ||
else | ||
ch->skb_tail->next = skb; | ||
ch->skb_tail = skb; | ||
- skb_reserve(ch->skb_tail, -NET_IP_ALIGN); | ||
ch->skb_head->len += skb->len; | ||
ch->skb_head->data_len += skb->len; | ||
ch->skb_head->truesize += skb->truesize; | ||
@@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net | ||
struct xrx200_chan *ch_rx = &priv->chan_rx; | ||
int old_mtu = net_dev->mtu; | ||
bool running = false; | ||
- struct sk_buff *skb; | ||
+ void *buff; | ||
int curr_desc; | ||
int ret = 0; | ||
|
||
net_dev->mtu = new_mtu; | ||
priv->rx_buf_size = xrx200_buffer_size(new_mtu); | ||
+ priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); | ||
|
||
if (new_mtu <= old_mtu) | ||
return ret; | ||
@@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net | ||
|
||
for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM; | ||
ch_rx->dma.desc++) { | ||
- skb = ch_rx->skb[ch_rx->dma.desc]; | ||
- ret = xrx200_alloc_skb(ch_rx); | ||
+ buff = ch_rx->rx_buff[ch_rx->dma.desc]; | ||
+ ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag); | ||
if (ret) { | ||
net_dev->mtu = old_mtu; | ||
priv->rx_buf_size = xrx200_buffer_size(old_mtu); | ||
+ priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); | ||
break; | ||
} | ||
- dev_kfree_skb_any(skb); | ||
+ skb_free_frag(buff); | ||
} | ||
|
||
ch_rx->dma.desc = curr_desc; | ||
@@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200 | ||
ltq_dma_alloc_rx(&ch_rx->dma); | ||
for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM; | ||
ch_rx->dma.desc++) { | ||
- ret = xrx200_alloc_skb(ch_rx); | ||
+ ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag); | ||
if (ret) | ||
goto rx_free; | ||
} | ||
@@ -511,7 +526,7 @@ rx_ring_free: | ||
/* free the allocated RX ring */ | ||
for (i = 0; i < LTQ_DESC_NUM; i++) { | ||
if (priv->chan_rx.skb[i]) | ||
- dev_kfree_skb_any(priv->chan_rx.skb[i]); | ||
+ skb_free_frag(priv->chan_rx.rx_buff[i]); | ||
} | ||
|
||
rx_free: | ||
@@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx | ||
|
||
/* free the allocated RX ring */ | ||
for (i = 0; i < LTQ_DESC_NUM; i++) | ||
- dev_kfree_skb_any(priv->chan_rx.skb[i]); | ||
+ skb_free_frag(priv->chan_rx.rx_buff[i]); | ||
} | ||
|
||
static int xrx200_probe(struct platform_device *pdev) | ||
@@ -554,6 +569,7 @@ static int xrx200_probe(struct platform_ | ||
net_dev->min_mtu = ETH_ZLEN; | ||
net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0); | ||
priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN); | ||
+ priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); | ||
|
||
/* load the memory ranges */ | ||
res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |