Skip to content

Commit

Permalink
lantiq: improve ethernet performance
Browse files Browse the repository at this point in the history
This commit contains a series of fixes for DMA. The burst length
patch significantly improves Ethernet performance. Patches were
tested on the xRX200 and xRX330.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
  • Loading branch information
abajk authored and mkresin committed Nov 15, 2021
1 parent 2f95dd8 commit 0e561a2
Show file tree
Hide file tree
Showing 14 changed files with 706 additions and 0 deletions.
Expand Up @@ -236,6 +236,8 @@
interrupts = <105 109>;
pinctrl-0 = <&mdio_pins>;
pinctrl-names = "default";
lantiq,tx-burst-length = <4>;
lantiq,rx-burst-length = <4>;
};
};

Expand Down
2 changes: 2 additions & 0 deletions target/linux/lantiq/files/arch/mips/boot/dts/lantiq/ar9.dtsi
Expand Up @@ -387,6 +387,8 @@
interrupts = <73 72>;
pinctrl-0 = <&mdio_pins>;
pinctrl-names = "default";
lantiq,tx-burst-length = <8>;
lantiq,rx-burst-length = <8>;
};

ppe@e234000 {
Expand Down
Expand Up @@ -299,6 +299,8 @@
reg = <0xe180000 0x40000>;
interrupt-parent = <&icu0>;
interrupts = <73 78>;
lantiq,tx-burst-length = <4>;
lantiq,rx-burst-length = <4>;
};

ppe@e234000 {
Expand Down
2 changes: 2 additions & 0 deletions target/linux/lantiq/files/arch/mips/boot/dts/lantiq/vr9.dtsi
Expand Up @@ -488,6 +488,8 @@
interrupt-names = "tx", "rx";
resets = <&reset0 21 16>, <&reset0 8 8>, <&reset0 3 3>;
reset-names = "switch", "ppe", "ppe_dsp";
lantiq,tx-burst-length = <8>;
lantiq,rx-burst-length = <8>;
#address-cells = <1>;
#size-cells = <0>;

Expand Down
@@ -0,0 +1,32 @@
From 497acc03cd3918baabe25d46e30c5c62b998e24d Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Sat, 19 Jun 2021 13:38:12 +0200
Subject: [PATCH 1/5] MIPS: lantiq: dma: add small delay after reset

Reading the DMA registers immediately after the reset causes
Data Bus Error. Adding a small delay fixes this problem.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
---
arch/mips/lantiq/xway/dma.c | 3 +++
1 file changed, 3 insertions(+)

--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/err.h>

#include <lantiq_soc.h>
@@ -221,6 +222,8 @@ ltq_dma_init(struct platform_device *pde
clk_enable(clk);
ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL);

+ usleep_range(1, 10);
+
/* disable all interrupts */
ltq_dma_w32(0, LTQ_DMA_IRNEN);

@@ -0,0 +1,68 @@
From d31260c2f6a5cdddb052ab7cb09560eb23ce6597 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Thu, 15 Apr 2021 21:28:24 +0200
Subject: [PATCH 2/5] MIPS: lantiq: dma: reset correct number of channel

Different SoCs have a different number of channels, e.g .:
* amazon-se has 10 channels,
* danube+ar9 have 20 channels,
* vr9 has 28 channels,
* ar10 has 24 channels.

We can read the ID register and, depending on the reported
number of channels, reset the appropriate number of channels.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
---
arch/mips/lantiq/xway/dma.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -30,6 +30,7 @@
#define LTQ_DMA_PCTRL 0x44
#define LTQ_DMA_IRNEN 0xf4

+#define DMA_ID_CHNR GENMASK(26, 20) /* channel number */
#define DMA_DESCPT BIT(3) /* descriptor complete irq */
#define DMA_TX BIT(8) /* TX channel direction */
#define DMA_CHAN_ON BIT(0) /* channel on / off bit */
@@ -40,7 +41,6 @@
#define DMA_POLL BIT(31) /* turn on channel polling */
#define DMA_CLK_DIV4 BIT(6) /* polling clock divider */
#define DMA_2W_BURST BIT(1) /* 2 word burst length */
-#define DMA_MAX_CHANNEL 20 /* the soc has 20 channels */
#define DMA_ETOP_ENDIANNESS (0xf << 8) /* endianness swap etop channels */
#define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */

@@ -206,7 +206,7 @@ ltq_dma_init(struct platform_device *pde
{
struct clk *clk;
struct resource *res;
- unsigned id;
+ unsigned int id, nchannels;
int i;

res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -228,17 +228,18 @@ ltq_dma_init(struct platform_device *pde
ltq_dma_w32(0, LTQ_DMA_IRNEN);

/* reset/configure each channel */
- for (i = 0; i < DMA_MAX_CHANNEL; i++) {
+ id = ltq_dma_r32(LTQ_DMA_ID);
+ nchannels = ((id & DMA_ID_CHNR) >> 20);
+ for (i = 0; i < nchannels; i++) {
ltq_dma_w32(i, LTQ_DMA_CS);
ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL);
ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL);
ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
}

- id = ltq_dma_r32(LTQ_DMA_ID);
dev_info(&pdev->dev,
"Init done - hw rev: %X, ports: %d, channels: %d\n",
- id & 0x1f, (id >> 16) & 0xf, id >> 20);
+ id & 0x1f, (id >> 16) & 0xf, nchannels);

return 0;
}
@@ -0,0 +1,41 @@
From 7d9ea9052d6680d2910b8b005c397d95b3a8b012 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Wed, 7 Apr 2021 21:04:39 +0200
Subject: [PATCH 3/5] MIPS: lantiq: dma: fix burst length for DEU

The current definition of 2W burst length is invalid.
This patch fixes it. Current downstream DEU driver doesn't
use DMA. An incorrect burst length value doesn't cause any
errors. This patch also adds other burst length values.

Fixes: dfec1a827d2b ("MIPS: Lantiq: Add DMA support")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
---
arch/mips/lantiq/xway/dma.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -40,7 +40,11 @@
#define DMA_IRQ_ACK 0x7e /* IRQ status register */
#define DMA_POLL BIT(31) /* turn on channel polling */
#define DMA_CLK_DIV4 BIT(6) /* polling clock divider */
-#define DMA_2W_BURST BIT(1) /* 2 word burst length */
+#define DMA_PCTRL_2W_BURST 0x1 /* 2 word burst length */
+#define DMA_PCTRL_4W_BURST 0x2 /* 4 word burst length */
+#define DMA_PCTRL_8W_BURST 0x3 /* 8 word burst length */
+#define DMA_TX_BURST_SHIFT 4 /* tx burst shift */
+#define DMA_RX_BURST_SHIFT 2 /* rx burst shift */
#define DMA_ETOP_ENDIANNESS (0xf << 8) /* endianness swap etop channels */
#define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */

@@ -191,7 +195,8 @@ ltq_dma_init_port(int p)
break;

case DMA_PORT_DEU:
- ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),
+ ltq_dma_w32((DMA_PCTRL_2W_BURST << DMA_TX_BURST_SHIFT) |
+ (DMA_PCTRL_2W_BURST << DMA_RX_BURST_SHIFT),
LTQ_DMA_PCTRL);
break;

@@ -0,0 +1,84 @@
From 6615eeb39f7a110a196f20acbfb3a017da4d75d2 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Fri, 14 May 2021 21:25:08 +0200
Subject: [PATCH 4/5] MIPS: lantiq: dma: make a burst length configurable in
drivers

Make a burst length configurable in drivers.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
---
.../include/asm/mach-lantiq/xway/xway_dma.h | 2 +-
arch/mips/lantiq/xway/dma.c | 38 ++++++++++++++++---
2 files changed, 34 insertions(+), 6 deletions(-)

--- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
@@ -45,6 +45,6 @@ extern void ltq_dma_close(struct ltq_dma
extern void ltq_dma_alloc_tx(struct ltq_dma_channel *ch);
extern void ltq_dma_alloc_rx(struct ltq_dma_channel *ch);
extern void ltq_dma_free(struct ltq_dma_channel *ch);
-extern void ltq_dma_init_port(int p);
+extern void ltq_dma_init_port(int p, int tx_burst, int rx_burst);

#endif
--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -181,7 +181,7 @@ ltq_dma_free(struct ltq_dma_channel *ch)
EXPORT_SYMBOL_GPL(ltq_dma_free);

void
-ltq_dma_init_port(int p)
+ltq_dma_init_port(int p, int tx_burst, int rx_burst)
{
ltq_dma_w32(p, LTQ_DMA_PS);
switch (p) {
@@ -190,16 +190,44 @@ ltq_dma_init_port(int p)
* Tell the DMA engine to swap the endianness of data frames and
* drop packets if the channel arbitration fails.
*/
- ltq_dma_w32_mask(0, DMA_ETOP_ENDIANNESS | DMA_PDEN,
+ ltq_dma_w32_mask(0, (DMA_ETOP_ENDIANNESS | DMA_PDEN),
LTQ_DMA_PCTRL);
break;

- case DMA_PORT_DEU:
- ltq_dma_w32((DMA_PCTRL_2W_BURST << DMA_TX_BURST_SHIFT) |
- (DMA_PCTRL_2W_BURST << DMA_RX_BURST_SHIFT),
+ default:
+ break;
+ }
+
+ switch (rx_burst) {
+ case 8:
+ ltq_dma_w32_mask(0x0c, (DMA_PCTRL_8W_BURST << DMA_RX_BURST_SHIFT),
LTQ_DMA_PCTRL);
break;
+ case 4:
+ ltq_dma_w32_mask(0x0c, (DMA_PCTRL_4W_BURST << DMA_RX_BURST_SHIFT),
+ LTQ_DMA_PCTRL);
+ break;
+ case 2:
+ ltq_dma_w32_mask(0x0c, (DMA_PCTRL_2W_BURST << DMA_RX_BURST_SHIFT),
+ LTQ_DMA_PCTRL);
+ break;
+ default:
+ break;
+ }

+ switch (tx_burst) {
+ case 8:
+ ltq_dma_w32_mask(0x30, (DMA_PCTRL_8W_BURST << DMA_TX_BURST_SHIFT),
+ LTQ_DMA_PCTRL);
+ break;
+ case 4:
+ ltq_dma_w32_mask(0x30, (DMA_PCTRL_4W_BURST << DMA_TX_BURST_SHIFT),
+ LTQ_DMA_PCTRL);
+ break;
+ case 2:
+ ltq_dma_w32_mask(0x30, (DMA_PCTRL_2W_BURST << DMA_TX_BURST_SHIFT),
+ LTQ_DMA_PCTRL);
+ break;
default:
break;
}
@@ -0,0 +1,124 @@
From ec1a17a11aced3cd756e59d91ad6f50b7a2fabfb Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Sun, 16 May 2021 15:52:06 +0200
Subject: [PATCH 5/5] net: lantiq: configure burst length for ethernet

Configure the burst length for Ethernet. This improves Ethernet
performance by 58%. According to the vendor BSP, 8W burst length
is supported by ar9 and newer SoCs.

The NAT benchmark results on xRX200 (Down/Up):
* 2W: 330 Mb/s
* 4W: 432 Mb/s 372 Mb/s
* 8W: 520 Mb/s 389 Mb/s

Tested on xRX200 and xRX330.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
---
drivers/net/ethernet/lantiq_etop.c | 21 ++++++++++++++++++---
drivers/net/ethernet/lantiq_xrx200.c | 21 ++++++++++++++++++---
2 files changed, 36 insertions(+), 6 deletions(-)

--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -148,6 +148,9 @@ struct ltq_etop_priv {
struct ltq_etop_chan txch;
struct ltq_etop_chan rxch;

+ int tx_burst_len;
+ int rx_burst_len;
+
int tx_irq;
int rx_irq;

@@ -399,7 +402,7 @@ ltq_etop_dma_init(struct net_device *dev
int rx = priv->rx_irq - LTQ_DMA_ETOP;
int err;

- ltq_dma_init_port(DMA_PORT_ETOP);
+ ltq_dma_init_port(DMA_PORT_ETOP, priv->tx_burst_len, priv->rx_burst_len);

priv->txch.dma.nr = tx;
priv->txch.dma.dev = &priv->pdev->dev;
@@ -676,8 +679,8 @@ ltq_etop_tx(struct sk_buff *skb, struct
return NETDEV_TX_BUSY;
}

- /* dma needs to start on a 16 byte aligned address */
- byte_offset = CPHYSADDR(skb->data) % 16;
+ /* dma needs to start on a burst length value aligned address */
+ byte_offset = CPHYSADDR(skb->data) % (priv->tx_burst_len * 4);
priv->txch.skb[priv->txch.dma.desc] = skb;

netif_trans_update(dev);
@@ -925,6 +928,18 @@ static int ltq_etop_probe(struct platfor
spin_lock_init(&priv->lock);
SET_NETDEV_DEV(dev, &pdev->dev);

+ err = device_property_read_u32(&pdev->dev, "lantiq,tx-burst-length", &priv->tx_burst_len);
+ if (err < 0) {
+ dev_err(&pdev->dev, "unable to read tx-burst-length property\n");
+ return err;
+ }
+
+ err = device_property_read_u32(&pdev->dev, "lantiq,rx-burst-length", &priv->rx_burst_len);
+ if (err < 0) {
+ dev_err(&pdev->dev, "unable to read rx-burst-length property\n");
+ return err;
+ }
+
netif_napi_add(dev, &priv->txch.napi, ltq_etop_poll_tx, 8);
netif_napi_add(dev, &priv->rxch.napi, ltq_etop_poll_rx, 32);
priv->txch.netdev = dev;
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -71,6 +71,9 @@ struct xrx200_priv {
struct net_device *net_dev;
struct device *dev;

+ int tx_burst_len;
+ int rx_burst_len;
+
__iomem void *pmac_reg;
};

@@ -316,8 +319,8 @@ static netdev_tx_t xrx200_start_xmit(str
if (unlikely(dma_mapping_error(priv->dev, mapping)))
goto err_drop;

- /* dma needs to start on a 16 byte aligned address */
- byte_offset = mapping % 16;
+ /* dma needs to start on a burst length value aligned address */
+ byte_offset = mapping % (priv->tx_burst_len * 4);

desc->addr = mapping - byte_offset;
/* Make sure the address is written before we give it to HW */
@@ -369,7 +372,7 @@ static int xrx200_dma_init(struct xrx200
int ret = 0;
int i;

- ltq_dma_init_port(DMA_PORT_ETOP);
+ ltq_dma_init_port(DMA_PORT_ETOP, priv->tx_burst_len, priv->rx_burst_len);

ch_rx->dma.nr = XRX200_DMA_RX;
ch_rx->dma.dev = priv->dev;
@@ -487,6 +490,18 @@ static int xrx200_probe(struct platform_
if (err)
eth_hw_addr_random(net_dev);

+ err = device_property_read_u32(dev, "lantiq,tx-burst-length", &priv->tx_burst_len);
+ if (err < 0) {
+ dev_err(dev, "unable to read tx-burst-length property\n");
+ return err;
+ }
+
+ err = device_property_read_u32(dev, "lantiq,rx-burst-length", &priv->rx_burst_len);
+ if (err < 0) {
+ dev_err(dev, "unable to read rx-burst-length property\n");
+ return err;
+ }
+
/* bring up the dma engine and IP core */
err = xrx200_dma_init(priv);
if (err)

0 comments on commit 0e561a2

Please sign in to comment.