[not for review] gve: device memory tcp data path
Implement proof-of-concept GVE tcpdevmem with page pool integration.

Change-Id: I89c706a6adb6d33488507b48cc3ded75ee2dc8b0
mina committed Oct 23, 2023
1 parent e3847dc commit 3c27aa2
Showing 6 changed files with 133 additions and 28 deletions.
5 changes: 4 additions & 1 deletion drivers/net/ethernet/google/gve/gve.h
@@ -267,6 +267,8 @@ struct gve_rx_ring {

 		/* Array of buffers for header-split */
 		struct gve_header_buf *hdr_bufs;
+
+		struct page_pool *pp;
 	} dqo;
 };

@@ -1139,7 +1141,8 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
 /* buffers */
 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 		   struct page **page, dma_addr_t *dma,
-		   enum dma_data_direction, gfp_t gfp_flags);
+		   enum dma_data_direction, gfp_t gfp_flags,
+		   struct gve_rx_ring *rx);
 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 		   enum dma_data_direction);
 /* tx handling */
6 changes: 6 additions & 0 deletions drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -9,6 +9,7 @@
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
#include <net/netdev_rx_queue.h>

static void gve_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
@@ -462,6 +463,11 @@ static void gve_get_channels(struct net_device *netdev,
 	cmd->tx_count = priv->tx_cfg.num_queues;
 	cmd->other_count = 0;
 	cmd->combined_count = 0;
+
+	for (int i = 0; i < netdev->num_rx_queues; i++) {
+		pr_err("queue %2d is bound=%d\n", i,
+		       !!READ_ONCE(__netif_get_rx_queue(netdev, i)->binding));
+	}
 }
 
 static int gve_set_channels(struct net_device *netdev,
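For reference, the debug loop above fires whenever channel counts are queried (e.g. via `ethtool -l`) and logs the dma-buf binding state of every RX queue; note it logs at pr_err() level, proof-of-concept shorthand rather than a real error. Illustrative dmesg output derived from the format string (queue count and which queues are bound are assumptions, not from the commit):

	queue  0 is bound=1
	queue  1 is bound=1
	queue  2 is bound=0
	queue  3 is bound=0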
42 changes: 32 additions & 10 deletions drivers/net/ethernet/google/gve/gve_main.c
@@ -22,6 +22,8 @@
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>

#define GVE_DEFAULT_RX_COPYBREAK (256)

@@ -972,18 +974,32 @@ static void gve_free_rings(struct gve_priv *priv)

 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 		   struct page **page, dma_addr_t *dma,
-		   enum dma_data_direction dir, gfp_t gfp_flags)
+		   enum dma_data_direction dir, gfp_t gfp_flags,
+		   struct gve_rx_ring *rx)
 {
-	*page = alloc_page(gfp_flags);
+	struct netdev_rx_queue *rxq = NULL;
+
+	if (rx)
+		rxq = __netif_get_rx_queue(priv->dev, rx->q_num);
+	else
+		WARN_ON_ONCE(true);
+
+	*page = page_pool_alloc_pages(rx->dqo.pp, GFP_KERNEL);
+
 	if (!*page) {
 		priv->page_alloc_fail++;
 		return -ENOMEM;
 	}
-	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
-	if (dma_mapping_error(dev, *dma)) {
-		priv->dma_mapping_error++;
-		put_page(*page);
-		return -ENOMEM;
+
+	if (!page_is_page_pool_iov(*page)) {
+		*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
+		if (dma_mapping_error(dev, *dma)) {
+			priv->dma_mapping_error++;
+			put_page(*page);
+			return -ENOMEM;
+		}
+
+		page_pool_set_dma_addr(*page, *dma);
 	}
 	return 0;
 }
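A caveat on the hunk above, even for a proof of concept: both remaining callers that pass rx == NULL (the QPL loop below and gve_rx_alloc_buffer() in gve_rx.c) will trip the WARN_ON_ONCE() and then dereference rx->dqo.pp anyway, the caller's gfp_flags is ignored in favour of GFP_KERNEL even though gve_rx.c allocates with GFP_ATOMIC, and the computed rxq is never used. A minimal sketch of a NULL-safe allocation path, not part of the commit (the plain-page fallback would also need the old put_page() free path to match):

	if (rx) {
		*page = page_pool_alloc_pages(rx->dqo.pp, gfp_flags);
	} else {
		/* QPL callers still pass NULL; fall back to a plain page */
		WARN_ON_ONCE(true);
		*page = alloc_page(gfp_flags);
	}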
@@ -1017,7 +1033,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
 	for (i = 0; i < pages; i++) {
 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
 				     &qpl->page_buses[i],
-				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
+				     gve_qpl_dma_dir(priv, id), GFP_KERNEL, NULL);
 		/* caller handles clean up */
 		if (err)
 			return -ENOMEM;
@@ -1031,10 +1047,16 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 		   enum dma_data_direction dir)
 {
-	if (!dma_mapping_error(dev, dma))
+	struct page_pool *pp;
+
+	if (!page_is_page_pool_iov(page) && !dma_mapping_error(dev, dma))
 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
+
+	pp = page_is_page_pool_iov(page) ? page_to_page_pool_iov(page)->pp :
+					   page->pp;
+
 	if (page)
-		put_page(page);
+		page_pool_put_full_page(pp, page, true);
 }
 
 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
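One more hazard in the hunk above: gve_free_page() now dereferences page (via page_is_page_pool_iov() and page->pp) before reaching the `if (page)` check that used to guard put_page(), so a NULL page now crashes earlier. A NULL-safe ordering, as a sketch of one possible fix:

	if (!page)
		return;

	if (!page_is_page_pool_iov(page) && !dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);

	/* page_pool_iovs carry an explicit back-pointer to their pool;
	 * normal page_pool pages store it in page->pp.
	 */
	pp = page_is_page_pool_iov(page) ? page_to_page_pool_iov(page)->pp :
					   page->pp;
	page_pool_put_full_page(pp, page, true);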
4 changes: 2 additions & 2 deletions drivers/net/ethernet/google/gve/gve_rx.c
@@ -19,7 +19,7 @@ static void gve_rx_free_buffer(struct device *dev,
 	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
 				      GVE_DATA_SLOT_ADDR_PAGE_MASK);
 
-	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
+	page_pool_page_put_many(page_info->page, page_info->pagecnt_bias - 1);
 	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
 }

@@ -100,7 +100,7 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
 	int err;
 
 	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
-			     GFP_ATOMIC);
+			     GFP_ATOMIC, NULL);
 	if (err)
 		return err;

102 changes: 88 additions & 14 deletions drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -15,20 +15,26 @@
 #include <net/ip6_checksum.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
+#include <net/page_pool/helpers.h>
+#include <net/page_pool/types.h>
+#include <net/netdev_rx_queue.h>
 
 static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
 {
-	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
+	return page_pool_page_ref_count(bs->page_info.page) -
+	       bs->page_info.pagecnt_bias;
 }
 
 static void gve_free_page_dqo(struct gve_priv *priv,
 			      struct gve_rx_buf_state_dqo *bs,
 			      bool free_page)
 {
-	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
+	page_pool_page_put_many(bs->page_info.page,
+				bs->page_info.pagecnt_bias - 1);
+
 	if (free_page)
 		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
-			     DMA_FROM_DEVICE);
+			      DMA_FROM_DEVICE);
 	bs->page_info.page = NULL;
 }

@@ -177,12 +183,30 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
 	if (!rx->dqo.qpl) {
 		int err;
 
-		err = gve_alloc_page(priv, &priv->pdev->dev,
-				     &buf_state->page_info.page,
-				     &buf_state->addr,
-				     DMA_FROM_DEVICE, GFP_ATOMIC);
-		if (err)
-			return err;
+		if (unlikely(rx->dqo.pp->p.memory_provider ==
+			     PP_MP_DMABUF_DEVMEM)) {
+			buf_state->page_info.page =
+				page_pool_alloc_pages(rx->dqo.pp, GFP_KERNEL);
+
+			if (!buf_state->page_info.page) {
+				priv->page_alloc_fail++;
+				return -ENOMEM;
+			}
+
+			BUG_ON(!page_is_page_pool_iov(buf_state->page_info.page));
+
+			buf_state->addr =
+				page_pool_get_dma_addr(buf_state->page_info.page);
+		} else {
+			err = gve_alloc_page(priv, &priv->pdev->dev,
+					     &buf_state->page_info.page,
+					     &buf_state->addr,
+					     DMA_FROM_DEVICE,
+					     GFP_ATOMIC,
+					     rx);
+			if (err)
+				return err;
+		}
 	} else {
 		idx = rx->dqo.next_qpl_page_idx;
 		if (idx >= priv->rx_pages_per_qpl) {
@@ -200,8 +224,8 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
 	buf_state->last_single_ref_offset = 0;
 
 	/* The page already has 1 ref. */
-	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
-	buf_state->page_info.pagecnt_bias = INT_MAX;
+	page_pool_page_get_many(buf_state->page_info.page, (INT_MAX / 2) - 1);
+	buf_state->page_info.pagecnt_bias = (INT_MAX / 2);
 
 	return 0;
 }
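The pagecnt_bias scheme above trades per-fragment atomics for cheap bookkeeping: gve_buf_ref_cnt() (first hunk of this file) treats ref_count minus pagecnt_bias as the number of references the stack still holds. A worked example with the new constants (the halving from INT_MAX to INT_MAX / 2 is presumably headroom so the pre-charged count cannot overflow alongside the page_pool's own accounting; that rationale is an inference, not stated in the commit):

	/* after gve_alloc_page_dqo():
	 *   ref_count   = 1 + ((INT_MAX / 2) - 1) = INT_MAX / 2
	 *   bias        = INT_MAX / 2
	 *   outstanding = ref_count - bias = 0   -> driver owns the page
	 *
	 * frag handed to the stack (gve_dec_pagecnt_bias()): bias--
	 *   outstanding = 1                      -> one frag in flight
	 *
	 * stack drops its reference: ref_count--
	 *   outstanding = 0                      -> recyclable again
	 */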
@@ -265,6 +289,9 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
 		rx->dqo.hdr_bufs = NULL;
 	}
 
+	if (rx->dqo.pp)
+		page_pool_destroy(rx->dqo.pp);
+
 	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
 }

@@ -274,6 +301,7 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 	struct device *hdev = &priv->pdev->dev;
 	size_t size;
 	int i;
+	struct netdev_rx_queue *rxq = NULL;
 
 	const u32 buffer_queue_slots =
 		priv->queue_format == GVE_DQO_RDA_FORMAT ?
@@ -317,6 +345,27 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 		}
 	}
 
+	rxq = __netif_get_rx_queue(priv->dev, rx->q_num);
+
+	/* Create a page_pool and register it with rxq */
+	struct netdev_dmabuf_binding *binding = READ_ONCE(rxq->binding);
+	struct page_pool_params pp_params = { 0 };
+
+
+	pp_params.order = 0;
+	pp_params.pool_size = buffer_queue_slots;
+	pp_params.nid = NUMA_NO_NODE;
+	pp_params.dev = &priv->dev->dev;
+
+	if (rxq && unlikely(binding)) {
+		pp_params.memory_provider = PP_MP_DMABUF_DEVMEM;
+		pp_params.mp_priv = binding;
+	}
+
+	rx->dqo.pp = page_pool_create(&pp_params);
+	if (IS_ERR(rx->dqo.pp))
+		BUG();
+
 	/* Set up linked list of buffer IDs */
 	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
 		rx->dqo.buf_states[i].next = i + 1;
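BUG() on pool-creation failure is PoC shorthand. Since gve_rx_alloc_ring_dqo() already unwinds its earlier allocations on failure, a production version would presumably propagate the error instead; a sketch, where the cleanup label and `err` local are hypothetical names for illustration:

	rx->dqo.pp = page_pool_create(&pp_params);
	if (IS_ERR(rx->dqo.pp)) {
		err = PTR_ERR(rx->dqo.pp);	/* assumes a local 'int err' */
		rx->dqo.pp = NULL;
		goto err_free;			/* hypothetical cleanup label */
	}

Two smaller nits in the same hunk: the mid-function declarations and the double blank line are against kernel style, and pp_params.dev is the netdev's embedded device rather than the PCI device (&priv->pdev->dev) the driver actually maps pages with; harmless while the pool does no DMA mapping of its own, but worth noting.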
@@ -713,6 +762,24 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
 	 */
 	prefetch(buf_state->page_info.page);
 
+	if (!sph && !rx->ctx.skb_head &&
+	    page_is_page_pool_iov(buf_state->page_info.page)) {
+		/* !sph indicates the packet is not split, and the header went
+		 * to the packet buffer. If the packet buffer is a dma_buf
+		 * page, those can't be easily mapped into the kernel space to
+		 * access the header required to process the packet.
+		 *
+		 * In the future we may be able to map the dma_buf page to
+		 * kernel space to access the header for dma_buf providers that
+		 * support that, but for now, simply drop the packet. We expect
+		 * the TCP packets that we care about to be header split
+		 * anyway.
+		 */
+		rx->rx_devmem_dropped++;
+		gve_recycle_buf(rx, buf_state);
+		return -EFAULT;
+	}
+
 	/* Copy the header into the skb in the case of header split */
 	if (sph) {
 		dma_sync_single_for_cpu(&priv->pdev->dev,
@@ -747,7 +814,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
 		return 0;
 	}
 
-	if (eop && buf_len <= priv->rx_copybreak) {
+	/* We can't copy dma-buf pages. Ignore any copybreak setting. */
+	if (eop && buf_len <= priv->rx_copybreak &&
+	    (!page_is_page_pool_iov(buf_state->page_info.page) || !buf_len)) {
 		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
 					       &buf_state->page_info, buf_len);
 		if (unlikely(!rx->ctx.skb_head))
@@ -839,10 +908,15 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
 		return err;
 	}
 
-	if (skb_headlen(rx->ctx.skb_head) == 0)
+	if (skb_headlen(rx->ctx.skb_head) == 0) {
+		if (napi_get_frags(napi)->devmem)
+			rx->rx_devmem_pkt++;
 		napi_gro_frags(napi);
-	else
+	} else {
+		if (rx->ctx.skb_head->devmem)
+			rx->rx_devmem_pkt++;
 		napi_gro_receive(napi, rx->ctx.skb_head);
+	}
 
 	return 0;
 }
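Note that rx->rx_devmem_pkt and rx->rx_devmem_dropped (used here and in the drop path earlier in this file), like the skb-level `devmem` flag read above, are not added by any hunk in this commit; they presumably come from other patches in the tcpdevmem series. For this file to build, struct gve_rx_ring would need fields along these lines (hypothetical placement and comments, shown only for illustration):

	/* devmem counters, assumed to be added elsewhere in the series */
	u64 rx_devmem_pkt;	/* devmem payloads delivered to the stack */
	u64 rx_devmem_dropped;	/* unsplit packets dropped: header in dma-buf */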
2 changes: 1 addition & 1 deletion drivers/net/ethernet/google/gve/gve_utils.c
@@ -84,6 +84,6 @@ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
 		page_info->pagecnt_bias = INT_MAX - pagecount;
 
 		/* Set pagecount back up to max. */
-		page_ref_add(page_info->page, INT_MAX - pagecount);
+		page_pool_page_get_many(page_info->page, (INT_MAX / 2) - pagecount);
 	}
 }
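A consistency nit in this last hunk: the ref top-up now adds (INT_MAX / 2) - pagecount, but the bias reset on the unchanged context line above it still assigns INT_MAX - pagecount, leaving the bias larger than the actual reference count and gve_buf_ref_cnt() negative. Presumably both should use the halved constant, matching gve_alloc_page_dqo(); a sketch of the consistent pair:

		page_info->pagecnt_bias = (INT_MAX / 2) - pagecount;

		/* Set pagecount back up to max. */
		page_pool_page_get_many(page_info->page,
					(INT_MAX / 2) - pagecount);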
