Skip to content

Commit

Permalink
net/ifcvf: add ifcvf vDPA driver
Browse files Browse the repository at this point in the history
The IFCVF vDPA (vhost data path acceleration) driver provides support for
the Intel FPGA 100G VF (IFCVF). IFCVF's datapath is virtio ring compatible,
it works as a HW vhost backend which can send/receive packets to/from
virtio directly by DMA.

Different VF devices serve different virtio frontends which are in
different VMs, so each VF needs to have its own DMA address translation
service. During the driver probe a new container is created; with this
container the vDPA driver can program the DMA remapping table with the
VM's memory region information.

Key vDPA driver ops implemented:

- ifcvf_dev_config:
  Enable VF data path with virtio information provided by vhost lib,
  including IOMMU programming to enable VF DMA to VM's memory, VFIO
  interrupt setup to route HW interrupt to virtio driver, create notify
  relay thread to translate virtio driver's kick to a MMIO write onto HW,
  HW queues configuration.

- ifcvf_dev_close:
  Revoke all the setup in ifcvf_dev_config.

The live migration feature is supported by IFCVF and this driver enables
it. For dirty page logging, the VF logs packet buffer writes, and the
driver marks the used ring as dirty when the device stops.

Because vDPA driver needs to set up MSI-X vector to interrupt the
guest, only vfio-pci is supported currently.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Signed-off-by: Rosen Xu <rosen.xu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
  • Loading branch information
XiaoWang1772 authored and Ferruh Yigit committed Apr 27, 2018
1 parent 440f03c commit a3f8150
Show file tree
Hide file tree
Showing 11 changed files with 1,356 additions and 0 deletions.
7 changes: 7 additions & 0 deletions MAINTAINERS
Expand Up @@ -525,6 +525,13 @@ T: git://dpdk.org/next/dpdk-next-net-intel
F: drivers/net/avf/
F: doc/guides/nics/features/avf*.ini

Intel ifc
M: Xiao Wang <xiao.w.wang@intel.com>
T: git://dpdk.org/next/dpdk-next-net-intel
F: drivers/net/ifc/
F: doc/guides/nics/ifcvf.rst
F: doc/guides/nics/features/ifcvf.ini

Marvell mvpp2
M: Jacek Siuda <jck@semihalf.com>
M: Tomasz Duszynski <tdu@semihalf.com>
Expand Down
7 changes: 7 additions & 0 deletions config/common_base
Expand Up @@ -824,6 +824,13 @@ CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
#
CONFIG_RTE_LIBRTE_PMD_VHOST=n

#
# Compile IFCVF driver
# To compile, CONFIG_RTE_LIBRTE_VHOST and CONFIG_RTE_EAL_VFIO
# should be enabled.
#
CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD=n

#
# Compile the test application
#
Expand Down
1 change: 1 addition & 0 deletions config/common_linuxapp
Expand Up @@ -15,6 +15,7 @@ CONFIG_RTE_LIBRTE_PMD_KNI=y
CONFIG_RTE_LIBRTE_VHOST=y
CONFIG_RTE_LIBRTE_VHOST_NUMA=y
CONFIG_RTE_LIBRTE_PMD_VHOST=y
CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD=y
CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
CONFIG_RTE_LIBRTE_PMD_TAP=y
CONFIG_RTE_LIBRTE_AVP_PMD=y
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/Makefile
Expand Up @@ -58,6 +58,9 @@ endif # $(CONFIG_RTE_LIBRTE_SCHED)

ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
ifeq ($(CONFIG_RTE_EAL_VFIO),y)
DIRS-$(CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD) += ifc
endif
endif # $(CONFIG_RTE_LIBRTE_VHOST)

ifeq ($(CONFIG_RTE_LIBRTE_MVPP2_PMD),y)
Expand Down
35 changes: 35 additions & 0 deletions drivers/net/ifc/Makefile
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2018 Intel Corporation

include $(RTE_SDK)/mk/rte.vars.mk

#
# library name
#
LIB = librte_ifcvf_vdpa.a

# Libraries the driver links against.
LDLIBS += -lpthread
LDLIBS += -lrte_eal -lrte_pci -lrte_vhost -lrte_bus_pci

CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
# NOTE(review): presumably needed because the driver uses DPDK APIs that are
# still tagged experimental -- confirm which ones.
CFLAGS += -DALLOW_EXPERIMENTAL_API

#
# Object file names (one per .c file found under base/).
# NOTE(review): nothing in this Makefile attaches extra flags to this list;
# confirm whether the variable is still needed.
#
BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))

# Add base/ to the source search path so that ifcvf.c, listed below without a
# directory prefix, can be found there.
VPATH += $(SRCDIR)/base

EXPORT_MAP := rte_ifcvf_version.map

LIBABIVER := 1

#
# all sources are stored in SRCS-y
#
SRCS-$(CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD) += ifcvf_vdpa.c
SRCS-$(CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD) += ifcvf.c

include $(RTE_SDK)/mk/rte.lib.mk
298 changes: 298 additions & 0 deletions drivers/net/ifc/base/ifcvf.c
@@ -0,0 +1,298 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Intel Corporation
*/

#include "ifcvf.h"
#include "ifcvf_osdep.h"

/*
 * Translate a PCI capability (bar, offset, length) into a pointer inside
 * the corresponding mapped BAR.
 *
 * @param hw   device context; mem_resource[] describes the BARs
 * @param cap  capability providing the bar index, offset and length
 * @return     mem_resource[bar].addr + offset, or NULL when the bar index is
 *             out of range, offset + length wraps around, or the window
 *             reaches past the BAR length
 */
STATIC void *
get_cap_addr(struct ifcvf_hw *hw, struct ifcvf_pci_cap *cap)
{
	u8 bar = cap->bar;
	u32 length = cap->length;
	u32 offset = cap->offset;

	if (bar > IFCVF_PCI_MAX_RESOURCE - 1) {
		DEBUGOUT("invalid bar: %u\n", bar);
		return NULL;
	}

	/* a sum smaller than one of its operands means the addition wrapped */
	if (offset + length < offset) {
		DEBUGOUT("offset(%u) + length(%u) overflows\n",
			offset, length);
		return NULL;
	}

	/* the whole [offset, offset + length) window must fit in the BAR */
	if (offset + length > hw->mem_resource[bar].len) {
		DEBUGOUT("offset(%u) + length(%u) overflows bar length(%u)\n",
			offset, length, (u32)hw->mem_resource[bar].len);
		return NULL;
	}

	return hw->mem_resource[bar].addr + offset;
}

/*
 * Walk the PCI capability list of the device and record where the
 * configuration structures live.
 *
 * Every vendor-specific capability (PCI_CAP_ID_VNDR) is inspected; the
 * common, notify, ISR and device configuration windows are resolved to
 * pointers through get_cap_addr(). For the notify capability the
 * notify_off_multiplier is read from the dword that follows the capability,
 * and the BAR index is remembered in hw->notify_region. hw->lm_cfg is taken
 * from mem_resource[4].
 *
 * @param hw   device context to fill in; mem_resource[] must already be set
 * @param dev  PCI device handle used for the config space reads
 * @return     0 on success; -1 when the capability list pointer or the
 *             notify multiplier cannot be read, or when any of the four
 *             configuration pointers is still NULL after the walk
 */
int
ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev)
{
	int ret;
	u8 pos;
	struct ifcvf_pci_cap cap;

	ret = PCI_READ_CONFIG_BYTE(dev, &pos, PCI_CAPABILITY_LIST);
	if (ret < 0) {
		DEBUGOUT("failed to read pci capability list\n");
		return -1;
	}

	while (pos) {
		ret = PCI_READ_CONFIG_RANGE(dev, (u32 *)&cap,
				sizeof(cap), pos);
		if (ret < 0) {
			DEBUGOUT("failed to read cap at pos: %x\n", pos);
			break;
		}

		if (cap.cap_vndr != PCI_CAP_ID_VNDR)
			goto next;

		DEBUGOUT("cfg type: %u, bar: %u, offset: %u, "
				"len: %u\n", cap.cfg_type, cap.bar,
				cap.offset, cap.length);

		switch (cap.cfg_type) {
		case IFCVF_PCI_CAP_COMMON_CFG:
			hw->common_cfg = get_cap_addr(hw, &cap);
			break;
		case IFCVF_PCI_CAP_NOTIFY_CFG:
			/* the multiplier sits right after the capability */
			ret = PCI_READ_CONFIG_DWORD(dev,
					&hw->notify_off_multiplier,
					pos + sizeof(cap));
			if (ret < 0) {
				DEBUGOUT("failed to read notify_off_multiplier\n");
				return -1;
			}
			hw->notify_base = get_cap_addr(hw, &cap);
			hw->notify_region = cap.bar;
			break;
		case IFCVF_PCI_CAP_ISR_CFG:
			hw->isr = get_cap_addr(hw, &cap);
			break;
		case IFCVF_PCI_CAP_DEVICE_CFG:
			hw->dev_cfg = get_cap_addr(hw, &cap);
			break;
		default:
			/* other vendor capability types are not used here */
			break;
		}
next:
		pos = cap.cap_next;
	}

	hw->lm_cfg = hw->mem_resource[4].addr;

	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
			hw->isr == NULL || hw->dev_cfg == NULL) {
		DEBUGOUT("capability incomplete\n");
		return -1;
	}

	/* argument order must follow the labels in the format string */
	DEBUGOUT("capability mapping:\ncommon cfg: %p\n"
			"notify base: %p\nisr cfg: %p\ndevice cfg: %p\n"
			"multiplier: %u\n",
			hw->common_cfg, hw->notify_base,
			hw->isr, hw->dev_cfg,
			hw->notify_off_multiplier);

	return 0;
}

/* Read and return the device_status register of the common config. */
STATIC u8
ifcvf_get_status(struct ifcvf_hw *hw)
{
	struct ifcvf_pci_common_cfg *cfg = hw->common_cfg;

	return IFCVF_READ_REG8(&cfg->device_status);
}

/* Write @status verbatim into the device_status register. */
STATIC void
ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
{
	struct ifcvf_pci_common_cfg *cfg = hw->common_cfg;

	IFCVF_WRITE_REG8(status, &cfg->device_status);
}

/*
 * Reset the device: write 0 to device_status, then poll once per
 * millisecond until the register reads back as 0.
 */
STATIC void
ifcvf_reset(struct ifcvf_hw *hw)
{
	ifcvf_set_status(hw, 0);

	/* flush status write */
	for (;;) {
		if (ifcvf_get_status(hw) == 0)
			break;
		msec_delay(1);
	}
}

/*
 * OR @status into the current device_status value and write it back.
 * A @status of 0 is written as-is, without reading the register first.
 * The register is read once more after the write and the value discarded.
 */
STATIC void
ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
{
	u8 merged = status;

	if (merged != 0)
		merged |= ifcvf_get_status(hw);

	ifcvf_set_status(hw, merged);

	/* read back; the result itself is not needed */
	(void)ifcvf_get_status(hw);
}

u64
ifcvf_get_features(struct ifcvf_hw *hw)
{
u32 features_lo, features_hi;
struct ifcvf_pci_common_cfg *cfg = hw->common_cfg;

IFCVF_WRITE_REG32(0, &cfg->device_feature_select);
features_lo = IFCVF_READ_REG32(&cfg->device_feature);

IFCVF_WRITE_REG32(1, &cfg->device_feature_select);
features_hi = IFCVF_READ_REG32(&cfg->device_feature);

return ((u64)features_hi << 32) | features_lo;
}

/*
 * Write the 64-bit @features value to the device as two 32-bit halves:
 * select 0 carries the low half, select 1 the high half.
 */
STATIC void
ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
{
	struct ifcvf_pci_common_cfg *cfg = hw->common_cfg;
	u32 low = (u32)(features & 0xffffffffULL);
	u32 high = (u32)(features >> 32);

	IFCVF_WRITE_REG32(0, &cfg->guest_feature_select);
	IFCVF_WRITE_REG32(low, &cfg->guest_feature);

	IFCVF_WRITE_REG32(1, &cfg->guest_feature_select);
	IFCVF_WRITE_REG32(high, &cfg->guest_feature);
}

STATIC int
ifcvf_config_features(struct ifcvf_hw *hw)
{
u64 host_features;

host_features = ifcvf_get_features(hw);
hw->req_features &= host_features;

ifcvf_set_features(hw, hw->req_features);
ifcvf_add_status(hw, IFCVF_CONFIG_STATUS_FEATURES_OK);

if (!(ifcvf_get_status(hw) & IFCVF_CONFIG_STATUS_FEATURES_OK)) {
DEBUGOUT("failed to set FEATURES_OK status\n");
return -1;
}

return 0;
}

/*
 * Write a 64-bit value through two 32-bit registers: the low 32 bits go to
 * @lo first, then the high 32 bits go to @hi.
 */
STATIC void
io_write64_twopart(u64 val, u32 *lo, u32 *hi)
{
	u32 low_part = (u32)(val & 0xffffffffULL);
	u32 high_part = (u32)(val >> 32);

	IFCVF_WRITE_REG32(low_part, lo);
	IFCVF_WRITE_REG32(high_part, hi);
}

/*
 * Program the MSI-X vectors and every vring into the device and enable
 * the queues.
 *
 * Vector 0 is assigned to config changes. Then, for each of the
 * hw->nr_vring rings: the queue is selected, its desc/avail/used addresses
 * and size are written, its saved ring indexes are stored in the lm_cfg
 * region, vector i + 1 is assigned, the notify address is computed and
 * cached in hw->notify_addr[i], and finally the queue is enabled.
 *
 * Returns 0 on success, -1 as soon as an MSI-X vector register reads back
 * as IFCVF_MSI_NO_VECTOR (rings configured before that point are left as
 * programmed).
 */
STATIC int
ifcvf_hw_enable(struct ifcvf_hw *hw)
{
struct ifcvf_pci_common_cfg *cfg;
u8 *lm_cfg;
u32 i;
u16 notify_off;

cfg = hw->common_cfg;
lm_cfg = hw->lm_cfg;

/* vector 0 is used for the device config interrupt; verify by read-back */
IFCVF_WRITE_REG16(0, &cfg->msix_config);
if (IFCVF_READ_REG16(&cfg->msix_config) == IFCVF_MSI_NO_VECTOR) {
DEBUGOUT("msix vec alloc failed for device config\n");
return -1;
}

for (i = 0; i < hw->nr_vring; i++) {
/* the queue_* registers below act on the queue chosen here */
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
&cfg->queue_avail_hi);
io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
&cfg->queue_used_hi);
IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);

/*
 * Ring state word: last_avail_idx in bits 15..0, last_used_idx in
 * bits 31..16. Rings 2k and 2k+1 share block k (IFCVF_LM_CFG_SIZE
 * bytes apart), at byte offsets 0 and 4 within it.
 */
*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
(u32)hw->vring[i].last_avail_idx |
((u32)hw->vring[i].last_used_idx << 16);

/* queue i uses vector i + 1 (vector 0 is the config vector) */
IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
IFCVF_MSI_NO_VECTOR) {
DEBUGOUT("queue %u, msix vec alloc failed\n",
i);
return -1;
}

/* notify address = notify_base + queue_notify_off * multiplier */
notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off);
hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
notify_off * hw->notify_off_multiplier);
/* enable the queue only after everything above is programmed */
IFCVF_WRITE_REG16(1, &cfg->queue_enable);
}

return 0;
}

STATIC void
ifcvf_hw_disable(struct ifcvf_hw *hw)
{
u32 i;
struct ifcvf_pci_common_cfg *cfg;
u32 ring_state;

cfg = hw->common_cfg;

IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config);
for (i = 0; i < hw->nr_vring; i++) {
IFCVF_WRITE_REG16(i, &cfg->queue_select);
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
hw->vring[i].last_avail_idx = (u16)ring_state;
hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
}
}

/*
 * Bring the device up: reset it, set the ACK and DRIVER status bits,
 * negotiate features, program and enable the queues, then set DRIVER_OK.
 *
 * @return 0 on success, -1 if feature negotiation or queue setup fails
 *         (DRIVER_OK is not set in that case)
 */
int
ifcvf_start_hw(struct ifcvf_hw *hw)
{
	ifcvf_reset(hw);
	ifcvf_add_status(hw, IFCVF_CONFIG_STATUS_ACK);
	ifcvf_add_status(hw, IFCVF_CONFIG_STATUS_DRIVER);

	/* queue setup is attempted only when feature negotiation succeeded */
	if (ifcvf_config_features(hw) < 0 || ifcvf_hw_enable(hw) < 0)
		return -1;

	ifcvf_add_status(hw, IFCVF_CONFIG_STATUS_DRIVER_OK);

	return 0;
}

/*
 * Stop the device: disable the queues (which also saves the ring indexes
 * into hw->vring[]) and then reset the device.
 */
void
ifcvf_stop_hw(struct ifcvf_hw *hw)
{
/* must run before the reset so the ring state can still be read */
ifcvf_hw_disable(hw);
ifcvf_reset(hw);
}

/*
 * Kick queue @qid by writing its index to the notify address cached for
 * it in hw->notify_addr[]. @qid is not range-checked here.
 */
void
ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
{
	void *doorbell = hw->notify_addr[qid];

	IFCVF_WRITE_REG16(qid, doorbell);
}

/* Return the BAR index recorded for the notify capability. */
u8
ifcvf_get_notify_region(struct ifcvf_hw *hw)
{
	u8 region = hw->notify_region;

	return region;
}

u64
ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid)
{
return (u8 *)hw->notify_addr[qid] -
(u8 *)hw->mem_resource[hw->notify_region].addr;
}

0 comments on commit a3f8150

Please sign in to comment.