From c1f86306a0261f50c503a73fea40293e2b2b724c Mon Sep 17 00:00:00 2001 From: Intel Date: Wed, 18 Sep 2013 12:00:00 +0200 Subject: [PATCH] virtio: add new driver This PMD can be used in a VM that has a virtio-net NIC. Note: it is a different implementation from the virtio-usermap extension. Signed-off-by: Intel --- config/defconfig_i686-default-linuxapp-gcc | 10 + config/defconfig_i686-default-linuxapp-icc | 10 + config/defconfig_x86_64-default-linuxapp-gcc | 10 + config/defconfig_x86_64-default-linuxapp-icc | 10 + lib/Makefile | 1 + .../common/include/rte_pci_dev_ids.h | 22 +- lib/librte_eal/linuxapp/eal/eal_pci.c | 35 +- lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 1 + lib/librte_ether/rte_ethdev.h | 16 + lib/librte_pmd_virtio/Makefile | 57 ++ lib/librte_pmd_virtio/virtio_ethdev.c | 636 ++++++++++++++++++ lib/librte_pmd_virtio/virtio_ethdev.h | 123 ++++ lib/librte_pmd_virtio/virtio_logs.h | 70 ++ lib/librte_pmd_virtio/virtio_pci.c | 129 ++++ lib/librte_pmd_virtio/virtio_pci.h | 250 +++++++ lib/librte_pmd_virtio/virtio_ring.h | 163 +++++ lib/librte_pmd_virtio/virtio_rxtx.c | 359 ++++++++++ lib/librte_pmd_virtio/virtqueue.c | 70 ++ lib/librte_pmd_virtio/virtqueue.h | 362 ++++++++++ mk/rte.app.mk | 8 +- 20 files changed, 2335 insertions(+), 7 deletions(-) create mode 100644 lib/librte_pmd_virtio/Makefile create mode 100644 lib/librte_pmd_virtio/virtio_ethdev.c create mode 100644 lib/librte_pmd_virtio/virtio_ethdev.h create mode 100644 lib/librte_pmd_virtio/virtio_logs.h create mode 100644 lib/librte_pmd_virtio/virtio_pci.c create mode 100644 lib/librte_pmd_virtio/virtio_pci.h create mode 100644 lib/librte_pmd_virtio/virtio_ring.h create mode 100644 lib/librte_pmd_virtio/virtio_rxtx.c create mode 100644 lib/librte_pmd_virtio/virtqueue.c create mode 100644 lib/librte_pmd_virtio/virtqueue.h diff --git a/config/defconfig_i686-default-linuxapp-gcc b/config/defconfig_i686-default-linuxapp-gcc index ebae2730a9..b1021a8b33 100644 --- a/config/defconfig_i686-default-linuxapp-gcc +++ b/config/defconfig_i686-default-linuxapp-gcc @@ -163,6 +163,16 @@ CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y CONFIG_RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP=n +# +# Compile burst-oriented VIRTIO PMD driver +# +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n + # # Do prefetch of packet data within PMD driver receive function # diff --git a/config/defconfig_i686-default-linuxapp-icc b/config/defconfig_i686-default-linuxapp-icc index bdbbb3e1b9..d38e4deefa 100644 --- a/config/defconfig_i686-default-linuxapp-icc +++ b/config/defconfig_i686-default-linuxapp-icc @@ -164,6 +164,16 @@ CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y CONFIG_RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP=n +# +# Compile burst-oriented VIRTIO PMD driver +# +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n + # # Do prefetch of packet data within PMD driver receive function # diff --git a/config/defconfig_x86_64-default-linuxapp-gcc b/config/defconfig_x86_64-default-linuxapp-gcc index e0f4183b13..cd16062234 100644 --- a/config/defconfig_x86_64-default-linuxapp-gcc +++ b/config/defconfig_x86_64-default-linuxapp-gcc @@ -175,6 +175,16
@@ CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y CONFIG_RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP=n +# +# Compile burst-oriented VIRTIO PMD driver +# +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n + # # Compile example software rings based PMD # diff --git a/config/defconfig_x86_64-default-linuxapp-icc b/config/defconfig_x86_64-default-linuxapp-icc index f01eb0417d..a599da0676 100644 --- a/config/defconfig_x86_64-default-linuxapp-icc +++ b/config/defconfig_x86_64-default-linuxapp-icc @@ -164,6 +164,16 @@ CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y CONFIG_RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP=n +# +# Compile burst-oriented VIRTIO PMD driver +# +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n + # # Compile example software rings based PMD # diff --git a/lib/Makefile b/lib/Makefile index 59f96c927d..c43fb4d2f1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -44,6 +44,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap +DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h index cc8827c0ff..e9c45945cb 100644 --- a/lib/librte_eal/common/include/rte_pci_dev_ids.h +++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h @@ -63,8 +63,9 @@ * This file contains a list of the PCI device IDs recognised by DPDK, which * can be used to fill out an array of structures describing the devices. * - * Currently three families of devices are recognised: those supported by the - * IGB driver, by EM driver, and those supported by the IXGBE driver. + * Currently four families of devices are recognised: those supported by the + * IGB driver, by the EM driver, by the IXGBE driver, and by the virtio + * driver, which is a paravirtualization driver running in a guest virtual machine. * The inclusion of these in an array built using this file depends on the * definition of * RTE_PCI_DEV_ID_DECL_EM @@ -72,6 +73,7 @@ * RTE_PCI_DEV_ID_DECL_IGBVF * RTE_PCI_DEV_ID_DECL_IXGBE * RTE_PCI_DEV_ID_DECL_IXGBEVF + * RTE_PCI_DEV_ID_DECL_VIRTIO * at the time when this file is included.
* * In order to populate an array, the user of this file must define this macro: @@ -112,11 +114,20 @@ #define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) #endif +#ifndef RTE_PCI_DEV_ID_DECL_VIRTIO +#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) +#endif + #ifndef PCI_VENDOR_ID_INTEL /** Vendor ID used by Intel devices */ #define PCI_VENDOR_ID_INTEL 0x8086 #endif +#ifndef PCI_VENDOR_ID_QUMRANET +/** Vendor ID used by virtio devices */ +#define PCI_VENDOR_ID_QUMRANET 0x1AF4 +#endif + /******************** Physical EM devices from e1000_hw.h ********************/ #define E1000_DEV_ID_82542 0x1000 @@ -417,6 +428,12 @@ RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) +/****************** Virtio devices from virtio.h ******************/ + +#define QUMRANET_DEV_ID_VIRTIO 0x1000 + +RTE_PCI_DEV_ID_DECL_VIRTIO(PCI_VENDOR_ID_QUMRANET, QUMRANET_DEV_ID_VIRTIO) + /* * Undef all RTE_PCI_DEV_ID_DECL_* here. */ @@ -425,3 +442,4 @@ RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) #undef RTE_PCI_DEV_ID_DECL_IGBVF #undef RTE_PCI_DEV_ID_DECL_IXGBE #undef RTE_PCI_DEV_ID_DECL_IXGBEVF +#undef RTE_PCI_DEV_ID_DECL_VIRTIO diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 2adc0fc952..73c08283cd 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -65,6 +65,7 @@ #include #include +#include "rte_pci_dev_ids.h" #include "eal_filesystem.h" #include "eal_private.h" @@ -467,10 +468,12 @@ pci_uio_map_resource(struct rte_pci_device *dev) struct dirent *e; DIR *dir; char dirname[PATH_MAX]; + char filename[PATH_MAX]; char dirname2[PATH_MAX]; char devname[PATH_MAX]; /* contains the /dev/uioX */ void *mapaddr; unsigned uio_num; + unsigned long start, size; uint64_t phaddr; uint64_t offset; uint64_t pagesz; @@ -482,7 +485,8 @@ pci_uio_map_resource(struct rte_pci_device *dev) dev->intr_handle.fd = -1; /* secondary processes - use already recorded details */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) + if ((rte_eal_process_type() != RTE_PROC_PRIMARY) && + (dev->id.vendor_id != PCI_VENDOR_ID_QUMRANET)) return (pci_uio_map_secondary(dev)); /* depending on kernel version, uio can be located in uio/uioX @@ -544,17 +548,42 @@ pci_uio_map_resource(struct rte_pci_device *dev) return -1; } + if (dev->id.vendor_id == PCI_VENDOR_ID_QUMRANET) { + /* get portio size */ + rte_snprintf(filename, sizeof(filename), + "%s/portio/port0/size", dirname2); + if (eal_parse_sysfs_value(filename, &size) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse portio size\n", + __func__); + return -1; + } + + /* get portio start */ + rte_snprintf(filename, sizeof(filename), + "%s/portio/port0/start", dirname2); + if (eal_parse_sysfs_value(filename, &start) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", + __func__); + return -1; + } + dev->mem_resource[0].addr = (void *)(uintptr_t)start; + dev->mem_resource[0].len = (uint64_t)size; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx with size=0x%lx\n", start, size); + /* rte_virtio_pmd does not need any other BAR even if available */ + return (0); + } + /* allocate the mapping details for secondary processes*/ if ((uio_res = rte_zmalloc("UIO_RES", sizeof (*uio_res), 0)) == NULL) { RTE_LOG(ERR, EAL, "%s(): cannot store uio mmap details\n", __func__); return (-1); } - + rte_snprintf(devname, sizeof(devname), "/dev/uio%u",
uio_num); rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname); memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr)); - + /* collect info about device mappings */ if ((nb_maps = pci_uio_get_mappings(dirname2, uio_res->maps, sizeof (uio_res->maps) / sizeof (uio_res->maps[0]))) diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index ccf4f619cd..6db8a4a967 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -75,6 +75,7 @@ static struct pci_device_id igbuio_pci_ids[] = { #define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) {PCI_DEVICE(vend, dev)}, #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {PCI_DEVICE(vend, dev)}, #define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {PCI_DEVICE(vend, dev)}, +#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {PCI_DEVICE(vend, dev)}, +#include <rte_pci_dev_ids.h> { 0, }, }; diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index cd4d91e908..002535ae0a 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -1257,6 +1257,15 @@ extern int rte_ixgbe_pmd_init(void); */ extern int rte_ixgbevf_pmd_init(void); +/** + * The initialization function of the driver for Qumranet virtio-net + * Ethernet devices. + * Invoked once at EAL start time. + * @return + * 0 on success + */ +extern int rte_virtio_pmd_init(void); + /** * The initialization function of *all* supported and enabled drivers. * Right now, the following PMDs are supported: @@ -1306,6 +1315,13 @@ int rte_pmd_init_all(void) } #endif /* RTE_LIBRTE_IXGBE_PMD */ +#ifdef RTE_LIBRTE_VIRTIO_PMD + if ((ret = rte_virtio_pmd_init()) != 0) { + RTE_LOG(ERR, PMD, "Cannot init virtio PMD\n"); + return (ret); + } +#endif /* RTE_LIBRTE_VIRTIO_PMD */ + if (ret == -ENODEV) RTE_LOG(ERR, PMD, "No PMD(s) are configured\n"); return (ret); diff --git a/lib/librte_pmd_virtio/Makefile b/lib/librte_pmd_virtio/Makefile new file mode 100644 index 0000000000..764d8e873d --- /dev/null +++ b/lib/librte_pmd_virtio/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_virtio.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c + + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net lib/librte_malloc + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c new file mode 100644 index 0000000000..b5b9d01d8c --- /dev/null +++ b/lib/librte_pmd_virtio/virtio_ethdev.c @@ -0,0 +1,636 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_common.h> + +#include <rte_memory.h> +#include <rte_eal.h> + +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtio_logs.h" +#include "virtqueue.h" + + +static int eth_virtio_dev_init(struct eth_driver *eth_drv, + struct rte_eth_dev *eth_dev); +static int virtio_dev_configure(struct rte_eth_dev *dev); +static int virtio_dev_start(struct rte_eth_dev *dev); +static void virtio_dev_stop(struct rte_eth_dev *dev); +static void virtio_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static int virtio_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete); + +static void virtio_set_hwaddr(struct virtio_hw *hw); +static void virtio_get_hwaddr(struct virtio_hw *hw); + +static void virtio_dev_rx_queue_release(__rte_unused void *rxq); +static void virtio_dev_tx_queue_release(__rte_unused void *txq); + +static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); +static void virtio_dev_stats_reset(struct rte_eth_dev *dev); +static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); + +/* + * The set of PCI devices this driver supports + */ +static struct rte_pci_id pci_id_virtio_map[] = { + +#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, +#include "rte_pci_dev_ids.h" + +{ .vendor_id = 0, /* sentinel */ }, +}; + +int virtio_dev_queue_setup(struct rte_eth_dev *dev, + int queue_type, + uint16_t queue_idx, + uint8_t vtpci_queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + struct virtqueue **pvq) +{ + char vq_name[VIRTQUEUE_MAX_NAME_SZ]; + const struct rte_memzone *mz; + uint16_t vq_size; + int size; + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct virtqueue *vq = NULL; + + /* Write the virtqueue index to the Queue Select Field */ + VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vtpci_queue_idx); + PMD_INIT_LOG(DEBUG, "selecting queue: %d\n", vtpci_queue_idx); + + /* + * Read the virtqueue size from the Queue Size field + * Always power of 2 and if 0 virtqueue does not exist + */ + vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM); + PMD_INIT_LOG(DEBUG, "vq_size: %d nb_desc:%d\n", vq_size, nb_desc); + if (nb_desc == 0) + nb_desc = vq_size; + if (vq_size == 0) { + PMD_INIT_LOG(ERR, "%s: virtqueue does not exist\n", __func__); + return (-EINVAL); + } else if (!rte_is_power_of_2(vq_size)) { + PMD_INIT_LOG(ERR, "%s: virtqueue size is not a power of 2\n", __func__); + return (-EINVAL); + } else if (nb_desc != vq_size) { + PMD_INIT_LOG(ERR, "Warning: nb_desc (%d) is not equal to vq size (%d); falling back to vq size\n", + nb_desc, vq_size); + nb_desc = vq_size; + } + + if (queue_type == VTNET_RQ) { + rte_snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", + dev->data->port_id, queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE); + } else if (queue_type == VTNET_TQ) { + rte_snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d", + dev->data->port_id, queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE); + } else if (queue_type == VTNET_CQ) { + rte_snprintf(vq_name, sizeof(vq_name), "port%d_cvq", + dev->data->port_id); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue), + CACHE_LINE_SIZE); + } + if (vq == NULL) { + PMD_INIT_LOG(ERR, "%s: Cannot allocate virtqueue\n", __func__); + return (-ENOMEM); + } + /* copy the queue name only after the allocation has been checked */ + strncpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); + vq->hw = hw; + vq->port_id = dev->data->port_id; + vq->queue_id = queue_idx; + vq->vq_queue_index = vtpci_queue_idx; + vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN; + vq->vq_nentries = vq_size; + vq->vq_free_cnt = vq_size; + + /* + * Reserve a memzone for vring elements + */ + size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); + vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); + PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d\n", size, vq->vq_ring_size); + + mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, + socket_id, 0, VIRTIO_PCI_VRING_ALIGN); + if (mz == NULL) { + rte_free(vq); + return (-ENOMEM); + } + /* + * Virtio PCI device VIRTIO_PCI_QUEUE_PFN register is 32 bit, + * and only accepts 32 bit page frame numbers. + * Check if the allocated physical memory exceeds 16TB. + */ + if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!\n"); + rte_free(vq); + return (-ENOMEM); + } + memset(mz->addr, 0, mz->len); + vq->mz = mz; + vq->vq_ring_mem = mz->phys_addr; + vq->vq_ring_virt_mem = mz->addr; + PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64"\n", (uint64_t)mz->phys_addr); + PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64"\n", (uint64_t)mz->addr); + vq->virtio_net_hdr_mz = NULL; + vq->virtio_net_hdr_mem = (void *)NULL; + + if (queue_type == VTNET_TQ) { + /* + * For each xmit packet, allocate a virtio_net_hdr + */ + rte_snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", + dev->data->port_id, queue_idx); + vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, + vq_size * sizeof(struct virtio_net_hdr), + socket_id, 0, CACHE_LINE_SIZE); + if (vq->virtio_net_hdr_mz == NULL) { + rte_free(vq); + return (-ENOMEM); + } + vq->virtio_net_hdr_mem = (void *)(uintptr_t)vq->virtio_net_hdr_mz->phys_addr; + memset(vq->virtio_net_hdr_mz->addr, 0, vq_size * sizeof(struct virtio_net_hdr)); + } else if (queue_type == VTNET_CQ) { + /* Allocate a page for control vq command, data and status */ + rte_snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", + dev->data->port_id); + vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, + PAGE_SIZE, socket_id, 0, CACHE_LINE_SIZE); + if (vq->virtio_net_hdr_mz == NULL) { + rte_free(vq); + return (-ENOMEM); + } + vq->virtio_net_hdr_mem = (void *)(uintptr_t)vq->virtio_net_hdr_mz->phys_addr; + memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + } + + /* + * Set guest physical address of the virtqueue + * in VIRTIO_PCI_QUEUE_PFN config register of device + */ + VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, + mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + *pvq = vq; + return (0); +}
+ +static int +virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, + unsigned int socket_id) +{ + struct virtqueue *vq; + uint16_t nb_desc = 0; + int ret; + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + PMD_INIT_FUNC_TRACE(); + ret = virtio_dev_queue_setup(dev, VTNET_CQ, 0, VTNET_SQ_CQ_QUEUE_IDX, + nb_desc, socket_id, &vq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "control vq initialization failed\n"); + return ret; + } + + hw->cvq = vq; + return (0); +}
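[Editor's note] The memzone reserved in virtio_dev_queue_setup() follows the vring layout defined later in virtio_ring.h: the descriptor table and avail ring come first, then the used ring starts on the next 4096-byte boundary. A standalone sketch of the same arithmetic; the 256-entry queue size is an assumed example value (a common QEMU virtio-net default), and ALIGN_CEIL locally models RTE_ALIGN_CEIL:

#include <stdint.h>
#include <stdio.h>

#define VIRTIO_PCI_VRING_ALIGN 4096
#define ALIGN_CEIL(v, a) (((v) + (a) - 1) & ~((uintptr_t)(a) - 1))

/* Mirrors vring_size() from virtio_ring.h: descriptor table plus avail
 * ring, then the used ring on the next 4K boundary. */
static size_t example_vring_size(unsigned int num)
{
	size_t size = num * 16;               /* struct vring_desc is 16 bytes */
	size += 4 + num * 2;                  /* vring_avail header + ring[num] */
	size = ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
	size += 4 + num * 8;                  /* vring_used header + used elems */
	return size;
}

int main(void)
{
	/* 256 entries: 4096 desc bytes + 516 avail bytes -> aligned to 8192,
	 * plus 2052 used bytes = 10244, rounded up to 12288 by the PMD. */
	printf("vring bytes for 256 entries: %zu\n", example_vring_size(256));
	return 0;
}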
+/* + * dev_ops for virtio, bare necessities for basic operation + */ +static struct eth_dev_ops virtio_eth_dev_ops = { + .dev_configure = virtio_dev_configure, + .dev_start = virtio_dev_start, + .dev_stop = virtio_dev_stop, + + .dev_infos_get = virtio_dev_info_get, + .stats_get = virtio_dev_stats_get, + .stats_reset = virtio_dev_stats_reset, + .link_update = virtio_dev_link_update, + .mac_addr_add = NULL, + .mac_addr_remove = NULL, + .rx_queue_setup = virtio_dev_rx_queue_setup, + .rx_queue_release = virtio_dev_rx_queue_release, /* meaningful only with multiple queues */ + .tx_queue_setup = virtio_dev_tx_queue_setup, + .tx_queue_release = virtio_dev_tx_queue_release /* meaningful only with multiple queues */ +}; + +static inline int +virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = link; + struct rte_eth_link *src = &(dev->data->dev_link); + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return (-1); + + return (0); +} + +/** + * Atomically writes the link status information into the global + * structure rte_eth_dev. + * + * @param dev + * - Pointer to the structure rte_eth_dev to write to. + * @param link + * - Pointer to the rte_eth_link structure holding the new link status. + * + * @return + * - On success, zero. + * - On failure, negative value. + */ +static inline int +virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = &(dev->data->dev_link); + struct rte_eth_link *src = link; + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return (-1); + + return (0); +} + +static void +virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (stats) + memcpy(stats, &hw->eth_stats, sizeof(*stats)); +} + +static void +virtio_dev_stats_reset(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + /* Reset software totals */ + memset(&hw->eth_stats, 0, sizeof(hw->eth_stats)); +} + +static void +virtio_set_hwaddr(struct virtio_hw *hw) +{ + vtpci_write_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &hw->mac_addr, ETHER_ADDR_LEN); +} + +static void +virtio_get_hwaddr(struct virtio_hw *hw) +{ + if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &hw->mac_addr, ETHER_ADDR_LEN); + } else { + eth_random_addr(&hw->mac_addr[0]); + virtio_set_hwaddr(hw); + } +}
+ + +static void virtio_negotiate_features(struct virtio_hw *hw) +{ + uint64_t guest_features, mask; + mask = VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN; + mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; + + /* TSO and LRO are only available when their corresponding + * checksum offload feature is also negotiated. + */ + mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN; + mask |= VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN; + mask |= VTNET_LRO_FEATURES; + + /* do not negotiate mergeable RX buffers: a packet must fit in a single rx_mbuf */ + mask |= VIRTIO_NET_F_MRG_RXBUF; + + /* not negotiating INDIRECT descriptor table support */ + mask |= VIRTIO_RING_F_INDIRECT_DESC; + + /* Prepare guest_features: the features the driver wants to support */ + guest_features = VTNET_FEATURES & ~mask; + + /* Read device (host) feature bits */ + hw->host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES); + + /* Negotiate features: the accepted subset of the device feature bits is written back (guest feature bits) */ + hw->guest_features = vtpci_negotiate_features(hw, guest_features); +}
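[Editor's note] Because everything the driver cannot handle is masked out first, the negotiated set reduces to a chain of bitwise ANDs. A standalone sketch of the same computation; the host feature word below is illustrative, not read from a real device:

#include <stdint.h>
#include <stdio.h>

/* Feature bits as defined in virtio_pci.h */
#define VIRTIO_NET_F_CSUM    0x00001
#define VIRTIO_NET_F_MAC     0x00020
#define VIRTIO_NET_F_STATUS  0x10000
#define VIRTIO_NET_F_CTRL_VQ 0x20000

int main(void)
{
	/* After masking, VTNET_FEATURES & ~mask leaves only MAC and STATUS,
	 * just as virtio_negotiate_features() computes. */
	uint64_t guest = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS;

	/* Hypothetical host feature word read from VIRTIO_PCI_HOST_FEATURES */
	uint64_t host = VIRTIO_NET_F_CSUM | VIRTIO_NET_F_MAC |
			VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ;

	/* vtpci_negotiate_features() keeps the intersection and writes it
	 * to VIRTIO_PCI_GUEST_FEATURES; here that is MAC | STATUS. */
	uint64_t negotiated = host & guest;
	printf("negotiated = 0x%llx\n", (unsigned long long)negotiated);
	return 0;
}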
+ +/* + * This function is based on the probe() function in virtio_pci.c + * It returns 0 on success. + */ +static int +eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv, + struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev; + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + if (RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr)) { + PMD_INIT_LOG(ERR, + "MBUF HEADROOM should be enough to hold virtio net hdr\n"); + return (-1); + } + + if (!(rte_eal_get_configuration()->flags & EAL_FLG_HIGH_IOPL)) { + PMD_INIT_LOG(ERR, + "IOPL call failed in EAL init - cannot use virtio PMD driver\n"); + return (-1); + } + + eth_dev->dev_ops = &virtio_eth_dev_ops; + eth_dev->rx_pkt_burst = &virtio_recv_pkts; + eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + pci_dev = eth_dev->pci_dev; + + hw->device_id = pci_dev->id.device_id; + hw->vendor_id = pci_dev->id.vendor_id; + hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr; + + hw->max_rx_queues = VIRTIO_MAX_RX_QUEUES; + hw->max_tx_queues = VIRTIO_MAX_TX_QUEUES; + + /* Reset the device although not necessary at startup */ + vtpci_reset(hw); + + /* Tell the host we've noticed this device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK); + + /* Tell the host we know how to drive the device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); + virtio_negotiate_features(hw); + /* Set up the rx header size for the device */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) + hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr); + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("virtio", ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + ETHER_ADDR_LEN); + return (-ENOMEM); + } + /* Copy the permanent MAC address to: virtio_hw */ + virtio_get_hwaddr(hw); + ether_addr_copy((struct ether_addr *) hw->mac_addr, + &eth_dev->data->mac_addrs[0]); + PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", hw->mac_addr[0], + hw->mac_addr[1], hw->mac_addr[2], hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); + + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) + virtio_dev_cq_queue_setup(eth_dev, SOCKET_ID_ANY); + + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + eth_dev->data->port_id, pci_dev->id.vendor_id, + pci_dev->id.device_id); + return (0); +} + +static struct eth_driver rte_virtio_pmd = { + { + .name = "rte_virtio_pmd", + .id_table = pci_id_virtio_map, +#ifdef RTE_EAL_UNBIND_PORTS + .drv_flags = RTE_PCI_DRV_NEED_IGB_UIO, +#endif + }, + .eth_dev_init = eth_virtio_dev_init, + .dev_private_size = sizeof(struct virtio_adapter), +}; + +/* + * Driver initialization routine. + * Invoked once at EAL init time. + * Registers itself as the [Poll Mode] Driver of PCI virtio devices. + * Returns 0 on success. + */ +int +rte_virtio_pmd_init(void) +{ + rte_eth_driver_register(&rte_virtio_pmd); + return (0); +} + +/* + * Only 1 queue is supported; no queue release operation is needed + */ +static void +virtio_dev_rx_queue_release(__rte_unused void *rxq) +{ +} + +static void +virtio_dev_tx_queue_release(__rte_unused void *txq) +{ +} + +/* + * Configure virtio device + * It returns 0 on success. + */ +static int +virtio_dev_configure(__rte_unused struct rte_eth_dev *dev) +{ + return (0); +}
+ + +static int +virtio_dev_start(struct rte_eth_dev *dev) +{ + uint16_t status; + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Tell the host we've noticed this device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK); + + /* Tell the host we know how to drive the device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); + + hw->adapter_stopped = 0; + + /* Do final configuration before rx/tx engine starts */ + virtio_dev_rxtx_start(dev); + + /* Check VIRTIO_NET_F_STATUS for link status */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &status, sizeof(status)); + if ((status & VIRTIO_NET_S_LINK_UP) == 0) { + PMD_INIT_LOG(ERR, "Port: %d Link is DOWN\n", dev->data->port_id); + return (-EIO); + } else { + PMD_INIT_LOG(DEBUG, "Port: %d Link is UP\n", dev->data->port_id); + } + } + vtpci_reinit_complete(hw); + + /* Notify the backend. + * Otherwise the tap backend might already have stopped its queue due to fullness, + * and the vhost backend would have no chance to be woken up. + */ + virtqueue_notify(dev->data->rx_queues[0]); + PMD_INIT_LOG(DEBUG, "Notified backend at initialization\n"); + return (0); +} + +static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) +{ + struct rte_mbuf *buf; + int i = 0; + PMD_INIT_LOG(DEBUG, "Before freeing rxq used and unused bufs\n"); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]); + while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[0])) != NULL) { + rte_pktmbuf_free_seg(buf); + i++; + } + PMD_INIT_LOG(DEBUG, "freed %d mbufs\n", i); + PMD_INIT_LOG(DEBUG, "After freeing rxq used and unused bufs\n"); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]); + PMD_INIT_LOG(DEBUG, "Before freeing txq used and unused bufs\n"); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]); + i = 0; + while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[0])) != NULL) { + rte_pktmbuf_free_seg(buf); + i++; + } + PMD_INIT_LOG(DEBUG, "freed %d mbufs\n", i); + PMD_INIT_LOG(DEBUG, "After freeing txq used and unused bufs\n"); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]); +} + +/* + * Stop device: disable rx and tx functions to allow for reconfiguring. + */ +static void +virtio_dev_stop(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* reset the NIC */ + vtpci_reset(hw); + virtio_dev_free_mbufs(dev); +} + +static int +virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) +{ + struct rte_eth_link link, old; + uint16_t status; + struct virtio_hw *hw = + VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + memset(&link, 0, sizeof(link)); + virtio_dev_atomic_read_link_status(dev, &link); + old = link; + link.link_duplex = FULL_DUPLEX; + link.link_speed = SPEED_10G; + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + PMD_INIT_LOG(DEBUG, "Get link status from hw\n"); + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &status, sizeof(status)); + if ((status & VIRTIO_NET_S_LINK_UP) == 0) { + link.link_status = 0; + PMD_INIT_LOG(DEBUG, "Port %d is down\n", dev->data->port_id); + } else { + link.link_status = 1; + PMD_INIT_LOG(DEBUG, "Port %d is up\n", dev->data->port_id); + } + } else { + link.link_status = 1; /* link up */ + } + virtio_dev_atomic_write_link_status(dev, &link); + if (old.link_status == link.link_status) + return (-1); + /* changed */ + return (0); +} + +static void +virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) +{ + struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->driver_name = dev->driver->pci_drv.name; + dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; + dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; + dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; + dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; + dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; +} diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h new file mode 100644 index 0000000000..d9b0a070ce --- /dev/null +++ b/lib/librte_pmd_virtio/virtio_ethdev.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_ETHDEV_H_ +#define _VIRTIO_ETHDEV_H_ + +#include <stdint.h> + +#include "virtio_pci.h" + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_10G 10000 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define VIRTIO_MAX_RX_QUEUES 1 +#define VIRTIO_MAX_TX_QUEUES 1 +#define VIRTIO_MAX_MAC_ADDRS 1 +#define VIRTIO_MIN_RX_BUFSIZE 64 +#define VIRTIO_MAX_RX_PKTLEN 1518 + +/* Features desired/implemented by this driver. */ +#define VTNET_FEATURES \ + (VIRTIO_NET_F_MAC | \ + VIRTIO_NET_F_STATUS | \ + VIRTIO_NET_F_CTRL_VQ | \ + VIRTIO_NET_F_CTRL_RX | \ + VIRTIO_NET_F_CTRL_VLAN | \ + VIRTIO_NET_F_CSUM | \ + VIRTIO_NET_F_HOST_TSO4 | \ + VIRTIO_NET_F_HOST_TSO6 | \ + VIRTIO_NET_F_HOST_ECN | \ + VIRTIO_NET_F_GUEST_CSUM | \ + VIRTIO_NET_F_GUEST_TSO4 | \ + VIRTIO_NET_F_GUEST_TSO6 | \ + VIRTIO_NET_F_GUEST_ECN | \ + VIRTIO_NET_F_MRG_RXBUF | \ + VIRTIO_RING_F_INDIRECT_DESC) + +/* + * RX/TX function prototypes + */ +void virtio_dev_rxtx_start(struct rte_eth_dev *dev); + +int virtio_dev_queue_setup(struct rte_eth_dev *dev, + int queue_type, + uint16_t queue_idx, + uint8_t vtpci_queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + struct virtqueue **pvq); + +int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +/* + * Structure to store private data for each driver instance (for each port).
+ */ +struct virtio_adapter { + struct virtio_hw hw; +}; + +#define VIRTIO_DEV_PRIVATE_TO_HW(adapter)\ + (&((struct virtio_adapter *)adapter)->hw) + +/* + * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us + * frames larger than 1514 bytes. We do not yet support software LRO + * via tcp_lro_rx(). + */ +#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) + + +#endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/lib/librte_pmd_virtio/virtio_logs.h b/lib/librte_pmd_virtio/virtio_logs.h new file mode 100644 index 0000000000..f70b8844a9 --- /dev/null +++ b/lib/librte_pmd_virtio/virtio_logs.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_LOGS_H_ +#define _VIRTIO_LOGS_H_ + +#include <rte_log.h> + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT +#define PMD_INIT_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") +#else +#define PMD_INIT_LOG(level, fmt, args...) do { } while(0) +#define PMD_INIT_FUNC_TRACE() do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while(0) +#endif + + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER +#define PMD_DRV_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args) +#else +#define PMD_DRV_LOG(level, fmt, args...) do { } while(0) +#endif + +#endif /* _VIRTIO_LOGS_H_ */
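[Editor's note] Each log family above compiles to nothing unless its matching CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_* switch from the defconfig hunks at the top of this patch is set to y. As a sketch, after rebuilding with CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=y, a call such as the following (used_idx is a hypothetical local, not from this patch):

/* Expands to RTE_LOG(DEBUG, PMD, "%s() rx: used idx=%u\n", __func__, used_idx)
 * when the RX debug switch is on; with =n it compiles away entirely. */
PMD_RX_LOG(DEBUG, "used idx=%u\n", used_idx);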
diff --git a/lib/librte_pmd_virtio/virtio_pci.c b/lib/librte_pmd_virtio/virtio_pci.c new file mode 100644 index 0000000000..a462779e86 --- /dev/null +++ b/lib/librte_pmd_virtio/virtio_pci.c @@ -0,0 +1,129 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> + +#include "virtio_pci.h" +#include "virtio_logs.h" + +void +vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset, + void *dst, int length) +{ + uint64_t off; + uint8_t *d; + int size; + + off = VIRTIO_PCI_CONFIG(hw) + offset; + for (d = dst; length > 0; d += size, off += size, length -= size) { + if (length >= 4) { + size = 4; + *(uint32_t *)d = VIRTIO_READ_REG_4(hw, off); + } else if (length >= 2) { + size = 2; + *(uint16_t *)d = VIRTIO_READ_REG_2(hw, off); + } else { + size = 1; + *d = VIRTIO_READ_REG_1(hw, off); + } + } +} + +void +vtpci_write_dev_config(struct virtio_hw *hw, uint64_t offset, + void *src, int length) +{ + uint64_t off; + uint8_t *s; + int size; + + off = VIRTIO_PCI_CONFIG(hw) + offset; + for (s = src; length > 0; s += size, off += size, length -= size) { + if (length >= 4) { + size = 4; + VIRTIO_WRITE_REG_4(hw, off, *(uint32_t *)s); + } else if (length >= 2) { + size = 2; + VIRTIO_WRITE_REG_2(hw, off, *(uint16_t *)s); + } else { + size = 1; + VIRTIO_WRITE_REG_1(hw, off, *s); + } + } +}
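[Editor's note] The 4/2/1-byte loop matters because legacy virtio config space is reached through fixed-width I/O port accesses. A standalone model of the same chunking, with memcpy standing in for the port reads (the config bytes below are an illustrative, locally administered MAC, not real device data):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copies `length` bytes using the widest step available (4, then 2, then 1),
 * mirroring how vtpci_read_dev_config() walks the device config space. */
static void chunked_read(const uint8_t *cfg, void *dst, int length)
{
	uint8_t *d = dst;
	int size;

	for (; length > 0; d += size, cfg += size, length -= size) {
		if (length >= 4) {
			size = 4;
			memcpy(d, cfg, 4);  /* stands in for VIRTIO_READ_REG_4() */
		} else if (length >= 2) {
			size = 2;
			memcpy(d, cfg, 2);  /* stands in for VIRTIO_READ_REG_2() */
		} else {
			size = 1;
			*d = *cfg;          /* stands in for VIRTIO_READ_REG_1() */
		}
	}
}

int main(void)
{
	const uint8_t cfg[8] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56, 0x01, 0x00 };
	uint8_t mac[6];

	/* A 6-byte MAC read decomposes into one 4-byte and one 2-byte access. */
	chunked_read(cfg, mac, sizeof(mac));
	printf("%02X:%02X:%02X:%02X:%02X:%02X\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}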
+uint64_t +vtpci_negotiate_features(struct virtio_hw *hw, uint64_t guest_features) +{ + uint32_t features; + /* + * Limit negotiated features to what the driver, virtqueue, and + * host all support. + */ + features = (hw->host_features) & guest_features; + + VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features); + return (features); +} + + +void +vtpci_reset(struct virtio_hw *hw) +{ + /* + * Setting the status to RESET sets the host device to + * the original, uninitialized state. + */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + vtpci_get_status(hw); +} + +void +vtpci_reinit_complete(struct virtio_hw *hw) +{ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +uint8_t +vtpci_get_status(struct virtio_hw *hw) +{ + return (VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS)); +} + +void +vtpci_set_status(struct virtio_hw *hw, uint8_t status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status = (uint8_t)(status | vtpci_get_status(hw)); + + VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status); +} diff --git a/lib/librte_pmd_virtio/virtio_pci.h b/lib/librte_pmd_virtio/virtio_pci.h new file mode 100644 index 0000000000..04fc1561a1 --- /dev/null +++ b/lib/librte_pmd_virtio/virtio_pci.h @@ -0,0 +1,250 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_PCI_H_ +#define _VIRTIO_PCI_H_ + +#include <stdint.h> +#include <sys/io.h> + +#include <rte_ethdev.h> + +struct virtqueue; + +/* VirtIO PCI vendor/device ID. */ +#define VIRTIO_PCI_VENDORID 0x1AF4 +#define VIRTIO_PCI_DEVICEID_MIN 0x1000 +#define VIRTIO_PCI_DEVICEID_MAX 0x103F + +/* VirtIO ABI version, this must match exactly. */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* + * VirtIO Header, located in BAR 0.
+ */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue. */ +#define VIRTIO_MSI_NO_VECTOR 0xFFFF + +/* VirtIO device IDs. */ +#define VIRTIO_ID_NETWORK 0x01 +#define VIRTIO_ID_BLOCK 0x02 +#define VIRTIO_ID_CONSOLE 0x03 +#define VIRTIO_ID_ENTROPY 0x04 +#define VIRTIO_ID_BALLOON 0x05 +#define VIRTIO_ID_IOMEMORY 0x06 +#define VIRTIO_ID_9P 0x09 + +/* Status byte for guest to report progress. */ +#define VIRTIO_CONFIG_STATUS_RESET 0x00 +#define VIRTIO_CONFIG_STATUS_ACK 0x01 +#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 +#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 +#define VIRTIO_CONFIG_STATUS_FAILED 0x80 + +/* + * Generate interrupt when the virtqueue ring is + * completely used, even if we've suppressed them. + */ +#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) + +/* + * The guest should never negotiate this feature; it + * is used to detect faulty drivers. + */ +#define VIRTIO_F_BAD_FEATURE (1 << 30) + +/* + * Some VirtIO feature bits (currently bits 28 through 31) are + * reserved for the transport being used (eg. virtio_ring), the + * rest are per-device feature bits. + */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + +/* + * Each virtqueue indirect descriptor list must be physically contiguous. + * To allow us to malloc(9) each list individually, limit the number + * supported to what will fit in one page. With 4KB pages, this is a limit + * of 256 descriptors. If there is ever a need for more, we can switch to + * contigmalloc(9) for the larger allocations, similar to what + * bus_dmamem_alloc(9) does. + * + * Note the sizeof(struct vring_desc) is 16 bytes. + */ +#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16)) + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_CSUM 0x00001 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/ +#define VIRTIO_NET_F_MAC 0x00020 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GSO 0x00040 /* Host handles pkts w/ any GSO type */ +#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 0x00200 /* Guest can handle TSO[6] w/ ECN in.*/ +#define VIRTIO_NET_F_GUEST_UFO 0x00400 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 0x00800 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 0x01000 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 0x02000 /* Host can handle TSO[6] w/ ECN in. 
*/ +#define VIRTIO_NET_F_HOST_UFO 0x04000 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 0x08000 /* Host can merge receive buffers. */ +#define VIRTIO_NET_F_STATUS 0x10000 /* virtio_net_config.status available*/ +#define VIRTIO_NET_F_CTRL_VQ 0x20000 /* Control channel available */ +#define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ +#define VIRTIO_RING_F_INDIRECT_DESC 0x10000000 /* Support for indirect buffer descriptors. */ +/* The guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. + * The host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. + */ +#define VIRTIO_RING_F_EVENT_IDX 0x20000000 + +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ + +/* + * Maximum number of virtqueues per device. + */ +#define VIRTIO_MAX_VIRTQUEUES 8 + +struct virtio_hw { + uint32_t io_base; + uint32_t host_features; + uint32_t guest_features; + + struct virtqueue *cvq; + + uint16_t vtnet_hdr_size; + + uint32_t max_tx_queues; + uint32_t max_rx_queues; + uint16_t device_id; + uint16_t vendor_id; + uint16_t subsystem_device_id; + uint16_t subsystem_vendor_id; + uint8_t revision_id; + uint8_t mac_addr[ETHER_ADDR_LEN]; + int adapter_stopped; + struct rte_eth_stats eth_stats; +}; + +/* + * This structure is only a reference used to read the net-device-specific + * config space; it simply mirrors the layout of that space. + */ +struct virtio_net_config { + /* The config defining mac address (if VIRTIO_NET_F_MAC) */ + uint8_t mac[ETHER_ADDR_LEN]; + /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ + uint16_t status; +}; +/* Value indicated in device config */ +#define VIRTIO_PCI_FLAG_MSIX 0x0020 +/* + * The remaining space is defined by each driver as the per-driver + * configuration space. + */ +#define VIRTIO_PCI_CONFIG(hw) (((hw)->guest_features & VIRTIO_PCI_FLAG_MSIX) ? 24 : 20) + +/* + * How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. + */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring.
*/ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#define VIRTIO_PCI_REG_ADDR(hw, reg) \ + (unsigned short)((hw)->io_base + (reg)) + +#define VIRTIO_READ_REG_1(hw, reg) \ + inb((VIRTIO_PCI_REG_ADDR((hw), (reg)))) +#define VIRTIO_WRITE_REG_1(hw, reg, value) \ + outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg)))) + +#define VIRTIO_READ_REG_2(hw, reg) \ + inw((VIRTIO_PCI_REG_ADDR((hw), (reg)))) +#define VIRTIO_WRITE_REG_2(hw, reg, value) \ + outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg)))) + +#define VIRTIO_READ_REG_4(hw, reg) \ + inl((VIRTIO_PCI_REG_ADDR((hw), (reg)))) +#define VIRTIO_WRITE_REG_4(hw, reg, value) \ + outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg)))) + +static inline int +vtpci_with_feature(struct virtio_hw *hw, uint64_t feature) +{ + return ((hw->guest_features & feature) != 0); +} + +/* + * Function declarations from virtio_pci.c + */ +void vtpci_reset(struct virtio_hw *); + +void vtpci_reinit_complete(struct virtio_hw *); + +uint8_t vtpci_get_status(struct virtio_hw *); + +void vtpci_set_status(struct virtio_hw *, uint8_t); + +int vtpci_with_feature(struct virtio_hw *, uint64_t); + +uint64_t vtpci_negotiate_features(struct virtio_hw *, uint64_t); + +void vtpci_write_dev_config(struct virtio_hw *, uint64_t, void *, int); + +void vtpci_read_dev_config(struct virtio_hw *, uint64_t, void *, int); + +#endif /* _VIRTIO_PCI_H_ */
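[Editor's note] The VIRTIO_CONFIG_STATUS_* defines encode the legacy handshake: each stage ORs another bit into the status register, and writing 0 resets the device. A standalone model of the sequence the PMD performs in eth_virtio_dev_init() and virtio_dev_start(); a plain byte stands in for the real VIRTIO_PCI_STATUS I/O port:

#include <stdint.h>
#include <stdio.h>

#define VIRTIO_CONFIG_STATUS_RESET     0x00
#define VIRTIO_CONFIG_STATUS_ACK       0x01
#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04

static uint8_t status_reg; /* stands in for the VIRTIO_PCI_STATUS port */

/* Mirrors vtpci_set_status(): every write except RESET ORs in the old value. */
static void set_status(uint8_t status)
{
	if (status != VIRTIO_CONFIG_STATUS_RESET)
		status |= status_reg;
	status_reg = status;
}

int main(void)
{
	set_status(VIRTIO_CONFIG_STATUS_RESET);     /* vtpci_reset()            -> 0x00 */
	set_status(VIRTIO_CONFIG_STATUS_ACK);       /* guest noticed the device -> 0x01 */
	set_status(VIRTIO_CONFIG_STATUS_DRIVER);    /* guest can drive it       -> 0x03 */
	set_status(VIRTIO_CONFIG_STATUS_DRIVER_OK); /* vtpci_reinit_complete()  -> 0x07 */
	printf("final status: 0x%02x\n", status_reg);
	return 0;
}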
+
+#endif /* _VIRTIO_PCI_H_ */
diff --git a/lib/librte_pmd_virtio/virtio_ring.h b/lib/librte_pmd_virtio/virtio_ring.h
new file mode 100644
index 0000000000..97ac398a7c
--- /dev/null
+++ b/lib/librte_pmd_virtio/virtio_ring.h
@@ -0,0 +1,163 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_RING_H_
+#define _VIRTIO_RING_H_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT 1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE 2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT 4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me
+ * when you add a buffer. It's unreliable, so it's simply an
+ * optimization. Guest will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY 1
+/* The Guest uses this in avail->flags to advise the Host: don't
+ * interrupt me when you consume a buffer. It's unreliable, so it's
+ * simply an optimization. */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+/* VirtIO ring descriptors: 16 bytes.
+ * These can chain together via "next". */
+struct vring_desc {
+    uint64_t addr;  /* Address (guest-physical). */
+    uint32_t len;   /* Length. */
+    uint16_t flags; /* The flags as indicated above. */
+    uint16_t next;  /* We chain unused descriptors via this. */
+};
+
+struct vring_avail {
+    uint16_t flags;
+    uint16_t idx;
+    uint16_t ring[0];
+};
+
+/* id is a 16-bit index. uint32_t is used here for ids for padding reasons. */
+struct vring_used_elem {
+    /* Index of start of used descriptor chain. */
+    uint32_t id;
+    /* Total length of the descriptor chain which was written to. */
+    uint32_t len;
+};
+
+struct vring_used {
+    uint16_t flags;
+    uint16_t idx;
+    struct vring_used_elem ring[0];
+};
+
+struct vring {
+    unsigned int num;
+    struct vring_desc *desc;
+    struct vring_avail *avail;
+    struct vring_used *used;
+};
+
+/* The standard layout for the ring is a contiguous chunk of memory which
+ * looks like this. We assume num is a power of 2.
+ *
+ * struct vring {
+ *	// The actual descriptors (16 bytes each)
+ *	struct vring_desc desc[num];
+ *
+ *	// A ring of available descriptor heads with free-running index.
+ *	__u16 avail_flags;
+ *	__u16 avail_idx;
+ *	__u16 available[num];
+ *	__u16 used_event_idx;
+ *
+ *	// Padding to the next align boundary.
+ *	char pad[];
+ *
+ *	// A ring of used descriptor heads with free-running index.
+ *	__u16 used_flags;
+ *	__u16 used_idx;
+ *	struct vring_used_elem used[num];
+ *	__u16 avail_event_idx;
+ * };
+ *
+ * NOTE: for VirtIO PCI, align is 4096.
+ */
+
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
+static inline int
+vring_size(unsigned int num, unsigned long align)
+{
+    int size;
+
+    size = num * sizeof(struct vring_desc);
+    size += sizeof(struct vring_avail) + (num * sizeof(uint16_t));
+    size = RTE_ALIGN_CEIL(size, align);
+    size += sizeof(struct vring_used) +
+        (num * sizeof(struct vring_used_elem));
+    return (size);
+}
+
+static inline void
+vring_init(struct vring *vr, unsigned int num, uint8_t *p,
+    unsigned long align)
+{
+    vr->num = num;
+    vr->desc = (struct vring_desc *) p;
+    vr->avail = (struct vring_avail *) (p +
+        num * sizeof(struct vring_desc));
+    vr->used = (void *)
+        RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align);
+}
+
+/*
+ * The following is used with VIRTIO_RING_F_EVENT_IDX.
+ * Assuming a given event_idx value from the other side, if we have
+ * just incremented index from old to new_idx, should we trigger an
+ * event?
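+ *
+ * Worked example (illustrative): with old == 10 and new_idx == 12 the
+ * test below fires only for event_idx 10 or 11. For event_idx == 11,
+ * (uint16_t)(12 - 11 - 1) == 0 is less than (uint16_t)(12 - 10) == 2,
+ * so we notify; for event_idx == 9 the crossing happened before this
+ * window and 2 < 2 fails, so no event is triggered.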
+ */ +static inline int +vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} + +#endif /* _VIRTIO_RING_H_ */ diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c new file mode 100644 index 0000000000..05e17212d7 --- /dev/null +++ b/lib/librte_pmd_virtio/virtio_rxtx.c @@ -0,0 +1,359 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_branch_prediction.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+
+#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
+#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(m, len)
+#else
+#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
+#endif
+
+static inline struct rte_mbuf *
+rte_rxmbuf_alloc(struct rte_mempool *mp)
+{
+    struct rte_mbuf *m;
+
+    m = __rte_mbuf_raw_alloc(mp);
+    __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
+
+    return (m);
+}
+
+static void
+virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_type)
+{
+    struct rte_mbuf *m;
+    int i, nbufs, error, size = vq->vq_nentries;
+    struct vring *vr = &vq->vq_ring;
+    uint8_t *ring_mem = vq->vq_ring_virt_mem;
+
+    PMD_INIT_FUNC_TRACE();
+
+    /*
+     * Reinitialise since virtio port might have been stopped and restarted
+     */
+    memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
+    vring_init(vr, size, ring_mem, vq->vq_alignment);
+    vq->vq_used_cons_idx = 0;
+    vq->vq_desc_head_idx = 0;
+    vq->vq_free_cnt = vq->vq_nentries;
+    memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
+
+    /* Chain all the descriptors in the ring with an END */
+    for (i = 0; i < size - 1; i++)
+        vr->desc[i].next = (uint16_t)(i + 1);
+    vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
+
+    /*
+     * Disable device(host) interrupting guest
+     */
+    virtqueue_disable_intr(vq);
+
+    PMD_INIT_LOG(DEBUG, "vq name: %s\n", vq->vq_name);
+
+    /* Only rx virtqueue needs mbufs to be allocated at initialization */
+    if (queue_type == VTNET_RQ) {
+        if (vq->mpool == NULL)
+            rte_exit(EXIT_FAILURE,
+                "Cannot allocate initial mbufs for rx virtqueue\n");
+        /* Allocate blank mbufs for each rx descriptor */
+        nbufs = 0;
+        error = ENOSPC;
+        while (!virtqueue_full(vq)) {
+            m = rte_rxmbuf_alloc(vq->mpool);
+            if (m == NULL)
+                break;
+            /* Enqueue the allocated buffer */
+            error = virtqueue_enqueue_recv_refill(vq, m);
+            if (error) {
+                rte_pktmbuf_free_seg(m);
+                break;
+            }
+            nbufs++;
+        }
+        PMD_INIT_LOG(DEBUG, "Allocated %d bufs\n", nbufs);
+        VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_RQ_QUEUE_IDX);
+        VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+            vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+    } else {
+        VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_TQ_QUEUE_IDX);
+        VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+            vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+    }
+}
+
+void
+virtio_dev_rxtx_start(struct rte_eth_dev *dev)
+{
+    /*
+     * Start receive and transmit vrings
+     * - Setup vring structure for all queues
+     * - Initialize descriptor for the rx vring
+     * - Allocate blank mbufs for each rx descriptor
+     */
+    PMD_INIT_FUNC_TRACE();
+
+    /* Start rx vring: by default we have 1 rx virtqueue. */
+    virtio_dev_vring_start(dev, dev->data->rx_queues[0], VTNET_RQ);
+    VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
+
+    /* Start tx vring: by default we have 1 tx virtqueue. */
+    virtio_dev_vring_start(dev, dev->data->tx_queues[0], VTNET_TQ);
+    VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
+}
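+
+/*
+ * Usage sketch from the application side (illustrative only; standard
+ * ethdev calls, with port, nb_desc, socket, the conf structures and mp
+ * as placeholder names):
+ *
+ *	rte_eth_dev_configure(port, 1, 1, &port_conf);
+ *	rte_eth_rx_queue_setup(port, 0, nb_desc, socket, &rx_conf, mp);
+ *	rte_eth_tx_queue_setup(port, 0, nb_desc, socket, &tx_conf);
+ *	rte_eth_dev_start(port);
+ */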
+
+int
+virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
+            uint16_t queue_idx,
+            uint16_t nb_desc,
+            unsigned int socket_id,
+            __rte_unused const struct rte_eth_rxconf *rx_conf,
+            struct rte_mempool *mp)
+{
+    uint8_t vtpci_queue_idx = VTNET_SQ_RQ_QUEUE_IDX;
+    struct virtqueue *vq;
+    int ret;
+
+    PMD_INIT_FUNC_TRACE();
+    ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
+            nb_desc, socket_id, &vq);
+    if (ret < 0) {
+        PMD_INIT_LOG(ERR, "rvq initialization failed\n");
+        return ret;
+    }
+    /* Use the mempool supplied by the application for rx mbuf allocation */
+    vq->mpool = mp;
+
+    dev->data->rx_queues[queue_idx] = vq;
+    return (0);
+}
+
+/*
+ * struct rte_eth_dev *dev: Used to update dev
+ * uint16_t nb_desc: Defaults to values read from config space
+ * unsigned int socket_id: Used to allocate memzone
+ * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
+ * uint16_t queue_idx: Just used as an index in dev txq list
+ */
+int
+virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
+            uint16_t queue_idx,
+            uint16_t nb_desc,
+            unsigned int socket_id,
+            __rte_unused const struct rte_eth_txconf *tx_conf)
+{
+    uint8_t vtpci_queue_idx = VTNET_SQ_TQ_QUEUE_IDX;
+    struct virtqueue *vq;
+    int ret;
+
+    PMD_INIT_FUNC_TRACE();
+    ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
+            nb_desc, socket_id, &vq);
+    if (ret < 0) {
+        PMD_INIT_LOG(ERR, "tvq initialization failed\n");
+        return ret;
+    }
+
+    dev->data->tx_queues[queue_idx] = vq;
+    return (0);
+}
+
+static void
+virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
+{
+    int error;
+    /*
+     * Requeue the discarded mbuf. This should always be
+     * successful since it was just dequeued.
+     */
+    error = virtqueue_enqueue_recv_refill(vq, m);
+    if (unlikely(error)) {
+        RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
+        rte_pktmbuf_free_seg(m);
+    }
+}
+
+#define VIRTIO_MBUF_BURST_SZ 64
+uint16_t
+virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+    struct virtqueue *rxvq = rx_queue;
+    struct virtio_hw *hw = rxvq->hw;
+    struct rte_mbuf *rxm, *new_mbuf;
+    uint16_t nb_used, num, nb_rx = 0;
+    uint32_t len[VIRTIO_MBUF_BURST_SZ];
+    struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+    int error;
+    uint32_t i, nb_enqueued = 0;
+
+    nb_used = VIRTQUEUE_NUSED(rxvq);
+
+    rmb();
+
+    num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
+    num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ?
+        num : VIRTIO_MBUF_BURST_SZ);
+    if (num == 0)
+        return 0;
+    num = virtqueue_dequeue_burst(rxvq, rcv_pkts, len, num);
+    PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
+    for (i = 0; i < num; i++) {
+        rxm = rcv_pkts[i];
+        PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
+        if (unlikely(len[i] < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) {
+            PMD_RX_LOG(ERR, "Packet drop\n");
+            nb_enqueued++;
+            virtio_discard_rxbuf(rxvq, rxm);
+            hw->eth_stats.ierrors++;
+            continue;
+        }
+        rxm->pkt.in_port = rxvq->port_id;
+        rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+        rxm->pkt.nb_segs = 1;
+        rxm->pkt.next = NULL;
+        rxm->pkt.pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
+        rxm->pkt.data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
+        VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len);
+        rx_pkts[nb_rx++] = rxm;
+        hw->eth_stats.ibytes += len[i] - sizeof(struct virtio_net_hdr);
+    }
+    hw->eth_stats.ipackets += nb_rx;
+
+    /* Allocate new mbuf for the used descriptor */
+    error = ENOSPC;
+    while (likely(!virtqueue_full(rxvq))) {
+        new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+        if (unlikely(new_mbuf == NULL)) {
+            hw->eth_stats.rx_nombuf++;
+            break;
+        }
+        error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+        if (unlikely(error)) {
+            rte_pktmbuf_free_seg(new_mbuf);
+            break;
+        }
+        nb_enqueued++;
+    }
+    if (likely(nb_enqueued)) {
+        if (unlikely(virtqueue_kick_prepare(rxvq))) {
+            virtqueue_notify(rxvq);
+            PMD_RX_LOG(DEBUG, "Notified\n");
+        }
+    }
+    return (nb_rx);
+}
+
+uint16_t
+virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+    struct virtqueue *txvq = tx_queue;
+    struct rte_mbuf *txm;
+    uint16_t nb_used, nb_tx, count, num, i;
+    int error;
+    uint32_t len[VIRTIO_MBUF_BURST_SZ];
+    struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ];
+    struct virtio_hw *hw;
+
+    nb_tx = count = 0;
+
+    if (unlikely(nb_pkts < 1))
+        return (nb_pkts);
+
+    PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+    nb_used = VIRTQUEUE_NUSED(txvq);
+
+    rmb();
+
+    hw = txvq->hw;
+    num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
+    num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);
+    for (i = 0; i < num; i++)
+        rte_pktmbuf_free_seg(snd_pkts[i]);
+
+    while (count++ < nb_pkts) {
+        if (!virtqueue_full(txvq)) {
+            txm = tx_pkts[nb_tx];
+            /* Enqueue packet buffer */
+            error = virtqueue_enqueue_xmit(txvq, txm);
+            if (unlikely(error)) {
+                /*
+                 * Do not free txm here: on error the upper
+                 * application still owns the packet and will
+                 * free it.
+                 */
+                if (error == ENOSPC)
+                    PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n");
+                else if (error == EMSGSIZE)
+                    PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n");
+                else
+                    PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error);
+                break;
+            }
+            nb_tx++;
+            hw->eth_stats.obytes += txm->pkt.data_len;
+        } else {
+            PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
+            virtqueue_notify(txvq);
+            break;
+        }
+    }
+    hw->eth_stats.opackets += nb_tx;
+
+    if (unlikely(virtqueue_kick_prepare(txvq))) {
+        virtqueue_notify(txvq);
+        PMD_TX_LOG(DEBUG, "Notified backend after xmit\n");
+    }
+    return (nb_tx);
+}
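+
+/*
+ * Note (illustrative): each transmitted packet consumes two descriptors
+ * here, one for the separate virtio_net_hdr and one for the mbuf data,
+ * so a ring of N entries holds at most N/2 packets in flight; see
+ * virtqueue_enqueue_xmit() in virtqueue.h.
+ */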
diff --git a/lib/librte_pmd_virtio/virtqueue.c b/lib/librte_pmd_virtio/virtqueue.c
new file mode 100644
index 0000000000..c9fa164049
--- /dev/null
+++ b/lib/librte_pmd_virtio/virtqueue.c
@@ -0,0 +1,70 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+#include "virtqueue.h"
+#include "virtio_logs.h"
+#include "virtio_pci.h"
+
+void
+virtqueue_disable_intr(struct virtqueue *vq)
+{
+    /*
+     * Set VRING_AVAIL_F_NO_INTERRUPT to hint host
+     * not to interrupt when it consumes packets
+     * Note: this is only considered a hint to the host
+     */
+    vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+}
+
+/*
+ * Two types of mbuf to be cleaned:
+ * 1) mbuf that has been consumed by the backend but not yet reclaimed
+ *    by the driver.
+ * 2) mbuf that hasn't been consumed by the backend.
+ */
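+/*
+ * Illustrative sketch (not part of the driver's call graph): a stop or
+ * close path can drain a queue by detaching mbufs one at a time:
+ *
+ *	struct rte_mbuf *m;
+ *
+ *	while ((m = virtqueue_detatch_unused(vq)) != NULL)
+ *		rte_pktmbuf_free(m);
+ */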
+struct rte_mbuf *
+virtqueue_detatch_unused(struct virtqueue *vq)
+{
+    struct rte_mbuf *cookie;
+    int idx;
+
+    for (idx = 0; idx < vq->vq_nentries; idx++) {
+        if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
+            vq->vq_descx[idx].cookie = NULL;
+            return cookie;
+        }
+    }
+    return (NULL);
+}
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
new file mode 100644
index 0000000000..9e50a86846
--- /dev/null
+++ b/lib/librte_pmd_virtio/virtqueue.h
@@ -0,0 +1,362 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTQUEUE_H_
+#define _VIRTQUEUE_H_
+
+#include <stdint.h>
+
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include "virtio_pci.h"
+#include "virtio_ring.h"
+#include "virtio_logs.h"
+
+#define mb() rte_mb()
+#define wmb() rte_wmb()
+#define rmb() rte_rmb()
+
+#define VIRTQUEUE_MAX_NAME_SZ 32
+
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+    (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
+    (char *)(mb)->buf_addr))
+
+#define VTNET_SQ_RQ_QUEUE_IDX 0
+#define VTNET_SQ_TQ_QUEUE_IDX 1
+#define VTNET_SQ_CQ_QUEUE_IDX 2
+
+enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
+
+/**
+ * The maximum virtqueue size is 2^15. Use that value as the end of
+ * descriptor chain terminator since it will never be a valid index
+ * in the descriptor table. This is used to verify we are correctly
+ * handling vq_free_cnt.
+ */
+#define VQ_RING_DESC_CHAIN_END 32768
+
+/**
+ * Control the RX mode, i.e. promiscuous, allmulti, etc...
+ * All commands require an "out" sg entry containing a 1 byte
+ * state value, zero = disable, non-zero = enable. Commands
+ * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
+ * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
+ */
+#define VIRTIO_NET_CTRL_RX 0
+#define VIRTIO_NET_CTRL_RX_PROMISC 0
+#define VIRTIO_NET_CTRL_RX_ALLMULTI 1
+#define VIRTIO_NET_CTRL_RX_ALLUNI 2
+#define VIRTIO_NET_CTRL_RX_NOMULTI 3
+#define VIRTIO_NET_CTRL_RX_NOUNI 4
+#define VIRTIO_NET_CTRL_RX_NOBCAST 5
+
+/**
+ * Control VLAN filtering
+ *
+ * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filtered by the hypervisor. Del is the
+ * opposite of add. Both commands expect an out entry containing a 2
+ * byte VLAN ID. VLAN filtering is available with the
+ * VIRTIO_NET_F_CTRL_VLAN feature bit.
+ */
+#define VIRTIO_NET_CTRL_VLAN 2
+#define VIRTIO_NET_CTRL_VLAN_ADD 0
+#define VIRTIO_NET_CTRL_VLAN_DEL 1
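+
+/*
+ * Informational example of the command layout defined by the virtio
+ * spec (not driver code): adding VLAN id 100 through the control
+ * virtqueue is a descriptor chain of a class/command header
+ * { VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD }, one 2-byte "out"
+ * entry carrying the id (uint16_t vid = 100;), and a 1-byte "in" entry
+ * the host fills with an ack status.
+ */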
+
+struct virtqueue {
+    char vq_name[VIRTQUEUE_MAX_NAME_SZ];
+    struct virtio_hw *hw; /**< virtio_hw structure pointer. */
+    const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
+    const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
+    struct rte_mempool *mpool; /**< mempool for mbuf allocation */
+    uint16_t queue_id; /**< DPDK queue index. */
+    uint8_t port_id; /**< Device port identifier. */
+
+    void *vq_ring_virt_mem; /**< linear address of vring */
+    int vq_alignment;
+    int vq_ring_size;
+    phys_addr_t vq_ring_mem; /**< physical address of vring */
+
+    struct vring vq_ring; /**< vring keeping desc, used and avail */
+    uint16_t vq_free_cnt; /**< num of desc available */
+    uint16_t vq_nentries; /**< vring desc numbers */
+    uint16_t vq_queue_index; /**< PCI queue index */
+    /**
+     * Head of the free chain in the descriptor table. If
+     * there are no free descriptors, this will be set to
+     * VQ_RING_DESC_CHAIN_END.
+     */
+    uint16_t vq_desc_head_idx;
+    /**
+     * Last consumed descriptor in the used table,
+     * trails vq_ring.used->idx.
+     */
+    uint16_t vq_used_cons_idx;
+    void *virtio_net_hdr_mem; /**< hdr for each xmit packet */
+
+    struct vq_desc_extra {
+        void *cookie;
+        uint16_t ndescs;
+    } vq_descx[0];
+};
+
+/**
+ * This is the first element of the scatter-gather list. If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ */
+struct virtio_net_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset */
+    uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /**< GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /**< GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /**< TCP has ECN set */
+    uint8_t gso_type;
+    uint16_t hdr_len; /**< Ethernet + IP + tcp/udp hdrs */
+    uint16_t gso_size; /**< Bytes to append to hdr_len per frame */
+    uint16_t csum_start; /**< Position to start checksumming from */
+    uint16_t csum_offset; /**< Offset after that to place checksum */
+};
+
+/**
+ * This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated.
+ */
+struct virtio_net_hdr_mrg_rxbuf {
+    struct virtio_net_hdr hdr;
+    uint16_t num_buffers; /**< Number of merged rx buffers */
+};
+
+/**
+ * Tell the backend not to interrupt us.
+ */
+void virtqueue_disable_intr(struct virtqueue *vq);
+/**
+ * Dump virtqueue internal structures, for debug purpose only.
+ */
+void virtqueue_dump(struct virtqueue *vq);
+/**
+ * Get all mbufs to be freed.
+ */
+struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq);
+
+static inline int
+virtqueue_full(const struct virtqueue *vq)
+{
+    return (vq->vq_free_cnt == 0);
+}
+
+#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
+
+static inline void
+vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
+{
+    uint16_t avail_idx;
+    /*
+     * Place the head of the descriptor chain into the next slot and make
+     * it usable to the host. The chain is made available now rather than
+     * deferring to virtqueue_notify() in the hopes that if the host is
+     * currently running on another CPU, we can keep it processing the new
+     * descriptor.
+     */
+    avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1));
+    vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+    mb();
+    vq->vq_ring.avail->idx++;
+}
+
+static inline int
+virtqueue_kick_prepare(struct virtqueue *vq)
+{
+    return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
+}
+
+static inline void
+virtqueue_notify(struct virtqueue *vq)
+{
+    /*
+     * Ensure the updated avail->idx is visible to the host. No explicit
+     * mb() is needed here: for virtio on IA the notification below is an
+     * I/O port operation, which is itself serializing.
+ */ + VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index); +} + +static inline void +vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) +{ + struct vring_desc *dp; + struct vq_desc_extra *dxp; + + dp = &vq->vq_ring.desc[desc_idx]; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); + if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { + while (dp->flags & VRING_DESC_F_NEXT) { + dp = &vq->vq_ring.desc[dp->next]; + } + } + dxp->ndescs = 0; + + /* + * We must append the existing free chain, if any, to the end of + * newly freed chain. If the virtqueue was completely used, then + * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). + */ + dp->next = vq->vq_desc_head_idx; + vq->vq_desc_head_idx = desc_idx; +} + +static inline int +virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) +{ + struct vq_desc_extra *dxp; + struct vring_desc *start_dp; + uint16_t needed; + uint16_t head_idx, idx; + needed = 1; + + if (unlikely(vq->vq_free_cnt == 0)) + return (-ENOSPC); + if (unlikely(vq->vq_free_cnt < needed)) + return (-EMSGSIZE); + + head_idx = vq->vq_desc_head_idx; + if (unlikely(head_idx >= vq->vq_nentries)) + return (-EFAULT); + + idx = head_idx; + dxp = &vq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = vq->vq_ring.desc; + start_dp[idx].addr = + (uint64_t) (cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr)); + start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr); + start_dp[idx].flags = VRING_DESC_F_WRITE; + idx = start_dp[idx].next; + vq->vq_desc_head_idx = idx; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); + vq_ring_update_avail(vq, head_idx); + + return (0); +} + +static inline int +virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) +{ + struct vq_desc_extra *dxp; + struct vring_desc *start_dp; + uint16_t needed; + uint16_t head_idx, idx; + needed = 2; + if (unlikely(txvq->vq_free_cnt == 0)) + return (-ENOSPC); + if (unlikely(txvq->vq_free_cnt < needed)) + return (-EMSGSIZE); + head_idx = txvq->vq_desc_head_idx; + if (unlikely(head_idx >= txvq->vq_nentries)) + return (-EFAULT); + + idx = head_idx; + dxp = &txvq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = txvq->vq_ring.desc; + start_dp[idx].addr = (uint64_t)(uintptr_t)txvq->virtio_net_hdr_mem + idx * sizeof(struct virtio_net_hdr); + start_dp[idx].len = sizeof(struct virtio_net_hdr); + start_dp[idx].flags = VRING_DESC_F_NEXT; + idx = start_dp[idx].next; + start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie); + start_dp[idx].len = cookie->pkt.data_len; + start_dp[idx].flags = 0; + idx = start_dp[idx].next; + txvq->vq_desc_head_idx = idx; + txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); + vq_ring_update_avail(txvq, head_idx); + + return (0); +} + +static inline uint16_t +virtqueue_dequeue_burst(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint32_t *len, uint16_t num) +{ + struct vring_used_elem *uep; + struct rte_mbuf *cookie; + uint16_t used_idx, desc_idx; + uint16_t i; + /* Caller does the check */ + for (i = 0; i < num ; i ++) { + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + desc_idx = (uint16_t) uep->id; + cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; + if (unlikely(cookie == NULL)) { + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + vq->vq_used_cons_idx); 
+ break; + } + len[i] = uep->len; + rx_pkts[i] = cookie; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + vq->vq_descx[desc_idx].cookie = NULL; + } + return (i); +} + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP +#define VIRTQUEUE_DUMP(vq) do { \ + uint16_t used_idx, nused; \ + used_idx = (vq)->vq_ring.used->idx; \ + nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ + PMD_INIT_LOG(DEBUG, \ + "VQ: %s - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ + " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ + " avail.flags=0x%x; used.flags=0x%x\n", \ + (vq)->vq_name, (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ + (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \ + (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \ + (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \ +} while (0) +#else +#define VIRTQUEUE_DUMP(vq) do { } while (0) +#endif + +#endif /* _VIRTQUEUE_H_ */ diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 324021f17b..728e5b509e 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -72,8 +72,8 @@ ifeq ($(CONFIG_RTE_LIBRTE_IXGBE_PMD),y) LDLIBS += -lrte_pmd_ixgbe endif -ifeq ($(CONFIG_RTE_LIBRTE_MBUF),y) -LDLIBS += -lrte_mbuf +ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y) +LDLIBS += -lrte_pmd_virtio endif ifeq ($(CONFIG_RTE_LIBRTE_CMDLINE),y) @@ -112,6 +112,10 @@ endif LDLIBS += --start-group +ifeq ($(CONFIG_RTE_LIBRTE_MBUF),y) +LDLIBS += -lrte_mbuf +endif + ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y) LDLIBS += -lethdev endif
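
For reference, a minimal receive/transmit loop over this PMD could look as follows (illustrative sketch only, using the public burst API; port 0 and queue 0 are placeholders and error handling is omitted):

	struct rte_mbuf *pkts[32];
	uint16_t nb_rx, nb_tx, i;

	for (;;) {
		nb_rx = rte_eth_rx_burst(0, 0, pkts, 32);
		if (nb_rx == 0)
			continue;
		nb_tx = rte_eth_tx_burst(0, 0, pkts, nb_rx);
		for (i = nb_tx; i < nb_rx; i++)
			rte_pktmbuf_free(pkts[i]); /* drop what was not sent */
	}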