1,171 changes: 866 additions & 305 deletions ebpf/rss.bpf.skeleton.h

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gitdm.config
Expand Up @@ -33,6 +33,7 @@ EmailMap contrib/gitdm/domain-map

GroupMap contrib/gitdm/group-map-cadence Cadence Design Systems
GroupMap contrib/gitdm/group-map-codeweavers CodeWeavers
GroupMap contrib/gitdm/group-map-facebook Facebook
GroupMap contrib/gitdm/group-map-ibm IBM
GroupMap contrib/gitdm/group-map-janustech Janus Technologies
GroupMap contrib/gitdm/group-map-netflix Netflix
Expand Down
1 change: 1 addition & 0 deletions hw/core/machine.c
Expand Up @@ -40,6 +40,7 @@
#include "hw/virtio/virtio-pci.h"

GlobalProperty hw_compat_7_2[] = {
{ "e1000e", "migrate-timadj", "off" },
{ "virtio-mem", "x-early-migration", "false" },
};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
Expand Down
5 changes: 5 additions & 0 deletions hw/net/Kconfig
Expand Up @@ -44,6 +44,11 @@ config E1000E_PCI_EXPRESS
default y if PCI_DEVICES
depends on PCI_EXPRESS && MSI_NONBROKEN

config IGB_PCI_EXPRESS
bool
default y if PCI_DEVICES
depends on PCI_EXPRESS && MSI_NONBROKEN

config RTL8139_PCI
bool
default y if PCI_DEVICES
Expand Down
259 changes: 120 additions & 139 deletions hw/net/e1000.c

Large diffs are not rendered by default.

102 changes: 102 additions & 0 deletions hw/net/e1000_common.h
@@ -0,0 +1,102 @@
/*
* QEMU e1000(e) emulation - shared definitions
*
* Copyright (c) 2008 Qumranet
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#ifndef HW_NET_E1000_COMMON_H
#define HW_NET_E1000_COMMON_H

#include "e1000_regs.h"

/*
 * defreg(x) declares an enumerator named x whose value is the E1000_x
 * constant shifted right by two bits (i.e. divided by four).
 * NOTE(review): presumably E1000_x is the register's byte offset, making the
 * enumerator an index into an array of 32-bit registers - confirm against
 * e1000_regs.h.
 */
#define defreg(x) x = (E1000_##x >> 2)
/*
 * Register indices shared by the e1000 and e1000e device models, one
 * enumerator per E1000_* register constant listed below.
 */
enum {
defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH0),
defreg(RDBAL0), defreg(RDH0), defreg(RDLEN0), defreg(RDT0),
defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
defreg(TDLEN1), defreg(TDBAL1), defreg(TDBAH1), defreg(TDH1),
defreg(TDT1), defreg(TORH), defreg(TORL), defreg(TOTH),
defreg(TOTL), defreg(TPR), defreg(TPT), defreg(TXDCTL),
defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS),
defreg(VFTA), defreg(VET), defreg(RDTR), defreg(RADV),
defreg(TADV), defreg(ITR), defreg(SCC), defreg(ECOL),
defreg(MCC), defreg(LATECOL), defreg(COLC), defreg(DC),
defreg(TNCRS), defreg(SEQEC), defreg(CEXTERR), defreg(RLEC),
defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), defreg(XOFFTXC),
defreg(FCRUC), defreg(AIT), defreg(TDFH), defreg(TDFT),
defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(WUC),
defreg(WUS), defreg(POEMB), defreg(PBS), defreg(RDFH),
defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC),
defreg(PBM), defreg(IPAV), defreg(IP4AT), defreg(IP6AT),
defreg(WUPM), defreg(FFLT), defreg(FFMT), defreg(FFVT),
defreg(TARC0), defreg(TARC1), defreg(IAM), defreg(EXTCNF_CTRL),
defreg(GCR), defreg(TIMINCA), defreg(EIAC), defreg(CTRL_EXT),
defreg(IVAR), defreg(MFUTP01), defreg(MFUTP23), defreg(MANC2H),
defreg(MFVAL), defreg(MDEF), defreg(FACTPS), defreg(FTFT),
defreg(RUC), defreg(ROC), defreg(RFC), defreg(RJC),
defreg(PRC64), defreg(PRC127), defreg(PRC255), defreg(PRC511),
defreg(PRC1023), defreg(PRC1522), defreg(PTC64), defreg(PTC127),
defreg(PTC255), defreg(PTC511), defreg(PTC1023), defreg(PTC1522),
defreg(GORCL), defreg(GORCH), defreg(GOTCL), defreg(GOTCH),
defreg(RNBC), defreg(BPRC), defreg(MPRC), defreg(RFCTL),
defreg(PSRCTL), defreg(MPTC), defreg(BPTC), defreg(TSCTFC),
defreg(IAC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC),
defreg(TSCTC), defreg(RXCSUM), defreg(FUNCTAG), defreg(GSCL_1),
defreg(GSCL_2), defreg(GSCL_3), defreg(GSCL_4), defreg(GSCN_0),
defreg(GSCN_1), defreg(GSCN_2), defreg(GSCN_3), defreg(GCR2),
defreg(RAID), defreg(RSRPD), defreg(TIDV), defreg(EITR),
defreg(MRQC), defreg(RETA), defreg(RSSRK), defreg(RDBAH1),
defreg(RDBAL1), defreg(RDLEN1), defreg(RDH1), defreg(RDT1),
defreg(PBACLR), defreg(FCAL), defreg(FCAH), defreg(FCT),
defreg(FCRTH), defreg(FCRTL), defreg(FCTTV), defreg(FCRTV),
defreg(FLA), defreg(EEWR), defreg(FLOP), defreg(FLOL),
defreg(FLSWCTL), defreg(FLSWCNT), defreg(RXDCTL), defreg(RXDCTL1),
defreg(MAVTV0), defreg(MAVTV1), defreg(MAVTV2), defreg(MAVTV3),
defreg(TXSTMPL), defreg(TXSTMPH), defreg(SYSTIML), defreg(SYSTIMH),
defreg(RXCFGL), defreg(RXUDP), defreg(TIMADJL), defreg(TIMADJH),
defreg(RXSTMPH), defreg(RXSTMPL), defreg(RXSATRL), defreg(RXSATRH),
defreg(FLASHT), defreg(TIPG), defreg(RDH), defreg(RDT),
defreg(RDLEN), defreg(RDBAH), defreg(RDBAL),
defreg(TXDCTL1),
defreg(FLSWDATA),
defreg(CTRL_DUP),
defreg(EXTCNF_SIZE),
defreg(EEMNGCTL),
defreg(EEMNGDATA),
defreg(FLMNGCTL),
defreg(FLMNGDATA),
defreg(FLMNGCNT),
defreg(TSYNCRXCTL),
defreg(TSYNCTXCTL),

/* Aliases */
defreg(RDH0_A), defreg(RDT0_A), defreg(RDTR_A), defreg(RDFH_A),
defreg(RDFT_A), defreg(TDH_A), defreg(TDT_A), defreg(TIDV_A),
defreg(TDFH_A), defreg(TDFT_A), defreg(RA_A), defreg(RDBAL0_A),
defreg(TDBAL_A), defreg(TDLEN_A), defreg(VFTA_A), defreg(RDLEN0_A),
defreg(FCRTL_A), defreg(FCRTH_A)
};

#endif
958 changes: 3 additions & 955 deletions hw/net/e1000_regs.h

Large diffs are not rendered by default.

102 changes: 57 additions & 45 deletions hw/net/e1000e.c
@@ -1,37 +1,37 @@
/*
* QEMU INTEL 82574 GbE NIC emulation
*
* Software developer's manuals:
* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
*
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
* QEMU INTEL 82574 GbE NIC emulation
*
* Software developer's manuals:
* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
*
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#include "qemu/osdep.h"
#include "qemu/units.h"
Expand All @@ -42,13 +42,13 @@
#include "qemu/range.h"
#include "sysemu/sysemu.h"
#include "hw/hw.h"
#include "hw/net/mii.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/qdev-properties.h"
#include "migration/vmstate.h"

#include "e1000_regs.h"

#include "e1000_common.h"
#include "e1000x_common.h"
#include "e1000e_core.h"

Expand Down Expand Up @@ -81,6 +81,7 @@ struct E1000EState {

E1000ECore core;
bool init_vet;
bool timadj;
};

#define E1000E_MMIO_IDX 0
Expand Down Expand Up @@ -239,9 +240,9 @@ static NetClientInfo net_e1000e_info = {
};

/*
* EEPROM (NVM) contents documented in Table 36, section 6.1
* and generally 6.1.2 Software accessed words.
*/
* EEPROM (NVM) contents documented in Table 36, section 6.1
* and generally 6.1.2 Software accessed words.
*/
static const uint16_t e1000e_eeprom_template[64] = {
/* Address | Compat. | ImVer | Compat. */
0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff,
Expand Down Expand Up @@ -512,11 +513,11 @@ static void e1000e_pci_uninit(PCIDevice *pci_dev)
msi_uninit(pci_dev);
}

static void e1000e_qdev_reset(DeviceState *dev)
static void e1000e_qdev_reset_hold(Object *obj)
{
E1000EState *s = E1000E(dev);
E1000EState *s = E1000E(obj);

trace_e1000e_cb_qdev_reset();
trace_e1000e_cb_qdev_reset_hold();

e1000e_core_reset(&s->core);

Expand Down Expand Up @@ -553,6 +554,12 @@ static int e1000e_post_load(void *opaque, int version_id)
return e1000e_core_post_load(&s->core);
}

static bool e1000e_migrate_timadj(void *opaque, int version_id)
{
E1000EState *s = opaque;
return s->timadj;
}

static const VMStateDescription e1000e_vmstate_tx = {
.name = "e1000e-tx",
.version_id = 1,
Expand Down Expand Up @@ -630,12 +637,11 @@ static const VMStateDescription e1000e_vmstate = {
VMSTATE_E1000E_INTR_DELAY_TIMER(core.tidv, E1000EState),

VMSTATE_E1000E_INTR_DELAY_TIMER(core.itr, E1000EState),
VMSTATE_BOOL(core.itr_intr_pending, E1000EState),
VMSTATE_UNUSED(1),

VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(core.eitr, E1000EState,
E1000E_MSIX_VEC_NUM),
VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState,
E1000E_MSIX_VEC_NUM),
VMSTATE_UNUSED(E1000E_MSIX_VEC_NUM),

VMSTATE_UINT32(core.itr_guest_value, E1000EState),
VMSTATE_UINT32_ARRAY(core.eitr_guest_value, E1000EState,
Expand All @@ -645,6 +651,9 @@ static const VMStateDescription e1000e_vmstate = {

VMSTATE_STRUCT_ARRAY(core.tx, E1000EState, E1000E_NUM_QUEUES, 0,
e1000e_vmstate_tx, struct e1000e_tx),

VMSTATE_INT64_TEST(core.timadj, E1000EState, e1000e_migrate_timadj),

VMSTATE_END_OF_LIST()
}
};
Expand All @@ -663,12 +672,14 @@ static Property e1000e_properties[] = {
DEFINE_PROP_SIGNED("subsys", E1000EState, subsys, 0,
e1000e_prop_subsys, uint16_t),
DEFINE_PROP_BOOL("init-vet", E1000EState, init_vet, true),
DEFINE_PROP_BOOL("migrate-timadj", E1000EState, timadj, true),
DEFINE_PROP_END_OF_LIST(),
};

static void e1000e_class_init(ObjectClass *class, void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
ResettableClass *rc = RESETTABLE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);

c->realize = e1000e_pci_realize;
Expand All @@ -679,8 +690,9 @@ static void e1000e_class_init(ObjectClass *class, void *data)
c->romfile = "efi-e1000e.rom";
c->class_id = PCI_CLASS_NETWORK_ETHERNET;

rc->phases.hold = e1000e_qdev_reset_hold;

dc->desc = "Intel 82574L GbE Controller";
dc->reset = e1000e_qdev_reset;
dc->vmsd = &e1000e_vmstate;

e1000e_prop_disable_vnet = qdev_prop_uint8;
Expand Down
719 changes: 387 additions & 332 deletions hw/net/e1000e_core.c

Large diffs are not rendered by default.

70 changes: 35 additions & 35 deletions hw/net/e1000e_core.h
@@ -1,37 +1,37 @@
/*
* Core code for QEMU e1000e emulation
*
* Software developer's manuals:
* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
*
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
* Core code for QEMU e1000e emulation
*
* Software developer's manuals:
* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
*
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#ifndef HW_NET_E1000E_CORE_H
#define HW_NET_E1000E_CORE_H
Expand Down Expand Up @@ -95,10 +95,8 @@ struct E1000Core {
E1000IntrDelayTimer tidv;

E1000IntrDelayTimer itr;
bool itr_intr_pending;

E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM];
bool eitr_intr_pending[E1000E_MSIX_VEC_NUM];

VMChangeStateEntry *vmstate;

Expand All @@ -114,6 +112,8 @@ struct E1000Core {
void (*owner_start_recv)(PCIDevice *d);

uint32_t msi_causes_pending;

int64_t timadj;
};

void
Expand Down
38 changes: 33 additions & 5 deletions hw/net/e1000x_common.c
Expand Up @@ -24,9 +24,12 @@

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "hw/net/mii.h"
#include "hw/pci/pci_device.h"
#include "net/eth.h"
#include "net/net.h"

#include "e1000_common.h"
#include "e1000x_common.h"

#include "trace.h"
Expand All @@ -45,9 +48,9 @@ bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac)
return true;
}

bool e1000x_is_vlan_packet(const uint8_t *buf, uint16_t vet)
bool e1000x_is_vlan_packet(const void *buf, uint16_t vet)
{
uint16_t eth_proto = lduw_be_p(buf + 12);
uint16_t eth_proto = lduw_be_p(&PKT_GET_ETH_HDR(buf)->h_proto);
bool res = (eth_proto == vet);

trace_e1000x_vlan_is_vlan_pkt(res, eth_proto, vet);
Expand All @@ -66,7 +69,7 @@ bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf)
}
ra[0] = cpu_to_le32(rp[0]);
ra[1] = cpu_to_le32(rp[1]);
if (!memcmp(buf, (uint8_t *)ra, 6)) {
if (!memcmp(buf, (uint8_t *)ra, ETH_ALEN)) {
trace_e1000x_rx_flt_ucast_match((int)(rp - mac - RA) / 2,
MAC_ARG(buf));
return true;
Expand Down Expand Up @@ -152,8 +155,8 @@ void e1000x_reset_mac_addr(NICState *nic, uint32_t *mac_regs,
void e1000x_update_regs_on_autoneg_done(uint32_t *mac, uint16_t *phy)
{
e1000x_update_regs_on_link_up(mac, phy);
phy[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
phy[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
phy[MII_ANLPAR] |= MII_ANLPAR_ACK;
phy[MII_BMSR] |= MII_BMSR_AN_COMP;
trace_e1000x_link_negotiation_done();
}

Expand Down Expand Up @@ -265,3 +268,28 @@ e1000x_read_tx_ctx_descr(struct e1000_context_desc *d,
props->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
props->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
}

/*
 * Latch the current device time into a pair of 32-bit registers.
 *
 * The time is @timadj plus the virtual clock (in ns) scaled by
 * incvalue / (incperiod * 16), where incvalue and incperiod are the two
 * fields of mac[TIMINCA]; an incperiod field of zero is treated as one.
 *
 * @mac:    register array
 * @timadj: offset added to the scaled clock
 * @lo:     index in @mac that receives the low 32 bits
 * @hi:     index in @mac that receives the high 32 bits
 */
void e1000x_timestamp(uint32_t *mac, int64_t timadj, size_t lo, size_t hi)
{
    const uint32_t reg = mac[TIMINCA];
    const uint32_t step = reg & E1000_TIMINCA_INCVALUE_MASK;
    const uint32_t period = MAX(reg >> E1000_TIMINCA_INCPERIOD_SHIFT, 1);
    const int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    int64_t stamp = timadj + muldiv64(now, step, period * 16);

    mac[lo] = (uint32_t)stamp;
    mac[hi] = (uint32_t)(stamp >> 32);
}

/*
 * Store @val into mac[TIMINCA] and rebase *@timadj.
 *
 * e1000x_timestamp() derives the device time as
 *     timadj + ns * incvalue / (incperiod * 16)
 * so when incvalue/incperiod change, *@timadj must move by
 * (old scaled clock - new scaled clock) for the derived time to stay
 * continuous across the write.
 *
 * @mac:    register array; mac[TIMINCA] is read for the old setting and
 *          then overwritten with @val
 * @timadj: in/out offset later passed to e1000x_timestamp()
 * @val:    new TIMINCA register value
 */
void e1000x_set_timinca(uint32_t *mac, int64_t *timadj, uint32_t val)
{
    int64_t ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    uint32_t old_val = mac[TIMINCA];
    uint32_t old_incvalue = old_val & E1000_TIMINCA_INCVALUE_MASK;
    uint32_t old_incperiod = MAX(old_val >> E1000_TIMINCA_INCPERIOD_SHIFT, 1);
    uint32_t incvalue = val & E1000_TIMINCA_INCVALUE_MASK;
    uint32_t incperiod = MAX(val >> E1000_TIMINCA_INCPERIOD_SHIFT, 1);
    /*
     * Use exactly the scaling e1000x_timestamp() uses (divisor includes the
     * factor 16) so both round identically, and hold the results in signed
     * variables so that the difference below may be negative.
     */
    int64_t adj_old = muldiv64(ns, old_incvalue, old_incperiod * 16);
    int64_t adj_new = muldiv64(ns, incvalue, incperiod * 16);

    mac[TIMINCA] = val;
    *timadj += adj_old - adj_new;
}
133 changes: 31 additions & 102 deletions hw/net/e1000x_common.h
@@ -1,108 +1,34 @@
/*
* QEMU e1000(e) emulation - shared code
*
* Copyright (c) 2008 Qumranet
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
* QEMU e1000(e) emulation - shared code
*
* Copyright (c) 2008 Qumranet
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#ifndef HW_NET_E1000X_COMMON_H
#define HW_NET_E1000X_COMMON_H

#include "e1000_regs.h"

#define defreg(x) x = (E1000_##x >> 2)
enum {
defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH0),
defreg(RDBAL0), defreg(RDH0), defreg(RDLEN0), defreg(RDT0),
defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
defreg(TDLEN1), defreg(TDBAL1), defreg(TDBAH1), defreg(TDH1),
defreg(TDT1), defreg(TORH), defreg(TORL), defreg(TOTH),
defreg(TOTL), defreg(TPR), defreg(TPT), defreg(TXDCTL),
defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS),
defreg(VFTA), defreg(VET), defreg(RDTR), defreg(RADV),
defreg(TADV), defreg(ITR), defreg(SCC), defreg(ECOL),
defreg(MCC), defreg(LATECOL), defreg(COLC), defreg(DC),
defreg(TNCRS), defreg(SEQEC), defreg(CEXTERR), defreg(RLEC),
defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), defreg(XOFFTXC),
defreg(FCRUC), defreg(AIT), defreg(TDFH), defreg(TDFT),
defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(WUC),
defreg(WUS), defreg(POEMB), defreg(PBS), defreg(RDFH),
defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC),
defreg(PBM), defreg(IPAV), defreg(IP4AT), defreg(IP6AT),
defreg(WUPM), defreg(FFLT), defreg(FFMT), defreg(FFVT),
defreg(TARC0), defreg(TARC1), defreg(IAM), defreg(EXTCNF_CTRL),
defreg(GCR), defreg(TIMINCA), defreg(EIAC), defreg(CTRL_EXT),
defreg(IVAR), defreg(MFUTP01), defreg(MFUTP23), defreg(MANC2H),
defreg(MFVAL), defreg(MDEF), defreg(FACTPS), defreg(FTFT),
defreg(RUC), defreg(ROC), defreg(RFC), defreg(RJC),
defreg(PRC64), defreg(PRC127), defreg(PRC255), defreg(PRC511),
defreg(PRC1023), defreg(PRC1522), defreg(PTC64), defreg(PTC127),
defreg(PTC255), defreg(PTC511), defreg(PTC1023), defreg(PTC1522),
defreg(GORCL), defreg(GORCH), defreg(GOTCL), defreg(GOTCH),
defreg(RNBC), defreg(BPRC), defreg(MPRC), defreg(RFCTL),
defreg(PSRCTL), defreg(MPTC), defreg(BPTC), defreg(TSCTFC),
defreg(IAC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC),
defreg(TSCTC), defreg(RXCSUM), defreg(FUNCTAG), defreg(GSCL_1),
defreg(GSCL_2), defreg(GSCL_3), defreg(GSCL_4), defreg(GSCN_0),
defreg(GSCN_1), defreg(GSCN_2), defreg(GSCN_3), defreg(GCR2),
defreg(RAID), defreg(RSRPD), defreg(TIDV), defreg(EITR),
defreg(MRQC), defreg(RETA), defreg(RSSRK), defreg(RDBAH1),
defreg(RDBAL1), defreg(RDLEN1), defreg(RDH1), defreg(RDT1),
defreg(PBACLR), defreg(FCAL), defreg(FCAH), defreg(FCT),
defreg(FCRTH), defreg(FCRTL), defreg(FCTTV), defreg(FCRTV),
defreg(FLA), defreg(EEWR), defreg(FLOP), defreg(FLOL),
defreg(FLSWCTL), defreg(FLSWCNT), defreg(RXDCTL), defreg(RXDCTL1),
defreg(MAVTV0), defreg(MAVTV1), defreg(MAVTV2), defreg(MAVTV3),
defreg(TXSTMPL), defreg(TXSTMPH), defreg(SYSTIML), defreg(SYSTIMH),
defreg(RXCFGL), defreg(RXUDP), defreg(TIMADJL), defreg(TIMADJH),
defreg(RXSTMPH), defreg(RXSTMPL), defreg(RXSATRL), defreg(RXSATRH),
defreg(FLASHT), defreg(TIPG), defreg(RDH), defreg(RDT),
defreg(RDLEN), defreg(RDBAH), defreg(RDBAL),
defreg(TXDCTL1),
defreg(FLSWDATA),
defreg(CTRL_DUP),
defreg(EXTCNF_SIZE),
defreg(EEMNGCTL),
defreg(EEMNGDATA),
defreg(FLMNGCTL),
defreg(FLMNGDATA),
defreg(FLMNGCNT),
defreg(TSYNCRXCTL),
defreg(TSYNCTXCTL),

/* Aliases */
defreg(RDH0_A), defreg(RDT0_A), defreg(RDTR_A), defreg(RDFH_A),
defreg(RDFT_A), defreg(TDH_A), defreg(TDT_A), defreg(TIDV_A),
defreg(TDFH_A), defreg(TDFT_A), defreg(RA_A), defreg(RDBAL0_A),
defreg(TDBAL_A), defreg(TDLEN_A), defreg(VFTA_A), defreg(RDLEN0_A),
defreg(FCRTL_A), defreg(FCRTH_A)
};

static inline void
e1000x_inc_reg_if_not_full(uint32_t *mac, int index)
{
if (mac[index] != 0xffffffff) {
if (mac[index] != UINT32_MAX) {
mac[index]++;
}
}
Expand Down Expand Up @@ -152,16 +78,16 @@ static inline void
e1000x_update_regs_on_link_down(uint32_t *mac, uint16_t *phy)
{
mac[STATUS] &= ~E1000_STATUS_LU;
phy[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
phy[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
phy[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
phy[MII_BMSR] &= ~MII_BMSR_LINK_ST;
phy[MII_BMSR] &= ~MII_BMSR_AN_COMP;
phy[MII_ANLPAR] &= ~MII_ANLPAR_ACK;
}

static inline void
e1000x_update_regs_on_link_up(uint32_t *mac, uint16_t *phy)
{
mac[STATUS] |= E1000_STATUS_LU;
phy[PHY_STATUS] |= MII_SR_LINK_STATUS;
phy[MII_BMSR] |= MII_BMSR_LINK_ST;
}

void e1000x_update_rx_total_stats(uint32_t *mac,
Expand All @@ -178,7 +104,7 @@ uint32_t e1000x_rxbufsize(uint32_t rctl);

bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac);

bool e1000x_is_vlan_packet(const uint8_t *buf, uint16_t vet);
bool e1000x_is_vlan_packet(const void *buf, uint16_t vet);

bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf);

Expand Down Expand Up @@ -213,4 +139,7 @@ typedef struct e1000x_txd_props {
void e1000x_read_tx_ctx_descr(struct e1000_context_desc *d,
e1000x_txd_props *props);

void e1000x_timestamp(uint32_t *mac, int64_t timadj, size_t lo, size_t hi);
void e1000x_set_timinca(uint32_t *mac, int64_t *timadj, uint32_t val);

#endif
967 changes: 967 additions & 0 deletions hw/net/e1000x_regs.h

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions hw/net/fsl_etsec/etsec.c
Expand Up @@ -29,6 +29,7 @@
#include "qemu/osdep.h"
#include "hw/sysbus.h"
#include "hw/irq.h"
#include "hw/net/mii.h"
#include "hw/ptimer.h"
#include "hw/qdev-properties.h"
#include "etsec.h"
Expand Down Expand Up @@ -339,11 +340,11 @@ static void etsec_reset(DeviceState *d)
etsec->rx_buffer_len = 0;

etsec->phy_status =
MII_SR_EXTENDED_CAPS | MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS |
MII_SR_AUTONEG_COMPLETE | MII_SR_PREAMBLE_SUPPRESS |
MII_SR_EXTENDED_STATUS | MII_SR_100T2_HD_CAPS | MII_SR_100T2_FD_CAPS |
MII_SR_10T_HD_CAPS | MII_SR_10T_FD_CAPS | MII_SR_100X_HD_CAPS |
MII_SR_100X_FD_CAPS | MII_SR_100T4_CAPS;
MII_BMSR_EXTCAP | MII_BMSR_LINK_ST | MII_BMSR_AUTONEG |
MII_BMSR_AN_COMP | MII_BMSR_MFPS | MII_BMSR_EXTSTAT |
MII_BMSR_100T2_HD | MII_BMSR_100T2_FD |
MII_BMSR_10T_HD | MII_BMSR_10T_FD |
MII_BMSR_100TX_HD | MII_BMSR_100TX_FD | MII_BMSR_100T4;

etsec_update_irq(etsec);
}
Expand Down
17 changes: 0 additions & 17 deletions hw/net/fsl_etsec/etsec.h
Expand Up @@ -76,23 +76,6 @@ typedef struct eTSEC_rxtx_bd {
#define FCB_TX_CTU (1 << 1)
#define FCB_TX_NPH (1 << 0)

/* PHY Status Register */
#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */
#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */

/* eTSEC */

/* Number of register in the device */
Expand Down
5 changes: 3 additions & 2 deletions hw/net/fsl_etsec/miim.c
Expand Up @@ -23,6 +23,7 @@
*/

#include "qemu/osdep.h"
#include "hw/net/mii.h"
#include "etsec.h"
#include "registers.h"

Expand Down Expand Up @@ -140,8 +141,8 @@ void etsec_miim_link_status(eTSEC *etsec, NetClientState *nc)
{
/* Set link status */
if (nc->link_down) {
etsec->phy_status &= ~MII_SR_LINK_STATUS;
etsec->phy_status &= ~MII_BMSR_LINK_ST;
} else {
etsec->phy_status |= MII_SR_LINK_STATUS;
etsec->phy_status |= MII_BMSR_LINK_ST;
}
}
623 changes: 623 additions & 0 deletions hw/net/igb.c

Large diffs are not rendered by default.

146 changes: 146 additions & 0 deletions hw/net/igb_common.h
@@ -0,0 +1,146 @@
/*
* QEMU igb emulation - shared definitions
*
* Copyright (c) 2020-2023 Red Hat, Inc.
* Copyright (c) 2008 Qumranet
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#ifndef HW_NET_IGB_COMMON_H
#define HW_NET_IGB_COMMON_H

#include "igb_regs.h"

/*
 * Enumerator-generating helpers for the register index enum below.
 *
 * defreg(x)            - enumerator x      = E1000_x >> 2
 * defreg_indexed(x, i) - enumerator x<i>   = E1000_x(i) >> 2
 * defreg_indexeda(x,i) - enumerator x<i>_A = E1000_x_A(i) >> 2
 *
 * NOTE(review): presumably the E1000_* values are register byte offsets, so
 * ">> 2" yields an index into an array of 32-bit registers - confirm against
 * igb_regs.h.
 */
#define defreg(x) x = (E1000_##x >> 2)
#define defreg_indexed(x, i) x##i = (E1000_##x(i) >> 2)
#define defreg_indexeda(x, i) x##i##_A = (E1000_##x##_A(i) >> 2)

/*
 * defregd(x) expands to sixteen indexed enumerators x0..x15 followed by four
 * alias enumerators x0_A..x3_A.
 */
#define defregd(x) defreg_indexed(x, 0), defreg_indexed(x, 1), \
defreg_indexed(x, 2), defreg_indexed(x, 3), \
defreg_indexed(x, 4), defreg_indexed(x, 5), \
defreg_indexed(x, 6), defreg_indexed(x, 7), \
defreg_indexed(x, 8), defreg_indexed(x, 9), \
defreg_indexed(x, 10), defreg_indexed(x, 11), \
defreg_indexed(x, 12), defreg_indexed(x, 13), \
defreg_indexed(x, 14), defreg_indexed(x, 15), \
defreg_indexeda(x, 0), defreg_indexeda(x, 1), \
defreg_indexeda(x, 2), defreg_indexeda(x, 3)

/* defregv(x) expands to eight indexed enumerators x0..x7. */
#define defregv(x) defreg_indexed(x, 0), defreg_indexed(x, 1), \
defreg_indexed(x, 2), defreg_indexed(x, 3), \
defreg_indexed(x, 4), defreg_indexed(x, 5), \
defreg_indexed(x, 6), defreg_indexed(x, 7)

/*
 * Register indices for the igb device model.  Each enumerator is produced by
 * one of the defreg*() helpers from the matching E1000_* constant; the
 * defregd()/defregv() entries each expand to a whole family of numbered
 * enumerators.
 */
enum {
defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
defreg(MPC), defreg(RCTL),
defreg(STATUS), defreg(SWSM), defreg(TCTL),
defreg(TORH), defreg(TORL), defreg(TOTH),
defreg(TOTL), defreg(TPR), defreg(TPT),
defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS),
defreg(VFTA), defreg(VET),
defreg(SCC), defreg(ECOL),
defreg(MCC), defreg(LATECOL), defreg(COLC), defreg(DC),
defreg(TNCRS), defreg(RLEC),
defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), defreg(XOFFTXC),
defreg(FCRUC), defreg(TDFH), defreg(TDFT),
defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(WUC),
defreg(WUS), defreg(RDFH),
defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC),
defreg(IPAV), defreg(IP4AT), defreg(IP6AT),
defreg(WUPM), defreg(FFMT),
defreg(IAM),
defreg(GCR), defreg(TIMINCA), defreg(EIAC), defreg(CTRL_EXT),
defreg(IVAR0), defreg(MANC2H),
defreg(MFVAL), defreg(MDEF), defreg(FACTPS), defreg(FTFT),
defreg(RUC), defreg(ROC), defreg(RFC), defreg(RJC),
defreg(PRC64), defreg(PRC127), defreg(PRC255), defreg(PRC511),
defreg(PRC1023), defreg(PRC1522), defreg(PTC64), defreg(PTC127),
defreg(PTC255), defreg(PTC511), defreg(PTC1023), defreg(PTC1522),
defreg(GORCL), defreg(GORCH), defreg(GOTCL), defreg(GOTCH),
defreg(RNBC), defreg(BPRC), defreg(MPRC), defreg(RFCTL),
defreg(MPTC), defreg(BPTC),
defreg(IAC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC),
defreg(TSCTC), defreg(RXCSUM), defreg(FUNCTAG), defreg(GSCL_1),
defreg(GSCL_2), defreg(GSCL_3), defreg(GSCL_4), defreg(GSCN_0),
defreg(GSCN_1), defreg(GSCN_2), defreg(GSCN_3),
defreg_indexed(EITR, 0),
defreg(MRQC), defreg(RETA), defreg(RSSRK),
defreg(PBACLR), defreg(FCAL), defreg(FCAH), defreg(FCT),
defreg(FCRTH), defreg(FCRTL), defreg(FCTTV), defreg(FCRTV),
defreg(FLA), defreg(FLOP),
defreg(MAVTV0), defreg(MAVTV1), defreg(MAVTV2), defreg(MAVTV3),
defreg(TXSTMPL), defreg(TXSTMPH), defreg(SYSTIML), defreg(SYSTIMH),
defreg(TIMADJL), defreg(TIMADJH),
defreg(RXSTMPH), defreg(RXSTMPL), defreg(RXSATRL), defreg(RXSATRH),
defreg(TIPG),
defreg(CTRL_DUP),
defreg(EEMNGCTL),
defreg(EEMNGDATA),
defreg(FLMNGCTL),
defreg(FLMNGDATA),
defreg(FLMNGCNT),
defreg(TSYNCRXCTL),
defreg(TSYNCTXCTL),
defreg(RLPML),
defreg(UTA),

/* Aliases */
defreg(RDFH_A), defreg(RDFT_A), defreg(TDFH_A), defreg(TDFT_A),
defreg(RA_A), defreg(VFTA_A), defreg(FCRTL_A),

/* Additional regs used by IGB */
defreg(FWSM), defreg(SW_FW_SYNC),

defreg(EICS), defreg(EIMS), defreg(EIMC), defreg(EIAM),
defreg(EICR), defreg(IVAR_MISC), defreg(GPIE),

defreg(RXPBS), defregd(RDBAL), defregd(RDBAH), defregd(RDLEN),
defregd(SRRCTL), defregd(RDH), defregd(RDT),
defregd(RXDCTL), defregd(RXCTL), defregd(RQDPC), defreg(RA2),

defreg(TXPBS), defreg(TCTL_EXT), defreg(DTXCTL), defreg(HTCBDPC),
defregd(TDBAL), defregd(TDBAH), defregd(TDLEN), defregd(TDH),
defregd(TDT), defregd(TXDCTL), defregd(TXCTL),
defregd(TDWBAL), defregd(TDWBAH),

defreg(VT_CTL),

defregv(P2VMAILBOX), defregv(V2PMAILBOX), defreg(MBVFICR), defreg(MBVFIMR),
defreg(VFLRE), defreg(VFRE), defreg(VFTE), defreg(WVBR),
defreg(QDE), defreg(DTXSWC), defreg_indexed(VLVF, 0),
defregv(VMOLR), defreg(RPLOLR), defregv(VMBMEM), defregv(VMVIR),

defregv(PVTCTRL), defregv(PVTEICS), defregv(PVTEIMS), defregv(PVTEIMC),
defregv(PVTEIAC), defregv(PVTEIAM), defregv(PVTEICR), defregv(PVFGPRC),
defregv(PVFGPTC), defregv(PVFGORC), defregv(PVFGOTC), defregv(PVFMPRC),
defregv(PVFGPRLBC), defregv(PVFGPTLBC), defregv(PVFGORLBC), defregv(PVFGOTLBC),

defreg(MTA_A),

defreg(VTIVAR), defreg(VTIVAR_MISC),
};

/*
 * MMIO accessors of the igb device. Their signatures match the read/write
 * callbacks stored in a MemoryRegionOps. igbvf.c forwards translated VF
 * register accesses to these functions and passes the physical function's
 * PCIDevice as @opaque.
 */
uint64_t igb_mmio_read(void *opaque, hwaddr addr, unsigned size);
void igb_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size);

#endif
4,077 changes: 4,077 additions & 0 deletions hw/net/igb_core.c

Large diffs are not rendered by default.

146 changes: 146 additions & 0 deletions hw/net/igb_core.h
@@ -0,0 +1,146 @@
/*
* Core code for QEMU igb emulation
*
* Datasheet:
* https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eg-gbe-datasheet.pdf
*
* Copyright (c) 2020-2023 Red Hat, Inc.
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Akihiko Odaki <akihiko.odaki@daynix.com>
* Gal Hammer <gal.hammer@sap.com>
* Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#ifndef HW_NET_IGB_CORE_H
#define HW_NET_IGB_CORE_H

/* Number of 32-bit entries in IGBCore.mac[] */
#define E1000E_MAC_SIZE (0x8000)
/* Number of 16-bit words in IGBCore.eeprom[] */
#define IGB_EEPROM_SIZE (1024)

/* Number of entries in IGBCore.eitr[] and IGBCore.eitr_guest_value[] */
#define IGB_INTR_NUM (25)
/*
 * MSI-X vector counts. IGBVF_MSIX_VEC_NUM is what igbvf.c passes to
 * msix_init(); NOTE(review): IGB_MSIX_VEC_NUM is presumably the physical
 * function's count - confirm against igb.c.
 */
#define IGB_MSIX_VEC_NUM (10)
#define IGBVF_MSIX_VEC_NUM (3)
/* Number of entries in IGBCore.tx[] (one TX context per queue) */
#define IGB_NUM_QUEUES (16)

/* Forward declaration so IGBIntrDelayTimer can point back at its core. */
typedef struct IGBCore IGBCore;

/*
 * Bit flags that can be OR-ed together.
 * NOTE(review): presumably PHY register access permissions (readable /
 * writable) - the users are not visible in this header.
 */
enum { PHY_R = BIT(0),
PHY_W = BIT(1),
PHY_RW = PHY_R | PHY_W };

/*
 * State of one interrupt delay timer. IGBCore embeds IGB_INTR_NUM of these
 * in its eitr[] array.
 */
typedef struct IGBIntrDelayTimer_st {
QEMUTimer *timer;
/* NOTE(review): presumably true while @timer is armed - confirm in igb_core.c */
bool running;
/* NOTE(review): presumably the index of the register holding the delay */
uint32_t delay_reg;
/* NOTE(review): presumably nanoseconds per unit of the delay register value */
uint32_t delay_resolution_ns;
/* Back-pointer to the owning core */
IGBCore *core;
} IGBIntrDelayTimer;

/*
 * Device-independent state of the igb core: register arrays, per-queue
 * transmit context, the receive packet context, interrupt delay timers and
 * links back to the owning PCI device / NIC.
 */
struct IGBCore {
/* Register arrays; sizes are fixed by the macros used as bounds */
uint32_t mac[E1000E_MAC_SIZE];
uint16_t phy[MAX_PHY_REG_ADDRESS + 1];
uint16_t eeprom[IGB_EEPROM_SIZE];

uint8_t rx_desc_len;

QEMUTimer *autoneg_timer;

/* Transmit context, one entry per queue (IGB_NUM_QUEUES entries) */
struct igb_tx {
uint16_t vlan; /* VLAN Tag */
uint16_t mss; /* Maximum Segment Size */
bool tse; /* TCP/UDP Segmentation Enable */
bool ixsm; /* Insert IP Checksum */
bool txsm; /* Insert TCP/UDP Checksum */

bool first;
bool skip_cp;

struct NetTxPkt *tx_pkt;
} tx[IGB_NUM_QUEUES];

/* A single receive packet context for the whole core (not per queue) */
struct NetRxPkt *rx_pkt;

bool has_vnet;
int max_queue_num;

/* One delay timer per interrupt (IGB_INTR_NUM entries) */
IGBIntrDelayTimer eitr[IGB_INTR_NUM];

VMChangeStateEntry *vmstate;

/* NOTE(review): presumably the EITR values as last written by the guest */
uint32_t eitr_guest_value[IGB_INTR_NUM];

uint8_t permanent_mac[ETH_ALEN];

/* Owning NIC / PCI device and a callback into the owner taking that device */
NICState *owner_nic;
PCIDevice *owner;
void (*owner_start_recv)(PCIDevice *d);

int64_t timadj;
};

/*
 * Public interface of the igb core. All functions operate on an IGBCore
 * instance supplied by the caller.
 * NOTE(review): the implementations are not visible from this header;
 * presumably they live in igb_core.c.
 */

/* Register write / read of @size bytes at offset @addr. */
void
igb_core_write(IGBCore *core, hwaddr addr, uint64_t val, unsigned size);

uint64_t
igb_core_read(IGBCore *core, hwaddr addr, unsigned size);

/*
 * Realize-time setup taking an EEPROM template of @eeprom_size and the
 * MAC address to use.
 */
void
igb_core_pci_realize(IGBCore *regs,
const uint16_t *eeprom_templ,
uint32_t eeprom_size,
const uint8_t *macaddr);

void
igb_core_reset(IGBCore *core);

/* Migration hooks; igb_core_post_load() returns an int status. */
void
igb_core_pre_save(IGBCore *core);

int
igb_core_post_load(IGBCore *core);

void
igb_core_set_link_status(IGBCore *core);

void
igb_core_pci_uninit(IGBCore *core);

/* Receive path: flat-buffer and scatter-gather variants. */
bool
igb_can_receive(IGBCore *core);

ssize_t
igb_receive(IGBCore *core, const uint8_t *buf, size_t size);

ssize_t
igb_receive_iov(IGBCore *core, const struct iovec *iov, int iovcnt);

void
igb_start_recv(IGBCore *core);

#endif
648 changes: 648 additions & 0 deletions hw/net/igb_regs.h

Large diffs are not rendered by default.

327 changes: 327 additions & 0 deletions hw/net/igbvf.c
@@ -0,0 +1,327 @@
/*
* QEMU Intel 82576 SR/IOV Ethernet Controller Emulation
*
* Datasheet:
* https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eg-gbe-datasheet.pdf
*
* Copyright (c) 2020-2023 Red Hat, Inc.
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Akihiko Odaki <akihiko.odaki@daynix.com>
* Gal Hammer <gal.hammer@sap.com>
* Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/net/mii.h"
#include "hw/pci/pci_device.h"
#include "hw/pci/pcie.h"
#include "hw/pci/msix.h"
#include "net/eth.h"
#include "net/net.h"
#include "igb_common.h"
#include "igb_core.h"
#include "trace.h"
#include "qapi/error.h"

/* QOM type name of the virtual function device and its instance cast macro */
#define TYPE_IGBVF "igbvf"
OBJECT_DECLARE_SIMPLE_TYPE(IgbVfState, IGBVF)

/* BAR indices under which realize registers the MMIO and MSI-X regions */
#define IGBVF_MMIO_BAR_IDX (0)
#define IGBVF_MSIX_BAR_IDX (3)

/* Sizes of the two memory regions created in realize */
#define IGBVF_MMIO_SIZE (16 * 1024)
#define IGBVF_MSIX_SIZE (16 * 1024)

/*
 * Instance state of one igb virtual function. It only holds the PCI device
 * and its two BAR regions; register accesses are forwarded to the physical
 * function by the MMIO handlers.
 */
struct IgbVfState {
PCIDevice parent_obj;

/* Register window, backed by mmio_ops */
MemoryRegion mmio;
/* Container region handed to msix_init() for both table and PBA */
MemoryRegion msix;
};

/*
 * Translate a register offset inside a VF's MMIO BAR into the offset of the
 * corresponding register in the physical function's register space.
 *
 * @addr:  offset accessed in the VF BAR
 * @vfn:   number of the accessing virtual function
 * @write: true for a write access, false for a read
 *
 * Returns the PF register offset, or HWADDR_MAX when the access must not be
 * forwarded: either the offset is not handled here (a trace event is emitted
 * in that case) or it is a write to STATUS / FRTIMER.
 */
static hwaddr vf_to_pf_addr(hwaddr addr, uint16_t vfn, bool write)
{
switch (addr) {
case E1000_CTRL:
case E1000_CTRL_DUP:
return E1000_PVTCTRL(vfn);
case E1000_EICS:
return E1000_PVTEICS(vfn);
case E1000_EIMS:
return E1000_PVTEIMS(vfn);
case E1000_EIMC:
return E1000_PVTEIMC(vfn);
case E1000_EIAC:
return E1000_PVTEIAC(vfn);
case E1000_EIAM:
return E1000_PVTEIAM(vfn);
case E1000_EICR:
return E1000_PVTEICR(vfn);
/*
 * The VF's three EITR registers are placed relative to E1000_EITR(22),
 * stepping down by 0xC bytes for each successive VF number.
 */
case E1000_EITR(0):
case E1000_EITR(1):
case E1000_EITR(2):
return E1000_EITR(22) + (addr - E1000_EITR(0)) - vfn * 0xC;
case E1000_IVAR0:
return E1000_VTIVAR + vfn * 4;
case E1000_IVAR_MISC:
return E1000_VTIVAR_MISC + vfn * 4;
case 0x0F04: /* PBACL */
return E1000_PBACLR;
case 0x0F0C: /* PSRTYPE */
return E1000_PSRTYPE(vfn);
case E1000_V2PMAILBOX(0):
return E1000_V2PMAILBOX(vfn);
/* 0x40-byte window starting at VMBMEM(0); each VF gets its own 0x40 bytes */
case E1000_VMBMEM(0) ... E1000_VMBMEM(0) + 0x3F:
return addr + vfn * 0x40;
/*
 * For the register families below, the VF-side instance 0 maps to PF
 * instance vfn and instance 1 maps to PF instance
 * vfn + IGB_MAX_VF_FUNCTIONS.
 */
case E1000_RDBAL_A(0):
return E1000_RDBAL(vfn);
case E1000_RDBAL_A(1):
return E1000_RDBAL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RDBAH_A(0):
return E1000_RDBAH(vfn);
case E1000_RDBAH_A(1):
return E1000_RDBAH(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RDLEN_A(0):
return E1000_RDLEN(vfn);
case E1000_RDLEN_A(1):
return E1000_RDLEN(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_SRRCTL_A(0):
return E1000_SRRCTL(vfn);
case E1000_SRRCTL_A(1):
return E1000_SRRCTL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RDH_A(0):
return E1000_RDH(vfn);
case E1000_RDH_A(1):
return E1000_RDH(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RXCTL_A(0):
return E1000_RXCTL(vfn);
case E1000_RXCTL_A(1):
return E1000_RXCTL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RDT_A(0):
return E1000_RDT(vfn);
case E1000_RDT_A(1):
return E1000_RDT(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RXDCTL_A(0):
return E1000_RXDCTL(vfn);
case E1000_RXDCTL_A(1):
return E1000_RXDCTL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_RQDPC_A(0):
return E1000_RQDPC(vfn);
case E1000_RQDPC_A(1):
return E1000_RQDPC(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDBAL_A(0):
return E1000_TDBAL(vfn);
case E1000_TDBAL_A(1):
return E1000_TDBAL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDBAH_A(0):
return E1000_TDBAH(vfn);
case E1000_TDBAH_A(1):
return E1000_TDBAH(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDLEN_A(0):
return E1000_TDLEN(vfn);
case E1000_TDLEN_A(1):
return E1000_TDLEN(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDH_A(0):
return E1000_TDH(vfn);
case E1000_TDH_A(1):
return E1000_TDH(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TXCTL_A(0):
return E1000_TXCTL(vfn);
case E1000_TXCTL_A(1):
return E1000_TXCTL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDT_A(0):
return E1000_TDT(vfn);
case E1000_TDT_A(1):
return E1000_TDT(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TXDCTL_A(0):
return E1000_TXDCTL(vfn);
case E1000_TXDCTL_A(1):
return E1000_TXDCTL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDWBAL_A(0):
return E1000_TDWBAL(vfn);
case E1000_TDWBAL_A(1):
return E1000_TDWBAL(vfn + IGB_MAX_VF_FUNCTIONS);
case E1000_TDWBAH_A(0):
return E1000_TDWBAH(vfn);
case E1000_TDWBAH_A(1):
return E1000_TDWBAH(vfn + IGB_MAX_VF_FUNCTIONS);
/* VF-side counter offsets map to the PVF* counter of this VF */
case E1000_VFGPRC:
return E1000_PVFGPRC(vfn);
case E1000_VFGPTC:
return E1000_PVFGPTC(vfn);
case E1000_VFGORC:
return E1000_PVFGORC(vfn);
case E1000_VFGOTC:
return E1000_PVFGOTC(vfn);
case E1000_VFMPRC:
return E1000_PVFMPRC(vfn);
case E1000_VFGPRLBC:
return E1000_PVFGPRLBC(vfn);
case E1000_VFGPTLBC:
return E1000_PVFGPTLBC(vfn);
case E1000_VFGORLBC:
return E1000_PVFGORLBC(vfn);
case E1000_VFGOTLBC:
return E1000_PVFGOTLBC(vfn);
/* STATUS and FRTIMER: reads pass through untranslated, writes are dropped */
case E1000_STATUS:
case E1000_FRTIMER:
if (write) {
return HWADDR_MAX;
}
/* fallthrough */
case 0x34E8: /* PBTWAC */
case 0x24E8: /* PBRWAC */
return addr;
}

/* Anything not listed above is reported and not forwarded */
trace_igbvf_wrn_io_addr_unknown(addr);

return HWADDR_MAX;
}

/*
 * PCI config-space write hook of the VF: record the access in the trace log,
 * then let the generic PCI code perform the write.
 */
static void igbvf_write_config(PCIDevice *dev, uint32_t addr, uint32_t val,
                               int len)
{
    trace_igbvf_write_config(addr, val, len);

    pci_default_write_config(dev, addr, val, len);
}

/*
 * MMIO read handler of the VF BAR.
 *
 * The VF-relative offset is translated to a PF register offset and the read
 * is served by the physical function. Offsets that vf_to_pf_addr() refuses
 * (HWADDR_MAX) read as 0.
 */
static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    PCIDevice *vf_dev = PCI_DEVICE(opaque);
    PCIDevice *pf_dev = pcie_sriov_get_pf(vf_dev);
    hwaddr pf_addr = vf_to_pf_addr(addr, pcie_sriov_vf_number(vf_dev), false);

    if (pf_addr == HWADDR_MAX) {
        return 0;
    }

    return igb_mmio_read(pf_dev, pf_addr, size);
}

/*
 * MMIO write handler of the VF BAR.
 *
 * The VF-relative offset is translated to a PF register offset and the write
 * is performed on the physical function. Writes to offsets that
 * vf_to_pf_addr() refuses (HWADDR_MAX) are silently discarded.
 */
static void igbvf_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    PCIDevice *vf_dev = PCI_DEVICE(opaque);
    PCIDevice *pf_dev = pcie_sriov_get_pf(vf_dev);
    hwaddr pf_addr = vf_to_pf_addr(addr, pcie_sriov_vf_number(vf_dev), true);

    if (pf_addr == HWADDR_MAX) {
        return;
    }

    igb_mmio_write(pf_dev, pf_addr, val, size);
}

/*
 * Access callbacks for the VF register BAR. The device is little-endian and
 * the handlers are implemented for 4-byte accesses only (.impl min == max).
 */
static const MemoryRegionOps mmio_ops = {
.read = igbvf_mmio_read,
.write = igbvf_mmio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
.min_access_size = 4,
.max_access_size = 4,
},
};

/*
 * Realize callback of the VF PCI device.
 *
 * Installs the config-write hook, creates and registers the MMIO and MSI-X
 * BARs, initializes MSI-X and marks every vector as used, then adds the
 * PCIe endpoint, AER and ARI capabilities.
 *
 * A failing msix_init() is reported through @errp and aborts realization;
 * failure to add the PCIe or AER capability is fatal (hw_error()).
 */
static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
{
    IgbVfState *vf = IGBVF(dev);
    int vec;

    dev->config_write = igbvf_write_config;

    /* Register window; accesses are forwarded to the PF by mmio_ops. */
    memory_region_init_io(&vf->mmio, OBJECT(dev), &mmio_ops, vf,
                          "igbvf-mmio", IGBVF_MMIO_SIZE);
    pcie_sriov_vf_register_bar(dev, IGBVF_MMIO_BAR_IDX, &vf->mmio);

    /* Container region that hosts both the MSI-X table and the PBA. */
    memory_region_init(&vf->msix, OBJECT(dev), "igbvf-msix",
                       IGBVF_MSIX_SIZE);
    pcie_sriov_vf_register_bar(dev, IGBVF_MSIX_BAR_IDX, &vf->msix);

    if (msix_init(dev, IGBVF_MSIX_VEC_NUM,
                  &vf->msix, IGBVF_MSIX_BAR_IDX, 0,
                  &vf->msix, IGBVF_MSIX_BAR_IDX, 0x2000,
                  0x70, errp) != 0) {
        return;
    }

    for (vec = 0; vec < IGBVF_MSIX_VEC_NUM; vec++) {
        msix_vector_use(dev, vec);
    }

    if (pcie_endpoint_cap_init(dev, 0xa0) < 0) {
        hw_error("Failed to initialize PCIe capability");
    }

    if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
        hw_error("Failed to initialize AER capability");
    }

    pcie_ari_init(dev, 0x150, 1);
}

/*
 * Exit callback of the VF PCI device: tears down the AER and PCIe
 * capabilities, then releases all MSI-X vectors and the MSI-X state.
 */
static void igbvf_pci_uninit(PCIDevice *dev)
{
    MemoryRegion *msix_bar = &IGBVF(dev)->msix;

    pcie_aer_exit(dev);
    pcie_cap_exit(dev);

    msix_unuse_all_vectors(dev);
    /* Table and PBA share the same region, as set up in realize. */
    msix_uninit(dev, msix_bar, msix_bar);
}

/*
 * Class initializer of the igbvf type: wires up the realize/exit callbacks
 * and the PCI identification of the Intel 82576 virtual function. The device
 * is marked as not user-creatable.
 */
static void igbvf_class_init(ObjectClass *class, void *data)
{
    DeviceClass *dev_class = DEVICE_CLASS(class);
    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(class);

    /* Life-cycle callbacks */
    pci_class->realize = igbvf_pci_realize;
    pci_class->exit = igbvf_pci_uninit;

    /* PCI identification */
    pci_class->vendor_id = PCI_VENDOR_ID_INTEL;
    pci_class->device_id = E1000_DEV_ID_82576_VF;
    pci_class->revision = 1;
    pci_class->class_id = PCI_CLASS_NETWORK_ETHERNET;

    /* Generic device properties */
    dev_class->desc = "Intel 82576 Virtual Function";
    dev_class->user_creatable = false;

    set_bit(DEVICE_CATEGORY_NETWORK, dev_class->categories);
}

/*
 * QOM type description of the VF: a PCI device with IgbVfState as instance
 * data that implements the PCIe device interface.
 */
static const TypeInfo igbvf_info = {
.name = TYPE_IGBVF,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(IgbVfState),
.class_init = igbvf_class_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_PCIE_DEVICE },
{ }
},
};

/* Registers the igbvf QOM type; hooked into start-up through type_init(). */
static void igb_register_types(void)
{
    type_register_static(&igbvf_info);
}

type_init(igb_register_types)
2 changes: 2 additions & 0 deletions hw/net/meson.build
Expand Up @@ -10,6 +10,8 @@ softmmu_ss.add(when: 'CONFIG_PCNET_COMMON', if_true: files('pcnet.c'))
softmmu_ss.add(when: 'CONFIG_E1000_PCI', if_true: files('e1000.c', 'e1000x_common.c'))
softmmu_ss.add(when: 'CONFIG_E1000E_PCI_EXPRESS', if_true: files('net_tx_pkt.c', 'net_rx_pkt.c'))
softmmu_ss.add(when: 'CONFIG_E1000E_PCI_EXPRESS', if_true: files('e1000e.c', 'e1000e_core.c', 'e1000x_common.c'))
softmmu_ss.add(when: 'CONFIG_IGB_PCI_EXPRESS', if_true: files('net_tx_pkt.c', 'net_rx_pkt.c'))
softmmu_ss.add(when: 'CONFIG_IGB_PCI_EXPRESS', if_true: files('igb.c', 'igbvf.c', 'igb_core.c'))
softmmu_ss.add(when: 'CONFIG_RTL8139_PCI', if_true: files('rtl8139.c'))
softmmu_ss.add(when: 'CONFIG_TULIP', if_true: files('tulip.c'))
softmmu_ss.add(when: 'CONFIG_VMXNET3_PCI', if_true: files('net_tx_pkt.c', 'net_rx_pkt.c'))
Expand Down
102 changes: 51 additions & 51 deletions hw/net/net_rx_pkt.c
Expand Up @@ -30,14 +30,11 @@ struct NetRxPkt {
uint32_t tot_len;
uint16_t tci;
size_t ehdr_buf_len;
bool has_virt_hdr;
eth_pkt_types_e packet_type;

/* Analysis results */
bool isip4;
bool isip6;
bool isudp;
bool istcp;
bool hasip4;
bool hasip6;

size_t l3hdr_off;
size_t l4hdr_off;
Expand All @@ -48,10 +45,9 @@ struct NetRxPkt {
eth_l4_hdr_info l4hdr_info;
};

void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
void net_rx_pkt_init(struct NetRxPkt **pkt)
{
struct NetRxPkt *p = g_malloc0(sizeof *p);
p->has_virt_hdr = has_virt_hdr;
p->vec = NULL;
p->vec_len_total = 0;
*pkt = p;
Expand Down Expand Up @@ -107,12 +103,11 @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
iov, iovcnt, ploff, pkt->tot_len);
}

eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6,
&pkt->isudp, &pkt->istcp,
eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->hasip4, &pkt->hasip6,
&pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
&pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);

trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp,
trace_net_rx_pkt_parsed(pkt->hasip4, pkt->hasip6, pkt->l4hdr_info.proto,
pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
}

Expand Down Expand Up @@ -201,22 +196,20 @@ void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,

assert(pkt);

eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6,
&pkt->isudp, &pkt->istcp,
eth_get_protocols(&iov, 1, &pkt->hasip4, &pkt->hasip6,
&pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
&pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
}

void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
bool *isip4, bool *isip6,
bool *isudp, bool *istcp)
bool *hasip4, bool *hasip6,
EthL4HdrProto *l4hdr_proto)
{
assert(pkt);

*isip4 = pkt->isip4;
*isip6 = pkt->isip6;
*isudp = pkt->isudp;
*istcp = pkt->istcp;
*hasip4 = pkt->hasip4;
*hasip6 = pkt->hasip6;
*l4hdr_proto = pkt->l4hdr_info.proto;
}

size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
Expand Down Expand Up @@ -333,58 +326,58 @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,

switch (type) {
case NetPktRssIpV4:
assert(pkt->isip4);
assert(pkt->hasip4);
trace_net_rx_pkt_rss_ip4();
_net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV4Tcp:
assert(pkt->isip4);
assert(pkt->istcp);
assert(pkt->hasip4);
assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
trace_net_rx_pkt_rss_ip4_tcp();
_net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6Tcp:
assert(pkt->isip6);
assert(pkt->istcp);
assert(pkt->hasip6);
assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
trace_net_rx_pkt_rss_ip6_tcp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6:
assert(pkt->isip6);
assert(pkt->hasip6);
trace_net_rx_pkt_rss_ip6();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
break;
case NetPktRssIpV6Ex:
assert(pkt->isip6);
assert(pkt->hasip6);
trace_net_rx_pkt_rss_ip6_ex();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
break;
case NetPktRssIpV6TcpEx:
assert(pkt->isip6);
assert(pkt->istcp);
assert(pkt->hasip6);
assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
trace_net_rx_pkt_rss_ip6_ex_tcp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV4Udp:
assert(pkt->isip4);
assert(pkt->isudp);
assert(pkt->hasip4);
assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
trace_net_rx_pkt_rss_ip4_udp();
_net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
_net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6Udp:
assert(pkt->isip6);
assert(pkt->isudp);
assert(pkt->hasip6);
assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
trace_net_rx_pkt_rss_ip6_udp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
_net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6UdpEx:
assert(pkt->isip6);
assert(pkt->isudp);
assert(pkt->hasip6);
assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
trace_net_rx_pkt_rss_ip6_ex_udp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
_net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
Expand All @@ -406,7 +399,7 @@ uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
{
assert(pkt);

if (pkt->isip4) {
if (pkt->hasip4) {
return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
}

Expand All @@ -417,7 +410,7 @@ bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
{
assert(pkt);

if (pkt->istcp) {
if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP) {
return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
}

Expand All @@ -428,7 +421,7 @@ bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
{
assert(pkt);

if (pkt->istcp) {
if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP) {
return pkt->l4hdr_info.has_tcp_data;
}

Expand Down Expand Up @@ -465,18 +458,18 @@ void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
}

bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt)
{
assert(pkt);

return pkt->ehdr_buf_len ? true : false;
memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
}

bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt)
bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
{
assert(pkt);

return pkt->has_virt_hdr;
return pkt->ehdr_buf_len ? true : false;
}

uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
Expand All @@ -494,7 +487,7 @@ bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)

trace_net_rx_pkt_l3_csum_validate_entry();

if (!pkt->isip4) {
if (!pkt->hasip4) {
trace_net_rx_pkt_l3_csum_validate_not_ip4();
return false;
}
Expand Down Expand Up @@ -525,8 +518,8 @@ _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)

trace_net_rx_pkt_l4_csum_calc_entry();

if (pkt->isip4) {
if (pkt->isudp) {
if (pkt->hasip4) {
if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP) {
csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
trace_net_rx_pkt_l4_csum_calc_ip4_udp();
} else {
Expand All @@ -539,7 +532,7 @@ _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
csl, &cso);
trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
} else {
if (pkt->isudp) {
if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP) {
csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
trace_net_rx_pkt_l4_csum_calc_ip6_udp();
} else {
Expand Down Expand Up @@ -573,17 +566,19 @@ bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)

trace_net_rx_pkt_l4_csum_validate_entry();

if (!pkt->istcp && !pkt->isudp) {
if (pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_TCP &&
pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_UDP) {
trace_net_rx_pkt_l4_csum_validate_not_xxp();
return false;
}

if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) {
if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP &&
pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
return false;
}

if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
return false;
}
Expand All @@ -604,22 +599,27 @@ bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt)

trace_net_rx_pkt_l4_csum_fix_entry();

if (pkt->istcp) {
switch (pkt->l4hdr_info.proto) {
case ETH_L4_HDR_PROTO_TCP:
l4_cso = offsetof(struct tcp_header, th_sum);
trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso);
} else if (pkt->isudp) {
break;

case ETH_L4_HDR_PROTO_UDP:
if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
return false;
}
l4_cso = offsetof(struct udp_header, uh_sum);
trace_net_rx_pkt_l4_csum_fix_udp(l4_cso);
} else {
break;

default:
trace_net_rx_pkt_l4_csum_fix_not_xxp();
return false;
}

if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
return false;
}
Expand Down
31 changes: 14 additions & 17 deletions hw/net/net_rx_pkt.h
Expand Up @@ -37,10 +37,9 @@ void net_rx_pkt_uninit(struct NetRxPkt *pkt);
* Init function for rx packet functionality
*
* @pkt: packet pointer
* @has_virt_hdr: device uses virtio header
*
*/
void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr);
void net_rx_pkt_init(struct NetRxPkt **pkt);

/**
* returns total length of data attached to rx context
Expand All @@ -67,15 +66,14 @@ void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
* fetches packet analysis results
*
* @pkt: packet
* @isip4: whether the packet given is IPv4
* @isip6: whether the packet given is IPv6
* @isudp: whether the packet given is UDP
* @istcp: whether the packet given is TCP
* @hasip4: whether the packet has an IPv4 header
* @hasip6: whether the packet has an IPv6 header
* @l4hdr_proto: protocol of L4 header
*
*/
void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
bool *isip4, bool *isip6,
bool *isudp, bool *istcp);
bool *hasip4, bool *hasip6,
EthL4HdrProto *l4hdr_proto);

/**
* fetches L3 header offset
Expand Down Expand Up @@ -214,15 +212,6 @@ uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt);
*/
bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt);

/**
* notifies caller if the packet has virtio header
*
* @pkt: packet
* @ret: true if packet has virtio header, false otherwize
*
*/
bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt);

/**
* attach scatter-gather data to rx packet
*
Expand Down Expand Up @@ -322,6 +311,14 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
const struct iovec *iov, int iovcnt);

/**
* unset vhdr data from packet context
*
* @pkt: packet
*
*/
void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt);

/**
* save packet type in packet context
*
Expand Down
332 changes: 253 additions & 79 deletions hw/net/net_tx_pkt.c

Large diffs are not rendered by default.

27 changes: 15 additions & 12 deletions hw/net/net_tx_pkt.h
Expand Up @@ -26,16 +26,17 @@

struct NetTxPkt;

typedef void (* NetTxPktCallback)(void *, const struct iovec *, int, const struct iovec *, int);

/**
* Init function for tx packet functionality
*
* @pkt: packet pointer
* @pci_dev: PCI device processing this packet
* @max_frags: max tx ip fragments
* @has_virt_hdr: device uses virtio header.
*/
void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
uint32_t max_frags, bool has_virt_hdr);
uint32_t max_frags);

/**
* Clean all tx packet resources.
Expand All @@ -59,9 +60,10 @@ struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt);
* @tso_enable: TSO enabled
* @csum_enable: CSO enabled
* @gso_size: MSS size for TSO
* @ret: operation result
*
*/
void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
bool csum_enable, uint32_t gso_size);

/**
Expand Down Expand Up @@ -161,15 +163,16 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt);
bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);

/**
* Redirect packet directly to receive path (emulate loopback phy).
* Handles sw offloads if vhdr is not supported.
*
* @pkt: packet
* @nc: NetClientState
* @ret: operation result
*
*/
bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc);
* Send packet with a custom function.
*
* @pkt: packet
* @offload: whether the callback implements offloading
* @callback: a function to be called back for each transformed packet
* @context: a pointer to be passed to the callback.
* @ret: operation result
*/
bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
NetTxPktCallback callback, void *context);

/**
* parse raw packet data and analyze offload requirements.
Expand Down
50 changes: 39 additions & 11 deletions hw/net/trace-events
Expand Up @@ -61,7 +61,7 @@ pcnet_ioport_read(void *opaque, uint64_t addr, unsigned size) "opaque=%p addr=0x
pcnet_ioport_write(void *opaque, uint64_t addr, uint64_t data, unsigned size) "opaque=%p addr=0x%"PRIx64" data=0x%"PRIx64" size=%d"

# net_rx_pkt.c
net_rx_pkt_parsed(bool ip4, bool ip6, bool udp, bool tcp, size_t l3o, size_t l4o, size_t l5o) "RX packet parsed: ip4: %d, ip6: %d, udp: %d, tcp: %d, l3 offset: %zu, l4 offset: %zu, l5 offset: %zu"
net_rx_pkt_parsed(bool ip4, bool ip6, int l4proto, size_t l3o, size_t l4o, size_t l5o) "RX packet parsed: ip4: %d, ip6: %d, l4 protocol: %d, l3 offset: %zu, l4 offset: %zu, l5 offset: %zu"
net_rx_pkt_l4_csum_validate_entry(void) "Starting L4 checksum validation"
net_rx_pkt_l4_csum_validate_not_xxp(void) "Not a TCP/UDP packet"
net_rx_pkt_l4_csum_validate_udp_with_no_checksum(void) "UDP packet without checksum"
Expand Down Expand Up @@ -165,8 +165,8 @@ e1000e_rx_descr(int ridx, uint64_t base, uint8_t len) "Next RX descriptor: ring
e1000e_rx_set_rctl(uint32_t rctl) "RCTL = 0x%x"
e1000e_rx_receive_iov(int iovcnt) "Received vector of %d fragments"
e1000e_rx_flt_dropped(void) "Received packet dropped by RX filter"
e1000e_rx_written_to_guest(uint32_t causes) "Received packet written to guest (ICR causes %u)"
e1000e_rx_not_written_to_guest(uint32_t causes) "Received packet NOT written to guest (ICR causes %u)"
e1000e_rx_written_to_guest(int queue_idx) "Received packet written to guest (queue %d)"
e1000e_rx_not_written_to_guest(int queue_idx) "Received packet NOT written to guest (queue %d)"
e1000e_rx_interrupt_set(uint32_t causes) "Receive interrupt set (ICR causes %u)"
e1000e_rx_interrupt_delayed(uint32_t causes) "Receive interrupt delayed (ICR causes %u)"
e1000e_rx_set_cso(int cso_state) "RX CSO state set to %d"
Expand All @@ -177,18 +177,16 @@ e1000e_rx_start_recv(void)
e1000e_rx_rss_started(void) "Starting RSS processing"
e1000e_rx_rss_disabled(void) "RSS is disabled"
e1000e_rx_rss_type(uint32_t type) "RSS type is %u"
e1000e_rx_rss_ip4(bool isfragment, bool istcp, uint32_t mrqc, bool tcpipv4_enabled, bool ipv4_enabled) "RSS IPv4: fragment %d, tcp %d, mrqc 0x%X, tcpipv4 enabled %d, ipv4 enabled %d"
e1000e_rx_rss_ip4(int l4hdr_proto, uint32_t mrqc, bool tcpipv4_enabled, bool ipv4_enabled) "RSS IPv4: L4 header protocol %d, mrqc 0x%X, tcpipv4 enabled %d, ipv4 enabled %d"
e1000e_rx_rss_ip6_rfctl(uint32_t rfctl) "RSS IPv6: rfctl 0x%X"
e1000e_rx_rss_ip6(bool ex_dis, bool new_ex_dis, bool istcp, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: ex_dis: %d, new_ex_dis: %d, tcp %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6 enabled %d, ipv6ex enabled %d, ipv6 enabled %d"
e1000e_rx_rss_dispatched_to_queue(int queue_idx) "Packet being dispatched to queue %d"
e1000e_rx_rss_ip6(bool ex_dis, bool new_ex_dis, int l4hdr_proto, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: ex_dis: %d, new_ex_dis: %d, L4 header protocol %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6 enabled %d, ipv6ex enabled %d, ipv6 enabled %d"

e1000e_rx_metadata_protocols(bool isip4, bool isip6, bool isudp, bool istcp) "protocols: ip4: %d, ip6: %d, udp: %d, tcp: %d"
e1000e_rx_metadata_protocols(bool hasip4, bool hasip6, int l4hdr_protocol) "protocols: ip4: %d, ip6: %d, l4hdr: %d"
e1000e_rx_metadata_vlan(uint16_t vlan_tag) "VLAN tag is 0x%X"
e1000e_rx_metadata_rss(uint32_t rss, uint32_t mrq) "RSS data: rss: 0x%X, mrq: 0x%X"
e1000e_rx_metadata_ip_id(uint16_t ip_id) "the IPv4 ID is 0x%X"
e1000e_rx_metadata_ack(void) "the packet is TCP ACK"
e1000e_rx_metadata_pkt_type(uint32_t pkt_type) "the packet type is %u"
e1000e_rx_metadata_no_virthdr(void) "the packet has no virt-header"
e1000e_rx_metadata_virthdr_no_csum_info(void) "virt-header does not contain checksum info"
e1000e_rx_metadata_l3_cso_disabled(void) "IP4 CSO is disabled"
e1000e_rx_metadata_l4_cso_disabled(void) "TCP/UDP CSO is disabled"
Expand All @@ -201,10 +199,8 @@ e1000e_rx_metadata_ipv6_filtering_disabled(void) "IPv6 RX filtering disabled by
e1000e_vlan_vet(uint16_t vet) "Setting VLAN ethernet type 0x%X"

e1000e_irq_msi_notify(uint32_t cause) "MSI notify 0x%x"
e1000e_irq_throttling_no_pending_interrupts(void) "No pending interrupts to notify"
e1000e_irq_msi_notify_postponed(void) "Sending MSI postponed by ITR"
e1000e_irq_legacy_notify_postponed(void) "Raising legacy IRQ postponed by ITR"
e1000e_irq_throttling_no_pending_vec(int idx) "No pending interrupts for vector %d"
e1000e_irq_msix_notify_postponed_vec(int idx) "Sending MSI-X postponed by EITR[%d]"
e1000e_irq_legacy_notify(bool level) "IRQ line state: %d"
e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x"
Expand Down Expand Up @@ -253,7 +249,7 @@ e1000e_vm_state_stopped(void) "VM state is stopped"
# e1000e.c
e1000e_cb_pci_realize(void) "E1000E PCI realize entry"
e1000e_cb_pci_uninit(void) "E1000E PCI unit entry"
e1000e_cb_qdev_reset(void) "E1000E qdev reset entry"
e1000e_cb_qdev_reset_hold(void) "E1000E qdev reset hold"
e1000e_cb_pre_save(void) "E1000E pre save entry"
e1000e_cb_post_load(void) "E1000E post load entry"

Expand All @@ -274,6 +270,38 @@ e1000e_msix_use_vector_fail(uint32_t vec, int32_t res) "Failed to use MSI-X vect
e1000e_mac_set_permanent(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Set permanent MAC: %02x:%02x:%02x:%02x:%02x:%02x"
e1000e_cfg_support_virtio(bool support) "Virtio header supported: %d"

# igb.c
igb_write_config(uint32_t address, uint32_t val, int len) "CONFIG write 0x%"PRIx32", value: 0x%"PRIx32", len: %"PRId32
igbvf_write_config(uint32_t address, uint32_t val, int len) "CONFIG write 0x%"PRIx32", value: 0x%"PRIx32", len: %"PRId32

# igb_core.c
igb_core_mdic_read(uint32_t addr, uint32_t data) "MDIC READ: PHY[%u] = 0x%x"
igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"

igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"

igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"

igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
igb_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x"
igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
igb_irq_read_iam(uint32_t icr) "Current IAM: 0x%x"
igb_irq_write_eics(uint32_t val, bool msix) "Update EICS: 0x%x MSI-X: %d"
igb_irq_write_eims(uint32_t val, bool msix) "Update EIMS: 0x%x MSI-X: %d"
igb_irq_write_eimc(uint32_t val, uint32_t eims, bool msix) "Update EIMC: 0x%x EIMS: 0x%x MSI-X: %d"
igb_irq_write_eiac(uint32_t val) "Update EIAC: 0x%x"
igb_irq_write_eiam(uint32_t val, bool msix) "Update EIAM: 0x%x MSI-X: %d"
igb_irq_write_eicr(uint32_t val, bool msix) "Update EICR: 0x%x MSI-X: %d"
igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x"
# vf_num is a uint32_t, so print it with %u like the other igb events do
igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%u]: 0x%x"
igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%u]: 0x%x"

# igbvf.c
igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64

# spapr_llan.c
spapr_vlan_get_rx_bd_from_pool_found(int pool, int32_t count, uint32_t rx_bufs) "pool=%d count=%"PRId32" rxbufs=%"PRIu32
spapr_vlan_get_rx_bd_from_page(int buf_ptr, uint64_t bd) "use_buf_ptr=%d bd=0x%016"PRIx64
Expand Down
85 changes: 51 additions & 34 deletions hw/net/virtio-net.c
Expand Up @@ -1746,39 +1746,61 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
bool isip6,
bool isudp,
bool istcp,
static uint8_t virtio_net_get_hash_type(bool hasip4,
bool hasip6,
EthL4HdrProto l4hdr_proto,
uint32_t types)
{
if (isip4) {
if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
return NetPktRssIpV4Tcp;
}
if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
return NetPktRssIpV4Udp;
if (hasip4) {
switch (l4hdr_proto) {
case ETH_L4_HDR_PROTO_TCP:
if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
return NetPktRssIpV4Tcp;
}
break;

case ETH_L4_HDR_PROTO_UDP:
if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
return NetPktRssIpV4Udp;
}
break;

default:
break;
}

if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
return NetPktRssIpV4;
}
} else if (isip6) {
uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
} else if (hasip6) {
switch (l4hdr_proto) {
case ETH_L4_HDR_PROTO_TCP:
if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
return NetPktRssIpV6TcpEx;
}
if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
return NetPktRssIpV6Tcp;
}
break;

case ETH_L4_HDR_PROTO_UDP:
if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
return NetPktRssIpV6UdpEx;
}
if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
return NetPktRssIpV6Udp;
}
break;

if (istcp && (types & mask)) {
return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
default:
break;
}
mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
if (isudp && (types & mask)) {
return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;

if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
return NetPktRssIpV6Ex;
}
mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
if (types & mask) {
return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
NetPktRssIpV6Ex : NetPktRssIpV6;
if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
return NetPktRssIpV6;
}
}
return 0xff;
Expand All @@ -1800,7 +1822,8 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
struct NetRxPkt *pkt = n->rx_pkt;
uint8_t net_hash_type;
uint32_t hash;
bool isip4, isip6, isudp, istcp;
bool hasip4, hasip6;
EthL4HdrProto l4hdr_proto;
static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
VIRTIO_NET_HASH_REPORT_IPv4,
VIRTIO_NET_HASH_REPORT_TCPv4,
Expand All @@ -1815,14 +1838,8 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,

net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
size - n->host_hdr_len);
net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
istcp = isudp = false;
}
if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
istcp = isudp = false;
}
net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
n->rss_data.hash_types);
if (net_hash_type > NetPktRssIpV6UdpEx) {
if (n->rss_data.populate_hash) {
Expand Down Expand Up @@ -3718,7 +3735,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
QTAILQ_INIT(&n->rsc_chains);
n->qdev = dev;

net_rx_pkt_init(&n->rx_pkt, false);
net_rx_pkt_init(&n->rx_pkt);

if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
virtio_net_load_ebpf(n);
Expand Down
58 changes: 27 additions & 31 deletions hw/net/vmxnet3.c
Expand Up @@ -440,19 +440,19 @@ vmxnet3_setup_tx_offloads(VMXNET3State *s)
{
switch (s->offload_mode) {
case VMXNET3_OM_NONE:
net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
break;
return net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);

case VMXNET3_OM_CSUM:
net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
VMW_PKPRN("L4 CSO requested\n");
break;
return net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);

case VMXNET3_OM_TSO:
net_tx_pkt_build_vheader(s->tx_pkt, true, true,
s->cso_or_gso_size);
net_tx_pkt_update_ip_checksums(s->tx_pkt);
VMW_PKPRN("GSO offload requested.");
if (!net_tx_pkt_build_vheader(s->tx_pkt, true, true,
s->cso_or_gso_size)) {
return false;
}
net_tx_pkt_update_ip_checksums(s->tx_pkt);
break;

default:
Expand Down Expand Up @@ -847,21 +847,20 @@ static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
size_t pkt_len)
{
struct virtio_net_hdr *vhdr;
bool isip4, isip6, istcp, isudp;
bool hasip4, hasip6;
EthL4HdrProto l4hdr_proto;
uint8_t *data;
int len;

if (!net_rx_pkt_has_virt_hdr(pkt)) {
return;
}

vhdr = net_rx_pkt_get_vhdr(pkt);
if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
return;
}

net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
if (!(isip4 || isip6) || !(istcp || isudp)) {
net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
if (!(hasip4 || hasip6) ||
(l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
l4hdr_proto != ETH_L4_HDR_PROTO_UDP)) {
return;
}

Expand Down Expand Up @@ -889,7 +888,8 @@ static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
struct Vmxnet3_RxCompDesc *rxcd)
{
int csum_ok, is_gso;
bool isip4, isip6, istcp, isudp;
bool hasip4, hasip6;
EthL4HdrProto l4hdr_proto;
struct virtio_net_hdr *vhdr;
uint8_t offload_type;

Expand All @@ -898,10 +898,6 @@ static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
}

if (!net_rx_pkt_has_virt_hdr(pkt)) {
goto nocsum;
}

vhdr = net_rx_pkt_get_vhdr(pkt);
/*
* Checksum is valid when lower level tell so or when lower level
Expand All @@ -919,16 +915,18 @@ static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
goto nocsum;
}

net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
if ((!istcp && !isudp) || (!isip4 && !isip6)) {
net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
if ((l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
l4hdr_proto != ETH_L4_HDR_PROTO_UDP) ||
(!hasip4 && !hasip6)) {
goto nocsum;
}

rxcd->cnc = 0;
rxcd->v4 = isip4 ? 1 : 0;
rxcd->v6 = isip6 ? 1 : 0;
rxcd->tcp = istcp ? 1 : 0;
rxcd->udp = isudp ? 1 : 0;
rxcd->v4 = hasip4 ? 1 : 0;
rxcd->v6 = hasip6 ? 1 : 0;
rxcd->tcp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP;
rxcd->udp = l4hdr_proto == ETH_L4_HDR_PROTO_UDP;
rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
return;

Expand Down Expand Up @@ -1521,9 +1519,8 @@ static void vmxnet3_activate_device(VMXNET3State *s)

/* Preallocate TX packet wrapper */
VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
s->max_tx_frags, s->peer_has_vhdr);
net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags);
net_rx_pkt_init(&s->rx_pkt);

/* Read rings memory locations for RX queues */
for (i = 0; i < s->rxq_num; i++) {
Expand Down Expand Up @@ -2402,9 +2399,8 @@ static int vmxnet3_post_load(void *opaque, int version_id)
{
VMXNET3State *s = opaque;

net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
s->max_tx_frags, s->peer_has_vhdr);
net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags);
net_rx_pkt_init(&s->rx_pkt);

if (s->msix_used) {
vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
Expand Down
5 changes: 5 additions & 0 deletions hw/pci/pcie_sriov.c
Expand Up @@ -300,3 +300,8 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
}
return NULL;
}

/*
 * Report the num_vfs value stored in @dev's SR-IOV physical-function
 * state (dev->exp.sriov_pf).
 */
uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
{
    uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;

    return num_vfs;
}
14 changes: 13 additions & 1 deletion include/hw/net/mii.h
Expand Up @@ -55,6 +55,7 @@
#define MII_BMCR_CTST (1 << 7) /* Collision test */
#define MII_BMCR_SPEED1000 (1 << 6) /* MSB of Speed (1000) */

#define MII_BMSR_100T4 (1 << 15) /* Can do 100mbps T4 */
#define MII_BMSR_100TX_FD (1 << 14) /* Can do 100mbps, full-duplex */
#define MII_BMSR_100TX_HD (1 << 13) /* Can do 100mbps, half-duplex */
#define MII_BMSR_10T_FD (1 << 12) /* Can do 10mbps, full-duplex */
Expand All @@ -81,20 +82,31 @@
#define MII_ANLPAR_ACK (1 << 14)
#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */
#define MII_ANLPAR_PAUSE (1 << 10) /* can pause */
#define MII_ANLPAR_T4 (1 << 9)
#define MII_ANLPAR_TXFD (1 << 8)
#define MII_ANLPAR_TX (1 << 7)
#define MII_ANLPAR_10FD (1 << 6)
#define MII_ANLPAR_10 (1 << 5)
#define MII_ANLPAR_CSMACD (1 << 0)

#define MII_ANER_NWAY (1 << 0) /* Can do N-way auto-nego */
#define MII_ANER_NP (1 << 2) /* Next Page Able */
#define MII_ANER_NWAY (1 << 0) /* Can do N-way auto-nego */

#define MII_ANNP_MP (1 << 13) /* Message Page */

#define MII_CTRL1000_MASTER (1 << 11) /* MASTER-SLAVE Manual Configuration Value */
#define MII_CTRL1000_PORT (1 << 10) /* T2_Repeater/DTE bit */
#define MII_CTRL1000_FULL (1 << 9) /* 1000BASE-T full duplex */
#define MII_CTRL1000_HALF (1 << 8) /* 1000BASE-T half duplex */

#define MII_STAT1000_LOK (1 << 13) /* Local Receiver Status */
#define MII_STAT1000_ROK (1 << 12) /* Remote Receiver Status */
#define MII_STAT1000_FULL (1 << 11) /* 1000BASE-T full duplex */
#define MII_STAT1000_HALF (1 << 10) /* 1000BASE-T half duplex */

#define MII_EXTSTAT_1000T_FD (1 << 13) /* 1000BASE-T Full Duplex */
#define MII_EXTSTAT_1000T_HD (1 << 12) /* 1000BASE-T Half Duplex */

/* List of vendor identifiers */
/* RealTek 8201 */
#define RTL8201CP_PHYID1 0x0000
Expand Down
3 changes: 3 additions & 0 deletions include/hw/pci/pcie_sriov.h
Expand Up @@ -76,4 +76,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev);
*/
PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n);

/* Returns the current number of virtual functions. */
uint16_t pcie_sriov_num_vfs(PCIDevice *dev);

#endif /* QEMU_PCIE_SRIOV_H */
15 changes: 8 additions & 7 deletions include/net/eth.h
Expand Up @@ -381,30 +381,31 @@ typedef struct eth_ip4_hdr_info_st {
bool fragment;
} eth_ip4_hdr_info;

typedef enum EthL4HdrProto {
ETH_L4_HDR_PROTO_INVALID,
ETH_L4_HDR_PROTO_TCP,
ETH_L4_HDR_PROTO_UDP
} EthL4HdrProto;

typedef struct eth_l4_hdr_info_st {
union {
struct tcp_header tcp;
struct udp_header udp;
} hdr;

EthL4HdrProto proto;
bool has_tcp_data;
} eth_l4_hdr_info;

void eth_get_protocols(const struct iovec *iov, int iovcnt,
bool *isip4, bool *isip6,
bool *isudp, bool *istcp,
bool *hasip4, bool *hasip6,
size_t *l3hdr_off,
size_t *l4hdr_off,
size_t *l5hdr_off,
eth_ip6_hdr_info *ip6hdr_info,
eth_ip4_hdr_info *ip4hdr_info,
eth_l4_hdr_info *l4hdr_info);

void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
void *l3hdr, size_t l3hdr_len,
size_t l3payload_len,
size_t frag_offset, bool more_frags);

void
eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len);

Expand Down
6 changes: 6 additions & 0 deletions include/net/net.h
Expand Up @@ -56,8 +56,10 @@ typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
typedef bool (HasUfo)(NetClientState *);
typedef bool (HasVnetHdr)(NetClientState *);
typedef bool (HasVnetHdrLen)(NetClientState *, int);
typedef bool (GetUsingVnetHdr)(NetClientState *);
typedef void (UsingVnetHdr)(NetClientState *, bool);
typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
typedef int (GetVnetHdrLen)(NetClientState *);
typedef void (SetVnetHdrLen)(NetClientState *, int);
typedef int (SetVnetLE)(NetClientState *, bool);
typedef int (SetVnetBE)(NetClientState *, bool);
Expand All @@ -84,8 +86,10 @@ typedef struct NetClientInfo {
HasUfo *has_ufo;
HasVnetHdr *has_vnet_hdr;
HasVnetHdrLen *has_vnet_hdr_len;
GetUsingVnetHdr *get_using_vnet_hdr;
UsingVnetHdr *using_vnet_hdr;
SetOffload *set_offload;
GetVnetHdrLen *get_vnet_hdr_len;
SetVnetHdrLen *set_vnet_hdr_len;
SetVnetLE *set_vnet_le;
SetVnetBE *set_vnet_be;
Expand Down Expand Up @@ -185,9 +189,11 @@ void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
bool qemu_has_ufo(NetClientState *nc);
bool qemu_has_vnet_hdr(NetClientState *nc);
bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
bool qemu_get_using_vnet_hdr(NetClientState *nc);
void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
int ecn, int ufo);
int qemu_get_vnet_hdr_len(NetClientState *nc);
void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
int qemu_set_vnet_le(NetClientState *nc, bool is_le);
int qemu_set_vnet_be(NetClientState *nc, bool is_be);
Expand Down
11 changes: 7 additions & 4 deletions net/dump.c
Expand Up @@ -61,12 +61,13 @@ struct pcap_sf_pkthdr {
uint32_t len;
};

static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)
static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt,
int offset)
{
struct pcap_sf_pkthdr hdr;
int64_t ts;
int caplen;
size_t size = iov_size(iov, cnt);
size_t size = iov_size(iov, cnt) - offset;
struct iovec dumpiov[cnt + 1];

/* Early return in case of previous error. */
Expand All @@ -84,7 +85,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)

dumpiov[0].iov_base = &hdr;
dumpiov[0].iov_len = sizeof(hdr);
cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen);
cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, offset, caplen);

if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
error_report("network dump write error - stopping dump");
Expand Down Expand Up @@ -153,8 +154,10 @@ static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
int iovcnt, NetPacketSent *sent_cb)
{
NetFilterDumpState *nfds = FILTER_DUMP(nf);
int offset = qemu_get_using_vnet_hdr(nf->netdev) ?
qemu_get_vnet_hdr_len(nf->netdev) : 0;

dump_receive_iov(&nfds->ds, iov, iovcnt);
dump_receive_iov(&nfds->ds, iov, iovcnt, offset);
return 0;
}

Expand Down
118 changes: 43 additions & 75 deletions net/eth.c
Expand Up @@ -137,8 +137,7 @@ _eth_tcp_has_data(bool is_ip4,
}

void eth_get_protocols(const struct iovec *iov, int iovcnt,
bool *isip4, bool *isip6,
bool *isudp, bool *istcp,
bool *hasip4, bool *hasip6,
size_t *l3hdr_off,
size_t *l4hdr_off,
size_t *l5hdr_off,
Expand All @@ -151,8 +150,10 @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
size_t input_size = iov_size(iov, iovcnt);
size_t copied;
uint8_t ip_p;

*isip4 = *isip6 = *isudp = *istcp = false;
*hasip4 = *hasip6 = false;
l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;

proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);

Expand All @@ -166,68 +167,62 @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
}

copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));

*isip4 = true;

if (copied < sizeof(*iphdr)) {
if (copied < sizeof(*iphdr) ||
IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
return;
}

if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
if (iphdr->ip_p == IP_PROTO_TCP) {
*istcp = true;
} else if (iphdr->ip_p == IP_PROTO_UDP) {
*isudp = true;
}
}

*hasip4 = true;
ip_p = iphdr->ip_p;
ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
*l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);

fragment = ip4hdr_info->fragment;
} else if (proto == ETH_P_IPV6) {

*isip6 = true;
if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
ip6hdr_info)) {
if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
*istcp = true;
} else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
*isudp = true;
}
} else {
if (!eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, ip6hdr_info)) {
return;
}

*hasip6 = true;
ip_p = ip6hdr_info->l4proto;
*l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
fragment = ip6hdr_info->fragment;
} else {
return;
}

if (!fragment) {
if (*istcp) {
*istcp = _eth_copy_chunk(input_size,
iov, iovcnt,
*l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
&l4hdr_info->hdr.tcp);

if (*istcp) {
*l5hdr_off = *l4hdr_off +
TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

l4hdr_info->has_tcp_data =
_eth_tcp_has_data(proto == ETH_P_IP,
&ip4hdr_info->ip4_hdr,
&ip6hdr_info->ip6_hdr,
*l4hdr_off - *l3hdr_off,
&l4hdr_info->hdr.tcp);
}
} else if (*isudp) {
*isudp = _eth_copy_chunk(input_size,
iov, iovcnt,
*l4hdr_off, sizeof(l4hdr_info->hdr.udp),
&l4hdr_info->hdr.udp);
if (fragment) {
return;
}

switch (ip_p) {
case IP_PROTO_TCP:
if (_eth_copy_chunk(input_size,
iov, iovcnt,
*l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
&l4hdr_info->hdr.tcp)) {
l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
*l5hdr_off = *l4hdr_off +
TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

l4hdr_info->has_tcp_data =
_eth_tcp_has_data(proto == ETH_P_IP,
&ip4hdr_info->ip4_hdr,
&ip6hdr_info->ip6_hdr,
*l4hdr_off - *l3hdr_off,
&l4hdr_info->hdr.tcp);
}
break;

case IP_PROTO_UDP:
if (_eth_copy_chunk(input_size,
iov, iovcnt,
*l4hdr_off, sizeof(l4hdr_info->hdr.udp),
&l4hdr_info->hdr.udp)) {
l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
*l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
}
break;
}
}

Expand Down Expand Up @@ -314,33 +309,6 @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
return 0;
}

void
eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
void *l3hdr, size_t l3hdr_len,
size_t l3payload_len,
size_t frag_offset, bool more_frags)
{
const struct iovec l2vec = {
.iov_base = (void *) l2hdr,
.iov_len = l2hdr_len
};

if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
uint16_t orig_flags;
struct ip_header *iphdr = (struct ip_header *) l3hdr;
uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
uint16_t new_ip_off;

assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
assert((frag_off_units & ~IP_OFFMASK) == 0);

orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
iphdr->ip_off = cpu_to_be16(new_ip_off);
iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
}
}

void
eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
{
Expand Down
18 changes: 18 additions & 0 deletions net/net.c
Expand Up @@ -513,6 +513,15 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int len)
return nc->info->has_vnet_hdr_len(nc, len);
}

/*
 * Forward to @nc's get_using_vnet_hdr callback and return its result.
 * Returns false when @nc is NULL or the callback is not provided.
 */
bool qemu_get_using_vnet_hdr(NetClientState *nc)
{
    if (nc && nc->info->get_using_vnet_hdr) {
        return nc->info->get_using_vnet_hdr(nc);
    }

    return false;
}

void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
{
if (!nc || !nc->info->using_vnet_hdr) {
Expand All @@ -532,6 +541,15 @@ void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
}

/*
 * Forward to @nc's get_vnet_hdr_len callback and return its result.
 * Returns 0 when @nc is NULL or the callback is not provided.
 */
int qemu_get_vnet_hdr_len(NetClientState *nc)
{
    GetVnetHdrLen *get_len = nc ? nc->info->get_vnet_hdr_len : NULL;

    return get_len ? get_len(nc) : 0;
}

void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
{
if (!nc || !nc->info->set_vnet_hdr_len) {
Expand Down
16 changes: 16 additions & 0 deletions net/tap.c
Expand Up @@ -255,6 +255,13 @@ static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
return !!tap_probe_vnet_hdr_len(s->fd, len);
}

/*
 * get_vnet_hdr_len callback for tap: returns the host_vnet_hdr_len
 * field of the TAPState that embeds @nc.
 */
static int tap_get_vnet_hdr_len(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int hdr_len = tap->host_vnet_hdr_len;

    return hdr_len;
}

static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
Expand All @@ -268,6 +275,13 @@ static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
s->host_vnet_hdr_len = len;
}

/*
 * get_using_vnet_hdr callback for tap: returns the using_vnet_hdr
 * field of the TAPState that embeds @nc.
 */
static bool tap_get_using_vnet_hdr(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    bool in_use = tap->using_vnet_hdr;

    return in_use;
}

static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
Expand Down Expand Up @@ -372,8 +386,10 @@ static NetClientInfo net_tap_info = {
.has_ufo = tap_has_ufo,
.has_vnet_hdr = tap_has_vnet_hdr,
.has_vnet_hdr_len = tap_has_vnet_hdr_len,
.get_using_vnet_hdr = tap_get_using_vnet_hdr,
.using_vnet_hdr = tap_using_vnet_hdr,
.set_offload = tap_set_offload,
.get_vnet_hdr_len = tap_get_vnet_hdr_len,
.set_vnet_hdr_len = tap_set_vnet_hdr_len,
.set_vnet_le = tap_set_vnet_le,
.set_vnet_be = tap_set_vnet_be,
Expand Down
1 change: 1 addition & 0 deletions scripts/ci/org.centos/stream/8/x86_64/test-avocado
Expand Up @@ -30,6 +30,7 @@ make get-vm-images
tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
tests/avocado/igb.py:IGB.test \
tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \
Expand Down
38 changes: 38 additions & 0 deletions tests/avocado/igb.py
@@ -0,0 +1,38 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# ethtool tests for igb registers, interrupts, etc

from avocado_qemu import LinuxTest

class IGB(LinuxTest):
"""
:avocado: tags=accel:kvm
:avocado: tags=arch:x86_64
:avocado: tags=distro:fedora
:avocado: tags=distro_version:31
:avocado: tags=machine:q35
"""

timeout = 180

def test(self):
self.require_accelerator('kvm')
kernel_url = self.distro.pxeboot_url + 'vmlinuz'
kernel_hash = '5b6f6876e1b5bda314f93893271da0d5777b1f3c'
kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
initrd_url = self.distro.pxeboot_url + 'initrd.img'
initrd_hash = 'dd0340a1b39bd28f88532babd4581c67649ec5b1'
initrd_path = self.fetch_asset(initrd_url, asset_hash=initrd_hash)

# Ideally we want to test MSI as well, but it is blocked by a bug
# fixed with:
# https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=28e96556baca7056d11d9fb3cdd0aba4483e00d8
kernel_params = self.distro.default_kernel_params + ' pci=nomsi'

self.vm.add_args('-kernel', kernel_path,
'-initrd', initrd_path,
'-append', kernel_params,
'-accel', 'kvm',
'-device', 'igb')
self.launch_and_wait()
self.ssh_command('dnf -y install ethtool')
self.ssh_command('ethtool -t eth1 offline')
25 changes: 15 additions & 10 deletions tests/qtest/e1000e-test.c
Expand Up @@ -27,6 +27,7 @@
#include "qemu/osdep.h"
#include "libqtest-single.h"
#include "libqos/pci-pc.h"
#include "net/eth.h"
#include "qemu/sockets.h"
#include "qemu/iov.h"
#include "qemu/module.h"
Expand All @@ -35,17 +36,21 @@
#include "libqos/e1000e.h"
#include "hw/net/e1000_regs.h"

static const struct eth_header packet = {
.h_dest = E1000E_ADDRESS,
.h_source = E1000E_ADDRESS,
};

static void e1000e_send_verify(QE1000E *d, int *test_sockets, QGuestAllocator *alloc)
{
static const char test[] = "TEST";
struct e1000_tx_desc descr;
char buffer[64];
int ret;
uint32_t recv_len;

/* Prepare test data buffer */
uint64_t data = guest_alloc(alloc, sizeof(buffer));
memwrite(data, test, sizeof(test));
memwrite(data, &packet, sizeof(packet));

/* Prepare TX descriptor */
memset(&descr, 0, sizeof(descr));
Expand All @@ -71,7 +76,7 @@ static void e1000e_send_verify(QE1000E *d, int *test_sockets, QGuestAllocator *a
g_assert_cmpint(ret, == , sizeof(recv_len));
ret = recv(test_sockets[0], buffer, sizeof(buffer), 0);
g_assert_cmpint(ret, ==, sizeof(buffer));
g_assert_cmpstr(buffer, == , test);
g_assert_false(memcmp(buffer, &packet, sizeof(packet)));

/* Free test data buffer */
guest_free(alloc, data);
Expand All @@ -81,24 +86,24 @@ static void e1000e_receive_verify(QE1000E *d, int *test_sockets, QGuestAllocator
{
union e1000_rx_desc_extended descr;

char test[] = "TEST";
int len = htonl(sizeof(test));
struct eth_header test_iov = packet;
int len = htonl(sizeof(packet));
struct iovec iov[] = {
{
.iov_base = &len,
.iov_len = sizeof(len),
},{
.iov_base = test,
.iov_len = sizeof(test),
.iov_base = &test_iov,
.iov_len = sizeof(packet),
},
};

char buffer[64];
int ret;

/* Send a dummy packet to device's socket*/
ret = iov_send(test_sockets[0], iov, 2, 0, sizeof(len) + sizeof(test));
g_assert_cmpint(ret, == , sizeof(test) + sizeof(len));
ret = iov_send(test_sockets[0], iov, 2, 0, sizeof(len) + sizeof(packet));
g_assert_cmpint(ret, == , sizeof(packet) + sizeof(len));

/* Prepare test data buffer */
uint64_t data = guest_alloc(alloc, sizeof(buffer));
Expand All @@ -119,7 +124,7 @@ static void e1000e_receive_verify(QE1000E *d, int *test_sockets, QGuestAllocator

/* Check data sent to the backend */
memread(data, buffer, sizeof(buffer));
g_assert_cmpstr(buffer, == , test);
g_assert_false(memcmp(buffer, &packet, sizeof(packet)));

/* Free test data buffer */
guest_free(alloc, data);
Expand Down
5 changes: 5 additions & 0 deletions tests/qtest/fuzz/generic_fuzz_configs.h
Expand Up @@ -90,6 +90,11 @@ const generic_fuzz_config predefined_configs[] = {
.args = "-M q35 -nodefaults "
"-device e1000e,netdev=net0 -netdev user,id=net0",
.objects = "e1000e",
},{
.name = "igb",
.args = "-M q35 -nodefaults "
"-device igb,netdev=net0 -netdev user,id=net0",
.objects = "igb",
},{
.name = "cirrus-vga",
.args = "-machine q35 -nodefaults -device cirrus-vga",
Expand Down