Skip to content

Commit 80c4b92

Browse files
yishaih authored and
Alex Williamson committed
vfio: Introduce the DMA logging feature support
Introduce the DMA logging feature support in the vfio core layer. It includes the processing of the device start/stop/report DMA logging UAPIs and calling the relevant driver 'op' to do the work. Specifically, upon start, the core translates the given input ranges into an interval tree, checks for unexpected overlapping or non-aligned ranges, and then passes the translated input to the driver to start tracking the given ranges. Upon report, the core translates the given input user space bitmap and page size into an IOVA kernel bitmap iterator. Then it iterates over it and calls the driver to set the corresponding bits for the dirtied pages in a specific IOVA range. Upon stop, the driver is called to stop the previously started tracking. The next patches from the series will introduce the mlx5 driver implementation for the logging ops. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20220908183448.195262-6-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent 58ccf01 commit 80c4b92

File tree

4 files changed

+207
-2
lines changed

4 files changed

+207
-2
lines changed

drivers/vfio/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ menuconfig VFIO
33
tristate "VFIO Non-Privileged userspace driver framework"
44
select IOMMU_API
55
select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
6+
select INTERVAL_TREE
67
help
78
VFIO provides a framework for secure userspace device drivers.
89
See Documentation/driver-api/vfio.rst for more details.

drivers/vfio/pci/vfio_pci_core.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2128,6 +2128,11 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
21282128
return -EINVAL;
21292129
}
21302130

2131+
if (vdev->vdev.log_ops && !(vdev->vdev.log_ops->log_start &&
2132+
vdev->vdev.log_ops->log_stop &&
2133+
vdev->vdev.log_ops->log_read_and_clear))
2134+
return -EINVAL;
2135+
21312136
/*
21322137
* Prevent binding to PFs with VFs enabled, the VFs might be in use
21332138
* by the host or other users. We cannot capture the VFs if they

drivers/vfio/vfio_main.c

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include <linux/wait.h>
3434
#include <linux/sched/signal.h>
3535
#include <linux/pm_runtime.h>
36+
#include <linux/interval_tree.h>
37+
#include <linux/iova_bitmap.h>
3638
#include "vfio.h"
3739

3840
#define DRIVER_VERSION "0.3"
@@ -1658,6 +1660,167 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
16581660
return 0;
16591661
}
16601662

1663+
/* Ranges should fit into a single kernel page */
1664+
#define LOG_MAX_RANGES \
1665+
(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
1666+
1667+
static int
1668+
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
1669+
u32 flags, void __user *arg,
1670+
size_t argsz)
1671+
{
1672+
size_t minsz =
1673+
offsetofend(struct vfio_device_feature_dma_logging_control,
1674+
ranges);
1675+
struct vfio_device_feature_dma_logging_range __user *ranges;
1676+
struct vfio_device_feature_dma_logging_control control;
1677+
struct vfio_device_feature_dma_logging_range range;
1678+
struct rb_root_cached root = RB_ROOT_CACHED;
1679+
struct interval_tree_node *nodes;
1680+
u64 iova_end;
1681+
u32 nnodes;
1682+
int i, ret;
1683+
1684+
if (!device->log_ops)
1685+
return -ENOTTY;
1686+
1687+
ret = vfio_check_feature(flags, argsz,
1688+
VFIO_DEVICE_FEATURE_SET,
1689+
sizeof(control));
1690+
if (ret != 1)
1691+
return ret;
1692+
1693+
if (copy_from_user(&control, arg, minsz))
1694+
return -EFAULT;
1695+
1696+
nnodes = control.num_ranges;
1697+
if (!nnodes)
1698+
return -EINVAL;
1699+
1700+
if (nnodes > LOG_MAX_RANGES)
1701+
return -E2BIG;
1702+
1703+
ranges = u64_to_user_ptr(control.ranges);
1704+
nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
1705+
GFP_KERNEL);
1706+
if (!nodes)
1707+
return -ENOMEM;
1708+
1709+
for (i = 0; i < nnodes; i++) {
1710+
if (copy_from_user(&range, &ranges[i], sizeof(range))) {
1711+
ret = -EFAULT;
1712+
goto end;
1713+
}
1714+
if (!IS_ALIGNED(range.iova, control.page_size) ||
1715+
!IS_ALIGNED(range.length, control.page_size)) {
1716+
ret = -EINVAL;
1717+
goto end;
1718+
}
1719+
1720+
if (check_add_overflow(range.iova, range.length, &iova_end) ||
1721+
iova_end > ULONG_MAX) {
1722+
ret = -EOVERFLOW;
1723+
goto end;
1724+
}
1725+
1726+
nodes[i].start = range.iova;
1727+
nodes[i].last = range.iova + range.length - 1;
1728+
if (interval_tree_iter_first(&root, nodes[i].start,
1729+
nodes[i].last)) {
1730+
/* Range overlapping */
1731+
ret = -EINVAL;
1732+
goto end;
1733+
}
1734+
interval_tree_insert(nodes + i, &root);
1735+
}
1736+
1737+
ret = device->log_ops->log_start(device, &root, nnodes,
1738+
&control.page_size);
1739+
if (ret)
1740+
goto end;
1741+
1742+
if (copy_to_user(arg, &control, sizeof(control))) {
1743+
ret = -EFAULT;
1744+
device->log_ops->log_stop(device);
1745+
}
1746+
1747+
end:
1748+
kfree(nodes);
1749+
return ret;
1750+
}
1751+
1752+
static int
1753+
vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
1754+
u32 flags, void __user *arg,
1755+
size_t argsz)
1756+
{
1757+
int ret;
1758+
1759+
if (!device->log_ops)
1760+
return -ENOTTY;
1761+
1762+
ret = vfio_check_feature(flags, argsz,
1763+
VFIO_DEVICE_FEATURE_SET, 0);
1764+
if (ret != 1)
1765+
return ret;
1766+
1767+
return device->log_ops->log_stop(device);
1768+
}
1769+
1770+
/*
 * Callback handed to iova_bitmap_for_each() by the logging report handler.
 * @opaque is the vfio_device; the call is forwarded to the driver's
 * log_read_and_clear op with the bitmap passed as its last argument.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *vdev = opaque;
	const struct vfio_log_ops *ops = vdev->log_ops;

	return ops->log_read_and_clear(vdev, iova, length, iter);
}
1778+
1779+
static int
1780+
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
1781+
u32 flags, void __user *arg,
1782+
size_t argsz)
1783+
{
1784+
size_t minsz =
1785+
offsetofend(struct vfio_device_feature_dma_logging_report,
1786+
bitmap);
1787+
struct vfio_device_feature_dma_logging_report report;
1788+
struct iova_bitmap *iter;
1789+
u64 iova_end;
1790+
int ret;
1791+
1792+
if (!device->log_ops)
1793+
return -ENOTTY;
1794+
1795+
ret = vfio_check_feature(flags, argsz,
1796+
VFIO_DEVICE_FEATURE_GET,
1797+
sizeof(report));
1798+
if (ret != 1)
1799+
return ret;
1800+
1801+
if (copy_from_user(&report, arg, minsz))
1802+
return -EFAULT;
1803+
1804+
if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
1805+
return -EINVAL;
1806+
1807+
if (check_add_overflow(report.iova, report.length, &iova_end) ||
1808+
iova_end > ULONG_MAX)
1809+
return -EOVERFLOW;
1810+
1811+
iter = iova_bitmap_alloc(report.iova, report.length,
1812+
report.page_size,
1813+
u64_to_user_ptr(report.bitmap));
1814+
if (IS_ERR(iter))
1815+
return PTR_ERR(iter);
1816+
1817+
ret = iova_bitmap_for_each(iter, device,
1818+
vfio_device_log_read_and_clear);
1819+
1820+
iova_bitmap_free(iter);
1821+
return ret;
1822+
}
1823+
16611824
static int vfio_ioctl_device_feature(struct vfio_device *device,
16621825
struct vfio_device_feature __user *arg)
16631826
{
@@ -1691,6 +1854,18 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
16911854
return vfio_ioctl_device_feature_mig_device_state(
16921855
device, feature.flags, arg->data,
16931856
feature.argsz - minsz);
1857+
case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
1858+
return vfio_ioctl_device_feature_logging_start(
1859+
device, feature.flags, arg->data,
1860+
feature.argsz - minsz);
1861+
case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
1862+
return vfio_ioctl_device_feature_logging_stop(
1863+
device, feature.flags, arg->data,
1864+
feature.argsz - minsz);
1865+
case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
1866+
return vfio_ioctl_device_feature_logging_report(
1867+
device, feature.flags, arg->data,
1868+
feature.argsz - minsz);
16941869
default:
16951870
if (unlikely(!device->ops->device_feature))
16961871
return -EINVAL;

include/linux/vfio.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <linux/workqueue.h>
1515
#include <linux/poll.h>
1616
#include <uapi/linux/vfio.h>
17+
#include <linux/iova_bitmap.h>
1718

1819
struct kvm;
1920

@@ -33,10 +34,11 @@ struct vfio_device {
3334
struct device *dev;
3435
const struct vfio_device_ops *ops;
3536
/*
36-
* mig_ops is a static property of the vfio_device which must be set
37-
* prior to registering the vfio_device.
37+
* mig_ops/log_ops is a static property of the vfio_device which must
38+
* be set prior to registering the vfio_device.
3839
*/
3940
const struct vfio_migration_ops *mig_ops;
41+
const struct vfio_log_ops *log_ops;
4042
struct vfio_group *group;
4143
struct vfio_device_set *dev_set;
4244
struct list_head dev_set_list;
@@ -108,6 +110,28 @@ struct vfio_migration_ops {
108110
enum vfio_device_mig_state *curr_state);
109111
};
110112

113+
/**
114+
* @log_start: Optional callback to ask the device start DMA logging.
115+
* @log_stop: Optional callback to ask the device stop DMA logging.
116+
* @log_read_and_clear: Optional callback to ask the device read
117+
* and clear the dirty DMAs in some given range.
118+
*
119+
* The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
120+
* of features does not track logging state relative to the device,
121+
* therefore the device implementation of vfio_log_ops must handle
122+
* arbitrary user requests. This includes rejecting subsequent calls
123+
* to log_start without an intervening log_stop, as well as graceful
124+
* handling of log_stop and log_read_and_clear from invalid states.
125+
*/
126+
struct vfio_log_ops {
127+
int (*log_start)(struct vfio_device *device,
128+
struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
129+
int (*log_stop)(struct vfio_device *device);
130+
int (*log_read_and_clear)(struct vfio_device *device,
131+
unsigned long iova, unsigned long length,
132+
struct iova_bitmap *dirty);
133+
};
134+
111135
/**
112136
* vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
113137
* @flags: Arg from the device_feature op

0 commit comments

Comments
 (0)