Skip to content

Commit

Permalink
cxl/mem: Add the cxl_mem driver
Browse files Browse the repository at this point in the history
At this point the subsystem can enumerate all CXL ports (CXL.mem decode
resources in upstream switch ports and host bridges) in a system. The
last mile is connecting those ports to endpoints.

The cxl_mem driver connects an endpoint device to the platform CXL.mem
protoctol decode-topology. At ->probe() time it walks its
device-topology-ancestry and adds a CXL Port object at every Upstream
Port hop until it gets to CXL root. The CXL root object is only present
after a platform firmware driver registers platform CXL resources. For
ACPI based platform this is managed by the ACPI0017 device and the
cxl_acpi driver.

The ports are registered such that disabling a given port automatically
unregisters all descendant ports, and the chain can only be registered
after the root is established.

Given ACPI device scanning may run asynchronously compared to PCI device
scanning the root driver is tasked with rescanning the bus after the
root successfully probes.

Conversely if any ports in a chain between the root and an endpoint
becomes disconnected it subsequently triggers the endpoint to
unregister. Given lock depenedencies the endpoint unregistration happens
in a workqueue asynchronously. If userspace cares about synchronizing
delayed work after port events the /sys/bus/cxl/flush attribute is
available for that purpose.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
[djbw: clarify changelog, rework hotplug support]
Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
  • Loading branch information
Ben Widawsky authored and djbw committed Feb 9, 2022
1 parent 2703c16 commit 8dd2bc0
Show file tree
Hide file tree
Showing 13 changed files with 425 additions and 5 deletions.
9 changes: 9 additions & 0 deletions Documentation/ABI/testing/sysfs-bus-cxl
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
What: /sys/bus/cxl/flush
Date: Januarry, 2022
KernelVersion: v5.18
Contact: linux-cxl@vger.kernel.org
Description:
(WO) If userspace manually unbinds a port the kernel schedules
all descendant memdevs for unbind. Writing '1' to this attribute
flushes that work.

What: /sys/bus/cxl/devices/memX/firmware_version
Date: December, 2020
KernelVersion: v5.12
Expand Down
9 changes: 9 additions & 0 deletions Documentation/driver-api/cxl/memory-devices.rst
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ CXL Memory Device
.. kernel-doc:: drivers/cxl/pci.c
:internal:

.. kernel-doc:: drivers/cxl/mem.c
:doc: cxl mem

CXL Port
--------
.. kernel-doc:: drivers/cxl/port.c
Expand All @@ -344,6 +347,12 @@ CXL Core
.. kernel-doc:: drivers/cxl/core/port.c
:identifiers:

.. kernel-doc:: drivers/cxl/core/pci.c
:doc: cxl core pci

.. kernel-doc:: drivers/cxl/core/pci.c
:identifiers:

.. kernel-doc:: drivers/cxl/core/pmem.c
:doc: cxl pmem

Expand Down
16 changes: 16 additions & 0 deletions drivers/cxl/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,22 @@ config CXL_PMEM

If unsure say 'm'.

config CXL_MEM
tristate "CXL: Memory Expansion"
depends on CXL_PCI
default CXL_BUS
help
The CXL.mem protocol allows a device to act as a provider of "System
RAM" and/or "Persistent Memory" that is fully coherent as if the
memory were attached to the typical CPU memory controller. This is
known as HDM "Host-managed Device Memory".

Say 'y/m' to enable a driver that will attach to CXL.mem devices for
memory expansion and control of HDM. See Chapter 9.13 in the CXL 2.0
specification for a detailed description of HDM.

If unsure say 'm'.

config CXL_PORT
default CXL_BUS
tristate
Expand Down
2 changes: 2 additions & 0 deletions drivers/cxl/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CXL_BUS) += core/
obj-$(CONFIG_CXL_PCI) += cxl_pci.o
obj-$(CONFIG_CXL_MEM) += cxl_mem.o
obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
obj-$(CONFIG_CXL_PORT) += cxl_port.o

cxl_mem-y := mem.o
cxl_pci-y := pci.o
cxl_acpi-y := acpi.o
cxl_pmem-y := pmem.o
Expand Down
3 changes: 2 additions & 1 deletion drivers/cxl/acpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ static int cxl_acpi_probe(struct platform_device *pdev)
if (rc < 0)
return rc;

return 0;
/* In case PCI is scanned before ACPI re-trigger memdev attach */
return cxl_bus_rescan();
}

static const struct acpi_device_id cxl_acpi_ids[] = {
Expand Down
16 changes: 16 additions & 0 deletions drivers/cxl/core/memdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ static const struct device_type cxl_memdev_type = {
.groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(struct device *dev)
{
return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
* set_exclusive_cxl_commands() - atomically disable user cxl commands
* @cxlds: The device state to operate on
Expand Down Expand Up @@ -213,6 +219,15 @@ static void cxl_memdev_unregister(void *_cxlmd)
put_device(dev);
}

static void detach_memdev(struct work_struct *work)
{
struct cxl_memdev *cxlmd;

cxlmd = container_of(work, typeof(*cxlmd), detach_work);
device_release_driver(&cxlmd->dev);
put_device(&cxlmd->dev);
}

static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
const struct file_operations *fops)
{
Expand All @@ -237,6 +252,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
dev->type = &cxl_memdev_type;
device_set_pm_not_required(dev);
INIT_WORK(&cxlmd->detach_work, detach_memdev);

cdev = &cxlmd->cdev;
cdev_init(cdev, fops);
Expand Down
105 changes: 101 additions & 4 deletions drivers/cxl/core/port.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/workqueue.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/pci.h>
Expand Down Expand Up @@ -46,6 +47,8 @@ static int cxl_device_id(struct device *dev)
return CXL_DEVICE_ROOT;
return CXL_DEVICE_PORT;
}
if (is_cxl_memdev(dev))
return CXL_DEVICE_MEMORY_EXPANDER;
return 0;
}

Expand Down Expand Up @@ -318,8 +321,10 @@ static void unregister_port(void *_port)
{
struct cxl_port *port = _port;

if (!is_cxl_root(port))
if (!is_cxl_root(port)) {
device_lock_assert(port->dev.parent);
port->uport = NULL;
}

device_unregister(&port->dev);
}
Expand Down Expand Up @@ -410,7 +415,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
if (parent_port)
port->depth = parent_port->depth + 1;
dev = &port->dev;
if (parent_port)
if (is_cxl_memdev(uport))
rc = dev_set_name(dev, "endpoint%d", port->id);
else if (parent_port)
rc = dev_set_name(dev, "port%d", port->id);
else
rc = dev_set_name(dev, "root%d", port->id);
Expand Down Expand Up @@ -790,6 +797,38 @@ static struct device *grandparent(struct device *dev)
return NULL;
}

static void delete_endpoint(void *data)
{
struct cxl_memdev *cxlmd = data;
struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev);
struct cxl_port *parent_port;
struct device *parent;

parent_port = cxl_mem_find_port(cxlmd);
if (!parent_port)
return;
parent = &parent_port->dev;

cxl_device_lock(parent);
if (parent->driver && endpoint->uport) {
devm_release_action(parent, cxl_unlink_uport, endpoint);
devm_release_action(parent, unregister_port, endpoint);
}
cxl_device_unlock(parent);
put_device(parent);
put_device(&endpoint->dev);
}

int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
{
struct device *dev = &cxlmd->dev;

get_device(&endpoint->dev);
dev_set_drvdata(dev, endpoint);
return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);

/*
* The natural end of life of a non-root 'cxl_port' is when its parent port goes
* through a ->remove() event ("top-down" unregistration). The unnatural trigger
Expand Down Expand Up @@ -1034,6 +1073,12 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL);

struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd)
{
return find_cxl_port(grandparent(&cxlmd->dev));
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL);

struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port,
const struct device *dev)
{
Expand Down Expand Up @@ -1352,12 +1397,54 @@ static void cxl_bus_remove(struct device *dev)
cxl_nested_unlock(dev);
}

static struct workqueue_struct *cxl_bus_wq;

int cxl_bus_rescan(void)
{
return bus_rescan_devices(&cxl_bus_type);
}
EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);

bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
{
return queue_work(cxl_bus_wq, &cxlmd->detach_work);
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);

/* for user tooling to ensure port disable work has completed */
static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count)
{
if (sysfs_streq(buf, "1")) {
flush_workqueue(cxl_bus_wq);
return count;
}

return -EINVAL;
}

static BUS_ATTR_WO(flush);

static struct attribute *cxl_bus_attributes[] = {
&bus_attr_flush.attr,
NULL,
};

static struct attribute_group cxl_bus_attribute_group = {
.attrs = cxl_bus_attributes,
};

static const struct attribute_group *cxl_bus_attribute_groups[] = {
&cxl_bus_attribute_group,
NULL,
};

struct bus_type cxl_bus_type = {
.name = "cxl",
.uevent = cxl_bus_uevent,
.match = cxl_bus_match,
.probe = cxl_bus_probe,
.remove = cxl_bus_remove,
.bus_groups = cxl_bus_attribute_groups,
};
EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);

Expand All @@ -1371,12 +1458,21 @@ static __init int cxl_core_init(void)
if (rc)
return rc;

cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0);
if (!cxl_bus_wq) {
rc = -ENOMEM;
goto err_wq;
}

rc = bus_register(&cxl_bus_type);
if (rc)
goto err;
goto err_bus;

return 0;

err:
err_bus:
destroy_workqueue(cxl_bus_wq);
err_wq:
cxl_memdev_exit();
cxl_mbox_exit();
return rc;
Expand All @@ -1385,6 +1481,7 @@ static __init int cxl_core_init(void)
static void cxl_core_exit(void)
{
bus_unregister(&cxl_bus_type);
destroy_workqueue(cxl_bus_wq);
cxl_memdev_exit();
cxl_mbox_exit();
}
Expand Down
6 changes: 6 additions & 0 deletions drivers/cxl/cxl.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
struct cxl_port *parent_port);
struct cxl_port *find_cxl_root(struct device *dev);
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
int cxl_bus_rescan(void);
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd);
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);

struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
struct device *dport, int port_id,
Expand All @@ -345,6 +348,8 @@ struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);

struct cxl_hdm;
struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
Expand Down Expand Up @@ -377,6 +382,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_NVDIMM 2
#define CXL_DEVICE_PORT 3
#define CXL_DEVICE_ROOT 4
#define CXL_DEVICE_MEMORY_EXPANDER 5

#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
Expand Down
8 changes: 8 additions & 0 deletions drivers/cxl/cxlmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@
* @dev: driver core device object
* @cdev: char dev core object for ioctl operations
* @cxlds: The device state backing this device
* @detach_work: active memdev lost a port in its ancestry
* @id: id number of this memdev instance.
*/
struct cxl_memdev {
struct device dev;
struct cdev cdev;
struct cxl_dev_state *cxlds;
struct work_struct detach_work;
int id;
};

Expand All @@ -48,6 +50,12 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
return container_of(dev, struct cxl_memdev, dev);
}

bool is_cxl_memdev(struct device *dev);
static inline bool is_cxl_endpoint(struct cxl_port *port)
{
return is_cxl_memdev(port->uport);
}

struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);

/**
Expand Down
Loading

0 comments on commit 8dd2bc0

Please sign in to comment.