Skip to content

Commit

Permalink
Merge branch 'for-6.9/cxl-qos' into for-6.9/cxl
Browse files Browse the repository at this point in the history
Pick up support for CXL "HMEM reporting" for v6.9, i.e. build an HMAT
from CXL CDAT and PCIe switch information.
  • Loading branch information
djbw committed Mar 13, 2024
2 parents c6c3187 + debdce2 commit d5c0078
Show file tree
Hide file tree
Showing 13 changed files with 492 additions and 58 deletions.
34 changes: 34 additions & 0 deletions Documentation/ABI/testing/sysfs-bus-cxl
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,37 @@ Description:
attribute is only visible for devices supporting the
capability. The retrieved errors are logged as kernel
events when cxl_poison event tracing is enabled.


What: /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth
/sys/bus/cxl/devices/regionZ/accessY/write_bandwidth
Date: Jan, 2024
KernelVersion: v6.9
Contact: linux-cxl@vger.kernel.org
Description:
(RO) The aggregated read or write bandwidth of the region. The
number is the accumulated read or write bandwidth of all CXL memory
devices that contribute to the region, in MB/s. It is
identical to the data that should appear in
/sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth or
/sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth.
See Documentation/ABI/stable/sysfs-devices-node. access0 provides
the number to the closest initiator and access1 provides the
number to the closest CPU.


What: /sys/bus/cxl/devices/regionZ/accessY/read_latency
/sys/bus/cxl/devices/regionZ/accessY/write_latency
Date: Jan, 2024
KernelVersion: v6.9
Contact: linux-cxl@vger.kernel.org
Description:
(RO) The read or write latency of the region. The number is
the worst read or write latency of all CXL memory devices that
contribute to the region, in nanoseconds. It is identical to the data
that should appear in
/sys/devices/system/node/nodeX/accessY/initiators/read_latency or
/sys/devices/system/node/nodeX/accessY/initiators/write_latency.
See Documentation/ABI/stable/sysfs-devices-node. access0 provides
the number to the closest initiator and access1 provides the
number to the closest CPU.
83 changes: 64 additions & 19 deletions drivers/acpi/numa/hmat.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@ struct target_cache {
};

enum {
NODE_ACCESS_CLASS_0 = 0,
NODE_ACCESS_CLASS_1,
NODE_ACCESS_CLASS_GENPORT_SINK,
NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL = ACCESS_COORDINATE_MAX,
NODE_ACCESS_CLASS_GENPORT_SINK_CPU,
NODE_ACCESS_CLASS_MAX,
};

Expand All @@ -75,6 +74,7 @@ struct memory_target {
struct node_cache_attrs cache_attrs;
u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
bool registered;
bool ext_updated; /* externally updated */
};

struct memory_initiator {
Expand Down Expand Up @@ -127,7 +127,8 @@ static struct memory_target *acpi_find_genport_target(u32 uid)
/**
* acpi_get_genport_coordinates - Retrieve the access coordinates for a generic port
* @uid: ACPI unique id
* @coord: The access coordinates written back out for the generic port
* @coord: The access coordinates written back out for the generic port.
* Expects a two-entry array (local and CPU access classes).
*
* Return: 0 on success. Errno on failure.
*
Expand All @@ -143,7 +144,10 @@ int acpi_get_genport_coordinates(u32 uid,
if (!target)
return -ENOENT;

*coord = target->coord[NODE_ACCESS_CLASS_GENPORT_SINK];
coord[ACCESS_COORDINATE_LOCAL] =
target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL];
coord[ACCESS_COORDINATE_CPU] =
target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_CPU];

return 0;
}
Expand Down Expand Up @@ -325,6 +329,35 @@ static void hmat_update_target_access(struct memory_target *target,
}
}

/**
 * hmat_update_target_coordinates - Override a memory target's access coordinates
 * @nid: NUMA node id whose memory target should be updated
 * @coord: Source of the read/write latency and bandwidth values to store
 * @access: Access class the values are recorded under
 *
 * Looks up the memory target for the proximity domain that @nid maps to and
 * passes each of the four values in @coord to hmat_update_target_access().
 * The target is then flagged as externally updated, which makes
 * hmat_update_target_attrs() return early for it.
 *
 * Return: 0 on success, -EINVAL if @nid is NUMA_NO_NODE, -ENODEV if no memory
 * target exists for the node's proximity domain.
 */
int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
enum access_coordinate_class access)
{
struct memory_target *target;
int pxm;

/* Reject the "no node" sentinel before translating to a proximity domain. */
if (nid == NUMA_NO_NODE)
return -EINVAL;

pxm = node_to_pxm(nid);
/* Scope-based lock: target_lock is held until the function returns. */
guard(mutex)(&target_lock);
target = find_mem_target(pxm);
if (!target)
return -ENODEV;

/* Push each performance attribute into the target for this access class. */
hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY,
coord->read_latency, access);
hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY,
coord->write_latency, access);
hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH,
coord->read_bandwidth, access);
hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH,
coord->write_bandwidth, access);
/* Record that these values came from outside the HMAT parsing path. */
target->ext_updated = true;

return 0;
}
EXPORT_SYMBOL_GPL(hmat_update_target_coordinates);

static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
{
struct memory_locality *loc;
Expand Down Expand Up @@ -374,11 +407,11 @@ static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_px

if (target && target->processor_pxm == init_pxm) {
hmat_update_target_access(target, type, value,
NODE_ACCESS_CLASS_0);
ACCESS_COORDINATE_LOCAL);
/* If the node has a CPU, update access 1 */
if (node_state(pxm_to_node(init_pxm), N_CPU))
hmat_update_target_access(target, type, value,
NODE_ACCESS_CLASS_1);
ACCESS_COORDINATE_CPU);
}
}

Expand Down Expand Up @@ -696,8 +729,13 @@ static void hmat_update_target_attrs(struct memory_target *target,
u32 best = 0;
int i;

/* Don't update if an external agent has changed the data. */
if (target->ext_updated)
return;

/* Don't update for generic port if there's no device handle */
if (access == NODE_ACCESS_CLASS_GENPORT_SINK &&
if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL ||
access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
!(*(u16 *)target->gen_port_device_handle))
return;

Expand All @@ -709,7 +747,8 @@ static void hmat_update_target_attrs(struct memory_target *target,
*/
if (target->processor_pxm != PXM_INVAL) {
cpu_nid = pxm_to_node(target->processor_pxm);
if (access == 0 || node_state(cpu_nid, N_CPU)) {
if (access == ACCESS_COORDINATE_LOCAL ||
node_state(cpu_nid, N_CPU)) {
set_bit(target->processor_pxm, p_nodes);
return;
}
Expand Down Expand Up @@ -737,7 +776,9 @@ static void hmat_update_target_attrs(struct memory_target *target,
list_for_each_entry(initiator, &initiators, node) {
u32 value;

if (access == 1 && !initiator->has_cpu) {
if ((access == ACCESS_COORDINATE_CPU ||
access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
!initiator->has_cpu) {
clear_bit(initiator->processor_pxm, p_nodes);
continue;
}
Expand Down Expand Up @@ -770,20 +811,24 @@ static void __hmat_register_target_initiators(struct memory_target *target,
}
}

static void hmat_register_generic_target_initiators(struct memory_target *target)
static void hmat_update_generic_target(struct memory_target *target)
{
static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);

__hmat_register_target_initiators(target, p_nodes,
NODE_ACCESS_CLASS_GENPORT_SINK);
hmat_update_target_attrs(target, p_nodes,
NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL);
hmat_update_target_attrs(target, p_nodes,
NODE_ACCESS_CLASS_GENPORT_SINK_CPU);
}

static void hmat_register_target_initiators(struct memory_target *target)
{
static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);

__hmat_register_target_initiators(target, p_nodes, 0);
__hmat_register_target_initiators(target, p_nodes, 1);
__hmat_register_target_initiators(target, p_nodes,
ACCESS_COORDINATE_LOCAL);
__hmat_register_target_initiators(target, p_nodes,
ACCESS_COORDINATE_CPU);
}

static void hmat_register_target_cache(struct memory_target *target)
Expand Down Expand Up @@ -835,7 +880,7 @@ static void hmat_register_target(struct memory_target *target)
*/
mutex_lock(&target_lock);
if (*(u16 *)target->gen_port_device_handle) {
hmat_register_generic_target_initiators(target);
hmat_update_generic_target(target);
target->registered = true;
}
mutex_unlock(&target_lock);
Expand All @@ -854,8 +899,8 @@ static void hmat_register_target(struct memory_target *target)
if (!target->registered) {
hmat_register_target_initiators(target);
hmat_register_target_cache(target);
hmat_register_target_perf(target, NODE_ACCESS_CLASS_0);
hmat_register_target_perf(target, NODE_ACCESS_CLASS_1);
hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL);
hmat_register_target_perf(target, ACCESS_COORDINATE_CPU);
target->registered = true;
}
mutex_unlock(&target_lock);
Expand Down Expand Up @@ -927,7 +972,7 @@ static int hmat_calculate_adistance(struct notifier_block *self,
return NOTIFY_OK;

mutex_lock(&target_lock);
hmat_update_target_attrs(target, p_nodes, 1);
hmat_update_target_attrs(target, p_nodes, ACCESS_COORDINATE_CPU);
mutex_unlock(&target_lock);

perf = &target->coord[1];
Expand Down
11 changes: 11 additions & 0 deletions drivers/acpi/numa/srat.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES]
unsigned char acpi_srat_revision __initdata;
static int acpi_numa __initdata;

static int last_real_pxm;

void __init disable_srat(void)
{
acpi_numa = -1;
Expand Down Expand Up @@ -536,6 +538,7 @@ int __init acpi_numa_init(void)
if (node_to_pxm_map[i] > fake_pxm)
fake_pxm = node_to_pxm_map[i];
}
last_real_pxm = fake_pxm;
fake_pxm++;
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
&fake_pxm);
Expand All @@ -547,6 +550,14 @@ int __init acpi_numa_init(void)
return 0;
}

/**
 * acpi_node_backed_by_real_pxm - Test whether a node maps to a non-fake PXM
 * @nid: NUMA node id to check
 *
 * Compares the proximity domain that @nid maps to against last_real_pxm,
 * the highest proximity domain recorded before acpi_numa_init() starts
 * handing out fake ones for the CEDT CFMWS parse.
 *
 * Return: true if the node's proximity domain is at or below last_real_pxm,
 * false otherwise.
 */
bool acpi_node_backed_by_real_pxm(int nid)
{
	return node_to_pxm(nid) <= last_real_pxm;
}
EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);

static int acpi_get_pxm(acpi_handle h)
{
unsigned long long pxm;
Expand Down
7 changes: 4 additions & 3 deletions drivers/base/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ static void node_access_release(struct device *dev)
}

static struct node_access_nodes *node_init_node_access(struct node *node,
unsigned int access)
enum access_coordinate_class access)
{
struct node_access_nodes *access_node;
struct device *dev;
Expand Down Expand Up @@ -191,7 +191,7 @@ static struct attribute *access_attrs[] = {
* @access: The access class for the given attributes
*/
void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
unsigned int access)
enum access_coordinate_class access)
{
struct node_access_nodes *c;
struct node *node;
Expand All @@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
}
}
}
EXPORT_SYMBOL_GPL(node_set_perf_attrs);

/**
* struct node_cache_info - Internal tracking for memory node caches
Expand Down Expand Up @@ -689,7 +690,7 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
*/
int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned int access)
enum access_coordinate_class access)
{
struct node *init_node, *targ_node;
struct node_access_nodes *initiator, *target;
Expand Down
8 changes: 5 additions & 3 deletions drivers/cxl/acpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,13 +530,15 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
if (kstrtou32(acpi_device_uid(hb), 0, &uid))
return -EINVAL;

rc = acpi_get_genport_coordinates(uid, &dport->hb_coord);
rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
if (rc < 0)
return rc;

/* Adjust back to picoseconds from nanoseconds */
dport->hb_coord.read_latency *= 1000;
dport->hb_coord.write_latency *= 1000;
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
dport->hb_coord[i].read_latency *= 1000;
dport->hb_coord[i].write_latency *= 1000;
}

return 0;
}
Expand Down
Loading

0 comments on commit d5c0078

Please sign in to comment.