Skip to content

Commit

Permalink
soundwire: use driver callbacks directly with proper locking
Browse files Browse the repository at this point in the history
In the SoundWire probe, we store a pointer from the driver ops into
the 'slave' structure. This can lead to kernel oopses when unbinding
codec drivers, e.g. with the following sequence to remove machine
driver and codec driver.

/sbin/modprobe -r snd_soc_sof_sdw
/sbin/modprobe -r snd_soc_rt711

The full details can be found in the BugLink below, for reference the
two following examples show different cases of driver ops/callbacks
being invoked after the driver .remove().

kernel: BUG: kernel NULL pointer dereference, address: 0000000000000150
kernel: Workqueue: events cdns_update_slave_status_work [soundwire_cadence]
kernel: RIP: 0010:mutex_lock+0x19/0x30
kernel: Call Trace:
kernel:  ? sdw_handle_slave_status+0x426/0xe00 [soundwire_bus 94ff184bf398570c3f8ff7efe9e32529f532e4ae]
kernel:  ? newidle_balance+0x26a/0x400
kernel:  ? cdns_update_slave_status_work+0x1e9/0x200 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82]

kernel: BUG: unable to handle page fault for address: ffffffffc07654c8
kernel: Workqueue: pm pm_runtime_work
kernel: RIP: 0010:sdw_bus_prep_clk_stop+0x6f/0x160 [soundwire_bus]
kernel: Call Trace:
kernel:  <TASK>
kernel:  sdw_cdns_clock_stop+0xb5/0x1b0 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82]
kernel:  intel_suspend_runtime+0x5f/0x120 [soundwire_intel aca858f7c87048d3152a4a41bb68abb9b663a1dd]
kernel:  ? dpm_sysfs_remove+0x60/0x60

This was not detected earlier in Intel tests since the tests first
remove the parent PCI device and shut down the bus. The sequence
above is a corner case which keeps the bus operational but without a
driver bound.

This patch removes the use the 'slave' ops and uses proper locking to
make sure there are no live callbacks based on a dangling pointer
executed during or after the driver unbinding sequence.

Adding device_lock()/unlock() seems fitting since it's what is already
used by the device and power management core routines, however this
needs to be done carefully to avoid self-inflicted dead-locks. There
are two cases where the device core code will take a lock, and taking
the same lock while doing through suspend/resume routines would be
problematic.

The first case is on attachment. The attachment and device resume can
happen asynchronously, and in the case where the resume happens first
adding device_lock() does not work at all. Fortunately, in this case
the codec driver resume routine needs to wait on the
'initialization_complete', so using device_trylock() is fine. In all
other cases unrelated to attachment, a regular device_lock() needs to
be taken.

The second case is the routine sdw_update_slave_status(), which can be
called from two different places, and the device lock may be taken
already by the device core. Using device_trylock() also helps prevent
this issue - and in case the device is locked already by the device
core, it's safe to proceed since the call to sdw_update_slave_status()
happens during the parent device PM routines. The parent-child
relationship guarantees that the lock will not be released while the
dev->driver pointer is used.

The issue with the ops pointer has been there since December 2017, but
there were so many changes in the bus handling code that this patch
will not apply cleanly all the way to this initial commit. The changes
can be easily backported though.

Thanks to Dan Williams for his suggestions on an earlier version of
this patch.

BugLink: thesofproject#3531
Fixes: 56d4fe3 ("soundwire: Add MIPI DisCo property helpers")
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
  • Loading branch information
plbossart committed Apr 20, 2022
1 parent d5bac83 commit 080a243
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 40 deletions.
97 changes: 80 additions & 17 deletions drivers/soundwire/bus.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <linux/pm_runtime.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/soundwire/sdw.h>
#include <linux/soundwire/sdw_type.h>
#include "bus.h"
#include "sysfs_local.h"

Expand Down Expand Up @@ -846,15 +847,21 @@ static int sdw_slave_clk_stop_callback(struct sdw_slave *slave,
enum sdw_clk_stop_mode mode,
enum sdw_clk_stop_type type)
{
int ret;
struct device *dev = &slave->dev;
struct sdw_driver *drv;
int ret = 0;

if (slave->ops && slave->ops->clk_stop) {
ret = slave->ops->clk_stop(slave, mode, type);
if (ret < 0)
return ret;
device_lock(dev);

if (dev->driver) {
drv = drv_to_sdw_driver(dev->driver);
if (drv->ops && drv->ops->clk_stop)
ret = drv->ops->clk_stop(slave, mode, type);
}

return 0;
device_unlock(dev);

return ret;
}

static int sdw_slave_clk_stop_prepare(struct sdw_slave *slave,
Expand Down Expand Up @@ -1616,14 +1623,25 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
}

/* Update the Slave driver */
if (slave_notify && slave->ops &&
slave->ops->interrupt_callback) {
slave_intr.sdca_cascade = sdca_cascade;
slave_intr.control_port = clear;
memcpy(slave_intr.port, &port_status,
sizeof(slave_intr.port));

slave->ops->interrupt_callback(slave, &slave_intr);
if (slave_notify) {
struct device *dev = &slave->dev;
struct sdw_driver *drv;

device_lock(dev);

if (dev->driver) {
drv = drv_to_sdw_driver(dev->driver);
if (drv->ops && drv->ops->interrupt_callback) {
slave_intr.sdca_cascade = sdca_cascade;
slave_intr.control_port = clear;
memcpy(slave_intr.port, &port_status,
sizeof(slave_intr.port));

drv->ops->interrupt_callback(slave, &slave_intr);
}
}

device_unlock(dev);
}

/* Ack interrupt */
Expand Down Expand Up @@ -1697,8 +1715,12 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
static int sdw_update_slave_status(struct sdw_slave *slave,
enum sdw_slave_status status)
{
struct device *dev = &slave->dev;
struct sdw_driver *drv;
unsigned long time;

device_lock_assert(dev);

if (!slave->probed) {
/*
* the slave status update is typically handled in an
Expand All @@ -1716,10 +1738,13 @@ static int sdw_update_slave_status(struct sdw_slave *slave,
}
}

if (!slave->ops || !slave->ops->update_status)
return 0;
if (dev->driver) {
drv = drv_to_sdw_driver(dev->driver);
if (drv->ops && drv->ops->update_status)
return drv->ops->update_status(slave, status);
}

return slave->ops->update_status(slave, status);
return 0;
}

/**
Expand All @@ -1734,6 +1759,7 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
struct sdw_slave *slave;
bool attached_initializing;
int i, ret = 0;
bool lock;

/* first check if any Slaves fell off the bus */
for (i = 1; i <= SDW_MAX_DEVICES; i++) {
Expand Down Expand Up @@ -1770,6 +1796,8 @@ int sdw_handle_slave_status(struct sdw_bus *bus,

/* Continue to check other slave statuses */
for (i = 1; i <= SDW_MAX_DEVICES; i++) {
lock = false;

mutex_lock(&bus->bus_lock);
if (test_bit(i, bus->assigned) == false) {
mutex_unlock(&bus->bus_lock);
Expand Down Expand Up @@ -1828,7 +1856,25 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
break;
}

/*
* if the device lock is taken while attaching, this means a resume
* operation started. In this case, we don't want to take a lock since this
* would create a dead-lock and the resume will time out.
* Using device_trylock() works since the resume function
* should wait on the initialization_complete signal, so there's no risk
* of the lock being released while sdw_update_slave_status() is invoked
* In other cases, we use a regular device_lock().
*/
if (attached_initializing)
lock = device_trylock(&slave->dev);
else
device_lock(&slave->dev);

ret = sdw_update_slave_status(slave, status[i]);

if (!attached_initializing || lock)
device_unlock(&slave->dev);

if (ret < 0)
dev_err(&slave->dev,
"Update Slave status failed:%d\n", ret);
Expand Down Expand Up @@ -1860,6 +1906,7 @@ EXPORT_SYMBOL(sdw_handle_slave_status);
void sdw_clear_slave_status(struct sdw_bus *bus, u32 request)
{
struct sdw_slave *slave;
bool lock;
int i;

/* Check all non-zero devices */
Expand All @@ -1878,7 +1925,23 @@ void sdw_clear_slave_status(struct sdw_bus *bus, u32 request)
if (slave->status != SDW_SLAVE_UNATTACHED) {
sdw_modify_slave_status(slave, SDW_SLAVE_UNATTACHED);
slave->first_interrupt_done = false;

lock = device_trylock(&slave->dev);

/*
* this bus/manager-level function can only be called from
* a resume sequence. If the peripheral device (child of the
* manager device) is locked, this indicates a resume operation
* initiated by the device core to deal with .remove() or .shutdown()
* at the peripheral level. With the parent-child order enforced
* by PM frameworks on resume, the peripheral resume has not started
* yet, so it's safe to assume the lock will not be released while
* the update_status callback is invoked.
*/
sdw_update_slave_status(slave, SDW_SLAVE_UNATTACHED);

if (lock)
device_unlock(&slave->dev);
}

/* keep track of request, used in pm_runtime resume */
Expand Down
6 changes: 2 additions & 4 deletions drivers/soundwire/bus_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ static int sdw_drv_probe(struct device *dev)
if (!id)
return -ENODEV;

slave->ops = drv->ops;

/*
* attach to power domain but don't turn on (last arg)
*/
Expand All @@ -118,8 +116,8 @@ static int sdw_drv_probe(struct device *dev)
}

/* device is probed so let's read the properties now */
if (slave->ops && slave->ops->read_prop)
slave->ops->read_prop(slave);
if (drv->ops && drv->ops->read_prop)
drv->ops->read_prop(slave);

/* init the sysfs as we have properties now */
ret = sdw_slave_sysfs_init(slave);
Expand Down
57 changes: 38 additions & 19 deletions drivers/soundwire/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/soundwire/sdw.h>
#include <linux/soundwire/sdw_type.h>
#include <sound/soc.h>
#include "bus.h"

Expand Down Expand Up @@ -401,20 +402,26 @@ static int sdw_do_port_prep(struct sdw_slave_runtime *s_rt,
struct sdw_prepare_ch prep_ch,
enum sdw_port_prep_ops cmd)
{
const struct sdw_slave_ops *ops = s_rt->slave->ops;
int ret;
struct device *dev = &s_rt->slave->dev;
struct sdw_driver *drv;
int ret = 0;

if (ops->port_prep) {
ret = ops->port_prep(s_rt->slave, &prep_ch, cmd);
if (ret < 0) {
dev_err(&s_rt->slave->dev,
"Slave Port Prep cmd %d failed: %d\n",
cmd, ret);
return ret;
device_lock(dev);

if (dev->driver) {
drv = drv_to_sdw_driver(dev->driver);
if (drv->ops && drv->ops->port_prep) {
ret = drv->ops->port_prep(s_rt->slave, &prep_ch, cmd);
if (ret < 0)
dev_err(&s_rt->slave->dev,
"Slave Port Prep cmd %d failed: %d\n",
cmd, ret);
}
}

return 0;
device_unlock(dev);

return ret;
}

static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus,
Expand Down Expand Up @@ -578,7 +585,7 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
struct sdw_slave_runtime *s_rt;
struct sdw_bus *bus = m_rt->bus;
struct sdw_slave *slave;
int ret = 0;
int ret;

if (bus->ops->set_bus_conf) {
ret = bus->ops->set_bus_conf(bus, &bus->params);
Expand All @@ -587,19 +594,31 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
}

list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
slave = s_rt->slave;
struct sdw_driver *drv;
struct device *dev;

if (slave->ops->bus_config) {
ret = slave->ops->bus_config(slave, &bus->params);
if (ret < 0) {
dev_err(bus->dev, "Notify Slave: %d failed\n",
slave->dev_num);
return ret;
slave = s_rt->slave;
dev = &slave->dev;

device_lock(dev);

if (dev->driver) {
drv = drv_to_sdw_driver(dev->driver);
if (drv->ops && drv->ops->bus_config) {
ret = drv->ops->bus_config(slave, &bus->params);
if (ret < 0) {
device_unlock(dev);
dev_err(bus->dev, "Notify Slave: %d failed\n",
slave->dev_num);
return ret;
}
}
}

device_unlock(dev);
}

return ret;
return 0;
}

/**
Expand Down

0 comments on commit 080a243

Please sign in to comment.