Skip to content

Commit

Permalink
net/sched: fix false lockdep warning on qdisc root lock
Browse files Browse the repository at this point in the history
Xiumei and Christoph reported the following lockdep splat, complaining of
the qdisc root lock being taken twice:

 ============================================
 WARNING: possible recursive locking detected
 6.7.0-rc3+ torvalds#598 Not tainted
 --------------------------------------------
 swapper/2/0 is trying to acquire lock:
 ffff888177190110 (&sch->q.lock){+.-.}-{2:2}, at: __dev_queue_xmit+0x1560/0x2e70

 but task is already holding lock:
 ffff88811995a110 (&sch->q.lock){+.-.}-{2:2}, at: __dev_queue_xmit+0x1560/0x2e70

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&sch->q.lock);
   lock(&sch->q.lock);

  *** DEADLOCK ***

  May be due to missing lock nesting notation

 5 locks held by swapper/2/0:
  #0: ffff888135a09d98 ((&in_dev->mr_ifc_timer)){+.-.}-{0:0}, at: call_timer_fn+0x11a/0x510
  #1: ffffffffaaee5260 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x2c0/0x1ed0
  #2: ffffffffaaee5200 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x209/0x2e70
  #3: ffff88811995a110 (&sch->q.lock){+.-.}-{2:2}, at: __dev_queue_xmit+0x1560/0x2e70
  #4: ffffffffaaee5200 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x209/0x2e70

 stack backtrace:
 CPU: 2 PID: 0 Comm: swapper/2 Not tainted 6.7.0-rc3+ torvalds#598
 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module+el8.3.0+7353+9de0a3cc 04/01/2014
 Call Trace:
  <IRQ>
  dump_stack_lvl+0x4a/0x80
  __lock_acquire+0xfdd/0x3150
  lock_acquire+0x1ca/0x540
  _raw_spin_lock+0x34/0x80
  __dev_queue_xmit+0x1560/0x2e70
  tcf_mirred_act+0x82e/0x1260 [act_mirred]
  tcf_action_exec+0x161/0x480
  tcf_classify+0x689/0x1170
  prio_enqueue+0x316/0x660 [sch_prio]
  dev_qdisc_enqueue+0x46/0x220
  __dev_queue_xmit+0x1615/0x2e70
  ip_finish_output2+0x1218/0x1ed0
  __ip_finish_output+0x8b3/0x1350
  ip_output+0x163/0x4e0
  igmp_ifc_timer_expire+0x44b/0x930
  call_timer_fn+0x1a2/0x510
  run_timer_softirq+0x54d/0x11a0
  __do_softirq+0x1b3/0x88f
  irq_exit_rcu+0x18f/0x1e0
  sysvec_apic_timer_interrupt+0x6f/0x90
  </IRQ>

This happens when TC does a mirred egress redirect from the root qdisc of
device A to the root qdisc of device B. As long as these two locks aren't
protecting the same qdisc, they can be acquired in chain: add a per-qdisc
lockdep key to silence false warnings.
This dynamic key should safely replace the static key we have in sch_htb:
it was added to allow enqueueing to the device "direct qdisc" while still
holding the qdisc root lock.

v2: don't use static keys anymore in HTB direct qdiscs (thanks Eric Dumazet)

CC: Maxim Mikityanskiy <maxim@isovalent.com>
CC: Xiumei Mu <xmu@redhat.com>
Reported-by: Christoph Paasch <cpaasch@apple.com>
Closes: multipath-tcp/mptcp_net-next#451
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/7dc06d6158f72053cf877a82e2a7a5bd23692faa.1713448007.git.dcaratti@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
dcaratti authored and Paolo Abeni committed Apr 26, 2024
1 parent 1cedb16 commit af0cb3f
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 19 deletions.
1 change: 1 addition & 0 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct Qdisc {

struct rcu_head rcu;
netdevice_tracker dev_tracker;
struct lock_class_key root_lock_key;
/* private data */
long privdata[] ____cacheline_aligned;
};
Expand Down
3 changes: 3 additions & 0 deletions net/sched/sch_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
gnet_stats_basic_sync_init(&sch->bstats);
lockdep_register_key(&sch->root_lock_key);
spin_lock_init(&sch->q.lock);
lockdep_set_class(&sch->q.lock, &sch->root_lock_key);

if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
Expand Down Expand Up @@ -1068,6 +1070,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
if (ops->destroy)
ops->destroy(qdisc);

lockdep_unregister_key(&qdisc->root_lock_key);
module_put(ops->owner);
netdev_put(dev, &qdisc->dev_tracker);

Expand Down
22 changes: 3 additions & 19 deletions net/sched/sch_htb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work)
rcu_read_unlock();
}

static void htb_set_lockdep_class_child(struct Qdisc *q)
{
static struct lock_class_key child_key;

lockdep_set_class(qdisc_lock(q), &child_key);
}

static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
Expand Down Expand Up @@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
return -ENOMEM;
}

htb_set_lockdep_class_child(qdisc);
q->direct_qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
Expand Down Expand Up @@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
}

if (q->offload) {
htb_set_lockdep_class_child(new);
/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
qdisc_refcount_inc(new);
old_q = htb_graft_helper(dev_queue, new);
Expand Down Expand Up @@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
if (q->offload) {
if (new_q)
htb_set_lockdep_class_child(new_q);
if (q->offload)
htb_parent_to_leaf_offload(sch, dev_queue, new_q);
}
}

sch_tree_lock(sch);
Expand Down Expand Up @@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
classid, NULL);
if (q->offload) {
if (new_q) {
htb_set_lockdep_class_child(new_q);
/* One ref for cl->leaf.q, the other for
* dev_queue->qdisc.
*/
/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
if (new_q)
qdisc_refcount_inc(new_q);
}
old_q = htb_graft_helper(dev_queue, new_q);
/* No qdisc_put needed. */
WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
Expand Down

0 comments on commit af0cb3f

Please sign in to comment.