Skip to content

Commit

Permalink
mm: mmap_lock: add tracepoints around lock acquisition
Browse files Browse the repository at this point in the history
The goal of these tracepoints is to be able to debug lock contention
issues.  This lock is acquired on most (all?) mmap / munmap / page fault
operations, so a multi-threaded process which does a lot of these can
experience significant contention.

We trace just before we start acquisition, when the acquisition returns
(whether it succeeded or not), and when the lock is released (or
downgraded).  The events are broken out by lock type (read / write).

The events are also broken out by memcg path.  For container-based
workloads, users often think of several processes in a memcg as a single
logical "task", so collecting statistics at this level is useful.

The end goal is to get latency information.  This isn't directly included
in the trace events.  Instead, users are expected to compute the time
between "start locking" and "acquire returned", using e.g.  synthetic
events or BPF.  The benefit we get from this is simpler code.

Because we use tracepoint_enabled() to decide whether or not to trace,
this patch has effectively no overhead unless tracepoints are enabled at
runtime.  If tracepoints are enabled, there is a performance impact, but
how much depends on exactly what e.g.  the BPF program does.

[axelrasmussen@google.com: fix use-after-free race and css ref leak in tracepoints]
  Link: https://lkml.kernel.org/r/20201130233504.3725241-1-axelrasmussen@google.com
[axelrasmussen@google.com: v3]
  Link: https://lkml.kernel.org/r/20201207213358.573750-1-axelrasmussen@google.com
[rostedt@goodmis.org: in-depth examples of tracepoint_enabled() usage, and per-cpu-per-context buffer design]

Link: https://lkml.kernel.org/r/20201105211739.568279-2-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
CmdrMoozy authored and torvalds committed Dec 15, 2020
1 parent 777f303 commit 2b5067a
Show file tree
Hide file tree
Showing 4 changed files with 427 additions and 6 deletions.
94 changes: 89 additions & 5 deletions include/linux/mmap_lock.h
Original file line number Diff line number Diff line change
@@ -1,69 +1,152 @@
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>

#define MMAP_LOCK_INITIALIZER(name) \
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

/*
 * Emit the mmap_lock_start_locking tracepoint, if it is enabled.
 *
 * tracepoint_enabled() is a cheap runtime check, so this helper costs
 * effectively nothing unless tracing has been turned on (see the
 * out-of-line __mmap_lock_do_trace_start_locking() for the real work).
 */
static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (!tracepoint_enabled(mmap_lock_start_locking))
		return;

	__mmap_lock_do_trace_start_locking(mm, write);
}

/*
 * Emit the mmap_lock_acquire_returned tracepoint, if it is enabled.
 *
 * @success reports whether the acquisition attempt actually got the
 * lock (always true for the unconditional lock variants, the result of
 * the attempt for the trylock/killable variants).
 */
static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (!tracepoint_enabled(mmap_lock_acquire_returned))
		return;

	__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

/*
 * Emit the mmap_lock_released tracepoint, if it is enabled.
 */
static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (!tracepoint_enabled(mmap_lock_released))
		return;

	__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

/*
 * No-op stub when CONFIG_TRACING is disabled, so the lock wrappers
 * below need no #ifdefs at their call sites.
 */
static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
bool write)
{
}

/* No-op stub when CONFIG_TRACING is disabled. */
static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
bool write, bool success)
{
}

/* No-op stub when CONFIG_TRACING is disabled. */
static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

/*
 * Initialize mm->mmap_lock for a freshly created mm.  Not traced: no
 * acquisition happens here.
 */
static inline void mmap_init_lock(struct mm_struct *mm)
{
	init_rwsem(&mm->mmap_lock);
}

/*
 * Acquire mmap_lock for writing, sleeping until it is available.
 *
 * Tracepoints fire just before and just after the acquisition; users
 * compute contention latency from the gap between the two events.
 * down_write() cannot fail, hence success == true unconditionally.
 */
static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

/*
 * As mmap_write_lock(), but with a lockdep subclass for the rare
 * callers that legitimately nest mmap_lock acquisitions.
 */
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

/*
 * Acquire mmap_lock for writing; the wait may be interrupted by a
 * fatal signal.  Returns 0 on success, a negative error otherwise
 * (per down_write_killable()).
 *
 * The "start locking" event fires even when acquisition then fails;
 * the paired "acquire returned" event reports success = (ret == 0).
 *
 * NOTE(review): the rendered diff had left the pre-patch statement
 * "return down_write_killable(&mm->mmap_lock);" at the top of the
 * body, making the traced path dead code — removed here.
 */
static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}

/*
 * Try to acquire mmap_lock for writing without sleeping.  Returns true
 * if the lock was taken, false otherwise.
 *
 * Both trace events still fire on failure, with success == false in
 * the "acquire returned" event.
 *
 * NOTE(review): dropped the stale pre-patch line
 * "return down_write_trylock(&mm->mmap_lock) != 0;" that the diff
 * rendering had merged into the body, which short-circuited the
 * tracing code.
 */
static inline bool mmap_write_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, true, ret);
	return ret;
}

/*
 * Release the write lock.  The "released" event is emitted after
 * up_write(), i.e. once the lock is actually available to others.
 */
static inline void mmap_write_unlock(struct mm_struct *mm)
{
	up_write(&mm->mmap_lock);
	__mmap_lock_trace_released(mm, true);
}

/*
 * Downgrade a held write lock to a read lock.  Traced as a successful
 * read acquisition (write = false, success = true); no "released"
 * event is emitted for the write side here.
 */
static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	downgrade_write(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

/*
 * Acquire mmap_lock for reading, sleeping until it is available.
 * down_read() cannot fail, hence success == true unconditionally.
 */
static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

/*
 * Acquire mmap_lock for reading; the wait may be interrupted by a
 * fatal signal.  Returns 0 on success, a negative error otherwise
 * (per down_read_killable()).
 *
 * NOTE(review): removed the stale pre-patch line
 * "return down_read_killable(&mm->mmap_lock);" that the diff rendering
 * had merged into the body, which made the tracing code unreachable.
 */
static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

/*
 * Try to acquire mmap_lock for reading without sleeping.  Returns true
 * if the lock was taken, false otherwise.  Both trace events fire
 * regardless of the outcome.
 *
 * NOTE(review): removed the stale pre-patch line
 * "return down_read_trylock(&mm->mmap_lock) != 0;" that the diff
 * rendering had merged into the body, which short-circuited the
 * tracing code.
 */
static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

/*
 * Release the read lock.  The "released" event fires after up_read().
 */
static inline void mmap_read_unlock(struct mm_struct *mm)
{
	up_read(&mm->mmap_lock);
	__mmap_lock_trace_released(mm, false);
}

static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
{
if (down_read_trylock(&mm->mmap_lock)) {
if (mmap_read_trylock(mm)) {
rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
return true;
}
Expand All @@ -73,6 +156,7 @@ static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
/*
 * Release a read lock that was taken via the non-owner path (i.e. the
 * releasing context may differ from the acquiring one — presumably
 * skips lockdep owner checks; see up_read_non_owner()).  Traced the
 * same as a regular read release.
 */
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	up_read_non_owner(&mm->mmap_lock);
	__mmap_lock_trace_released(mm, false);
}

static inline void mmap_assert_locked(struct mm_struct *mm)
Expand Down
107 changes: 107 additions & 0 deletions include/trace/events/mmap_lock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mmap_lock

#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MMAP_LOCK_H

#include <linux/tracepoint.h>
#include <linux/types.h>

struct mm_struct;

extern int trace_mmap_lock_reg(void);
extern void trace_mmap_lock_unreg(void);

/*
 * Fired just before an mmap_lock acquisition attempt begins (read or
 * write).  Records the mm pointer, the memcg path of the owning
 * cgroup, and the lock type.  TRACE_EVENT_FN registers
 * trace_mmap_lock_reg()/trace_mmap_lock_unreg() callbacks, which
 * manage the per-event buffers used to hold memcg_path.
 */
TRACE_EVENT_FN(mmap_lock_start_locking,

	TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),

	TP_ARGS(mm, memcg_path, write),

	TP_STRUCT__entry(
		__field(struct mm_struct *, mm)
		__string(memcg_path, memcg_path)
		__field(bool, write)
	),

	TP_fast_assign(
		__entry->mm = mm;
		__assign_str(memcg_path, memcg_path);
		__entry->write = write;
	),

	TP_printk(
		"mm=%p memcg_path=%s write=%s\n",
		__entry->mm,
		__get_str(memcg_path),
		__entry->write ? "true" : "false"
	),

	trace_mmap_lock_reg, trace_mmap_lock_unreg
);

/*
 * Fired when an mmap_lock acquisition attempt returns, whether it
 * succeeded or not ("success" distinguishes the two).  The latency of
 * the acquisition is the delta between this event and the matching
 * mmap_lock_start_locking event (computed by the user, e.g. with
 * synthetic events or BPF — it is deliberately not stored here).
 */
TRACE_EVENT_FN(mmap_lock_acquire_returned,

	TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write,
		bool success),

	TP_ARGS(mm, memcg_path, write, success),

	TP_STRUCT__entry(
		__field(struct mm_struct *, mm)
		__string(memcg_path, memcg_path)
		__field(bool, write)
		__field(bool, success)
	),

	TP_fast_assign(
		__entry->mm = mm;
		__assign_str(memcg_path, memcg_path);
		__entry->write = write;
		__entry->success = success;
	),

	TP_printk(
		"mm=%p memcg_path=%s write=%s success=%s\n",
		__entry->mm,
		__get_str(memcg_path),
		__entry->write ? "true" : "false",
		__entry->success ? "true" : "false"
	),

	trace_mmap_lock_reg, trace_mmap_lock_unreg
);

/*
 * Fired when mmap_lock is released (a write-to-read downgrade is
 * reported via mmap_lock_acquire_returned instead).  Same fields as
 * mmap_lock_start_locking.
 */
TRACE_EVENT_FN(mmap_lock_released,

	TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),

	TP_ARGS(mm, memcg_path, write),

	TP_STRUCT__entry(
		__field(struct mm_struct *, mm)
		__string(memcg_path, memcg_path)
		__field(bool, write)
	),

	TP_fast_assign(
		__entry->mm = mm;
		__assign_str(memcg_path, memcg_path);
		__entry->write = write;
	),

	TP_printk(
		"mm=%p memcg_path=%s write=%s\n",
		__entry->mm,
		__get_str(memcg_path),
		__entry->write ? "true" : "false"
	),

	trace_mmap_lock_reg, trace_mmap_lock_unreg
);

#endif /* _TRACE_MMAP_LOCK_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
2 changes: 1 addition & 1 deletion mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
mm_init.o percpu.o slab_common.o \
compaction.o vmacache.o \
interval_tree.o list_lru.o workingset.o \
debug.o gup.o $(mmu-y)
debug.o gup.o mmap_lock.o $(mmu-y)

# Give 'page_alloc' its own module-parameter namespace
page-alloc-y := page_alloc.o
Expand Down

0 comments on commit 2b5067a

Please sign in to comment.