switch fput to task_work_add
... and schedule_work() for interrupt/kernel_thread callers
(and yes, now it *is* OK to call from interrupt).

We are guaranteed that __fput() will be done before we return
to userland (or exit).  Note that for fput() from a kernel
thread we get asynchronous behaviour; it's almost always OK, but
sometimes you might need to have __fput() completed before
you do anything else.  There are two mechanisms for that -
a general barrier (flush_delayed_fput()) and an explicit
__fput_sync().  Both should be used with care (as was the
case for fput() from kernel threads all along).  See comments
in fs/file_table.c for details.

Signed-off-by: Al Viro <>
Al Viro committed Jul 22, 2012
1 parent a2d4c71 commit 4a9d4b0
72 changes: 70 additions & 2 deletions fs/file_table.c
Expand Up @@ -23,6 +23,8 @@
#include <linux/lglock.h>
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/task_work.h>
#include <linux/ima.h>

#include <linux/atomic.h>
Expand Down Expand Up @@ -251,7 +253,6 @@ static void __fput(struct file *file)
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
if (file->f_mode & FMODE_WRITE)
Expand All @@ -263,10 +264,77 @@ static void __fput(struct file *file)

static DEFINE_SPINLOCK(delayed_fput_lock);
static LIST_HEAD(delayed_fput_list);
static void delayed_fput(struct work_struct *unused)
list_splice_init(&delayed_fput_list, &head);
while (!list_empty(&head)) {
struct file *f = list_first_entry(&head, struct file, f_u.fu_list);

static void ____fput(struct callback_head *work)
__fput(container_of(work, struct file, f_u.fu_rcuhead));

* If a kernel thread really needs the final fput() it has done to
* complete, call this. The only user right now is the boot code - we
* *do* need to make sure our writes to binaries on initramfs have
* not left us with an opened struct file waiting for __fput() - execve()
* won't work without that. Please, don't add more callers without
* very good reasons; in particular, never call this with locks
* held and never call this from a thread that might need to do
* some work on any kind of umount.
void flush_delayed_fput(void)

static DECLARE_WORK(delayed_fput_work, delayed_fput);

void fput(struct file *file)
if (atomic_long_dec_and_test(&file->f_count))
if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;
if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
unsigned long flags;
spin_lock_irqsave(&delayed_fput_lock, flags);
list_add(&file->f_u.fu_list, &delayed_fput_list);
spin_unlock_irqrestore(&delayed_fput_lock, flags);
init_task_work(&file->f_u.fu_rcuhead, ____fput);
task_work_add(task, &file->f_u.fu_rcuhead, true);

* Synchronous analog of fput(); for kernel threads that might be needed
* in some umount() (and thus can't use flush_delayed_fput() without
* risking deadlocks), that need to wait for completion of __fput(), and
* that know that for this specific struct file it won't involve anything
* that would need them. Use only if you really need it - at the very
* least, don't blindly convert fput() by a kernel thread to this.
void __fput_sync(struct file *file)
if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;
BUG_ON(!(task->flags & PF_KTHREAD));

Expand Down
3 changes: 3 additions & 0 deletions include/linux/file.h
Expand Up @@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd);

extern void fd_install(unsigned int fd, struct file *file);

extern void flush_delayed_fput(void);
extern void __fput_sync(struct file *);

#endif /* __LINUX_FILE_H */
3 changes: 2 additions & 1 deletion init/main.c
Expand Up @@ -68,6 +68,7 @@
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/file.h>

#include <asm/io.h>
#include <asm/bugs.h>
Expand Down Expand Up @@ -804,8 +805,8 @@ static noinline int init_post(void)
system_state = SYSTEM_RUNNING;

current->signal->flags |= SIGNAL_UNKILLABLE;

if (ramdisk_execute_command) {
Expand Down

