Skip to content

Commit 4a9d4b0

Browse files
author
Al Viro
committed
switch fput to task_work_add
... and schedule_work() for interrupt/kernel_thread callers (and yes, now it *is* OK to call from interrupt).

We are guaranteed that __fput() will be done before we return to userland (or exit). Note that for fput() from a kernel thread we get an async behaviour; it's almost always OK, but sometimes you might need to have __fput() completed before you do anything else. There are two mechanisms for that - a general barrier (flush_delayed_fput()) and explicit __fput_sync(). Both should be used with care (as was the case for fput() from kernel threads all along). See comments in fs/file_table.c for details.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent a2d4c71 commit 4a9d4b0

File tree

3 files changed

+75
-3
lines changed

3 files changed

+75
-3
lines changed

fs/file_table.c

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <linux/lglock.h>
2424
#include <linux/percpu_counter.h>
2525
#include <linux/percpu.h>
26+
#include <linux/hardirq.h>
27+
#include <linux/task_work.h>
2628
#include <linux/ima.h>
2729

2830
#include <linux/atomic.h>
@@ -251,7 +253,6 @@ static void __fput(struct file *file)
251253
}
252254
fops_put(file->f_op);
253255
put_pid(file->f_owner.pid);
254-
file_sb_list_del(file);
255256
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
256257
i_readcount_dec(inode);
257258
if (file->f_mode & FMODE_WRITE)
@@ -263,10 +264,77 @@ static void __fput(struct file *file)
263264
mntput(mnt);
264265
}
265266

267+
/*
 * Files whose final fput() happened in interrupt context or in a kernel
 * thread are queued on delayed_fput_list (see fput()); the list is
 * protected by delayed_fput_lock.
 */
static DEFINE_SPINLOCK(delayed_fput_lock);
268+
static LIST_HEAD(delayed_fput_list);
269+
/*
 * Work handler for the deferred final fput; also called directly by
 * flush_delayed_fput().  The work_struct argument is not used.
 */
static void delayed_fput(struct work_struct *unused)
270+
{
271+
LIST_HEAD(head);
272+
/* Move everything queued so far onto a private list while holding the lock... */
spin_lock_irq(&delayed_fput_lock);
273+
list_splice_init(&delayed_fput_list, &head);
274+
spin_unlock_irq(&delayed_fput_lock);
275+
/* ...then run __fput() on each file after the lock has been released. */
while (!list_empty(&head)) {
276+
struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
277+
list_del_init(&f->f_u.fu_list);
278+
__fput(f);
279+
}
280+
}
281+
282+
/*
 * Callback installed by fput() via init_task_work(): map the
 * callback_head back to the struct file that embeds it
 * (f_u.fu_rcuhead) and run __fput() on that file.
 */
static void ____fput(struct callback_head *work)
283+
{
284+
__fput(container_of(work, struct file, f_u.fu_rcuhead));
285+
}
286+
287+
/*
288+
* If kernel thread really needs to have the final fput() it has done
289+
* to complete, call this. The only user right now is the boot - we
290+
* *do* need to make sure our writes to binaries on initramfs have
291+
* not left us with opened struct file waiting for __fput() - execve()
292+
* won't work without that. Please, don't add more callers without
293+
* very good reasons; in particular, never call that with locks
294+
* held and never call that from a thread that might need to do
295+
* some work on any kind of umount.
296+
*/
297+
void flush_delayed_fput(void)
298+
{
299+
/* Run the work handler directly in the caller; it ignores its argument. */
delayed_fput(NULL);
300+
}
301+
302+
/* Work item that runs delayed_fput(); scheduled from fput(). */
static DECLARE_WORK(delayed_fput_work, delayed_fput);
302+
266304
/*
 * Drop one reference to @file.  When the count reaches zero the final
 * __fput() is not run inline:
 *  - in interrupt context or in a kernel thread (PF_KTHREAD) the file is
 *    put on delayed_fput_list and delayed_fput_work is scheduled;
 *  - otherwise ____fput() is queued on the current task with
 *    task_work_add().
 */
void fput(struct file *file)
267305
{
268-
if (atomic_long_dec_and_test(&file->f_count))
306+
if (atomic_long_dec_and_test(&file->f_count)) {
307+
struct task_struct *task = current;
308+
file_sb_list_del(file);
309+
/* Interrupt or kernel-thread caller: hand the file to the work item. */
if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
310+
unsigned long flags;
311+
spin_lock_irqsave(&delayed_fput_lock, flags);
312+
list_add(&file->f_u.fu_list, &delayed_fput_list);
313+
schedule_work(&delayed_fput_work);
314+
spin_unlock_irqrestore(&delayed_fput_lock, flags);
315+
return;
316+
}
317+
/* Any other caller: queue ____fput() as task work on the current task. */
init_task_work(&file->f_u.fu_rcuhead, ____fput);
318+
task_work_add(task, &file->f_u.fu_rcuhead, true);
319+
}
320+
}
321+
322+
/*
323+
* synchronous analog of fput(); for kernel threads that might be needed
324+
* in some umount() (and thus can't use flush_delayed_fput() without
325+
* risking deadlocks), need to wait for completion of __fput() and know
326+
* for this specific struct file it won't involve anything that would
327+
* need them. Use only if you really need it - at the very least,
328+
* don't blindly convert fput() by kernel thread to that.
329+
*/
330+
void __fput_sync(struct file *file)
331+
{
332+
if (atomic_long_dec_and_test(&file->f_count)) {
333+
struct task_struct *task = current;
334+
file_sb_list_del(file);
335+
/* Callers must be kernel threads; anything else trips the BUG_ON. */
BUG_ON(!(task->flags & PF_KTHREAD));
269336
/* Last reference dropped: run __fput() right here, in the caller. */
__fput(file);
337+
}
270338
}
271339

272340
EXPORT_SYMBOL(fput);

include/linux/file.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd);
3939

4040
extern void fd_install(unsigned int fd, struct file *file);
4141

42+
/* Runs the delayed final-fput handler in the caller; defined in fs/file_table.c. */
extern void flush_delayed_fput(void);
43+
/* Synchronous analog of fput() for kernel threads; defined in fs/file_table.c. */
extern void __fput_sync(struct file *);
44+
4245
#endif /* __LINUX_FILE_H */

init/main.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#include <linux/shmem_fs.h>
6969
#include <linux/slab.h>
7070
#include <linux/perf_event.h>
71+
#include <linux/file.h>
7172

7273
#include <asm/io.h>
7374
#include <asm/bugs.h>
@@ -804,8 +805,8 @@ static noinline int init_post(void)
804805
system_state = SYSTEM_RUNNING;
805806
numa_default_policy();
806807

807-
808808
current->signal->flags |= SIGNAL_UNKILLABLE;
809+
flush_delayed_fput();
809810

810811
if (ramdisk_execute_command) {
811812
run_init_process(ramdisk_execute_command);

0 commit comments

Comments
 (0)