Skip to content

Commit f33ea7f

Browse files
Nick Piggin authored and Linus Torvalds committed
[PATCH] fix get_user_pages bug
Checking pte_dirty instead of pte_write in __follow_page is problematic for s390, and for copy_one_pte which leaves dirty when clearing write.

So revert __follow_page to check pte_write as before, and make do_wp_page pass back a special extra VM_FAULT_WRITE bit to say it has done its full job: once get_user_pages receives this value, it no longer requires pte_write in __follow_page.

But most callers of handle_mm_fault, in the various architectures, have switch statements which do not expect this new case. To avoid changing them all in a hurry, make an inline wrapper function (using the old name) that masks off the new bit, and use the extended interface with double underscores.

Yes, we do have a call to do_wp_page from do_swap_page, but no need to change that: in rare case it's needed, another do_wp_page will follow.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
[ Cleanups by Nick Piggin ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent 5cb4cc0 commit f33ea7f

2 files changed

Lines changed: 40 additions & 13 deletions

File tree

include/linux/mm.h

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -625,10 +625,16 @@ static inline int page_mapped(struct page *page)
625625
* Used to decide whether a process gets delivered SIGBUS or
626626
* just gets major/minor fault counters bumped up.
627627
*/
628-
#define VM_FAULT_OOM (-1)
629-
#define VM_FAULT_SIGBUS 0
630-
#define VM_FAULT_MINOR 1
631-
#define VM_FAULT_MAJOR 2
628+
#define VM_FAULT_OOM 0x00
629+
#define VM_FAULT_SIGBUS 0x01
630+
#define VM_FAULT_MINOR 0x02
631+
#define VM_FAULT_MAJOR 0x03
632+
633+
/*
634+
* Special case for get_user_pages.
635+
* Must be in a distinct bit from the above VM_FAULT_ flags.
636+
*/
637+
#define VM_FAULT_WRITE 0x10
632638

633639
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
634640

@@ -704,7 +710,13 @@ extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsign
704710
extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
705711
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
706712
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
707-
extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
713+
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
714+
715+
static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
716+
{
717+
return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
718+
}
719+
708720
extern int make_pages_present(unsigned long addr, unsigned long end);
709721
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
710722
void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);

mm/memory.c

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -811,15 +811,18 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
811811
pte = *ptep;
812812
pte_unmap(ptep);
813813
if (pte_present(pte)) {
814-
if (write && !pte_dirty(pte))
814+
if (write && !pte_write(pte))
815815
goto out;
816816
if (read && !pte_read(pte))
817817
goto out;
818818
pfn = pte_pfn(pte);
819819
if (pfn_valid(pfn)) {
820820
page = pfn_to_page(pfn);
821-
if (accessed)
821+
if (accessed) {
822+
if (write && !pte_dirty(pte) &&!PageDirty(page))
823+
set_page_dirty(page);
822824
mark_page_accessed(page);
825+
}
823826
return page;
824827
}
825828
}
@@ -941,10 +944,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
941944
}
942945
spin_lock(&mm->page_table_lock);
943946
do {
947+
int write_access = write;
944948
struct page *page;
945949

946950
cond_resched_lock(&mm->page_table_lock);
947-
while (!(page = follow_page(mm, start, write))) {
951+
while (!(page = follow_page(mm, start, write_access))) {
948952
/*
949953
* Shortcut for anonymous pages. We don't want
950954
* to force the creation of pages tables for
@@ -957,7 +961,16 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
957961
break;
958962
}
959963
spin_unlock(&mm->page_table_lock);
960-
switch (handle_mm_fault(mm,vma,start,write)) {
964+
switch (__handle_mm_fault(mm, vma, start,
965+
write_access)) {
966+
case VM_FAULT_WRITE:
967+
/*
968+
* do_wp_page has broken COW when
969+
* necessary, even if maybe_mkwrite
970+
* decided not to set pte_write
971+
*/
972+
write_access = 0;
973+
/* FALLTHRU */
961974
case VM_FAULT_MINOR:
962975
tsk->min_flt++;
963976
break;
@@ -1220,6 +1233,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
12201233
struct page *old_page, *new_page;
12211234
unsigned long pfn = pte_pfn(pte);
12221235
pte_t entry;
1236+
int ret;
12231237

12241238
if (unlikely(!pfn_valid(pfn))) {
12251239
/*
@@ -1247,7 +1261,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
12471261
lazy_mmu_prot_update(entry);
12481262
pte_unmap(page_table);
12491263
spin_unlock(&mm->page_table_lock);
1250-
return VM_FAULT_MINOR;
1264+
return VM_FAULT_MINOR|VM_FAULT_WRITE;
12511265
}
12521266
}
12531267
pte_unmap(page_table);
@@ -1274,6 +1288,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
12741288
/*
12751289
* Re-check the pte - we dropped the lock
12761290
*/
1291+
ret = VM_FAULT_MINOR;
12771292
spin_lock(&mm->page_table_lock);
12781293
page_table = pte_offset_map(pmd, address);
12791294
if (likely(pte_same(*page_table, pte))) {
@@ -1290,12 +1305,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
12901305

12911306
/* Free the old page.. */
12921307
new_page = old_page;
1308+
ret |= VM_FAULT_WRITE;
12931309
}
12941310
pte_unmap(page_table);
12951311
page_cache_release(new_page);
12961312
page_cache_release(old_page);
12971313
spin_unlock(&mm->page_table_lock);
1298-
return VM_FAULT_MINOR;
1314+
return ret;
12991315

13001316
no_new_page:
13011317
page_cache_release(old_page);
@@ -1987,7 +2003,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
19872003
if (write_access) {
19882004
if (!pte_write(entry))
19892005
return do_wp_page(mm, vma, address, pte, pmd, entry);
1990-
19912006
entry = pte_mkdirty(entry);
19922007
}
19932008
entry = pte_mkyoung(entry);
@@ -2002,7 +2017,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
20022017
/*
20032018
* By the time we get here, we already hold the mm semaphore
20042019
*/
2005-
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
2020+
int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
20062021
unsigned long address, int write_access)
20072022
{
20082023
pgd_t *pgd;

0 commit comments

Comments
 (0)