This repository has been archived by the owner on Jun 4, 2018. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Kernels] 3.18, 4.1 - backport DirtyCOW patch
- Loading branch information
1 parent
f556ab7
commit 4dd6916
Showing
4 changed files
with
184 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
From: Linus Torvalds <torvalds@linux-foundation.org> | ||
Date: Thu, 13 Oct 2016 13:07:36 -0700 | ||
Subject: mm: remove gup_flags FOLL_WRITE games from __get_user_pages() | ||
|
||
commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream. | ||
|
||
This is an ancient bug that was actually attempted to be fixed once | ||
(badly) by me eleven years ago in commit 4ceb5db9757a ("Fix | ||
get_user_pages() race for write access") but that was then undone due to | ||
problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). | ||
|
||
In the meantime, the s390 situation has long been fixed, and we can now | ||
fix it by checking the pte_dirty() bit properly (and do it better). The | ||
s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement | ||
software dirty bits") which made it into v3.9. Earlier kernels will | ||
have to look at the page state itself. | ||
|
||
Also, the VM has become more scalable, and what used to be a purely | ||
theoretical race back then has become easier to trigger. | ||
|
||
To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, | ||
we already did a COW" rather than play racy games with FOLL_WRITE that | ||
is very fundamental, and then use the pte dirty flag to validate that | ||
the FOLL_COW flag is still valid. | ||
|
||
Reported-and-tested-by: Phil "not Paul" Oester <kernel@linuxace.com> | ||
Acked-by: Hugh Dickins <hughd@google.com> | ||
Reviewed-by: Michal Hocko <mhocko@suse.com> | ||
Cc: Andy Lutomirski <luto@kernel.org> | ||
Cc: Kees Cook <keescook@chromium.org> | ||
Cc: Oleg Nesterov <oleg@redhat.com> | ||
Cc: Willy Tarreau <w@1wt.eu> | ||
Cc: Nick Piggin <npiggin@gmail.com> | ||
Cc: Greg Thelen <gthelen@google.com> | ||
Cc: stable@vger.kernel.org | ||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | ||
[carnil: backport to 3.18, adjust context] | ||
Signed-off-by: Philip Mueller <philm@manjaro.org> | ||
--- | ||
include/linux/mm.h | 1 + | ||
mm/gup.c | 14 ++++++++++++-- | ||
2 files changed, 13 insertions(+), 2 deletions(-) | ||
|
||
--- a/include/linux/mm.h | ||
+++ b/include/linux/mm.h | ||
@@ -2029,6 +2029,7 @@ static inline struct page *follow_page(s | ||
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ | ||
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ | ||
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ | ||
+#define FOLL_COW 0x4000 /* internal GUP flag */ | ||
|
||
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, | ||
void *data); | ||
--- a/mm/gup.c | ||
+++ b/mm/gup.c | ||
@@ -32,6 +32,16 @@ static struct page *no_page_table(struct | ||
return NULL; | ||
} | ||
|
||
+/* | ||
+ * FOLL_FORCE can write to even unwritable pte's, but only | ||
+ * after we've gone through a COW cycle and they are dirty. | ||
+ */ | ||
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) | ||
+{ | ||
+ return pte_write(pte) || | ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); | ||
+} | ||
+ | ||
static struct page *follow_page_pte(struct vm_area_struct *vma, | ||
unsigned long address, pmd_t *pmd, unsigned int flags) | ||
{ | ||
@@ -66,7 +76,7 @@ retry: | ||
} | ||
if ((flags & FOLL_NUMA) && pte_numa(pte)) | ||
goto no_page; | ||
- if ((flags & FOLL_WRITE) && !pte_write(pte)) { | ||
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { | ||
pte_unmap_unlock(ptep, ptl); | ||
return NULL; | ||
} | ||
@@ -315,7 +325,7 @@ static int faultin_page(struct task_stru | ||
* reCOWed by userspace write). | ||
*/ | ||
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) | ||
- *flags &= ~FOLL_WRITE; | ||
+ *flags |= FOLL_COW; | ||
return 0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
From: Linus Torvalds <torvalds@linux-foundation.org> | ||
Date: Thu, 13 Oct 2016 13:07:36 -0700 | ||
Subject: mm: remove gup_flags FOLL_WRITE games from __get_user_pages() | ||
|
||
commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream. | ||
|
||
This is an ancient bug that was actually attempted to be fixed once | ||
(badly) by me eleven years ago in commit 4ceb5db9757a ("Fix | ||
get_user_pages() race for write access") but that was then undone due to | ||
problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). | ||
|
||
In the meantime, the s390 situation has long been fixed, and we can now | ||
fix it by checking the pte_dirty() bit properly (and do it better). The | ||
s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement | ||
software dirty bits") which made it into v3.9. Earlier kernels will | ||
have to look at the page state itself. | ||
|
||
Also, the VM has become more scalable, and what used to be a purely | ||
theoretical race back then has become easier to trigger. | ||
|
||
To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, | ||
we already did a COW" rather than play racy games with FOLL_WRITE that | ||
is very fundamental, and then use the pte dirty flag to validate that | ||
the FOLL_COW flag is still valid. | ||
|
||
Reported-and-tested-by: Phil "not Paul" Oester <kernel@linuxace.com> | ||
Acked-by: Hugh Dickins <hughd@google.com> | ||
Reviewed-by: Michal Hocko <mhocko@suse.com> | ||
Cc: Andy Lutomirski <luto@kernel.org> | ||
Cc: Kees Cook <keescook@chromium.org> | ||
Cc: Oleg Nesterov <oleg@redhat.com> | ||
Cc: Willy Tarreau <w@1wt.eu> | ||
Cc: Nick Piggin <npiggin@gmail.com> | ||
Cc: Greg Thelen <gthelen@google.com> | ||
Cc: stable@vger.kernel.org | ||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | ||
[carnil: backport to 3.18, adjust context] | ||
Signed-off-by: Philip Mueller <philm@manjaro.org> | ||
--- | ||
include/linux/mm.h | 1 + | ||
mm/gup.c | 14 ++++++++++++-- | ||
2 files changed, 13 insertions(+), 2 deletions(-) | ||
|
||
--- a/include/linux/mm.h | ||
+++ b/include/linux/mm.h | ||
@@ -2029,6 +2029,7 @@ static inline struct page *follow_page(s | ||
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ | ||
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ | ||
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ | ||
+#define FOLL_COW 0x4000 /* internal GUP flag */ | ||
|
||
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, | ||
void *data); | ||
--- a/mm/gup.c | ||
+++ b/mm/gup.c | ||
@@ -32,6 +32,16 @@ static struct page *no_page_table(struct | ||
return NULL; | ||
} | ||
|
||
+/* | ||
+ * FOLL_FORCE can write to even unwritable pte's, but only | ||
+ * after we've gone through a COW cycle and they are dirty. | ||
+ */ | ||
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) | ||
+{ | ||
+ return pte_write(pte) || | ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); | ||
+} | ||
+ | ||
static struct page *follow_page_pte(struct vm_area_struct *vma, | ||
unsigned long address, pmd_t *pmd, unsigned int flags) | ||
{ | ||
@@ -66,7 +76,7 @@ retry: | ||
} | ||
if ((flags & FOLL_NUMA) && pte_numa(pte)) | ||
goto no_page; | ||
- if ((flags & FOLL_WRITE) && !pte_write(pte)) { | ||
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { | ||
pte_unmap_unlock(ptep, ptl); | ||
return NULL; | ||
} | ||
@@ -315,7 +325,7 @@ static int faultin_page(struct task_stru | ||
* reCOWed by userspace write). | ||
*/ | ||
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) | ||
- *flags &= ~FOLL_WRITE; | ||
+ *flags |= FOLL_COW; | ||
return 0; | ||
} | ||
|