Skip to content

Commit

Permalink
Merge branch 'akpm' (patches from Andrew)
Browse files Browse the repository at this point in the history
Merge more fixes from Andrew Morton:
 "17 fixes"

Mostly mm fixes and one ocfs2 locking fix.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: memcontrol: fix network errors from failing __GFP_ATOMIC charges
  mm/memory_hotplug: fix updating the node span
  scripts/gdb: fix debugging modules compiled with hot/cold partitioning
  mm: slab: make page_cgroup_ino() to recognize non-compound slab pages properly
  MAINTAINERS: update information for "MEMORY MANAGEMENT"
  dump_stack: avoid the livelock of the dump_lock
  zswap: add Vitaly to the maintainers list
  mm/page_alloc.c: ratelimit allocation failure warnings more aggressively
  mm/khugepaged: fix might_sleep() warn with CONFIG_HIGHPTE=y
  mm, vmstat: reduce zone->lock holding time by /proc/pagetypeinfo
  mm, vmstat: hide /proc/pagetypeinfo from normal users
  mm/mmu_notifiers: use the right return code for WARN_ON
  ocfs2: protect extent tree in ocfs2_prepare_inode_for_write()
  mm: thp: handle page cache THP correctly in PageTransCompoundMap
  mm, meminit: recalculate pcpu batch and high limits after init completes
  mm/gup_benchmark: fix MAP_HUGETLB case
  mm: memcontrol: fix NULL-ptr deref in percpu stats flush
  • Loading branch information
torvalds committed Nov 6, 2019
2 parents 26bc672 + 869712f commit 4dd5815
Show file tree
Hide file tree
Showing 15 changed files with 188 additions and 79 deletions.
5 changes: 5 additions & 0 deletions MAINTAINERS
Expand Up @@ -10519,8 +10519,12 @@ F: mm/memblock.c
F: Documentation/core-api/boot-time-mm.rst

MEMORY MANAGEMENT
M: Andrew Morton <akpm@linux-foundation.org>
L: linux-mm@kvack.org
W: http://www.linux-mm.org
T: quilt https://ozlabs.org/~akpm/mmotm/
T: quilt https://ozlabs.org/~akpm/mmots/
T: git git://github.com/hnaz/linux-mm.git
S: Maintained
F: include/linux/mm.h
F: include/linux/gfp.h
Expand Down Expand Up @@ -18034,6 +18038,7 @@ F: Documentation/vm/zsmalloc.rst
ZSWAP COMPRESSED SWAP CACHING
M: Seth Jennings <sjenning@redhat.com>
M: Dan Streetman <ddstreet@ieee.org>
M: Vitaly Wool <vitaly.wool@konsulko.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/zswap.c
Expand Down
134 changes: 90 additions & 44 deletions fs/ocfs2/file.c
Expand Up @@ -2098,53 +2098,89 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
return 0;
}

static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
struct file *file,
loff_t pos, size_t count,
int *meta_level)
static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
struct buffer_head **di_bh,
int meta_level,
int overwrite_io,
int write_sem,
int wait)
{
int ret;
struct buffer_head *di_bh = NULL;
u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
u32 clusters =
ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
int ret = 0;

ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret) {
mlog_errno(ret);
if (wait)
ret = ocfs2_inode_lock(inode, NULL, meta_level);
else
ret = ocfs2_try_inode_lock(inode,
overwrite_io ? NULL : di_bh, meta_level);
if (ret < 0)
goto out;

if (wait) {
if (write_sem)
down_write(&OCFS2_I(inode)->ip_alloc_sem);
else
down_read(&OCFS2_I(inode)->ip_alloc_sem);
} else {
if (write_sem)
ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
else
ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);

if (!ret) {
ret = -EAGAIN;
goto out_unlock;
}
}

*meta_level = 1;
return ret;

ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
if (ret)
mlog_errno(ret);
out_unlock:
brelse(*di_bh);
ocfs2_inode_unlock(inode, meta_level);
out:
brelse(di_bh);
return ret;
}

/*
 * Undo ocfs2_inode_lock_for_extent_tree() in reverse order: release
 * ip_alloc_sem (using the same read/write side selected by the
 * write_sem flag at lock time), drop the cached dinode buffer head,
 * and finally release the cluster inode lock.
 */
static void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
struct buffer_head **di_bh,
int meta_level,
int write_sem)
{
/* Release the alloc semaphore on the same side it was acquired. */
if (write_sem)
up_write(&OCFS2_I(inode)->ip_alloc_sem);
else
up_read(&OCFS2_I(inode)->ip_alloc_sem);

/*
 * Drop the dinode buffer head and clear the caller's pointer so a
 * stale bh cannot be reused once the inode lock is dropped.
 */
brelse(*di_bh);
*di_bh = NULL;

/* meta_level < 0 means the cluster inode lock is not held. */
if (meta_level >= 0)
ocfs2_inode_unlock(inode, meta_level);
}

static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t pos, size_t count, int wait)
{
int ret = 0, meta_level = 0, overwrite_io = 0;
int write_sem = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
struct buffer_head *di_bh = NULL;
u32 cpos;
u32 clusters;

/*
* We start with a read level meta lock and only jump to an ex
* if we need to make modifications here.
*/
for(;;) {
if (wait)
ret = ocfs2_inode_lock(inode, NULL, meta_level);
else
ret = ocfs2_try_inode_lock(inode,
overwrite_io ? NULL : &di_bh, meta_level);
ret = ocfs2_inode_lock_for_extent_tree(inode,
&di_bh,
meta_level,
overwrite_io,
write_sem,
wait);
if (ret < 0) {
meta_level = -1;
if (ret != -EAGAIN)
mlog_errno(ret);
goto out;
Expand All @@ -2156,15 +2192,8 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
*/
if (!wait && !overwrite_io) {
overwrite_io = 1;
if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
ret = -EAGAIN;
goto out_unlock;
}

ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
brelse(di_bh);
di_bh = NULL;
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret);
Expand All @@ -2183,7 +2212,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* set inode->i_size at the end of a write. */
if (should_remove_suid(dentry)) {
if (meta_level == 0) {
ocfs2_inode_unlock(inode, meta_level);
ocfs2_inode_unlock_for_extent_tree(inode,
&di_bh,
meta_level,
write_sem);
meta_level = 1;
continue;
}
Expand All @@ -2197,18 +2229,32 @@ static int ocfs2_prepare_inode_for_write(struct file *file,

ret = ocfs2_check_range_for_refcount(inode, pos, count);
if (ret == 1) {
ocfs2_inode_unlock(inode, meta_level);
meta_level = -1;

ret = ocfs2_prepare_inode_for_refcount(inode,
file,
pos,
count,
&meta_level);
ocfs2_inode_unlock_for_extent_tree(inode,
&di_bh,
meta_level,
write_sem);
ret = ocfs2_inode_lock_for_extent_tree(inode,
&di_bh,
meta_level,
overwrite_io,
1,
wait);
write_sem = 1;
if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret);
goto out;
}

cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
clusters =
ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
}

if (ret < 0) {
mlog_errno(ret);
if (ret != -EAGAIN)
mlog_errno(ret);
goto out_unlock;
}

Expand All @@ -2219,10 +2265,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
pos, count, wait);

brelse(di_bh);

if (meta_level >= 0)
ocfs2_inode_unlock(inode, meta_level);
ocfs2_inode_unlock_for_extent_tree(inode,
&di_bh,
meta_level,
write_sem);

out:
return ret;
Expand Down
5 changes: 0 additions & 5 deletions include/linux/mm.h
Expand Up @@ -695,11 +695,6 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)

extern void kvfree(const void *addr);

/*
 * Return a pointer to the compound_mapcount of a compound page, which
 * is stored in page[1] (the first tail page).
 */
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
}

static inline int compound_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
Expand Down
5 changes: 5 additions & 0 deletions include/linux/mm_types.h
Expand Up @@ -221,6 +221,11 @@ struct page {
#endif
} _struct_page_alignment;

/*
 * Return a pointer to the compound_mapcount of a compound page, which
 * is stored in page[1] (the first tail page).
 *
 * NOTE(review): placed here next to struct page, presumably so headers
 * that cannot include linux/mm.h (e.g. page-flags.h) can use it —
 * confirm against the includers.
 */
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
}

/*
* Used for sizing the vmemmap region on some architectures
*/
Expand Down
20 changes: 18 additions & 2 deletions include/linux/page-flags.h
Expand Up @@ -622,12 +622,28 @@ static inline int PageTransCompound(struct page *page)
*
* Unlike PageTransCompound, this is safe to be called only while
* split_huge_pmd() cannot run from under us, like if protected by the
* MMU notifier, otherwise it may result in page->_mapcount < 0 false
* MMU notifier, otherwise it may result in page->_mapcount check false
* positives.
*
* We have to treat page cache THP differently, since every subpage
* would get its _mapcount inc'ed once the page is PMD mapped. But it
* may also be PTE mapped in the current process, so we compare the
* subpage's _mapcount to the compound_mapcount to filter out the
* PTE-mapped case.
*/
static inline int PageTransCompoundMap(struct page *page)
{
struct page *head;

/* Not a THP subpage at all. */
if (!PageTransCompound(page))
return 0;

/*
 * Anon THP: a negative _mapcount on the subpage indicates it is
 * mapped only via the compound (PMD) mapping.
 */
if (PageAnon(page))
return atomic_read(&page->_mapcount) < 0;

head = compound_head(page);
/* File THP is PMD mapped and not PTE mapped */
return atomic_read(&page->_mapcount) ==
atomic_read(compound_mapcount_ptr(head));
}

/*
Expand Down
7 changes: 6 additions & 1 deletion lib/dump_stack.c
Expand Up @@ -106,7 +106,12 @@ asmlinkage __visible void dump_stack(void)
was_locked = 1;
} else {
local_irq_restore(flags);
cpu_relax();
/*
* Wait for the lock to release before jumping to
* atomic_cmpxchg() in order to mitigate the thundering herd
* problem.
*/
do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
goto retry;
}

Expand Down
7 changes: 4 additions & 3 deletions mm/khugepaged.c
Expand Up @@ -1028,12 +1028,13 @@ static void collapse_huge_page(struct mm_struct *mm,

anon_vma_lock_write(vma->anon_vma);

pte = pte_offset_map(pmd, address);
pte_ptl = pte_lockptr(mm, pmd);

mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
address, address + HPAGE_PMD_SIZE);
mmu_notifier_invalidate_range_start(&range);

pte = pte_offset_map(pmd, address);
pte_ptl = pte_lockptr(mm, pmd);

pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
/*
* After this gup_fast can't run anymore. This also removes
Expand Down
23 changes: 16 additions & 7 deletions mm/memcontrol.c
Expand Up @@ -484,7 +484,7 @@ ino_t page_cgroup_ino(struct page *page)
unsigned long ino = 0;

rcu_read_lock();
if (PageHead(page) && PageSlab(page))
if (PageSlab(page) && !PageTail(page))
memcg = memcg_from_slab_page(page);
else
memcg = READ_ONCE(page->mem_cgroup);
Expand Down Expand Up @@ -2534,6 +2534,15 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
goto retry;
}

/*
* Memcg doesn't have a dedicated reserve for atomic
* allocations. But like the global atomic pool, we need to
* put the burden of reclaim on regular allocation requests
* and let these go through as privileged allocations.
*/
if (gfp_mask & __GFP_ATOMIC)
goto force;

/*
* Unlike in global OOM situations, memcg is not in a physical
* memory shortage. Allow dying and OOM-killed tasks to
Expand Down Expand Up @@ -5014,12 +5023,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;

/*
* Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
memcg_flush_percpu_vmstats(memcg, false);
memcg_flush_percpu_vmevents(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
Expand All @@ -5030,6 +5033,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
static void mem_cgroup_free(struct mem_cgroup *memcg)
{
memcg_wb_domain_exit(memcg);
/*
* Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
memcg_flush_percpu_vmstats(memcg, false);
memcg_flush_percpu_vmevents(memcg);
__mem_cgroup_free(memcg);
}

Expand Down
8 changes: 8 additions & 0 deletions mm/memory_hotplug.c
Expand Up @@ -447,6 +447,14 @@ static void update_pgdat_span(struct pglist_data *pgdat)
zone->spanned_pages;

/* No need to lock the zones, they can't change. */
if (!zone->spanned_pages)
continue;
if (!node_end_pfn) {
node_start_pfn = zone->zone_start_pfn;
node_end_pfn = zone_end_pfn;
continue;
}

if (zone_end_pfn > node_end_pfn)
node_end_pfn = zone_end_pfn;
if (zone->zone_start_pfn < node_start_pfn)
Expand Down
2 changes: 1 addition & 1 deletion mm/mmu_notifier.c
Expand Up @@ -180,7 +180,7 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
mn->ops->invalidate_range_start, _ret,
!mmu_notifier_range_blockable(range) ? "non-" : "");
WARN_ON(mmu_notifier_range_blockable(range) ||
ret != -EAGAIN);
_ret != -EAGAIN);
ret = _ret;
}
}
Expand Down

0 comments on commit 4dd5815

Please sign in to comment.