Skip to content

Commit

Permalink
mm: memcg/slab: use a single set of kmem_caches for all allocations
Browse files Browse the repository at this point in the history
Instead of having two sets of kmem_caches: one for system-wide and
non-accounted allocations and the second one shared by all accounted
allocations, we can use just one.

The idea is simple: space for obj_cgroup metadata can be allocated on
demand and filled only for accounted allocations.

It allows us to remove a bunch of code that is required to handle kmem_cache
clones for accounted allocations.  There is no more need to create them,
accumulate statistics, propagate attributes, etc.  It's quite a
significant simplification.

Also, because the total number of slab_caches is reduced by almost half (not
all kmem_caches have a memcg clone), some additional memory savings are
expected.  On my devvm it additionally saves about 3.5% of slab memory.

[guro@fb.com: fix build on MIPS]
  Link: http://lkml.kernel.org/r/20200717214810.3733082-1-guro@fb.com

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-18-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
rgushchin authored and torvalds committed Aug 7, 2020
1 parent 15999ee commit 10befea
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 590 deletions.
2 changes: 0 additions & 2 deletions include/linux/slab.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,6 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name,
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);

void memcg_create_kmem_cache(struct kmem_cache *cachep);

/*
* Please use this macro to create slab caches. Simply specify the
* name of the structure and maybe some flags that are listed above.
Expand Down
3 changes: 0 additions & 3 deletions include/linux/slab_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,6 @@ struct kmem_cache {
int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */

#ifdef CONFIG_MEMCG
struct memcg_cache_params memcg_params;
#endif
#ifdef CONFIG_KASAN
struct kasan_cache kasan_info;
#endif
Expand Down
10 changes: 0 additions & 10 deletions include/linux/slub_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,7 @@ struct kmem_cache {
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
struct work_struct kobj_remove_work;
#endif
#ifdef CONFIG_MEMCG
struct memcg_cache_params memcg_params;
/* For propagation, maximum size of a stored attr */
unsigned int max_attr_size;
#ifdef CONFIG_SYSFS
struct kset *memcg_kset;
#endif
#endif

#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif
Expand Down
25 changes: 24 additions & 1 deletion mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -2800,6 +2800,26 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg)
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * Allocate the obj_cgroup pointer vector for a slab page, with one slot
 * per object on the page.  Slots are filled lazily, only for accounted
 * allocations.
 *
 * Returns 0 on success or -ENOMEM if the vector could not be allocated.
 */
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp)
{
unsigned int objects = objs_per_slab_page(s, page);
void *vec;

/* kcalloc gives a zeroed vector: all objects start out unaccounted */
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
page_to_nid(page));
if (!vec)
return -ENOMEM;

/*
 * Install the vector with bit 0 set as a tag on page->obj_cgroups.
 * cmpxchg() against NULL means a concurrent allocator may win the
 * race; in that case free our copy and use theirs.
 */
if (cmpxchg(&page->obj_cgroups, NULL,
(struct obj_cgroup **) ((unsigned long)vec | 0x1UL)))
kfree(vec);
else
/*
 * NOTE(review): presumably the tagged pointer hides the
 * reference from kmemleak, so suppress the false positive.
 */
kmemleak_not_leak(vec);

return 0;
}

/*
* Returns a pointer to the memory cgroup to which the kernel object is charged.
*
Expand All @@ -2826,7 +2846,10 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)

off = obj_to_index(page->slab_cache, page, p);
objcg = page_obj_cgroups(page)[off];
return obj_cgroup_memcg(objcg);
if (objcg)
return obj_cgroup_memcg(objcg);

return NULL;
}

/* All other pages use page->mem_cgroup */
Expand Down
41 changes: 3 additions & 38 deletions mm/slab.c
Original file line number Diff line number Diff line change
Expand Up @@ -1379,11 +1379,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
return NULL;
}

if (charge_slab_page(page, flags, cachep->gfporder, cachep)) {
__free_pages(page, cachep->gfporder);
return NULL;
}

charge_slab_page(page, flags, cachep->gfporder, cachep);
__SetPageSlab(page);
/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
if (sk_memalloc_socks() && page_is_pfmemalloc(page))
Expand Down Expand Up @@ -3799,8 +3795,8 @@ static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
}

/* Always called with the slab_mutex held */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
{
struct array_cache __percpu *cpu_cache, *prev;
int cpu;
Expand Down Expand Up @@ -3845,30 +3841,6 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
return setup_kmem_cache_nodes(cachep, gfp);
}

static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
{
int ret;
struct kmem_cache *c;

ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);

if (slab_state < FULL)
return ret;

if ((ret < 0) || !is_root_cache(cachep))
return ret;

lockdep_assert_held(&slab_mutex);
c = memcg_cache(cachep);
if (c) {
/* return value determined by the root cache only */
__do_tune_cpucache(c, limit, batchcount, shared, gfp);
}

return ret;
}

/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
Expand All @@ -3881,13 +3853,6 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
if (err)
goto end;

if (!is_root_cache(cachep)) {
struct kmem_cache *root = memcg_root_cache(cachep);
limit = root->limit;
shared = root->shared;
batchcount = root->batchcount;
}

if (limit && shared && batchcount)
goto skip_setup;
/*
Expand Down
Loading

0 comments on commit 10befea

Please sign in to comment.