Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework perf hw_breakpoint support #38

Closed
mpe opened this issue Mar 4, 2016 · 3 comments
Closed

Rework perf hw_breakpoint support #38

mpe opened this issue Mar 4, 2016 · 3 comments
Labels
bug It's a bug medium Possibly not too difficult
Milestone

Comments

@mpe
Copy link
Member

mpe commented Mar 4, 2016

The powerpc hw_breakpoint support is the only arch that uses the arch_unregister_hw_breakpoint() hook. In there we do:

if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
    bp->ctx->task->thread.last_hit_ubp = NULL;

Peterz strongly suggests we should not be using bp->ctx in there, and that we should (probably) instead be storing the breakpoint info per-cpu like other arches:

Indeed, but if there's a preemption point in between setting and using
that state, the ctx->task pointer might not actually still point to the
same task. With inherited events the event might get swapped to the next
task if it has the exact same (inherited) event configuration instead of
reprogramming the hardware.

The code needs to be reworked & tested.

mpe referenced this issue in linuxppc/linux May 31, 2016
The binary GCD algorithm is based on the following facts:
	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)

Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% the execution time) than the
division-based Euclidian algorithm.

On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.

There are two variants of the code here, depending on whether a fast
__ffs (find least significant set bit) instruction is available.  This
allows the unpredictable branches in the bit-at-a-time shifting loop to
be eliminated.

If fast __ffs is not available, the "even/odd" GCD variant is used.

I use the following code to benchmark:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <string.h>
	#include <time.h>
	#include <unistd.h>

	#define swap(a, b) \
		do { \
			a ^= b; \
			b ^= a; \
			a ^= b; \
		} while (0)

	unsigned long gcd0(unsigned long a, unsigned long b)
	{
		unsigned long r;

		if (a < b) {
			swap(a, b);
		}

		if (b == 0)
			return a;

		while ((r = a % b) != 0) {
			a = b;
			b = r;
		}

		return b;
	}

	unsigned long gcd1(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		b >>= __builtin_ctzl(b);

		for (;;) {
			a >>= __builtin_ctzl(a);
			if (a == b)
				return a << __builtin_ctzl(r);

			if (a < b)
				swap(a, b);
			a -= b;
		}
	}

	unsigned long gcd2(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		r &= -r;

		while (!(b & r))
			b >>= 1;

		for (;;) {
			while (!(a & r))
				a >>= 1;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			if (a & r)
				a += b;
			a >>= 1;
		}
	}

	unsigned long gcd3(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		b >>= __builtin_ctzl(b);
		if (b == 1)
			return r & -r;

		for (;;) {
			a >>= __builtin_ctzl(a);
			if (a == 1)
				return r & -r;
			if (a == b)
				return a << __builtin_ctzl(r);

			if (a < b)
				swap(a, b);
			a -= b;
		}
	}

	unsigned long gcd4(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		r &= -r;

		while (!(b & r))
			b >>= 1;
		if (b == r)
			return r;

		for (;;) {
			while (!(a & r))
				a >>= 1;
			if (a == r)
				return r;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			if (a & r)
				a += b;
			a >>= 1;
		}
	}

	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
		gcd0, gcd1, gcd2, gcd3, gcd4,
	};

	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))

	#if defined(__x86_64__)

	#define rdtscll(val) do { \
		unsigned long __a,__d; \
		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
	} while(0)

	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
								unsigned long a, unsigned long b, unsigned long *res)
	{
		unsigned long long start, end;
		unsigned long long ret;
		unsigned long gcd_res;

		rdtscll(start);
		gcd_res = gcd(a, b);
		rdtscll(end);

		if (end >= start)
			ret = end - start;
		else
			ret = ~0ULL - start + 1 + end;

		*res = gcd_res;
		return ret;
	}

	#else

	static inline struct timespec read_time(void)
	{
		struct timespec time;
		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
		return time;
	}

	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
	{
		struct timespec temp;

		if ((end.tv_nsec - start.tv_nsec) < 0) {
			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
		} else {
			temp.tv_sec = end.tv_sec - start.tv_sec;
			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
		}

		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
	}

	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
								unsigned long a, unsigned long b, unsigned long *res)
	{
		struct timespec start, end;
		unsigned long gcd_res;

		start = read_time();
		gcd_res = gcd(a, b);
		end = read_time();

		*res = gcd_res;
		return diff_time(start, end);
	}

	#endif

	static inline unsigned long get_rand()
	{
		if (sizeof(long) == 8)
			return (unsigned long)rand() << 32 | rand();
		else
			return rand();
	}

	int main(int argc, char **argv)
	{
		unsigned int seed = time(0);
		int loops = 100;
		int repeats = 1000;
		unsigned long (*res)[TEST_ENTRIES];
		unsigned long long elapsed[TEST_ENTRIES];
		int i, j, k;

		for (;;) {
			int opt = getopt(argc, argv, "n:r:s:");
			/* End condition always first */
			if (opt == -1)
				break;

			switch (opt) {
			case 'n':
				loops = atoi(optarg);
				break;
			case 'r':
				repeats = atoi(optarg);
				break;
			case 's':
				seed = strtoul(optarg, NULL, 10);
				break;
			default:
				/* You won't actually get here. */
				break;
			}
		}

		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
		memset(elapsed, 0, sizeof(elapsed));

		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			/* Do we have args? */
			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
			unsigned long long min_elapsed[TEST_ENTRIES];
			for (k = 0; k < repeats; k++) {
				for (i = 0; i < TEST_ENTRIES; i++) {
					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
					if (k == 0 || min_elapsed[i] > tmp)
						min_elapsed[i] = tmp;
				}
			}
			for (i = 0; i < TEST_ENTRIES; i++)
				elapsed[i] += min_elapsed[i];
		}

		for (i = 0; i < TEST_ENTRIES; i++)
			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);

		k = 0;
		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
			for (i = 1; i < TEST_ENTRIES; i++) {
				if (res[j][i] != res[j][0])
					break;
			}
			if (i < TEST_ENTRIES) {
				if (k == 0) {
					k = 1;
					fprintf(stderr, "Error:\n");
				}
				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
				for (i = 0; i < TEST_ENTRIES; i++)
					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
			}
		}

		if (k == 0)
			fprintf(stderr, "PASS\n");

		free(res);

		return 0;
	}

Compiled with "-O2", on "VirtualBox 4.4.0-22-generic torvalds#38-Ubuntu x86_64" got:

  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 10174
  gcd1: elapsed 2120
  gcd2: elapsed 2902
  gcd3: elapsed 2039
  gcd4: elapsed 2812
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9309
  gcd1: elapsed 2280
  gcd2: elapsed 2822
  gcd3: elapsed 2217
  gcd4: elapsed 2710
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9589
  gcd1: elapsed 2098
  gcd2: elapsed 2815
  gcd3: elapsed 2030
  gcd4: elapsed 2718
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9914
  gcd1: elapsed 2309
  gcd2: elapsed 2779
  gcd3: elapsed 2228
  gcd4: elapsed 2709
  PASS

[akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Signed-off-by: George Spelvin <linux@horizon.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mpe referenced this issue in linuxppc/linux Dec 16, 2016
When locking a GPIO line as IRQ, we go to lengths to
double-check that the line is really set as input before
marking it as used for IRQ. This is not good on GPIO chips
that can sleep, because this function is called in IRQ-safe
context. Just skip this if it can't be checked quickly.

Currently this happens on sleeping expanders such as STMPE
or TC3589x:

BUG: scheduling while atomic: swapper/1/0x00000002
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 4.9.0-rc1+ torvalds#38
Hardware name: Nomadik STn8815
[<c000f2e0>] (unwind_backtrace) from [<c000d244>] (show_stack+0x10/0x14)
[<c000d244>] (show_stack) from [<c0037b78>] (__schedule_bug+0x54/0x80)
[<c0037b78>] (__schedule_bug) from [<c042df14>] (__schedule+0x3a0/0x460)
[<c042df14>] (__schedule) from [<c042e028>] (schedule+0x54/0xb8)
(...)

This patch fixes that problem and relies on the direction
read from the chip when it was added.

Cc: stable@vger.kernel.org
Fixes: 9c10280 ("gpio: flush direction status in gpiochip_lock_as_irq()")
Cc: Patrice Chotard <patrice.chotard@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
mpe referenced this issue in linuxppc/linux May 29, 2017
qxl_release_map will enter an atomic context, but since we still need to
alloc memory for BOs, we better delay that until we have everything we
need, in case we need to sleep inside the allocation.  This avoids the
Sleep in atomic state below, which was reported by Mike.

 [   43.910362] BUG: sleeping function called from invalid context at mm/slab.h:432
 [   43.910955] in_atomic(): 1, irqs_disabled(): 0, pid: 2077, name: Xorg
 [   43.911472] Preemption disabled at:
 [   43.911478] [<ffffffffa02b1c45>] qxl_bo_kmap_atomic_page+0xa5/0x100 [qxl]
 [   43.912103] CPU: 0 PID: 2077 Comm: Xorg Tainted: G            E   4.12.0-master torvalds#38
 [ 43.912550] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
 rel-1.8.1-0-g4adadbd-20161202_174313-build11a 04/01/2014
 [   43.913202] Call Trace:
 [   43.913371]  dump_stack+0x65/0x89
 [   43.913581]  ? qxl_bo_kmap_atomic_page+0xa5/0x100 [qxl]
 [   43.913876]  ___might_sleep+0x11a/0x190
 [   43.914095]  __might_sleep+0x4a/0x80
 [   43.914319]  ? qxl_bo_create+0x50/0x190 [qxl]
 [   43.914565]  kmem_cache_alloc_trace+0x46/0x180
 [   43.914836]  qxl_bo_create+0x50/0x190 [qxl]
 [   43.915082]  ? refcount_dec_and_test+0x11/0x20
 [   43.915332]  ? ttm_mem_io_reserve+0x41/0xe0 [ttm]
 [   43.915595]  qxl_alloc_bo_reserved+0x37/0xb0 [qxl]
 [   43.915884]  qxl_cursor_atomic_update+0x8f/0x260 [qxl]
 [   43.916172]  ? drm_atomic_helper_update_legacy_modeset_state+0x1d6/0x210 [drm_kms_helper]
 [   43.916623]  drm_atomic_helper_commit_planes+0xec/0x230 [drm_kms_helper]
 [   43.916995]  drm_atomic_helper_commit_tail+0x2b/0x60 [drm_kms_helper]
 [   43.917398]  commit_tail+0x65/0x70 [drm_kms_helper]
 [   43.917693]  drm_atomic_helper_commit+0xa9/0x100 [drm_kms_helper]
 [   43.918039]  drm_atomic_commit+0x4b/0x50 [drm]
 [   43.918334]  drm_atomic_helper_update_plane+0xf1/0x110 [drm_kms_helper]
 [   43.918902]  __setplane_internal+0x19f/0x280 [drm]
 [   43.919240]  drm_mode_cursor_universal+0x101/0x1c0 [drm]
 [   43.919541]  drm_mode_cursor_common+0x15b/0x1d0 [drm]
 [   43.919858]  drm_mode_cursor2_ioctl+0xe/0x10 [drm]
 [   43.920157]  drm_ioctl+0x211/0x460 [drm]
 [   43.920383]  ? drm_mode_cursor_ioctl+0x50/0x50 [drm]
 [   43.920664]  ? handle_mm_fault+0x93/0x160
 [   43.920893]  do_vfs_ioctl+0x96/0x6e0
 [   43.921117]  ? __fget+0x73/0xa0
 [   43.921322]  SyS_ioctl+0x41/0x70
 [   43.921545]  entry_SYSCALL_64_fastpath+0x1a/0xa5
 [   43.922188] RIP: 0033:0x7f1145804bc7
 [   43.922526] RSP: 002b:00007ffcd3e50508 EFLAGS: 00003246 ORIG_RAX: 0000000000000010
 [   43.923367] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007f1145804bc7
 [   43.923852] RDX: 00007ffcd3e50540 RSI: 00000000c02464bb RDI: 000000000000000b
 [   43.924299] RBP: 0000000000000040 R08: 0000000000000040 R09: 000000000000000c
 [   43.924694] R10: 00007ffcd3e50340 R11: 0000000000003246 R12: 0000000000000018
 [   43.925128] R13: 00000000022bc390 R14: 0000000000000040 R15: 00007ffcd3e5062c

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170519175819.15682-1-krisman@collabora.co.uk
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
mpe referenced this issue in linuxppc/linux Jul 13, 2017
Correct these checkpatch.pl errors:

|ERROR: space required before that '-' (ctx:OxO)
|torvalds#37: FILE: include/linux/bug.h:37:
|+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))

|ERROR: space required before that '-' (ctx:OxO)
|torvalds#38: FILE: include/linux/bug.h:38:
|+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))

I decided to wrap the bitfield expressions that begin with minus signs
in parentheses rather than insert spaces before the minus signs.

Link: http://lkml.kernel.org/r/20170525120316.24473-5-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mpe referenced this issue in linuxppc/linux Mar 9, 2018
pppol2tp_release uses call_rcu to put the final ref on its socket. But
the session object doesn't hold a ref on the session socket so may be
freed while the pppol2tp_put_sk RCU callback is scheduled. Fix this by
having the session hold a ref on its socket until the session is
destroyed. It is this ref that is dropped via call_rcu.

Sessions are also deleted via l2tp_tunnel_closeall. This must now also put
the final ref via call_rcu. So move the call_rcu call site into
pppol2tp_session_close so that this happens in both destroy paths. A
common destroy path should really be implemented, perhaps with
l2tp_tunnel_closeall calling l2tp_session_delete like pppol2tp_release
does, but this will be looked at later.

ODEBUG: activate active (active state 1) object type: rcu_head hint:           (null)
WARNING: CPU: 3 PID: 13407 at lib/debugobjects.c:291 debug_print_object+0x166/0x220
Modules linked in:
CPU: 3 PID: 13407 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ torvalds#38
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:debug_print_object+0x166/0x220
RSP: 0018:ffff880013647a00 EFLAGS: 00010082
RAX: dffffc0000000008 RBX: 0000000000000003 RCX: ffffffff814d3333
RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff88001a59f6d0
RBP: ffff880013647a40 R08: 0000000000000000 R09: 0000000000000001
R10: ffff8800136479a8 R11: 0000000000000000 R12: 0000000000000001
R13: ffffffff86161420 R14: ffffffff85648b60 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffff88001a580000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000006022000 CR4: 00000000000006e0
Call Trace:
 debug_object_activate+0x38b/0x530
 ? debug_object_assert_init+0x3b0/0x3b0
 ? __mutex_unlock_slowpath+0x85/0x8b0
 ? pppol2tp_session_destruct+0x110/0x110
 __call_rcu.constprop.66+0x39/0x890
 ? __call_rcu.constprop.66+0x39/0x890
 call_rcu_sched+0x17/0x20
 pppol2tp_release+0x2c7/0x440
 ? fcntl_setlk+0xca0/0xca0
 ? sock_alloc_file+0x340/0x340
 sock_release+0x92/0x1e0
 sock_close+0x1b/0x20
 __fput+0x296/0x6e0
 ____fput+0x1a/0x20
 task_work_run+0x127/0x1a0
 do_exit+0x7f9/0x2ce0
 ? SYSC_connect+0x212/0x310
 ? mm_update_next_owner+0x690/0x690
 ? up_read+0x1f/0x40
 ? __do_page_fault+0x3c8/0xca0
 do_group_exit+0x10d/0x330
 ? do_group_exit+0x330/0x330
 SyS_exit_group+0x22/0x30
 do_syscall_64+0x1e0/0x730
 ? trace_hardirqs_off_thunk+0x1a/0x1c
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7f362e471259
RSP: 002b:00007ffe389abe08 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f362e471259
RDX: 00007f362e471259 RSI: 000000000000002e RDI: 0000000000000000
RBP: 00007ffe389abe30 R08: 0000000000000000 R09: 00007f362e944270
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffe389abf50 R14: 0000000000000000 R15: 0000000000000000
Code: 8d 3c dd a0 8f 64 85 48 89 fa 48 c1 ea 03 80 3c 02 00 75 7b 48 8b 14 dd a0 8f 64 85 4c 89 f6 48 c7 c7 20 85 64 85 e
8 2a 55 14 ff <0f> 0b 83 05 ad 2a 68 04 01 48 83 c4 18 5b 41 5c 41 5d 41 5e 41

Fixes: ee40fb2 ("l2tp: protect sock pointer of struct pppol2tp_session with RCU")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
mpe referenced this issue in linuxppc/linux May 16, 2018
syzbot caught an infinite recursion in nsh_gso_segment().

Problem here is that we need to make sure the NSH header is of
reasonable length.

BUG: MAX_LOCK_DEPTH too low!
turning off the locking correctness validator.
depth: 48  max: 48!
48 locks held by syz-executor0/10189:
 #0:         (ptrval) (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x30f/0x34c0 net/core/dev.c:3517
 #1:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 #1:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 #2:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 #2:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 #3:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 #3:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 #4:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 #4:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 #5:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 #5:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#6:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#6:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#7:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#7:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#8:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#8:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#9:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#9:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#10:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#10:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#11:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#11:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#12:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#12:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#13:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#13:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#14:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#14:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#15:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#15:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#16:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#16:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#17:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#17:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#18:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#18:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#19:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#19:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#20:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#20:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#21:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#21:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#22:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#22:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#23:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#23:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#24:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#24:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#25:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#25:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#26:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#26:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#27:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#27:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#28:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#28:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#29:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#29:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#30:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#30:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#31:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#31:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
dccp_close: ABORT with 65423 bytes unread
 torvalds#32:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#32:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#33:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#33:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#34:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#34:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#35:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#35:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#36:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#36:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#37:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#37:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#38:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#38:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#39:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#39:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#40:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#40:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#41:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#41:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#42:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#42:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#43:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#43:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#44:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#44:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#45:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#45:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#46:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#46:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
 torvalds#47:         (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline]
 torvalds#47:         (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787
INFO: lockdep is turned off.
CPU: 1 PID: 10189 Comm: syz-executor0 Not tainted 4.17.0-rc2+ torvalds#26
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 __lock_acquire+0x1788/0x5140 kernel/locking/lockdep.c:3449
 lock_acquire+0x1dc/0x520 kernel/locking/lockdep.c:3920
 rcu_lock_acquire include/linux/rcupdate.h:246 [inline]
 rcu_read_lock include/linux/rcupdate.h:632 [inline]
 skb_mac_gso_segment+0x25b/0x720 net/core/dev.c:2789
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107
 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792
 __skb_gso_segment+0x3bb/0x870 net/core/dev.c:2865
 skb_gso_segment include/linux/netdevice.h:4025 [inline]
 validate_xmit_skb+0x54d/0xd90 net/core/dev.c:3118
 validate_xmit_skb_list+0xbf/0x120 net/core/dev.c:3168
 sch_direct_xmit+0x354/0x11e0 net/sched/sch_generic.c:312
 qdisc_restart net/sched/sch_generic.c:399 [inline]
 __qdisc_run+0x741/0x1af0 net/sched/sch_generic.c:410
 __dev_xmit_skb net/core/dev.c:3243 [inline]
 __dev_queue_xmit+0x28ea/0x34c0 net/core/dev.c:3551
 dev_queue_xmit+0x17/0x20 net/core/dev.c:3616
 packet_snd net/packet/af_packet.c:2951 [inline]
 packet_sendmsg+0x40f8/0x6070 net/packet/af_packet.c:2976
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:639
 __sys_sendto+0x3d7/0x670 net/socket.c:1789
 __do_sys_sendto net/socket.c:1801 [inline]
 __se_sys_sendto net/socket.c:1797 [inline]
 __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1797
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: c411ed8 ("nsh: add GSO support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jiri Benc <jbenc@redhat.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
mpe referenced this issue in linuxppc/linux Nov 13, 2018
Increase kasan instrumented kernel stack size from 32k to 64k. Other
architectures seems to get away with just doubling kernel stack size under
kasan, but on s390 this appears to be not enough due to bigger frame size.
The particular pain point is kasan inlined checks (CONFIG_KASAN_INLINE
vs CONFIG_KASAN_OUTLINE). With inlined checks one particular case hitting
stack overflow is fs sync on xfs filesystem:

 #0 [9a0681e8]  704 bytes  check_usage at 34b1fc
 #1 [9a0684a8]  432 bytes  check_usage at 34c710
 #2 [9a068658]  1048 bytes  validate_chain at 35044a
 #3 [9a068a70]  312 bytes  __lock_acquire at 3559fe
 #4 [9a068ba8]  440 bytes  lock_acquire at 3576ee
 #5 [9a068d60]  104 bytes  _raw_spin_lock at 21b44e0
 torvalds#6 [9a068dc8]  1992 bytes  enqueue_entity at 2dbf72
 torvalds#7 [9a069590]  1496 bytes  enqueue_task_fair at 2df5f0
 torvalds#8 [9a069b68]  64 bytes  ttwu_do_activate at 28f438
 torvalds#9 [9a069ba8]  552 bytes  try_to_wake_up at 298c4c
 torvalds#10 [9a069dd0]  168 bytes  wake_up_worker at 23f97c
 torvalds#11 [9a069e78]  200 bytes  insert_work at 23fc2e
 torvalds#12 [9a069f40]  648 bytes  __queue_work at 2487c0
 torvalds#13 [9a06a1c8]  200 bytes  __queue_delayed_work at 24db28
 torvalds#14 [9a06a290]  248 bytes  mod_delayed_work_on at 24de84
 torvalds#15 [9a06a388]  24 bytes  kblockd_mod_delayed_work_on at 153e2a0
 torvalds#16 [9a06a3a0]  288 bytes  __blk_mq_delay_run_hw_queue at 158168c
 torvalds#17 [9a06a4c0]  192 bytes  blk_mq_run_hw_queue at 1581a3c
 torvalds#18 [9a06a580]  184 bytes  blk_mq_sched_insert_requests at 15a2192
 torvalds#19 [9a06a638]  1024 bytes  blk_mq_flush_plug_list at 1590f3a
 torvalds#20 [9a06aa38]  704 bytes  blk_flush_plug_list at 1555028
 torvalds#21 [9a06acf8]  320 bytes  schedule at 219e476
 torvalds#22 [9a06ae38]  760 bytes  schedule_timeout at 21b0aac
 torvalds#23 [9a06b130]  408 bytes  wait_for_common at 21a1706
 torvalds#24 [9a06b2c8]  360 bytes  xfs_buf_iowait at fa1540
 torvalds#25 [9a06b430]  256 bytes  __xfs_buf_submit at fadae6
 torvalds#26 [9a06b530]  264 bytes  xfs_buf_read_map at fae3f6
 torvalds#27 [9a06b638]  656 bytes  xfs_trans_read_buf_map at 10ac9a8
 torvalds#28 [9a06b8c8]  304 bytes  xfs_btree_kill_root at e72426
 torvalds#29 [9a06b9f8]  288 bytes  xfs_btree_lookup_get_block at e7bc5e
 torvalds#30 [9a06bb18]  624 bytes  xfs_btree_lookup at e7e1a6
 torvalds#31 [9a06bd88]  2664 bytes  xfs_alloc_ag_vextent_near at dfa070
 torvalds#32 [9a06c7f0]  144 bytes  xfs_alloc_ag_vextent at dff3ca
 torvalds#33 [9a06c880]  1128 bytes  xfs_alloc_vextent at e05fce
 torvalds#34 [9a06cce8]  584 bytes  xfs_bmap_btalloc at e58342
 torvalds#35 [9a06cf30]  1336 bytes  xfs_bmapi_write at e618de
 torvalds#36 [9a06d468]  776 bytes  xfs_iomap_write_allocate at ff678e
 torvalds#37 [9a06d770]  720 bytes  xfs_map_blocks at f82af8
 torvalds#38 [9a06da40]  928 bytes  xfs_writepage_map at f83cd6
 torvalds#39 [9a06dde0]  320 bytes  xfs_do_writepage at f85872
 torvalds#40 [9a06df20]  1320 bytes  write_cache_pages at 73dfe8
 torvalds#41 [9a06e448]  208 bytes  xfs_vm_writepages at f7f892
 torvalds#42 [9a06e518]  88 bytes  do_writepages at 73fe6a
 torvalds#43 [9a06e570]  872 bytes  __writeback_single_inode at a20cb6
 torvalds#44 [9a06e8d8]  664 bytes  writeback_sb_inodes at a23be2
 torvalds#45 [9a06eb70]  296 bytes  __writeback_inodes_wb at a242e0
 torvalds#46 [9a06ec98]  928 bytes  wb_writeback at a2500e
 torvalds#47 [9a06f038]  848 bytes  wb_do_writeback at a260ae
 torvalds#48 [9a06f388]  536 bytes  wb_workfn at a28228
 torvalds#49 [9a06f5a0]  1088 bytes  process_one_work at 24a234
 torvalds#50 [9a06f9e0]  1120 bytes  worker_thread at 24ba26
 torvalds#51 [9a06fe40]  104 bytes  kthread at 26545a
 #52 [9a06fea8]             kernel_thread_starter at 21b6b62

To be able to increase the stack size to 64k reuse LLILL instruction
in __switch_to function to load 64k - STACK_FRAME_OVERHEAD - __PT_SIZE
(65192) value as unsigned.

Reported-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
@deece
Copy link

deece commented Nov 28, 2018

Still relevant, see arch/powerpc/kernel/hw_breakpoint.c:arch_unregister_hw_breakpoint()

@mpe mpe transferred this issue from linuxppc/linux Jan 7, 2019
@mpe mpe added bug It's a bug medium Possibly not too difficult labels Jan 7, 2019
@mpe
Copy link
Member Author

mpe commented Aug 2, 2023

@mpe mpe added this to the v6.6 milestone Aug 24, 2023
@mpe
Copy link
Member Author

mpe commented May 6, 2024

Fixed by @BenjaminGrayNp1 in torvalds/linux@53834a0

@mpe mpe closed this as completed May 6, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug It's a bug medium Possibly not too difficult
Projects
Status: Done
Development

No branches or pull requests

2 participants