Skip to content

Commit

Permalink
Solve 'Too many args to microtask' problem
Browse files Browse the repository at this point in the history
This patch solves 'Too many args to microtask' problem which occurs
while executing lulesh2.0.3 benchmark on AArch64.

To solve this I had to write an AArch64 assembly version of the
__kmp_invoke_microtask() function, similar to the x86 and x86_64
implementations.

Differential Revision: http://reviews.llvm.org/D19879

llvm-svn: 269399
  • Loading branch information
pawosm-arm committed May 13, 2016
1 parent 12e7931 commit 7e5e868
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 3 deletions.
141 changes: 141 additions & 0 deletions openmp/runtime/src/z_Linux_asm.s
Expand Up @@ -109,6 +109,32 @@ KMP_PREFIX_UNDERSCORE(\proc):
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64

#if KMP_OS_LINUX && KMP_ARCH_AARCH64

// Helper macros used by the AArch64 Linux assembly routines in this file.

# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x // local label hidden from backtraces

// ALIGN size
// Emits ".align" with the operand (1 << size).
// NOTE(review): GNU as on AArch64 presumably treats the .align operand as a
// power-of-two exponent; if so, "ALIGN 2" requests 2^(1<<2) = 16-byte
// alignment rather than 4 bytes. Harmless over-alignment, but confirm intent.
.macro ALIGN size
.align 1<<(\size)
.endm

// DEBUG_INFO proc
// Closes a function opened with PROC: ends the CFI region, then records the
// ELF symbol type and size for \proc.
// Note that .size is evaluated after ALIGN, so any padding emitted by the
// alignment directive is counted in the symbol size.
.macro DEBUG_INFO proc
.cfi_endproc
// Not sure why we need .type and .size for the functions
ALIGN 2
.type \proc,@function
.size \proc,.-\proc
.endm

// PROC proc
// Opens a function: aligns the location counter, exports \proc as a global
// symbol, defines its label and starts a CFI region. Pair with DEBUG_INFO.
.macro PROC proc
ALIGN 2
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.cfi_startproc
.endm

#endif // KMP_OS_LINUX && KMP_ARCH_AARCH64

// -----------------------------------------------------------------------
// data
Expand Down Expand Up @@ -1414,6 +1440,121 @@ KMP_LABEL(kmp_1_exit):
// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */

// '
#if KMP_OS_LINUX && KMP_ARCH_AARCH64

//------------------------------------------------------------------------
// __kmp_invoke_microtask -- AArch64 / Linux implementation
//
// Calls an outlined microtask, passing the addresses of local copies of
// gtid and tid followed by the argc pointer-sized values found in p_argv.
//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         , void **exit_frame_ptr
// #endif
//                       ) {
//   (*pkfn)( & gtid, & tid, p_argv[0], ... );
//   return 1;
// }
//
// parameters:
//   x0: pkfn
//   w1: gtid
//   w2: tid
//   w3: argc
//   x4: p_argv
//   x5: &exit_frame (only read when OMPT_SUPPORT is enabled)
//
// locals (negative offsets from x29):
//   __gtid: gtid parm stored on stack so &gtid can be passed to pkfn
//   __tid:  tid parm stored on stack so &tid can be passed to pkfn
//
// stack layout after the prologue (x29 = frame base):
//   [x29 - 16, x29)   : 16-byte slot holding the gtid/tid locals
//   [sp, x29 - 16)    : 16 * (argc / 2) bytes for the outgoing stack
//                       arguments (p_argv[6] and up), filled from sp upward
//
// reg temps:
//   x8:  used to hold pkfn address
//   w9:  used as temporary for number of pkfn parms
//   x10: used to traverse p_argv array
//   x11: used as temporary for stack placement calculation
//   x12: used as temporary for stack parameters
//   x19: used to preserve exit_frame_ptr, callee-save (OMPT_SUPPORT only)
//
// return: w0 (always 1/TRUE)
//
// NOTE(review): with OMPT_SUPPORT, x29 points at the saved x19/x20 pair and
// not at the saved x29/x30 pair -- confirm that frame-pointer based unwinders
// and the consumers of *exit_frame_ptr expect this.
//

__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC __kmp_invoke_microtask

        stp     x29, x30, [sp, #-16]!       // save caller frame pointer and return address
# if OMPT_SUPPORT
        stp     x19, x20, [sp, #-16]!       // x19 is needed below; x20 only pads the push to 16 bytes
# endif
        mov     x29, sp                     // x29 = frame base for the locals

        // Reserve 16 * (1 + argc / 2) bytes; the size is a multiple of 16 so
        // sp keeps its 16-byte alignment.
        mov     w9, #1
        add     w9, w9, w3, lsr #1          // w9 = 1 + argc / 2
        // The operand is a W register, so the extended-register form must use
        // uxtw; the lsl spelling is only valid with an X register here and is
        // not accepted by every assembler.
        sub     sp, sp, w9, uxtw #4
        mov     x11, sp                     // x11 = next outgoing stack argument slot

        mov     x8, x0                      // x0 is about to be reused, keep pkfn in x8
        str     w1, [x29, #-__gtid]         // local copy of gtid
        str     w2, [x29, #-__tid]          // local copy of tid
        mov     w9, w3                      // w9 = number of arguments still to pass
        mov     x10, x4                     // x10 = cursor into p_argv
# if OMPT_SUPPORT
        mov     x19, x5                     // keep exit_frame_ptr across the call
        str     x29, [x19]                  // *exit_frame_ptr = frame base
# endif

        sub     x0, x29, #__gtid            // first pkfn argument:  &gtid
        sub     x1, x29, #__tid             // second pkfn argument: &tid

        // p_argv[0..5] go into x2..x7; stop as soon as the arguments run out.
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x2, [x10]

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x3, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x4, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x5, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x6, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x7, [x10, #8]!

        // Remaining arguments (p_argv[6] and up) are copied to the reserved
        // stack area in ascending order, starting at sp.
KMP_LABEL(kmp_0):
        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x12, [x10, #8]!
        str     x12, [x11], #8
        b       KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
        blr     x8                          // (*pkfn)(&gtid, &tid, ...)
        mov     w0, #1                      // always return 1/TRUE
        mov     sp, x29                     // drop the locals and outgoing arguments
# if OMPT_SUPPORT
        str     xzr, [x19]                  // *exit_frame_ptr = 0
        ldp     x19, x20, [sp], #16
# endif
        ldp     x29, x30, [sp], #16
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask

#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */

#if KMP_ARCH_ARM
.data
.comm .gomp_critical_user_,32,8
Expand Down
6 changes: 3 additions & 3 deletions openmp/runtime/src/z_Linux_util.c
Expand Up @@ -518,7 +518,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
return old_value;
}

# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || (KMP_OS_LINUX && KMP_ARCH_AARCH64)
kmp_int8
__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d )
{
Expand Down Expand Up @@ -552,7 +552,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
}
return old_value;
}
# endif /* KMP_ARCH_X86 */
# endif /* KMP_ARCH_X86 || KMP_ARCH_PPC64 || (KMP_OS_LINUX && KMP_ARCH_AARCH64) */

kmp_int64
__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
Expand Down Expand Up @@ -2574,7 +2574,7 @@ __kmp_get_load_balance( int max )

#endif // USE_LOAD_BALANCE

#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC)
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))

// we really only need the case with 1 argument, because CLANG always build
// a struct of pointers to shared variables referenced in the outlined function
Expand Down

0 comments on commit 7e5e868

Please sign in to comment.