From 97958c9bb83cec44b6ce13e732b53de0171a5d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 10 Nov 2022 15:02:05 +0200 Subject: [PATCH] [openmp] Support building for armv7 Windows with mingw tools This does things in the same way as D137168 / a356782426f5bf54a00570e1f925345e5fda7b2e and D101173 / 4fb0aaf03381473ec8af727edb4b5d59b64b0d60 did for aarch64. This adds a C implementation of __kmp_invoke_microtask in the same way as the fallback C implementation in z_Linux_util.cpp. Both the existing C fallback used on arm linux, and this one added here, fail test/misc_bugs/many-microtask-args.c similarly (which could be considered as an XFAIL). Differential Revision: https://reviews.llvm.org/D138689 --- openmp/runtime/src/kmp.h | 2 +- openmp/runtime/src/kmp_atomic.cpp | 4 +- openmp/runtime/src/kmp_os.h | 4 +- openmp/runtime/src/kmp_platform.h | 3 + openmp/runtime/src/z_Windows_NT-586_util.cpp | 95 +++++++++++++++++++- openmp/runtime/src/z_Windows_NT_util.cpp | 4 +- 6 files changed, 103 insertions(+), 9 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 8a2bcedb4c9aa..79d8e79c2999c 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3616,7 +3616,7 @@ extern void __kmp_check_stack_overlap(kmp_info_t *thr); extern void __kmp_expand_host_name(char *buffer, size_t size); extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern); -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && KMP_ARCH_AARCH64) +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)) extern void __kmp_initialize_system_tick(void); /* Initialize timer tick value */ #endif diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp index 21c2c60bfb60f..a7d16a4f0ec85 100644 --- a/openmp/runtime/src/kmp_atomic.cpp +++ b/openmp/runtime/src/kmp_atomic.cpp @@ -832,7 +832,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, // 
end of the first part of the workaround for C78287 #endif // USE_CMPXCHG_FIX -#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64 +#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM) // Undo explicit type casts to get MSVC ARM64 to build. Uses // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG #undef OP_CMPXCHG @@ -863,7 +863,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, (*lhs) = (*lhs)OP rhs; \ __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); -#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64 +#endif // KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM) #if KMP_ARCH_X86 || KMP_ARCH_X86_64 diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 1a802ced69c11..7972a12bc30da 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -184,7 +184,7 @@ typedef unsigned long long kmp_uint64; #error "Can't determine size_t printf format specifier." #endif -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_ARM #define KMP_SIZE_T_MAX (0xFFFFFFFF) #else #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) @@ -456,7 +456,7 @@ enum kmp_mem_fence_type { // Synchronization primitives -#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && !(KMP_ARCH_AARCH64 && defined(__GNUC__)) +#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM) && defined(__GNUC__)) #if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG #pragma intrinsic(InterlockedExchangeAdd) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 830d00d7e0ddf..fcfd8bc5d8d9a 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -101,6 +101,9 @@ #elif defined(__aarch64__) || defined(_M_ARM64) #undef KMP_ARCH_AARCH64 #define KMP_ARCH_AARCH64 1 +#elif defined(__arm__) || defined(_M_ARM) +#undef KMP_ARCH_ARMV7 +#define KMP_ARCH_ARMV7 1 #else #undef KMP_ARCH_X86 #define KMP_ARCH_X86 1 diff --git a/openmp/runtime/src/z_Windows_NT-586_util.cpp b/openmp/runtime/src/z_Windows_NT-586_util.cpp index 
befd2577f5498..c06e5aaa205dc 100644 --- a/openmp/runtime/src/z_Windows_NT-586_util.cpp +++ b/openmp/runtime/src/z_Windows_NT-586_util.cpp @@ -12,7 +12,7 @@ #include "kmp.h" -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM) /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to use compare_and_store for these routines */ @@ -189,4 +189,95 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, } #endif -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 */ +#if KMP_ARCH_ARM +// This matches the generic fallback implementation of __kmp_invoke_microtask +// from z_Linux_util.cpp, which is used on Linux on ARM. +int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, + void *p_argv[] +#if OMPT_SUPPORT + , + void **exit_frame_ptr +#endif +) { +#if OMPT_SUPPORT + *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +#endif + + switch (argc) { + default: + fprintf(stderr, "Too many args to microtask: %d!\n", argc); + fflush(stderr); + exit(-1); + case 0: + (*pkfn)(&gtid, &tid); + break; + case 1: + (*pkfn)(&gtid, &tid, p_argv[0]); + break; + case 2: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); + break; + case 3: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); + break; + case 4: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]); + break; + case 5: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]); + break; + case 6: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5]); + break; + case 7: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6]); + break; + case 8: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7]); + break; + case 9: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], 
p_argv[8]); + break; + case 10: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]); + break; + case 11: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]); + break; + case 12: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11]); + break; + case 13: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12]); + break; + case 14: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13]); + break; + case 15: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14]); + break; + } + +#if OMPT_SUPPORT + *exit_frame_ptr = 0; +#endif + + return 1; +} +#endif + +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM */ diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp index d6ec80e9c7b4e..ed62bc3136959 100644 --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -78,7 +78,7 @@ struct SYSTEM_THREAD { }; // SYSTEM_THREAD KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, KernelTime) == 0); -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_ARM KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 28); KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 52); #else @@ -108,7 +108,7 @@ typedef SYSTEM_PROCESS_INFORMATION *PSYSTEM_PROCESS_INFORMATION; KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, NextEntryOffset) == 0); 
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, CreateTime) == 32); KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ImageName) == 56); -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_ARM KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 68); KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 76); KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 88);