-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TSan] Add instrumentation of AVX2 and AVX512 instructions #74636
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-compiler-rt-sanitizer Author: Joachim (jprotze) ChangesCurrently, ThreadSanitizer only instruments memory accesses up to a width of 128 bit and explicitly skips instrumentation of wider memory accesses. This means that TSan is blind for AVX2 and AVX512 memory instructions. This patch adds instrumentation and runtime support for 256bit and 512bit memory loads/stores. Additionally, vector gather/scatter instructions are considered for instrumentation. These instructions allow to gather individual data elements from memory into a single vector register and scatter the elements from a vector register into individual memory locations. Some of the tests use Patch is 51.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74636.diff 27 Files Affected:
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index a8e078f1ebc98..ab6200fce2455 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -100,6 +100,8 @@ check_cxx_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR
check_cxx_compiler_flag(-fno-coverage-mapping COMPILER_RT_HAS_FNO_COVERAGE_MAPPING_FLAG)
check_cxx_compiler_flag("-Werror -mcrc32" COMPILER_RT_HAS_MCRC32_FLAG)
check_cxx_compiler_flag("-Werror -msse4.2" COMPILER_RT_HAS_MSSE4_2_FLAG)
+check_cxx_compiler_flag("-Werror -mavx2" COMPILER_RT_HAS_MAVX2_FLAG)
+check_cxx_compiler_flag("-Werror -mavx512f" COMPILER_RT_HAS_MAVX512F_FLAG)
check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG)
check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG)
check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
index 791c0596f65ab..4df1a6c8fca89 100644
--- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
@@ -241,6 +241,17 @@ else()
else()
set(TSAN_ASM_SOURCES)
endif()
+ add_compiler_rt_object_libraries(RTTSanAVX2
+ ARCHS ${arch}
+ SOURCES tsan_interface_avx2.cpp
+ ADDITIONAL_HEADERS tsan_interface_avx2.h
+ #CFLAGS ${TSAN_RTL_CFLAGS} $<IF:"$COMPILER_RT_HAS_MAVX2_FLAG","-mavx2","">)
+ CFLAGS ${TSAN_RTL_CFLAGS} $<IF:$<BOOL:${COMPILER_RT_HAS_MAVX2_FLAG}>,-mavx2,"">)
+ add_compiler_rt_object_libraries(RTTSanAVX512
+ ARCHS ${arch}
+ SOURCES tsan_interface_avx512.cpp
+ ADDITIONAL_HEADERS tsan_interface_avx512.h
+ CFLAGS ${TSAN_RTL_CFLAGS} $<IF:$<BOOL:${COMPILER_RT_HAS_MAVX512F_FLAG}>,-mavx512f,"">)
add_compiler_rt_runtime(clang_rt.tsan
STATIC
ARCHS ${arch}
@@ -252,6 +263,8 @@ else()
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
$<TARGET_OBJECTS:RTUbsan.${arch}>
+ $<TARGET_OBJECTS:RTTSanAVX2.${arch}>
+ $<TARGET_OBJECTS:RTTSanAVX512.${arch}>
ADDITIONAL_HEADERS ${TSAN_HEADERS}
CFLAGS ${TSAN_RTL_CFLAGS}
PARENT_TARGET tsan)
@@ -276,6 +289,8 @@ else()
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
$<TARGET_OBJECTS:RTUbsan.${arch}>
+ $<TARGET_OBJECTS:RTTSanAVX2.${arch}>
+ $<TARGET_OBJECTS:RTTSanAVX512.${arch}>
ADDITIONAL_HEADERS ${TSAN_HEADERS}
CFLAGS ${TSAN_RTL_DYNAMIC_CFLAGS}
DEFS SANITIZER_SHARED
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
index e6c4bf2e60a7b..c97cf62e2e9bd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
@@ -13,6 +13,7 @@
#include "tsan_interface.h"
#include "tsan_interface_ann.h"
#include "tsan_rtl.h"
+
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_ptrauth.h"
@@ -42,18 +43,42 @@ void __tsan_write16_pc(void *addr, void *pc) {
// __tsan_unaligned_read/write calls are emitted by compiler.
-void __tsan_unaligned_read16(const void *addr) {
+template <unsigned int N>
+void __tsan_unaligned_readx(const void *addr) {
uptr pc = CALLERPC;
ThreadState *thr = cur_thread();
- UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
- UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
+ for (unsigned int i = 0; i < N / 8; i++)
+ UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessRead);
}
-void __tsan_unaligned_write16(void *addr) {
+template <unsigned int N>
+void __tsan_unaligned_writex(void *addr) {
uptr pc = CALLERPC;
ThreadState *thr = cur_thread();
- UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
- UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
+ for (unsigned int i = 0; i < N / 8; i++)
+ UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessWrite);
+}
+
+void __tsan_unaligned_read16(const void *addr) {
+ __tsan_unaligned_readx<16>(addr);
+}
+
+void __tsan_unaligned_write16(void *addr) { __tsan_unaligned_writex<16>(addr); }
+
+extern "C" void __tsan_unaligned_read32(const void *addr) {
+ __tsan_unaligned_readx<32>(addr);
+}
+
+extern "C" void __tsan_unaligned_write32(void *addr) {
+ __tsan_unaligned_writex<32>(addr);
+}
+
+extern "C" void __tsan_unaligned_read64(const void *addr) {
+ __tsan_unaligned_readx<64>(addr);
+}
+
+extern "C" void __tsan_unaligned_write64(void *addr) {
+ __tsan_unaligned_writex<64>(addr);
}
extern "C" {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.h b/compiler-rt/lib/tsan/rtl/tsan_interface.h
index 3731c90d45915..ec24aaa9578d7 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.h
@@ -53,11 +53,15 @@ SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read2(const void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read4(const void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read8(const void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read16(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read32(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read64(const void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write2(void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write4(void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write8(void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write16(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write32(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write64(void *addr);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1_pc(void *addr, void *pc);
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2_pc(void *addr, void *pc);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.inc b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
index b0a424ff9c255..b7894e167db9e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.inc
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
@@ -38,6 +38,18 @@ void __tsan_read16(void *addr) {
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
}
+extern "C" void __tsan_read32(void *addr) {
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
+}
+
+extern "C" void __tsan_read64(void *addr) {
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessRead);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessRead);
+}
+
void __tsan_write1(void *addr) {
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
}
@@ -58,6 +70,21 @@ void __tsan_write16(void *addr) {
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
}
+extern "C" void __tsan_write32(void *addr) {
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
+}
+
+extern "C" void __tsan_write64(void *addr) {
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessWrite);
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessWrite);
+}
+
+// Our vector instructions
+// TODO
+
void __tsan_read1_pc(void *addr, void *pc) {
MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
new file mode 100644
index 0000000000000..cc50afd383d5b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
@@ -0,0 +1,37 @@
+#include "tsan_interface_avx2.h"
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+#ifdef __AVX__
+extern "C" void __tsan_scatter_vector4(__m256i vaddr, int size, uint8_t mask) {
+ void *addr[4] = {};
+ _mm256_store_si256((__m256i *)addr, vaddr);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ for (int i = 0; i < 4; i++)
+ if ((mask >> i) & 1)
+ UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
+}
+
+extern "C" void __tsan_gather_vector4(__m256i vaddr, int size, uint8_t mask) {
+ void *addr[4] = {};
+ _mm256_store_si256((__m256i *)addr, vaddr);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ for (int i = 0; i < 4; i++)
+ if ((mask >> i) & 1)
+ UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
+}
+#endif /*__AVX__*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
new file mode 100644
index 0000000000000..84c001be8855b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
@@ -0,0 +1,46 @@
+//===-- tsan_interface_avx2.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_AVX2_H
+#define TSAN_INTERFACE_AVX2_H
+
+#include <immintrin.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
+#include <stdint.h>
+using __sanitizer::tid_t;
+using __sanitizer::uptr;
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+# ifdef __AVX__
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector4(__m256i vaddr,
+ int width,
+ uint8_t mask);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector4(__m256i vaddr,
+ int width,
+ uint8_t mask);
+# endif /*__AVX__*/
+#endif // SANITIZER_GO
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /*TSAN_INTERFACE_AVX2_H*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
new file mode 100644
index 0000000000000..ab8fbf2af3a76
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
@@ -0,0 +1,43 @@
+#include "tsan_interface_avx512.h"
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+#ifdef __AVX512F__
+extern "C" void __tsan_scatter_vector8(__m512i vaddr, int size, uint8_t mask) {
+ void *addr[8] = {};
+ __m256i v256_1 = _mm512_extracti64x4_epi64(vaddr, 0);
+ __m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 4);
+ _mm256_store_si256((__m256i *)addr, v256_1);
+ _mm256_store_si256((__m256i *)&(addr[4]), v256_2);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ for (int i = 0; i < 8; i++)
+ if ((mask >> i) & 1)
+ UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
+}
+
+extern "C" void __tsan_gather_vector8(__m512i vaddr, int size, uint8_t mask) {
+ void *addr[8] = {};
+ __m256i v256_1 = _mm512_extracti64x4_epi64(vaddr, 0);
+ __m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 4);
+ _mm256_store_si256((__m256i *)addr, v256_1);
+ _mm256_store_si256((__m256i *)(&addr[4]), v256_2);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ for (int i = 0; i < 8; i++)
+ if ((mask >> i) & 1)
+ UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
+}
+#endif /*__AVX512F__*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
new file mode 100644
index 0000000000000..179f64a89a9f1
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
@@ -0,0 +1,46 @@
+//===-- tsan_interface_avx512.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_AVX512_H
+#define TSAN_INTERFACE_AVX512_H
+
+#include <immintrin.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
+#include <stdint.h>
+using __sanitizer::tid_t;
+using __sanitizer::uptr;
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+# ifdef __AVX512F__
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector8(__m512i vaddr,
+ int width,
+ uint8_t mask);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector8(__m512i vaddr,
+ int width,
+ uint8_t mask);
+# endif /*__AVX512F__*/
+#endif // SANITIZER_GO
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /*TSAN_INTERFACE_AVX512_H*/
diff --git a/compiler-rt/test/tsan/simd_broadcast_norace.c b/compiler-rt/test/tsan/simd_broadcast_norace.c
new file mode 100644
index 0000000000000..3a7c2cfe279dc
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_broadcast_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+# define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+# define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE c;
+
+void *Thread(intptr_t offset) {
+ for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+ for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+ A[j] += c;
+ }
+ barrier_wait(&barrier);
+ return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+ barrier_init(&barrier, 2);
+ pthread_t t[2];
+ pthread_create(&t[0], NULL, Thread1, NULL);
+ pthread_create(&t[1], NULL, Thread2, NULL);
+ pthread_join(t[0], NULL);
+ pthread_join(t[1], NULL);
+ fprintf(stderr, "DONE\n");
+ return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_broadcast_race.c b/compiler-rt/test/tsan/simd_broadcast_race.c
new file mode 100644
index 0000000000000..08d6207ede722
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_broadcast_race.c
@@ -0,0 +1,43 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+# define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+# define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+
+void *Thread(intptr_t offset) {
+ for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+ for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+ A[j] += A[64];
+ }
+ barrier_wait(&barrier);
+ return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+ barrier_init(&barrier, 2);
+ pthread_t t[2];
+ pthread_create(&t[0], NULL, Thread1, NULL);
+ pthread_create(&t[1], NULL, Thread2, NULL);
+ pthread_join(t[0], NULL);
+ pthread_join(t[1], NULL);
+ return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_gather_race.c b/compiler-rt/test/tsan/simd_gather_race.c
new file mode 100644
index 0000000000000..1d7c68a0bc93e
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_gather_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+# define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+# define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+ for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+ for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+ A[j + CHUNK_SIZE] = A[j * 2] + B[j];
+ }
+ barrier_wait(&barrier);
+ return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+ barrier_init(&barrier, 2);
+ pthread_t t[2];
+ pthread_create(&t[0], NULL, Thread1, NULL);
+ pthread_create(&t[1], NULL, Thread2, NULL);
+ pthread_join(t[0], NULL);
+ pthread_join(t[1], NULL);
+ return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_gatherscatter_norace.c b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
new file mode 100644
index 0000000000000..3f5994119223c
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCh...
[truncated]
|
|
|
Currently, ThreadSanitizer only instruments memory accesses up to a width of 128 bit and explicitly skips instrumentation of wider memory accesses. This means that TSan is blind for AVX2 and AVX512 memory instructions. This patch adds instrumentation and runtime support for 256bit and 512bit memory loads/stores. Additionally, vector gather/scatter instructions are considered for instrumentation. These instructions allow to gather individual data elements from memory into a single vector register and scatter the elements from a vector register into individual memory locations. Since the vector of addresses is passed as a 256bit / 512bit vector, the new interface functions are compiled separately with the specific compiler flags. This avoids that AVX instructions are introduced into other parts of the runtime. Since the new interface is only called on architectures that actually support AVX instructions, this separation maintains the portability of the runtime. Some of the tests use #pragma omp simd as a portable way to generate vector instructions across architectures. The construct is independent of the OpenMP runtime. Therefore the tests used base-language threading. Some of the tests directly call into the new runtime functions, since we found no way to actually generate scatter/gather instructions with masks different from 0xFF. Under review as llvm#74636
isa<InvokeInst>(Inst)) { | ||
if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { | ||
auto CFunc = CI->getCalledFunction(); | ||
if (CFunc && (CFunc->getName().contains("llvm.masked.scatter") || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's not how intrinsic suppose to be checked.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
case Intrinsic::masked_scatter:
InstrumentationIRBuilder IRB(I); | ||
StringRef FunctionNameRef = | ||
dyn_cast<CallInst>(I)->getCalledFunction()->getName(); | ||
bool IsScatter = FunctionNameRef.contains("scatter"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same here
@@ -804,8 +868,8 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr, | |||
const DataLayout &DL) { | |||
assert(OrigTy->isSized()); | |||
uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); | |||
if (TypeSize != 8 && TypeSize != 16 && | |||
TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { | |||
if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
!isPowerOf2_32() || <8 || >512
add_compiler_rt_object_libraries(RTTSanAVX2 | ||
ARCHS ${arch} | ||
SOURCES tsan_interface_avx2.cpp | ||
ADDITIONAL_HEADERS tsan_interface_avx2.h |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we want to export this header always?
The compiler is not necessary used on the machine where the it's built.
If we include it unconditionally, it will also make cmake files simpler.
(int64_t)(A + 7), (int64_t)(A + 6), (int64_t)(A + 5), (int64_t)(A + 4), | ||
#endif | ||
(int64_t)(A + 3), (int64_t)(A + 2), (int64_t)(A + 1), (int64_t)(A + 0)); | ||
tsan_scatter_func(vaddr, sizeof(TYPE), mask); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the target intrinsic that should be lowered to this call? Can't we call that intrinsic instead?
It's always better to be more realistic in tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment, we only added llvm vector intrinsics to the ThreadSanitizer pass. These intrinsics are generated by the vectorizer (e.g., triggered by OpenMP simd, or clang simd directives). According to our understanding, we cannot explicitly place these intrinsics into C source code.
Furthermore, we failed to convince the vectorizer to actually emit masked llvm vector intrinsics (i.e., having not all mask bits True), as the cost heuristic would typically prefer generating scalar code instead.
@felilxtomski will add instrumentation to the TSan pass for the related x86 vector intrinsics. Then we might trigger this instrumentation from C code.
|
||
using namespace __tsan; | ||
|
||
#ifdef __AVX512F__ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't it be better to always compile this file with -mavx512f?
#include <immintrin.h> | ||
#include <sanitizer_common/sanitizer_internal_defs.h> | ||
#include <stdint.h> | ||
using __sanitizer::tid_t; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does not seem to be used.
#include <sanitizer_common/sanitizer_internal_defs.h> | ||
#include <stdint.h> | ||
using __sanitizer::tid_t; | ||
using __sanitizer::uptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does not seem to be used.
#include <immintrin.h> | ||
#include <sanitizer_common/sanitizer_internal_defs.h> | ||
#include <stdint.h> | ||
using __sanitizer::tid_t; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does not seem to be used.
#include <sanitizer_common/sanitizer_internal_defs.h> | ||
#include <stdint.h> | ||
using __sanitizer::tid_t; | ||
using __sanitizer::uptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does not seem to be used.
|
||
#ifdef __AVX__ | ||
extern "C" void __tsan_scatter_vector4(__m256i vaddr, int size, uint8_t mask) { | ||
void *addr[4] = {}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We always cast it to something else and never use it as 'void*', so I would do s/void*/uptr/ to remove at least 1 cast.
Here and below.
The latest commits added checks for the hardware support of AVX on the build system to only enable the tests when the test system supports the instructions. ( |
Currently, ThreadSanitizer only instruments memory accesses up to a width of 128 bit and explicitly skips instrumentation of wider memory accesses. This means that TSan is blind for AVX2 and AVX512 memory instructions.
This patch adds instrumentation and runtime support for 256bit and 512bit memory loads/stores. Additionally, vector gather/scatter instructions are considered for instrumentation. These instructions allow to gather individual data elements from memory into a single vector register and scatter the elements from a vector register into individual memory locations.
Since the vector of addresses is passed as a 256bit / 512bit vector, the new interface functions are compiled separately with the specific compiler flags. This avoids that AVX instructions are introduced into other parts of the runtime. Since the new interface is only called on architectures that actually support AVX instructions, this separation maintains the portability of the runtime.
Some of the tests use
#pragma omp simd
as a portable way to generate vector instructions across architectures. The construct is independent of the OpenMP runtime. Therefore the tests used base-language threading.Some of the tests directly call into the new runtime functions, since we found no way to actually generate scatter/gather instructions with masks different from 0xFF.