diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index a6cf18c43c19c4..1b34cff44e7645 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1899,14 +1899,17 @@ static void emitReductionListCopy( // new element. bool IncrScratchpadSrc = false; bool IncrScratchpadDest = false; + QualType PrivatePtrType = C.getPointerType(Private->getType()); + llvm::Type *PrivateLlvmPtrType = CGF.ConvertType(PrivatePtrType); switch (Action) { case RemoteLaneToThread: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = CGF.EmitLoadOfPointer( - SrcElementPtrAddr, - C.getPointerType(Private->getType())->castAs()); + SrcElementAddr = + CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( + SrcElementPtrAddr, PrivateLlvmPtrType), + PrivatePtrType->castAs()); // Step 1.2: Create a temporary to store the element in the destination // Reduce list. @@ -1920,24 +1923,27 @@ static void emitReductionListCopy( case ThreadCopy: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = CGF.EmitLoadOfPointer( - SrcElementPtrAddr, - C.getPointerType(Private->getType())->castAs()); + SrcElementAddr = + CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( + SrcElementPtrAddr, PrivateLlvmPtrType), + PrivatePtrType->castAs()); // Step 1.2: Get the address for dest element. The destination // element has already been created on the thread's stack. DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); - DestElementAddr = CGF.EmitLoadOfPointer( - DestElementPtrAddr, - C.getPointerType(Private->getType())->castAs()); + DestElementAddr = + CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( + DestElementPtrAddr, PrivateLlvmPtrType), + PrivatePtrType->castAs()); break; } case ThreadToScratchpad: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = CGF.EmitLoadOfPointer( - SrcElementPtrAddr, - C.getPointerType(Private->getType())->castAs()); + SrcElementAddr = + CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( + SrcElementPtrAddr, PrivateLlvmPtrType), + PrivatePtrType->castAs()); // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index 9e7158fabde659..d8f1519f315ff7 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -108,9 +108,9 @@ int bar(int n){ // CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to double** +// CHECK: [[ELT:%.+]] = load double*, double** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // // CHECK: [[ELT_CAST:%.+]] = bitcast double* [[ELT]] to i64* // CHECK: [[REMOTE_ELT_CAST:%.+]] = bitcast double* [[REMOTE_ELT]] to i64* @@ -159,11 +159,11 @@ int bar(int n){ // // CHECK: [[DO_COPY]] // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to double** +// CHECK: [[REMOTE_ELT:%.+]] = load double*, double** [[REMOTE_ELT_REF_CAST]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to double** +// CHECK: [[ELT:%.+]] = load double*, double** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] @@ -329,9 +329,9 @@ int bar(int n){ // CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to float** +// CHECK: [[ELT:%.+]] = load float*, float** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // // CHECK: [[ELT_CAST:%.+]] = bitcast float* [[ELT]] to i32* // CHECK: [[REMOTE_ELT2_CAST:%.+]] = bitcast float* [[REMOTE_ELT2]] to i32* @@ -387,11 +387,11 @@ int bar(int n){ // CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[ELT_VOID]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to float** +// CHECK: [[REMOTE_ELT:%.+]] = load float*, float** [[REMOTE_ELT_REF_CAST]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to float** +// CHECK: [[ELT:%.+]] = load float*, float** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] @@ -612,9 +612,9 @@ int bar(int n){ // CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i32** +// CHECK: [[ELT:%.+]] = load i32*, i32** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align // // CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size() @@ -626,9 +626,9 @@ int bar(int n){ // CHECK: store i8* [[REMOTE_ELT1C]], i8** [[REMOTE_ELT_REF]], align // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i16** +// CHECK: [[ELT:%.+]] = load i16*, i16** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align // // CHECK: [[ELT_CAST:%.+]] = sext i16 [[ELT_VAL]] to i32 @@ -677,20 +677,20 @@ int bar(int n){ // // CHECK: [[DO_COPY]] // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to i32** +// CHECK: [[REMOTE_ELT:%.+]] = load i32*, i32** [[REMOTE_ELT_REF_CAST]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i32** +// CHECK: [[ELT:%.+]] = load i32*, i32** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], +// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to i16** +// CHECK: [[REMOTE_ELT:%.+]] = load i16*, i16** [[REMOTE_ELT_REF_CAST]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* +// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i16** +// CHECK: [[ELT:%.+]] = load i16*, i16** [[ELT_REF_CAST]], // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index a06be48566e75b..404f8e6c4a9c09 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -453,12 +453,12 @@ void test() { // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex"* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** +// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i64* // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -468,7 +468,7 @@ void test() { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -496,13 +496,13 @@ void test() { // CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to %"class.std::complex"** +// CHECK1-NEXT: [[TMP43:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to %"class.std::complex"** +// CHECK1-NEXT: [[TMP46:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP45]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP43]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: @@ -1020,12 +1020,12 @@ void test() { // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** +// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP11]] to i64* // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK1: .shuffle.pre_cond: @@ -1049,7 +1049,7 @@ void test() { // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK1: .shuffle.exit: // CHECK1-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1077,13 +1077,13 @@ void test() { // CHECK1-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to %"class.std::complex.0"** +// CHECK1-NEXT: [[TMP51:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %"class.std::complex.0"** +// CHECK1-NEXT: [[TMP54:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP53]], align 8 // CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* -// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP51]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: @@ -1687,12 +1687,12 @@ void test() { // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex"* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP12]], i64 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** +// CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP11]], i64 1 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i64* // CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -1702,7 +1702,7 @@ void test() { // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK2-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1730,13 +1730,13 @@ void test() { // CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: // CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* -// CHECK2-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to %"class.std::complex"** +// CHECK2-NEXT: [[TMP43:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP42]], align 8 +// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to %"class.std::complex"** +// CHECK2-NEXT: [[TMP46:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP45]], align 8 // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* -// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP43]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: @@ -2254,12 +2254,12 @@ void test() { // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** +// CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP11]], i64 1 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP11]] to i64* // CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK2: .shuffle.pre_cond: @@ -2283,7 +2283,7 @@ void test() { // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK2: .shuffle.exit: // CHECK2-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -2311,13 +2311,13 @@ void test() { // CHECK2-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: // CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 -// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 -// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* -// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* +// CHECK2-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to %"class.std::complex.0"** +// CHECK2-NEXT: [[TMP51:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP50]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %"class.std::complex.0"** +// CHECK2-NEXT: [[TMP54:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP53]], align 8 // CHECK2-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* -// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP51]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: @@ -2921,12 +2921,12 @@ void test() { // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex"* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP12]], i64 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** +// CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP11]], i64 1 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i64* // CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -2936,7 +2936,7 @@ void test() { // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -2964,13 +2964,13 @@ void test() { // CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 -// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* -// CHECK3-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* +// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to %"class.std::complex"** +// CHECK3-NEXT: [[TMP43:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP42]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to %"class.std::complex"** +// CHECK3-NEXT: [[TMP46:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP45]], align 8 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* -// CHECK3-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* +// CHECK3-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP43]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: @@ -3488,12 +3488,12 @@ void test() { // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** +// CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP11]], i64 1 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP11]] to i64* // CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK3: .shuffle.pre_cond: @@ -3517,7 +3517,7 @@ void test() { // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK3: .shuffle.exit: // CHECK3-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i8* [[TMP31]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -3545,13 +3545,13 @@ void test() { // CHECK3-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 -// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* -// CHECK3-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to %"class.std::complex.0"** +// CHECK3-NEXT: [[TMP51:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP50]], align 8 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %"class.std::complex.0"** +// CHECK3-NEXT: [[TMP54:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP53]], align 8 // CHECK3-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* -// CHECK3-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK3-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP51]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index afde7e819e9351..642530e70cf990 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4291,12 +4291,12 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -4306,7 +4306,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -4334,12 +4334,12 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to double* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to double* -// CHECK1-NEXT: [[TMP47:%.*]] = load double, double* [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK1-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK1-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 // CHECK1-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: @@ -4632,12 +4632,12 @@ int bar(int n){ // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 // CHECK1-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to float* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP24]], i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i64 1 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP24]] to i32* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* // CHECK1-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() @@ -4647,7 +4647,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i64 1 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i64 1 // CHECK1-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -4681,12 +4681,12 @@ int bar(int n){ // CHECK1-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 // CHECK1-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP61:%.*]] = load i8*, i8** [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP61]] to float* -// CHECK1-NEXT: [[TMP64:%.*]] = load float, float* [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** +// CHECK1-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** +// CHECK1-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 // CHECK1-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: @@ -5071,37 +5071,37 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP23]] to i16* -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP25]], i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK1-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP25]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 // CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 // CHECK1-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP25]], i64 1 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 // CHECK1-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -5129,20 +5129,20 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8* [[TMP57]] to i32* -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK1-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 // CHECK1-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 // CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP64]] to i16* -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP65]], align 2 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK1-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK1-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 // CHECK1-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: @@ -5249,37 +5249,37 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP23]] to i16* -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP25]], i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK1-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP25]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 // CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 // CHECK1-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP25]], i64 1 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 // CHECK1-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -5307,20 +5307,20 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8* [[TMP57]] to i32* -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK1-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 // CHECK1-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 // CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP64]] to i16* -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP65]], align 2 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK1-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK1-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 // CHECK1-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: @@ -5610,12 +5610,12 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK2-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* // CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -5625,7 +5625,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 // CHECK2-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -5653,12 +5653,12 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: // CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to double* -// CHECK2-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to double* -// CHECK2-NEXT: [[TMP47:%.*]] = load double, double* [[TMP45]], align 8 +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK2-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK2-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 // CHECK2-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: @@ -5951,12 +5951,12 @@ int bar(int n){ // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 // CHECK2-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to float* -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP24]], i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** +// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i32 1 // CHECK2-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP24]] to i32* +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* // CHECK2-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() @@ -5966,7 +5966,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 // CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i32 1 // CHECK2-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK2-NEXT: store i8* [[TMP35]], i8** [[TMP23]], align 4 +// CHECK2-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 4 // CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -6000,12 +6000,12 @@ int bar(int n){ // CHECK2-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 // CHECK2-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 // CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP61:%.*]] = load i8*, i8** [[TMP60]], align 4 -// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK2-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP61]] to float* -// CHECK2-NEXT: [[TMP64:%.*]] = load float, float* [[TMP62]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** +// CHECK2-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** +// CHECK2-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 4 +// CHECK2-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 // CHECK2-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: @@ -6389,37 +6389,37 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP23]] to i16* -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK2-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK2-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP25]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 // CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 // CHECK2-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 // CHECK2-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK2-NEXT: store i8* [[TMP36]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -6447,20 +6447,20 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: // CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK2-NEXT: [[TMP59:%.*]] = bitcast i8* [[TMP57]] to i32* -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK2-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK2-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 // CHECK2-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 // CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK2-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP64]] to i16* -// CHECK2-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP65]], align 2 +// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK2-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 +// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK2-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 // CHECK2-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: @@ -6567,37 +6567,37 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP23]] to i16* -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK2-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK2-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP25]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 // CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 // CHECK2-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 // CHECK2-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK2-NEXT: store i8* [[TMP36]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -6625,20 +6625,20 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: // CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK2-NEXT: [[TMP59:%.*]] = bitcast i8* [[TMP57]] to i32* -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK2-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK2-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 // CHECK2-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 // CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK2-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP64]] to i16* -// CHECK2-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP65]], align 2 +// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK2-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 +// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK2-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 // CHECK2-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: @@ -6928,12 +6928,12 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* // CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -6943,7 +6943,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -6971,12 +6971,12 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to double* -// CHECK3-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to double* -// CHECK3-NEXT: [[TMP47:%.*]] = load double, double* [[TMP45]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK3-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK3-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 // CHECK3-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: @@ -7269,12 +7269,12 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 // CHECK3-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to float* -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP24]], i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i32 1 // CHECK3-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP24]] to i32* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* // CHECK3-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() @@ -7284,7 +7284,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i32 1 // CHECK3-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK3-NEXT: store i8* [[TMP35]], i8** [[TMP23]], align 4 +// CHECK3-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -7318,12 +7318,12 @@ int bar(int n){ // CHECK3-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 // CHECK3-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP61:%.*]] = load i8*, i8** [[TMP60]], align 4 -// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK3-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP61]] to float* -// CHECK3-NEXT: [[TMP64:%.*]] = load float, float* [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** +// CHECK3-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** +// CHECK3-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 // CHECK3-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: @@ -7707,37 +7707,37 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP23]] to i16* -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK3-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP25]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 // CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 // CHECK3-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 // CHECK3-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK3-NEXT: store i8* [[TMP36]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -7765,20 +7765,20 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK3-NEXT: [[TMP58:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK3-NEXT: [[TMP59:%.*]] = bitcast i8* [[TMP57]] to i32* -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK3-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK3-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 // CHECK3-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 // CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 4 -// CHECK3-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK3-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP64]] to i16* -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP65]], align 2 +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK3-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK3-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 // CHECK3-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: @@ -7885,37 +7885,37 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP23]] to i16* -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK3-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP25]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 // CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 // CHECK3-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP25]], i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 // CHECK3-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK3-NEXT: store i8* [[TMP36]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -7943,20 +7943,20 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK3-NEXT: [[TMP58:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK3-NEXT: [[TMP59:%.*]] = bitcast i8* [[TMP57]] to i32* -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK3-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK3-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 // CHECK3-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 // CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 4 -// CHECK3-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK3-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP64]] to i16* -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP65]], align 2 +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK3-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK3-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 // CHECK3-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp index b734703ac07de7..0b4082fc186981 100644 --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -185,12 +185,12 @@ int main() // CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 // CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i64 1 // CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* // CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() @@ -200,7 +200,7 @@ int main() // CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8 +// CHECK-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8 // CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -228,12 +228,12 @@ int main() // CHECK-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK: then4: // CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to double* -// CHECK-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to double* -// CHECK-NEXT: [[TMP47:%.*]] = load double, double* [[TMP45]], align 8 +// CHECK-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 8 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 // CHECK-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 // CHECK-NEXT: br label [[IFCONT6:%.*]] // CHECK: else5: