diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index a13d74743c3bd..abecf5250f4cf 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -153,9 +153,11 @@ static RecordDecl *buildRecordForGlobalizedVars( Field->addAttr(*I); } } else { - llvm::APInt ArraySize(32, BufSize); - Type = C.getConstantArrayType(Type, ArraySize, nullptr, - ArraySizeModifier::Normal, 0); + if (BufSize > 1) { + llvm::APInt ArraySize(32, BufSize); + Type = C.getConstantArrayType(Type, ArraySize, nullptr, + ArraySizeModifier::Normal, 0); + } Field = FieldDecl::Create( C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, C.getTrivialTypeSourceInfo(Type, SourceLocation()), @@ -2205,8 +2207,7 @@ static llvm::Value *emitListToGlobalCopyFunction( llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), LLVMReductionsBufferTy->getPointerTo()); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2224,12 +2225,12 @@ static llvm::Value *emitListToGlobalCopyFunction( const ValueDecl *VD = cast(Private)->getDecl(); // Global = Buffer.VD[Idx]; const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(), - GlobAddr.getPointer(), Idxs); - GlobLVal.setAddress(Address(BufferPtr, + GlobLVal.setAddress(Address(GlobAddr.getPointer(), 
CGF.ConvertTypeForMem(Private->getType()), GlobAddr.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { @@ -2316,8 +2317,7 @@ static llvm::Value *emitListToGlobalReduceFunction( Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2326,12 +2326,13 @@ static llvm::Value *emitListToGlobalReduceFunction( // Global = Buffer.VD[Idx]; const ValueDecl *VD = cast(*IPriv)->getDecl(); const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP( - GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs); - CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy); + CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false, + C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
++Idx; @@ -2413,8 +2414,7 @@ static llvm::Value *emitGlobalToListCopyFunction( CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), LLVMReductionsBufferTy->getPointerTo()); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2432,12 +2432,12 @@ static llvm::Value *emitGlobalToListCopyFunction( const ValueDecl *VD = cast(Private)->getDecl(); // Global = Buffer.VD[Idx]; const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(), - GlobAddr.getPointer(), Idxs); - GlobLVal.setAddress(Address(BufferPtr, + GlobLVal.setAddress(Address(GlobAddr.getPointer(), CGF.ConvertTypeForMem(Private->getType()), GlobAddr.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { @@ -2524,8 +2524,7 @@ static llvm::Value *emitGlobalToListReduceFunction( Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2534,12 +2533,13 @@ static llvm::Value *emitGlobalToListReduceFunction( // Global = Buffer.VD[Idx]; const ValueDecl *VD = cast(*IPriv)->getDecl(); const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + 
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP( - GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs); - CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy); + CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false, + C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. ++Idx; diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index ef4a695975d95..360a780c75383 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -248,10 +248,10 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: store double [[TMP9]], ptr [[E]], align 8 // CHECK1-NEXT: ret void // // @@ -268,9 +268,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], 
align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[E]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK1-NEXT: ret void @@ -290,9 +290,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[E]], align 8 // CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // @@ -310,9 +310,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], 
align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[E]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void @@ -576,16 +576,16 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i8 [[TMP9]], ptr [[C]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 -// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[D]], align 4 // CHECK1-NEXT: ret void // // @@ -602,13 +602,13 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// 
CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[D]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void @@ -628,15 +628,15 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 4 // CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], 
ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[D]], align 4 // CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK1-NEXT: ret void // @@ -654,13 +654,13 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[D]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: call void 
@"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void @@ -1147,16 +1147,16 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 -// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: store i16 [[TMP13]], ptr [[B]], align 4 // CHECK1-NEXT: ret void // // @@ -1173,13 +1173,13 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void @@ -1199,15 +1199,15 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// 
CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 4 // CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK1-NEXT: ret void // @@ -1225,13 +1225,13 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr 
[[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP4]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void @@ -1435,10 +1435,10 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 // CHECK2-NEXT: 
[[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: store double [[TMP9]], ptr [[E]], align 8 // CHECK2-NEXT: ret void // // @@ -1455,9 +1455,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[E]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK2-NEXT: ret void @@ -1477,9 +1477,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 
[[TMP5]] +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[E]], align 8 // CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK2-NEXT: ret void // @@ -1497,9 +1497,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[E]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void @@ -1763,16 +1763,16 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], 
i32 [[TMP5]] +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: store i8 [[TMP9]], ptr [[C]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 -// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 +// CHECK2-NEXT: store float [[TMP13]], ptr [[D]], align 4 // CHECK2-NEXT: ret void // // @@ -1789,13 +1789,13 @@ int bar(int n){ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[C]], ptr [[TMP5]], align 4 // CHECK2-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[D]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void @@ -1815,15 +1815,15 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 4 // CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[D]], align 4 // CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK2-NEXT: ret void // @@ -1841,13 +1841,13 @@ int bar(int n){ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[C]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[D]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void @@ -2334,16 +2334,16 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr 
[[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 -// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK2-NEXT: store i16 [[TMP13]], ptr [[B]], align 4 // CHECK2-NEXT: ret void // // @@ -2360,13 +2360,13 @@ int bar(int n){ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void @@ -2386,15 +2386,15 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 4 // CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK2-NEXT: ret void // @@ -2412,13 +2412,13 @@ int bar(int n){ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP4]] -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void @@ -2622,10 +2622,10 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, 
i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: store double [[TMP9]], ptr [[E]], align 8 // CHECK3-NEXT: ret void // // @@ -2642,9 +2642,9 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[E]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK3-NEXT: ret void @@ -2664,9 +2664,9 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[E:%.*]] = getelementptr 
inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[E]], align 8 // CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK3-NEXT: ret void // @@ -2684,9 +2684,9 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x double], ptr [[E]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[E]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void @@ -2950,16 +2950,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load 
ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i8 [[TMP9]], ptr [[C]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1 // CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: store float [[TMP13]], ptr [[D]], align 4 // CHECK3-NEXT: ret void // // @@ -2976,13 +2976,13 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: 
store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[D]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void @@ -3002,15 +3002,15 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[C:%.*]] = 
getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 4 // CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[D]], align 4 // CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK3-NEXT: ret void // @@ -3028,13 +3028,13 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8], ptr [[C]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x float], ptr [[D]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[D]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void @@ -3521,16 +3521,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr 
[[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1 // CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 -// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[B]], align 4 // CHECK3-NEXT: ret void // // @@ -3547,13 +3547,13 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void @@ -3573,15 +3573,15 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: 
[[TMP13:%.*]] = load i16, ptr [[B]], align 4 // CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK3-NEXT: ret void // @@ -3599,13 +3599,13 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i16], ptr [[B]], i32 0, i32 [[TMP4]] -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void diff --git 
a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp index 87d55c870ae4f..3b1af7618794d 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -1809,9 +1809,9 @@ int foo() { // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] -// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) +// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SUM]], ptr align 4 [[TMP7]], i64 400, i1 false) // IR-GPU-NEXT: ret void // // @@ -1832,9 +1832,9 @@ int foo() { // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] -// IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// IR-GPU-NEXT: 
[[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// IR-GPU-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] // IR-GPU-NEXT: ret void @@ -1857,9 +1857,9 @@ int foo() { // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] -// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP8]], i64 400, i1 false) +// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[SUM]], i64 400, i1 false) // IR-GPU-NEXT: ret void // // @@ -1880,9 +1880,9 @@ int foo() { // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] -// IR-GPU-NEXT: store ptr 
[[TMP6]], ptr [[TMP5]], align 8 +// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// IR-GPU-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] // IR-GPU-NEXT: ret void diff --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp index d2d9fe9f7a884..0a9db3edabccb 100644 --- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp @@ -65,8 +65,7 @@ static uint32_t gpu_irregular_simd_reduce(void *reduce_data, } #endif -static int32_t nvptx_parallel_reduce_nowait( - void *reduce_data, +static int32_t nvptx_parallel_reduce_nowait(void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct) { uint32_t BlockThreadId = mapping::getThreadIdInBlock(); diff --git a/openmp/libomptarget/test/offloading/multiple_reductions_simple.c b/openmp/libomptarget/test/offloading/multiple_reductions_simple.c new file mode 100644 index 0000000000000..54e55f97a2373 --- /dev/null +++ b/openmp/libomptarget/test/offloading/multiple_reductions_simple.c @@ -0,0 +1,17 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// RUN: %libomptarget-compileopt-run-and-check-generic + +#include <stdio.h> + +int main(int argc, char **argv) { + + unsigned s1 = 0, s2 = 1; +#pragma omp target teams distribute parallel for reduction(+ : s1, s2) + for (int i = 0; i < 10000; ++i) { + s1 += i; + s2 += i; + } + + // CHECK: 49995000 : 49995001 + printf("%i : %i\n", s1, s2); +}