Skip to content

Commit

Permalink
[OpenMP][FIX] Ensure device reduction geps work for multi-var reductions
Browse files Browse the repository at this point in the history
If we have more than one reduction variable we need to be consistent
wrt. indexing. In 3de645e we broke this
as the buffer type was reduced to a singleton but the index computation
was not adjusted to account for that offset. This fixes it by
interleaving the reduction variables properly in an array-of-struct
style. We can revert it back to struct-of-array in a follow-up if it turns
out to be a problem. I doubt it since half the accesses should benefit
from the locality this layout offers and only the other half were
consecutive before.
  • Loading branch information
jdoerfert committed Nov 10, 2023
1 parent bc81f8c commit 7318fe6
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 221 deletions.
54 changes: 27 additions & 27 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,11 @@ static RecordDecl *buildRecordForGlobalizedVars(
Field->addAttr(*I);
}
} else {
llvm::APInt ArraySize(32, BufSize);
Type = C.getConstantArrayType(Type, ArraySize, nullptr,
ArraySizeModifier::Normal, 0);
if (BufSize > 1) {
llvm::APInt ArraySize(32, BufSize);
Type = C.getConstantArrayType(Type, ArraySize, nullptr,
ArraySizeModifier::Normal, 0);
}
Field = FieldDecl::Create(
C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
C.getTrivialTypeSourceInfo(Type, SourceLocation()),
Expand Down Expand Up @@ -2205,8 +2207,7 @@ static llvm::Value *emitListToGlobalCopyFunction(
llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
LLVMReductionsBufferTy->getPointerTo());
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
Expand All @@ -2224,12 +2225,12 @@ static llvm::Value *emitListToGlobalCopyFunction(
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
// Global = Buffer.VD[Idx];
const FieldDecl *FD = VarFieldMap.lookup(VD);
llvm::Value *BufferPtr =
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
GlobAddr.getPointer(), Idxs);
GlobLVal.setAddress(Address(BufferPtr,
GlobLVal.setAddress(Address(GlobAddr.getPointer(),
CGF.ConvertTypeForMem(Private->getType()),
GlobAddr.getAlignment()));
switch (CGF.getEvaluationKind(Private->getType())) {
Expand Down Expand Up @@ -2316,8 +2317,7 @@ static llvm::Value *emitListToGlobalReduceFunction(
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
auto IPriv = Privates.begin();
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
Expand All @@ -2326,12 +2326,13 @@ static llvm::Value *emitListToGlobalReduceFunction(
// Global = Buffer.VD[Idx];
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
const FieldDecl *FD = VarFieldMap.lookup(VD);
llvm::Value *BufferPtr =
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
C.VoidPtrTy);
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
++Idx;
Expand Down Expand Up @@ -2413,8 +2414,7 @@ static llvm::Value *emitGlobalToListCopyFunction(
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
LLVMReductionsBufferTy->getPointerTo());

llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
Expand All @@ -2432,12 +2432,12 @@ static llvm::Value *emitGlobalToListCopyFunction(
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
// Global = Buffer.VD[Idx];
const FieldDecl *FD = VarFieldMap.lookup(VD);
llvm::Value *BufferPtr =
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
GlobAddr.getPointer(), Idxs);
GlobLVal.setAddress(Address(BufferPtr,
GlobLVal.setAddress(Address(GlobAddr.getPointer(),
CGF.ConvertTypeForMem(Private->getType()),
GlobAddr.getAlignment()));
switch (CGF.getEvaluationKind(Private->getType())) {
Expand Down Expand Up @@ -2524,8 +2524,7 @@ static llvm::Value *emitGlobalToListReduceFunction(
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
auto IPriv = Privates.begin();
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
Expand All @@ -2534,12 +2533,13 @@ static llvm::Value *emitGlobalToListReduceFunction(
// Global = Buffer.VD[Idx];
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
const FieldDecl *FD = VarFieldMap.lookup(VD);
llvm::Value *BufferPtr =
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
C.VoidPtrTy);
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
++Idx;
Expand Down

0 comments on commit 7318fe6

Please sign in to comment.