@@ -2120,17 +2120,17 @@ INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
                     false, false)
 
 bool AMDGPUCodeGenPrepareImpl::visitMbcntLo(IntrinsicInst &I) {
-  // On wave32 targets, mbcnt.lo(~0, 0) can be replaced with workitem.id.x
+  // On wave32 targets, mbcnt.lo(~0, 0) can be replaced with workitem.id.x.
   if (!ST.isWave32())
     return false;
 
-  // Check for pattern mbcnt.lo(~0, 0)
+  // Check for pattern mbcnt.lo(~0, 0).
   auto *Arg0C = dyn_cast<ConstantInt>(I.getArgOperand(0));
   auto *Arg1C = dyn_cast<ConstantInt>(I.getArgOperand(1));
   if (!Arg0C || !Arg1C || !Arg0C->isAllOnesValue() || !Arg1C->isZero())
     return false;
 
-  // Check reqd_work_group_size similar to mbcnt_hi case
+  // Check reqd_work_group_size similar to mbcnt_hi case.
   Function *F = I.getFunction();
   if (!F)
     return false;
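As an illustrative sketch only (the value name %id is invented and this IR is not part of the patch), the rewrite guarded by this hunk turns input like

    %id = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)

into

    %id = call i32 @llvm.amdgcn.workitem.id.x()

on a wave32 target, once the remaining workgroup-size checks in this function succeed: with an all-ones mask and a zero accumulator, mbcnt.lo yields the lane's position within its 32-wide wave, which under those checks matches the workitem ID along X.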
@@ -2154,8 +2154,8 @@ bool AMDGPUCodeGenPrepareImpl::visitMbcntLo(IntrinsicInst &I) {
     I.eraseFromParent();
     return true;
   }
-  // Handle bitmask case: when X dimension evenly splits into waves
-  // mbcnt.lo(~0, 0) = workitem.id.x() & (wave_size - 1)
+  // Handle bitmask case: when X dimension evenly splits into waves.
+  // mbcnt.lo(~0, 0) = workitem.id.x() & (wave_size - 1).
   if (ST.hasWavefrontsEvenlySplittingXDim(*F, /*RequiresUniformYZ=*/true)) {
     if (Wave != 0 && isPowerOf2_32(Wave)) {
       IRBuilder<> B(&I);
@@ -2165,7 +2165,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMbcntLo(IntrinsicInst &I) {
       Constant *Mask = ConstantInt::get(ITy, Wave - 1);
       Instruction *AndInst = cast<Instruction>(B.CreateAnd(Tid, Mask));
       AndInst->takeName(&I);
-      // Note: Range metadata cannot be applied to 'and' instructions
+      // Note: Range metadata cannot be applied to 'and' instructions.
      I.replaceAllUsesWith(AndInst);
       I.eraseFromParent();
       return true;
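A similarly hedged sketch of the bitmask case above (value names invented; the mask 31 assumes a 32-lane wave, i.e. Wave - 1):

    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %id  = and i32 %tid, 31

The and with Wave - 1 keeps only the thread's position inside its wave, which is exactly what mbcnt.lo(~0, 0) computes when the X dimension splits evenly into whole waves.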
@@ -2201,7 +2201,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMbcntHi(IntrinsicInst &I) {
     }
   }
 
-  // Pattern: mbcnt.hi(~0, mbcnt.lo(~0, 0))
+  // Pattern: mbcnt.hi(~0, mbcnt.lo(~0, 0)).
   auto *HiArg1 = dyn_cast<CallInst>(I.getArgOperand(1));
   if (!HiArg1)
     return false;
@@ -2210,12 +2210,12 @@ bool AMDGPUCodeGenPrepareImpl::visitMbcntHi(IntrinsicInst &I) {
   if (!CalledF || CalledF->getIntrinsicID() != Intrinsic::amdgcn_mbcnt_lo)
     return false;
 
-  // hi arg0 must be all-ones
+  // hi arg0 must be all-ones.
   auto *HiArg0C = dyn_cast<ConstantInt>(I.getArgOperand(0));
   if (!HiArg0C || !HiArg0C->isAllOnesValue())
     return false;
 
-  // lo args: arg0 == ~0, arg1 == 0
+  // lo args: arg0 == ~0, arg1 == 0.
   Value *Lo0 = HiArg1->getArgOperand(0);
   Value *Lo1 = HiArg1->getArgOperand(1);
   auto *Lo0C = dyn_cast<ConstantInt>(Lo0);
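For orientation, a hedged sketch of the input pattern visitMbcntHi is matching here (value names invented, declarations omitted):

    %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
    %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)

The pair computes the lane's index within a 64-wide wave, which is why the code above requires the hi call's first argument to be all-ones and goes on to check the lo call's arguments against ~0 and 0.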