@@ -1168,10 +1168,8 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
1168
1168
1169
1169
Value *ThreadNum = getOrCreateThreadID (SrcLoc);
1170
1170
1171
- // TODO: extract scheduling type and map it to OMP constant. This is curently
1172
- // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
1173
- constexpr int StaticSchedType = 34 ;
1174
- Constant *SchedulingType = ConstantInt::get (I32Type, StaticSchedType);
1171
+ Constant *SchedulingType =
1172
+ ConstantInt::get (I32Type, static_cast <int >(OMPScheduleType::Static));
1175
1173
1176
1174
// Call the "init" function and update the trip count of the loop with the
1177
1175
// value it produced.
@@ -1220,6 +1218,148 @@ CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
1220
1218
return createStaticWorkshareLoop (Loc, CLI, AllocaIP, NeedsBarrier);
1221
1219
}
1222
1220
1221
+ // / Returns an LLVM function to call for initializing loop bounds using OpenMP
1222
+ // / dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1223
+ // / the runtime. Always interpret integers as unsigned similarly to
1224
+ // / CanonicalLoopInfo.
1225
+ static FunctionCallee
1226
+ getKmpcForDynamicInitForType (Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1227
+ unsigned Bitwidth = Ty->getIntegerBitWidth ();
1228
+ if (Bitwidth == 32 )
1229
+ return OMPBuilder.getOrCreateRuntimeFunction (
1230
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1231
+ if (Bitwidth == 64 )
1232
+ return OMPBuilder.getOrCreateRuntimeFunction (
1233
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
1234
+ llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
1235
+ }
1236
+
1237
+ // / Returns an LLVM function to call for updating the next loop using OpenMP
1238
+ // / dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1239
+ // / the runtime. Always interpret integers as unsigned similarly to
1240
+ // / CanonicalLoopInfo.
1241
+ static FunctionCallee
1242
+ getKmpcForDynamicNextForType (Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1243
+ unsigned Bitwidth = Ty->getIntegerBitWidth ();
1244
+ if (Bitwidth == 32 )
1245
+ return OMPBuilder.getOrCreateRuntimeFunction (
1246
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1247
+ if (Bitwidth == 64 )
1248
+ return OMPBuilder.getOrCreateRuntimeFunction (
1249
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
1250
+ llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
1251
+ }
1252
+
1253
+ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop (
1254
+ const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1255
+ InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
1256
+ // Set up the source location value for OpenMP runtime.
1257
+ Builder.SetCurrentDebugLocation (Loc.DL );
1258
+
1259
+ Constant *SrcLocStr = getOrCreateSrcLocStr (Loc);
1260
+ Value *SrcLoc = getOrCreateIdent (SrcLocStr);
1261
+
1262
+ // Declare useful OpenMP runtime functions.
1263
+ Value *IV = CLI->getIndVar ();
1264
+ Type *IVTy = IV->getType ();
1265
+ FunctionCallee DynamicInit = getKmpcForDynamicInitForType (IVTy, M, *this );
1266
+ FunctionCallee DynamicNext = getKmpcForDynamicNextForType (IVTy, M, *this );
1267
+
1268
+ // Allocate space for computed loop bounds as expected by the "init" function.
1269
+ Builder.restoreIP (AllocaIP);
1270
+ Type *I32Type = Type::getInt32Ty (M.getContext ());
1271
+ Value *PLastIter = Builder.CreateAlloca (I32Type, nullptr , " p.lastiter" );
1272
+ Value *PLowerBound = Builder.CreateAlloca (IVTy, nullptr , " p.lowerbound" );
1273
+ Value *PUpperBound = Builder.CreateAlloca (IVTy, nullptr , " p.upperbound" );
1274
+ Value *PStride = Builder.CreateAlloca (IVTy, nullptr , " p.stride" );
1275
+
1276
+ // At the end of the preheader, prepare for calling the "init" function by
1277
+ // storing the current loop bounds into the allocated space. A canonical loop
1278
+ // always iterates from 0 to trip-count with step 1. Note that "init" expects
1279
+ // and produces an inclusive upper bound.
1280
+ BasicBlock *PreHeader = CLI->getPreheader ();
1281
+ Builder.SetInsertPoint (PreHeader->getTerminator ());
1282
+ Constant *One = ConstantInt::get (IVTy, 1 );
1283
+ Builder.CreateStore (One, PLowerBound);
1284
+ Value *UpperBound = CLI->getTripCount ();
1285
+ Builder.CreateStore (UpperBound, PUpperBound);
1286
+ Builder.CreateStore (One, PStride);
1287
+
1288
+ BasicBlock *Header = CLI->getHeader ();
1289
+ BasicBlock *Exit = CLI->getExit ();
1290
+ BasicBlock *Cond = CLI->getCond ();
1291
+ InsertPointTy AfterIP = CLI->getAfterIP ();
1292
+
1293
+ // The CLI will be "broken" in the code below, as the loop is no longer
1294
+ // a valid canonical loop.
1295
+
1296
+ if (!Chunk)
1297
+ Chunk = One;
1298
+
1299
+ Value *ThreadNum = getOrCreateThreadID (SrcLoc);
1300
+
1301
+ OMPScheduleType DynamicSchedType =
1302
+ OMPScheduleType::DynamicChunked | OMPScheduleType::ModifierNonmonotonic;
1303
+ Constant *SchedulingType =
1304
+ ConstantInt::get (I32Type, static_cast <int >(DynamicSchedType));
1305
+
1306
+ // Call the "init" function.
1307
+ Builder.CreateCall (DynamicInit,
1308
+ {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1309
+ UpperBound, /* step */ One, Chunk});
1310
+
1311
+ // An outer loop around the existing one.
1312
+ BasicBlock *OuterCond = BasicBlock::Create (
1313
+ PreHeader->getContext (), Twine (PreHeader->getName ()) + " .outer.cond" ,
1314
+ PreHeader->getParent ());
1315
+ // This needs to be 32-bit always, so can't use the IVTy Zero above.
1316
+ Builder.SetInsertPoint (OuterCond, OuterCond->getFirstInsertionPt ());
1317
+ Value *Res =
1318
+ Builder.CreateCall (DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1319
+ PLowerBound, PUpperBound, PStride});
1320
+ Constant *Zero32 = ConstantInt::get (I32Type, 0 );
1321
+ Value *MoreWork = Builder.CreateCmp (CmpInst::ICMP_NE, Res, Zero32);
1322
+ Value *LowerBound =
1323
+ Builder.CreateSub (Builder.CreateLoad (IVTy, PLowerBound), One, " lb" );
1324
+ Builder.CreateCondBr (MoreWork, Header, Exit);
1325
+
1326
+ // Change PHI-node in loop header to use outer cond rather than preheader,
1327
+ // and set IV to the LowerBound.
1328
+ Instruction *Phi = &Header->front ();
1329
+ auto *PI = cast<PHINode>(Phi);
1330
+ PI->setIncomingBlock (0 , OuterCond);
1331
+ PI->setIncomingValue (0 , LowerBound);
1332
+
1333
+ // Then set the pre-header to jump to the OuterCond
1334
+ Instruction *Term = PreHeader->getTerminator ();
1335
+ auto *Br = cast<BranchInst>(Term);
1336
+ Br->setSuccessor (0 , OuterCond);
1337
+
1338
+ // Modify the inner condition:
1339
+ // * Use the UpperBound returned from the DynamicNext call.
1340
+ // * jump to the loop outer loop when done with one of the inner loops.
1341
+ Builder.SetInsertPoint (Cond, Cond->getFirstInsertionPt ());
1342
+ UpperBound = Builder.CreateLoad (IVTy, PUpperBound, " ub" );
1343
+ Instruction *Comp = &*Builder.GetInsertPoint ();
1344
+ auto *CI = cast<CmpInst>(Comp);
1345
+ CI->setOperand (1 , UpperBound);
1346
+ // Redirect the inner exit to branch to outer condition.
1347
+ Instruction *Branch = &Cond->back ();
1348
+ auto *BI = cast<BranchInst>(Branch);
1349
+ assert (BI->getSuccessor (1 ) == Exit);
1350
+ BI->setSuccessor (1 , OuterCond);
1351
+
1352
+ // Add the barrier if requested.
1353
+ if (NeedsBarrier) {
1354
+ Builder.SetInsertPoint (&Exit->back ());
1355
+ createBarrier (LocationDescription (Builder.saveIP (), Loc.DL ),
1356
+ omp::Directive::OMPD_for, /* ForceSimpleCall */ false ,
1357
+ /* CheckCancelFlag */ false );
1358
+ }
1359
+
1360
+ return AfterIP;
1361
+ }
1362
+
1223
1363
// / Make \p Source branch to \p Target.
1224
1364
// /
1225
1365
// / Handles two situations:
@@ -1901,7 +2041,7 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
1901
2041
llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
1902
2042
1903
2043
Function *Fn =
1904
- getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_threadprivate_cached);
2044
+ getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_threadprivate_cached);
1905
2045
1906
2046
return Builder.CreateCall (Fn, Args);
1907
2047
}
0 commit comments