@@ -5676,6 +5676,130 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
56765676                        AL.getNumArgs () > 2  ? AL.getArgAsExpr (2 ) : nullptr );
56775677}
56785678
5679+ static  std::pair<Expr *, int >
5680+ makeClusterDimsArgExpr (Sema &S, Expr *E, const  CUDAClusterDimsAttr &AL,
5681+                        const  unsigned  Idx) {
5682+   if  (S.DiagnoseUnexpandedParameterPack (E))
5683+     return  {nullptr , 0 };
5684+ 
5685+   //  Accept template arguments for now as they depend on something else.
5686+   //  We'll get to check them when they eventually get instantiated.
5687+   if  (E->isValueDependent ())
5688+     return  {E, 1 };
5689+ 
5690+   std::optional<llvm::APSInt> I = llvm::APSInt (64 );
5691+   if  (!(I = E->getIntegerConstantExpr (S.Context ))) {
5692+     S.Diag (E->getExprLoc (), diag::err_attribute_argument_n_type)
5693+         << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange ();
5694+     return  {nullptr , 0 };
5695+   }
5696+   //  Make sure we can fit it in 4 bits.
5697+   if  (!I->isIntN (4 )) {
5698+     S.Diag (E->getExprLoc (), diag::err_ice_too_large)
5699+         << toString (*I, 10 , false ) << 4  << /*  Unsigned */   1 ;
5700+     return  {nullptr , 0 };
5701+   }
5702+   if  (*I < 0 )
5703+     S.Diag (E->getExprLoc (), diag::warn_attribute_argument_n_negative)
5704+         << &AL << Idx << E->getSourceRange ();
5705+ 
5706+   //  We may need to perform implicit conversion of the argument.
5707+   InitializedEntity Entity = InitializedEntity::InitializeParameter (
5708+       S.Context , S.Context .getConstType (S.Context .IntTy ), /* consume*/   false );
5709+   ExprResult ValArg = S.PerformCopyInitialization (Entity, SourceLocation (), E);
5710+   assert (!ValArg.isInvalid () &&
5711+          " Unexpected PerformCopyInitialization() failure."  );
5712+ 
5713+   return  {ValArg.getAs <Expr>(), I->getZExtValue ()};
5714+ }
5715+ 
5716+ CUDAClusterDimsAttr *Sema::createClusterDimsAttr (const  AttributeCommonInfo &CI,
5717+                                                  Expr *X, Expr *Y, Expr *Z) {
5718+   CUDAClusterDimsAttr TmpAttr (Context, CI, X, Y, Z);
5719+ 
5720+   int  ValX = 1 ;
5721+   int  ValY = 1 ;
5722+   int  ValZ = 1 ;
5723+ 
5724+   std::tie (X, ValX) = makeClusterDimsArgExpr (*this , X, TmpAttr, /* Idx=*/ 0 );
5725+   if  (!X)
5726+     return  nullptr ;
5727+ 
5728+   if  (Y) {
5729+     std::tie (Y, ValY) = makeClusterDimsArgExpr (*this , Y, TmpAttr, /* Idx=*/ 1 );
5730+     if  (!Y)
5731+       return  nullptr ;
5732+   }
5733+ 
5734+   if  (Z) {
5735+     std::tie (Z, ValZ) = makeClusterDimsArgExpr (*this , Z, TmpAttr, /* Idx=*/ 2 );
5736+     if  (!Z)
5737+       return  nullptr ;
5738+   }
5739+ 
5740+   int  FlatDim = ValX * ValY * ValZ;
5741+   auto  TT = (!Context.getLangOpts ().CUDAIsDevice  && Context.getAuxTargetInfo ())
5742+                 ? Context.getAuxTargetInfo ()->getTriple ()
5743+                 : Context.getTargetInfo ().getTriple ();
5744+   int  MaxDim = 1 ;
5745+   if  (TT.isNVPTX ())
5746+     MaxDim = 8 ;
5747+   else  if  (TT.isAMDGPU ())
5748+     MaxDim = 16 ;
5749+   else 
5750+     return  nullptr ;
5751+ 
5752+   //  A maximum of 8 thread blocks in a cluster is supported as a portable
5753+   //  cluster size in CUDA. The number is 16 for AMDGPU.
5754+   if  (FlatDim > MaxDim) {
5755+     Diag (CI.getLoc (), diag::err_cuda_cluster_dims_too_large) << MaxDim;
5756+     return  nullptr ;
5757+   }
5758+ 
5759+   return  ::new  (Context) CUDAClusterDimsAttr (Context, CI, X, Y, Z);
5760+ }
5761+ 
5762+ void  Sema::addClusterDimsAttr (Decl *D, const  AttributeCommonInfo &CI, Expr *X,
5763+                               Expr *Y, Expr *Z) {
5764+   if  (auto  *Attr = createClusterDimsAttr (CI, X, Y, Z))
5765+     D->addAttr (Attr);
5766+ }
5767+ 
5768+ void  Sema::addNoClusterAttr (Decl *D, const  AttributeCommonInfo &CI) {
5769+   if  (CUDANoClusterAttr *Attr = ::new  (Context) CUDANoClusterAttr (Context, CI))
5770+     D->addAttr (Attr);
5771+ }
5772+ 
5773+ static  void  handleClusterDimsAttr (Sema &S, Decl *D, const  ParsedAttr &AL) {
5774+   auto  &TTI = S.Context .getTargetInfo ();
5775+   auto  Arch = StringToOffloadArch (TTI.getTargetOpts ().CPU );
5776+   if  ((TTI.getTriple ().isNVPTX () && Arch < clang::OffloadArch::SM_90) ||
5777+       (TTI.getTriple ().isAMDGPU () && Arch < clang::OffloadArch::GFX1250)) {
5778+     S.Diag (AL.getLoc (), diag::err_cuda_cluster_attr_not_supported) << 0 ;
5779+     return ;
5780+   }
5781+ 
5782+   if  (!AL.checkAtLeastNumArgs (S, /* Num=*/ 1 ) ||
5783+       !AL.checkAtMostNumArgs (S, /* Num=*/ 3 ))
5784+     return ;
5785+ 
5786+   S.addClusterDimsAttr (D, AL, AL.getArgAsExpr (0 ),
5787+                        AL.getNumArgs () > 1  ? AL.getArgAsExpr (1 ) : nullptr ,
5788+                        AL.getNumArgs () > 2  ? AL.getArgAsExpr (2 ) : nullptr );
5789+ }
5790+ 
5791+ static  void  handleNoClusterAttr (Sema &S, Decl *D, const  ParsedAttr &AL) {
5792+   auto  &TTI = S.Context .getTargetInfo ();
5793+   auto  Arch = StringToOffloadArch (TTI.getTargetOpts ().CPU );
5794+   if  ((TTI.getTriple ().isNVPTX () && Arch < clang::OffloadArch::SM_90) ||
5795+       (TTI.getTriple ().isAMDGPU () && Arch < clang::OffloadArch::GFX1250)) {
5796+     S.Diag (AL.getLoc (), diag::err_cuda_cluster_attr_not_supported) << 1 ;
5797+     return ;
5798+   }
5799+ 
5800+   S.addNoClusterAttr (D, AL);
5801+ }
5802+ 
56795803static  void  handleArgumentWithTypeTagAttr (Sema &S, Decl *D,
56805804                                          const  ParsedAttr &AL) {
56815805  if  (!AL.isArgIdent (0 )) {
@@ -7141,6 +7265,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
71417265  case  ParsedAttr::AT_CUDALaunchBounds:
71427266    handleLaunchBoundsAttr (S, D, AL);
71437267    break ;
7268+   case  ParsedAttr::AT_CUDAClusterDims:
7269+     handleClusterDimsAttr (S, D, AL);
7270+     break ;
7271+   case  ParsedAttr::AT_CUDANoCluster:
7272+     handleNoClusterAttr (S, D, AL);
7273+     break ;
71447274  case  ParsedAttr::AT_Restrict:
71457275    handleRestrictAttr (S, D, AL);
71467276    break ;
0 commit comments