@@ -90,15 +90,44 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const {
9090      [](const  CodeGenIntrinsic &Int1, const  CodeGenIntrinsic &Int2) {
9191        return  Int1.Name  == Int2.Name ;
9292      });
93-   if  (I == Intrinsics.end ())
94-     return ;
93+   if  (I != Intrinsics.end ()) {
94+     //  Found 2 intrinsics with same name.
95+     const  CodeGenIntrinsic &First = *I;
96+     const  CodeGenIntrinsic &Second = *(I + 1 );
97+     PrintError (Second.TheDef ,
98+                Twine (" Intrinsic `"  ) + First.Name  + " ` is already defined"  );
99+     PrintFatalNote (First.TheDef , " Previous definition here"  );
100+   }
95101
96-   //  Found a duplicate intrinsics.
97-   const  CodeGenIntrinsic &First = *I;
98-   const  CodeGenIntrinsic &Second = *(I + 1 );
99-   PrintError (Second.TheDef ,
100-              Twine (" Intrinsic `"  ) + First.Name  + " ` is already defined"  );
101-   PrintFatalNote (First.TheDef , " Previous definition here"  );
102+   //  Now detect intrinsics that may have the same enum name. For that, we first
103+   //  sort the intrinsics by their enum name.
104+   std::vector<const  CodeGenIntrinsic *> SortedByEnumName;
105+   SortedByEnumName.reserve (size ());
106+   for  (const  CodeGenIntrinsic &Int : Intrinsics)
107+     SortedByEnumName.push_back (&Int);
108+ 
109+   llvm::sort (SortedByEnumName, [](const  CodeGenIntrinsic *LHS,
110+                                   const  CodeGenIntrinsic *RHS) {
111+     //  To ensure deterministic sorted order when duplicates are
112+     //  present, use record ID as a tie-breaker
113+     unsigned  LhsID = LHS->TheDef ->getID ();
114+     unsigned  RhsID = RHS->TheDef ->getID ();
115+     return  std::tie (LHS->EnumName , LhsID) < std::tie (RHS->EnumName , RhsID);
116+   });
117+   auto  J = std::adjacent_find (
118+       SortedByEnumName.begin (), SortedByEnumName.end (),
119+       [](const  CodeGenIntrinsic *Int1, const  CodeGenIntrinsic *Int2) {
120+         return  Int1->EnumName  == Int2->EnumName ;
121+       });
122+ 
123+   if  (J != SortedByEnumName.end ()) {
124+     //  Found 2 intrinsics with same enum name.
125+     const  CodeGenIntrinsic *First = *J;
126+     const  CodeGenIntrinsic *Second = *(J + 1 );
127+     PrintError (Second->TheDef , Twine (" `Intrinsic::"  ) + First->EnumName  +
128+                                    " ` is already defined"  );
129+     PrintFatalNote (First->TheDef , " Previous definition here"  );
130+   }
102131}
103132
104133//  For target independent intrinsics, check that their second dotted component
@@ -257,6 +286,24 @@ const CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
257286  return  *Iter->second ;
258287}
259288
// Sanitize an intrinsic name in place.
//
// Scanning left to right, every pair of adjacent underscores ("__") collapses
// to a single '_'. When ReplaceSingleUnderscore is true, each remaining
// (unpaired) underscore is replaced with '.'; otherwise it is kept as-is. All
// other characters are copied through unchanged.
//
// Examples with ReplaceSingleUnderscore = true:
//   "x__y_z" -> "x_y.z"
//   "a___b"  -> "a_.b"   (first two '_' form the pair, the third is single)
// Examples with ReplaceSingleUnderscore = false:
//   "x__y_z" -> "x_y_z"
static void sanitizeName(std::string &Name, bool ReplaceSingleUnderscore) {
  // Next is the write cursor. It never overtakes the read cursor I, so the
  // string can be rewritten in place.
  size_t Next = 0;
  for (size_t I = 0, E = Name.size(); I < E; ++I) {
    if (Name[I] == '_' && I + 1 < E && Name[I + 1] == '_') {
      Name[Next++] = '_';
      // Skip over both the _s.
      ++I;
    } else if (ReplaceSingleUnderscore && Name[I] == '_') {
      Name[Next++] = '.';
    } else {
      Name[Next++] = Name[I];
    }
  }
  // Drop the now-unused tail in place; no temporary string is needed.
  Name.resize(Next);
}
306+ 
260307CodeGenIntrinsic::CodeGenIntrinsic (const  Record *R,
261308                                   const  CodeGenIntrinsicContext &Ctx)
262309    : TheDef(R) {
@@ -267,7 +314,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
267314    PrintFatalError (DefLoc,
268315                    " Intrinsic '"   + DefName + " ' does not start with 'int_'!"  );
269316
270-   EnumName = DefName.substr (4 );
317+   EnumName = DefName.substr (4 ). str () ;
271318
272319  //  Ignore a missing ClangBuiltinName field.
273320  ClangBuiltinName =
@@ -278,16 +325,43 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
278325  TargetPrefix = R->getValueAsString (" TargetPrefix"  );
279326  Name = R->getValueAsString (" LLVMName"  ).str ();
280327
328+   //  Note, we only sanitize __ in intrinsic names and not their C++ enum names.
329+   //  The rationale is that if we sanitize enum names as well (by just replacing
330+   //  _ pairs with _) we may get conflicting enum names for different record
331+   //  names which is not desirable. For example:
332+   // 
333+   //                  Enum Name      Enum Name     Intrinsic Name
334+   //                  (Sanitized)    (Original)
335+   // 
336+   //   int_x__y_z     x_y_z          x__y_z        llvm.x_y.z
337+   //   int_x_y_z      x_y_z          x_y_z         llvm.x.y.z
338+   // 
339+   //  So with no enum name sanitization, two different record names will not
340+   //  conflict in either enum names or intrinsic names. The side-effect is that
341+   //  intrinsics like int_clear_cache will need to be named int_clear__cache to
342+   //  have their default name be "llvm.clear_cache" but then their enum name
343+   //  will change to "Intrinsic::clear__cache".
344+ 
345+   //  Alternatively, we do sanitize the enum name (which preserves a lot of
346+   //  existing names), but then detect the cases where 2 different records may
347+   //  end up generating the same enum name. This can be done by extending
348+   //  CheckDuplicateIntrinsics() to detect duplicated enum names as well and
349+   //  fail if that happens.
350+   //  Note: this alternative is the option implemented here.
351+ 
281352  if  (Name == " "  ) {
282353    //  If an explicit name isn't specified, derive one from the DefName.
283-     Name = " llvm."   + EnumName. str () ;
284-     llvm::replace (Name, ' _ ' ,  ' . '  );
354+     Name = " llvm."   + EnumName;
355+     sanitizeName (Name, /* ReplaceSingleUnderscore */   true );
285356  } else  {
286357    //  Verify it starts with "llvm.".
287358    if  (!StringRef (Name).starts_with (" llvm."  ))
288359      PrintFatalError (DefLoc, " Intrinsic '"   + DefName +
289360                                  " 's name does not start with 'llvm.'!"  );
290361  }
362+   //  Sanitize the enum name by just replacing each pair of _ with a single _.
363+   //  That way, most existing intrinsic names stay the same.
364+   sanitizeName (EnumName, /* ReplaceSingleUnderscore*/   false );
291365
292366  //  If TargetPrefix is specified, make sure that Name starts with
293367  //  "llvm.<targetprefix>.".
0 commit comments