@@ -90,15 +90,44 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const {
9090 [](const CodeGenIntrinsic &Int1, const CodeGenIntrinsic &Int2) {
9191 return Int1.Name == Int2.Name ;
9292 });
93- if (I == Intrinsics.end ())
94- return ;
93+ if (I != Intrinsics.end ()) {
94+ // Found 2 intrinsics with same name.
95+ const CodeGenIntrinsic &First = *I;
96+ const CodeGenIntrinsic &Second = *(I + 1 );
97+ PrintError (Second.TheDef ,
98+ Twine (" Intrinsic `" ) + First.Name + " ` is already defined" );
99+ PrintFatalNote (First.TheDef , " Previous definition here" );
100+ }
95101
96- // Found a duplicate intrinsics.
97- const CodeGenIntrinsic &First = *I;
98- const CodeGenIntrinsic &Second = *(I + 1 );
99- PrintError (Second.TheDef ,
100- Twine (" Intrinsic `" ) + First.Name + " ` is already defined" );
101- PrintFatalNote (First.TheDef , " Previous definition here" );
102+ // Now detect intrinsics that may have the same enum name. For that, we first
103+ // sort the intrinsics by their enum name.
104+ std::vector<const CodeGenIntrinsic *> SortedByEnumName;
105+ SortedByEnumName.reserve (size ());
106+ for (const CodeGenIntrinsic &Int : Intrinsics)
107+ SortedByEnumName.push_back (&Int);
108+
109+ llvm::sort (SortedByEnumName, [](const CodeGenIntrinsic *LHS,
110+ const CodeGenIntrinsic *RHS) {
111+ // To ensure deterministic sorted order when duplicates are
112+ // present, use record ID as a tie-breaker
113+ unsigned LhsID = LHS->TheDef ->getID ();
114+ unsigned RhsID = RHS->TheDef ->getID ();
115+ return std::tie (LHS->EnumName , LhsID) < std::tie (RHS->EnumName , RhsID);
116+ });
117+ auto J = std::adjacent_find (
118+ SortedByEnumName.begin (), SortedByEnumName.end (),
119+ [](const CodeGenIntrinsic *Int1, const CodeGenIntrinsic *Int2) {
120+ return Int1->EnumName == Int2->EnumName ;
121+ });
122+
123+ if (J != SortedByEnumName.end ()) {
124+ // Found 2 intrinsics with same enum name.
125+ const CodeGenIntrinsic *First = *J;
126+ const CodeGenIntrinsic *Second = *(J + 1 );
127+ PrintError (Second->TheDef , Twine (" `Intrinsic::" ) + First->EnumName +
128+ " ` is already defined" );
129+ PrintFatalNote (First->TheDef , " Previous definition here" );
130+ }
102131}
103132
104133// For target independent intrinsics, check that their second dotted component
@@ -257,6 +286,24 @@ const CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
257286 return *Iter->second ;
258287}
259288
289+ // Sanitize the intrinsic name by replacing each _ pair with a single _ and
290+ // optionally each single _ (in the original input string) with .
291+ static void sanitizeName (std::string &Name, bool ReplaceSingleUnderscore) {
292+ size_t Next = 0 ;
293+ for (size_t I = 0 , E = Name.size (); I < E; ++I) {
294+ if (Name[I] == ' _' && I + 1 < E && Name[I + 1 ] == ' _' ) {
295+ Name[Next++] = ' _' ;
296+ // Skip over both the _s.
297+ ++I;
298+ } else if (ReplaceSingleUnderscore && Name[I] == ' _' ) {
299+ Name[Next++] = ' .' ;
300+ } else {
301+ Name[Next++] = Name[I];
302+ }
303+ }
304+ Name = Name.substr (0 , Next);
305+ }
306+
260307CodeGenIntrinsic::CodeGenIntrinsic (const Record *R,
261308 const CodeGenIntrinsicContext &Ctx)
262309 : TheDef(R) {
@@ -267,7 +314,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
267314 PrintFatalError (DefLoc,
268315 " Intrinsic '" + DefName + " ' does not start with 'int_'!" );
269316
270- EnumName = DefName.substr (4 );
317+ EnumName = DefName.substr (4 ). str () ;
271318
272319 // Ignore a missing ClangBuiltinName field.
273320 ClangBuiltinName =
@@ -278,16 +325,43 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
278325 TargetPrefix = R->getValueAsString (" TargetPrefix" );
279326 Name = R->getValueAsString (" LLVMName" ).str ();
280327
328+ // Note, we only sanitize __ in intrinsic names and not their C++ enum names.
329+ // The rationale is that if we sanitize enum names as well (by just replacing
330+ // _ pairs with _) we may get conflicting enum names for different record
331+ // names which is not desirable. For example:
332+ //
333+ // Enum Name Enum Name Intrinsic Name
334+ // (Sanitized) (Original)
335+ //
336+ // int_x__y_z x_y_z x__y_z llvm.x_y.z
337+ // int_x_y_z x_y_z x_y_z llvm.x.y.z
338+ //
339+ // So with no enum name sanitization, two different record names will not
340+ // conflicts in both enum names and intrinsic names. The side-effect is that
341+ // intrinsics like int_clear_cache will need to be named int_clear__cache to
342+ // have their default name be "llvm.clear_cache" but then their intrisnic name
343+ // will change to "Intrinsic::clear__cache".
344+
345+ // Alternatively, we do sanitize the enum name (which preserved a lot of
346+ // existing names), but then detect the cases where 2 different records may
347+ // end up generating the same enum name. This/ can be done by extending
348+ // CheckDuplicateIntrinsics() to detect duplicated enum names as well and
349+ // fail if that happens.
350+ // Note: (Implementing this option).
351+
281352 if (Name == " " ) {
282353 // If an explicit name isn't specified, derive one from the DefName.
283- Name = " llvm." + EnumName. str () ;
284- llvm::replace (Name, ' _ ' , ' . ' );
354+ Name = " llvm." + EnumName;
355+ sanitizeName (Name, /* ReplaceSingleUnderscore */ true );
285356 } else {
286357 // Verify it starts with "llvm.".
287358 if (!StringRef (Name).starts_with (" llvm." ))
288359 PrintFatalError (DefLoc, " Intrinsic '" + DefName +
289360 " 's name does not start with 'llvm.'!" );
290361 }
362+ // Sanitize the enum name by just replacing each pair of _ with a single _.
363+ // That way, most existing intrinsic names stay the same.
364+ sanitizeName (EnumName, /* ReplaceSingleUnderscore*/ false );
291365
292366 // If TargetPrefix is specified, make sure that Name starts with
293367 // "llvm.<targetprefix>.".
0 commit comments