[CUDA] When we emit an error that might have been deferred, also print a callstack.

Summary:
Previously, when you did something not allowed in a host+device function
and then caused it to be codegen'ed, we would print out an error telling
you that you did something bad, but we wouldn't tell you how we decided
that the function needed to be codegen'ed.

This change causes us to print out a callstack when emitting deferred
errors.  This is immensely helpful when debugging highly-templated code,
where it's often unclear how a function became known-emitted.

We only print the callstack once per function, after we print all the
deferred errors.

This patch also switches all of our hashtables to using canonical
FunctionDecls instead of regular FunctionDecls.  This prevents a number
of bugs, some of which are caught by tests added here, in which we
assume that two FDs for the same function have the same pointer value.

Reviewers: rnk

Subscribers: cfe-commits, tra

Differential Revision: https://reviews.llvm.org/D25704

llvm-svn: 284647
Justin Lebar committed Oct 19, 2016
1 parent ebe8b83 commit 6c86e91
Showing 10 changed files with 197 additions and 82 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6702,6 +6702,7 @@ def err_deleted_function_use : Error<"attempt to use a deleted function">;
def err_deleted_inherited_ctor_use : Error<
"constructor inherited by %0 from base class %1 is implicitly deleted">;

def note_called_by : Note<"called by %0">;
def err_kern_type_not_void_return : Error<
"kernel function type %0 must have void return type">;
def err_kern_is_nonstatic_method : Error<
81 changes: 47 additions & 34 deletions clang/include/clang/Sema/Sema.h
@@ -9249,26 +9249,42 @@ class Sema {
/// Diagnostics that are emitted only if we discover that the given function
/// must be codegen'ed. Because handling these correctly adds overhead to
/// compilation, this is currently only enabled for CUDA compilations.
llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>>
llvm::DenseMap<CanonicalDeclPtr<FunctionDecl>,
std::vector<PartialDiagnosticAt>>
CUDADeferredDiags;

/// FunctionDecls plus raw encodings of SourceLocations for which
/// CheckCUDACall has emitted a (maybe deferred) "bad call" diagnostic. We
/// use this to avoid emitting the same deferred diag twice.
llvm::DenseSet<std::pair<FunctionDecl *, unsigned>> LocsWithCUDACallDiags;
llvm::DenseSet<std::pair<CanonicalDeclPtr<FunctionDecl>, unsigned>>
LocsWithCUDACallDiags;

/// The set of CUDA functions that we've discovered must be emitted by tracing
/// the call graph. Functions that we can tell a priori must be emitted
/// aren't added to this set.
llvm::DenseSet<FunctionDecl *> CUDAKnownEmittedFns;
/// A pair of a canonical FunctionDecl and a SourceLocation.
struct FunctionDeclAndLoc {
CanonicalDeclPtr<FunctionDecl> FD;
SourceLocation Loc;
};

/// An inverse call graph, mapping known-emitted functions to one of their
/// known-emitted callers (plus the location of the call).
///
/// Functions that we can tell a priori must be emitted aren't added to this
/// map.
llvm::DenseMap</* Callee = */ CanonicalDeclPtr<FunctionDecl>,
/* Caller = */ FunctionDeclAndLoc>
CUDAKnownEmittedFns;

/// A partial call graph maintained during CUDA compilation to support
/// deferred diagnostics. Specifically, functions are only added here if, at
/// the time they're added, they are not known-emitted. As soon as we
/// discover that a function is known-emitted, we remove it and everything it
/// transitively calls from this set and add those functions to
/// CUDAKnownEmittedFns.
llvm::DenseMap<FunctionDecl *, llvm::SetVector<FunctionDecl *>> CUDACallGraph;
/// deferred diagnostics.
///
/// Functions are only added here if, at the time they're considered, they are
/// not known-emitted. As soon as we discover that a function is
/// known-emitted, we remove it and everything it transitively calls from this
/// set and add those functions to CUDAKnownEmittedFns.
llvm::DenseMap</* Caller = */ CanonicalDeclPtr<FunctionDecl>,
/* Callees = */ llvm::MapVector<CanonicalDeclPtr<FunctionDecl>,
SourceLocation>>
CUDACallGraph;

/// Diagnostic builder for CUDA errors which may or may not be deferred.
///
@@ -9291,13 +9307,19 @@ class Sema {
K_Nop,
/// Emit the diagnostic immediately (i.e., behave like Sema::Diag()).
K_Immediate,
/// Emit the diagnostic immediately, and, if it's a warning or error, also
/// emit a call stack showing how this function can be reached by an a
/// priori known-emitted function.
K_ImmediateWithCallStack,
/// Create a deferred diagnostic, which is emitted only if the function
/// it's attached to is codegen'ed.
/// it's attached to is codegen'ed. Also emit a call stack as with
/// K_ImmediateWithCallStack.
K_Deferred
};

CUDADiagBuilder(Kind K, SourceLocation Loc, unsigned DiagID,
FunctionDecl *Fn, Sema &S);
~CUDADiagBuilder();

/// Convertible to bool: True if we immediately emitted an error, false if
/// we didn't emit an error or we created a deferred error.
@@ -9309,38 +9331,29 @@ class Sema {
///
/// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably
/// want to use these instead of creating a CUDADiagBuilder yourself.
operator bool() const { return ImmediateDiagBuilder.hasValue(); }
operator bool() const { return ImmediateDiag.hasValue(); }

template <typename T>
friend const CUDADiagBuilder &operator<<(const CUDADiagBuilder &Diag,
const T &Value) {
if (Diag.ImmediateDiagBuilder.hasValue())
*Diag.ImmediateDiagBuilder << Value;
else if (Diag.PartialDiagInfo.hasValue())
Diag.PartialDiagInfo->PD << Value;
if (Diag.ImmediateDiag.hasValue())
*Diag.ImmediateDiag << Value;
else if (Diag.PartialDiag.hasValue())
*Diag.PartialDiag << Value;
return Diag;
}

private:
struct PartialDiagnosticInfo {
PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD,
FunctionDecl *Fn)
: S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {}

~PartialDiagnosticInfo() {
S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)});
}

Sema &S;
SourceLocation Loc;
PartialDiagnostic PD;
FunctionDecl *Fn;
};
Sema &S;
SourceLocation Loc;
unsigned DiagID;
FunctionDecl *Fn;
bool ShowCallStack;

// Invariant: At most one of these Optionals has a value.
// FIXME: Switch these to a Variant once that exists.
llvm::Optional<Sema::SemaDiagnosticBuilder> ImmediateDiagBuilder;
llvm::Optional<PartialDiagnosticInfo> PartialDiagInfo;
llvm::Optional<SemaDiagnosticBuilder> ImmediateDiag;
llvm::Optional<PartialDiagnostic> PartialDiag;
};

/// Creates a CUDADiagBuilder that emits the diagnostic if the current context
142 changes: 98 additions & 44 deletions clang/lib/Sema/SemaCUDA.cpp
@@ -488,22 +488,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
unsigned DiagID, FunctionDecl *Fn,
Sema &S) {
switch (K) {
case K_Nop:
break;
case K_Immediate:
ImmediateDiagBuilder.emplace(S.Diag(Loc, DiagID));
break;
case K_Deferred:
assert(Fn && "Must have a function to attach the deferred diag to.");
PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn);
break;
}
}

// In CUDA, there are some constructs which may appear in semantically-valid
// code, but trigger errors if we ever generate code for the function in which
// they appear. Essentially every construct you're not allowed to use on the
@@ -528,6 +512,54 @@ Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
// until we discover that the function is known-emitted, at which point we take
// it out of this map and emit the diagnostic.

Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
unsigned DiagID, FunctionDecl *Fn,
Sema &S)
: S(S), Loc(Loc), DiagID(DiagID), Fn(Fn),
ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) {
switch (K) {
case K_Nop:
break;
case K_Immediate:
case K_ImmediateWithCallStack:
ImmediateDiag.emplace(S.Diag(Loc, DiagID));
break;
case K_Deferred:
assert(Fn && "Must have a function to attach the deferred diag to.");
PartialDiag.emplace(S.PDiag(DiagID));
break;
}
}

// Print notes showing how we can reach FD starting from an a priori
// known-callable function.
static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) {
auto FnIt = S.CUDAKnownEmittedFns.find(FD);
while (FnIt != S.CUDAKnownEmittedFns.end()) {
DiagnosticBuilder Builder(
S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
Builder << FnIt->second.FD;
Builder.setForceEmit();

FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD);
}
}

Sema::CUDADiagBuilder::~CUDADiagBuilder() {
if (ImmediateDiag) {
// Emit our diagnostic and, if it was a warning or error, output a callstack
// if Fn isn't a priori known-emitted.
bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel(
DiagID, Loc) >= DiagnosticsEngine::Warning;
ImmediateDiag.reset(); // Emit the immediate diag.
if (IsWarningOrError && ShowCallStack)
EmitCallStackNotes(S, Fn);
} else if (PartialDiag) {
assert(ShowCallStack && "Must always show call stack for deferred diags.");
S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)});
}
}

// Do we know that we will eventually codegen the given function?
static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
// Templates are emitted when they're instantiated.
@@ -568,7 +600,7 @@ Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
// mode until the function is known-emitted.
if (getLangOpts().CUDAIsDevice) {
return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
? CUDADiagBuilder::K_Immediate
? CUDADiagBuilder::K_ImmediateWithCallStack
: CUDADiagBuilder::K_Deferred;
}
return CUDADiagBuilder::K_Nop;
@@ -596,7 +628,7 @@ Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
return CUDADiagBuilder::K_Nop;

return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
? CUDADiagBuilder::K_Immediate
? CUDADiagBuilder::K_ImmediateWithCallStack
: CUDADiagBuilder::K_Deferred;
default:
return CUDADiagBuilder::K_Nop;
@@ -612,63 +644,84 @@ static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
auto It = S.CUDADeferredDiags.find(FD);
if (It == S.CUDADeferredDiags.end())
return;
bool HasWarningOrError = false;
for (PartialDiagnosticAt &PDAt : It->second) {
const SourceLocation &Loc = PDAt.first;
const PartialDiagnostic &PD = PDAt.second;
HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel(
PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning;
DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
Builder.setForceEmit();
PD.Emit(Builder);
}
S.CUDADeferredDiags.erase(It);

// FIXME: Should this be called after every warning/error emitted in the loop
// above, instead of just once per function? That would be consistent with
// how we handle immediate errors, but it also seems like a bit much.
if (HasWarningOrError)
EmitCallStackNotes(S, FD);
}

// Indicate that this function (and thus everything it transitively calls) will
// be codegen'ed, and emit any deferred diagnostics on this function and its
// (transitive) callees.
static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller,
FunctionDecl *OrigCallee, SourceLocation OrigLoc) {
// Nothing to do if we already know that FD is emitted.
if (IsKnownEmitted(S, FD)) {
assert(!S.CUDACallGraph.count(FD));
if (IsKnownEmitted(S, OrigCallee)) {
assert(!S.CUDACallGraph.count(OrigCallee));
return;
}

// We've just discovered that FD is known-emitted. Walk our call graph to see
// what else we can now discover also must be emitted.
llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD};
llvm::SmallSet<FunctionDecl *, 4> Seen;
Seen.insert(FD);
// We've just discovered that OrigCallee is known-emitted. Walk our call
// graph to see what else we can now discover also must be emitted.

struct CallInfo {
FunctionDecl *Caller;
FunctionDecl *Callee;
SourceLocation Loc;
};
llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}};
llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen;
Seen.insert(OrigCallee);
while (!Worklist.empty()) {
FunctionDecl *Caller = Worklist.pop_back_val();
assert(!IsKnownEmitted(S, Caller) &&
CallInfo C = Worklist.pop_back_val();
assert(!IsKnownEmitted(S, C.Callee) &&
"Worklist should not contain known-emitted functions.");
S.CUDAKnownEmittedFns.insert(Caller);
EmitDeferredDiags(S, Caller);
S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc};
EmitDeferredDiags(S, C.Callee);

// If this is a template instantiation, explore its callgraph as well:
// Non-dependent calls are part of the template's callgraph, while dependent
// calls are part of the instantiation's call graph.
if (auto *Templ = Caller->getPrimaryTemplate()) {
if (auto *Templ = C.Callee->getPrimaryTemplate()) {
FunctionDecl *TemplFD = Templ->getAsFunction();
if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
Seen.insert(TemplFD);
Worklist.push_back(TemplFD);
Worklist.push_back(
{/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc});
}
}

// Add all functions called by Caller to our worklist.
auto CGIt = S.CUDACallGraph.find(Caller);
// Add all functions called by Callee to our worklist.
auto CGIt = S.CUDACallGraph.find(C.Callee);
if (CGIt == S.CUDACallGraph.end())
continue;

for (FunctionDecl *Callee : CGIt->second) {
if (Seen.count(Callee) || IsKnownEmitted(S, Callee))
for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc :
CGIt->second) {
FunctionDecl *NewCallee = FDLoc.first;
SourceLocation CallLoc = FDLoc.second;
if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee))
continue;
Seen.insert(Callee);
Worklist.push_back(Callee);
Seen.insert(NewCallee);
Worklist.push_back(
{/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc});
}

// Caller is now known-emitted, so we no longer need to maintain its list of
// callees in CUDACallGraph.
// C.Callee is now known-emitted, so we no longer need to maintain its list
// of callees in CUDACallGraph.
S.CUDACallGraph.erase(CGIt);
}
}
Expand All @@ -686,7 +739,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// Otherwise, mark the call in our call graph so we can traverse it later.
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
if (CallerKnownEmitted)
MarkKnownEmitted(*this, Callee);
MarkKnownEmitted(*this, Caller, Callee, Loc);
else {
// If we have
// host fn calls kernel fn calls host+device,
@@ -695,7 +748,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// that, when compiling for host, only HD functions actually called from the
// host get marked as known-emitted.
if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
CUDACallGraph[Caller].insert(Callee);
CUDACallGraph[Caller].insert({Callee, Loc});
}

CUDADiagBuilder::Kind DiagKind = [&] {
@@ -707,7 +760,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// If we know the caller will be emitted, we know this wrong-side call
// will be emitted, so it's an immediate error. Otherwise, defer the
// error until we know the caller is emitted.
return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate
return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack
: CUDADiagBuilder::K_Deferred;
default:
return CUDADiagBuilder::K_Nop;
@@ -729,7 +782,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
Caller, *this)
<< Callee;
return DiagKind != CUDADiagBuilder::K_Immediate;
return DiagKind != CUDADiagBuilder::K_Immediate &&
DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
2 changes: 2 additions & 0 deletions clang/test/SemaCUDA/bad-calls-on-same-line.cu
@@ -35,5 +35,7 @@ inline __host__ __device__ void hd() {
void host_fn() {
hd<int>();
hd<double>(); // expected-note {{function template specialization 'hd<double>'}}
// expected-note@-1 {{called by 'host_fn'}}
hd<float>(); // expected-note {{function template specialization 'hd<float>'}}
// expected-note@-1 {{called by 'host_fn'}}
}
3 changes: 2 additions & 1 deletion clang/test/SemaCUDA/call-device-fn-from-host.cu
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - -verify
// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
// RUN: -verify -verify-ignore-unexpected=note

// Note: This test won't work with -fsyntax-only, because some of these errors
// are emitted during codegen.
2 changes: 1 addition & 1 deletion clang/test/SemaCUDA/call-host-fn-from-device.cu
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \
// RUN: -emit-llvm -o /dev/null -verify
// RUN: -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note

// Note: This test won't work with -fsyntax-only, because some of these errors
// are emitted during codegen.