diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h
new file mode 100644
index 0000000000000..818d78668ff15
--- /dev/null
+++ b/clang/include/clang/Basic/Sarif.h
@@ -0,0 +1,440 @@
+//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
+///
+/// The document built can be accessed as a JSON Object.
+/// Several value semantic types are also introduced which represent properties
+/// of the SARIF standard, such as 'artifact', 'result', 'rule'.
+///
+/// A SARIF (Static Analysis Results Interchange Format) document is a JSON
+/// document that describes in detail the results of running static analysis
+/// tools on a project. Each (non-trivial) document consists of at least one
+/// "run", each of which is composed of details such as:
+/// * Tool: The tool that was run
+/// * Rules: The rules applied during the tool run, represented by
+///   \c reportingDescriptor objects in SARIF
+/// * Results: The matches for the rules applied against the project(s) being
+///   evaluated, represented by \c result objects in SARIF
+///
+/// Reference:
+/// 1. The SARIF standard
+/// 2. SARIF \c reportingDescriptor
+/// 3. SARIF \c result
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_SARIF_H +#define LLVM_CLANG_BASIC_SARIF_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/Version.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include +#include +#include +#include +#include + +namespace clang { + +class SarifDocumentWriter; +class SourceManager; + +namespace detail { + +/// \internal +/// An artifact location is SARIF's way of describing the complete location +/// of an artifact encountered during analysis. The \c artifactLocation object +/// typically consists of a URI, and/or an index to reference the artifact it +/// locates. +/// +/// This builder makes an additional assumption: that every artifact encountered +/// by \c clang will be a physical, top-level artifact. Which is why the static +/// creation method \ref SarifArtifactLocation::create takes a mandatory URI +/// parameter. The official standard states that either a \c URI or \c Index +/// must be available in the object, \c clang picks the \c URI as a reasonable +/// default, because it intends to deal in physical artifacts for now. +/// +/// Reference: +/// 1. artifactLocation object +/// 2. \ref SarifArtifact +class SarifArtifactLocation { +private: + friend class clang::SarifDocumentWriter; + + llvm::Optional Index; + std::string URI; + + SarifArtifactLocation() = delete; + explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {} + +public: + static SarifArtifactLocation create(llvm::StringRef URI) { + return SarifArtifactLocation{URI.str()}; + } + + SarifArtifactLocation setIndex(uint32_t Idx) { + Index = Idx; + return *this; + } +}; + +/// \internal +/// An artifact in SARIF is any object (a sequence of bytes) addressable by +/// a URI (RFC 3986). 
The most common type of artifact for clang's use-case +/// would be source files. SARIF's artifact object is described in detail in +/// section 3.24. +// +/// Since every clang artifact MUST have a location (there being no nested +/// artifacts), the creation method \ref SarifArtifact::create requires a +/// \ref SarifArtifactLocation object. +/// +/// Reference: +/// 1. artifact object +class SarifArtifact { +private: + friend class clang::SarifDocumentWriter; + + llvm::Optional Offset; + llvm::Optional Length; + std::string MimeType; + SarifArtifactLocation Location; + llvm::SmallVector Roles; + + SarifArtifact() = delete; + + explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {} + +public: + static SarifArtifact create(const SarifArtifactLocation &Loc) { + return SarifArtifact{Loc}; + } + + SarifArtifact setOffset(uint32_t ArtifactOffset) { + Offset = ArtifactOffset; + return *this; + } + + SarifArtifact setLength(size_t NumBytes) { + Length = NumBytes; + return *this; + } + + SarifArtifact setRoles(std::initializer_list ArtifactRoles) { + Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end()); + return *this; + } + + SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) { + MimeType = ArtifactMimeType.str(); + return *this; + } +}; + +} // namespace detail + +enum class ThreadFlowImportance { Important, Essential, Unimportant }; + +/// A thread flow is a sequence of code locations that specify a possible path +/// through a single thread of execution. +/// A thread flow in SARIF is related to a code flow which describes +/// the progress of one or more programs through one or more thread flows. +/// +/// Reference: +/// 1. threadFlow object +/// 2. 
+///    codeFlow object
+class ThreadFlow {
+  friend class SarifDocumentWriter;
+
+  CharSourceRange Range;
+  // Explicit default: previously this member had no initializer and only
+  // received a defined value because create() value-initializes the object.
+  // Important is the first enumerator, so the value seen through create() is
+  // unchanged.
+  ThreadFlowImportance Importance = ThreadFlowImportance::Important;
+  std::string Message;
+
+  ThreadFlow() = default;
+
+public:
+  /// Build an empty thread-flow item: default range, empty message and
+  /// \c ThreadFlowImportance::Important.
+  static ThreadFlow create() { return {}; }
+
+  /// Set the source range covered by this item.
+  ///
+  /// \pre \p ItemRange must be a character range (asserted).
+  /// \return A copy of this builder, to allow chaining.
+  ThreadFlow setRange(const CharSourceRange &ItemRange) {
+    assert(ItemRange.isCharRange() &&
+           "ThreadFlows require a character granular source range!");
+    Range = ItemRange;
+    return *this;
+  }
+
+  /// Set how important this item is for understanding the result.
+  ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) {
+    Importance = ItemImportance;
+    return *this;
+  }
+
+  /// Set the message attached to this item; the text is copied.
+  ThreadFlow setMessage(llvm::StringRef ItemMessage) {
+    Message = ItemMessage.str();
+    return *this;
+  }
+};
+
+/// A SARIF rule (\c reportingDescriptor object) contains information that
+/// describes a reporting item generated by a tool. A reporting item is
+/// either a result of analysis or notification of a condition encountered by
+/// the tool. Rules are arbitrary but are identifiable by a hierarchical
+/// rule-id.
+///
+/// This builder provides an interface to create SARIF \c reportingDescriptor
+/// objects via the \ref SarifRule::create static method.
+///
+/// Reference:
+/// 1.
+///    reportingDescriptor object
+class SarifRule {
+  friend class clang::SarifDocumentWriter;
+
+  std::string Name;
+  std::string Id;
+  std::string Description;
+  std::string HelpURI;
+
+  SarifRule() = default;
+
+public:
+  /// Start a rule with every field empty.
+  static SarifRule create() { return SarifRule(); }
+
+  /// Store the rule's name and hand back a copy of the builder.
+  SarifRule setName(llvm::StringRef RuleName) {
+    Name = std::string(RuleName);
+    return *this;
+  }
+
+  /// Store the rule's identifier and hand back a copy of the builder.
+  SarifRule setRuleId(llvm::StringRef RuleId) {
+    Id = std::string(RuleId);
+    return *this;
+  }
+
+  /// Store the rule's description and hand back a copy of the builder.
+  SarifRule setDescription(llvm::StringRef RuleDesc) {
+    Description = std::string(RuleDesc);
+    return *this;
+  }
+
+  /// Store the rule's help URI and hand back a copy of the builder.
+  SarifRule setHelpURI(llvm::StringRef RuleHelpURI) {
+    HelpURI = std::string(RuleHelpURI);
+    return *this;
+  }
+};
+
+/// A SARIF result (also called a "reporting item") is a unit of output
+/// produced when one of the tool's \c reportingDescriptor encounters a match
+/// on the file being analysed by the tool.
+///
+/// This builder provides a \ref SarifResult::create static method that can be
+/// used to create an empty shell onto which attributes can be added using the
+/// \c setX(...) methods.
+///
+/// For example:
+/// \code{.cpp}
+/// SarifResult result = SarifResult::create(...)
+///                         .setRuleId(...)
+///                         .setDiagnosticMessage(...);
+/// \endcode
+///
+/// Reference:
+/// 1. SARIF
result
+class SarifResult { + friend class clang::SarifDocumentWriter; + + // NOTE: + // This type cannot fit all possible indexes representable by JSON, but is + // chosen because it is the largest unsigned type that can be safely + // converted to an \c int64_t. + uint32_t RuleIdx; + std::string RuleId; + std::string DiagnosticMessage; + llvm::SmallVector Locations; + llvm::SmallVector ThreadFlows; + + SarifResult() = delete; + explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {} + +public: + static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; } + + SarifResult setIndex(uint32_t Idx) { + RuleIdx = Idx; + return *this; + } + + SarifResult setRuleId(llvm::StringRef Id) { + RuleId = Id.str(); + return *this; + } + + SarifResult setDiagnosticMessage(llvm::StringRef Message) { + DiagnosticMessage = Message.str(); + return *this; + } + + SarifResult setLocations(llvm::ArrayRef DiagLocs) { +#ifndef NDEBUG + for (const auto &Loc : DiagLocs) { + assert(Loc.isCharRange() && + "SARIF Results require character granular source ranges!"); + } +#endif + Locations.assign(DiagLocs.begin(), DiagLocs.end()); + return *this; + } + SarifResult setThreadFlows(llvm::ArrayRef ThreadFlowResults) { + ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end()); + return *this; + } +}; + +/// This class handles creating a valid SARIF document given various input +/// attributes. However, it requires an ordering among certain method calls: +/// +/// 1. Because every SARIF document must contain at least 1 \c run, callers +/// must ensure that \ref SarifDocumentWriter::createRun is is called before +/// any other methods. +/// 2. If SarifDocumentWriter::endRun is called, callers MUST call +/// SarifDocumentWriter::createRun, before invoking any of the result +/// aggregation methods such as SarifDocumentWriter::appendResult etc. 
+class SarifDocumentWriter { +private: + const llvm::StringRef SchemaURI{ + "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/" + "sarif-schema-2.1.0.json"}; + const llvm::StringRef SchemaVersion{"2.1.0"}; + + /// \internal + /// Return a pointer to the current tool. Asserts that a run exists. + llvm::json::Object &getCurrentTool(); + + /// \internal + /// Checks if there is a run associated with this document. + /// + /// \return true on success + bool hasRun() const; + + /// \internal + /// Reset portions of the internal state so that the document is ready to + /// receive data for a new run. + void reset(); + + /// \internal + /// Return a mutable reference to the current run, after asserting it exists. + /// + /// \note It is undefined behavior to call this if a run does not exist in + /// the SARIF document. + llvm::json::Object &getCurrentRun(); + + /// Create a code flow object for the given threadflows. + /// See \ref ThreadFlow. + /// + /// \note It is undefined behavior to call this if a run does not exist in + /// the SARIF document. + llvm::json::Object + createCodeFlow(const llvm::ArrayRef ThreadFlows); + + /// Add the given threadflows to the ones this SARIF document knows about. + llvm::json::Array + createThreadFlows(const llvm::ArrayRef ThreadFlows); + + /// Add the given \ref CharSourceRange to the SARIF document as a physical + /// location, with its corresponding artifact. + llvm::json::Object createPhysicalLocation(const CharSourceRange &R); + +public: + SarifDocumentWriter() = delete; + + /// Create a new empty SARIF document with the given source manager. + SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} + + /// Release resources held by this SARIF document. + ~SarifDocumentWriter() = default; + + /// Create a new run with which any upcoming analysis will be associated. + /// Each run requires specifying the tool that is generating reporting items. 
+ void createRun(const llvm::StringRef ShortToolName, + const llvm::StringRef LongToolName, + const llvm::StringRef ToolVersion = CLANG_VERSION_STRING); + + /// If there is a current run, end it. + /// + /// This method collects various book-keeping required to clear and close + /// resources associated with the current run, but may also allocate some + /// for the next run. + /// + /// Calling \ref endRun before associating a run through \ref createRun leads + /// to undefined behaviour. + void endRun(); + + /// Associate the given rule with the current run. + /// + /// Returns an integer rule index for the created rule that is unique within + /// the current run, which can then be used to create a \ref SarifResult + /// to add to the current run. Note that a rule must exist before being + /// referenced by a result. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + size_t createRule(const SarifRule &Rule); + + /// Append a new result to the currently in-flight run. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + /// \pre + /// \c RuleIdx used to create the result must correspond to a rule known by + /// the SARIF document. It must be the value returned by a previous call + /// to \ref createRule. + void appendResult(const SarifResult &SarifResult); + + /// Return the SARIF document in its current state. + /// Calling this will trigger a copy of the internal state including all + /// reported diagnostics, resulting in an expensive call. + llvm::json::Object createDocument(); + +private: + /// Source Manager to use for the current SARIF document. + const SourceManager &SourceMgr; + + /// Flag to track the state of this document: + /// A closed document is one on which a new runs must be created. 
+ /// This could be a document that is freshly created, or has recently + /// finished writing to a previous run. + bool Closed = true; + + /// A sequence of SARIF runs. + /// Each run object describes a single run of an analysis tool and contains + /// the output of that run. + /// + /// Reference: run object + llvm::json::Array Runs; + + /// The list of rules associated with the most recent active run. These are + /// defined using the diagnostics passed to the SarifDocument. Each rule + /// need not be unique through the result set. E.g. there may be several + /// 'syntax' errors throughout code under analysis, each of which has its + /// own specific diagnostic message (and consequently, RuleId). Rules are + /// also known as "reportingDescriptor" objects in SARIF. + /// + /// Reference: rules property + llvm::SmallVector CurrentRules; + + /// The list of artifacts that have been encountered on the most recent active + /// run. An artifact is defined in SARIF as a sequence of bytes addressable + /// by a URI. A common example for clang's case would be files named by + /// filesystem paths. + llvm::StringMap CurrentArtifacts; +}; +} // namespace clang + +#endif // LLVM_CLANG_BASIC_SARIF_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index c815b571bc9c0..284e73b1c11fd 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -63,6 +63,7 @@ add_clang_library(clangBasic NoSanitizeList.cpp SanitizerSpecialCaseList.cpp Sanitizers.cpp + Sarif.cpp SourceLocation.cpp SourceManager.cpp Stack.cpp diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp new file mode 100644 index 0000000000000..668e60d47eecd --- /dev/null +++ b/clang/lib/Basic/Sarif.cpp @@ -0,0 +1,389 @@ +//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the SARIFDocumentWriter class, and +/// associated builders such as: +/// - \ref SarifArtifact +/// - \ref SarifArtifactLocation +/// - \ref SarifRule +/// - \ref SarifResult +//===----------------------------------------------------------------------===// +#include "clang/Basic/Sarif.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" + +#include +#include + +using namespace clang; +using namespace llvm; + +using clang::detail::SarifArtifact; +using clang::detail::SarifArtifactLocation; + +static StringRef getFileName(const FileEntry &FE) { + StringRef Filename = FE.tryGetRealPathName(); + if (Filename.empty()) + Filename = FE.getName(); + return Filename; +} +/// \name URI +/// @{ + +/// \internal +/// \brief +/// Return the RFC3986 encoding of the input character. +/// +/// \param C Character to encode to RFC3986. +/// +/// \return The RFC3986 representation of \c C. +static std::string percentEncodeURICharacter(char C) { + // RFC 3986 claims alpha, numeric, and this handful of + // characters are not reserved for the path component and + // should be written out directly. Otherwise, percent + // encode the character and write that out instead of the + // reserved character. + if (llvm::isAlnum(C) || + StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) + return std::string(&C, 1); + return "%" + llvm::toHex(StringRef(&C, 1)); +} + +/// \internal +/// \brief Return a URI representing the given file name. +/// +/// \param Filename The filename to be represented as URI. 
+/// +/// \return RFC3986 URI representing the input file name. +static std::string fileNameToURI(StringRef Filename) { + SmallString<32> Ret = StringRef("file://"); + + // Get the root name to see if it has a URI authority. + StringRef Root = sys::path::root_name(Filename); + if (Root.startswith("//")) { + // There is an authority, so add it to the URI. + Ret += Root.drop_front(2).str(); + } else if (!Root.empty()) { + // There is no authority, so end the component and add the root to the URI. + Ret += Twine("/" + Root).str(); + } + + auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); + assert(Iter != End && "Expected there to be a non-root path component."); + // Add the rest of the path components, encoding any reserved characters; + // we skip past the first path component, as it was handled it above. + std::for_each(++Iter, End, [&Ret](StringRef Component) { + // For reasons unknown to me, we may get a backslash with Windows native + // paths for the initial backslash following the drive component, which + // we need to ignore as a URI path part. + if (Component == "\\") + return; + + // Add the separator between the previous path part and the one being + // currently processed. + Ret += "/"; + + // URI encode the part. + for (char C : Component) { + Ret += percentEncodeURICharacter(C); + } + }); + + return std::string(Ret); +} +/// @} + +/// \brief Calculate the column position expressed in the number of UTF-8 code +/// points from column start to the source location +/// +/// \param Loc The source location whose column needs to be calculated. +/// \param TokenLen Optional hint for when the token is multiple bytes long. +/// +/// \return The column number as a UTF-8 aware byte offset from column start to +/// the effective source location. 
+static unsigned int adjustColumnPos(FullSourceLoc Loc, + unsigned int TokenLen = 0) { + assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); + + std::pair LocInfo = Loc.getDecomposedLoc(); + Optional Buf = + Loc.getManager().getBufferOrNone(LocInfo.first); + assert(Buf && "got an invalid buffer for the location's file"); + assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && + "token extends past end of buffer?"); + + // Adjust the offset to be the start of the line, since we'll be counting + // Unicode characters from there until our column offset. + unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); + unsigned int Ret = 1; + while (Off < (LocInfo.second + TokenLen)) { + Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); + Ret++; + } + + return Ret; +} + +/// \name SARIF Utilities +/// @{ + +/// \internal +json::Object createMessage(StringRef Text) { + return json::Object{{"text", Text.str()}}; +} + +/// \internal +/// \pre CharSourceRange must be a token range +static json::Object createTextRegion(const SourceManager &SM, + const CharSourceRange &R) { + FullSourceLoc FirstTokenLoc{R.getBegin(), SM}; + FullSourceLoc LastTokenLoc{R.getEnd(), SM}; + json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()}, + {"startColumn", adjustColumnPos(FirstTokenLoc)}, + {"endColumn", adjustColumnPos(LastTokenLoc)}}; + if (FirstTokenLoc != LastTokenLoc) { + Region["endLine"] = LastTokenLoc.getExpansionLineNumber(); + } + return Region; +} + +static json::Object createLocation(json::Object &&PhysicalLocation, + StringRef Message = "") { + json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; + if (!Message.empty()) + Ret.insert({"message", createMessage(Message)}); + return Ret; +} + +static StringRef importanceToStr(ThreadFlowImportance I) { + switch (I) { + case ThreadFlowImportance::Important: + return "important"; + case ThreadFlowImportance::Essential: + return "essential"; + case 
ThreadFlowImportance::Unimportant: + return "unimportant"; + } + llvm_unreachable("Fully covered switch is not so fully covered"); +} + +static json::Object +createThreadFlowLocation(json::Object &&Location, + const ThreadFlowImportance &Importance) { + return json::Object{{"location", std::move(Location)}, + {"importance", importanceToStr(Importance)}}; +} +/// @} + +json::Object +SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { + assert(R.isValid() && + "Cannot create a physicalLocation from invalid SourceRange!"); + assert(R.isCharRange() && + "Cannot create a physicalLocation from a token range!"); + FullSourceLoc Start{R.getBegin(), SourceMgr}; + const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); + assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); + + const std::string &FileURI = fileNameToURI(getFileName(*FE)); + auto I = CurrentArtifacts.find(FileURI); + + if (I == CurrentArtifacts.end()) { + uint32_t Idx = static_cast(CurrentArtifacts.size()); + const SarifArtifactLocation &Location = + SarifArtifactLocation::create(FileURI).setIndex(Idx); + const SarifArtifact &Artifact = SarifArtifact::create(Location) + .setRoles({"resultFile"}) + .setLength(FE->getSize()) + .setMimeType("text/plain"); + auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); + // If inserted, ensure the original iterator points to the newly inserted + // element, so it can be used downstream. + if (StatusIter.second) + I = StatusIter.first; + } + assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); + const SarifArtifactLocation &Location = I->second.Location; + uint32_t Idx = Location.Index.getValue(); + return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}}, + {"region", createTextRegion(SourceMgr, R)}}}; +} + +json::Object &SarifDocumentWriter::getCurrentTool() { + assert(!Closed && "SARIF Document is closed. 
" + "Need to call createRun() before using getcurrentTool!"); + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state. + assert(!Runs.empty() && "There are no runs associated with the document!"); + + return *Runs.back().getAsObject()->get("tool")->getAsObject(); +} + +void SarifDocumentWriter::reset() { + CurrentRules.clear(); + CurrentArtifacts.clear(); +} + +void SarifDocumentWriter::endRun() { + // Exit early if trying to close a closed Document. + if (Closed) { + reset(); + return; + } + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state. + assert(!Runs.empty() && "There are no runs associated with the document!"); + + // Flush all the rules. + json::Object &Tool = getCurrentTool(); + json::Array Rules; + for (const SarifRule &R : CurrentRules) { + json::Object Rule{ + {"name", R.Name}, + {"id", R.Id}, + {"fullDescription", json::Object{{"text", R.Description}}}}; + if (!R.HelpURI.empty()) + Rule["helpUri"] = R.HelpURI; + Rules.emplace_back(std::move(Rule)); + } + json::Object &Driver = *Tool.getObject("driver"); + Driver["rules"] = std::move(Rules); + + // Flush all the artifacts. + json::Object &Run = getCurrentRun(); + json::Array *Artifacts = Run.getArray("artifacts"); + for (const auto &Pair : CurrentArtifacts) { + const SarifArtifact &A = Pair.getValue(); + json::Object Loc{{"uri", A.Location.URI}}; + if (A.Location.Index.hasValue()) { + Loc["index"] = static_cast(A.Location.Index.getValue()); + } + json::Object Artifact; + Artifact["location"] = std::move(Loc); + if (A.Length.hasValue()) + Artifact["length"] = static_cast(A.Length.getValue()); + if (!A.Roles.empty()) + Artifact["roles"] = json::Array(A.Roles); + if (!A.MimeType.empty()) + Artifact["mimeType"] = A.MimeType; + if (A.Offset.hasValue()) + Artifact["offset"] = A.Offset; + Artifacts->push_back(json::Value(std::move(Artifact))); + } + + // Clear, reset temporaries before next run. 
+ reset(); + + // Mark the document as closed. + Closed = true; +} + +json::Array +SarifDocumentWriter::createThreadFlows(ArrayRef ThreadFlows) { + json::Object Ret{{"locations", json::Array{}}}; + json::Array Locs; + for (const auto &ThreadFlow : ThreadFlows) { + json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); + json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); + Locs.emplace_back( + createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); + } + Ret["locations"] = std::move(Locs); + return json::Array{std::move(Ret)}; +} + +json::Object +SarifDocumentWriter::createCodeFlow(ArrayRef ThreadFlows) { + return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; +} + +void SarifDocumentWriter::createRun(StringRef ShortToolName, + StringRef LongToolName, + StringRef ToolVersion) { + // Clear resources associated with a previous run. + endRun(); + + // Signify a new run has begun. + Closed = false; + + json::Object Tool{ + {"driver", + json::Object{{"name", ShortToolName}, + {"fullName", LongToolName}, + {"language", "en-US"}, + {"version", ToolVersion}, + {"informationUri", + "https://clang.llvm.org/docs/UsersManual.html"}}}}; + json::Object TheRun{{"tool", std::move(Tool)}, + {"results", {}}, + {"artifacts", {}}, + {"columnKind", "unicodeCodePoints"}}; + Runs.emplace_back(std::move(TheRun)); +} + +json::Object &SarifDocumentWriter::getCurrentRun() { + assert(!Closed && + "SARIF Document is closed. " + "Can only getCurrentRun() if document is opened via createRun(), " + "create a run first"); + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state. 
+ assert(!Runs.empty() && "There are no runs associated with the document!"); + return *Runs.back().getAsObject(); +} + +size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { + size_t Ret = CurrentRules.size(); + CurrentRules.emplace_back(Rule); + return Ret; +} + +void SarifDocumentWriter::appendResult(const SarifResult &Result) { + size_t RuleIdx = Result.RuleIdx; + assert(RuleIdx < CurrentRules.size() && + "Trying to reference a rule that doesn't exist"); + json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, + {"ruleIndex", static_cast(RuleIdx)}, + {"ruleId", CurrentRules[RuleIdx].Id}}; + if (!Result.Locations.empty()) { + json::Array Locs; + for (auto &Range : Result.Locations) { + Locs.emplace_back(createLocation(createPhysicalLocation(Range))); + } + Ret["locations"] = std::move(Locs); + } + if (!Result.ThreadFlows.empty()) + Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; + json::Object &Run = getCurrentRun(); + json::Array *Results = Run.getArray("results"); + Results->emplace_back(std::move(Ret)); +} + +json::Object SarifDocumentWriter::createDocument() { + // Flush all temporaries to their destinations if needed. 
+ endRun(); + + json::Object Doc{ + {"$schema", SchemaURI}, + {"version", SchemaVersion}, + }; + if (!Runs.empty()) + Doc["runs"] = json::Array(Runs); + return Doc; +} diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt index b6f5d79e87c77..6c00f63332af4 100644 --- a/clang/unittests/Basic/CMakeLists.txt +++ b/clang/unittests/Basic/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(BasicTests FileManagerTest.cpp LineOffsetMappingTest.cpp SanitizersTest.cpp + SarifTest.cpp SourceManagerTest.cpp ) diff --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp new file mode 100644 index 0000000000000..ff58c6b78b2e0 --- /dev/null +++ b/clang/unittests/Basic/SarifTest.cpp @@ -0,0 +1,320 @@ +//===- unittests/Basic/SarifTest.cpp - Test writing SARIF documents -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Sarif.h" +#include "clang/Basic/DiagnosticIDs.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/FileSystemOptions.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "gmock/gmock-matchers.h" +#include "gtest/gtest-death-test.h" +#include "gtest/gtest-matchers.h" +#include "gtest/gtest.h" + +#include + +using namespace clang; + +namespace { + +using LineCol = std::pair; + +static std::string serializeSarifDocument(llvm::json::Object &&Doc) { + std::string Output; + 
llvm::json::Value value(std::move(Doc)); + llvm::raw_string_ostream OS{Output}; + OS << llvm::formatv("{0}", value); + OS.flush(); + return Output; +} + +class SarifDocumentWriterTest : public ::testing::Test { +protected: + SarifDocumentWriterTest() + : InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem), + FileMgr(FileSystemOptions(), InMemoryFileSystem), + DiagID(new DiagnosticIDs()), DiagOpts(new DiagnosticOptions()), + Diags(DiagID, DiagOpts.get(), new IgnoringDiagConsumer()), + SourceMgr(Diags, FileMgr) {} + + IntrusiveRefCntPtr InMemoryFileSystem; + FileManager FileMgr; + IntrusiveRefCntPtr DiagID; + IntrusiveRefCntPtr DiagOpts; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + LangOptions LangOpts; + + FileID registerSource(llvm::StringRef Name, const char *SourceText, + bool IsMainFile = false) { + std::unique_ptr SourceBuf = + llvm::MemoryBuffer::getMemBuffer(SourceText); + const FileEntry *SourceFile = + FileMgr.getVirtualFile(Name, SourceBuf->getBufferSize(), 0); + SourceMgr.overrideFileContents(SourceFile, std::move(SourceBuf)); + FileID FID = SourceMgr.getOrCreateFileID(SourceFile, SrcMgr::C_User); + if (IsMainFile) + SourceMgr.setMainFileID(FID); + return FID; + } + + CharSourceRange getFakeCharSourceRange(FileID FID, LineCol Begin, + LineCol End) { + auto BeginLoc = SourceMgr.translateLineCol(FID, Begin.first, Begin.second); + auto EndLoc = SourceMgr.translateLineCol(FID, End.first, End.second); + return CharSourceRange{SourceRange{BeginLoc, EndLoc}, /* ITR = */ false}; + } +}; + +TEST_F(SarifDocumentWriterTest, createEmptyDocument) { + // GIVEN: + SarifDocumentWriter Writer{SourceMgr}; + + // WHEN: + const llvm::json::Object &EmptyDoc = Writer.createDocument(); + std::vector Keys(EmptyDoc.size()); + std::transform(EmptyDoc.begin(), EmptyDoc.end(), Keys.begin(), + [](auto item) { return item.getFirst(); }); + + // THEN: + ASSERT_THAT(Keys, testing::UnorderedElementsAre("$schema", "version")); +} + +// Test that a newly inserted run will 
associate correct tool names +TEST_F(SarifDocumentWriterTest, documentWithARun) { + // GIVEN: + SarifDocumentWriter Writer{SourceMgr}; + const char *ShortName = "sariftest"; + const char *LongName = "sarif writer test"; + + // WHEN: + Writer.createRun(ShortName, LongName); + Writer.endRun(); + const llvm::json::Object &Doc = Writer.createDocument(); + const llvm::json::Array *Runs = Doc.getArray("runs"); + + // THEN: + // A run was created + ASSERT_THAT(Runs, testing::NotNull()); + + // It is the only run + ASSERT_EQ(Runs->size(), 1UL); + + // The tool associated with the run was the tool + const llvm::json::Object *driver = + Runs->begin()->getAsObject()->getObject("tool")->getObject("driver"); + ASSERT_THAT(driver, testing::NotNull()); + + ASSERT_TRUE(driver->getString("name").hasValue()); + ASSERT_TRUE(driver->getString("fullName").hasValue()); + ASSERT_TRUE(driver->getString("language").hasValue()); + + EXPECT_EQ(driver->getString("name").getValue(), ShortName); + EXPECT_EQ(driver->getString("fullName").getValue(), LongName); + EXPECT_EQ(driver->getString("language").getValue(), "en-US"); +} + +// Test adding result without a run causes a crash +TEST_F(SarifDocumentWriterTest, addingResultsWillCrashIfThereIsNoRun) { + // GIVEN: + SarifDocumentWriter Writer{SourceMgr}; + + // WHEN: + // A SarifDocumentWriter::createRun(...) was not called prior to + // SarifDocumentWriter::appendResult(...) 
+ // But a rule exists + auto RuleIdx = Writer.createRule(SarifRule::create()); + const SarifResult &EmptyResult = SarifResult::create(RuleIdx); + + // THEN: + ASSERT_DEATH({ Writer.appendResult(EmptyResult); }, ".*create a run first.*"); +} + +// Test adding rule and result shows up in the final document +TEST_F(SarifDocumentWriterTest, addResultWithValidRuleIsOk) { + // GIVEN: + SarifDocumentWriter Writer{SourceMgr}; + const SarifRule &Rule = + SarifRule::create() + .setRuleId("clang.unittest") + .setDescription("Example rule created during unit tests") + .setName("clang unit test"); + + // WHEN: + Writer.createRun("sarif test", "sarif test runner"); + unsigned RuleIdx = Writer.createRule(Rule); + const SarifResult &result = SarifResult::create(RuleIdx); + + Writer.appendResult(result); + const llvm::json::Object &Doc = Writer.createDocument(); + + // THEN: + // A document with a valid schema and version exists + ASSERT_THAT(Doc.get("$schema"), ::testing::NotNull()); + ASSERT_THAT(Doc.get("version"), ::testing::NotNull()); + const llvm::json::Array *Runs = Doc.getArray("runs"); + + // A run exists on this document + ASSERT_THAT(Runs, ::testing::NotNull()); + ASSERT_EQ(Runs->size(), 1UL); + const llvm::json::Object *TheRun = Runs->back().getAsObject(); + + // The run has slots for tools, results, rules and artifacts + ASSERT_THAT(TheRun->get("tool"), ::testing::NotNull()); + ASSERT_THAT(TheRun->get("results"), ::testing::NotNull()); + ASSERT_THAT(TheRun->get("artifacts"), ::testing::NotNull()); + const llvm::json::Object *Driver = + TheRun->getObject("tool")->getObject("driver"); + const llvm::json::Array *Results = TheRun->getArray("results"); + const llvm::json::Array *Artifacts = TheRun->getArray("artifacts"); + + // The tool is as expected + ASSERT_TRUE(Driver->getString("name").hasValue()); + ASSERT_TRUE(Driver->getString("fullName").hasValue()); + + EXPECT_EQ(Driver->getString("name").getValue(), "sarif test"); + 
EXPECT_EQ(Driver->getString("fullName").getValue(), "sarif test runner"); + + // The results are as expected + EXPECT_EQ(Results->size(), 1UL); + + // The artifacts are as expected + EXPECT_TRUE(Artifacts->empty()); +} + +TEST_F(SarifDocumentWriterTest, checkSerializingResults) { + // GIVEN: + const std::string ExpectedOutput = + R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[],"columnKind":"unicodeCodePoints","results":[{"message":{"text":""},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})"; + + SarifDocumentWriter Writer{SourceMgr}; + const SarifRule &Rule = + SarifRule::create() + .setRuleId("clang.unittest") + .setDescription("Example rule created during unit tests") + .setName("clang unit test"); + + // WHEN: A run contains a result + Writer.createRun("sarif test", "sarif test runner", "1.0.0"); + unsigned ruleIdx = Writer.createRule(Rule); + const SarifResult &Result = SarifResult::create(ruleIdx); + Writer.appendResult(Result); + std::string Output = serializeSarifDocument(Writer.createDocument()); + + // THEN: + ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput)); +} + +// Check that serializing artifacts from results produces valid SARIF +TEST_F(SarifDocumentWriterTest, checkSerializingArtifacts) { + // GIVEN: + const std::string ExpectedOutput = + 
R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":40,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":14,"startColumn":14,"startLine":3}}}],"message":{"text":"expected ';' after top level declarator"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})"; + + SarifDocumentWriter Writer{SourceMgr}; + const SarifRule &Rule = + SarifRule::create() + .setRuleId("clang.unittest") + .setDescription("Example rule created during unit tests") + .setName("clang unit test"); + + // WHEN: A result is added with valid source locations for its diagnostics + Writer.createRun("sarif test", "sarif test runner", "1.0.0"); + unsigned RuleIdx = Writer.createRule(Rule); + + llvm::SmallVector DiagLocs; + const char *SourceText = "int foo = 0;\n" + "int bar = 1;\n" + "float x = 0.0\n"; + + FileID MainFileID = + registerSource("/main.cpp", SourceText, /* IsMainFile = */ true); + CharSourceRange SourceCSR = + getFakeCharSourceRange(MainFileID, {3, 14}, {3, 14}); + + DiagLocs.push_back(SourceCSR); + + const SarifResult &Result = + SarifResult::create(RuleIdx).setLocations(DiagLocs).setDiagnosticMessage( + "expected ';' after top level declarator"); + Writer.appendResult(Result); + std::string Output = serializeSarifDocument(Writer.createDocument()); + + // THEN: Assert that the serialized SARIF is as expected + ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput)); +} + +TEST_F(SarifDocumentWriterTest, checkSerializingCodeflows) 
{ + // GIVEN: + const std::string ExpectedOutput = + R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":27,"location":{"index":1,"uri":"file:///test-header-1.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":30,"location":{"index":2,"uri":"file:///test-header-2.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":28,"location":{"index":3,"uri":"file:///test-header-3.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":41,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"codeFlows":[{"threadFlows":[{"locations":[{"importance":"essential","location":{"message":{"text":"Message #1"},"physicalLocation":{"artifactLocation":{"index":1},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"important","location":{"message":{"text":"Message #2"},"physicalLocation":{"artifactLocation":{"index":2},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"unimportant","location":{"message":{"text":"Message #3"},"physicalLocation":{"artifactLocation":{"index":3},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}}]}]}],"locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":8,"endLine":2,"startColumn":5,"startLine":2}}}],"message":{"text":"Redefinition of 'foo'"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})"; + + const char *SourceText = "int foo = 0;\n" + "int foo = 1;\n" + "float x = 0.0;\n"; + FileID MainFileID = + registerSource("/main.cpp", SourceText, /* 
IsMainFile = */ true); + CharSourceRange DiagLoc{getFakeCharSourceRange(MainFileID, {2, 5}, {2, 8})}; + + SarifDocumentWriter Writer{SourceMgr}; + const SarifRule &Rule = + SarifRule::create() + .setRuleId("clang.unittest") + .setDescription("Example rule created during unit tests") + .setName("clang unit test"); + + constexpr unsigned int NUM_CASES = 3; + llvm::SmallVector Threadflows; + const char *HeaderTexts[NUM_CASES]{("#pragma once\n" + "#include "), + ("#ifndef FOO\n" + "#define FOO\n" + "#endif"), + ("#ifdef FOO\n" + "#undef FOO\n" + "#endif")}; + const char *HeaderNames[NUM_CASES]{"/test-header-1.h", "/test-header-2.h", + "/test-header-3.h"}; + ThreadFlowImportance Importances[NUM_CASES]{ + ThreadFlowImportance::Essential, ThreadFlowImportance::Important, + ThreadFlowImportance::Unimportant}; + for (size_t Idx = 0; Idx != NUM_CASES; ++Idx) { + FileID FID = registerSource(HeaderNames[Idx], HeaderTexts[Idx]); + CharSourceRange &&CSR = getFakeCharSourceRange(FID, {1, 1}, {2, 8}); + std::string Message = llvm::formatv("Message #{0}", Idx + 1); + ThreadFlow Item = ThreadFlow::create() + .setRange(CSR) + .setImportance(Importances[Idx]) + .setMessage(Message); + Threadflows.push_back(Item); + } + + // WHEN: A result containing code flows and diagnostic locations is added + Writer.createRun("sarif test", "sarif test runner", "1.0.0"); + unsigned RuleIdx = Writer.createRule(Rule); + const SarifResult &Result = SarifResult::create(RuleIdx) + .setLocations({DiagLoc}) + .setDiagnosticMessage("Redefinition of 'foo'") + .setThreadFlows(Threadflows); + Writer.appendResult(Result); + std::string Output = serializeSarifDocument(Writer.createDocument()); + + // THEN: Assert that the serialized SARIF is as expected + ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput)); +} + +} // namespace