diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h deleted file mode 100644 index 818d78668ff155..00000000000000 --- a/clang/include/clang/Basic/Sarif.h +++ /dev/null @@ -1,440 +0,0 @@ -//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult. -/// -/// The document built can be accessed as a JSON Object. -/// Several value semantic types are also introduced which represent properties -/// of the SARIF standard, such as 'artifact', 'result', 'rule'. -/// -/// A SARIF (Static Analysis Results Interchange Format) document is JSON -/// document that describes in detail the results of running static analysis -/// tools on a project. Each (non-trivial) document consists of at least one -/// "run", which are themselves composed of details such as: -/// * Tool: The tool that was run -/// * Rules: The rules applied during the tool run, represented by -/// \c reportingDescriptor objects in SARIF -/// * Results: The matches for the rules applied against the project(s) being -/// evaluated, represented by \c result objects in SARIF -/// -/// Reference: -/// 1. The SARIF standard -/// 2. SARIF
reportingDescriptor
-/// 3. SARIF
result
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_BASIC_SARIF_H -#define LLVM_CLANG_BASIC_SARIF_H - -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/Version.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/JSON.h" -#include -#include -#include -#include -#include - -namespace clang { - -class SarifDocumentWriter; -class SourceManager; - -namespace detail { - -/// \internal -/// An artifact location is SARIF's way of describing the complete location -/// of an artifact encountered during analysis. The \c artifactLocation object -/// typically consists of a URI, and/or an index to reference the artifact it -/// locates. -/// -/// This builder makes an additional assumption: that every artifact encountered -/// by \c clang will be a physical, top-level artifact. Which is why the static -/// creation method \ref SarifArtifactLocation::create takes a mandatory URI -/// parameter. The official standard states that either a \c URI or \c Index -/// must be available in the object, \c clang picks the \c URI as a reasonable -/// default, because it intends to deal in physical artifacts for now. -/// -/// Reference: -/// 1. artifactLocation object -/// 2. \ref SarifArtifact -class SarifArtifactLocation { -private: - friend class clang::SarifDocumentWriter; - - llvm::Optional Index; - std::string URI; - - SarifArtifactLocation() = delete; - explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {} - -public: - static SarifArtifactLocation create(llvm::StringRef URI) { - return SarifArtifactLocation{URI.str()}; - } - - SarifArtifactLocation setIndex(uint32_t Idx) { - Index = Idx; - return *this; - } -}; - -/// \internal -/// An artifact in SARIF is any object (a sequence of bytes) addressable by -/// a URI (RFC 3986). The most common type of artifact for clang's use-case -/// would be source files. SARIF's artifact object is described in detail in -/// section 3.24. -// -/// Since every clang artifact MUST have a location (there being no nested -/// artifacts), the creation method \ref SarifArtifact::create requires a -/// \ref SarifArtifactLocation object. -/// -/// Reference: -/// 1. artifact object -class SarifArtifact { -private: - friend class clang::SarifDocumentWriter; - - llvm::Optional Offset; - llvm::Optional Length; - std::string MimeType; - SarifArtifactLocation Location; - llvm::SmallVector Roles; - - SarifArtifact() = delete; - - explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {} - -public: - static SarifArtifact create(const SarifArtifactLocation &Loc) { - return SarifArtifact{Loc}; - } - - SarifArtifact setOffset(uint32_t ArtifactOffset) { - Offset = ArtifactOffset; - return *this; - } - - SarifArtifact setLength(size_t NumBytes) { - Length = NumBytes; - return *this; - } - - SarifArtifact setRoles(std::initializer_list ArtifactRoles) { - Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end()); - return *this; - } - - SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) { - MimeType = ArtifactMimeType.str(); - return *this; - } -}; - -} // namespace detail - -enum class ThreadFlowImportance { Important, Essential, Unimportant }; - -/// A thread flow is a sequence of code locations that specify a possible path -/// through a single thread of execution. -/// A thread flow in SARIF is related to a code flow which describes -/// the progress of one or more programs through one or more thread flows. -/// -/// Reference: -/// 1. threadFlow object -/// 2. codeFlow object -class ThreadFlow { - friend class SarifDocumentWriter; - - CharSourceRange Range; - ThreadFlowImportance Importance; - std::string Message; - - ThreadFlow() = default; - -public: - static ThreadFlow create() { return {}; } - - ThreadFlow setRange(const CharSourceRange &ItemRange) { - assert(ItemRange.isCharRange() && - "ThreadFlows require a character granular source range!"); - Range = ItemRange; - return *this; - } - - ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) { - Importance = ItemImportance; - return *this; - } - - ThreadFlow setMessage(llvm::StringRef ItemMessage) { - Message = ItemMessage.str(); - return *this; - } -}; - -/// A SARIF rule (\c reportingDescriptor object) contains information that -/// describes a reporting item generated by a tool. A reporting item is -/// either a result of analysis or notification of a condition encountered by -/// the tool. Rules are arbitrary but are identifiable by a hierarchical -/// rule-id. -/// -/// This builder provides an interface to create SARIF \c reportingDescriptor -/// objects via the \ref SarifRule::create static method. -/// -/// Reference: -/// 1. reportingDescriptor object -class SarifRule { - friend class clang::SarifDocumentWriter; - - std::string Name; - std::string Id; - std::string Description; - std::string HelpURI; - - SarifRule() = default; - -public: - static SarifRule create() { return {}; } - - SarifRule setName(llvm::StringRef RuleName) { - Name = RuleName.str(); - return *this; - } - - SarifRule setRuleId(llvm::StringRef RuleId) { - Id = RuleId.str(); - return *this; - } - - SarifRule setDescription(llvm::StringRef RuleDesc) { - Description = RuleDesc.str(); - return *this; - } - - SarifRule setHelpURI(llvm::StringRef RuleHelpURI) { - HelpURI = RuleHelpURI.str(); - return *this; - } -}; - -/// A SARIF result (also called a "reporting item") is a unit of output -/// produced when one of the tool's \c reportingDescriptor encounters a match -/// on the file being analysed by the tool. -/// -/// This builder provides a \ref SarifResult::create static method that can be -/// used to create an empty shell onto which attributes can be added using the -/// \c setX(...) methods. -/// -/// For example: -/// \code{.cpp} -/// SarifResult result = SarifResult::create(...) -/// .setRuleId(...) -/// .setDiagnosticMessage(...); -/// \endcode -/// -/// Reference: -/// 1. SARIF
result
-class SarifResult { - friend class clang::SarifDocumentWriter; - - // NOTE: - // This type cannot fit all possible indexes representable by JSON, but is - // chosen because it is the largest unsigned type that can be safely - // converted to an \c int64_t. - uint32_t RuleIdx; - std::string RuleId; - std::string DiagnosticMessage; - llvm::SmallVector Locations; - llvm::SmallVector ThreadFlows; - - SarifResult() = delete; - explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {} - -public: - static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; } - - SarifResult setIndex(uint32_t Idx) { - RuleIdx = Idx; - return *this; - } - - SarifResult setRuleId(llvm::StringRef Id) { - RuleId = Id.str(); - return *this; - } - - SarifResult setDiagnosticMessage(llvm::StringRef Message) { - DiagnosticMessage = Message.str(); - return *this; - } - - SarifResult setLocations(llvm::ArrayRef DiagLocs) { -#ifndef NDEBUG - for (const auto &Loc : DiagLocs) { - assert(Loc.isCharRange() && - "SARIF Results require character granular source ranges!"); - } -#endif - Locations.assign(DiagLocs.begin(), DiagLocs.end()); - return *this; - } - SarifResult setThreadFlows(llvm::ArrayRef ThreadFlowResults) { - ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end()); - return *this; - } -}; - -/// This class handles creating a valid SARIF document given various input -/// attributes. However, it requires an ordering among certain method calls: -/// -/// 1. Because every SARIF document must contain at least 1 \c run, callers -/// must ensure that \ref SarifDocumentWriter::createRun is is called before -/// any other methods. -/// 2. If SarifDocumentWriter::endRun is called, callers MUST call -/// SarifDocumentWriter::createRun, before invoking any of the result -/// aggregation methods such as SarifDocumentWriter::appendResult etc. -class SarifDocumentWriter { -private: - const llvm::StringRef SchemaURI{ - "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/" - "sarif-schema-2.1.0.json"}; - const llvm::StringRef SchemaVersion{"2.1.0"}; - - /// \internal - /// Return a pointer to the current tool. Asserts that a run exists. - llvm::json::Object &getCurrentTool(); - - /// \internal - /// Checks if there is a run associated with this document. - /// - /// \return true on success - bool hasRun() const; - - /// \internal - /// Reset portions of the internal state so that the document is ready to - /// receive data for a new run. - void reset(); - - /// \internal - /// Return a mutable reference to the current run, after asserting it exists. - /// - /// \note It is undefined behavior to call this if a run does not exist in - /// the SARIF document. - llvm::json::Object &getCurrentRun(); - - /// Create a code flow object for the given threadflows. - /// See \ref ThreadFlow. - /// - /// \note It is undefined behavior to call this if a run does not exist in - /// the SARIF document. - llvm::json::Object - createCodeFlow(const llvm::ArrayRef ThreadFlows); - - /// Add the given threadflows to the ones this SARIF document knows about. - llvm::json::Array - createThreadFlows(const llvm::ArrayRef ThreadFlows); - - /// Add the given \ref CharSourceRange to the SARIF document as a physical - /// location, with its corresponding artifact. - llvm::json::Object createPhysicalLocation(const CharSourceRange &R); - -public: - SarifDocumentWriter() = delete; - - /// Create a new empty SARIF document with the given source manager. - SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} - - /// Release resources held by this SARIF document. - ~SarifDocumentWriter() = default; - - /// Create a new run with which any upcoming analysis will be associated. - /// Each run requires specifying the tool that is generating reporting items. - void createRun(const llvm::StringRef ShortToolName, - const llvm::StringRef LongToolName, - const llvm::StringRef ToolVersion = CLANG_VERSION_STRING); - - /// If there is a current run, end it. - /// - /// This method collects various book-keeping required to clear and close - /// resources associated with the current run, but may also allocate some - /// for the next run. - /// - /// Calling \ref endRun before associating a run through \ref createRun leads - /// to undefined behaviour. - void endRun(); - - /// Associate the given rule with the current run. - /// - /// Returns an integer rule index for the created rule that is unique within - /// the current run, which can then be used to create a \ref SarifResult - /// to add to the current run. Note that a rule must exist before being - /// referenced by a result. - /// - /// \pre - /// There must be a run associated with the document, failing to do so will - /// cause undefined behaviour. - size_t createRule(const SarifRule &Rule); - - /// Append a new result to the currently in-flight run. - /// - /// \pre - /// There must be a run associated with the document, failing to do so will - /// cause undefined behaviour. - /// \pre - /// \c RuleIdx used to create the result must correspond to a rule known by - /// the SARIF document. It must be the value returned by a previous call - /// to \ref createRule. - void appendResult(const SarifResult &SarifResult); - - /// Return the SARIF document in its current state. - /// Calling this will trigger a copy of the internal state including all - /// reported diagnostics, resulting in an expensive call. - llvm::json::Object createDocument(); - -private: - /// Source Manager to use for the current SARIF document. - const SourceManager &SourceMgr; - - /// Flag to track the state of this document: - /// A closed document is one on which a new runs must be created. - /// This could be a document that is freshly created, or has recently - /// finished writing to a previous run. - bool Closed = true; - - /// A sequence of SARIF runs. - /// Each run object describes a single run of an analysis tool and contains - /// the output of that run. - /// - /// Reference: run object - llvm::json::Array Runs; - - /// The list of rules associated with the most recent active run. These are - /// defined using the diagnostics passed to the SarifDocument. Each rule - /// need not be unique through the result set. E.g. there may be several - /// 'syntax' errors throughout code under analysis, each of which has its - /// own specific diagnostic message (and consequently, RuleId). Rules are - /// also known as "reportingDescriptor" objects in SARIF. - /// - /// Reference: rules property - llvm::SmallVector CurrentRules; - - /// The list of artifacts that have been encountered on the most recent active - /// run. An artifact is defined in SARIF as a sequence of bytes addressable - /// by a URI. A common example for clang's case would be files named by - /// filesystem paths. - llvm::StringMap CurrentArtifacts; -}; -} // namespace clang - -#endif // LLVM_CLANG_BASIC_SARIF_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 284e73b1c11fde..c815b571bc9c04 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -63,7 +63,6 @@ add_clang_library(clangBasic NoSanitizeList.cpp SanitizerSpecialCaseList.cpp Sanitizers.cpp - Sarif.cpp SourceLocation.cpp SourceManager.cpp Stack.cpp diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp deleted file mode 100644 index 668e60d47eecd2..00000000000000 --- a/clang/lib/Basic/Sarif.cpp +++ /dev/null @@ -1,389 +0,0 @@ -//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the declaration of the SARIFDocumentWriter class, and -/// associated builders such as: -/// - \ref SarifArtifact -/// - \ref SarifArtifactLocation -/// - \ref SarifRule -/// - \ref SarifResult -//===----------------------------------------------------------------------===// -#include "clang/Basic/Sarif.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/JSON.h" -#include "llvm/Support/Path.h" - -#include -#include - -using namespace clang; -using namespace llvm; - -using clang::detail::SarifArtifact; -using clang::detail::SarifArtifactLocation; - -static StringRef getFileName(const FileEntry &FE) { - StringRef Filename = FE.tryGetRealPathName(); - if (Filename.empty()) - Filename = FE.getName(); - return Filename; -} -/// \name URI -/// @{ - -/// \internal -/// \brief -/// Return the RFC3986 encoding of the input character. -/// -/// \param C Character to encode to RFC3986. -/// -/// \return The RFC3986 representation of \c C. -static std::string percentEncodeURICharacter(char C) { - // RFC 3986 claims alpha, numeric, and this handful of - // characters are not reserved for the path component and - // should be written out directly. Otherwise, percent - // encode the character and write that out instead of the - // reserved character. - if (llvm::isAlnum(C) || - StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) - return std::string(&C, 1); - return "%" + llvm::toHex(StringRef(&C, 1)); -} - -/// \internal -/// \brief Return a URI representing the given file name. -/// -/// \param Filename The filename to be represented as URI. -/// -/// \return RFC3986 URI representing the input file name. -static std::string fileNameToURI(StringRef Filename) { - SmallString<32> Ret = StringRef("file://"); - - // Get the root name to see if it has a URI authority. - StringRef Root = sys::path::root_name(Filename); - if (Root.startswith("//")) { - // There is an authority, so add it to the URI. - Ret += Root.drop_front(2).str(); - } else if (!Root.empty()) { - // There is no authority, so end the component and add the root to the URI. - Ret += Twine("/" + Root).str(); - } - - auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); - assert(Iter != End && "Expected there to be a non-root path component."); - // Add the rest of the path components, encoding any reserved characters; - // we skip past the first path component, as it was handled it above. - std::for_each(++Iter, End, [&Ret](StringRef Component) { - // For reasons unknown to me, we may get a backslash with Windows native - // paths for the initial backslash following the drive component, which - // we need to ignore as a URI path part. - if (Component == "\\") - return; - - // Add the separator between the previous path part and the one being - // currently processed. - Ret += "/"; - - // URI encode the part. - for (char C : Component) { - Ret += percentEncodeURICharacter(C); - } - }); - - return std::string(Ret); -} -/// @} - -/// \brief Calculate the column position expressed in the number of UTF-8 code -/// points from column start to the source location -/// -/// \param Loc The source location whose column needs to be calculated. -/// \param TokenLen Optional hint for when the token is multiple bytes long. -/// -/// \return The column number as a UTF-8 aware byte offset from column start to -/// the effective source location. -static unsigned int adjustColumnPos(FullSourceLoc Loc, - unsigned int TokenLen = 0) { - assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); - - std::pair LocInfo = Loc.getDecomposedLoc(); - Optional Buf = - Loc.getManager().getBufferOrNone(LocInfo.first); - assert(Buf && "got an invalid buffer for the location's file"); - assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && - "token extends past end of buffer?"); - - // Adjust the offset to be the start of the line, since we'll be counting - // Unicode characters from there until our column offset. - unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); - unsigned int Ret = 1; - while (Off < (LocInfo.second + TokenLen)) { - Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); - Ret++; - } - - return Ret; -} - -/// \name SARIF Utilities -/// @{ - -/// \internal -json::Object createMessage(StringRef Text) { - return json::Object{{"text", Text.str()}}; -} - -/// \internal -/// \pre CharSourceRange must be a token range -static json::Object createTextRegion(const SourceManager &SM, - const CharSourceRange &R) { - FullSourceLoc FirstTokenLoc{R.getBegin(), SM}; - FullSourceLoc LastTokenLoc{R.getEnd(), SM}; - json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()}, - {"startColumn", adjustColumnPos(FirstTokenLoc)}, - {"endColumn", adjustColumnPos(LastTokenLoc)}}; - if (FirstTokenLoc != LastTokenLoc) { - Region["endLine"] = LastTokenLoc.getExpansionLineNumber(); - } - return Region; -} - -static json::Object createLocation(json::Object &&PhysicalLocation, - StringRef Message = "") { - json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; - if (!Message.empty()) - Ret.insert({"message", createMessage(Message)}); - return Ret; -} - -static StringRef importanceToStr(ThreadFlowImportance I) { - switch (I) { - case ThreadFlowImportance::Important: - return "important"; - case ThreadFlowImportance::Essential: - return "essential"; - case ThreadFlowImportance::Unimportant: - return "unimportant"; - } - llvm_unreachable("Fully covered switch is not so fully covered"); -} - -static json::Object -createThreadFlowLocation(json::Object &&Location, - const ThreadFlowImportance &Importance) { - return json::Object{{"location", std::move(Location)}, - {"importance", importanceToStr(Importance)}}; -} -/// @} - -json::Object -SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { - assert(R.isValid() && - "Cannot create a physicalLocation from invalid SourceRange!"); - assert(R.isCharRange() && - "Cannot create a physicalLocation from a token range!"); - FullSourceLoc Start{R.getBegin(), SourceMgr}; - const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); - assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); - - const std::string &FileURI = fileNameToURI(getFileName(*FE)); - auto I = CurrentArtifacts.find(FileURI); - - if (I == CurrentArtifacts.end()) { - uint32_t Idx = static_cast(CurrentArtifacts.size()); - const SarifArtifactLocation &Location = - SarifArtifactLocation::create(FileURI).setIndex(Idx); - const SarifArtifact &Artifact = SarifArtifact::create(Location) - .setRoles({"resultFile"}) - .setLength(FE->getSize()) - .setMimeType("text/plain"); - auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); - // If inserted, ensure the original iterator points to the newly inserted - // element, so it can be used downstream. - if (StatusIter.second) - I = StatusIter.first; - } - assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); - const SarifArtifactLocation &Location = I->second.Location; - uint32_t Idx = Location.Index.getValue(); - return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}}, - {"region", createTextRegion(SourceMgr, R)}}}; -} - -json::Object &SarifDocumentWriter::getCurrentTool() { - assert(!Closed && "SARIF Document is closed. " - "Need to call createRun() before using getcurrentTool!"); - - // Since Closed = false here, expect there to be at least 1 Run, anything - // else is an invalid state. - assert(!Runs.empty() && "There are no runs associated with the document!"); - - return *Runs.back().getAsObject()->get("tool")->getAsObject(); -} - -void SarifDocumentWriter::reset() { - CurrentRules.clear(); - CurrentArtifacts.clear(); -} - -void SarifDocumentWriter::endRun() { - // Exit early if trying to close a closed Document. - if (Closed) { - reset(); - return; - } - - // Since Closed = false here, expect there to be at least 1 Run, anything - // else is an invalid state. - assert(!Runs.empty() && "There are no runs associated with the document!"); - - // Flush all the rules. - json::Object &Tool = getCurrentTool(); - json::Array Rules; - for (const SarifRule &R : CurrentRules) { - json::Object Rule{ - {"name", R.Name}, - {"id", R.Id}, - {"fullDescription", json::Object{{"text", R.Description}}}}; - if (!R.HelpURI.empty()) - Rule["helpUri"] = R.HelpURI; - Rules.emplace_back(std::move(Rule)); - } - json::Object &Driver = *Tool.getObject("driver"); - Driver["rules"] = std::move(Rules); - - // Flush all the artifacts. - json::Object &Run = getCurrentRun(); - json::Array *Artifacts = Run.getArray("artifacts"); - for (const auto &Pair : CurrentArtifacts) { - const SarifArtifact &A = Pair.getValue(); - json::Object Loc{{"uri", A.Location.URI}}; - if (A.Location.Index.hasValue()) { - Loc["index"] = static_cast(A.Location.Index.getValue()); - } - json::Object Artifact; - Artifact["location"] = std::move(Loc); - if (A.Length.hasValue()) - Artifact["length"] = static_cast(A.Length.getValue()); - if (!A.Roles.empty()) - Artifact["roles"] = json::Array(A.Roles); - if (!A.MimeType.empty()) - Artifact["mimeType"] = A.MimeType; - if (A.Offset.hasValue()) - Artifact["offset"] = A.Offset; - Artifacts->push_back(json::Value(std::move(Artifact))); - } - - // Clear, reset temporaries before next run. - reset(); - - // Mark the document as closed. - Closed = true; -} - -json::Array -SarifDocumentWriter::createThreadFlows(ArrayRef ThreadFlows) { - json::Object Ret{{"locations", json::Array{}}}; - json::Array Locs; - for (const auto &ThreadFlow : ThreadFlows) { - json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); - json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); - Locs.emplace_back( - createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); - } - Ret["locations"] = std::move(Locs); - return json::Array{std::move(Ret)}; -} - -json::Object -SarifDocumentWriter::createCodeFlow(ArrayRef ThreadFlows) { - return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; -} - -void SarifDocumentWriter::createRun(StringRef ShortToolName, - StringRef LongToolName, - StringRef ToolVersion) { - // Clear resources associated with a previous run. - endRun(); - - // Signify a new run has begun. - Closed = false; - - json::Object Tool{ - {"driver", - json::Object{{"name", ShortToolName}, - {"fullName", LongToolName}, - {"language", "en-US"}, - {"version", ToolVersion}, - {"informationUri", - "https://clang.llvm.org/docs/UsersManual.html"}}}}; - json::Object TheRun{{"tool", std::move(Tool)}, - {"results", {}}, - {"artifacts", {}}, - {"columnKind", "unicodeCodePoints"}}; - Runs.emplace_back(std::move(TheRun)); -} - -json::Object &SarifDocumentWriter::getCurrentRun() { - assert(!Closed && - "SARIF Document is closed. " - "Can only getCurrentRun() if document is opened via createRun(), " - "create a run first"); - - // Since Closed = false here, expect there to be at least 1 Run, anything - // else is an invalid state. - assert(!Runs.empty() && "There are no runs associated with the document!"); - return *Runs.back().getAsObject(); -} - -size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { - size_t Ret = CurrentRules.size(); - CurrentRules.emplace_back(Rule); - return Ret; -} - -void SarifDocumentWriter::appendResult(const SarifResult &Result) { - size_t RuleIdx = Result.RuleIdx; - assert(RuleIdx < CurrentRules.size() && - "Trying to reference a rule that doesn't exist"); - json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, - {"ruleIndex", static_cast(RuleIdx)}, - {"ruleId", CurrentRules[RuleIdx].Id}}; - if (!Result.Locations.empty()) { - json::Array Locs; - for (auto &Range : Result.Locations) { - Locs.emplace_back(createLocation(createPhysicalLocation(Range))); - } - Ret["locations"] = std::move(Locs); - } - if (!Result.ThreadFlows.empty()) - Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; - json::Object &Run = getCurrentRun(); - json::Array *Results = Run.getArray("results"); - Results->emplace_back(std::move(Ret)); -} - -json::Object SarifDocumentWriter::createDocument() { - // Flush all temporaries to their destinations if needed. - endRun(); - - json::Object Doc{ - {"$schema", SchemaURI}, - {"version", SchemaVersion}, - }; - if (!Runs.empty()) - Doc["runs"] = json::Array(Runs); - return Doc; -} diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt index 6c00f63332af4c..b6f5d79e87c773 100644 --- a/clang/unittests/Basic/CMakeLists.txt +++ b/clang/unittests/Basic/CMakeLists.txt @@ -10,7 +10,6 @@ add_clang_unittest(BasicTests FileManagerTest.cpp LineOffsetMappingTest.cpp SanitizersTest.cpp - SarifTest.cpp SourceManagerTest.cpp ) diff --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp deleted file mode 100644 index ff58c6b78b2e08..00000000000000 --- a/clang/unittests/Basic/SarifTest.cpp +++ /dev/null @@ -1,320 +0,0 @@ -//===- unittests/Basic/SarifTest.cpp - Test writing SARIF documents -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Sarif.h" -#include "clang/Basic/DiagnosticIDs.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/FileSystemOptions.h" -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/JSON.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Support/raw_ostream.h" -#include "gmock/gmock-matchers.h" -#include "gtest/gtest-death-test.h" -#include "gtest/gtest-matchers.h" -#include "gtest/gtest.h" - -#include - -using namespace clang; - -namespace { - -using LineCol = std::pair; - -static std::string serializeSarifDocument(llvm::json::Object &&Doc) { - std::string Output; - llvm::json::Value value(std::move(Doc)); - llvm::raw_string_ostream OS{Output}; - OS << llvm::formatv("{0}", value); - OS.flush(); - return Output; -} - -class SarifDocumentWriterTest : public ::testing::Test { -protected: - SarifDocumentWriterTest() - : InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem), - FileMgr(FileSystemOptions(), InMemoryFileSystem), - DiagID(new DiagnosticIDs()), DiagOpts(new DiagnosticOptions()), - Diags(DiagID, DiagOpts.get(), new IgnoringDiagConsumer()), - SourceMgr(Diags, FileMgr) {} - - IntrusiveRefCntPtr InMemoryFileSystem; - FileManager FileMgr; - IntrusiveRefCntPtr DiagID; - IntrusiveRefCntPtr DiagOpts; - DiagnosticsEngine Diags; - SourceManager SourceMgr; - LangOptions LangOpts; - - FileID registerSource(llvm::StringRef Name, const char *SourceText, - bool IsMainFile = false) { - std::unique_ptr SourceBuf = - llvm::MemoryBuffer::getMemBuffer(SourceText); - const FileEntry *SourceFile = - FileMgr.getVirtualFile(Name, SourceBuf->getBufferSize(), 0); - SourceMgr.overrideFileContents(SourceFile, std::move(SourceBuf)); - FileID FID = SourceMgr.getOrCreateFileID(SourceFile, SrcMgr::C_User); - if (IsMainFile) - SourceMgr.setMainFileID(FID); - return FID; - } - - CharSourceRange getFakeCharSourceRange(FileID FID, LineCol Begin, - LineCol End) { - auto BeginLoc = SourceMgr.translateLineCol(FID, Begin.first, Begin.second); - auto EndLoc = SourceMgr.translateLineCol(FID, End.first, End.second); - return CharSourceRange{SourceRange{BeginLoc, EndLoc}, /* ITR = */ false}; - } -}; - -TEST_F(SarifDocumentWriterTest, createEmptyDocument) { - // GIVEN: - SarifDocumentWriter Writer{SourceMgr}; - - // WHEN: - const llvm::json::Object &EmptyDoc = Writer.createDocument(); - std::vector Keys(EmptyDoc.size()); - std::transform(EmptyDoc.begin(), EmptyDoc.end(), Keys.begin(), - [](auto item) { return item.getFirst(); }); - - // THEN: - ASSERT_THAT(Keys, testing::UnorderedElementsAre("$schema", "version")); -} - -// Test that a newly inserted run will associate correct tool names -TEST_F(SarifDocumentWriterTest, documentWithARun) { - // GIVEN: - SarifDocumentWriter Writer{SourceMgr}; - const char *ShortName = "sariftest"; - const char *LongName = "sarif writer test"; - - // WHEN: - Writer.createRun(ShortName, LongName); - Writer.endRun(); - const llvm::json::Object &Doc = Writer.createDocument(); - const llvm::json::Array *Runs = Doc.getArray("runs"); - - // THEN: - // A run was created - ASSERT_THAT(Runs, testing::NotNull()); - - // It is the only run - ASSERT_EQ(Runs->size(), 1UL); - - // The tool associated with the run was the tool - const llvm::json::Object *driver = - Runs->begin()->getAsObject()->getObject("tool")->getObject("driver"); - ASSERT_THAT(driver, testing::NotNull()); - - ASSERT_TRUE(driver->getString("name").hasValue()); - ASSERT_TRUE(driver->getString("fullName").hasValue()); - ASSERT_TRUE(driver->getString("language").hasValue()); - - EXPECT_EQ(driver->getString("name").getValue(), ShortName); - EXPECT_EQ(driver->getString("fullName").getValue(), LongName); - EXPECT_EQ(driver->getString("language").getValue(), "en-US"); -} - -// Test adding result without a run causes a crash -TEST_F(SarifDocumentWriterTest, addingResultsWillCrashIfThereIsNoRun) { - // GIVEN: - SarifDocumentWriter Writer{SourceMgr}; - - // WHEN: - // A SarifDocumentWriter::createRun(...) was not called prior to - // SarifDocumentWriter::appendResult(...) - // But a rule exists - auto RuleIdx = Writer.createRule(SarifRule::create()); - const SarifResult &EmptyResult = SarifResult::create(RuleIdx); - - // THEN: - ASSERT_DEATH({ Writer.appendResult(EmptyResult); }, ".*create a run first.*"); -} - -// Test adding rule and result shows up in the final document -TEST_F(SarifDocumentWriterTest, addResultWithValidRuleIsOk) { - // GIVEN: - SarifDocumentWriter Writer{SourceMgr}; - const SarifRule &Rule = - SarifRule::create() - .setRuleId("clang.unittest") - .setDescription("Example rule created during unit tests") - .setName("clang unit test"); - - // WHEN: - Writer.createRun("sarif test", "sarif test runner"); - unsigned RuleIdx = Writer.createRule(Rule); - const SarifResult &result = SarifResult::create(RuleIdx); - - Writer.appendResult(result); - const llvm::json::Object &Doc = Writer.createDocument(); - - // THEN: - // A document with a valid schema and version exists - ASSERT_THAT(Doc.get("$schema"), ::testing::NotNull()); - ASSERT_THAT(Doc.get("version"), ::testing::NotNull()); - const llvm::json::Array *Runs = Doc.getArray("runs"); - - // A run exists on this document - ASSERT_THAT(Runs, ::testing::NotNull()); - ASSERT_EQ(Runs->size(), 1UL); - const llvm::json::Object *TheRun = Runs->back().getAsObject(); - - // The run has slots for tools, results, rules and artifacts - ASSERT_THAT(TheRun->get("tool"), ::testing::NotNull()); - ASSERT_THAT(TheRun->get("results"), ::testing::NotNull()); - ASSERT_THAT(TheRun->get("artifacts"), ::testing::NotNull()); - const llvm::json::Object *Driver = - TheRun->getObject("tool")->getObject("driver"); - const llvm::json::Array *Results = TheRun->getArray("results"); - const llvm::json::Array *Artifacts = TheRun->getArray("artifacts"); - - // The tool is as expected - ASSERT_TRUE(Driver->getString("name").hasValue()); - ASSERT_TRUE(Driver->getString("fullName").hasValue()); - - EXPECT_EQ(Driver->getString("name").getValue(), "sarif test"); - EXPECT_EQ(Driver->getString("fullName").getValue(), "sarif test runner"); - - // The results are as expected - EXPECT_EQ(Results->size(), 1UL); - - // The artifacts are as expected - EXPECT_TRUE(Artifacts->empty()); -} - -TEST_F(SarifDocumentWriterTest, checkSerializingResults) { - // GIVEN: - const std::string ExpectedOutput = - R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[],"columnKind":"unicodeCodePoints","results":[{"message":{"text":""},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})"; - - SarifDocumentWriter Writer{SourceMgr}; - const SarifRule &Rule = - SarifRule::create() - .setRuleId("clang.unittest") - .setDescription("Example rule created during unit tests") - .setName("clang unit test"); - - // WHEN: A run contains a result - Writer.createRun("sarif test", "sarif test runner", "1.0.0"); - unsigned ruleIdx = Writer.createRule(Rule); - const SarifResult &Result = SarifResult::create(ruleIdx); - Writer.appendResult(Result); - std::string Output = serializeSarifDocument(Writer.createDocument()); - - // THEN: - ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput)); -} - -// Check that serializing artifacts from results produces valid SARIF -TEST_F(SarifDocumentWriterTest, checkSerializingArtifacts) { - // GIVEN: - const std::string ExpectedOutput = - R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":40,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":14,"startColumn":14,"startLine":3}}}],"message":{"text":"expected ';' after top level declarator"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})"; - - SarifDocumentWriter Writer{SourceMgr}; - const SarifRule &Rule = - SarifRule::create() - .setRuleId("clang.unittest") - .setDescription("Example rule created during unit tests") - .setName("clang unit test"); - - // WHEN: A result is added with valid source locations for its diagnostics - Writer.createRun("sarif test", "sarif test runner", "1.0.0"); - unsigned RuleIdx = Writer.createRule(Rule); - - llvm::SmallVector DiagLocs; - const char *SourceText = "int foo = 0;\n" - "int bar = 1;\n" - "float x = 0.0\n"; - - FileID MainFileID = - registerSource("/main.cpp", SourceText, /* IsMainFile = */ true); - CharSourceRange SourceCSR = - getFakeCharSourceRange(MainFileID, {3, 14}, {3, 14}); - - DiagLocs.push_back(SourceCSR); - - const SarifResult &Result = - SarifResult::create(RuleIdx).setLocations(DiagLocs).setDiagnosticMessage( - "expected ';' after top level declarator"); - Writer.appendResult(Result); - std::string Output = serializeSarifDocument(Writer.createDocument()); - - // THEN: Assert that the serialized SARIF is as expected - ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput)); -} - -TEST_F(SarifDocumentWriterTest, checkSerializingCodeflows) { - // GIVEN: - const std::string ExpectedOutput = - R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":27,"location":{"index":1,"uri":"file:///test-header-1.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":30,"location":{"index":2,"uri":"file:///test-header-2.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":28,"location":{"index":3,"uri":"file:///test-header-3.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":41,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"codeFlows":[{"threadFlows":[{"locations":[{"importance":"essential","location":{"message":{"text":"Message #1"},"physicalLocation":{"artifactLocation":{"index":1},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"important","location":{"message":{"text":"Message #2"},"physicalLocation":{"artifactLocation":{"index":2},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"unimportant","location":{"message":{"text":"Message #3"},"physicalLocation":{"artifactLocation":{"index":3},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}}]}]}],"locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":8,"endLine":2,"startColumn":5,"startLine":2}}}],"message":{"text":"Redefinition of 'foo'"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})"; - - const char *SourceText = "int foo = 0;\n" - "int foo = 1;\n" - "float x = 0.0;\n"; - FileID MainFileID = - registerSource("/main.cpp", SourceText, /* IsMainFile = */ true); - CharSourceRange DiagLoc{getFakeCharSourceRange(MainFileID, {2, 5}, {2, 8})}; - - SarifDocumentWriter Writer{SourceMgr}; - const SarifRule &Rule = - SarifRule::create() - .setRuleId("clang.unittest") - .setDescription("Example rule created during unit tests") - .setName("clang unit test"); - - constexpr unsigned int NUM_CASES = 3; - llvm::SmallVector Threadflows; - const char *HeaderTexts[NUM_CASES]{("#pragma once\n" - "#include "), - ("#ifndef FOO\n" - "#define FOO\n" - "#endif"), - ("#ifdef FOO\n" - "#undef FOO\n" - "#endif")}; - const char *HeaderNames[NUM_CASES]{"/test-header-1.h", "/test-header-2.h", - "/test-header-3.h"}; - ThreadFlowImportance Importances[NUM_CASES]{ - ThreadFlowImportance::Essential, ThreadFlowImportance::Important, - ThreadFlowImportance::Unimportant}; - for (size_t Idx = 0; Idx != NUM_CASES; ++Idx) { - FileID FID = registerSource(HeaderNames[Idx], HeaderTexts[Idx]); - CharSourceRange &&CSR = getFakeCharSourceRange(FID, {1, 1}, {2, 8}); - std::string Message = llvm::formatv("Message #{0}", Idx + 1); - ThreadFlow Item = ThreadFlow::create() - .setRange(CSR) - .setImportance(Importances[Idx]) - .setMessage(Message); - Threadflows.push_back(Item); - } - - // WHEN: A result containing code flows and diagnostic locations is added - Writer.createRun("sarif test", "sarif test runner", "1.0.0"); - unsigned RuleIdx = Writer.createRule(Rule); - const SarifResult &Result = SarifResult::create(RuleIdx) - .setLocations({DiagLoc}) - .setDiagnosticMessage("Redefinition of 'foo'") - .setThreadFlows(Threadflows); - Writer.appendResult(Result); - std::string Output = serializeSarifDocument(Writer.createDocument()); - - // THEN: Assert that the serialized SARIF is as expected - ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput)); -} - -} // namespace