diff --git a/BreakingChanges.md b/BreakingChanges.md index 8c17daa8ae..50cfe65bd7 100644 --- a/BreakingChanges.md +++ b/BreakingChanges.md @@ -2,7 +2,9 @@ ## development HEAD -*None* +- The `AdjacencyList` struct now has one more template argument to denote the integer-like `vertex_t` type. It is the second template argument (which previously was the EdgeType). The edge-type is now denoted by the *third* template argument. +- The `AdjacencyList` switches from using `llvm::NoneType` as empty-node marker to `psr::EmptyType` for forward-compatibility with LLVM-16 that removes `llvm::NoneType`. + ## v2503 diff --git a/cmake/phasar_macros.cmake b/cmake/phasar_macros.cmake index 2bd952852c..e250bf10cb 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -353,7 +353,7 @@ function(add_phasar_library name) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) endfunction(add_phasar_library) -macro(subdirlist result curdir) +function(subdirlist result curdir) file(GLOB children RELATIVE ${curdir} ${curdir}/*) set(dirlist "") @@ -363,5 +363,5 @@ macro(subdirlist result curdir) endif() endforeach() - set(${result} ${dirlist}) -endmacro(subdirlist) + set(${result} ${dirlist} PARENT_SCOPE) +endfunction(subdirlist) diff --git a/include/phasar/PhasarLLVM/ControlFlow.h b/include/phasar/PhasarLLVM/ControlFlow.h index 5ab99e536f..7019a511d7 100644 --- a/include/phasar/PhasarLLVM/ControlFlow.h +++ b/include/phasar/PhasarLLVM/ControlFlow.h @@ -20,5 +20,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #endif // PHASAR_PHASARLLVM_CONTROLFLOW_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h index 165bc20229..78003dcd9b 100644 --- 
a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h +++ b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h @@ -26,6 +26,9 @@ getEntryFunctions(const LLVMProjectIRDB &IRDB, [[nodiscard]] std::vector getEntryFunctionsMut(LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints); + +[[nodiscard]] std::vector +getDefaultEntryPoints(const LLVMProjectIRDB &IRDB); } // namespace psr #endif // PHASAR_PHASARLLVM_UTILS_ENTRYFUNCTIONUTILS_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index 6a3f97c56e..6bc968f4fc 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -18,6 +18,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "nlohmann/json.hpp" + namespace llvm { class Function; } // namespace llvm diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h index 1679e7b5cd..1503b1d318 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h @@ -21,29 +21,66 @@ class DIBasedTypeHierarchy; class LLVMVFTableProvider; class Resolver; +/// Constructs a call-graph using the given CGResolver to resolve indirect +/// calls. +/// +/// Uses a fixpoint iteration, if +/// `CGResolver.mutatesHelperAnalysisInformation()` returns true and the +/// soundness S is not Soundness::Unsound. +/// +/// \param IRDB The IR code where the call-graph should be based on +/// \param CGResolver The resolver to use for resolving indirect calls. +/// \param EntryPoints The functions, where the call-graph construction should +/// start. The resulting call-graph will only contain functions that are +/// (transitively) reachable from the entry-points. +/// \param S The soundness level. May be used to trade soundness for +/// performance. 
[[nodiscard]] LLVMBasedCallGraph -buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, +buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, llvm::ArrayRef EntryPoints, - DIBasedTypeHierarchy &TH, LLVMVFTableProvider &VTP, - LLVMAliasInfoRef PT = nullptr, Soundness S = Soundness::Soundy); +/// Constructs a call-graph using the given CGResolver to resolve indirect +/// calls. +/// +/// Uses a fixpoint iteration, if +/// `CGResolver.mutatesHelperAnalysisInformation()` returns true and the +/// soundness S is not Soundness::Unsound. +/// +/// \param IRDB The IR code where the call-graph should be based on +/// \param CGResolver The resolver to use for resolving indirect calls. +/// \param EntryPoints Names of the functions, where the call-graph construction +/// should start. The resulting call-graph will only contain functions that are +/// (transitively) reachable from the entry-points. +/// \param S The soundness level. May be used to trade soundness for +/// performance. [[nodiscard]] LLVMBasedCallGraph buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, - llvm::ArrayRef EntryPoints, + llvm::ArrayRef EntryPoints, Soundness S = Soundness::Soundy); +/// Kept for compatibility with LLVMBasedICFG. See the constructor of +/// LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *, CallGraphAnalysisType, +/// llvm::ArrayRef, DIBasedTypeHierarchy *, LLVMAliasInfoRef, +/// Soundness, bool) for more information. [[nodiscard]] LLVMBasedCallGraph buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, - llvm::ArrayRef EntryPoints, + llvm::ArrayRef EntryPoints, DIBasedTypeHierarchy &TH, LLVMVFTableProvider &VTP, LLVMAliasInfoRef PT = nullptr, Soundness S = Soundness::Soundy); +/// Kept for compatibility with LLVMBasedICFG. 
See the constructor of +/// LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *, CallGraphAnalysisType, +/// llvm::ArrayRef, DIBasedTypeHierarchy *, LLVMAliasInfoRef, +/// Soundness, bool) for more information. [[nodiscard]] LLVMBasedCallGraph -buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, +buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, llvm::ArrayRef EntryPoints, + DIBasedTypeHierarchy &TH, LLVMVFTableProvider &VTP, + LLVMAliasInfoRef PT = nullptr, Soundness S = Soundness::Soundy); + } // namespace psr #endif // PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPHBUILDER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index b14ca90f75..34984e0358 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -61,10 +61,12 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { /// \param EntryPoints The names of the functions to start with when /// incrementally building up the ICFG. For whole-program analysis of an /// executable use {"main"}. - /// \param TH The type-hierarchy implementation to use. Will be constructed - /// on-the-fly if nullptr, but required + /// \param TH The type-hierarchy implementation to use. Must be non-null, if + /// the selected call-graph analysis requires type-hierarchy information; + /// currently, this holds for the CHA and RTA algorithms. /// \param PT The points-to implementation to use. Will be constructed - /// on-the-fly if nullptr, but required + /// on-the-fly if nullptr, but required; currently, this holds for the OTF and + /// VTA algorithms. /// \param S The soundness level to expect from the analysis. Currently unused /// \param IncludeGlobals Properly include global constructors/destructors /// into the ICFG, if true. 
Requires to generate artificial functions into the diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h new file mode 100644 index 0000000000..04304d6c28 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_PRECOMPUTEDRESOLVER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_PRECOMPUTEDRESOLVER_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/Utils/MaybeUniquePtr.h" + +namespace psr { +/// \brief A Resolver that uses a pre-computed call-graph to resolve indirect +/// calls. +/// +/// \note We eventually may want the LLVMBasedCallGraph to *be* a Resolver. This +/// requires the concept of resolvers to generalize beyond LLVM. 
See +/// for +/// reference +class PrecomputedResolver : public Resolver { +public: + PrecomputedResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, + MaybeUniquePtr BaseCG); + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + + void resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override { + resolveFunctionPointer(PossibleTargets, CallSite); + } + + void resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override; + + [[nodiscard]] std::string str() const override; + +private: + MaybeUniquePtr BaseCG; +}; +} // namespace psr + +#endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index cc1e31100a..408c9f3204 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -18,6 +18,7 @@ #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/MaybeUniquePtr.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" @@ -121,12 +122,37 @@ class Resolver { [[nodiscard]] llvm::ArrayRef getAddressTakenFunctions(); - [[nodiscard]] static std::unique_ptr + using BaseResolverProvider = llvm::function_ref( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT)>; + + /// Factory function to create a Resolver that can be used to implement the + /// given call-graph analysis type. + /// + /// \param Ty Determines the Resolver subclass to instantiate + /// \param IRDB The IR code where the Resolver should be based on. Must not be + /// nullptr. + /// \param VTP A virtual-table-provider that is used to extract C++-VTables + /// from the IR. Must not be nullptr. + /// \param TH The type-hierarchy implementation to use. 
Must be non-null, if + /// the selected call-graph analysis requires type-hierarchy information; + /// currently, this holds for the CHA and RTA algorithms. + /// \param PT The points-to implementation to use. Will be constructed + /// on-the-fly if nullptr, but required; currently, this holds for the OTF and + /// VTA algorithms. + static std::unique_ptr create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT = nullptr); + LLVMAliasInfoRef PT = nullptr, + BaseResolverProvider GetBaseRes = nullptr); protected: + virtual void resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) = 0; + + virtual void resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite); + const llvm::Function * getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, const llvm::CallBase *CallSite, @@ -137,17 +163,12 @@ class Resolver { return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP, ReceiverType); } + // --- + const LLVMProjectIRDB *IRDB{}; const LLVMVFTableProvider *VTP{}; std::optional> AddressTakenFunctions{}; - -protected: - virtual void resolveVirtualCall(FunctionSetTy &PossibleTargets, - const llvm::CallBase *CallSite) = 0; - - virtual void resolveFunctionPointer(FunctionSetTy &PossibleTargets, - const llvm::CallBase *CallSite); }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h new file mode 100644 index 0000000000..4146974f41 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -0,0 +1,112 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/MaybeUniquePtr.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/ADT/STLFunctionalExtras.h" + +namespace psr { + +class LLVMProjectIRDB; + +/// \brief A Resolver that uses a variant of the Variable Type Analysis to +/// resolve indirect calls. +/// +/// Uses debug-information to achieve better results with C++ virtual calls. +/// Uses alias-information as fallback mechanism for when types don't help or +/// are not found, e.g., to resolve function-pointer calls. +/// +/// Requires a base-call-graph or at least a base-resolver to resolve indirect +/// calls while constructing the type-assignment graph. +class VTAResolver : public Resolver { +public: + struct DefaultReachableFunctions { + void operator()(const LLVMProjectIRDB &IRDB, + llvm::function_ref WithFun); + }; + + /// Constructs a VTAResolver with a given pre-computed call-graph and + /// call-back based alias-information (to-be-replaced by AliasIterator once + /// available #783) + /// + /// Builds the type-assignment graph and propagates allocated types through + /// its SCCs. + explicit VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, + MaybeUniquePtr BaseCG); + + /// Constructs a VTAResolver with a given pre-computed call-graph and + /// LLVMAliasInfoRef alias-information. + /// + /// Builds the type-assignment graph and propagates allocated types through + /// its SCCs. 
+ explicit VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, + MaybeUniquePtr BaseCG); + + /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) + /// and call-back based alias-information (to-be-replaced by AliasIterator + /// once available #783). + /// Uses the optional parameter ReachableFunctions to consider only a subset + /// of all functions for building the type-assignment graph + /// + /// Builds the type-assignment graph and propagates allocated types through + /// its SCCs. + explicit VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions = DefaultReachableFunctions{}); + + /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) + /// and LLVMAliasInfoRef alias-information. + /// Uses the optional parameter ReachableFunctions to consider only a subset + /// of all functions for building the type-assignment graph + /// + /// Builds the type-assignment graph and propagates allocated types through + /// its SCCs. 
+ explicit VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions = DefaultReachableFunctions{}); + + [[nodiscard]] std::string str() const override; + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + +private: + void resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override; + + void resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override; + + MaybeUniquePtr BaseResolver{}; + vta::TypeAssignment TA{}; + SCCHolder SCCs{}; + Compressor Nodes; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h new file mode 100644 index 0000000000..cdd07b3b41 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -0,0 +1,231 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_TYPEASSIGNMENTGRAPH_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_TYPEASSIGNMENTGRAPH_H + +#include "phasar/ControlFlow/CallGraph.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/TypedVector.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace psr { +class Resolver; +class LLVMProjectIRDB; +class LLVMVFTableProvider; +} // namespace psr + +namespace llvm { +class DIType; +class Value; +class Function; +} // namespace llvm + +namespace psr::vta { + +enum class TAGNodeId : uint32_t {}; + +struct Variable { + const llvm::Value *Val; +}; + +struct Field { + const llvm::DIType *Base; + size_t ByteOffset; +}; + +struct Return { + const llvm::Function *Fun; +}; + +struct TAGNode { + std::variant Label; +}; + +constexpr bool operator==(Variable L, Variable R) noexcept { + return L.Val == R.Val; +} +constexpr bool operator==(Field L, Field R) noexcept { + return L.Base == R.Base && L.ByteOffset == R.ByteOffset; +} +constexpr bool operator==(Return L, Return R) noexcept { + return L.Fun == R.Fun; +} +constexpr bool operator==(TAGNode L, TAGNode R) noexcept { + return L.Label == R.Label; +} +}; // namespace psr::vta + +namespace llvm { +template <> struct DenseMapInfo { + using TAGNode = psr::vta::TAGNode; + using Variable = psr::vta::Variable; + using Field = psr::vta::Field; + using Return = psr::vta::Return; + + inline static TAGNode getEmptyKey() noexcept { + return {Variable{llvm::DenseMapInfo::getEmptyKey()}}; + } + inline static 
TAGNode getTombstoneKey() noexcept { + return { + Variable{llvm::DenseMapInfo::getTombstoneKey()}}; + } + inline static bool isEqual(TAGNode L, TAGNode R) noexcept { return L == R; } + inline static auto getHashValue(TAGNode TN) noexcept { + if (const auto *Var = std::get_if(&TN.Label)) { + return llvm::hash_combine(0, Var->Val); + } + if (const auto *Fld = std::get_if(&TN.Label)) { + return llvm::hash_combine(1, Fld->Base, Fld->ByteOffset); + } + if (const auto *Ret = std::get_if(&TN.Label)) { + return llvm::hash_combine(2, Ret->Fun); + } + llvm_unreachable("All TAGNode variants should be handled already"); + } +}; + +template <> struct DenseMapInfo { + using GraphNodeId = psr::vta::TAGNodeId; + inline static GraphNodeId getEmptyKey() noexcept { return GraphNodeId(-1); } + inline static GraphNodeId getTombstoneKey() noexcept { + return GraphNodeId(-2); + } + inline static bool isEqual(GraphNodeId L, GraphNodeId R) noexcept { + return L == R; + } + inline static auto getHashValue(GraphNodeId TN) noexcept { + return llvm::hash_value(uint32_t(TN)); + } +}; + +} // namespace llvm + +namespace psr::vta { + +struct TypeAssignmentGraph { + using GraphNodeId = TAGNodeId; + using TypeInfoTy = + llvm::PointerUnion; + Compressor Nodes; + + TypedVector> Adj; + llvm::SmallDenseMap> + TypeEntryPoints; + + [[nodiscard]] inline std::optional get(TAGNode TN) const noexcept { + return Nodes.getOrNull(TN); + } + + [[nodiscard]] inline TAGNode operator[](TAGNodeId Id) const noexcept { + return Nodes[Id]; + } + + inline void addEdge(TAGNodeId From, TAGNodeId To) { + assert(size_t(From) < Adj.size()); + assert(size_t(To) < Adj.size()); + + if (From == To) { + return; + } + + Adj[From].insert(To); + } + + void print(llvm::raw_ostream &OS); +}; + +using AliasHandlerTy = llvm::function_ref; +using AliasInfoTy = llvm::function_ref; + +using ReachableFunsHandlerTy = llvm::function_ref; +using ReachableFunsTy = + llvm::function_ref; + +// TODO: Use AliasIterator here, once available #783 
+[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( + const LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, + AliasInfoTy AS, Resolver &BaseRes, ReachableFunsTy ReachableFunctions); + +void printNode(llvm::raw_ostream &OS, TAGNode TN); +}; // namespace psr::vta + +namespace psr { +template <> struct GraphTraits { + using graph_type = vta::TypeAssignmentGraph; + using value_type = vta::TAGNode; + using vertex_t = vta::TAGNodeId; + using edge_t = vertex_t; + + static constexpr vertex_t Invalid = vertex_t(UINT32_MAX); + + [[nodiscard]] static const auto &outEdges(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + return G.Adj[Vtx]; + } + [[nodiscard]] static size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + return G.Adj[Vtx].size(); + } + + [[nodiscard]] static const auto &nodes(const graph_type &G) noexcept { + return G.Nodes; + } + + [[nodiscard]] static auto roots(const graph_type &G) noexcept { + return llvm::make_first_range(G.TypeEntryPoints); + } + + [[nodiscard]] static auto vertices(const graph_type &G) noexcept { + return iota(G.Adj.size()); + } + + [[nodiscard]] static value_type node(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + assert(G.Adj.size() == G.Nodes.size()); + return G.Nodes[Vtx]; + } + + [[nodiscard]] static size_t size(const graph_type &G) noexcept { + assert(G.Adj.size() == G.Nodes.size()); + return G.Adj.size(); + } + + [[nodiscard]] static size_t + roots_size(const graph_type &G) noexcept { // NOLINT + return G.TypeEntryPoints.size(); + } + + [[nodiscard]] static vertex_t target(edge_t Edge) noexcept { return Edge; } + + [[nodiscard]] static vertex_t withEdgeTarget(edge_t /*Edge*/, + vertex_t NewTgt) noexcept { + return NewTgt; + } +}; +} // namespace psr + +#endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h new file 
mode 100644 index 0000000000..2e6d6b874d --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h @@ -0,0 +1,54 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_TYPEPROPAGATOR_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_TYPEPROPAGATOR_H + +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/Utils/TypedVector.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +class Value; +} // namespace llvm + +namespace psr { +template struct SCCId; +template struct SCCHolder; +template struct SCCDependencyGraph; +template struct SCCOrder; +} // namespace psr + +namespace psr::vta { +struct TypeAssignmentGraph; +enum class TAGNodeId : uint32_t; + +/// \brief A concrete type-assignment that assigns a set of possible types to +/// each SCC of the TypeAssignmentGraph +struct TypeAssignment { + TypedVector, + llvm::SmallDenseSet>> + TypesPerSCC; + + void print(llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs); +}; + +/// Computes a TypeAssignment, based on a given TypeAssignmentGraph +[[nodiscard]] TypeAssignment +propagateTypes(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, + const SCCDependencyGraph &Deps, + const SCCOrder &Order); + +} // namespace psr::vta +#endif diff --git a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h index 03b6c03ea8..c7adb60f88 100644 --- a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h +++ b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h @@ -100,6 +100,10 @@ class 
LLVMProjectIRDB : public ProjectIRDBBase { [[nodiscard]] static llvm::ErrorOr load(const llvm::Twine &IRFileName, bool EnableOpaquePointers = LLVM_VERSION_MAJOR > 14); + [[nodiscard]] static LLVMProjectIRDB + loadOrExit(const llvm::Twine &IRFileName, + bool EnableOpaquePointers = LLVM_VERSION_MAJOR > 14, + int ErrorExitCode = 1); /// Also use the const overload using ProjectIRDBBase::getFunction; diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h index fccd023f85..e4001bbd25 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h @@ -92,7 +92,7 @@ class DIBasedTypeHierarchy [[nodiscard]] const auto &getAllVTables() const noexcept { return VTables; } - [[nodiscard]] llvm::StringRef getTypeName(ClassType Type) const override { + [[nodiscard]] static llvm::StringRef typeName(ClassType Type) { if (const auto *CompTy = llvm::dyn_cast(Type)) { auto Ident = CompTy->getIdentifier(); return Ident.empty() ? 
CompTy->getName() : Ident; @@ -100,6 +100,10 @@ class DIBasedTypeHierarchy return Type->getName(); } + [[nodiscard]] llvm::StringRef getTypeName(ClassType Type) const override { + return typeName(Type); + } + [[nodiscard]] size_t size() const noexcept override { return VertexTypes.size(); } diff --git a/include/phasar/Utils.h b/include/phasar/Utils.h index f120b4faa0..a034f6c871 100644 --- a/include/phasar/Utils.h +++ b/include/phasar/Utils.h @@ -11,9 +11,11 @@ #define PHASAR_UTILS_H #include "phasar/Utils/AnalysisProperties.h" +#include "phasar/Utils/BitSet.h" #include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/BoxedPointer.h" #include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Compressor.h" #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/DebugOutput.h" #include "phasar/Utils/EnumFlags.h" @@ -28,10 +30,12 @@ #include "phasar/Utils/Nullable.h" #include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Printer.h" +#include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/StableVector.h" #include "phasar/Utils/Table.h" #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" #endif // PHASAR_UTILS_H diff --git a/include/phasar/Utils/AdjacencyList.h b/include/phasar/Utils/AdjacencyList.h index b69abe74cf..c8e22bb087 100644 --- a/include/phasar/Utils/AdjacencyList.h +++ b/include/phasar/Utils/AdjacencyList.h @@ -10,38 +10,41 @@ #ifndef PHASAR_UTILS_ADJACENCYLIST_H #define PHASAR_UTILS_ADJACENCYLIST_H +#include "phasar/Utils/EmptyBaseOptimizationUtils.h" #include "phasar/Utils/GraphTraits.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/RepeatIterator.h" -#include "phasar/Utils/Utilities.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" +#include #include #include #include namespace psr { -template struct AdjacencyList { - llvm::SmallVector Nodes{}; - 
llvm::SmallVector, 0> Adj{}; - llvm::SmallVector Roots{}; +template +struct AdjacencyList { + TypedVector Nodes{}; + TypedVector, 0> Adj{}; + llvm::SmallVector Roots{}; }; -template struct AdjacencyList { - llvm::SmallVector, 0> Adj{}; - llvm::SmallVector Roots{}; +template +struct AdjacencyList { + TypedVector, 0> Adj{}; + llvm::SmallVector Roots{}; }; /// A simple graph implementation based on an adjacency list -template -struct GraphTraits> { - using graph_type = AdjacencyList; +template +struct GraphTraits> { + using graph_type = AdjacencyList; using value_type = T; - using vertex_t = unsigned; + using vertex_t = VtxId; using edge_t = EdgeTy; using edge_iterator = typename llvm::ArrayRef::const_iterator; using roots_iterator = typename llvm::ArrayRef::const_iterator; @@ -53,12 +56,12 @@ struct GraphTraits> { /// Adds a new node to the graph G with node-tag Val /// /// \returns The vertex-descriptor for the newly created node - template >> - static vertex_t addNode(graph_type &G, TT &&Val) { + template >> + static constexpr vertex_t addNode(graph_type &G, TT &&Val) { assert(G.Adj.size() == G.Nodes.size()); - auto Ret = G.Nodes.size(); + auto Ret = vertex_t(G.Nodes.size()); G.Nodes.push_back(std::forward(Val)); G.Adj.emplace_back(); return Ret; @@ -68,26 +71,27 @@ struct GraphTraits> { /// /// \returns The vertex-descriptor for the newly created node template >> - static vertex_t addNode(graph_type &G, llvm::NoneType /*Val*/ = llvm::None) { - auto Ret = G.Adj.size(); + typename = std::enable_if_t>> + static constexpr vertex_t addNode(graph_type &G, value_type /*Val*/ = {}) { + auto Ret = vertex_t(G.Adj.size()); G.Adj.emplace_back(); return Ret; } /// Makes the node Vtx as root in the graph G. 
A node should not be registered /// as root multiple times - static void addRoot(graph_type &G, vertex_t Vtx) { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + static constexpr void addRoot(graph_type &G, vertex_t Vtx) { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } G.Roots.push_back(Vtx); } /// Gets a range of all root nodes of graph G - static llvm::ArrayRef roots(const graph_type &G) noexcept { - if constexpr (!std::is_same_v) { + static constexpr llvm::ArrayRef + roots(const graph_type &G) noexcept { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Roots; @@ -97,120 +101,132 @@ struct GraphTraits> { /// be nodes inside G. Multi-edges are supported, i.e. edges are not /// deduplicated automatically; to manually deduplicate the edges of one /// source-node, call dedupOutEdges() - static void addEdge(graph_type &G, vertex_t From, edge_t To) { - assert(From < G.Adj.size()); - if constexpr (!std::is_same_v) { + static constexpr void addEdge(graph_type &G, vertex_t From, edge_t To) { + assert(G.Adj.inbounds(From)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } G.Adj[From].push_back(std::move(To)); } /// Gets a range of all edges outgoing from node Vtx in graph G - static llvm::ArrayRef outEdges(const graph_type &G, - vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + static constexpr llvm::ArrayRef outEdges(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Adj[Vtx]; } /// Gets the number of edges outgoing from node Vtx in graph G - static size_t outDegree(const graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + static constexpr size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { + 
assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Adj[Vtx].size(); } /// Deduplicates the edges outgoing from node Vtx in graph G. Deduplication is - /// based on operator< and operator== of the edge_t type - static void dedupOutEdges(graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + /// based on operator== of the edge_t type, and operator< if available. + static constexpr void dedupOutEdges(graph_type &G, vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } auto &OutEdges = G.Adj[Vtx]; - std::sort(OutEdges.begin(), OutEdges.end()); - OutEdges.erase(std::unique(OutEdges.begin(), OutEdges.end()), - OutEdges.end()); + + if constexpr (IsLessComparable) { + std::sort(OutEdges.begin(), OutEdges.end()); + OutEdges.erase(std::unique(OutEdges.begin(), OutEdges.end()), + OutEdges.end()); + } else { + auto End = OutEdges.end(); + for (auto It = OutEdges.begin(); It < End; ++It) { + End = std::remove(std::next(It), End, *It); + } + OutEdges.erase(End, OutEdges.end()); + } } /// Gets a const range of all nodes in graph G template >> - static llvm::ArrayRef nodes(const graph_type &G) noexcept { + typename = std::enable_if_t>> + static constexpr const auto &nodes(const graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a mutable range of all nodes in graph G template >> - static llvm::MutableArrayRef nodes(graph_type &G) noexcept { + typename = std::enable_if_t>> + static constexpr auto &nodes(graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a range of all nodes in graph G template >> - static RepeatRangeType nodes(const graph_type &G) noexcept { - return repeat(llvm::None, G.Adj.size()); + typename = std::enable_if_t>> + static constexpr RepeatRangeType + nodes(const graph_type &G) noexcept { 
+ return repeat(value_type{}, G.Adj.size()); } /// Gets a range of vertex-descriptors for all nodes in graph G - static auto vertices(const graph_type &G) noexcept { - if constexpr (!std::is_same_v) { + static constexpr auto vertices(const graph_type &G) noexcept { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } - return psr::iota(vertex_t(0), G.Adj.size()); + return psr::iota(G.Adj.size()); } /// Gets the node-tag for node Vtx in graph G. Vtx must be part of G template >> - static const value_type &node(const graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Nodes.size()); + typename = std::enable_if_t>> + static constexpr const value_type &node(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); assert(G.Adj.size() == G.Nodes.size()); return G.Nodes[Vtx]; } /// Gets the node-tag for node Vtx in graph G. Vtx must be part of G template >> - static value_type &node(graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Nodes.size()); + typename = std::enable_if_t>> + static constexpr value_type &node(graph_type &G, vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); assert(G.Adj.size() == G.Nodes.size()); return G.Nodes[Vtx]; } /// Gets the node-tag for node Vtx in graph G. 
Vtx must be part of G template >> - static llvm::NoneType node([[maybe_unused]] const graph_type &G, - [[maybe_unused]] vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - return llvm::None; + typename = std::enable_if_t>> + static constexpr value_type node([[maybe_unused]] const graph_type &G, + [[maybe_unused]] vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + return {}; } /// Gets the number of nodes in graph G - static size_t size(const graph_type &G) noexcept { - if constexpr (!std::is_same_v) { + static constexpr size_t size(const graph_type &G) noexcept { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Adj.size(); } /// Gets the number of nodes in graph G that are marked as root - static size_t roots_size(const graph_type &G) noexcept { // NOLINT - if constexpr (!std::is_same_v) { + static constexpr size_t roots_size(const graph_type &G) noexcept { // NOLINT + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Roots.size(); } /// Pre-allocates space to hold up to Capacity nodes - static void reserve(graph_type &G, size_t Capacity) { - if constexpr (!std::is_same_v) { + static constexpr void reserve(graph_type &G, size_t Capacity) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); G.Nodes.reserve(Capacity); } @@ -222,10 +238,10 @@ struct GraphTraits> { /// was another not-popped node inserted in between. 
/// /// \returns True, iff the removal was successful - static bool pop(graph_type &G, vertex_t Vtx) { + static constexpr bool pop(graph_type &G, vertex_t Vtx) { if (Vtx == G.Adj.size() - 1) { G.Adj.pop_back(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { G.Nodes.pop_back(); } return true; @@ -235,7 +251,7 @@ struct GraphTraits> { /// Gets the vertex-descriptor of the target-node of the given Edge template - static std::enable_if_t, vertex_t> + static constexpr std::enable_if_t, vertex_t> target(edge_t Edge) noexcept { return Edge; } @@ -244,25 +260,23 @@ struct GraphTraits> { /// weight of the returned edge and the parameter edge is same, but the target /// nodes may differ. template - static std::enable_if_t, edge_t> + static constexpr std::enable_if_t, edge_t> withEdgeTarget(edge_t /*edge*/, vertex_t Tar) noexcept { return Tar; } /// Gets the weight associated with the given edge - static llvm::NoneType weight(edge_t /*unused*/) noexcept { - return llvm::None; - } + static constexpr EmptyType weight(edge_t /*unused*/) noexcept { return {}; } /// Removes the edge denoted by It outgoing from source-vertex Vtx from the /// graph G. This function is not required by the is_graph_trait concept. 
/// /// \returns An edge_iterator directly following It that should be used to /// continue iteration instead of std::next(It) - static edge_iterator removeEdge(graph_type &G, vertex_t Vtx, - edge_iterator It) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + static constexpr edge_iterator removeEdge(graph_type &G, vertex_t Vtx, + edge_iterator It) noexcept { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } assert(G.Adj[Vtx].begin() <= It && It < G.Adj[Vtx].end()); @@ -278,8 +292,9 @@ struct GraphTraits> { /// /// \returns A roots_iterator directly following It that should be used to /// continue iteration instead of std::next(It) - static roots_iterator removeRoot(graph_type &G, roots_iterator It) noexcept { - if constexpr (!std::is_same_v) { + static constexpr roots_iterator removeRoot(graph_type &G, + roots_iterator It) noexcept { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } assert(G.Roots.begin() <= It && It < G.Roots.end()); @@ -289,11 +304,6 @@ struct GraphTraits> { G.Roots.pop_back(); return It; } - -#if __cplusplus >= 202002L - static_assert(is_graph>); -#endif - static_assert(is_reservable_graph_trait_v>>); }; } // namespace psr diff --git a/include/phasar/Utils/AlignNum.h b/include/phasar/Utils/AlignNum.h new file mode 100644 index 0000000000..152a8f824a --- /dev/null +++ b/include/phasar/Utils/AlignNum.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel + *****************************************************************************/ + +#ifndef PHASAR_UTILS_ALIGNNUM_H +#define PHASAR_UTILS_ALIGNNUM_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" + +namespace psr { + +template struct AlignNum { + llvm::StringRef Name; + T Num; + + constexpr AlignNum(llvm::StringRef Name, T Num) noexcept + : Name(Name), Num(Num) {} + constexpr AlignNum(llvm::StringRef Name, size_t Numerator, + size_t Denominator) noexcept + : Name(Name), Num(double(Numerator) / double(Denominator)) {} + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const AlignNum &AN) { + auto Len = AN.Name.size() + 1; + auto Diff = -(Len < NumOffs) & (NumOffs - Len); + + OS << AN.Name << ':'; + // Default is two fixed-point decimal places, so shift the output by three + // spaces + OS.indent(Diff + std::is_floating_point_v * 3); + OS << llvm::formatv("{0,+7}\n", AN.Num); + + return OS; + } +}; +template AlignNum(llvm::StringRef, T) -> AlignNum; +AlignNum(llvm::StringRef, size_t, size_t) -> AlignNum; + +template struct AlignStr { + llvm::StringRef Name; + llvm::StringRef Value; + + constexpr AlignStr(llvm::StringRef Name, llvm::StringRef Value) noexcept + : Name(Name), Value(Value) {} + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const AlignStr &AS) { + auto Len = AS.Name.size(); + auto Diff = -(Len < NumOffs) & (NumOffs - Len); + + OS << AS.Name << ':'; + OS.indent(Diff); + return OS << AS.Value << '\n'; + } +}; +} // namespace psr + +#endif // PHASAR_UTILS_ALIGNNUM_H diff --git a/include/phasar/Utils/BitSet.h b/include/phasar/Utils/BitSet.h new file mode 100644 index 0000000000..03023af337 --- /dev/null +++ b/include/phasar/Utils/BitSet.h @@ -0,0 +1,244 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. 
This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_UTILS_BITSET_H +#define PHASAR_UTILS_BITSET_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" + +#include +#include +#include + +namespace psr { + +/// \brief A set-type that can compactly store sets of sequential integer-like +/// types. +/// +/// Use this type for sequential (unsigned) integers and ids that can convert +/// from and to uint32_t. +/// +/// \tparam IdT The type of elements to store in this set. Must be losslessly +/// convertible from and to uint32_t. +/// \tparam BitVectorTy The underlying bit-vector to use. Must be either +/// llvm::BitVector or llvm::SmallBitVector. +template class BitSet { + static llvm::ArrayRef getWords(const llvm::BitVector &BV, + uintptr_t & /*Store*/) { + return BV.getData(); + } + static llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, + uintptr_t &Store) { + return BV.getData(Store); + } + +public: + /// Wraps BitVectorTy::const_set_bits_iterator, as LLVM's bitset iterators + /// unfortunately do not conform to the named requirement of an iterator + class Iterator { + public: + using value_type = IdT; + using reference = IdT; + using pointer = const IdT *; + using difference_type = ptrdiff_t; + using iterator_category = std::forward_iterator_tag; + + Iterator(typename BitVectorTy::const_set_bits_iterator It) noexcept + : It(It) {} + + Iterator &operator++() noexcept { + ++It; + return *this; + } + Iterator operator++(int) noexcept { + auto Ret = *this; + ++*this; + return Ret; + } + reference operator*() const noexcept { return IdT(*It); } + + bool operator==(const Iterator &Other) const noexcept { + return It == Other.It; + } + bool operator!=(const Iterator &Other) const noexcept { + return !(*this == Other); + } + + private: + typename 
BitVectorTy::const_set_bits_iterator It; + }; + + using iterator = Iterator; + using value_type = IdT; + + BitSet() noexcept = default; + explicit BitSet(size_t InitialCapacity) : Bits(InitialCapacity) {} + explicit BitSet(size_t InitialCapacity, bool InitialValue) + : Bits(InitialCapacity, InitialValue) {} + + void reserve(size_t Cap) { + if (Bits.size() < Cap) { + Bits.resize(Cap); + } + } + + [[nodiscard]] bool contains(IdT Id) const noexcept { + auto Index = uint32_t(Id); + return Bits.size() > Index && Bits.test(Index); + } + + void insert(IdT Id) { + auto Index = uint32_t(Id); + if (Bits.size() <= Index) { + Bits.resize(Index + 1); + } + + Bits.set(Index); + } + + /// Same as insert(), but returns, whether the set was changed. + [[nodiscard]] bool tryInsert(IdT Id) { + auto Index = uint32_t(Id); + if (Bits.size() <= Index) { + Bits.resize(Index + 1); + } + + bool Ret = !Bits.test(Index); + Bits.set(Index); + return Ret; + } + + void erase(IdT Id) noexcept { + if (Bits.size() > size_t(Id)) { + Bits.reset(uint32_t(Id)); + } + } + /// Same as erase(), but returns, whether the set was changed. + [[nodiscard]] bool tryErase(IdT Id) noexcept { + if (contains(Id)) { + return Bits.reset(uint32_t(Id)), true; + } + + return false; + } + + void mergeWith(const BitSet &Other) { Bits |= Other.Bits; } + + /// Same as mergeWith(), but returns, whether the set was changed. + bool tryMergeWith(const BitSet &Other) { + /// TODO: Make this more efficient + return isSupersetOf(Other) ? 
false : (mergeWith(Other), true); + } + + void clear() noexcept { Bits.reset(); } + + [[nodiscard]] friend bool operator==(const BitSet &Lhs, + const BitSet &Rhs) noexcept { + bool LeftEmpty = Lhs.Bits.none(); + bool RightEmpty = Rhs.Bits.none(); + if (LeftEmpty || RightEmpty) { + return LeftEmpty == RightEmpty; + } + // Check, whether Lhs and Rhs actually have the same bits set and not + // whether their internal representation is exactly identical + uintptr_t LhsStore{}; + uintptr_t RhsStore{}; + + auto LhsWords = getWords(Lhs.Bits, LhsStore); + auto RhsWords = getWords(Rhs.Bits, RhsStore); + if (LhsWords.size() == RhsWords.size()) { + return LhsWords == RhsWords; + } + auto MinSize = std::min(LhsWords.size(), RhsWords.size()); + if (LhsWords.slice(0, MinSize) != RhsWords.slice(0, MinSize)) { + return false; + } + auto Rest = (LhsWords.size() > RhsWords.size() ? LhsWords : RhsWords) + .slice(MinSize); + return std::all_of(Rest.begin(), Rest.end(), + [](auto Word) { return Word == 0; }); + } + + [[nodiscard]] friend bool operator!=(const BitSet &Lhs, + const BitSet &Rhs) noexcept { + return !(Lhs == Rhs); + } + + [[nodiscard]] bool any() const noexcept { return Bits.any(); } + + [[nodiscard]] iterator begin() const noexcept { + return Bits.set_bits_begin(); + } + [[nodiscard]] iterator end() const noexcept { return Bits.set_bits_end(); } + + /// Same as mergeWith() + void operator|=(const BitSet &Other) { Bits |= Other.Bits; } + void operator-=(const BitSet &Other) { Bits.reset(Other.Bits); } + + [[nodiscard]] BitSet operator-(const BitSet &Other) const { + // TODO: keep allocation small by looping from the end and truncating all + // words that result in all-zero + auto Ret = *this; + Ret -= Other; + return Ret; + } + + /// Same as mergeWith(), but returns *this to allow a fluent interface. + BitSet &insertAllOf(const BitSet &Other) { + Bits |= Other.Bits; + return *this; + } + /// Same as operator-=, but returns *this to allow a fluent interface.
+ BitSet &eraseAllOf(const BitSet &Other) { + Bits.reset(Other.Bits); + return *this; + } + + [[nodiscard]] bool isSubsetOf(const BitSet &Of) const { + uintptr_t Buf = 0; + uintptr_t OfBuf = 0; + + auto Words = getWords(Bits, Buf); + auto OfWords = getWords(Of.Bits, OfBuf); + if (Words.size() > OfWords.size()) { + if (llvm::any_of(Words.drop_front(OfWords.size()), + [](uintptr_t W) { return W != 0; })) { + return false; + } + } + + for (auto [W, OfW] : llvm::zip(Words, OfWords)) { + if ((W & ~OfW) != 0) { + return false; + } + } + + return true; + } + + [[nodiscard]] bool isSupersetOf(const BitSet &Of) const { + return Of.isSubsetOf(*this); + } + + /// The number of bits available. This operation is O(1) + [[nodiscard]] size_t capacity() const noexcept { return Bits.size(); } + /// The number of bits set to 1. In contrast to most other containers, this + /// operation is linear in O(capacity()) + [[nodiscard]] size_t size() const noexcept { return Bits.count(); } + /// Whether this set contains no elements. In contrast to most other + /// containers, this operation is linear in O(capacity()) + [[nodiscard]] bool empty() const noexcept { return Bits.none(); } + +private: + BitVectorTy Bits; +}; +} // namespace psr + +#endif diff --git a/include/phasar/Utils/ChronoUtils.h b/include/phasar/Utils/ChronoUtils.h index ca4f4092fd..e4ebc852e2 100644 --- a/include/phasar/Utils/ChronoUtils.h +++ b/include/phasar/Utils/ChronoUtils.h @@ -10,7 +10,6 @@ #ifndef PHASAR_UTILS_CHRONOUTILS_H #define PHASAR_UTILS_CHRONOUTILS_H -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/include/phasar/Utils/Compressor.h b/include/phasar/Utils/Compressor.h index 296dee5e08..883e214fd5 100644 --- a/include/phasar/Utils/Compressor.h +++ b/include/phasar/Utils/Compressor.h @@ -1,12 +1,21 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. 
This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + #ifndef PHASAR_UTILS_COMPRESSOR_H #define PHASAR_UTILS_COMPRESSOR_H #include "phasar/Utils/ByRef.h" #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/SmallVector.h" #include #include @@ -39,6 +48,15 @@ class Compressor>> { return It->second; } + std::pair insert(T Elem) { + auto [It, Inserted] = ToInt.try_emplace(Elem, IdT(ToInt.size())); + if (Inserted) { + FromInt.push_back(Elem); + } + return {It->second, Inserted}; + } + + [[nodiscard]] std::optional getOrNull(T Elem) const { if (auto It = ToInt.find(Elem); It != ToInt.end()) { return It->second; @@ -46,13 +64,19 @@ class Compressor>> { return std::nullopt; } + [[nodiscard]] IdT get(T Elem) const { + auto It = ToInt.find(Elem); + assert(It != ToInt.end()); + return It->second; + } + [[nodiscard]] bool inbounds(IdT Idx) const noexcept { - return size_t(Idx) < FromInt.size(); + return FromInt.inbounds(Idx); } - T operator[](IdT Idx) const noexcept { + [[nodiscard]] T operator[](IdT Idx) const noexcept { assert(inbounds(Idx)); - return FromInt[size_t(Idx)]; + return FromInt[Idx]; } [[nodiscard]] size_t size() const noexcept { return FromInt.size(); } @@ -61,8 +85,10 @@ class Compressor>> { ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type); } - auto begin() const noexcept { return FromInt.begin(); } - auto end() const noexcept { return FromInt.end(); } + [[nodiscard]] auto begin() const noexcept { return FromInt.begin(); } + [[nodiscard]] auto end() const noexcept { return FromInt.end(); } + + [[nodiscard]] auto enumerate() const noexcept { return FromInt.enumerate(); } void clear() noexcept { ToInt.clear(); @@ -71,7 +97,7 @@ class Compressor>> { private: 
llvm::DenseMap ToInt; - llvm::SmallVector FromInt; + TypedVector FromInt; }; /// \brief A utility class that assigns a sequential Id to every inserted @@ -86,8 +112,8 @@ class Compressor>> { ToInt.reserve(Capacity); } - /// Returns the index of the given element in the compressors storage. If the - /// element isn't present yet, it will be added first and its index will + /// Returns the index of the given element in the compressors storage. If + /// the element isn't present yet, it will be added first and its index will /// then be returned. IdT getOrInsert(const T &Elem) { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { @@ -99,8 +125,8 @@ class Compressor>> { return Ret; } - /// Returns the index of the given element in the compressors storage. If the - /// element isn't present yet, it will be added first and its index will + /// Returns the index of the given element in the compressors storage. If + /// the element isn't present yet, it will be added first and its index will /// then be returned. IdT getOrInsert(T &&Elem) { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { @@ -112,20 +138,46 @@ class Compressor>> { return Ret; } - /// Returns the index of the given element in the compressors storage. If the - /// element isn't present, std::nullopt will be returned - std::optional getOrNull(const T &Elem) const { + std::pair insert(const T &Elem) { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + return {It->second, false}; + } + auto Ret = Id(FromInt.size()); + auto *Ins = &FromInt.emplace_back(Elem); + ToInt[Ins] = Ret; + return {Ret, true}; + } + + std::pair insert(T &&Elem) { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + return {It->second, false}; + } + auto Ret = Id(FromInt.size()); + auto *Ins = &FromInt.emplace_back(std::move(Elem)); + ToInt[Ins] = Ret; + return {Ret, true}; + } + + /// Returns the index of the given element in the compressors storage. 
If + /// the element isn't present, std::nullopt will be returned + [[nodiscard]] std::optional getOrNull(const T &Elem) const { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; } return std::nullopt; } + [[nodiscard]] IdT get(const T &Elem) const { + auto It = ToInt.find(&Elem); + assert(It != ToInt.end()); + return It->second; + } + [[nodiscard]] bool inbounds(IdT Idx) const noexcept { return size_t(Idx) < FromInt.size(); } - const T &operator[](IdT Idx) const noexcept { + [[nodiscard]] const T &operator[](IdT Idx) const noexcept { assert(inbounds(Idx)); return FromInt[size_t(Idx)]; } @@ -139,6 +191,14 @@ class Compressor>> { auto begin() const noexcept { return FromInt.begin(); } auto end() const noexcept { return FromInt.end(); } + [[nodiscard]] auto enumerate() const noexcept { + return llvm::map_range(llvm::enumerate(FromInt), + [](const auto &IndexAndVal) { + return std::pair{ + IdT(IndexAndVal.index()), IndexAndVal.value()}; + }); + } + void clear() noexcept { ToInt.clear(); FromInt.clear(); diff --git a/include/phasar/Utils/GraphTraits.h b/include/phasar/Utils/GraphTraits.h index de96b9cc20..04e6f1d501 100644 --- a/include/phasar/Utils/GraphTraits.h +++ b/include/phasar/Utils/GraphTraits.h @@ -10,9 +10,9 @@ #ifndef PHASAR_UTILS_GRAPHTRAITS_H #define PHASAR_UTILS_GRAPHTRAITS_H +#include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/Utilities.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -35,82 +35,125 @@ template struct GraphTraits; #if __cplusplus >= 202002L template -concept is_graph_edge = requires(const Edge e1, Edge e2) { - { e1 == e2 } -> std::convertible_to; - { e1 != e2 } -> std::convertible_to; - { e1 < e2 } -> std::convertible_to; +concept is_graph_edge = requires(const Edge E1, Edge E2) { + { E1 == E2 } -> std::convertible_to; + { E1 != E2 } -> std::convertible_to; }; template -concept is_graph_trait = requires(typename GraphTrait::graph_type &graph, - const 
typename GraphTrait::graph_type &cgraph, - typename GraphTrait::value_type val, - typename GraphTrait::vertex_t vtx, - typename GraphTrait::edge_t edge) { - typename GraphTrait::graph_type; - typename GraphTrait::value_type; - typename GraphTrait::vertex_t; - typename GraphTrait::edge_t; - requires is_graph_edge; - { GraphTrait::Invalid } -> std::convertible_to; - { - GraphTrait::addNode(graph, val) - } -> std::convertible_to; - {GraphTrait::addEdge(graph, vtx, edge)}; - { - GraphTrait::outEdges(cgraph, vtx) - } -> psr::is_iterable_over_v; - { GraphTrait::outDegree(cgraph, vtx) } -> std::convertible_to; - {GraphTrait::dedupOutEdges(graph, vtx)}; - { - GraphTrait::nodes(cgraph) - } -> psr::is_iterable_over_v; - { - GraphTrait::vertices(cgraph) - } -> psr::is_iterable_over_v; - { - GraphTrait::node(cgraph, vtx) - } -> std::convertible_to; - { GraphTrait::size(cgraph) } -> std::convertible_to; - {GraphTrait::addRoot(graph, vtx)}; - { - GraphTrait::roots(cgraph) - } -> psr::is_iterable_over_v; - { GraphTrait::pop(graph, vtx) } -> std::same_as; - { GraphTrait::roots_size(cgraph) } -> std::convertible_to; - { - GraphTrait::target(edge) - } -> std::convertible_to; - { - GraphTrait::withEdgeTarget(edge, vtx) - } -> std::convertible_to; - {GraphTrait::weight(edge)}; +concept is_const_graph_trait = + requires(const typename GraphTrait::graph_type &CGraph, + typename GraphTrait::value_type Val, + typename GraphTrait::vertex_t Vtx, + typename GraphTrait::edge_t Edge) { + typename GraphTrait::graph_type; + typename GraphTrait::value_type; + typename GraphTrait::vertex_t; + typename GraphTrait::edge_t; + requires is_graph_edge; + + { + GraphTrait::Invalid + } -> std::convertible_to; + + { + GraphTrait::outEdges(CGraph, Vtx) + } -> psr::is_iterable_over_v; + { GraphTrait::outDegree(CGraph, Vtx) } -> std::convertible_to; + { + GraphTrait::nodes(CGraph) + } -> psr::is_iterable_over_v; + { + GraphTrait::roots(CGraph) + } -> psr::is_iterable_over_v; + { + GraphTrait::vertices(CGraph) 
+ } -> psr::is_iterable_over_v; + { + GraphTrait::node(CGraph, Vtx) + } -> std::convertible_to; + { GraphTrait::size(CGraph) } -> std::convertible_to; + { GraphTrait::roots_size(CGraph) } -> std::convertible_to; + { + GraphTrait::target(Edge) + } -> std::convertible_to; + { + GraphTrait::withEdgeTarget(Edge, Vtx) + } -> std::convertible_to; + }; + +template +concept is_graph_trait = + is_const_graph_trait && + requires(typename GraphTrait::graph_type &Graph, + typename GraphTrait::value_type Val, + typename GraphTrait::vertex_t Vtx, + typename GraphTrait::edge_t Edge) { + { + GraphTrait::addNode(Graph, Val) + } -> std::convertible_to; + { GraphTrait::addEdge(Graph, Vtx, Edge) }; + { GraphTrait::dedupOutEdges(Graph, Vtx) }; + { GraphTrait::addRoot(Graph, Vtx) }; + { GraphTrait::pop(Graph, Vtx) } -> std::same_as; + }; + +template +concept is_weighted_const_graph_trait = + is_const_graph_trait && + requires(const typename GraphTrait::edge_t &Edge) { + typename GraphTrait::weight_t; + { + GraphTrait::weight(Edge) + } -> std::convertible_to; + }; +template +concept is_weighted_graph_trait = + is_graph_trait && is_weighted_const_graph_trait; + +template +concept is_const_graph = requires(Graph G) { + typename GraphTraits>; + requires is_const_graph_trait>>; }; template -concept is_graph = requires(Graph g) { +concept is_graph = requires(Graph G) { typename GraphTraits>; requires is_graph_trait>>; }; -template -concept is_reservable_graph_trait_v = is_graph_trait && - requires(typename GraphTrait::graph_type &g) { - {GraphTrait::reserve(g, size_t(0))}; +template +concept is_weighted_const_graph = requires(Graph G) { + typename GraphTraits>; + requires is_weighted_const_graph_trait>>; }; -template -concept is_removable_graph_trait_v = is_graph_trait && - requires(typename GraphTrait::graph_type &g, - typename GraphTrait::vertex_t vtx, - typename GraphTrait::edge_iterator edge_it, - typename GraphTrait::roots_iterator root_it) { - typename GraphTrait::edge_iterator; - 
typename GraphTrait::roots_iterator; - {GraphTrait::removeEdge(g, vtx, edge_it)}; - {GraphTrait::removeRoot(g, root_it)}; +template +concept is_weighted_graph = requires(Graph G) { + typename GraphTraits>; + requires is_weighted_graph_trait>>; }; +template +concept is_reservable_graph_trait_v = + is_graph_trait && requires(typename GraphTrait::graph_type &G) { + { GraphTrait::reserve(G, size_t(0)) }; + }; + +template +concept is_removable_graph_trait_v = + is_graph_trait && + requires(typename GraphTrait::graph_type &G, + typename GraphTrait::vertex_t Vtx, + typename GraphTrait::edge_iterator EdgeIt, + typename GraphTrait::roots_iterator RootIt) { + typename GraphTrait::edge_iterator; + typename GraphTrait::roots_iterator; + { GraphTrait::removeEdge(G, Vtx, EdgeIt) }; + { GraphTrait::removeRoot(G, RootIt) }; + }; + #else namespace detail { template @@ -155,7 +198,7 @@ static constexpr bool is_removable_graph_trait_v = template std::decay_t reverseGraph(GraphTy &&G) #if __cplusplus >= 202002L - requires is_graph + requires is_graph #endif { std::decay_t Ret; @@ -189,31 +232,43 @@ struct DefaultNodeTransform { } }; +/// \brief Prints the given graph G as dot. 
+/// +/// \param G The graph to print +/// \param OS The output-stream, where to print into +/// \param Name The name of the graph +/// \param NodeToString If the graph has node-labels, convert a node-label to +/// string template void printGraph(const GraphTy &G, llvm::raw_ostream &OS, llvm::StringRef Name = "", NodeTransform NodeToString = {}) #if __cplusplus >= 202002L - requires is_graph + requires is_const_graph #endif { using traits_t = GraphTraits; - OS << "digraph " << Name << " {\n"; + OS << "digraph \""; + OS.write_escaped(Name) << "\" {\n"; psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - auto Sz = traits_t::size(G); - - for (size_t I = 0; I < Sz; ++I) { - OS << I; - if constexpr (!std::is_same_v) { + for (auto Vtx : traits_t::vertices(G)) { + OS << size_t(Vtx); + if constexpr (!std::is_empty_v) { OS << "[label=\""; - OS.write_escaped(std::invoke(NodeToString, traits_t::node(G, I))); + OS.write_escaped(std::invoke(NodeToString, traits_t::node(G, Vtx))); OS << "\"]"; } OS << ";\n"; - for (const auto &Edge : traits_t::outEdges(G, I)) { - OS << I << "->" << Edge << ";\n"; + for (const auto &Edge : traits_t::outEdges(G, Vtx)) { + OS << size_t(Vtx) << "->"; + if constexpr (is_llvm_printable_v) { + // to print the edge-weight as well, if possible + OS << Edge; + } else { + OS << size_t(traits_t::target(Edge)); + } + OS << ";\n"; } } } diff --git a/include/phasar/Utils/IO.h b/include/phasar/Utils/IO.h index 27669460b3..081332b9a1 100644 --- a/include/phasar/Utils/IO.h +++ b/include/phasar/Utils/IO.h @@ -22,7 +22,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "nlohmann/json.hpp" +#include "nlohmann/json_fwd.hpp" #include diff --git a/include/phasar/Utils/IotaIterator.h b/include/phasar/Utils/IotaIterator.h index 9b55162717..c01805f688 100644 --- a/include/phasar/Utils/IotaIterator.h +++ b/include/phasar/Utils/IotaIterator.h @@ -12,6 +12,7 @@ #include "phasar/Utils/TypeTraits.h" +#include "llvm/ADT/iterator.h" 
#include "llvm/ADT/iterator_range.h" #include @@ -19,34 +20,48 @@ #include namespace psr { -/// An iterator that iterates over the same value a specified number of times -template class IotaIterator { +/// An iterator that iterates over a numeric range, where the start value is +/// always incremented by one. +template +class IotaIterator + : public llvm::iterator_facade_base, + std::random_access_iterator_tag, T, + ptrdiff_t, const T *, T> { + using base_t = llvm::iterator_facade_base, + std::random_access_iterator_tag, T, + ptrdiff_t, const T *, T>; + public: - using value_type = T; - using reference = T; - using pointer = const T *; - using difference_type = ptrdiff_t; - using iterator_category = std::forward_iterator_tag; + using typename base_t::difference_type; + using typename base_t::iterator_category; + using typename base_t::pointer; + using typename base_t::reference; + using typename base_t::value_type; constexpr reference operator*() const noexcept { return Elem; } constexpr pointer operator->() const noexcept { return &Elem; } - constexpr IotaIterator &operator++() noexcept { - ++Elem; + constexpr IotaIterator &operator+=(difference_type N) noexcept { + Elem = T(difference_type(Elem) + N); + return *this; + } + constexpr IotaIterator &operator-=(difference_type N) noexcept { + Elem = T(difference_type(Elem) - N); return *this; } - constexpr IotaIterator operator++(int) noexcept { - auto Ret = *this; - ++*this; - return Ret; + constexpr bool operator<(const IotaIterator &Other) const noexcept { + return difference_type(Elem) < difference_type(Other.Elem); /* *this precedes Other iff its value is smaller; same orientation as operator- */ + } - constexpr bool operator==(const IotaIterator &Other) const noexcept { return Other.Elem == Elem; } constexpr bool operator!=(const IotaIterator &Other) const noexcept { return !(*this == Other); } + constexpr difference_type + operator-(const IotaIterator &Other) const noexcept { + return difference_type(Elem) - difference_type(Other.Elem); + } constexpr explicit IotaIterator(T Elem) noexcept :
Elem(Elem) {} @@ -56,16 +71,27 @@ template class IotaIterator { T Elem{}; }; -template -using IotaRangeType = llvm::iterator_range>; -template -constexpr auto iota(T From, type_identity_t To) noexcept { - static_assert(std::is_integral_v, "Iota only works on integers"); - using iterator_type = IotaIterator>; +template +using IotaRangeType = llvm::iterator_range>; + +template +[[nodiscard]] constexpr auto iota(IdT From, type_identity_t To) noexcept { + static_assert(is_explicitly_convertible_to && + is_explicitly_convertible_to, + "Iota only works on integers and integer-like types"); + using iterator_type = IotaIterator>; auto Ret = llvm::make_range(iterator_type(From), iterator_type(To)); return Ret; } +template [[nodiscard]] constexpr auto iota(size_t To) noexcept { + static_assert(is_explicitly_convertible_to && + is_explicitly_convertible_to, + "Iota only works on integers and integer-like types"); + using iterator_type = IotaIterator>; + return llvm::make_range(iterator_type(), iterator_type(IdT(To))); +} + static_assert(is_iterable_over_v, int>); } // namespace psr diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h new file mode 100644 index 0000000000..55e85d6e34 --- /dev/null +++ b/include/phasar/Utils/SCCGeneric.h @@ -0,0 +1,572 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_UTILS_SCCGENERIC_H +#define PHASAR_UTILS_SCCGENERIC_H + +#include "phasar/Utils/BitSet.h" +#include "phasar/Utils/EmptyBaseOptimizationUtils.h" +#include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/RepeatIterator.h" +#include "phasar/Utils/TypedVector.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace psr { + +namespace detail { +// Unfortunately, `enum class` cannot be templated, but we want type-safety for +// SCC-IDs... +struct SCCIdBase { + uint32_t Value{}; + + constexpr SCCIdBase() noexcept = default; + + explicit constexpr SCCIdBase(uint32_t Val) noexcept : Value(Val) {} + + explicit constexpr operator uint32_t() const noexcept { return Value; } + template >> + explicit constexpr operator size_t() const noexcept { + return Value; + } + + explicit constexpr operator ptrdiff_t() const noexcept { + return ptrdiff_t(Value); + } + + constexpr uint32_t operator+() const noexcept { return Value; } + + friend constexpr bool operator==(SCCIdBase L, SCCIdBase R) noexcept { + return L.Value == R.Value; + } + friend constexpr bool operator!=(SCCIdBase L, SCCIdBase R) noexcept { + return !(L == R); + } +}; +} // namespace detail + +/// \brief The Id of a strongly-connected component in a graph. +/// +/// \tparam GraphNodeId The vertex-type of the graph where this SCC was computed +/// for. 
+template struct SCCId : detail::SCCIdBase { + using detail::SCCIdBase::SCCIdBase; +}; + +} // namespace psr + +namespace llvm { +template struct DenseMapInfo> { + using SCCId = psr::SCCId; + + static constexpr SCCId getEmptyKey() noexcept { return SCCId(UINT32_MAX); } + static constexpr SCCId getTombstoneKey() noexcept { + return SCCId(UINT32_MAX - 1); + } + + static auto getHashValue(SCCId SCC) noexcept { + return llvm::hash_value(uint32_t(SCC)); + } + static constexpr bool isEqual(SCCId SCC1, SCCId SCC2) noexcept { + return SCC1 == SCC2; + } +}; +} // namespace llvm + +namespace psr { + +/// \brief Holds the SCCs of a given graph. Each SCC is assigned a unique +/// sequential id. +template struct SCCHolder { + TypedVector> SCCOfNode; + TypedVector, llvm::SmallVector> + NodesInSCC{}; + + /// Number of SCCs + [[nodiscard]] size_t size() const noexcept { return NodesInSCC.size(); } + [[nodiscard]] bool empty() const noexcept { return NodesInSCC.empty(); } + + /// \brief Prints the given Graph as dot, highlighting the SCCs in the graph. 
+ /// + /// \param Graph The graph to print + /// \param OS The output-stream, where to print into + /// \param Name The name of the graph + /// \param NodeToString If the graph has node-labels, convert a node-label to + /// string + template ::vertex_t, GraphNodeId>, + int> = 0> +#if __cplusplus >= 202002L + requires is_const_graph +#endif + void print(const G &Graph, llvm::raw_ostream &OS, llvm::StringRef Name = "", + NodeTransform NodeToString = {}) const { + OS << "digraph \""; + OS.write_escaped(Name) << "\" {\n"; + psr::scope_exit CloseBrace = [&] { OS << "}\n"; }; + + using GTraits = psr::GraphTraits; + + for (const auto &[SCCId, SCC] : NodesInSCC.enumerate()) { + OS << " subgraph cluster_" << +SCCId + << "{\n node [style=filled]; color=blue; label=\"SCC " << +SCCId + << "\";\n"; + psr::scope_exit CloseSCC = [&] { OS << " }\n"; }; + + for (auto Nod : SCC) { + OS << " " << size_t(Nod); + if constexpr (!std::is_empty_v) { + OS << "[label=\""; + OS.write_escaped( + std::invoke(NodeToString, GTraits::node(Graph, Nod))); + OS << "\"]"; + } + OS << ";\n"; + } + } + + for (auto FromVtx : GTraits::vertices(Graph)) { + for (const auto &Succ : GTraits::outEdges(Graph, FromVtx)) { + OS << " " << size_t(FromVtx) << "->"; + if constexpr (is_llvm_printable_v) { + // to print the edge-weight as well, if possible + OS << Succ; + } else { + OS << size_t(GTraits::target(Succ)); + } + OS << ";\n"; + } + } + } +}; + +/// \brief Holds a graph where the SCCs are collapsed to a single node. +/// Conforms to the is_const_graph concept. 
+template struct SCCDependencyGraph { + TypedVector, llvm::SmallDenseSet>> + ChildrenOfSCC; + llvm::SmallVector, 0> SCCRoots; +}; + +/// \brief Implements the is_const_graph concept for SCCDependencyGraph +template +struct GraphTraits> { + using graph_type = SCCDependencyGraph; + using value_type = EmptyType; + using vertex_t = SCCId; + using edge_t = vertex_t; + + static inline constexpr auto Invalid = vertex_t(UINT32_MAX); + + [[nodiscard]] static constexpr const auto &outEdges(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.ChildrenOfSCC.inbounds(Vtx)); + return G.ChildrenOfSCC[Vtx]; + } + + [[nodiscard]] static constexpr size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.ChildrenOfSCC.inbounds(Vtx)); + return G.ChildrenOfSCC[Vtx].size(); + } + + [[nodiscard]] static constexpr auto nodes(const graph_type &G) noexcept { + return repeat(EmptyType{}, G.ChildrenOfSCC.size()); + } + + [[nodiscard]] static constexpr llvm::ArrayRef + roots(const graph_type &G) noexcept { + return G.SCCRoots; + } + + [[nodiscard]] static constexpr auto vertices(const graph_type &G) noexcept { + return iota(G.ChildrenOfSCC.size()); + } + + [[nodiscard]] static constexpr value_type + node([[maybe_unused]] const graph_type &G, + [[maybe_unused]] vertex_t Vtx) noexcept { + assert(G.ChildrenOfSCC.inbounds(Vtx)); + return {}; + } + + [[nodiscard]] static constexpr size_t size(const graph_type &G) noexcept { + return G.ChildrenOfSCC.size(); + } + + [[nodiscard]] static constexpr size_t + roots_size(const graph_type &G) noexcept { // NOLINT + return G.SCCRoots.size(); + } + + [[nodiscard]] static constexpr vertex_t target(edge_t Edge) noexcept { + return Edge; + } + + [[nodiscard]] static constexpr vertex_t + withEdgeTarget(edge_t /*edge*/, vertex_t Tar) noexcept { + return Tar; + } +}; + +/// \brief Holds topologically sorted SCCDependencyGraph nodes +template struct SCCOrder { + llvm::SmallVector, 0> SCCIds; +}; + +/// \brief Creates a graph based on the 
given input Graph, collapsing all SCCs +/// to single nodes. The resulting graph is always a DAG, i.e., it contains no +/// cycles +template +#if __cplusplus >= 202002L + requires is_const_graph +#endif +SCCDependencyGraph::vertex_t> computeSCCDependencies( + const G &Graph, const SCCHolder::vertex_t> &SCCs) { + + using GTraits = GraphTraits; + using GraphNodeId = typename GraphTraits::vertex_t; + + SCCDependencyGraph Ret; + Ret.ChildrenOfSCC.resize(SCCs.size()); + + BitSet> Roots(SCCs.size(), true); + + for (auto NodeId : GTraits::vertices(Graph)) { + auto SrcSCC = SCCs.SCCOfNode[NodeId]; + + for (const auto &Edge : GTraits::outEdges(Graph, NodeId)) { + auto Succ = GTraits::target(Edge); + auto SuccSCC = SCCs.SCCOfNode[Succ]; + if (SuccSCC != SrcSCC) { + Ret.ChildrenOfSCC[SrcSCC].insert(SuccSCC); + Roots.erase(SuccSCC); + } + } + } + + Ret.SCCRoots.reserve(Roots.size()); + for (auto Rt : Roots) { + Ret.SCCRoots.push_back(Rt); + } + + return Ret; +} + +/// \brief Computes a topological order of the nodes in the given +/// dependency-graph. +/// +/// Uses a simple, recursive postorder-DFS search to find a topological +/// ordering. +template +[[nodiscard]] SCCOrder +computeSCCOrder(const SCCHolder &SCCs, + const SCCDependencyGraph &Callers) { + SCCOrder Ret; + Ret.SCCIds.reserve(SCCs.size()); + + BitSet> Seen; + Seen.reserve(SCCs.size()); + + auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { + Seen.insert(CurrSCC); + for (auto Caller : Callers.ChildrenOfSCC[CurrSCC]) { + if (!Seen.contains(Caller)) { + Dfs(Dfs, Caller); + } + } + Ret.SCCIds.push_back(CurrSCC); + }; + + for (auto Leaf : Callers.SCCRoots) { + if (!Seen.contains(Leaf)) { + Dfs(Dfs, Leaf); + } + } + + std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end()); + + return Ret; +} + +namespace detail { +/// Data for Pearce's Algorithm. 
+template struct Pearce4Data { + TypedVector RIndex; // only per-vertex array + BitSet Root; // root[v] in Algorithm 4 + uint32_t Index = 1; // DFS counter + uint32_t C; // SCC id counter + llvm::SmallVector Stack; + + explicit Pearce4Data(size_t Num) + : RIndex(Num, 0), Root(Num), C(Num ? Num - 1 : 0) {} +}; + +// Recursive variant of Pearce's algorithm (based on Algo 3 in the paper) +template +static void +pearce4VisitRec(const G &Graph, typename GraphTraits::vertex_t V, + Pearce4Data::vertex_t> &Data, + SCCHolder::vertex_t> &Holder) { + using GTraits = psr::GraphTraits; + using Vertex = typename GTraits::vertex_t; + using SCCId = psr::SCCId; + + bool Root = true; + Data.RIndex[V] = Data.Index++; + + for (const auto &Edge : GTraits::outEdges(Graph, V)) { + auto W = GTraits::target(Edge); + if (Data.RIndex[W] == 0) { + pearce4VisitRec(Graph, W, Data, Holder); + } + if (Data.RIndex[W] < Data.RIndex[V]) { + Data.RIndex[V] = Data.RIndex[W]; + Root = false; + } + } + + if (Root) { + Data.Index--; + auto NewSCC = SCCId(Holder.NodesInSCC.size()); + auto &Nodes = Holder.NodesInSCC.emplace_back(); + + while (!Data.Stack.empty() && + Data.RIndex[V] <= Data.RIndex[Data.Stack.back()]) { + auto W = Data.Stack.pop_back_val(); + Data.RIndex[W] = Data.C; + Data.Index--; + + Holder.SCCOfNode[W] = NewSCC; + Nodes.push_back(W); + } + Nodes.push_back(V); + Holder.SCCOfNode[V] = NewSCC; + Data.RIndex[V] = Data.C; + Data.C--; + } else { + Data.Stack.push_back(V); + } +} + +// Iterative variant of Pearce's algorithm (adapted from Algo 4 in the paper) +template +static void +pearce4VisitIt(const G &Graph, typename GraphTraits::vertex_t Start, + Pearce4Data::vertex_t> &Data, + SCCHolder::vertex_t> &Holder) { + using GTraits = psr::GraphTraits; + using Vertex = typename GTraits::vertex_t; + using SCCId = psr::SCCId; + + using OutEdgeRange = + decltype(GTraits::outEdges(Graph, std::declval())); + using OutEdgeIterator = decltype(std::begin(std::declval())); + using OutEdgeSentinel =
decltype(std::end(std::declval())); + + struct DfsFrame { + Vertex CurrVtx; + OutEdgeIterator It; + [[no_unique_address]] OutEdgeSentinel End; + }; + + llvm::SmallVector CallStack; + + const auto PushFrames = [&](Vertex V, DfsFrame *Frame) { + if (Frame->It == Frame->End) { + return false; + } + // Recurse into children until reaching the bottom + do { + auto W = GTraits::target(*Frame->It); + + if (Data.RIndex[W] != 0) { + // Already pushed the children of W + break; + } + + Data.RIndex[W] = Data.Index++; + Data.Root.insert(W); + + auto &&OutEdges = GTraits::outEdges(Graph, W); + Frame = &CallStack.emplace_back( + DfsFrame{W, std::begin(OutEdges), std::end(OutEdges)}); + V = W; + + } while (Frame->It != Frame->End); + + return true; + }; + + const auto VisitLoop = [&](Vertex V, DfsFrame &Frame) { + // Finish visiting the current child and advance to the next child + if (Frame.It != Frame.End) { + auto W = GTraits::target(*Frame.It); + if (Data.RIndex[W] < Data.RIndex[V]) { + Data.RIndex[V] = Data.RIndex[W]; + Data.Root.erase(V); + } + + ++Frame.It; + } + }; + + const auto FinishFrame = [&](Vertex V) { + // finish visiting V and backtrack to the parent + + if (Data.Root.contains(V)) { + // Found a SCC + + Data.Index--; + auto NewSCC = SCCId(Holder.NodesInSCC.size()); + auto &Nodes = Holder.NodesInSCC.emplace_back(); + while (!Data.Stack.empty() && + Data.RIndex[V] <= Data.RIndex[Data.Stack.back()]) { + auto W = Data.Stack.pop_back_val(); + Data.RIndex[W] = Data.C; + Data.Index--; + + Holder.SCCOfNode[W] = NewSCC; + Nodes.push_back(W); + } + Nodes.push_back(V); + Holder.SCCOfNode[V] = NewSCC; + Data.RIndex[V] = Data.C; + Data.C--; + } else { + Data.Stack.push_back(V); + } + + CallStack.pop_back(); + }; + + // Initialize the callstack by pushing the initial frame + Data.RIndex[Start] = Data.Index++; + Data.Root.insert(Start); + { + auto &&OutEdges = GTraits::outEdges(Graph, Start); + static_assert( + std::is_lvalue_reference_v || + std::is_trivially_destructible_v>, 
+ "We assume that outEdges gives either a reference or a view into " + "the out-edges, but never an owning container by value. Otherwise, " + "the DFSFrame iterators may be dangling"); + CallStack.emplace_back( + DfsFrame{Start, std::begin(OutEdges), std::end(OutEdges)}); + } + + // Simulate the recursion + + PushFrames(Start, &CallStack.back()); + while (true) { + auto &Frame = CallStack.back(); + Vertex V = Frame.CurrVtx; + VisitLoop(V, Frame); + if (PushFrames(V, &Frame)) { + continue; // we don't pop from the callstack here + } + + FinishFrame(V); + if (CallStack.empty()) { + break; + } + } +} + +} // namespace detail + +/// Compute SCCs adapted from the paper "A Space-Efficient Algorithm for Finding +/// Strongly Connected Components", Pearce 2015, DOI: +/// +/// +/// \tparam G The graph-type +/// \tparam Iterative Whether to use the iterative or recursive variant of the +/// algorithm (default: true) +/// \param Graph The graph for which to compute the SCCs +template +[[nodiscard]] SCCHolder::vertex_t> +computeSCCs(const G &Graph, std::bool_constant /*Iterative*/ = {}) { + using GTraits = psr::GraphTraits; + using Vertex = typename GTraits::vertex_t; + + SCCHolder Ret; + auto N = GTraits::size(Graph); + if (!N) { + return Ret; + } + + Ret.SCCOfNode.resize(N); + + detail::Pearce4Data Data(N); + + // for all v ∈ V do if rindex[v]==0 then visit(v) + for (auto V : GTraits::vertices(Graph)) { + if (Data.RIndex[V] == 0) { + if constexpr (Iterative) { + detail::pearce4VisitIt(Graph, V, Data, Ret); + } else { + detail::pearce4VisitRec(Graph, V, Data, Ret); + } + + if (!Data.Stack.empty()) { + auto NewSCC = SCCId(Ret.NodesInSCC.size()); + auto &Nodes = Ret.NodesInSCC.emplace_back(); + Nodes.reserve(Data.Stack.size()); + for (auto Vtx : Data.Stack) { + Nodes.push_back(Vtx); + Ret.SCCOfNode[Vtx] = NewSCC; + } + Data.Stack.clear(); + } + } + } + + return Ret; +} + +/// Compute SCCs and a topological ordering on the SCCs, adapted from the paper +/// "A
Space-Efficient Algorithm for Finding Strongly Connected Components", +/// Pearce 2015, DOI: +/// +/// \tparam G The graph-type \tparam Iterative Whether to use the iterative +/// or recursive variant of the algorithm (default: true) \param Graph The graph +/// for which to compute SCCs and topological ordering +template +[[nodiscard]] std::pair::vertex_t>, + SCCOrder::vertex_t>> +computeSCCsAndTopologicalOrder( + const G &Graph, std::bool_constant /*Iterative*/ = {}) { + using Vertex = typename GraphTraits::vertex_t; + + std::pair, SCCOrder> Ret = { + computeSCCs(Graph, std::bool_constant{}), + {}, + }; + + // Pearce's algorithm produces SCCs in reverse topological order + auto Ids = llvm::reverse(psr::iota>(Ret.first.size())); + Ret.second.SCCIds.append(Ids.begin(), Ids.end()); + + return Ret; +} + +} // namespace psr + +#endif diff --git a/include/phasar/Utils/Timer.h b/include/phasar/Utils/Timer.h index f485aa0cba..bbf692c209 100644 --- a/include/phasar/Utils/Timer.h +++ b/include/phasar/Utils/Timer.h @@ -10,17 +10,38 @@ #ifndef PHASAR_UTILS_TIMER_H #define PHASAR_UTILS_TIMER_H +#include "phasar/Utils/ChronoUtils.h" + #include "llvm/ADT/FunctionExtras.h" #include namespace psr { -class Timer { + +class SimpleTimer { +public: + SimpleTimer() noexcept : Start(std::chrono::steady_clock::now()) {} + + [[nodiscard]] hms elapsed() const noexcept { + auto End = std::chrono::steady_clock::now(); + return {End - Start}; + } + [[nodiscard]] std::chrono::nanoseconds elapsedNanos() const noexcept { + auto End = std::chrono::steady_clock::now(); + return End - Start; + } + + void restart() noexcept { Start = std::chrono::steady_clock::now(); } + +private: + std::chrono::steady_clock::time_point Start; +}; + +class Timer : public SimpleTimer { public: Timer(llvm::unique_function WithElapsed) noexcept - : WithElapsed(std::move(WithElapsed)), - Start(std::chrono::steady_clock::now()) {} + : WithElapsed(std::move(WithElapsed)) {} Timer(Timer &&) noexcept = default; Timer
&operator=(Timer &&) noexcept = default; @@ -29,14 +50,12 @@ class Timer { ~Timer() { if (WithElapsed) { - auto End = std::chrono::steady_clock::now(); - WithElapsed(End - Start); + WithElapsed(elapsedNanos()); } } private: llvm::unique_function WithElapsed; - std::chrono::steady_clock::time_point Start; }; } // namespace psr diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 1e2f2524cc..b9aef51b52 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -15,8 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" -#include "nlohmann/json.hpp" - #include #include #include @@ -35,6 +33,9 @@ template struct type_identity { template using type_identity = std::type_identity; #endif +/// \file TODO: We should stick to one naming convention here and not mix +/// CamelCase with lower_case! + // NOLINTBEGIN(readability-identifier-naming) namespace detail { @@ -165,6 +166,12 @@ struct AreEqualityComparable() == std::declval())> : std::true_type {}; +template +struct IsLessComparable : std::false_type {}; +template +struct IsLessComparable() < std::declval())> + : std::true_type {}; + template struct HasDepth : std::false_type {}; template struct HasDepth().depth())> @@ -203,6 +210,13 @@ struct has_llvm_dense_map_info< decltype(llvm::DenseMapInfo::isEqual(std::declval(), std::declval()))>> : std::true_type {}; + +template +struct is_explicitly_convertible_to : std::false_type {}; +template +struct is_explicitly_convertible_to< + From, To, std::void_t(std::declval()))>> + : std::true_type {}; } // namespace detail template @@ -272,6 +286,9 @@ PSR_CONCEPT IsEqualityComparable = detail::IsEqualityComparable::value; template PSR_CONCEPT AreEqualityComparable = detail::AreEqualityComparable::value; +template +PSR_CONCEPT IsLessComparable = detail::IsLessComparable::value; + template PSR_CONCEPT has_isInteresting_v = // NOLINT detail::has_isInteresting::value; @@ -281,15 +298,20 @@ 
constexpr bool has_llvm_dense_map_info = detail::has_llvm_dense_map_info::value; template using type_identity_t = typename type_identity::type; +template +PSR_CONCEPT is_explicitly_convertible_to = + detail::is_explicitly_convertible_to::value; + template constexpr size_t variant_idx = detail::variant_idx::value; template using ElementType = typename detail::ElementType::type; -template +template struct has_getAsJson : std::false_type {}; // NOLINT template -struct has_getAsJson().getAsJson())> +struct has_getAsJson().getAsJson())>> : std::true_type {}; // NOLINT struct TrueFn { diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h new file mode 100644 index 0000000000..86d6e3f1f2 --- /dev/null +++ b/include/phasar/Utils/TypedVector.h @@ -0,0 +1,136 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_UTILS_TYPEDVECTOR_H +#define PHASAR_UTILS_TYPEDVECTOR_H + +#include "phasar/Utils/ByRef.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +#include +#include +#include +#include +#include + +namespace psr { + +/// Wraps a llvm::SmallVector, allowing index-based access by IdT, instead of +/// size_t. +/// +/// \tparam IdT The index-type that should be used for operator[]. Must be +/// losslessly convertible from and to size_t. +/// \tparam ValueT The usual value_type of SmallVector. 
+/// \tparam SmallSize The size of the inline-storage of SmallVector (default: +/// 0) +template +class TypedVector { +public: + TypedVector() noexcept = default; + TypedVector(std::initializer_list IList) : Vec(IList) {} + TypedVector(size_t Size) : Vec(Size) {} + TypedVector(size_t Size, ByConstRef Default) : Vec(Size, Default) {}; + + template + explicit TypedVector(Iter From, Iter To) + : Vec(std::move(From), std::move(To)) {} + + explicit TypedVector(llvm::ArrayRef Arr) + : Vec(Arr.begin(), Arr.end()) {} + + void reserve(size_t Capa) { Vec.reserve(Capa); } + + void resize(size_t Sz) { Vec.resize(Sz); } + + void resize(size_t Sz, ByConstRef Val) { Vec.resize(Sz, Val); } + + [[nodiscard]] bool empty() const noexcept { return Vec.empty(); } + [[nodiscard]] bool any() const noexcept { return !Vec.empty(); } + [[nodiscard]] size_t size() const noexcept { return Vec.size(); } + [[nodiscard]] size_t capacity() const noexcept { return Vec.capacity(); } + + [[nodiscard]] bool inbounds(IdT Id) const noexcept { + return size_t(Id) < size(); + } + + [[nodiscard]] const ValueT &operator[](IdT Id) const & { + assert(inbounds(Id)); + return Vec[size_t(Id)]; + } + + [[nodiscard]] ValueT &operator[](IdT Id) & { + assert(inbounds(Id)); + return Vec[size_t(Id)]; + } + + [[nodiscard]] ValueT operator[](IdT Id) && { + assert(inbounds(Id)); + return std::move(Vec[size_t(Id)]); + } + + [[nodiscard]] auto begin() noexcept { return Vec.begin(); } + [[nodiscard]] auto end() noexcept { return Vec.end(); } + + [[nodiscard]] auto begin() const noexcept { return Vec.begin(); } + [[nodiscard]] auto end() const noexcept { return Vec.end(); } + + template ValueT &emplace_back(ArgsT &&...Args) { + return Vec.emplace_back(std::forward(Args)...); + } + + void push_back(ByConstRef Val) { Vec.push_back(Val); } + + template + std::enable_if_t> push_back(ValueT &&Val) { + Vec.push_back(std::move(Val)); + } + + void pop_back() { Vec.pop_back(); } + [[nodiscard]] ValueT pop_back_val() { return 
Vec.pop_back_val(); } + + [[nodiscard]] bool operator==(const TypedVector &Other) const noexcept { + return Vec == Other.Vec; + } + [[nodiscard]] bool operator!=(const TypedVector &Other) const noexcept { + return !(*this == Other); + } + + [[nodiscard]] llvm::ArrayRef asRef() const & noexcept { return Vec; } + [[nodiscard]] llvm::ArrayRef asRef() && noexcept = delete; + + [[nodiscard]] llvm::ArrayRef + // NOLINTNEXTLINE(readability-identifier-naming) + drop_front(size_t Offs) const & noexcept { + return asRef().drop_front(Offs); + } + [[nodiscard]] llvm::ArrayRef + drop_front(size_t Offs) && noexcept = delete; + + [[nodiscard]] auto enumerate() const noexcept { + return llvm::map_range(llvm::enumerate(Vec), [](const auto &IndexAndVal) { + return std::pair>{IdT(IndexAndVal.index()), + IndexAndVal.value()}; + }); + } + [[nodiscard]] auto enumerate() noexcept { + return llvm::map_range(llvm::enumerate(Vec), [](auto &IndexAndVal) { + return std::pair{IdT(IndexAndVal.index()), + IndexAndVal.value()}; + }); + } + +private: + llvm::SmallVector Vec{}; +}; +} // namespace psr + +#endif diff --git a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm index 271967a5b2..629bd45daf 100644 --- a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm +++ b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm @@ -8,6 +8,7 @@ module; #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" @@ -19,6 +20,7 @@ export namespace psr { using psr::buildLLVMBasedCallGraph; using psr::CFGTraits; using psr::CHAResolver; +using psr::getDefaultEntryPoints; using psr::getEntryFunctions; 
using psr::getEntryFunctionsMut; using psr::getNonPureVirtualVFTEntry; @@ -45,4 +47,5 @@ using psr::SparseLLVMBasedCFGProvider; using psr::SparseLLVMBasedICFG; using psr::SparseLLVMBasedICFGView; using psr::valueOf; +using psr::VTAResolver; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp index 06986f9c2d..13d41123fd 100644 --- a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp +++ b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp @@ -1,5 +1,6 @@ #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/Utils/Logger.h" @@ -66,3 +67,18 @@ psr::getEntryFunctionsMut(LLVMProjectIRDB &IRDB, } return UserEntryPointFns; } + +/// Returns the default entry points for IRDB: the function named +/// GlobalCtorsDtorsModel::ModelName if IRDB defines it, otherwise "main" if +/// IRDB defines it, and otherwise the single entry "__ALL__". +std::vector +psr::getDefaultEntryPoints(const LLVMProjectIRDB &IRDB) { + if (IRDB.getFunctionDefinition(GlobalCtorsDtorsModel::ModelName)) { + // Hand back exactly the function whose definition was just checked. + return {GlobalCtorsDtorsModel::ModelName.str()}; + } + if (IRDB.getFunctionDefinition("main")) { + return {"main"}; + } + return {"__ALL__"}; +} diff --git a/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp index cffe27cb89..91aa57504b 100644 --- a/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp +++ b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/IRBuilder.h" #include +#include namespace psr { template diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index 492176a510..0f7f71e15a 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -3,6 +3,7 @@ #include "phasar/ControlFlow/CallGraphAnalysisType.h" #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h"
+#include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" diff --git a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp index 67246c938e..82c2f9ec34 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp @@ -4,6 +4,7 @@ #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/MapUtils.h" #include "llvm/ADT/StringRef.h" @@ -25,20 +26,18 @@ static std::string getTypeName(const llvm::DIType *DITy) { auto TypeName = [DITy] { if (const auto *CompTy = llvm::dyn_cast(DITy)) { if (auto Ident = CompTy->getIdentifier(); !Ident.empty()) { - return Ident; + // In LLVM 17 demangle() takes a StringRef + return llvm::demangle(Ident.str()); } } - return DITy->getName(); + return llvmTypeToString(DITy, true); }(); - // In LLVM 17 demangle() takes a StringRef - auto Ret = llvm::demangle(TypeName.str()); - - if (llvm::StringRef(Ret).startswith(TSPrefixDemang)) { - Ret.erase(0, TSPrefixDemang.size()); + if (llvm::StringRef(TypeName).startswith(TSPrefixDemang)) { + TypeName.erase(0, TSPrefixDemang.size()); } - return Ret; + return TypeName; } static void insertVirtualFunctions( diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp new file mode 100644 index 0000000000..d249a40346 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp @@ -0,0 +1,20 @@ +#include "phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h" + +#include "llvm/IR/InstrTypes.h" + +using namespace psr; + 
+PrecomputedResolver::PrecomputedResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + MaybeUniquePtr BaseCG) + : Resolver(IRDB, VTP), BaseCG(std::move(BaseCG)) { + assert(this->BaseCG != nullptr); +} + +void PrecomputedResolver::resolveFunctionPointer( + FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) { + auto Callees = BaseCG->getCalleesOfCallAt(CallSite); + PossibleTargets.insert(Callees.begin(), Callees.end()); +} + +std::string PrecomputedResolver::str() const { return "Precomputed"; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index b80fe475eb..07d77f33f0 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -22,6 +22,7 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" @@ -161,15 +162,14 @@ bool psr::isVirtualCall(const llvm::Instruction *Inst, // check potential receiver type const auto *RecType = getReceiverType(CallSite); if (!RecType) { - llvm::errs() << "No receiver type found for call at " - << llvmIRToString(Inst) << '\n'; return false; } if (!VTP.hasVFTable(RecType)) { return false; } - return getVFTIndex(CallSite) >= 0; + auto Idx = getVFTIndex(CallSite); + return Idx >= 0; } // Derived from LLVM's llvm::Function::hasAddressTaken() @@ -243,9 +243,11 @@ auto Resolver::resolveIndirectCall(const llvm::CallBase *CallSite) FunctionSetTy PossibleTargets; if (VTP && isVirtualCall(CallSite, *VTP)) { resolveVirtualCall(PossibleTargets, CallSite); - } - - if (PossibleTargets.empty()) { + } else { + // Note: Don't use 
resolveFunctionPointer() as fallback when + // resolveVirtualCall() does not find callees, because this will break the + // fixpoint computation when using the OTFResolver. Resolvers should install + // a meaningful fallback themselves, if necessary. resolveFunctionPointer(PossibleTargets, CallSite); } @@ -284,11 +286,10 @@ void Resolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, void Resolver::otherInst(const llvm::Instruction *Inst) {} -std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, - const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT) { +std::unique_ptr +Resolver::create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT, BaseResolverProvider GetBaseRes) { assert(IRDB != nullptr); assert(VTP != nullptr); @@ -301,9 +302,18 @@ std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, case CallGraphAnalysisType::RTA: assert(TH != nullptr); return std::make_unique(IRDB, VTP, TH); - case CallGraphAnalysisType::VTA: - llvm::report_fatal_error( - "The VTA callgraph algorithm is not implemented yet"); + case CallGraphAnalysisType::VTA: { + assert(PT); + auto BaseRes = [&]() -> MaybeUniquePtr { + if (!GetBaseRes) { + return std::make_unique(IRDB, VTP, TH); + } + + return GetBaseRes(IRDB, VTP, TH, PT); + }(); + assert(BaseRes != nullptr); + return std::make_unique(IRDB, VTP, PT, std::move(BaseRes)); + } case CallGraphAnalysisType::OTF: assert(PT); return std::make_unique(IRDB, VTP, PT); diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp new file mode 100644 index 0000000000..7226938501 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -0,0 +1,179 @@ +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" + +#include "phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h" +#include 
"phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/MaybeUniquePtr.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/InstrTypes.h" + +using namespace psr; + +void VTAResolver::DefaultReachableFunctions::operator()( + const LLVMProjectIRDB &IRDB, + llvm::function_ref WithFun) { + llvm::for_each(IRDB.getAllFunctions(), WithFun); +} + +static VTAResolver createWithBaseCGResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + MaybeUniquePtr BaseCG, vta::AliasInfoTy AS) { + auto ReachableFunctions = + [BaseCG = BaseCG.get()]( + const LLVMProjectIRDB &, + llvm::function_ref WithFun) { + llvm::for_each(BaseCG->getAllVertexFunctions(), WithFun); + }; + auto BaseRes = + std::make_unique(IRDB, VTP, std::move(BaseCG)); + + return VTAResolver(IRDB, VTP, AS, std::move(BaseRes), ReachableFunctions); +} + +VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, + MaybeUniquePtr BaseCG) + : psr::VTAResolver( + createWithBaseCGResolver(IRDB, VTP, std::move(BaseCG), AS)) {} + +VTAResolver::VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions) + : Resolver(IRDB, VTP), BaseResolver(std::move(BaseRes)) { + assert(this->BaseResolver != nullptr); + + auto TAG = vta::computeTypeAssignmentGraph( + *IRDB, *VTP, AS, *this->BaseResolver, ReachableFunctions); + + auto [SCCs, Order] = computeSCCsAndTopologicalOrder(TAG); + auto Deps = computeSCCDependencies(TAG, SCCs); + + TA = vta::propagateTypes(TAG, SCCs, Deps, Order); + + // TAG.print(llvm::errs()); + // 
TA.print(llvm::errs(), TAG, SCCs); + + this->SCCs = std::move(SCCs); + Nodes = std::move(TAG.Nodes); +} + +VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, + MaybeUniquePtr BaseCG) + : VTAResolver( + IRDB, VTP, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }, + std::move(BaseCG)) {} + +VTAResolver::VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions) + : VTAResolver( + IRDB, VTP, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }, + std::move(BaseRes), ReachableFunctions) {} + +std::string VTAResolver::str() const { return "VTA"; } + +void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) { + + auto RetrievedVtableIndex = getVFTIndex(CallSite); + if (!RetrievedVtableIndex.has_value()) { + // An error occured + PHASAR_LOG_LEVEL(DEBUG, + "Error with resolveVirtualCall : impossible to retrieve " + "the vtable index\n" + << llvmIRToString(CallSite) << "\n"); + return; + } + + auto *CalledOp = CallSite->getCalledOperand()->stripPointerCastsAndAliases(); + auto VtableIndex = RetrievedVtableIndex.value(); + + auto BaseCallees = BaseResolver->resolveIndirectCall(CallSite); + + auto ReceiverIdx = CallSite->hasStructRetAttr(); + if (CallSite->arg_size() > ReceiverIdx) { + const auto *Receiver = CallSite->getArgOperand(ReceiverIdx); + if (auto ReceiverNod = Nodes.getOrNull({vta::Variable{Receiver}})) { + auto SCC = SCCs.SCCOfNode[*ReceiverNod]; + const auto *ReceiverType = getReceiverType(CallSite); + + const auto &Types = TA.TypesPerSCC[SCC]; + for (auto Ty : Types) { + if (const auto *DITy = Ty.dyn_cast()) { + 
if (const auto *Fun = getNonPureVirtualVFTEntry( + DITy, VtableIndex, CallSite, ReceiverType)) { + if (psr::isConsistentCall(CallSite, Fun) && + (BaseCallees.empty() || BaseCallees.contains(Fun))) { + PossibleTargets.insert(Fun); + } + } + } + } + } + } + + auto TNId = Nodes.getOrNull({vta::Variable{CalledOp}}); + if (TNId) { + auto SCC = SCCs.SCCOfNode[*TNId]; + const auto &Types = TA.TypesPerSCC[SCC]; + for (auto Ty : Types) { + if (const auto *Fun = Ty.dyn_cast()) { + if (psr::isConsistentCall(CallSite, Fun) && + (BaseCallees.empty() || BaseCallees.contains(Fun))) { + PossibleTargets.insert(Fun); + } + } + } + } + + if (PossibleTargets.empty()) { + PossibleTargets = std::move(BaseCallees); + } +} + +void VTAResolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) { + auto BaseCallees = BaseResolver->resolveIndirectCall(CallSite); + + auto TNId = Nodes.getOrNull({vta::Variable{ + CallSite->getCalledOperand()->stripPointerCastsAndAliases()}}); + if (TNId) { + auto SCC = SCCs.SCCOfNode[*TNId]; + const auto &Types = TA.TypesPerSCC[SCC]; + for (auto Ty : Types) { + if (const auto *Fun = Ty.dyn_cast()) { + if (psr::isConsistentCall(CallSite, Fun) && + (BaseCallees.empty() || BaseCallees.contains(Fun))) { + PossibleTargets.insert(Fun); + } + } + } + } + + if (PossibleTargets.empty()) { + PossibleTargets = std::move(BaseCallees); + } +} diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp new file mode 100644 index 0000000000..9ca875bd37 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -0,0 +1,659 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Logger.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" + +#include +#include +#include +#include + +using namespace psr; +using namespace psr::vta; + +#if __cplusplus >= 202002L +static_assert(is_const_graph); +#endif + +static void printNodeImpl(llvm::raw_ostream &OS, Variable Var) { + OS << "var-"; + OS.write_escaped(psr::llvmIRToString(Var.Val)); +} + +static void printNodeImpl(llvm::raw_ostream &OS, Field Fld) { + OS << "fld-"; + OS.write_escaped(psr::llvmTypeToString(Fld.Base, true)); + OS << '+' << Fld.ByteOffset; +} + +static void printNodeImpl(llvm::raw_ostream &OS, Return Ret) { + OS << "ret-"; + OS.write_escaped(Ret.Fun->getName()); +} + +void vta::printNode(llvm::raw_ostream &OS, TAGNode TN) { + std::visit([&OS](auto Nod) { printNodeImpl(OS, Nod); }, TN.Label); +} + +static const llvm::DIType *stripMemberAndTypedef(const llvm::DIType *Ty) { + while (const auto *DerivedTy = llvm::dyn_cast(Ty)) { + if (DerivedTy->getTag() == llvm::dwarf::DW_TAG_typedef || + DerivedTy->getTag() == llvm::dwarf::DW_TAG_member) { + Ty = DerivedTy->getBaseType(); + continue; + } + break; + } + return Ty; +} + +static bool isPointerTy(const llvm::DIType *Ty) { + if (const auto *DerivedTy = + llvm::dyn_cast(stripMemberAndTypedef(Ty))) { + return DerivedTy->getTag() == 
llvm::dwarf::DW_TAG_pointer_type || + DerivedTy->getTag() == llvm::dwarf::DW_TAG_reference_type; + } + return false; +} + +static const llvm::DICompositeType *isCompositeTy(const llvm::DIType *Ty) { + return llvm::dyn_cast(stripMemberAndTypedef(Ty)); +} + +static llvm::SmallBitVector +getPointerIndicesOfType(llvm::DICompositeType *Ty, const llvm::DataLayout &DL) { + llvm::SmallBitVector Ret; + + auto PointerSize = DL.getPointerSizeInBits(); + + // XXX: Does every type provide a meaningful getSizeInBits? + auto MaxNumPointers = Ty->getSizeInBits() / PointerSize; + if (!MaxNumPointers) { + return Ret; + } + Ret.resize(MaxNumPointers); + + llvm::SmallVector> WorkList = {{Ty, 0}}; + + while (!WorkList.empty()) { + auto [CurrTy, CurrBitOffs] = WorkList.pop_back_val(); + + if (isPointerTy(CurrTy)) { + size_t Idx = CurrBitOffs / PointerSize; + if (CurrBitOffs % PointerSize) [[unlikely]] { + PHASAR_LOG_LEVEL(WARNING, "Unaligned pointer.."); + } + assert(Ret.size() > Idx && + "reserved unsufficient space for pointer indices"); + Ret.set(Idx); + continue; + } + + const auto *CompTy = isCompositeTy(CurrTy); + if (!CompTy) { + continue; + } + + auto Tag = CompTy->getTag(); + + if (Tag == llvm::dwarf::DW_TAG_array_type) { + auto *ElemTy = CompTy->getBaseType(); + const auto *ArrayLenRange = + llvm::cast(CompTy->getElements()[0]); + auto ArrayLenBound = ArrayLenRange->getCount(); + if (const auto *ArrayLenCInt = + ArrayLenBound.dyn_cast()) { + auto ArrayLen = ArrayLenCInt->getSExtValue(); + // Count is -1 for flexible array members; + if (ArrayLen < 0) { + continue; + } + + auto ElemSize = int64_t(ElemTy->getSizeInBits()); + for (int64_t I = 0, Offs = CurrBitOffs; I < ArrayLen; + ++I, Offs += ElemSize) { + WorkList.emplace_back(ElemTy, Offs); + } + } + + continue; + } + + if (Tag == llvm::dwarf::DW_TAG_structure_type || + Tag == llvm::dwarf::DW_TAG_class_type) { + + auto Elems = CompTy->getElements(); + uint64_t Offs = CurrBitOffs; + for (auto *Elem : Elems) { + auto *ElemTy = 
llvm::dyn_cast(Elem); + if (!ElemTy) { + continue; + } + + scope_exit IncOffs = [&] { Offs += ElemTy->getSizeInBits(); }; + + if (Elem->getTag() != llvm::dwarf::DW_TAG_inheritance && + Elem->getTag() != llvm::dwarf::DW_TAG_member) { + continue; + } + + WorkList.emplace_back(ElemTy, Offs); + } + + continue; + } + } + + return Ret; +} + +static void addTAGNode(TAGNode TN, TypeAssignmentGraph &TAG) { + TAG.Nodes.getOrInsert(TN); +} + +static void addFields(const LLVMProjectIRDB &IRDB, TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + + size_t PointerSize = DL.getPointerSize(); + + llvm::DebugInfoFinder DIF; + DIF.processModule(*IRDB.getModule()); + + for (auto *DITy : DIF.types()) { + if (auto *CompTy = llvm::dyn_cast(DITy)) { + auto Offsets = getPointerIndicesOfType(CompTy, DL); + for (auto Offs : Offsets.set_bits()) { + addTAGNode({Field{CompTy, Offs * PointerSize}}, TAG); + } + addTAGNode({Field{CompTy, SIZE_MAX}}, TAG); + } + } +} + +static void addGlobals(const LLVMProjectIRDB &IRDB, TypeAssignmentGraph &TAG) { + auto NumGlobals = IRDB.getNumGlobals(); + TAG.Nodes.reserve(TAG.Nodes.size() + NumGlobals); + + for (const auto &Glob : IRDB.getModule()->globals()) { + if (Glob.getValueType()->isIntOrIntVectorTy() || + Glob.getValueType()->isFloatingPointTy()) { + continue; + } + auto GlobName = Glob.getName(); + if (GlobName.startswith("_ZTV") || GlobName.startswith("_ZTI") || + GlobName.startswith("_ZTS")) { + continue; + } + + addTAGNode({Variable{&Glob}}, TAG); + } +} + +static void initializeWithFun(const llvm::Function *Fun, + TypeAssignmentGraph &TAG) { + // Add all params + // Add all locals + // Add return + + if (Fun->isDeclaration()) { + return; + } + + for (const auto &Arg : Fun->args()) { + if (!Arg.getType()->isPointerTy()) { + continue; + } + + addTAGNode({Variable{&Arg}}, TAG); + } + + for (const auto &I : llvm::instructions(Fun)) { + if (!I.getType()->isPointerTy()) { + // XXX: What about SSA structs that contain pointers? 
+ continue; + } + + if (const auto *Alloca = llvm::dyn_cast(&I)) { + if (Alloca->getAllocatedType()->isIntOrIntVectorTy() || + Alloca->getAllocatedType()->isFloatingPointTy()) { + continue; + } + } + + addTAGNode({Variable{&I}}, TAG); + } + + if (Fun->getReturnType() && Fun->getReturnType()->isPointerTy()) { + addTAGNode({Return{Fun}}, TAG); + } +} + +static void handleAlloca(const llvm::AllocaInst *Alloca, + TypeAssignmentGraph &TAG, + const psr::LLVMVFTableProvider & /*VTP*/) { + if (Alloca->getAllocatedType()->isPointerTy()) { + return; + } + + auto TN = TAG.get({Variable{Alloca}}); + if (!TN) { + return; + } + + const auto *AllocTy = getVarTypeFromIR(Alloca); + if (!AllocTy) { + return; + } + + TAG.TypeEntryPoints[*TN].insert(AllocTy); +} + +static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto Offs = [&]() -> size_t { + llvm::APInt Offs(64, 0); + if (GEP->accumulateConstantOffset(DL, Offs)) { + return Offs.getZExtValue(); + } + return SIZE_MAX; + }(); + + auto *VarTy = getVarTypeFromIR(GEP); + if (!VarTy) { + return std::nullopt; + } + + return TAG.get({Field{VarTy, Offs}}); +} + +static void handleGEP(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { + auto To = TAG.get({Variable{GEP}}); + if (!To) { + return; + } + + if (!GEP->isInBounds()) { + auto From = TAG.get({Variable{GEP->getPointerOperand()}}); + + if (From && To) { + TAG.addEdge(*From, *To); + } + + return; + } + + auto From = getGEPNode(GEP, TAG, DL); + if (From) { + TAG.addEdge(*From, *To); + } +} + +static bool handleEntryForStore(const llvm::StoreInst *Store, + TypeAssignmentGraph &TAG, AliasInfoTy AI, + const llvm::DataLayout &DL) { + const auto *Base = llvm::dyn_cast( + Store->getValueOperand()->stripPointerCastsAndAliases()); + + if (!Base) { + return false; + } + + if (const auto *GEPDest = + llvm::dyn_cast(Store->getPointerOperand())) { + if (auto GEPNodeId = 
getGEPNode(GEPDest, TAG, DL)) { + TAG.TypeEntryPoints[*GEPNodeId].insert(Base); + + auto GEPNode = TAG[*GEPNodeId]; + if (const auto *FldDest = std::get_if(&GEPNode.Label)) { + auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); + + if (ApproxDest) { + TAG.TypeEntryPoints[*ApproxDest].insert(Base); + } + } + } + } + + AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { + // XXX: Fuse store and GEP! + + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) { + return; + } + + TAG.TypeEntryPoints[*DestNodeId].insert(Base); + }); + return true; +} + +static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, + AliasInfoTy AI, const llvm::DataLayout &DL) { + + if (handleEntryForStore(Store, TAG, AI, DL)) { + return; + } + + auto From = TAG.get({Variable{Store->getValueOperand()}}); + if (!From) { + return; + } + + if (const auto *GEPDest = + llvm::dyn_cast(Store->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { + TAG.addEdge(*From, *GEPNodeId); + + auto GEPNode = TAG[*GEPNodeId]; + if (const auto *FldDest = std::get_if(&GEPNode.Label)) { + auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); + + if (ApproxDest) { + TAG.addEdge(*From, *ApproxDest); + } + } + } + } + + AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { + // XXX: Fuse store and GEP! 
+ + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) { + return; + } + + TAG.addEdge(*From, *DestNodeId); + }); +} + +static void handleLoad(const llvm::LoadInst *Load, TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto To = TAG.get({Variable{Load}}); + if (!To) { + return; + } + + auto From = TAG.get({Variable{Load->getPointerOperand()}}); + if (From) { + TAG.addEdge(*From, *To); + } + + if (const auto *GEPDest = + llvm::dyn_cast(Load->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { + TAG.addEdge(*GEPNodeId, *To); + } + } +} + +static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { + auto To = TAG.get({Variable{Phi}}); + if (!To) { + return; + } + + for (const auto &Inc : Phi->incoming_values()) { + auto From = TAG.get({Variable{Inc.get()}}); + if (From) { + TAG.addEdge(*From, *To); + } + } +} + +static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, + TypeAssignmentGraph &TAG, + const llvm::Function *Callee, + const psr::LLVMVFTableProvider & /*VTP*/) { + + if (!psr::isHeapAllocatingFunction(Callee)) { + return; + } + + if (const auto *MDNode = Call->getMetadata("heapallocsite")) { + + // Shortcut + if (const auto *CompTy = llvm::dyn_cast(MDNode); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + TAG.TypeEntryPoints[CSNod].insert(CompTy); + } + } +} + +static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, + Resolver &BaseRes, const psr::LLVMVFTableProvider &VTP) { + + llvm::SmallVector> Args; + llvm::SmallBitVector EntryArgs; + bool HasArgNode = false; + + for (const auto &Arg : Call->args()) { + auto TN = TAG.get({Variable{Arg.get()}}); + Args.push_back(TN); + if (TN) { + HasArgNode = true; + } + + bool IsEntry = + llvm::isa(Arg.get()->stripPointerCastsAndAliases()); + EntryArgs.push_back(IsEntry); + } + + auto CSNod = TAG.get({Variable{Call}}); + + // XXX: 
Handle struct returns that contain pointers + if (!HasArgNode && !CSNod) { + return; + } + + const auto HandleCallTarget = [&](const llvm::Function *Callee) { + // CSNod is empty when only arguments (not the call result) have TAG + // nodes; dereferencing it unconditionally would be undefined behavior. + if (CSNod) { + handleEntryForCall(Call, *CSNod, TAG, Callee, VTP); + } + + if (Callee->isDeclaration()) { + // XXX: Integrate with getLibCSummary() + return; + } + + for (const auto &[Param, Arg] : llvm::zip(Callee->args(), Args)) { + auto ParamNodId = TAG.get({Variable{&Param}}); + if (!ParamNodId) { + continue; + } + + if (EntryArgs.test(Param.getArgNo())) { + TAG.TypeEntryPoints[*ParamNodId].insert( + llvm::cast(Call->getArgOperand(Param.getArgNo()) + ->stripPointerCastsAndAliases())); + } + + if (!Arg) { + continue; + } + + if (!Param.hasStructRetAttr()) { + TAG.addEdge(*Arg, *ParamNodId); + } + + // if (!Param.hasByValAttr()) + // TAG.addEdge(*ParamNodId, *Arg); + } + if (CSNod) { + auto RetNod = TAG.get({Return{Callee}}); + if (RetNod) { + TAG.addEdge(*RetNod, *CSNod); + } + } + }; + + if (const auto *StaticCallee = llvm::dyn_cast( + Call->getCalledOperand()->stripPointerCastsAndAliases())) { + HandleCallTarget(StaticCallee); + } else { + for (const auto *Callee : BaseRes.resolveIndirectCall(Call)) { + HandleCallTarget(Callee); + } + } +} + +static void handleReturn(const llvm::ReturnInst *Ret, + TypeAssignmentGraph &TAG) { + + auto TNId = TAG.get({Return{Ret->getFunction()}}); + if (!TNId) { + return; + } + + if (const auto *RetVal = Ret->getReturnValue()) { + const auto *Base = RetVal->stripPointerCastsAndAliases(); + if (const auto *RetFun = llvm::dyn_cast(Base)) { + TAG.TypeEntryPoints[*TNId].insert(RetFun); + return; + } + + auto From = TAG.get({Variable{Base}}); + if (From) { + TAG.addEdge(*From, *TNId); + } + } +} + +static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, + Resolver &BaseRes, AliasInfoTy AI, + const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { + if (llvm::isa(&I)) { + return; + } + + if (const auto *Alloca = llvm::dyn_cast(&I)) { + handleAlloca(Alloca, TAG,
VTP); + return; + } + if (const auto *Load = llvm::dyn_cast(&I)) { + handleLoad(Load, TAG, DL); + return; + } + if (const auto *GEP = llvm::dyn_cast(&I)) { + handleGEP(GEP, TAG, DL); + return; + } + if (const auto *Store = llvm::dyn_cast(&I)) { + handleStore(Store, TAG, AI, DL); + return; + } + if (const auto *Phi = llvm::dyn_cast(&I)) { + handlePhi(Phi, TAG); + return; + } + if (const auto *Cast = llvm::dyn_cast(&I)) { + auto From = TAG.get({Variable{Cast->getOperand(0)}}); + auto To = TAG.get({Variable{Cast}}); + + if (From && To) { + TAG.addEdge(*From, *To); + } + } + if (const auto *Call = llvm::dyn_cast(&I)) { + handleCall(Call, TAG, BaseRes, VTP); + return; + } + if (const auto *Ret = llvm::dyn_cast(&I)) { + handleReturn(Ret, TAG); + return; + } + // XXX: Handle more cases +} + +static void buildTAGWithFun(const llvm::Function *Fun, TypeAssignmentGraph &TAG, + Resolver &BaseRes, AliasInfoTy AI, + const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { + for (const auto &I : llvm::instructions(Fun)) { + dispatch(I, TAG, BaseRes, AI, DL, VTP); + } +} + +static auto computeTypeAssignmentGraphImpl(const LLVMProjectIRDB &IRDB, + Resolver &BaseRes, AliasInfoTy AI, + const psr::LLVMVFTableProvider &VTP, + ReachableFunsTy ReachableFunctions) + -> TypeAssignmentGraph { + TypeAssignmentGraph TAG; + + const auto &DL = IRDB.getModule()->getDataLayout(); + + addFields(IRDB, TAG, DL); + addGlobals(IRDB, TAG); + + assert(ReachableFunctions); + + ReachableFunctions(IRDB, + [&TAG](const auto *Fun) { initializeWithFun(Fun, TAG); }); + + TAG.Adj.resize(TAG.Nodes.size()); + + ReachableFunctions(IRDB, [&](const auto *Fun) { + buildTAGWithFun(Fun, TAG, BaseRes, AI, DL, VTP); + }); + + return TAG; +} + +auto vta::computeTypeAssignmentGraph(const LLVMProjectIRDB &IRDB, + const psr::LLVMVFTableProvider &VTP, + AliasInfoTy AS, Resolver &BaseRes, + ReachableFunsTy ReachableFunctions) + -> TypeAssignmentGraph { + + return computeTypeAssignmentGraphImpl(IRDB, BaseRes, AS, 
VTP, + ReachableFunctions); +} + +void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { + OS << "digraph TAG {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + + size_t Ctr = 0; + for (const auto &TN : Nodes) { + OS << " " << Ctr << "[label=\""; + printNode(OS, TN); + OS << "\"];\n"; + + ++Ctr; + } + + OS << '\n'; + + Ctr = 0; + for (const auto &Targets : Adj) { + for (auto Tgt : Targets) { + OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; + } + ++Ctr; + } +} diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp new file mode 100644 index 0000000000..8be4122f43 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp @@ -0,0 +1,94 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" + +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/IR/DebugInfoMetadata.h" + +using namespace psr; +using namespace psr::vta; + +static void initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { + for (const auto &[Node, Types] : TAG.TypeEntryPoints) { + auto SCC = SCCs.SCCOfNode[Node]; + TA.TypesPerSCC[SCC].insert(Types.begin(), Types.end()); + } +} + +static void propagate(TypeAssignment &TA, + const SCCDependencyGraph &Deps, + SCCId CurrSCC) { + const auto &Types = TA.TypesPerSCC[CurrSCC]; + if (Types.empty()) { + return; + } + + for (auto Succ : Deps.ChildrenOfSCC[CurrSCC]) { + 
TA.TypesPerSCC[Succ].insert(Types.begin(), Types.end()); + } +} + +TypeAssignment vta::propagateTypes(const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs, + const SCCDependencyGraph &Deps, + const SCCOrder &Order) { + TypeAssignment Ret; + Ret.TypesPerSCC.resize(SCCs.size()); + + initialize(Ret, TAG, SCCs); + for (auto SCC : Order.SCCIds) { + propagate(Ret, Deps, SCC); + } + + return Ret; +} + +void TypeAssignment::print(llvm::raw_ostream &OS, + const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { + OS << "digraph TypeAssignment {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + + Compressor Types; + auto GetOrAddType = [&](TypeAssignmentGraph::TypeInfoTy Ty) { + auto [Id, Inserted] = Types.insert(Ty); + if (Inserted) { + OS << (size_t(Id) + SCCs.size()) << "[label=\""; + if (const auto *Fun = Ty.dyn_cast()) { + OS << "fun-" << Fun->getName(); + } else if (const auto *DITy = Ty.dyn_cast()) { + OS << "type-"; + OS.write_escaped(llvmTypeToString(DITy, true)); + } + OS << "\"];\n"; + } + return Id + SCCs.size(); + }; + + for (const auto &[Ctr, NodesInSCC] : SCCs.NodesInSCC.enumerate()) { + OS << " " << uint32_t(Ctr) << "[label=\""; + for (auto TNId : SCCs.NodesInSCC[Ctr]) { + auto TN = TAG.Nodes[TNId]; + printNode(OS, TN); + OS << "\\n"; + } + OS << "\"];\n"; + + for (auto Ty : TypesPerSCC[Ctr]) { + auto TyId = GetOrAddType(Ty); + OS << uint32_t(Ctr) << "->" << TyId << ";\n"; + } + } +} diff --git a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp index 27c35db533..77a1c6af76 100644 --- a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp +++ b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/WithColor.h" #include #include @@ -155,6 +156,20 @@ LLVMProjectIRDB::load(const llvm::Twine &IRFileName, return LLVMProjectIRDB(std::move(*M), std::move(Ctx), EnableOpaquePointers); } 
+LLVMProjectIRDB LLVMProjectIRDB::loadOrExit(const llvm::Twine &IRFileName, + bool EnableOpaquePointers, + int ErrorExitCode) { + auto Ret = load(IRFileName, EnableOpaquePointers); + if (!Ret) { + llvm::WithColor::error() + << "Could not load LLVM-" << LLVM_VERSION_MAJOR << " IR file " + << IRFileName << ": " << Ret.getError().message() << '\n'; + std::exit(ErrorExitCode); + } + + return std::move(*Ret); +} + LLVMProjectIRDB::LLVMProjectIRDB(const llvm::Twine &IRFileName, bool EnableOpaquePointers) : Ctx(new llvm::LLVMContext()) { @@ -162,6 +177,9 @@ LLVMProjectIRDB::LLVMProjectIRDB(const llvm::Twine &IRFileName, auto M = getParsedIRModuleOrErr(IRFileName, *Ctx); if (!M) { + llvm::WithColor::error() + << "Could not load LLVM-" << LLVM_VERSION_MAJOR << " IR file " + << IRFileName << ": " << M.getError().message() << '\n'; return; } @@ -264,6 +282,9 @@ LLVMProjectIRDB::LLVMProjectIRDB(llvm::MemoryBufferRef Buf, setOpaquePointersForCtx(*Ctx, EnableOpaquePointers); auto M = getParsedIRModuleOrErr(Buf, *Ctx); if (!M) { + llvm::WithColor::error() << "Could not load " << LLVM_VERSION_MAJOR + << " IR buffer: " << Buf.getBufferIdentifier() + << ": " << M.getError().message() << '\n'; return; } diff --git a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp index 9b5befda55..a40c8d2468 100644 --- a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp +++ b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp @@ -11,6 +11,7 @@ #include "phasar/PhasarLLVM/Utils/AllocatedTypes.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/AlignNum.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/NlohmannLogging.h" #include "phasar/Utils/PAMMMacros.h" @@ -24,11 +25,9 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include -#include namespace psr { @@ -272,35 +271,6 @@ void 
GeneralStatistics::printAsJson(llvm::raw_ostream &OS) const { } // namespace psr -namespace { -template struct AlignNum { - llvm::StringRef Name; - T Num; - - AlignNum(llvm::StringRef Name, T Num) noexcept : Name(Name), Num(Num) {} - AlignNum(llvm::StringRef Name, size_t Numerator, size_t Denominator) noexcept - : Name(Name), Num(double(Numerator) / double(Denominator)) {} - - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const AlignNum &AN) { - static constexpr size_t NumOffs = 32; - - auto Len = AN.Name.size() + 1; - auto Diff = -(Len < NumOffs) & (NumOffs - Len); - - OS << AN.Name << ':'; - // Default is two fixed-point decimal places, so shift the output by three - // spaces - OS.indent(Diff + std::is_floating_point_v * 3); - OS << llvm::formatv("{0,+7}\n", AN.Num); - - return OS; - } -}; -template AlignNum(llvm::StringRef, T) -> AlignNum; -AlignNum(llvm::StringRef, size_t, size_t) -> AlignNum; -} // namespace - llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, const GeneralStatistics &Statistics) { return OS diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index eb991a8ffa..a0b05cbbb1 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -25,6 +25,7 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" diff --git a/lib/Utils/ChronoUtils.cpp b/lib/Utils/ChronoUtils.cpp index 6540baf96c..05f23edeaf 100644 --- a/lib/Utils/ChronoUtils.cpp +++ b/lib/Utils/ChronoUtils.cpp @@ -1,5 +1,7 @@ #include "phasar/Utils/ChronoUtils.h" +#include "llvm/Support/Format.h" + llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, const hms &HMS) { return OS << llvm::format("%.2ld:%.2ld:%.2ld:%.6ld", HMS.Hours.count(), HMS.Minutes.count(), HMS.Seconds.count(), diff 
--git a/lib/Utils/IO.cpp b/lib/Utils/IO.cpp index dad2e4a70f..b016db47d4 100644 --- a/lib/Utils/IO.cpp +++ b/lib/Utils/IO.cpp @@ -18,7 +18,6 @@ #include "phasar/Utils/ErrorHandling.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/Utilities.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/lib/Utils/Utils.cppm b/lib/Utils/Utils.cppm index 9440d6c5dd..7273ccedc5 100644 --- a/lib/Utils/Utils.cppm +++ b/lib/Utils/Utils.cppm @@ -1,13 +1,16 @@ module; #include "phasar/Utils/AdjacencyList.h" +#include "phasar/Utils/AlignNum.h" #include "phasar/Utils/AnalysisPrinterBase.h" #include "phasar/Utils/AnalysisProperties.h" #include "phasar/Utils/Average.h" +#include "phasar/Utils/BitSet.h" #include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/BoxedPointer.h" #include "phasar/Utils/ByRef.h" #include "phasar/Utils/ChronoUtils.h" +#include "phasar/Utils/Compressor.h" #include "phasar/Utils/DFAMinimizer.h" #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/DebugOutput.h" @@ -32,6 +35,7 @@ module; #include "phasar/Utils/PointerUtils.h" #include "phasar/Utils/Printer.h" #include "phasar/Utils/RepeatIterator.h" +#include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/SemiRing.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/StableVector.h" @@ -39,18 +43,22 @@ module; #include "phasar/Utils/TableWrappers.h" #include "phasar/Utils/Timer.h" #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" export module phasar.utils; export namespace psr { using psr::AdjacencyList; +using psr::AlignNum; +using psr::AlignStr; using psr::AnalysisPrinterBase; using psr::AnalysisProperties; using psr::GraphTraits; using psr::to_string; using psr::operator<<; using psr::AnalysisPropertiesMixin; +using psr::BitSet; using psr::BitVectorSet; using psr::BoxedConstPtr; using psr::BoxedPtr; @@ -93,11 +101,14 @@ using psr::hasFlag; using psr::InitPhasar; using psr::iota; 
using psr::IotaIterator; +using psr::is_const_graph; using psr::is_graph; using psr::is_graph_edge; using psr::is_graph_trait; using psr::is_removable_graph_trait_v; using psr::is_reservable_graph_trait_v; +using psr::is_weighted_const_graph; +using psr::is_weighted_graph; using psr::JoinLattice; using psr::JoinLatticeTraits; using psr::Logger; @@ -142,7 +153,12 @@ using psr::adl_to_string; using psr::AreEqualityComparable; using psr::assertAllNotNull; using psr::assertNotNull; +using psr::Compressor; using psr::computePowerSet; +using psr::computeSCCDependencies; +using psr::computeSCCIterative; +using psr::computeSCCOrder; +using psr::computeSCCs; using psr::createTimeStamp; using psr::DefaultConstruct; using psr::DenseSet; @@ -167,6 +183,8 @@ using psr::IdentityFn; using psr::IgnoreArgs; using psr::intersectWith; using psr::is_crtp_base_of_v; +using psr::is_explicitly_convertible_to; +using psr::is_incrementable; using psr::is_iterable_over_v; using psr::is_iterable_v; using psr::is_llvm_hashable_v; @@ -181,19 +199,26 @@ using psr::is_variant; using psr::is_variant_v; using psr::isConstructor; using psr::IsEqualityComparable; +using psr::IsLessComparable; using psr::isMangled; using psr::Overloaded; using psr::remove_by_index; using psr::reserveIfPossible; +using psr::SCCDependencyGraph; +using psr::SCCHolder; +using psr::SCCId; +using psr::SCCOrder; using psr::scope_exit; +using psr::SimpleTimer; using psr::SmallDenseTable1d; using psr::StableVector; using psr::StringIDLess; using psr::Table; using psr::Timer; using psr::TrueFn; +using psr::TypedVector; using psr::UnorderedSet; using psr::UnorderedTable1d; using psr::variant_idx; -// using psr::variant_idx; + } // namespace psr diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index a1b50268ee..0cd9236476 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,5 +1,38 @@ -add_subdirectory(example-tool) -add_subdirectory(phasar-cli) -if (PHASAR_BUILD_MODULES) - 
add_subdirectory(hello-modules-tool) + +subdirlist(subdirs ${CMAKE_CURRENT_SOURCE_DIR}) +if (NOT PHASAR_BUILD_MODULES) + list(REMOVE_ITEM subdirs hello-modules-tool) +endif() + +foreach(tool ${subdirs}) + message(STATUS "Set-up phasar-tool: ${tool}") + + file(GLOB_RECURSE tool_src "${tool}/*.cpp" "${tool}/*.h") + if(PHASAR_IN_TREE) + add_phasar_executable(${tool} + ${tool_src} + ) + else() + add_executable(${tool} + ${tool_src} + ) + endif() + + set_target_properties(${tool} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${tool} + ) + + target_link_libraries(${tool} + PRIVATE + phasar + ${PHASAR_STD_FILESYSTEM} + ) +endforeach() + +set_target_properties(example-tool PROPERTIES + OUTPUT_NAME "myphasartool" +) + +if (NOT PHASAR_IN_TREE) + install(TARGETS phasar-cli) endif() diff --git a/tools/call-graph/call-graph.cpp b/tools/call-graph/call-graph.cpp new file mode 100644 index 0000000000..93bd1986af --- /dev/null +++ b/tools/call-graph/call-graph.cpp @@ -0,0 +1,305 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/Pointer/AliasAnalysisType.h" +#include "phasar/Utils/AlignNum.h" +#include "phasar/Utils/Timer.h" + +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace cl = llvm::cl; + +static cl::OptionCategory CGCat("PhASAR CallGraph"); + +static cl::opt EmitCGAsDot( + "emit-cg-as-dot", + cl::desc("Output the computed call-graph as DOT graph that can be " + "displayed with any graphviz viewer (default: true)"), + cl::init(true), cl::cat(CGCat)); + +static cl::opt EmitCGAsJson( + "emit-cg-as-json", + cl::desc("Output the computed call-graph as JSON (default: false)"), + cl::cat(CGCat)); + +static cl::opt + OutputFile("o", + cl::desc("The file-path, where the output should be written to " + "(default: stdout)"), + cl::init("-"), cl::cat(CGCat)); + +static cl::opt + CGType("cg-type", cl::desc("The call-graph analysis type to use"), + 
cl::ValuesClass{ +#define CALL_GRAPH_ANALYSIS_TYPE(NAME, CMDFLAG, DESC) \ + clEnumValN(psr::CallGraphAnalysisType::NAME, CMDFLAG, DESC), +#include "phasar/ControlFlow/CallGraphAnalysisType.def" + }, + cl::init(psr::CallGraphAnalysisType::OTF), cl::cat(CGCat)); + +static cl::opt BuildBaseCG( + "build-base-cg", + cl::desc("Whether to build-up an explicit base-call-graph to " + "initialize the VTA algorithm. May take more time, but may reduce " + "the size of the type-assignment graph")); + +static cl::opt + AAType("aa-type", + cl::desc("The alias-analysis type for those call-graph " + "algorithms that require alias information"), + cl::ValuesClass{ +#define ALIAS_ANALYSIS_TYPE(NAME, CMDFLAG, DESC) \ + clEnumValN(psr::AliasAnalysisType::NAME, CMDFLAG, DESC), +#include "phasar/Pointer/AliasAnalysisType.def" + }, + cl::init(psr::AliasAnalysisType::CFLAnders), cl::cat(CGCat)); + +static cl::opt + EmitStats("S", cl::desc("Compute statistics on the computed call-graph"), + cl::cat(CGCat)); + +static cl::opt IRFile(cl::Positional, cl::Required, + cl::desc(""), + cl::cat(CGCat)); + +struct DiagTimer : psr::SimpleTimer { // NOLINT + DiagTimer(llvm::StringRef Msg) noexcept : Message(Msg) {} + ~DiagTimer() { llvm::errs() << Message << " (" << elapsed() << ")\n"; } + + llvm::StringRef Message; +}; +static void computeCGStats(const psr::LLVMBasedCallGraph &CG, + llvm::raw_ostream &OS); + +int main(int Argc, char *Argv[]) { + cl::HideUnrelatedOptions(CGCat); + cl::ParseCommandLineOptions(Argc, Argv); + + psr::SimpleTimer LoadingTm; + auto IRDB = psr::LLVMProjectIRDB::loadOrExit(IRFile); + auto VTP = psr::LLVMVFTableProvider(IRDB); + auto TH = psr::DIBasedTypeHierarchy(IRDB); + auto EntryPoints = psr::getDefaultEntryPoints(IRDB); + llvm::errs() << "Loaded IR and computed helpers (" << LoadingTm.elapsed() + << ")\n"; + + if (BuildBaseCG && CGType != psr::CallGraphAnalysisType::VTA) { + llvm::WithColor::warning() << "The option --build-base-cg only works for " + "the cg-type 'vta'. 
It will be ignored for '" + << CGType << "'\n"; + } + + auto CG = [&] { + DiagTimer Tm{"Created resolver"}; + + switch (CGType) { + case psr::CallGraphAnalysisType::NORESOLVE: + case psr::CallGraphAnalysisType::CHA: + case psr::CallGraphAnalysisType::RTA: { + auto Res = psr::Resolver::create(CGType, &IRDB, &VTP, &TH); + return psr::buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints); + } + case psr::CallGraphAnalysisType::VTA: { + auto BaseRes = psr::RTAResolver(&IRDB, &VTP, &TH); + auto AA = psr::LLVMAliasSet(&IRDB, true, AAType); + auto Res = [&] { + if (BuildBaseCG) { + auto BaseCG = std::make_unique( + psr::buildLLVMBasedCallGraph(IRDB, BaseRes, EntryPoints)); + return psr::VTAResolver(&IRDB, &VTP, &AA, std::move(BaseCG)); + } + return psr::VTAResolver(&IRDB, &VTP, &AA, &BaseRes); + }(); + return psr::buildLLVMBasedCallGraph(IRDB, Res, EntryPoints); + } + case psr::CallGraphAnalysisType::OTF: { + auto AA = psr::LLVMAliasSet(&IRDB, true, AAType); + auto Res = psr::OTFResolver(&IRDB, &VTP, &AA); + return psr::buildLLVMBasedCallGraph(IRDB, Res, EntryPoints); + } + case psr::CallGraphAnalysisType::Invalid: + llvm::report_fatal_error("Invalid call-graph analysis type"); + } + }(); + + std::optional OS; + const auto GetOS = [&OS]() -> llvm::raw_ostream & { + if (!OS) { + std::error_code EC; + OS.emplace(OutputFile, EC); + if (EC) { + llvm::WithColor::error() + << "Could not open output-file: " << EC.message() << '\n'; + std::exit(1); + } + } + return *OS; + }; + + auto ICF = [&] { + DiagTimer Tm{"Built call-graph"}; + return psr::LLVMBasedICFG(std::move(CG), &IRDB); + }(); + + if (EmitCGAsDot) { + ICF.print(GetOS()); + } + if (EmitCGAsJson) { + ICF.printAsJson(GetOS()); + } + if (EmitStats) { + computeCGStats(ICF.getCallGraph(), GetOS()); + } +} + +static constexpr unsigned Indent = 48; + +template struct Align : psr::AlignNum { + using psr::AlignNum::AlignNum; +}; +template Align(llvm::StringRef, T) -> Align; +Align(llvm::StringRef, size_t, size_t) -> Align; + +using 
AlignS = psr::AlignStr; + +static void computeCGStats(const psr::LLVMBasedCallGraph &CG, + llvm::raw_ostream &OS) { + size_t NumVtxFuns = CG.getNumVertexFunctions(); + size_t NumVtxCS = CG.getNumVertexCallSites(); + + size_t NumIndCalls = 0; + size_t NumCallEdges = 0; + size_t NumIndCallEdges = 0; + + size_t NumIndCSWith0Callees = 0; + size_t NumIndCSWith1Callees = 0; + size_t NumIndCSWith2Callees = 0; + size_t NumIndCSWithGreater2Callees = 0; + size_t NumIndCSWithGreater5Callees = 0; + size_t NumIndCSWithGreater10Callees = 0; + size_t NumIndCSWithGreater20Callees = 0; + size_t NumIndCSWithGreater50Callees = 0; + size_t NumIndCSWithGreater100Callees = 0; + size_t LargestFanOut = 0; + + std::vector NumCallEdgesPerCS; + std::vector NumCallEdgesPerIndCS; + NumCallEdgesPerCS.reserve(NumVtxCS); + NumCallEdgesPerIndCS.reserve(NumVtxCS); + + for (const auto *CS : CG.getAllVertexCallSites()) { + bool IsIndCall = + !llvm::isa(llvm::cast(CS) + ->getCalledOperand() + ->stripPointerCastsAndAliases()); + + auto Callees = CG.getCalleesOfCallAt(CS); + NumIndCalls += IsIndCall; + NumCallEdges += Callees.size(); + NumIndCallEdges += Callees.size() * IsIndCall; + NumCallEdgesPerCS.push_back(Callees.size()); + if (IsIndCall) { + NumCallEdgesPerIndCS.push_back(Callees.size()); + } + if (Callees.size() > LargestFanOut) { + LargestFanOut = Callees.size(); + } + // The NumIndCS* buckets are reported as "indirect calls": skip direct ones + NumIndCSWith0Callees += Callees.empty() && IsIndCall; + NumIndCSWith1Callees += Callees.size() == 1 && IsIndCall; + NumIndCSWith2Callees += Callees.size() == 2 && IsIndCall; + NumIndCSWithGreater2Callees += Callees.size() > 2 && IsIndCall; + NumIndCSWithGreater5Callees += Callees.size() > 5 && IsIndCall; + NumIndCSWithGreater10Callees += Callees.size() > 10 && IsIndCall; + NumIndCSWithGreater20Callees += Callees.size() > 20 && IsIndCall; + NumIndCSWithGreater50Callees += Callees.size() > 50 && IsIndCall; + NumIndCSWithGreater100Callees += Callees.size() > 100 && IsIndCall; + } + + llvm::sort(NumCallEdgesPerCS); + llvm::sort(NumCallEdgesPerIndCS); + + OS << "================== CallGraph Statistics ==================\n"; + + OS << +
Align("Num vertex functions", NumVtxFuns); + OS << Align("Num call-sites", NumVtxCS); + OS << Align("Num call-edges", NumCallEdges); + if (NumCallEdgesPerCS.empty()) { + OS << AlignS("Avg num call-edges per call-site", ""); + OS << AlignS("Med num call-edges per call-site", ""); + OS << AlignS("90% num call-edges per call-site", ""); + } else { + OS << Align("Avg num call-edges per call-site", + double(NumCallEdges) / double(NumVtxCS)); + OS << Align("Med num call-edges per call-site", + NumCallEdgesPerCS[NumCallEdgesPerCS.size() / 2]); + OS << Align( + "90% num call-edges per call-site", + NumCallEdgesPerCS[size_t(double(NumCallEdgesPerCS.size()) * 0.9)]); + } + OS << '\n'; + OS << Align("Num indirect call-sites", NumIndCalls); + OS << Align("Num indirect call-edges", NumIndCallEdges); + + if (NumCallEdgesPerIndCS.empty()) { + OS << AlignS("Avg num call-edges per indirect call-site", ""); + OS << AlignS("Med num call-edges per indirect call-site", ""); + OS << AlignS("90% num call-edges per indirect call-site", ""); + } else { + OS << Align("Avg num call-edges per indirect call-site", + double(NumIndCallEdges) / double(NumIndCalls)); + OS << Align("Med num call-edges per indirect call-site", + NumCallEdgesPerIndCS[NumCallEdgesPerIndCS.size() / 2]); + OS << Align("90% num call-edges per indirect call-site", + NumCallEdgesPerIndCS[size_t( + double(NumCallEdgesPerIndCS.size()) * 0.9)]); + } + OS << Align("Largest fanout (max num callees per call-site)", LargestFanOut); + + OS << '\n'; + OS << Align("Num indirect calls with 0 resolved callees", + NumIndCSWith0Callees); + OS << Align("Num indirect calls with 1 resolved callee", + NumIndCSWith1Callees); + OS << Align("Num indirect calls with 2 resolved callees", + NumIndCSWith2Callees); + OS << Align("Num indirect calls with > 2 resolved callees", + NumIndCSWithGreater2Callees); + OS << Align("Num indirect calls with > 5 resolved callees", + NumIndCSWithGreater5Callees); + OS << Align("Num indirect calls with > 10 
resolved callees", + NumIndCSWithGreater10Callees); + OS << Align("Num indirect calls with > 20 resolved callees", + NumIndCSWithGreater20Callees); + OS << Align("Num indirect calls with > 50 resolved callees", + NumIndCSWithGreater50Callees); + OS << Align("Num indirect calls with >100 resolved callees", + NumIndCSWithGreater100Callees); +} diff --git a/tools/example-tool/CMakeLists.txt b/tools/example-tool/CMakeLists.txt deleted file mode 100644 index 2a2d547661..0000000000 --- a/tools/example-tool/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Build a stand-alone executable -if(PHASAR_IN_TREE) - # Build a small test tool to show how phasar may be used - add_phasar_executable(myphasartool - myphasartool.cpp - ) -else() - # Build a small test tool to show how phasar may be used - add_executable(myphasartool - myphasartool.cpp - ) -endif() - -target_link_libraries(myphasartool - PRIVATE - phasar - ${PHASAR_STD_FILESYSTEM} -) diff --git a/tools/hello-modules-tool/CMakeLists.txt b/tools/hello-modules-tool/CMakeLists.txt deleted file mode 100644 index f38f50030e..0000000000 --- a/tools/hello-modules-tool/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Build a stand-alone executable -if(PHASAR_IN_TREE) - # Build a small test tool to show how phasar may be used - add_phasar_executable(hello-modules - hello_modules.cpp - ) -else() - # Build a small test tool to show how phasar may be used - add_executable(hello-modules - hello_modules.cpp - ) -endif() - -target_link_libraries(hello-modules - PRIVATE - phasar - ${PHASAR_STD_FILESYSTEM} -) diff --git a/tools/phasar-cli/CMakeLists.txt b/tools/phasar-cli/CMakeLists.txt deleted file mode 100644 index f25ce28f7c..0000000000 --- a/tools/phasar-cli/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Analysis - BitWriter - Core - Demangle - IRReader - Linker - Passes - Support -) - -# Build a stand-alone executable -if(PHASAR_IN_TREE) - add_phasar_executable(phasar-cli - phasar-cli.cpp - ) -else() - 
add_executable(phasar-cli - phasar-cli.cpp - ) -endif() - -add_subdirectory(Controller) - -target_link_libraries(phasar-cli - PRIVATE - phasar - ${PHASAR_STD_FILESYSTEM} -) - -if (NOT PHASAR_IN_TREE) - install(TARGETS phasar-cli) -endif() diff --git a/tools/phasar-cli/Controller/CMakeLists.txt b/tools/phasar-cli/Controller/CMakeLists.txt deleted file mode 100644 index 5977a5aefc..0000000000 --- a/tools/phasar-cli/Controller/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -file(GLOB_RECURSE CONTROLLER_SRC *.h *.cpp) - -target_sources(phasar-cli PRIVATE ${CONTROLLER_SRC}) diff --git a/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt b/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt index 95d3b00e67..680b7aa647 100644 --- a/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt +++ b/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt @@ -1,19 +1,20 @@ set(ControlFlowSources - LLVMBasedCFGTest.cpp - LLVMBasedICFGTest.cpp - LLVMBasedICFG_CHATest.cpp - LLVMBasedICFG_OTFTest.cpp - LLVMBasedICFG_RTATest.cpp - LLVMBasedICFG_RTA_MultipleInheritanceTest.cpp - LLVMBasedBackwardCFGTest.cpp - LLVMBasedBackwardICFGTest.cpp - LLVMBasedICFGExportTest.cpp - LLVMBasedICFGGlobCtorDtorTest.cpp - LLVMBasedICFGSerializationTest.cpp - LLVMVFTableProviderTest.cpp + LLVMBasedCFGTest.cpp + LLVMBasedICFGTest.cpp + LLVMBasedICFG_CHATest.cpp + LLVMBasedICFG_OTFTest.cpp + LLVMBasedICFG_RTATest.cpp + LLVMBasedICFG_RTA_MultipleInheritanceTest.cpp + LLVMBasedBackwardCFGTest.cpp + LLVMBasedBackwardICFGTest.cpp + LLVMBasedICFGExportTest.cpp + LLVMBasedICFGGlobCtorDtorTest.cpp + LLVMBasedICFGSerializationTest.cpp + LLVMVFTableProviderTest.cpp + VTACallGraphTest.cpp ) set(LLVM_LINK_COMPONENTS Linker) # The CtorDtorTest needs the linker foreach(TEST_SRC ${ControlFlowSources}) - add_phasar_unittest(${TEST_SRC}) + add_phasar_unittest(${TEST_SRC}) endforeach(TEST_SRC) diff --git a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp new file mode 100644 index 
0000000000..977cc89f24 --- /dev/null +++ b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp @@ -0,0 +1,263 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/raw_ostream.h" + +#include "SrcCodeLocationEntry.h" +#include "TestConfig.h" +#include "gtest/gtest.h" + +#include + +namespace { +[[nodiscard]] std::string printStringSet(const std::set &Set) { + std::string Ret; + llvm::raw_string_ostream OS(Ret); + llvm::interleaveComma(Set, OS << "{ "); + OS << " }"; + return Ret; +} + +std::vector getEntryPoints(const psr::LLVMProjectIRDB &IRDB) { + std::vector EntryPoints; + + if (IRDB.getFunctionDefinition("main")) { + EntryPoints.emplace_back("main"); + } else { + for (const auto *F : IRDB.getAllFunctions()) { + if (!F->isDeclaration() && F->hasExternalLinkage()) { + EntryPoints.emplace_back(F->getName()); + } + } + } + return EntryPoints; +} + 
+psr::LLVMBasedCallGraph createBaseCG(psr::LLVMProjectIRDB &IRDB, + const psr::LLVMVFTableProvider &VTP, + const psr::DIBasedTypeHierarchy &TH, + psr::LLVMAliasInfoRef /*PT*/) { + psr::RTAResolver Res(&IRDB, &VTP, &TH); + return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB), + psr::Soundness::Soundy); +} + +psr::LLVMBasedCallGraph computeVTACallGraph( + psr::LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, + psr::LLVMAliasInfoRef AS, const psr::LLVMBasedCallGraph &BaseCG) { + psr::VTAResolver Res(&IRDB, &VTP, AS, &BaseCG); + return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB)); +} + +using psr::unittest::LineColFunOp; +using psr::unittest::TestingSrcLocation; + +class VTACallGraphTest : public ::testing::Test { +protected: + static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER(""); + + struct GroundTruthEntry { + TestingSrcLocation CSId; + std::set Callees; + }; + + void doAnalysisAndCompareResults(const llvm::Twine &IRFile, + llvm::ArrayRef GT) { + ASSERT_FALSE(GT.empty()) << "No Ground-Truth provided!"; + + auto IRDB = psr::LLVMProjectIRDB(PathToLLFiles + IRFile); + ASSERT_TRUE(IRDB.isValid()); + + psr::LLVMVFTableProvider VTP(IRDB); + psr::DIBasedTypeHierarchy TH(IRDB); + psr::LLVMAliasSet AS(&IRDB); + // implement function locally + auto BaseCG = createBaseCG(IRDB, VTP, TH, &AS); + + auto CG = computeVTACallGraph(IRDB, VTP, &AS, BaseCG); + + for (const auto &Entry : GT) { + const auto *CS = llvm::cast( + psr::unittest::testingLocInIR(Entry.CSId, IRDB)); + ASSERT_NE(nullptr, CS); + ASSERT_TRUE(llvm::isa(CS)) + << "CS " << psr::llvmIRToString(CS) << " is no call-site!"; + auto &&Callees = CG.getCalleesOfCallAt(CS); + + EXPECT_EQ(Entry.Callees.size(), Callees.size()); + + auto GTCallees = Entry.Callees; + for (const auto *Callee : Callees) { + auto CalleeName = Callee->getName(); + EXPECT_TRUE(Entry.Callees.count(CalleeName)) + << "Did not expect function '" << CalleeName.str() + << "' being called at " << 
psr::llvmIRToString(CS); + GTCallees.erase(CalleeName); + } + + EXPECT_TRUE(GTCallees.empty()) + << "Expected callees not found at " << psr::llvmIRToString(CS) << ": " + << printStringSet(GTCallees); + } + } +}; + +TEST_F(VTACallGraphTest, VirtualCallSite_InterProcCallSite) { + doAnalysisAndCompareResults( + "virtual_callsites/interproc_callsite_cpp_dbg.ll", + { + {LineColFunOp{11, 40, "_Z12callFunctionR4Base", + llvm::Instruction::Call}, + {"_ZN7Derived3barEv"}}, + }); +} + +TEST_F(VTACallGraphTest, UninitializedVariables_VirtualCall) { + doAnalysisAndCompareResults( + "uninitialized_variables/virtual_call_cpp_dbg.ll", + { + {LineColFunOp{16, 11, "main", llvm::Instruction::Call}, + {"_Z3barRi", "_Z3fooRi"}}, + }); +} + +TEST_F(VTACallGraphTest, PathTracing_Inter12) { + // Note: The VTA analysis is not flow-sensitive + doAnalysisAndCompareResults( + "path_tracing/inter_12_cpp_dbg.ll", + { + {LineColFunOp{16, 3, "main", llvm::Instruction::Call}, + {"_ZN3TwoD0Ev", "_ZN5ThreeD0Ev"}}, + {LineColFunOp{19, 13, "main", llvm::Instruction::Call}, + {"_ZN5Three11assignValueEi", "_ZN3Two11assignValueEi"}}, + }); +} + +TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer1) { + doAnalysisAndCompareResults( + "call_graphs/function_pointer_1_c_dbg.ll", + { + {LineColFunOp{9, 27, "main", llvm::Instruction::Call}, {"bar"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer2) { + doAnalysisAndCompareResults( + "call_graphs/function_pointer_2_cpp_dbg.ll", + { + {LineColFunOp{8, 16, "main", llvm::Instruction::Call}, {"_Z3barv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer3) { + // Note: Although bar is assigned (and part of the TAG), it does not qualify + // as psr::isConsistentCall() + doAnalysisAndCompareResults( + "call_graphs/function_pointer_3_cpp_dbg.ll", + { + {LineColFunOp{10, 16, "main", llvm::Instruction::Call}, + {/*"_Z3bari",*/ "_Z3foov"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall2) { + doAnalysisAndCompareResults( +
"call_graphs/virtual_call_2_cpp_dbg.ll", + { + {LineColFunOp{15, 8, "main", llvm::Instruction::Invoke}, + {"_ZN1B3fooEv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall3) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + doAnalysisAndCompareResults( + "call_graphs/virtual_call_3_cpp_dbg.ll", + { + {LineColFunOp{14, 0, "main", llvm::Instruction::Call}, + {"_ZN5AImpl3fooEv"}}, + {LineColFunOp{15, 3, "main", llvm::Instruction::Call}, + {"_ZN5AImplD0Ev"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall4) { + doAnalysisAndCompareResults( + "call_graphs/virtual_call_4_cpp_dbg.ll", + { + {LineColFunOp{15, 0, "main", llvm::Instruction::Invoke}, + {"_ZN1B3fooEv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall5) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + doAnalysisAndCompareResults( + "call_graphs/virtual_call_5_cpp_dbg.ll", + { + {LineColFunOp{20, 6, "main", llvm::Instruction::Call}, + {"_ZN1B5VfuncEv"}}, + {LineColFunOp{22, 3, "main", llvm::Instruction::Call}, {"_ZN1BD0Ev"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall7) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + doAnalysisAndCompareResults( + "call_graphs/virtual_call_7_cpp_dbg.ll", + { + {LineColFunOp{19, 6, "main", llvm::Instruction::Call}, + {"_ZN1A5VfuncEv"}}, + {LineColFunOp{20, 6, "main", llvm::Instruction::Call}, + {"_ZN1B5VfuncEv"}}, + {LineColFunOp{22, 3, "main", llvm::Instruction::Call}, {"_ZN1AD0Ev"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall8) { + + // Use the dbg version, because VTA relies on !heapallocsite metadata + // Note: The VTA analysis is neither flow-, nor context-sensitive + doAnalysisAndCompareResults( + "call_graphs/virtual_call_8_cpp_dbg.ll", + { + {LineColFunOp{32, 6, "main", llvm::Instruction::Call}, + {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + {LineColFunOp{33, 6, "main", llvm::Instruction::Call}, + {"_ZZ4mainEN1B3fooEv", 
"_ZZ4mainEN1C3fooEv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall9) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + // Note: The VTA analysis is neither flow-, nor context-sensitive + doAnalysisAndCompareResults( + "call_graphs/virtual_call_9_cpp_dbg.ll", + { + {LineColFunOp{57, 6, "main", llvm::Instruction::Call}, + {"_ZN1B3fooEv", "_ZN1C3fooEv", "_ZN1D3fooEv"}}, + {LineColFunOp{58, 3, "main", llvm::Instruction::Call}, + {"_ZN1BD0Ev", "_ZN1CD0Ev", "_ZN1DD0Ev"}}, + }); +} +// TODO: More tests! + +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} diff --git a/unittests/Utils/CMakeLists.txt b/unittests/Utils/CMakeLists.txt index 73c042570a..d84306e783 100644 --- a/unittests/Utils/CMakeLists.txt +++ b/unittests/Utils/CMakeLists.txt @@ -10,6 +10,7 @@ set(UtilsSources AnalysisPrinterTest.cpp OnTheFlyAnalysisPrinterTest.cpp SourceMgrPrinterTest.cpp + SCCGenericTest.cpp ) if(PHASAR_ENABLE_DYNAMIC_LOG) diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp new file mode 100644 index 0000000000..3e3f2943c5 --- /dev/null +++ b/unittests/Utils/SCCGenericTest.cpp @@ -0,0 +1,650 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#include "phasar/Utils/SCCGeneric.h" + +#include "phasar/Utils/AdjacencyList.h" +#include "phasar/Utils/EmptyBaseOptimizationUtils.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/TypedVector.h" + +#include "llvm/ADT/ArrayRef.h" + +#include "gtest/gtest.h" + +#include + +namespace { +using namespace psr; + +enum class NodeId : uint32_t {}; + +using ExampleGraph = AdjacencyList; + +/** + * Builds the expected ("ground truth") SCCHolder from a list of SCCs. + * The i-th inner list becomes SCC number i; SCCOfNode is grown on demand so + * that it can hold every node index that occurs in SCCs. + */ +static SCCHolder makeGTSCCs(llvm::ArrayRef> SCCs) { + SCCHolder Ret; + + uint32_t Ctr = 0; + for (const auto &SCC : SCCs) { + auto CurrSCC = SCCId(Ctr++); + auto &NodesInSCC = Ret.NodesInSCC.emplace_back(); + for (auto Nod : SCC) { + NodesInSCC.push_back(NodeId(Nod)); + + if (Ret.SCCOfNode.size() <= size_t(Nod)) { + Ret.SCCOfNode.resize(Nod + 1); + } + + Ret.SCCOfNode[NodeId(Nod)] = CurrSCC; + } + } + + return Ret; +}; + +/** + * Compares ComputedSCCs against ExpectedSCCs without requiring identical SCC + * numbering: after asserting equal SCC and node counts, it records for each + * expected SCC the computed SCC id of the first node seen and expects every + * further node of that expected SCC to carry the same computed id. + * ComputedName only labels the failure messages. + * NOTE(review): distinct expected SCCs mapping to the same computed id are not + * rejected by the loop itself; only the equal-count assertion guards that. + */ +static void compareSCCs(const SCCHolder &ComputedSCCs, + const SCCHolder &ExpectedSCCs, + std::string_view ComputedName) { + ASSERT_EQ(ComputedSCCs.size(), ExpectedSCCs.size()) + << "Unequal number of SCC components\n"; + ASSERT_EQ(ComputedSCCs.SCCOfNode.size(), ExpectedSCCs.SCCOfNode.size()) + << "Unequal number of Graph Nodes\n"; + + const auto None = SCCId(UINT32_MAX); + TypedVector, SCCId> Isomorphism(ComputedSCCs.size(), + None); + + for (auto Vtx : iota(ComputedSCCs.SCCOfNode.size())) { + auto ExpectedSCC = ExpectedSCCs.SCCOfNode[Vtx]; + auto ComputedSCC = ComputedSCCs.SCCOfNode[Vtx]; + + if (Isomorphism[ExpectedSCC] == None) { + Isomorphism[ExpectedSCC] = ComputedSCC; + } else { + EXPECT_EQ(Isomorphism[ExpectedSCC], ComputedSCC) + << "SCCs differ for node: " << uint32_t(Vtx) << " in " + << ComputedName; + } + } +} + +static void validateTopologicalOrder(const ExampleGraph &Graph, + const SCCHolder &ComputedSCCs, + std::string_view ComputedName) { + // Note: Pearce's algorithm produces SCCs in reverse-topological
order + for (auto [Vtx, SCC] : ComputedSCCs.SCCOfNode.enumerate()) { + for (auto Succ : Graph.Adj[Vtx]) { + auto SuccSCC = ComputedSCCs.SCCOfNode[Succ]; + EXPECT_LE(+SuccSCC, +SCC) + << "Invalid topological order in " << ComputedName << ": SCC #" + << +SCC << " must come before #" << +SuccSCC; + } + } +} + +/** + * Computes the SCCs of Graph with both computeSCCs call forms used in this + * file (the default call, labelled "Pearce Iterative", and the call with + * std::false_type{}, labelled "Pearce Recursive"), asserts that each result + * covers every node of Graph, and checks each result against ExpectedSCCs and + * against the expected SCC ordering. On failure the computed SCCs are printed; + * a failing iterative result skips the recursive comparison. + */ +static void computeSCCsAndCompare(ExampleGraph &Graph, + llvm::ArrayRef> ExpectedSCCs) { + + auto ComputedSCCsIt = computeSCCs(Graph); + auto ComputedSCCsRec = computeSCCs(Graph, std::false_type{}); + ASSERT_EQ(ComputedSCCsIt.SCCOfNode.size(), Graph.Adj.size()) + << "Iterative Pearce's Approach did not reach all nodes\n"; + ASSERT_EQ(ComputedSCCsRec.SCCOfNode.size(), Graph.Adj.size()) + << "Recursive Pearce's Approach did not reach all nodes\n"; + +#if __cplusplus >= 202002L + [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, ComputedSCCsIt); + static_assert(is_const_graph); +#endif + + auto GroundTruth = makeGTSCCs(ExpectedSCCs); + compareSCCs(ComputedSCCsIt, GroundTruth, "Pearce Iterative"); + validateTopologicalOrder(Graph, ComputedSCCsIt, "Pearce Iterative"); + if (::testing::Test::HasFailure()) { + ComputedSCCsIt.print(Graph, llvm::outs(), "ExampleGraph"); + return; + } + + compareSCCs(ComputedSCCsRec, GroundTruth, "Pearce Recursive"); + validateTopologicalOrder(Graph, ComputedSCCsRec, "Pearce Recursive"); + if (::testing::Test::HasFailure()) { + ComputedSCCsRec.print(Graph, llvm::outs(), "ExampleGraph"); + } +} + +TEST(SCCGenericTest, SCCTest01) { + ExampleGraph Graph{{{NodeId(2)}, + {NodeId(0)}, + {NodeId(1)}, + {NodeId(1), NodeId(2)}, + {NodeId(1)}, + {NodeId(4), NodeId(6)}, + {NodeId(4), NodeId(7)}, + {NodeId(5)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3}, {4}, {5, 6, 7}}); +} + +TEST(SCCGenericTest, SCCTest02) { + ExampleGraph Graph{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}; + computeSCCsAndCompare(Graph, + {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}}); +} + +TEST(SCCGenericTest, SCCTest03) { + ExampleGraph Graph{{{NodeId(1)}, +
{NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(0)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4, 5, 6}}); +} + +TEST(SCCGenericTest, SCCTest04) { + ExampleGraph Graph{{{NodeId(1), NodeId(2), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(2), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(2), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(2), NodeId(3)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}}); +} + +TEST(SCCGenericTest, SCCTest05) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3), NodeId(4)}, + {NodeId(5)}, + {NodeId(5)}, + {NodeId(2), NodeId(6)}, + {NodeId(7)}, + {NodeId(1), NodeId(8)}, + {}}}; + computeSCCsAndCompare(Graph, {{0}, {1, 2, 3, 4, 5, 6, 7}, {8}}); +} + +TEST(SCCGenericTest, SCCTest06) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(7)}, + {NodeId(0)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13), NodeId(4)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(3)}, + {NodeId(5)}}}; + computeSCCsAndCompare( + Graph, + {{0, 1, 2, 3, 4, 5, 6, 7}, {8, 9, 10, 11, 12, 13}, {14}, {15}, {16}}); +} + +// Note: Following tests generated by ChatGPT + +// SCC test: two disjoint cycles +TEST(SCCGenericTest, SCCTest07) { + ExampleGraph Graph{{{NodeId(1)}, {NodeId(0)}, {NodeId(3)}, {NodeId(2)}}}; + computeSCCsAndCompare(Graph, {{0, 1}, {2, 3}}); +} + +// SCC test: diamond shape, no cycles +TEST(SCCGenericTest, SCCTest08) { + ExampleGraph Graph{{{NodeId(1), NodeId(2)}, {NodeId(3)}, {NodeId(3)}, {}}}; + computeSCCsAndCompare(Graph, {{0}, {1}, {2}, {3}}); +} + +// SCC test: diamond with back edge creating cycle +TEST(SCCGenericTest, SCCTest09) { + ExampleGraph Graph{ + {{NodeId(1), NodeId(2)}, {NodeId(3)}, {NodeId(3)}, {NodeId(0)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2, 3}}); +} + +// SCC test: one self-loop, others acyclic +TEST(SCCGenericTest, SCCTest10) 
{ + ExampleGraph Graph{{{NodeId(0)}, {NodeId(2)}, {}}}; + computeSCCsAndCompare(Graph, {{0}, {1}, {2}}); +} + +// SCC test: disconnected nodes +TEST(SCCGenericTest, SCCTest11) { + ExampleGraph Graph{{{}, {}, {}}}; + computeSCCsAndCompare(Graph, {{0}, {1}, {2}}); +} + +// SCC test: complex graph with two larger SCCs and one singleton +TEST(SCCGenericTest, SCCTest12) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle 0-1-2 + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // cycle 3-4-5 + {}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6}}); +} + +// SCC test: nested cycles sharing a node +TEST(SCCGenericTest, SCCTest13) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0), NodeId(3)}, + {NodeId(4)}, + {NodeId(2)}}}; + // 0-1-2 form a cycle, and 2-3-4 also cycle back to 2 => all {0,1,2,3,4} + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}}); +} + +// SCC test: long linear chain ending in a self-loop +TEST(SCCGenericTest, SCCTest14) { + ExampleGraph Graph{{{NodeId(1)}, {NodeId(2)}, {NodeId(3)}, {NodeId(3)}}}; + // chain 0 -> 1 -> 2 -> 3; node 3 has self-loop + computeSCCsAndCompare(Graph, {{0}, {1}, {2}, {3}}); +} + +// SCC test: three disjoint two-node SCCs (no edges between them) +TEST(SCCGenericTest, SCCTest15) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(0)}, // SCC {0,1} + {NodeId(3)}, + {NodeId(2)}, // SCC {2,3} + {NodeId(5)}, + {NodeId(4)}}}; // SCC {4,5} + computeSCCsAndCompare(Graph, {{0, 1}, {2, 3}, {4, 5}}); +} + +// SCC test: two big SCCs connected by single edge +TEST(SCCGenericTest, SCCTest16) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle 0-1-2 + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3), NodeId(0)}}}; // cycle 3-4-5, with edge 5->0 + // Two SCCs {0,1,2} and {3,4,5}; edge {3,4,5} -> {0,1,2} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}}); +} + +// SCC test: large cycle with attached tail +TEST(SCCGenericTest, SCCTest17) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, +
{NodeId(4)}, + {NodeId(0)}, // cycle 0-1-2-3-4-0 + {NodeId(0)}}}; // tail node 5 -> 0 + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}, {5}}); +} + +// SCC test: two SCCs joined by a “bow-tie” structure +TEST(SCCGenericTest, SCCTest18) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // left cycle {0,1,2} + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // right cycle {3,4,5} + {NodeId(0), NodeId(3)}}}; // node 6 links both + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6}}); +} + +// SCC test: complete bipartite between {0,1} and {2,3} +TEST(SCCGenericTest, SCCTest19) { + ExampleGraph Graph{{{NodeId(2), NodeId(3)}, + {NodeId(2), NodeId(3)}, + {NodeId(0), NodeId(1)}, + {NodeId(0), NodeId(1)}}}; + // All nodes strongly connected + computeSCCsAndCompare(Graph, {{0, 1, 2, 3}}); +} + +// SCC test: three disjoint cyclic SCCs (no edges between them) +TEST(SCCGenericTest, SCCTest20) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle {0,1,2} + {NodeId(4)}, + {NodeId(3)}, // cycle {3,4} + {NodeId(6)}, + {NodeId(5)}}}; // cycle {5,6} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4}, {5, 6}}); +} + +// SCC test: complex graph with interleaved cycles +TEST(SCCGenericTest, SCCTest21) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle {0,1,2} + {NodeId(1), NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // cycle {3,4,5} + {NodeId(7)}, + {NodeId(6)}}}; // cycle {6,7} + // SCCs: {0,1,2}, {3,4,5}, {6,7} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6, 7}}); +} + +// SCC test: “ladder” structure with rungs forming cycles +TEST(SCCGenericTest, SCCTest22) { + ExampleGraph Graph{{{NodeId(1), NodeId(2)}, + {NodeId(0), NodeId(3)}, + {NodeId(0), NodeId(3)}, + {NodeId(1), NodeId(2)}}}; + // Bidirectional 4-cycle 0-1-3-2-0; all nodes mutually reachable + computeSCCsAndCompare(Graph, {{0, 1, 2, 3}}); +} + +// SCC test: disconnected large SCCs plus singletons +TEST(SCCGenericTest, SCCTest23) { + ExampleGraph Graph{{{NodeId(1)}, +
{NodeId(2)}, + {NodeId(0)}, // SCC {0,1,2} + {NodeId(4)}, + {NodeId(3)}, // SCC {3,4} + {}, + {}, // nodes 5,6 isolated + {NodeId(9)}, // 7 -> 9: singleton tail into SCC {8,9} + {NodeId(9)}, + {NodeId(8)}}}; // SCC {8,9} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4}, {5}, {6}, {7}, {8, 9}}); +} + +// SCC test: 12-node graph with 4 SCCs, each of size 3 +TEST(SCCGenericTest, SCCTest24) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // {0,1,2} + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // {3,4,5} + {NodeId(7)}, + {NodeId(8)}, + {NodeId(6)}, // {6,7,8} + {NodeId(10)}, + {NodeId(11)}, + {NodeId(9)}}}; // {9,10,11} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}); +} + +// SCC test: 15-node graph with three cyclic SCCs, three tail nodes feeding +// into them, and three isolated nodes +TEST(SCCGenericTest, SCCTest25) { + ExampleGraph Graph{{ + {NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(0)}, // {0,1,2,3} + {NodeId(5)}, + {NodeId(4)}, // {4,5} + {NodeId(7)}, + {NodeId(8)}, + {NodeId(6)}, // {6,7,8} + {NodeId(0)}, + {NodeId(4)}, + {NodeId(6)}, // tails into SCCs + {}, + {}, + {} // 3 isolated + }}; + computeSCCsAndCompare( + Graph, + {{0, 1, 2, 3}, {4, 5}, {6, 7, 8}, {9}, {10}, {11}, {12}, {13}, {14}}); +} + +// SCC test: 16-node graph with interlinked clusters +TEST(SCCGenericTest, SCCTest26) { + ExampleGraph Graph{ + {{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // {0,1,2} + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // {3,4,5} + {NodeId(7)}, + {NodeId(6)}, // {6,7} + {NodeId(9)}, + {NodeId(10)}, + {NodeId(8)}, // {8,9,10} + {NodeId(12)}, + {NodeId(11)}, // {11,12} + {NodeId(0), NodeId(3), NodeId(6), NodeId(8)}, // 13 links clusters + {NodeId(13)}, // 14 -> 13 + {NodeId(14)}}}; // 15 -> 14 -> 13 + computeSCCsAndCompare( + Graph, + {{0, 1, 2}, {3, 4, 5}, {6, 7}, {8, 9, 10}, {11, 12}, {13}, {14}, {15}}); +} + +// SCC test: 18-node graph forming a big cycle plus smaller SCCs +TEST(SCCGenericTest, SCCTest27) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, 
+ {NodeId(6)}, + {NodeId(7)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(0)}, // 0-11 cycle + {NodeId(13)}, + {NodeId(12)}, // {12,13} + {NodeId(15)}, + {NodeId(14)}, // {14,15} + {NodeId(17)}, + {NodeId(16)}}}; // {16,17} + computeSCCsAndCompare( + Graph, + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, {12, 13}, {14, 15}, {16, 17}}); +} + +// SCC test: 20-node graph with mixed SCC sizes +TEST(SCCGenericTest, SCCTest28) { + ExampleGraph Graph{{{NodeId(1)}, {NodeId(2)}, + {NodeId(0)}, // {0,1,2} + {NodeId(4)}, {NodeId(5)}, + {NodeId(3)}, // {3,4,5} + {NodeId(7)}, {NodeId(6)}, // {6,7} + {NodeId(9)}, {NodeId(8)}, // {8,9} + {NodeId(10)}, {NodeId(11)}, + {NodeId(10)}, {NodeId(12)}, + {NodeId(15)}, {NodeId(14)}, // {14,15} + {NodeId(17)}, {NodeId(18)}, + {NodeId(19)}, {}}}; // chain 16->17->18->19->isolated + computeSCCsAndCompare(Graph, {{0, 1, 2}, + {3, 4, 5}, + {6, 7}, + {8, 9}, + {10}, + {11}, + {12}, + {13}, + {14, 15}, + {16}, + {17}, + {18}, + {19}}); +} + +// SCC test: 25-node graph, 5 clusters of 5 nodes each forming cycles +TEST(SCCGenericTest, SCCTest29) { + ExampleGraph Graph{// Cluster 0: nodes 0-4 cycle + {{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(0)}, + // Cluster 1: nodes 5-9 cycle + {NodeId(6)}, + {NodeId(7)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(5)}, + // Cluster 2: nodes 10-14 cycle + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13)}, + {NodeId(14)}, + {NodeId(10)}, + // Cluster 3: nodes 15-19 cycle + {NodeId(16)}, + {NodeId(17)}, + {NodeId(18)}, + {NodeId(19)}, + {NodeId(15)}, + // Cluster 4: nodes 20-24 cycle + {NodeId(21)}, + {NodeId(22)}, + {NodeId(23)}, + {NodeId(24)}, + {NodeId(20)}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}); +} + +// SCC test: 25-node graph, one giant SCC (0-19 cycle) plus 5 isolated nodes +TEST(SCCGenericTest, SCCTest30) { + ExampleGraph Graph{// Giant cycle through 0..19 + 
{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(7)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13)}, + {NodeId(14)}, + {NodeId(15)}, + {NodeId(16)}, + {NodeId(17)}, + {NodeId(18)}, + {NodeId(19)}, + {NodeId(0)}, + // Isolated nodes 20-24 + {}, + {}, + {}, + {}, + {}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, + {20}, + {21}, + {22}, + {23}, + {24}}); +} + +// SCC test: 25-node graph, 5 SCC clusters of size 5, linked in DAG +TEST(SCCGenericTest, SCCTest31) { + ExampleGraph Graph{ + {// 0..24 + /* 0 */ { + NodeId(1), + NodeId(5)}, // cycle 0->1->2->3->4->0 and cross 0->5 (to cluster1) + /* 1 */ {NodeId(2)}, + /* 2 */ {NodeId(3)}, + /* 3 */ {NodeId(4)}, + /* 4 */ {NodeId(0)}, + /* 5 */ {NodeId(6)}, // cluster1 + /* 6 */ {NodeId(7), NodeId(10)}, // 6->7 and cross 6->10 (to cluster2) + /* 7 */ {NodeId(8)}, + /* 8 */ {NodeId(9)}, + /* 9 */ {NodeId(5)}, + /*10 */ {NodeId(11)}, // cluster2 + /*11 */ {NodeId(12)}, + /*12 */ {NodeId(13), NodeId(15)}, // 12->13 and cross 12->15 (to + // cluster3) + /*13 */ {NodeId(14)}, + /*14 */ {NodeId(10)}, + /*15 */ {NodeId(16)}, // cluster3 + /*16 */ {NodeId(17)}, + /*17 */ {NodeId(18), NodeId(20)}, // 17->18 and cross 17->20 (to + // cluster4) + /*18 */ {NodeId(19)}, + /*19 */ {NodeId(15)}, + /*20 */ {NodeId(21)}, // cluster4 + /*21 */ {NodeId(22)}, + /*22 */ {NodeId(23)}, + /*23 */ {NodeId(24)}, + /*24 */ {NodeId(20)}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}); +} + +// SCC test: 25-node graph, one giant SCC (0-19 cycle) plus 5 isolated nodes, +// with edges from the big SCC to the isolated nodes +TEST(SCCGenericTest, SCCTest32) { + ExampleGraph Graph{ + {// 0..24 + /* 0 */ {NodeId(1), + NodeId(20)}, // cycle 0->1->...->19->0 and extra 0->20 + /* 1 */ {NodeId(2)}, + /* 2 */ 
{NodeId(3)}, + /* 3 */ {NodeId(4)}, + /* 4 */ {NodeId(5)}, + /* 5 */ {NodeId(6), NodeId(21)}, // 5->6 and extra 5->21 + /* 6 */ {NodeId(7)}, + /* 7 */ {NodeId(8)}, + /* 8 */ {NodeId(9)}, + /* 9 */ {NodeId(10)}, + /*10 */ {NodeId(11), NodeId(22)}, // 10->11 and extra 10->22 + /*11 */ {NodeId(12)}, + /*12 */ {NodeId(13)}, + /*13 */ {NodeId(14)}, + /*14 */ {NodeId(15)}, + /*15 */ {NodeId(16), NodeId(23)}, // 15->16 and extra 15->23 + /*16 */ {NodeId(17)}, + /*17 */ {NodeId(18)}, + /*18 */ {NodeId(19)}, + /*19 */ {NodeId(0), NodeId(24)}, // 19->0 and extra 19->24 + /*20 */ {}, // isolated singletons + /*21 */ {}, + /*22 */ {}, + /*23 */ {}, + /*24 */ {}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, + {20}, + {21}, + {22}, + {23}, + {24}}); +} + +} // namespace + +// main function for the test case +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +}