From f7b957b3d21fe41cdba38d4cf2c5051da298fe53 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 2 Aug 2022 08:56:29 +0200 Subject: [PATCH 01/18] Start adding CFGBase and new LLVMBasedCFG/ICFG --- include/phasar/PhasarLLVM/ControlFlow/CFG.h | 18 +- .../phasar/PhasarLLVM/ControlFlow/CFGBase.h | 124 +++++++++ .../ControlFlow/CallGraphAnalysisType.h | 36 +++ include/phasar/PhasarLLVM/ControlFlow/ICFG.h | 13 - .../phasar/PhasarLLVM/ControlFlow/ICFGBase.h | 75 ++++++ .../PhasarLLVM/ControlFlow/LLVMBasedCFG.h | 159 +++++------- .../ControlFlow/SpecialMemberFunctionType.h | 43 ++++ include/phasar/Utils/LLVMShorthands.h | 7 +- include/phasar/Utils/TypeTraits.h | 9 + .../{ICFG.cpp => CallGraphAnalysisType.cpp} | 34 +-- lib/PhasarLLVM/ControlFlow/LLVMBasedCFG.cpp | 236 +++++++----------- lib/Utils/LLVMShorthands.cpp | 9 +- 12 files changed, 455 insertions(+), 308 deletions(-) create mode 100644 include/phasar/PhasarLLVM/ControlFlow/CFGBase.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.h rename lib/PhasarLLVM/ControlFlow/{ICFG.cpp => CallGraphAnalysisType.cpp} (58%) diff --git a/include/phasar/PhasarLLVM/ControlFlow/CFG.h b/include/phasar/PhasarLLVM/ControlFlow/CFG.h index dfe42fb2a4..a916df7331 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/CFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/CFG.h @@ -17,31 +17,21 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_CFG_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_CFG_H_ +#include "phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.h" + +#include "nlohmann/json.hpp" + #include #include #include #include -#include "nlohmann/json.hpp" - namespace llvm { class raw_ostream; } namespace psr { -enum class SpecialMemberFunctionType { -#define SPECIAL_MEMBER_FUNCTION_TYPES(NAME, TYPE) TYPE, -#include 
"phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.def" -}; - -std::string toString(const SpecialMemberFunctionType &SMFT); - -SpecialMemberFunctionType toSpecialMemberFunctionType(const std::string &SMFT); - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const SpecialMemberFunctionType &SMFT); - template class CFG { public: virtual ~CFG() = default; diff --git a/include/phasar/PhasarLLVM/ControlFlow/CFGBase.h b/include/phasar/PhasarLLVM/ControlFlow/CFGBase.h new file mode 100644 index 0000000000..4cf7adcc1c --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/CFGBase.h @@ -0,0 +1,124 @@ +/****************************************************************************** + * Copyright (c) 2022 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_CFGBASE_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_CFGBASE_H + +#include "nlohmann/json.hpp" +#include "phasar/Utils/TypeTraits.h" + +namespace psr { + +enum class SpecialMemberFunctionType; + +template struct CFGTraits { + // using n_t + // using f_t +}; + +template class CFGBase { +public: + using n_t = typename CFGTraits::n_t; + using f_t = typename CFGTraits::f_t; + + [[nodiscard]] f_t getFunctionOf(n_t Inst) const noexcept { + return self().getFunctionOfImpl(Inst); + } + [[nodiscard]] decltype(auto) getPredsOf(n_t Inst) const { + static_assert( + is_iterable_over_v); + return self().getPredsOfImpl(Inst); + } + [[nodiscard]] decltype(auto) getSuccsOf(n_t Inst) const { + static_assert( + is_iterable_over_v); + return self().getSuccsOfImpl(Inst); + } + [[nodiscard]] decltype(auto) getAllControlFlowEdges(f_t Fun) const { + static_assert( + is_iterable_over_v>); + return self().getAllControlFlowEdgesImpl(Fun); + } + [[nodiscard]] decltype(auto) 
getAllInstructionsOf(f_t Fun) const { + static_assert( + is_iterable_over_v); + return self().getAllInstructionsOfImpl(Fun); + } + [[nodiscard]] decltype(auto) getStartPointsOf(f_t Fun) const { + static_assert( + is_iterable_over_v); + return self().getStartPointsOfImpl(Fun); + } + [[nodiscard]] decltype(auto) getExitPointsOf(f_t Fun) const { + static_assert( + is_iterable_over_v); + return self().getExitPointsOfImpl(Fun); + } + [[nodiscard]] bool isCallSite(n_t Inst) const noexcept { + return self().isCallSiteImpl(Inst); + } + [[nodiscard]] bool isExitInst(n_t Inst) const noexcept { + return self().isExitInstImpl(Inst); + } + [[nodiscard]] bool isStartPoint(n_t Inst) const noexcept { + return self().isStartPointImpl(Inst); + } + [[nodiscard]] bool isFieldLoad(n_t Inst) const noexcept { + return self().isFieldLoadImpl(Inst); + } + [[nodiscard]] bool isFieldStore(n_t Inst) const noexcept { + return self().isFieldStoreImpl(Inst); + } + [[nodiscard]] bool isFallThroughSuccessor(n_t Inst, n_t Succ) const noexcept { + return self().isFallThroughSuccessorImpl(Inst, Succ); + } + [[nodiscard]] bool isBranchTarget(n_t Inst, n_t Succ) const noexcept { + return self().isBranchTargetImpl(Inst, Succ); + } + [[nodiscard]] bool isHeapAllocatingFunction(f_t Fun) const { + return self().isHeapAllocatingFunctionImpl(Fun); + } + [[nodiscard]] bool isSpecialMemberFunction(f_t Fun) const { + return self().isSpecialMemberFunctionImpl(Fun); + } + [[nodiscard]] SpecialMemberFunctionType + getSpecialMemberFunctionType(f_t Fun) const { + return self().getSpecialMemberFunctionTypeImpl(Fun); + } + [[nodiscard]] decltype(auto) getStatementId(n_t Inst) const { + static_assert(is_string_like_v); + return self().getStatementIdImpl(Inst); + } + [[nodiscard]] decltype(auto) getFunctionName(f_t Fun) const { + static_assert(is_string_like_v); + return self().getFunctionNameImpl(Fun); + } + [[nodiscard]] decltype(auto) getDemangledFunctionName(f_t Fun) const { + static_assert( + is_string_like_v); + 
return self().getDemangledFunctionNameImpl(Fun); + } + void print(f_t Fun, llvm::raw_ostream &OS) const { + self().printImpl(Fun, OS); + } + [[nodiscard]] nlohmann::json getAsJson(f_t Fun) const { + return self().getAsJsonImpl(Fun); + } + +private: + Derived &self() noexcept { return static_cast(*this); } + const Derived &self() const noexcept { + return static_cast(*this); + } +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_CFGBASE_H \ No newline at end of file diff --git a/include/phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h b/include/phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h new file mode 100644 index 0000000000..0c73cc3a9d --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h @@ -0,0 +1,36 @@ +/****************************************************************************** + * Copyright (c) 2022 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Philipp Schubert, Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CALLGRAPHANALYSISTYPE_H +#define PHASAR_PHASARLLVM_CALLGRAPHANALYSISTYPE_H + +#include "llvm/ADT/StringRef.h" + +#include + +namespace llvm { +class raw_ostream; +} // namespace llvm + +namespace psr { +enum class CallGraphAnalysisType { +#define ANALYSIS_SETUP_CALLGRAPH_TYPE(NAME, CMDFLAG, TYPE) TYPE, +#include "phasar/PhasarLLVM/Utils/AnalysisSetups.def" + Invalid +}; + +std::string toString(CallGraphAnalysisType CGA); + +CallGraphAnalysisType toCallGraphAnalysisType(llvm::StringRef S); + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, CallGraphAnalysisType CGA); + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CALLGRAPHANALYSISTYPE_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/ICFG.h b/include/phasar/PhasarLLVM/ControlFlow/ICFG.h index c5dc3b01bd..34fd105e7d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/ICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/ICFG.h @@ -31,19 +31,6 @@ namespace psr { -enum class CallGraphAnalysisType { -#define ANALYSIS_SETUP_CALLGRAPH_TYPE(NAME, CMDFLAG, TYPE) TYPE, -#include "phasar/PhasarLLVM/Utils/AnalysisSetups.def" - Invalid -}; - -std::string toString(const CallGraphAnalysisType &CGA); - -CallGraphAnalysisType toCallGraphAnalysisType(const std::string &S); - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const CallGraphAnalysisType &CGA); - template class ICFG : public virtual CFG { protected: diff --git a/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h b/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h new file mode 100644 index 0000000000..24aa7899e6 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h @@ -0,0 +1,75 @@ +/****************************************************************************** + * Copyright (c) 2022 Philipp Schubert. + * All rights reserved. 
This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +/* + * ICFG.h + * + * Created on: 17.08.2016 + * Author: pdschbrt + */ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_ICFGBASE_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_ICFGBASE_H + +#include "phasar/PhasarLLVM/ControlFlow/CFGBase.h" +#include "phasar/Utils/TypeTraits.h" + +#include "llvm/ADT/StringRef.h" + +#include + +namespace psr { +template class ICFGBase { +public: + using n_t = typename CFGTraits::n_t; + using f_t = typename CFGTraits::f_t; + + static_assert(std::is_base_of_v, Derived>, + "An ICFG must also be a CFG"); + + [[nodiscard]] decltype(auto) getAllFunctions() const { + return self().getAllFunctionsImpl(); + } + + [[nodiscard]] f_t getFunction(llvm::StringRef Fun) const { + return self().getFunctionImpl(Fun); + } + + [[nodiscard]] bool isIndirectFunctionCall(n_t Inst) const { + return self().isIndirectFunctionCallImpl(Inst); + } + [[nodiscard]] bool isVirtualFunctionCall(n_t Inst) const { + return self().isVirtualFunctionCallImpl(Inst); + } + [[nodiscard]] decltype(auto) allNonCallStartNodes() const { + static_assert( + is_iterable_over_v); + return self().allNonCallStartNodesImpl(); + } + [[nodiscard]] decltype(auto) getCalleesOfCallAt(n_t Inst) const { + static_assert( + is_iterable_over_v); + return self().getCalleesOfCallAtImpl(Inst); + } + [[nodiscard]] decltype(auto) getCallersOf(f_t Fun) const { + static_assert( + is_iterable_over_v); + return self().getCallersOfImpl(Fun); + } + /// TODO: More Member functions + +private: + Derived &self() noexcept { return static_cast(*this); } + const Derived &self() const noexcept { + return static_cast(*this); + } +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_ICFGBASE_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h 
b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index a6e56b4733..a800b61362 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -1,10 +1,10 @@ /****************************************************************************** - * Copyright (c) 2017 Philipp Schubert. + * Copyright (c) 2022 Philipp Schubert. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * * Contributors: - * Philipp Schubert and others + * Philipp Schubert, Fabian Schiebel and others *****************************************************************************/ /* @@ -17,115 +17,78 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCFG_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCFG_H_ -#include -#include -#include - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Function.h" - #include "phasar/PhasarLLVM/ControlFlow/CFG.h" -#include "phasar/Utils/LLVMIRToSrc.h" +#include "phasar/PhasarLLVM/ControlFlow/CFGBase.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" -#include "nlohmann/json.hpp" +namespace llvm { +class Instruction; +class Function; +} // namespace llvm namespace psr { +class LLVMBasedCFG; + +template <> struct CFGTraits { + using n_t = const llvm::Instruction *; + using f_t = const llvm::Function *; +}; + +class LLVMBasedCFG : public CFGBase { + friend CFGBase; -class LLVMBasedCFG - : public virtual CFG { public: LLVMBasedCFG(bool IgnoreDbgInstructions = true) : IgnoreDbgInstructions(IgnoreDbgInstructions) {} - ~LLVMBasedCFG() override = default; - - [[nodiscard]] const llvm::Function * - getFunctionOf(const llvm::Instruction *Inst) const override; - - [[nodiscard]] std::vector - getPredsOf(const llvm::Instruction *Inst) const override; - - [[nodiscard]] std::vector - getSuccsOf(const llvm::Instruction 
*Inst) const override; - - [[nodiscard]] std::vector< - std::pair> - getAllControlFlowEdges(const llvm::Function *Fun) const override; - - [[nodiscard]] std::vector - getAllInstructionsOf(const llvm::Function *Fun) const override; - - [[nodiscard]] std::set - getStartPointsOf(const llvm::Function *Fun) const override; - - [[nodiscard]] std::set - getExitPointsOf(const llvm::Function *Fun) const override; - - [[nodiscard]] bool isCallSite(const llvm::Instruction *Inst) const override; - - [[nodiscard]] bool isExitInst(const llvm::Instruction *Inst) const override; - - [[nodiscard]] bool isStartPoint(const llvm::Instruction *Inst) const override; - - [[nodiscard]] bool isFieldLoad(const llvm::Instruction *Inst) const override; - - [[nodiscard]] bool isFieldStore(const llvm::Instruction *Inst) const override; - - [[nodiscard]] bool - isFallThroughSuccessor(const llvm::Instruction *Inst, - const llvm::Instruction *Succ) const override; - - [[nodiscard]] bool - isBranchTarget(const llvm::Instruction *Inst, - const llvm::Instruction *Succ) const override; - - [[nodiscard]] bool - isHeapAllocatingFunction(const llvm::Function *Fun) const override; - - [[nodiscard]] bool - isSpecialMemberFunction(const llvm::Function *Fun) const override; - - [[nodiscard]] SpecialMemberFunctionType - getSpecialMemberFunctionType(const llvm::Function *Fun) const override; - - [[nodiscard]] std::string - getStatementId(const llvm::Instruction *Inst) const override; - - [[nodiscard]] std::string - getFunctionName(const llvm::Function *Fun) const override; - - [[nodiscard]] std::string - getDemangledFunctionName(const llvm::Function *Fun) const override; - - void print(const llvm::Function *Fun, - llvm::raw_ostream &OS = llvm::outs()) const override; - - [[nodiscard]] nlohmann::json - getAsJson(const llvm::Function *Fun) const override; - [[nodiscard]] nlohmann::json exportCFGAsJson(const llvm::Function *F) const; - [[nodiscard]] nlohmann::json exportCFGAsSourceCodeJson(const llvm::Function 
*F) const; -protected: - // Ignores debug instructions in control flow if set to true. - const bool IgnoreDbgInstructions; - - struct SourceCodeInfoWithIR : public SourceCodeInfo { - std::string IR; - }; - - friend void from_json(const nlohmann::json &J, // NOLINT - SourceCodeInfoWithIR &Info); - friend void to_json(nlohmann::json &J, // NOLINT - const SourceCodeInfoWithIR &Info); - - /// Used by export(I)CFGAsJson - static SourceCodeInfoWithIR - getFirstNonEmpty(llvm::BasicBlock::const_iterator &It, - llvm::BasicBlock::const_iterator End); - static SourceCodeInfoWithIR getFirstNonEmpty(const llvm::BasicBlock *BB); +private: + [[nodiscard]] f_t getFunctionOfImpl(n_t Inst) const noexcept; + [[nodiscard]] llvm::SmallVector getPredsOfImpl(n_t Inst) const; + [[nodiscard]] llvm::SmallVector getSuccsOfImpl(n_t Inst) const; + [[nodiscard]] std::vector> + getAllControlFlowEdgesImpl(f_t Fun) const; + [[nodiscard]] auto getAllInstructionsOfImpl(f_t Fun) const { + return llvm::map_range(llvm::instructions(Fun), + [](const llvm::Instruction &Inst) { return &Inst; }); + } + [[nodiscard]] llvm::SmallVector getStartPointsOfImpl(f_t Fun) const; + [[nodiscard]] llvm::SmallVector getExitPointsOfImpl(f_t Fun) const; + [[nodiscard]] bool isCallSiteImpl(n_t Inst) const noexcept { + return llvm::isa(Inst); + } + [[nodiscard]] bool isExitInstImpl(n_t Inst) const noexcept { + return llvm::isa(Inst); + } + [[nodiscard]] bool isStartPointImpl(n_t Inst) const noexcept; + [[nodiscard]] bool isFieldLoadImpl(n_t Inst) const noexcept; + [[nodiscard]] bool isFieldStoreImpl(n_t Inst) const noexcept; + [[nodiscard]] bool isFallThroughSuccessorImpl(n_t Inst, + n_t Succ) const noexcept; + [[nodiscard]] bool isBranchTargetImpl(n_t Inst, n_t Succ) const noexcept; + [[nodiscard]] bool isHeapAllocatingFunctionImpl(f_t Fun) const; + [[nodiscard]] bool isSpecialMemberFunctionImpl(f_t Fun) const { + return getSpecialMemberFunctionType(Fun) != SpecialMemberFunctionType{}; + } + [[nodiscard]] 
SpecialMemberFunctionType + getSpecialMemberFunctionTypeImpl(f_t Fun) const; + [[nodiscard]] std::string getStatementIdImpl(n_t Inst) const; + [[nodiscard]] auto getFunctionNameImpl(f_t Fun) const { + return Fun->getName(); + } + [[nodiscard]] std::string getDemangledFunctionNameImpl(f_t Fun) const; + void printImpl(f_t Fun, llvm::raw_ostream &OS) const { OS << *Fun; } + [[nodiscard]] nlohmann::json getAsJsonImpl(f_t /*Fun*/) const { return ""; } + + bool IgnoreDbgInstructions = false; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.h b/include/phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.h new file mode 100644 index 0000000000..9f6e488b01 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * Copyright (c) 2017 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Philipp Schubert and others + *****************************************************************************/ + +/* + * CFG.h + * + * Created on: 07.06.2017 + * Author: philipp + */ + +#ifndef PHASAR_PHASARLLVM_SPECIALMEMBERFUNCTIONTYPE_H +#define PHASAR_PHASARLLVM_SPECIALMEMBERFUNCTIONTYPE_H + +#include "llvm/ADT/StringRef.h" + +#include + +namespace llvm { +class raw_ostream; +} // namespace llvm + +namespace psr { + +enum class SpecialMemberFunctionType { +#define SPECIAL_MEMBER_FUNCTION_TYPES(NAME, TYPE) TYPE, +#include "phasar/PhasarLLVM/ControlFlow/SpecialMemberFunctionType.def" +}; + +std::string toString(SpecialMemberFunctionType SMFT); + +SpecialMemberFunctionType toSpecialMemberFunctionType(llvm::StringRef SMFT); + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + SpecialMemberFunctionType SMFT); +} // namespace psr + +#endif // PHASAR_PHASARLLVM_SPECIALMEMBERFUNCTIONTYPE_H diff --git a/include/phasar/Utils/LLVMShorthands.h b/include/phasar/Utils/LLVMShorthands.h index 0050d18261..16c8e698bd 100644 --- a/include/phasar/Utils/LLVMShorthands.h +++ b/include/phasar/Utils/LLVMShorthands.h @@ -168,10 +168,11 @@ const llvm::Instruction *getNthTermInstruction(const llvm::Function *F, const llvm::StoreInst *getNthStoreInstruction(const llvm::Function *F, unsigned StoNo); -std::vector +llvm::SmallVector getAllExitPoints(const llvm::Function *F); -void appendAllExitPoints(const llvm::Function *F, - std::vector &ExitPoints); +void appendAllExitPoints( + const llvm::Function *F, + llvm::SmallVectorImpl &ExitPoints); /** * @brief Returns the LLVM Module to which the given LLVM Value belongs to. 
diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 6f1ee8281f..90937758fa 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -10,6 +10,7 @@ #ifndef PHASAR_UTILS_TYPETRAITS_H #define PHASAR_UTILS_TYPETRAITS_H +#include #include #include #include @@ -88,6 +89,10 @@ struct has_setIFDSIDESolverConfig< template constexpr bool is_iterable_v = detail::is_iterable::value; // NOLINT +template +constexpr bool is_iterable_over_v = is_iterable_v // NOLINT + &&std::is_convertible_v().begin()), U>; + template constexpr bool is_pair_v = detail::is_pair::value; // NOLINT @@ -131,6 +136,10 @@ struct is_variant> : std::true_type {}; // NOLINT template inline constexpr bool is_variant_v = is_variant::value; // NOLINT +template +// NOLINTNEXTLINE +constexpr bool is_string_like_v = std::is_convertible_v; + // NOLINTEND(readability-identifier-naming) } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/ICFG.cpp b/lib/PhasarLLVM/ControlFlow/CallGraphAnalysisType.cpp similarity index 58% rename from lib/PhasarLLVM/ControlFlow/ICFG.cpp rename to lib/PhasarLLVM/ControlFlow/CallGraphAnalysisType.cpp index 8b6c452db1..0f5c5367d6 100644 --- a/lib/PhasarLLVM/ControlFlow/ICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/CallGraphAnalysisType.cpp @@ -1,31 +1,9 @@ -/****************************************************************************** - * Copyright (c) 2017 Philipp Schubert. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. 
- * - * Contributors: - * Philipp Schubert and others - *****************************************************************************/ - -/* - * ICFG.cpp - * - * Created on: 17.08.2016 - * Author: pdschbrt - */ - -#include +#include "phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/raw_ostream.h" -#include "phasar/PhasarLLVM/ControlFlow/ICFG.h" - -using namespace psr; - -namespace psr { - -std::string toString(const CallGraphAnalysisType &CGA) { +std::string psr::toString(CallGraphAnalysisType CGA) { switch (CGA) { default: #define ANALYSIS_SETUP_CALLGRAPH_TYPE(NAME, CMDFLAG, TYPE) \ @@ -36,7 +14,7 @@ std::string toString(const CallGraphAnalysisType &CGA) { } } -CallGraphAnalysisType toCallGraphAnalysisType(const std::string &S) { +psr::CallGraphAnalysisType psr::toCallGraphAnalysisType(llvm::StringRef S) { CallGraphAnalysisType Type = llvm::StringSwitch(S) #define ANALYSIS_SETUP_CALLGRAPH_TYPE(NAME, CMDFLAG, TYPE) \ .Case(NAME, CallGraphAnalysisType::TYPE) @@ -52,9 +30,7 @@ CallGraphAnalysisType toCallGraphAnalysisType(const std::string &S) { return Type; } -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const CallGraphAnalysisType &CGA) { +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, + CallGraphAnalysisType CGA) { return OS << toString(CGA); } - -} // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCFG.cpp index 713d454e6d..3254a64d0a 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCFG.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - * Copyright (c) 2017 Philipp Schubert. + * Copyright (c) 2022 Philipp Schubert. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. 
* * Contributors: - * Philipp Schubert and others + * Philipp Schubert, Fabian Schiebel and others *****************************************************************************/ /* @@ -14,39 +14,18 @@ * Author: philipp */ -#include -#include -#include - -#include "llvm/ADT/StringRef.h" -#include "llvm/Demangle/Demangle.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/Casting.h" - -#include "nlohmann/json.hpp" -#include "phasar/Config/Configuration.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/Utils/LLVMIRToSrc.h" #include "phasar/Utils/LLVMShorthands.h" -#include "phasar/Utils/Logger.h" -#include "phasar/Utils/Utilities.h" -using namespace psr; +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/IR/IntrinsicInst.h" namespace psr { -const llvm::Function * -LLVMBasedCFG::getFunctionOf(const llvm::Instruction *Inst) const { - return Inst->getFunction(); -} - -std::vector -LLVMBasedCFG::getPredsOf(const llvm::Instruction *I) const { +auto LLVMBasedCFG::getPredsOfImpl(const llvm::Instruction *I) const + -> llvm::SmallVector { if (!IgnoreDbgInstructions) { if (const auto *PrevInst = I->getPrevNode()) { return {PrevInst}; @@ -60,9 +39,9 @@ LLVMBasedCFG::getPredsOf(const llvm::Instruction *I) const { // If we do not have a predecessor yet, look for basic blocks which // lead to our instruction in question! 
- std::vector Preds; + llvm::SmallVector Preds; std::transform(llvm::pred_begin(I->getParent()), - llvm::pred_end(I->getParent()), back_inserter(Preds), + llvm::pred_end(I->getParent()), std::back_inserter(Preds), [](const llvm::BasicBlock *BB) { assert(BB && "BB under analysis was not well formed."); const llvm::Instruction *Pred = BB->getTerminator(); @@ -76,8 +55,8 @@ LLVMBasedCFG::getPredsOf(const llvm::Instruction *I) const { return Preds; } -std::vector -LLVMBasedCFG::getSuccsOf(const llvm::Instruction *I) const { +auto LLVMBasedCFG::getSuccsOfImpl(const llvm::Instruction *I) const + -> llvm::SmallVector { // case we wish to consider LLVM's debug instructions if (!IgnoreDbgInstructions) { if (const auto *NextInst = I->getNextNode()) { @@ -97,7 +76,8 @@ LLVMBasedCFG::getSuccsOf(const llvm::Instruction *I) const { } return {NextInst}; } - std::vector Successors; + + llvm::SmallVector Successors; Successors.reserve(I->getNumSuccessors() + Successors.size()); std::transform( llvm::succ_begin(I), llvm::succ_end(I), std::back_inserter(Successors), @@ -113,8 +93,8 @@ LLVMBasedCFG::getSuccsOf(const llvm::Instruction *I) const { return Successors; } -std::vector> -LLVMBasedCFG::getAllControlFlowEdges(const llvm::Function *Fun) const { +auto LLVMBasedCFG::getAllControlFlowEdgesImpl(const llvm::Function *Fun) const + -> std::vector> { std::vector> Edges; @@ -140,19 +120,8 @@ LLVMBasedCFG::getAllControlFlowEdges(const llvm::Function *Fun) const { return Edges; } -std::vector -LLVMBasedCFG::getAllInstructionsOf(const llvm::Function *Fun) const { - std::vector Instructions; - - for (const auto &I : llvm::instructions(Fun)) { - Instructions.push_back(&I); - } - - return Instructions; -} - -std::set -LLVMBasedCFG::getStartPointsOf(const llvm::Function *Fun) const { +auto LLVMBasedCFG::getStartPointsOfImpl(const llvm::Function *Fun) const + -> llvm::SmallVector { if (!Fun) { return {}; } @@ -170,41 +139,33 @@ LLVMBasedCFG::getStartPointsOf(const llvm::Function *Fun) const { 
return {}; } -std::set -LLVMBasedCFG::getExitPointsOf(const llvm::Function *Fun) const { +auto LLVMBasedCFG::getExitPointsOfImpl(const llvm::Function *Fun) const + -> llvm::SmallVector { if (!Fun) { return {}; } if (!Fun->isDeclaration()) { // A function can have more than one exit point - std::set ExitPoints; - auto ExitPointVector = psr::getAllExitPoints(Fun); - - for (const auto *ExitPoint : ExitPointVector) { - ExitPoints.insert(ExitPoint); - } - - return ExitPoints; + return psr::getAllExitPoints(Fun); } PHASAR_LOG_LEVEL(DEBUG, "Could not get exit points of '" << Fun->getName() << "' which is declaration!"); return {}; } -bool LLVMBasedCFG::isCallSite(const llvm::Instruction *Inst) const { - return llvm::isa(Inst); -} - -bool LLVMBasedCFG::isExitInst(const llvm::Instruction *Inst) const { - return llvm::isa(Inst); -} - -bool LLVMBasedCFG::isStartPoint(const llvm::Instruction *Inst) const { - return (Inst == &Inst->getFunction()->front().front()); +bool LLVMBasedCFG::isStartPointImpl( + const llvm::Instruction *Inst) const noexcept { + auto FirstInst = &Inst->getFunction()->front().front(); + if (Inst == FirstInst) { + return true; + } + return llvm::isa(FirstInst) && + Inst == FirstInst->getNextNonDebugInstruction(false); } -bool LLVMBasedCFG::isFieldLoad(const llvm::Instruction *Inst) const { +bool LLVMBasedCFG::isFieldLoadImpl( + const llvm::Instruction *Inst) const noexcept { if (const auto *Load = llvm::dyn_cast(Inst)) { if (const auto *GEP = llvm::dyn_cast( Load->getPointerOperand())) { @@ -214,7 +175,8 @@ bool LLVMBasedCFG::isFieldLoad(const llvm::Instruction *Inst) const { return false; } -bool LLVMBasedCFG::isFieldStore(const llvm::Instruction *Inst) const { +bool LLVMBasedCFG::isFieldStoreImpl( + const llvm::Instruction *Inst) const noexcept { if (const auto *Store = llvm::dyn_cast(Inst)) { if (const auto *GEP = llvm::dyn_cast( Store->getPointerOperand())) { @@ -224,8 +186,9 @@ bool LLVMBasedCFG::isFieldStore(const llvm::Instruction *Inst) const { 
return false; } -bool LLVMBasedCFG::isFallThroughSuccessor(const llvm::Instruction *Inst, - const llvm::Instruction *Succ) const { +bool LLVMBasedCFG::isFallThroughSuccessorImpl( + const llvm::Instruction *Inst, + const llvm::Instruction *Succ) const noexcept { // assert(false && "FallThrough not valid in LLVM IR"); if (const auto *B = llvm::dyn_cast(Inst)) { if (B->isConditional()) { @@ -236,8 +199,9 @@ bool LLVMBasedCFG::isFallThroughSuccessor(const llvm::Instruction *Inst, return false; } -bool LLVMBasedCFG::isBranchTarget(const llvm::Instruction *Inst, - const llvm::Instruction *Succ) const { +bool LLVMBasedCFG::isBranchTargetImpl( + const llvm::Instruction *Inst, + const llvm::Instruction *Succ) const noexcept { if (Inst->isTerminator()) { for (const auto *BB : llvm::successors(Inst->getParent())) { if (&BB->front() == Succ) { @@ -248,30 +212,20 @@ bool LLVMBasedCFG::isBranchTarget(const llvm::Instruction *Inst, return false; } -bool LLVMBasedCFG::isHeapAllocatingFunction(const llvm::Function *Fun) const { - static const std::set HeapAllocatingFunctions = { - "_Znwm", "_Znam", "malloc", "calloc", "realloc"}; - if (!Fun) { - return false; - } - if (Fun->hasName() && HeapAllocatingFunctions.find(Fun->getName()) != - HeapAllocatingFunctions.end()) { - return true; - } - return false; -} - -bool LLVMBasedCFG::isSpecialMemberFunction(const llvm::Function *Fun) const { - return getSpecialMemberFunctionType(Fun) != SpecialMemberFunctionType::None; +bool LLVMBasedCFG::isHeapAllocatingFunctionImpl( + const llvm::Function *Fun) const { + return llvm::StringSwitch(Fun->getName()) + .Cases("_Znwm", "_Znam", "malloc", "calloc", "realloc", true) + .Default(false); } -SpecialMemberFunctionType -LLVMBasedCFG::getSpecialMemberFunctionType(const llvm::Function *Fun) const { +SpecialMemberFunctionType LLVMBasedCFG::getSpecialMemberFunctionTypeImpl( + const llvm::Function *Fun) const { if (!Fun) { return SpecialMemberFunctionType::None; } auto FunctionName = Fun->getName(); - // 
TODO this looks terrible and needs fix + /// TODO: this looks terrible and needs fix static const std::map Codes{ {"C1", SpecialMemberFunctionType::Constructor}, {"C2", SpecialMemberFunctionType::Constructor}, @@ -325,28 +279,14 @@ LLVMBasedCFG::getSpecialMemberFunctionType(const llvm::Function *Fun) const { return SpecialMemberFunctionType::None; } -std::string LLVMBasedCFG::getStatementId(const llvm::Instruction *Inst) const { - return llvm::cast( - Inst->getMetadata(PhasarConfig::MetaDataKind())->getOperand(0)) - ->getString() - .str(); -} - -std::string LLVMBasedCFG::getFunctionName(const llvm::Function *Fun) const { - return Fun->getName().str(); -} - std::string -LLVMBasedCFG::getDemangledFunctionName(const llvm::Function *Fun) const { - return llvm::demangle(getFunctionName(Fun)); -} - -void LLVMBasedCFG::print(const llvm::Function *F, llvm::raw_ostream &OS) const { - OS << llvmIRToString(F); +LLVMBasedCFG::getStatementIdImpl(const llvm::Instruction *Inst) const { + return getMetaDataID(Inst); } -nlohmann::json LLVMBasedCFG::getAsJson(const llvm::Function * /*F*/) const { - return ""; +std::string +LLVMBasedCFG::getDemangledFunctionNameImpl(const llvm::Function *Fun) const { + return llvm::demangle(Fun->getName().str()); } [[nodiscard]] nlohmann::json @@ -364,6 +304,43 @@ LLVMBasedCFG::exportCFGAsJson(const llvm::Function *F) const { return J; } +struct SourceCodeInfoWithIR : public SourceCodeInfo { + std::string IR; +}; + +static void from_json(const nlohmann::json &J, SourceCodeInfoWithIR &Info) { + from_json(J, static_cast(Info)); + J.at("IR").get_to(Info.IR); +} +static void to_json(nlohmann::json &J, const SourceCodeInfoWithIR &Info) { + to_json(J, static_cast(Info)); + J["IR"] = Info.IR; +} + +static auto getFirstNonEmpty(llvm::BasicBlock::const_iterator &It, + llvm::BasicBlock::const_iterator End) + -> SourceCodeInfoWithIR { + assert(It != End); + + const auto *Inst = &*It; + auto Ret = getSrcCodeInfoFromIR(Inst); + + // Assume, we aren't skipping 
relevant calls here + + while ((Ret.empty() || It->isDebugOrPseudoInst()) && ++It != End) { + Inst = &*It; + Ret = getSrcCodeInfoFromIR(Inst); + } + + return {Ret, llvmIRToString(Inst)}; +} + +static auto getFirstNonEmpty(const llvm::BasicBlock *BB) + -> SourceCodeInfoWithIR { + auto It = BB->begin(); + return getFirstNonEmpty(It, BB->end()); +} + [[nodiscard]] nlohmann::json LLVMBasedCFG::exportCFGAsSourceCodeJson(const llvm::Function *F) const { nlohmann::json J; @@ -412,39 +389,4 @@ LLVMBasedCFG::exportCFGAsSourceCodeJson(const llvm::Function *F) const { return J; } -void from_json(const nlohmann::json &J, - LLVMBasedCFG::SourceCodeInfoWithIR &Info) { - from_json(J, static_cast(Info)); - J.at("IR").get_to(Info.IR); -} -void to_json(nlohmann::json &J, - const LLVMBasedCFG::SourceCodeInfoWithIR &Info) { - to_json(J, static_cast(Info)); - J["IR"] = Info.IR; -} - -auto LLVMBasedCFG::getFirstNonEmpty(llvm::BasicBlock::const_iterator &It, - llvm::BasicBlock::const_iterator End) - -> SourceCodeInfoWithIR { - assert(It != End); - - const auto *Inst = &*It; - auto Ret = getSrcCodeInfoFromIR(Inst); - - // Assume, we aren't skipping relevant calls here - - while ((Ret.empty() || It->isDebugOrPseudoInst()) && ++It != End) { - Inst = &*It; - Ret = getSrcCodeInfoFromIR(Inst); - } - - return {Ret, llvmIRToString(Inst)}; -} - -auto LLVMBasedCFG::getFirstNonEmpty(const llvm::BasicBlock *BB) - -> SourceCodeInfoWithIR { - auto It = BB->begin(); - return getFirstNonEmpty(It, BB->end()); -} - } // namespace psr diff --git a/lib/Utils/LLVMShorthands.cpp b/lib/Utils/LLVMShorthands.cpp index b8c794647f..71a87495c9 100644 --- a/lib/Utils/LLVMShorthands.cpp +++ b/lib/Utils/LLVMShorthands.cpp @@ -329,15 +329,16 @@ const llvm::Instruction *getNthInstruction(const llvm::Function *F, return nullptr; } -std::vector +llvm::SmallVector getAllExitPoints(const llvm::Function *F) { - std::vector Ret; + llvm::SmallVector Ret; appendAllExitPoints(F, Ret); return Ret; } -void 
appendAllExitPoints(const llvm::Function *F, - std::vector &ExitPoints) { +void appendAllExitPoints( + const llvm::Function *F, + llvm::SmallVectorImpl &ExitPoints) { if (!F) { return; } From ab53a3b618afc0e6fc260168cd68afc29117b84d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 2 Aug 2022 17:01:56 +0200 Subject: [PATCH 02/18] Add LLVMBasedICFG (TODO: implement) --- .../phasar/PhasarLLVM/ControlFlow/ICFGBase.h | 34 +- .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 612 ++++++++++-------- include/phasar/PhasarLLVM/Utils/ByRef.h | 28 + include/phasar/Utils/Soundness.h | 12 +- lib/Utils/Soundness.cpp | 16 +- 5 files changed, 416 insertions(+), 286 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Utils/ByRef.h diff --git a/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h b/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h index 24aa7899e6..05e542c2e7 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h +++ b/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h @@ -17,10 +17,12 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_ICFGBASE_H #define PHASAR_PHASARLLVM_CONTROLFLOW_ICFGBASE_H +#include "nlohmann/json.hpp" #include "phasar/PhasarLLVM/ControlFlow/CFGBase.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" #include @@ -30,8 +32,10 @@ template class ICFGBase { using n_t = typename CFGTraits::n_t; using f_t = typename CFGTraits::f_t; - static_assert(std::is_base_of_v, Derived>, - "An ICFG must also be a CFG"); + ICFGBase() noexcept { + static_assert(std::is_base_of_v, Derived>, + "An ICFG must also be a CFG"); + } [[nodiscard]] decltype(auto) getAllFunctions() const { return self().getAllFunctionsImpl(); @@ -59,10 +63,32 @@ template class ICFGBase { } [[nodiscard]] decltype(auto) getCallersOf(f_t Fun) const { static_assert( - is_iterable_over_v); + is_iterable_over_v); return self().getCallersOfImpl(Fun); } - /// TODO: More Member functions + [[nodiscard]] decltype(auto) getCallsFromWithin(f_t 
Fun) const { + static_assert( + is_iterable_over_v); + return self().getCallsFromWithinImpl(Fun); + } + [[nodiscard]] decltype(auto) getReturnSitesOfCallAt(f_t Fun) const { + static_assert( + is_iterable_over_v); + return self().getReturnSitesOfCallAtImpl(Fun); + } + [[nodiscard]] decltype(auto) getGlobalInitializers(f_t Fun) const { + static_assert( + is_iterable_over_v); + return self().getGlobalInitializersImpl(Fun); + } + void print(llvm::raw_ostream &OS = llvm::outs()) const { + self().printImpl(OS); + } + [[nodiscard]] nlohmann::json getAsJson() const { + return self().getAsJsonImpl(); + } private: Derived &self() noexcept { return static_cast(*this); } diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index 784fd3d264..e1a0358142 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -17,78 +17,24 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDICFG_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDICFG_H_ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "boost/container/flat_set.hpp" -#include "boost/graph/adjacency_list.hpp" - -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Module.h" - -#include "phasar/PhasarLLVM/ControlFlow/ICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/CFGBase.h" +#include "phasar/PhasarLLVM/ControlFlow/ICFGBase.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" -#include "phasar/PhasarLLVM/Pointer/LLVMPointsToInfo.h" #include "phasar/Utils/Soundness.h" -namespace llvm { -class Instruction; -class Function; -class Module; -class Instruction; -class BitCastInst; -} // namespace llvm - +#include "boost/graph/adjacency_list.hpp" namespace psr { - -class Resolver; -class ProjectIRDB; +class LLVMProjectIRDB; 
+enum class CallGraphAnalysisType; class LLVMTypeHierarchy; +class LLVMPointsToInfo; -class LLVMBasedICFG - : public ICFG, - public virtual LLVMBasedCFG { - friend class LLVMBasedBackwardsICFG; +class LLVMBasedICFG; +template <> struct CFGTraits : CFGTraits {}; - using GlobalCtorTy = std::multimap; - using GlobalDtorTy = std::multimap>; +class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { + friend ICFGBase; -private: - ProjectIRDB &IRDB; - CallGraphAnalysisType CGType; - Soundness S; - bool UserTHInfos = true; - bool UserPTInfos = true; - LLVMTypeHierarchy *TH; - LLVMPointsToInfo *PT; - std::unique_ptr Res; - llvm::DenseSet VisitedFunctions; - std::unordered_set UserEntryPoints; - - GlobalCtorTy GlobalCtors; - GlobalDtorTy GlobalDtors; - - llvm::Function *GlobalCleanupFn = nullptr; - - llvm::SmallDenseMap - GlobalRegisteredDtorsCaller; - - // The worklist for direct callee resolution. - std::vector FunctionWL; - - // Map indirect calls to the number of possible targets found for it. Fixpoint - // is not reached when more targets are found. - llvm::DenseMap IndirectCalls; // The VertexProperties for our call-graph. struct VertexProperties { const llvm::Function *F = nullptr; @@ -121,225 +67,355 @@ class LLVMBasedICFG /// The call graph. bidigraph_t CallGraph; - /// Maps functions to the corresponding vertex id. 
- std::unordered_map FunctionVertexMap; - - void processFunction(const llvm::Function *F, Resolver &Resolver, - bool &FixpointReached); - - bool constructDynamicCall(const llvm::Instruction *I, Resolver &Resolver); - - std::unique_ptr - makeResolver(ProjectIRDB &IRDB, LLVMTypeHierarchy &TH, LLVMPointsToInfo &PT); - - template - static void insertGlobalCtorsDtorsImpl(MapTy &Into, const llvm::Module *M, - llvm::StringRef Fun) { - const auto *Gtors = M->getGlobalVariable(Fun); - if (Gtors == nullptr) { - return; - } - - if (const auto *FunArray = llvm::dyn_cast( - Gtors->getType()->getPointerElementType())) { - if (const auto *ConstFunArray = - llvm::dyn_cast(Gtors->getInitializer())) { - for (const auto &Op : ConstFunArray->operands()) { - if (const auto *FunDesc = llvm::dyn_cast(Op)) { - auto *Fun = llvm::dyn_cast(FunDesc->getOperand(1)); - const auto *Prio = - llvm::dyn_cast(FunDesc->getOperand(0)); - if (Fun && Prio) { - auto PrioInt = size_t(Prio->getLimitedValue(SIZE_MAX)); - Into.emplace(PrioInt, Fun); - } - } - } - } - } - } - - llvm::Function *buildCRuntimeGlobalDtorsModel(llvm::Module &M); - const llvm::Function *buildCRuntimeGlobalCtorsDtorsModel(llvm::Module &M); - - struct dependency_visitor; - public: - static constexpr llvm::StringLiteral GlobalCRuntimeModelName = - "__psrCRuntimeGlobalCtorsModel"; - - /** - * Why a multimap? A given instruction might have multiple target functions. - * For example, if the points-to analysis indicates that a pointer could - * be for multiple different types. 
- */ - using OutEdgesAndTargets = std::unordered_multimap; - - LLVMBasedICFG(ProjectIRDB &IRDB, CallGraphAnalysisType CGType, - const std::set &EntryPoints = {}, - LLVMTypeHierarchy *TH = nullptr, LLVMPointsToInfo *PT = nullptr, - Soundness S = Soundness::Soundy, bool IncludeGlobals = true); - - LLVMBasedICFG(const LLVMBasedICFG &ICF); - - LLVMBasedICFG &operator=(const LLVMBasedICFG &) = delete; - - ~LLVMBasedICFG() override; - - [[nodiscard]] const llvm::Function *getFirstGlobalCtorOrNull() const; - - [[nodiscard]] const llvm::Function *getLastGlobalDtorOrNull() const; - - /** - * \return all of the functions in the IRDB, this may include some not in the - * callgraph - */ - [[nodiscard]] std::set - getAllFunctions() const override; - - /** - * A boost flat_set is used here because we already have the functions in - * order, so building it is fast since we can always add to the end. We get - * the performance and space benefits of array-backed storage and all the - * functionality of a set. - * - * \return all of the functions which are represented by a vertex in the - * callgraph. - */ - [[nodiscard]] boost::container::flat_set - getAllVertexFunctions() const; - - bool isIndirectFunctionCall(const llvm::Instruction *N) const override; - - bool isVirtualFunctionCall(const llvm::Instruction *N) const override; - - [[nodiscard]] const llvm::Function * - getFunction(const std::string &Fun) const override; - - /** - * Essentially the same as `getCallsFromWithin`, but uses the callgraph - * data directly. - * \return all call sites within a given method. - */ - std::vector - getOutEdges(const llvm::Function *Fun) const; - - /** - * For the supplied function, get all the output edge Instructions and - * the corresponding Function. This pulls data directly from the callgraph. - * - * \return the edges and the target function for each edge. 
- */ - OutEdgesAndTargets getOutEdgeAndTarget(const llvm::Function *Fun) const; - - /** - * Removes all edges found for the given instruction within the - * sourceFunction. \return number of edges removed - */ - size_t removeEdges(const llvm::Function *F, const llvm::Instruction *Inst); - - /** - * Removes the vertex for the given function. - * CAUTION: does not remove edges, invoking this on a function with - * IN or OUT edges is a bad idea. - * \return true iff the vertex was found and removed. - */ - bool removeVertex(const llvm::Function *Fun); - - /** - * \return the total number of in edges to the vertex representing this - * Function. - */ - size_t getCallerCount(const llvm::Function *Fun) const; - - /** - * \return all callee methods for a given call that might be called. - */ - [[nodiscard]] std::set - getCalleesOfCallAt(const llvm::Instruction *N) const override; - - void forEachCalleeOfCallAt( - const llvm::Instruction *I, - llvm::function_ref Callback) const; - - /** - * \return all caller statements/nodes of a given method. - */ - [[nodiscard]] std::set - getCallersOf(const llvm::Function *Fun) const override; - - /** - * \return all call sites within a given method. 
- */ - [[nodiscard]] std::set - getCallsFromWithin(const llvm::Function *Fun) const override; + explicit LLVMBasedICFG(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, + const std::set &EntryPoints = {}, + LLVMTypeHierarchy *TH = nullptr, + LLVMPointsToInfo *PT = nullptr, + Soundness S = Soundness::Soundy, + bool IncludeGlobals = true); - [[nodiscard]] std::set - getReturnSitesOfCallAt(const llvm::Instruction *N) const override; +private: + /// TODO: implement +}; +} // namespace psr - [[nodiscard]] std::set - allNonCallStartNodes() const override; +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include + +// #include "boost/container/flat_set.hpp" +// #include "boost/graph/adjacency_list.hpp" + +// #include "llvm/ADT/DenseSet.h" +// #include "llvm/ADT/StringRef.h" +// #include "llvm/IR/Constants.h" +// #include "llvm/IR/InstrTypes.h" +// #include "llvm/IR/Instruction.h" +// #include "llvm/IR/Module.h" + +// #include "phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h" +// #include "phasar/PhasarLLVM/ControlFlow/ICFG.h" +// #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +// #include "phasar/PhasarLLVM/Pointer/LLVMPointsToInfo.h" +// #include "phasar/Utils/Soundness.h" + +// namespace llvm { +// class Instruction; +// class Function; +// class Module; +// class Instruction; +// class BitCastInst; +// } // namespace llvm + +// namespace psr { + +// class Resolver; +// class ProjectIRDB; +// class LLVMTypeHierarchy; + +// class LLVMBasedICFG +// : public ICFG, +// public virtual LLVMBasedCFG { +// friend class LLVMBasedBackwardsICFG; + +// using GlobalCtorTy = std::multimap; +// using GlobalDtorTy = std::multimap>; + +// private: +// ProjectIRDB &IRDB; +// CallGraphAnalysisType CGType; +// Soundness S; +// bool UserTHInfos = true; +// bool UserPTInfos = true; +// LLVMTypeHierarchy *TH; +// LLVMPointsToInfo *PT; +// std::unique_ptr Res; +// llvm::DenseSet VisitedFunctions; +// std::unordered_set 
UserEntryPoints; + +// GlobalCtorTy GlobalCtors; +// GlobalDtorTy GlobalDtors; + +// llvm::Function *GlobalCleanupFn = nullptr; + +// llvm::SmallDenseMap +// GlobalRegisteredDtorsCaller; + +// // The worklist for direct callee resolution. +// std::vector FunctionWL; + +// // Map indirect calls to the number of possible targets found for it. +// Fixpoint +// // is not reached when more targets are found. +// llvm::DenseMap IndirectCalls; +// // The VertexProperties for our call-graph. +// struct VertexProperties { +// const llvm::Function *F = nullptr; +// VertexProperties() = default; +// VertexProperties(const llvm::Function *F); +// [[nodiscard]] std::string getFunctionName() const; +// }; + +// // The EdgeProperties for our call-graph. +// struct EdgeProperties { +// const llvm::Instruction *CS = nullptr; +// size_t ID = 0; +// EdgeProperties() = default; +// EdgeProperties(const llvm::Instruction *I); +// [[nodiscard]] std::string getCallSiteAsString() const; +// }; + +// /// Specify the type of graph to be used. +// using bidigraph_t = +// boost::adjacency_list; + +// // Let us have some handy typedefs. +// using vertex_t = boost::graph_traits::vertex_descriptor; +// using vertex_iterator = boost::graph_traits::vertex_iterator; +// using edge_t = boost::graph_traits::edge_descriptor; +// using out_edge_iterator = +// boost::graph_traits::out_edge_iterator; using in_edge_iterator +// = boost::graph_traits::in_edge_iterator; + +// /// The call graph. +// bidigraph_t CallGraph; + +// /// Maps functions to the corresponding vertex id. 
+// std::unordered_map FunctionVertexMap; + +// void processFunction(const llvm::Function *F, Resolver &Resolver, +// bool &FixpointReached); + +// bool constructDynamicCall(const llvm::Instruction *I, Resolver &Resolver); + +// std::unique_ptr +// makeResolver(ProjectIRDB &IRDB, LLVMTypeHierarchy &TH, LLVMPointsToInfo +// &PT); + +// template +// static void insertGlobalCtorsDtorsImpl(MapTy &Into, const llvm::Module *M, +// llvm::StringRef Fun) { +// const auto *Gtors = M->getGlobalVariable(Fun); +// if (Gtors == nullptr) { +// return; +// } + +// if (const auto *FunArray = llvm::dyn_cast( +// Gtors->getType()->getPointerElementType())) { +// if (const auto *ConstFunArray = +// llvm::dyn_cast(Gtors->getInitializer())) { +// for (const auto &Op : ConstFunArray->operands()) { +// if (const auto *FunDesc = llvm::dyn_cast(Op)) +// { +// auto *Fun = +// llvm::dyn_cast(FunDesc->getOperand(1)); const +// auto *Prio = +// llvm::dyn_cast(FunDesc->getOperand(0)); +// if (Fun && Prio) { +// auto PrioInt = size_t(Prio->getLimitedValue(SIZE_MAX)); +// Into.emplace(PrioInt, Fun); +// } +// } +// } +// } +// } +// } + +// llvm::Function *buildCRuntimeGlobalDtorsModel(llvm::Module &M); +// const llvm::Function *buildCRuntimeGlobalCtorsDtorsModel(llvm::Module &M); + +// struct dependency_visitor; + +// public: +// static constexpr llvm::StringLiteral GlobalCRuntimeModelName = +// "__psrCRuntimeGlobalCtorsModel"; + +// /** +// * Why a multimap? A given instruction might have multiple target +// functions. +// * For example, if the points-to analysis indicates that a pointer could +// * be for multiple different types. 
+// */ +// using OutEdgesAndTargets = std::unordered_multimap; + +// LLVMBasedICFG(ProjectIRDB &IRDB, CallGraphAnalysisType CGType, +// const std::set &EntryPoints = {}, +// LLVMTypeHierarchy *TH = nullptr, LLVMPointsToInfo *PT = +// nullptr, Soundness S = Soundness::Soundy, bool IncludeGlobals +// = true); + +// LLVMBasedICFG(const LLVMBasedICFG &ICF); + +// LLVMBasedICFG &operator=(const LLVMBasedICFG &) = delete; + +// ~LLVMBasedICFG() override; + +// [[nodiscard]] const llvm::Function *getFirstGlobalCtorOrNull() const; + +// [[nodiscard]] const llvm::Function *getLastGlobalDtorOrNull() const; + +// /** +// * \return all of the functions in the IRDB, this may include some not in +// the +// * callgraph +// */ +// [[nodiscard]] std::set +// getAllFunctions() const override; + +// /** +// * A boost flat_set is used here because we already have the functions in +// * order, so building it is fast since we can always add to the end. We +// get +// * the performance and space benefits of array-backed storage and all the +// * functionality of a set. +// * +// * \return all of the functions which are represented by a vertex in the +// * callgraph. +// */ +// [[nodiscard]] boost::container::flat_set +// getAllVertexFunctions() const; + +// bool isIndirectFunctionCall(const llvm::Instruction *N) const override; + +// bool isVirtualFunctionCall(const llvm::Instruction *N) const override; + +// [[nodiscard]] const llvm::Function * +// getFunction(const std::string &Fun) const override; + +// /** +// * Essentially the same as `getCallsFromWithin`, but uses the callgraph +// * data directly. +// * \return all call sites within a given method. +// */ +// std::vector +// getOutEdges(const llvm::Function *Fun) const; + +// /** +// * For the supplied function, get all the output edge Instructions and +// * the corresponding Function. This pulls data directly from the +// callgraph. +// * +// * \return the edges and the target function for each edge. 
+// */ +// OutEdgesAndTargets getOutEdgeAndTarget(const llvm::Function *Fun) const; + +// /** +// * Removes all edges found for the given instruction within the +// * sourceFunction. \return number of edges removed +// */ +// size_t removeEdges(const llvm::Function *F, const llvm::Instruction *Inst); + +// /** +// * Removes the vertex for the given function. +// * CAUTION: does not remove edges, invoking this on a function with +// * IN or OUT edges is a bad idea. +// * \return true iff the vertex was found and removed. +// */ +// bool removeVertex(const llvm::Function *Fun); + +// /** +// * \return the total number of in edges to the vertex representing this +// * Function. +// */ +// size_t getCallerCount(const llvm::Function *Fun) const; + +// /** +// * \return all callee methods for a given call that might be called. +// */ +// [[nodiscard]] std::set +// getCalleesOfCallAt(const llvm::Instruction *N) const override; + +// void forEachCalleeOfCallAt( +// const llvm::Instruction *I, +// llvm::function_ref Callback) const; + +// /** +// * \return all caller statements/nodes of a given method. +// */ +// [[nodiscard]] std::set +// getCallersOf(const llvm::Function *Fun) const override; + +// /** +// * \return all call sites within a given method. 
+// */ +// [[nodiscard]] std::set +// getCallsFromWithin(const llvm::Function *Fun) const override; - void mergeWith(const LLVMBasedICFG &Other); +// [[nodiscard]] std::set +// getReturnSitesOfCallAt(const llvm::Instruction *N) const override; - [[nodiscard]] CallGraphAnalysisType getCallGraphAnalysisType() const; +// [[nodiscard]] std::set +// allNonCallStartNodes() const override; + +// void mergeWith(const LLVMBasedICFG &Other); + +// [[nodiscard]] CallGraphAnalysisType getCallGraphAnalysisType() const; + +// using LLVMBasedCFG::print; // tell the compiler we wish to have both prints +// void print(llvm::raw_ostream &OS = llvm::outs()) const override; + +// void printAsDot(llvm::raw_ostream &OS = llvm::outs(), +// bool PrintEdgeLabels = true) const; + +// void printInternalPTGAsDot(llvm::raw_ostream &OS = llvm::outs()) const; + +// using LLVMBasedCFG::getAsJson; // tell the compiler we wish to have both +// // prints +// [[nodiscard]] nlohmann::json getAsJson() const override; + +// void printAsJson(llvm::raw_ostream &OS = llvm::outs()) const; + +// /// Create an IR based JSON export of the whole ICFG. +// /// +// /// Note: The exported JSON contains a list of all edges in this ICFG +// [[nodiscard]] nlohmann::json exportICFGAsJson() const; - using LLVMBasedCFG::print; // tell the compiler we wish to have both prints - void print(llvm::raw_ostream &OS = llvm::outs()) const override; - - void printAsDot(llvm::raw_ostream &OS = llvm::outs(), - bool PrintEdgeLabels = true) const; - - void printInternalPTGAsDot(llvm::raw_ostream &OS = llvm::outs()) const; - - using LLVMBasedCFG::getAsJson; // tell the compiler we wish to have both - // prints - [[nodiscard]] nlohmann::json getAsJson() const override; - - void printAsJson(llvm::raw_ostream &OS = llvm::outs()) const; - - /// Create an IR based JSON export of the whole ICFG. 
- /// - /// Note: The exported JSON contains a list of all edges in this ICFG - [[nodiscard]] nlohmann::json exportICFGAsJson() const; +// /// Create a JSON export of the whole ICFG similar to exportICFGAsJson() +// /// enriched with source-code information on every edge and ignoring debug +// /// instructions +// [[nodiscard]] nlohmann::json exportICFGAsSourceCodeJson() const; - /// Create a JSON export of the whole ICFG similar to exportICFGAsJson() - /// enriched with source-code information on every edge and ignoring debug - /// instructions - [[nodiscard]] nlohmann::json exportICFGAsSourceCodeJson() const; +// [[nodiscard]] unsigned getNumOfVertices() const; - [[nodiscard]] unsigned getNumOfVertices() const; +// [[nodiscard]] unsigned getNumOfEdges() const; - [[nodiscard]] unsigned getNumOfEdges() const; +// std::vector getDependencyOrderedFunctions(); - std::vector getDependencyOrderedFunctions(); - - [[nodiscard]] const llvm::Function * - getRegisteredDtorsCallerOrNull(const llvm::Module *Mod); - - template void forEachGlobalCtor(Fn &&F) const { - for (auto [Prio, Fun] : GlobalCtors) { - std::invoke(F, static_cast(Fun)); - } - } +// [[nodiscard]] const llvm::Function * +// getRegisteredDtorsCallerOrNull(const llvm::Module *Mod); + +// template void forEachGlobalCtor(Fn &&F) const { +// for (auto [Prio, Fun] : GlobalCtors) { +// std::invoke(F, static_cast(Fun)); +// } +// } - template void forEachGlobalDtor(Fn &&F) const { - for (auto [Prio, Fun] : GlobalDtors) { - std::invoke(F, static_cast(Fun)); - } - } +// template void forEachGlobalDtor(Fn &&F) const { +// for (auto [Prio, Fun] : GlobalDtors) { +// std::invoke(F, static_cast(Fun)); +// } +// } -protected: - void collectGlobalCtors() final; +// protected: +// void collectGlobalCtors() final; - void collectGlobalDtors() final; +// void collectGlobalDtors() final; - void collectGlobalInitializers() final; +// void collectGlobalInitializers() final; - void collectRegisteredDtors() final; -}; +// void 
collectRegisteredDtors() final; +// }; -} // namespace psr +// } // namespace psr #endif diff --git a/include/phasar/PhasarLLVM/Utils/ByRef.h b/include/phasar/PhasarLLVM/Utils/ByRef.h new file mode 100644 index 0000000000..4e83915bcd --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/ByRef.h @@ -0,0 +1,28 @@ +/****************************************************************************** + * Copyright (c) 2022 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_UTILS_BYREF_H +#define PHASAR_PHASARLLVM_UTILS_BYREF_H + +#include +namespace psr { +template +using ByConstRef = + std::conditional_t, + T, const T &>; +template +using ByMoveRef = + std::conditional_t, + T, T &&>; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_UTILS_BYREF_H \ No newline at end of file diff --git a/include/phasar/Utils/Soundness.h b/include/phasar/Utils/Soundness.h index 9a1357dfd6..914f79fb14 100644 --- a/include/phasar/Utils/Soundness.h +++ b/include/phasar/Utils/Soundness.h @@ -10,8 +10,14 @@ #ifndef PHASAR_UTILS_SOUNDNESS_H_ #define PHASAR_UTILS_SOUNDNESS_H_ +#include "llvm/ADT/StringRef.h" + #include +namespace llvm { +class raw_ostream; +} // namespace llvm + namespace psr { enum class Soundness { @@ -20,11 +26,11 @@ enum class Soundness { Invalid }; -std::string toString(const Soundness &S); +std::string toString(Soundness S); -Soundness toSoundness(const std::string &S); +Soundness toSoundness(llvm::StringRef S); -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Soundness &S); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Soundness S); } // namespace psr diff --git a/lib/Utils/Soundness.cpp b/lib/Utils/Soundness.cpp index 976749a829..3380ab0d6d 100644 --- a/lib/Utils/Soundness.cpp +++ b/lib/Utils/Soundness.cpp 
@@ -7,18 +7,14 @@ * Linus Jungemann and others *****************************************************************************/ -#include -#include +#include "phasar/Utils/Soundness.h" #include "llvm/ADT/StringSwitch.h" - -#include "phasar/Utils/Soundness.h" +#include "llvm/Support/raw_ostream.h" using namespace psr; -namespace psr { - -std::string toString(const Soundness &S) { +std::string psr::toString(Soundness S) { switch (S) { default: #define SOUNDNESS_FLAG_TYPE(NAME, TYPE) \ @@ -29,7 +25,7 @@ std::string toString(const Soundness &S) { } } -Soundness toSoundness(const std::string &S) { +Soundness psr::toSoundness(llvm::StringRef S) { Soundness Type = llvm::StringSwitch(S) #define SOUNDNESS_FLAG_TYPE(NAME, TYPE) .Case(NAME, Soundness::TYPE) #include "phasar/Utils/Soundness.def" @@ -37,8 +33,6 @@ Soundness toSoundness(const std::string &S) { return Type; } -std::ostream &operator<<(std::ostream &OS, const Soundness &S) { +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, Soundness S) { return OS << toString(S); } - -} // namespace psr From 16f5c93186c6d2073d21def987fce1c2f3f0548d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 3 Aug 2022 16:03:13 +0200 Subject: [PATCH 03/18] Implement LLVMBasedICFG --- include/phasar/PhasarLLVM/ControlFlow/ICFG.h | 4 - .../phasar/PhasarLLVM/ControlFlow/ICFGBase.h | 8 +- .../PhasarLLVM/ControlFlow/LLVMBasedCFG.h | 3 +- .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 57 +- .../ControlFlow/Resolver/CHAResolver.h | 2 + .../{ => Resolver}/CallGraphAnalysisType.h | 0 .../ControlFlow/Resolver/DTAResolver.h | 2 + .../ControlFlow/Resolver/NOResolver.h | 2 + .../ControlFlow/Resolver/OTFResolver.h | 2 + .../ControlFlow/Resolver/RTAResolver.h | 2 + .../ControlFlow/Resolver/Resolver.h | 16 +- include/phasar/Utils/MaybeUniquePtr.h | 147 +++ include/phasar/Utils/TypeTraits.h | 25 + include/phasar/Utils/Utilities.h | 5 +- lib/PhasarLLVM/ControlFlow/CFG.cpp | 15 +- lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 999 
++++++------------ .../ControlFlow/LLVMBasedICFGExportsImpl.cpp | 344 ++++++ .../ControlFlow/LLVMBasedICFGGlobalsImpl.cpp | 306 ++++++ .../ControlFlow/Resolver/CHAResolver.cpp | 2 + .../{ => Resolver}/CallGraphAnalysisType.cpp | 2 +- .../ControlFlow/Resolver/DTAResolver.cpp | 2 + .../ControlFlow/Resolver/NOResolver.cpp | 2 + .../ControlFlow/Resolver/OTFResolver.cpp | 2 + .../ControlFlow/Resolver/RTAResolver.cpp | 2 + .../ControlFlow/Resolver/Resolver.cpp | 41 + 25 files changed, 1272 insertions(+), 720 deletions(-) rename include/phasar/PhasarLLVM/ControlFlow/{ => Resolver}/CallGraphAnalysisType.h (100%) create mode 100644 include/phasar/Utils/MaybeUniquePtr.h create mode 100644 lib/PhasarLLVM/ControlFlow/LLVMBasedICFGExportsImpl.cpp create mode 100644 lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp rename lib/PhasarLLVM/ControlFlow/{ => Resolver}/CallGraphAnalysisType.cpp (94%) diff --git a/include/phasar/PhasarLLVM/ControlFlow/ICFG.h b/include/phasar/PhasarLLVM/ControlFlow/ICFG.h index 34fd105e7d..862c85e762 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/ICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/ICFG.h @@ -65,10 +65,6 @@ template class ICFG : public virtual CFG { [[nodiscard]] virtual std::set getReturnSitesOfCallAt(N Stmt) const = 0; - [[nodiscard]] const std::vector &getGlobalInitializers() const { - return GlobalInitializers; - } - using CFG::print; // tell the compiler we wish to have both prints virtual void print(llvm::raw_ostream &OS = llvm::outs()) const = 0; diff --git a/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h b/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h index 05e542c2e7..f4b3bac9ee 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h +++ b/include/phasar/PhasarLLVM/ControlFlow/ICFGBase.h @@ -33,7 +33,7 @@ template class ICFGBase { using f_t = typename CFGTraits::f_t; ICFGBase() noexcept { - static_assert(std::is_base_of_v, Derived>, + static_assert(is_crtp_base_of_v, "An ICFG must also be a CFG"); } @@ -71,11 
+71,11 @@ template class ICFGBase { is_iterable_over_v); return self().getCallsFromWithinImpl(Fun); } - [[nodiscard]] decltype(auto) getReturnSitesOfCallAt(f_t Fun) const { + [[nodiscard]] decltype(auto) getReturnSitesOfCallAt(n_t Inst) const { static_assert( - is_iterable_over_v); - return self().getReturnSitesOfCallAtImpl(Fun); + return self().getReturnSitesOfCallAtImpl(Inst); } [[nodiscard]] decltype(auto) getGlobalInitializers(f_t Fun) const { static_assert( diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index a800b61362..e94872b3dd 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -88,7 +88,8 @@ class LLVMBasedCFG : public CFGBase { void printImpl(f_t Fun, llvm::raw_ostream &OS) const { OS << *Fun; } [[nodiscard]] nlohmann::json getAsJsonImpl(f_t /*Fun*/) const { return ""; } - bool IgnoreDbgInstructions = false; +protected: + const bool IgnoreDbgInstructions = false; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index e1a0358142..41bd5243cf 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -17,16 +17,22 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDICFG_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDICFG_H_ +#include "phasar/DB/ProjectIRDB.h" #include "phasar/PhasarLLVM/ControlFlow/CFGBase.h" #include "phasar/PhasarLLVM/ControlFlow/ICFGBase.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/Soundness.h" +#include "nlohmann/json.hpp" + #include "boost/graph/adjacency_list.hpp" + +#include "llvm/Support/raw_ostream.h" namespace psr { -class LLVMProjectIRDB; +class ProjectIRDB; enum class 
CallGraphAnalysisType; -class LLVMTypeHierarchy; class LLVMPointsToInfo; class LLVMBasedICFG; @@ -64,19 +70,56 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { using out_edge_iterator = boost::graph_traits::out_edge_iterator; using in_edge_iterator = boost::graph_traits::in_edge_iterator; - /// The call graph. - bidigraph_t CallGraph; + struct Builder; public: - explicit LLVMBasedICFG(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, - const std::set &EntryPoints = {}, + static constexpr llvm::StringLiteral GlobalCRuntimeModelName = + "__psrCRuntimeGlobalCtorsModel"; + + explicit LLVMBasedICFG(ProjectIRDB *IRDB, CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints = {}, LLVMTypeHierarchy *TH = nullptr, LLVMPointsToInfo *PT = nullptr, Soundness S = Soundness::Soundy, bool IncludeGlobals = true); + [[nodiscard]] std::string + exportICFGAsDot(bool WithSourceCodeInfo = true) const; + [[nodiscard]] nlohmann::json + exportICFGAsJson(bool WithSourceCodeInfo = true) const; + + [[nodiscard]] std::vector getAllVertexFunctions() const; + private: - /// TODO: implement + [[nodiscard]] decltype(auto) getAllFunctions() const { + return IRDB->getAllFunctions(); + } + + [[nodiscard]] f_t getFunction(llvm::StringRef Fun) const { + return IRDB->getFunction(Fun); + } + + [[nodiscard]] bool isIndirectFunctionCallImpl(n_t Inst) const; + [[nodiscard]] bool isVirtualFunctionCallImpl(n_t Inst) const; + [[nodiscard]] std::vector allNonCallStartNodesImpl() const; + [[nodiscard]] llvm::SmallVector getCalleesOfCallAtImpl(n_t Inst) const; + /// TODO: Return a map_iterator on the in_edge_iterator -- How to deal with + /// not-contained funs? assert them out? 
+ [[nodiscard]] llvm::SmallVector getCallersOfImpl(f_t Fun) const; + [[nodiscard]] llvm::SmallVector getCallsFromWithinImpl(f_t Fun) const; + [[nodiscard]] llvm::SmallVector + getReturnSitesOfCallAtImpl(n_t Inst) const; + void printImpl(llvm::raw_ostream &OS) const; + [[nodiscard]] nlohmann::json getAsJsonImpl() const; + + llvm::Function *buildCRuntimeGlobalCtorsDtorsModel( + llvm::Module &M, llvm::ArrayRef UserEntryPoints); + + /// The call graph. + bidigraph_t CallGraph; + llvm::DenseMap FunctionVertexMap; + ProjectIRDB *IRDB = nullptr; + MaybeUniquePtr TH; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 9f5b69ee20..fc97503f01 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -34,6 +34,8 @@ class CHAResolver : public Resolver { ~CHAResolver() override = default; FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; + + [[nodiscard]] std::string str() const override; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CallGraphAnalysisType.h similarity index 100% rename from include/phasar/PhasarLLVM/ControlFlow/CallGraphAnalysisType.h rename to include/phasar/PhasarLLVM/ControlFlow/Resolver/CallGraphAnalysisType.h diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h index af354e37d0..2daae8f61b 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h @@ -64,6 +64,8 @@ class DTAResolver : public CHAResolver { FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; void otherInst(const llvm::Instruction *Inst) override; + + [[nodiscard]] std::string str() const 
override; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index c6756c4120..a3788727ec 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -46,6 +46,8 @@ class NOResolver final : public Resolver { FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite) override; void otherInst(const llvm::Instruction *Inst) override; + + [[nodiscard]] std::string str() const override; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 31e9300b5d..77bf8c0672 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -71,6 +71,8 @@ class OTFResolver : public CHAResolver { static std::vector> getActualFormalPointerPairs(const llvm::CallBase *CallSite, const llvm::Function *CalleeTarget); + + [[nodiscard]] std::string str() const override; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index 27bc8a1bf6..ea3a6dbc4f 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -35,6 +35,8 @@ class RTAResolver : public CHAResolver { ~RTAResolver() override = default; FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; + + [[nodiscard]] std::string str() const override; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 6f51d3dc58..4e0e7d91ca 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ 
b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -17,12 +17,11 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ -#include -#include -#include - #include "llvm/ADT/DenseSet.h" +#include +#include + namespace llvm { class Instruction; class CallBase; @@ -33,6 +32,9 @@ class StructType; namespace psr { class ProjectIRDB; class LLVMTypeHierarchy; +class LLVMPointsToInfo; +enum class CallGraphAnalysisType; +class LLVMBasedICFG; std::optional getVFTIndex(const llvm::CallBase *CallSite); @@ -70,6 +72,12 @@ class Resolver { virtual FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite); virtual void otherInst(const llvm::Instruction *Inst); + + [[nodiscard]] virtual std::string str() const = 0; + + static std::unique_ptr + create(CallGraphAnalysisType Ty, ProjectIRDB *IRDB, LLVMTypeHierarchy *TH, + LLVMBasedICFG *ICF = nullptr, LLVMPointsToInfo *PT = nullptr); }; } // namespace psr diff --git a/include/phasar/Utils/MaybeUniquePtr.h b/include/phasar/Utils/MaybeUniquePtr.h new file mode 100644 index 0000000000..6ef8c66454 --- /dev/null +++ b/include/phasar/Utils/MaybeUniquePtr.h @@ -0,0 +1,147 @@ +/****************************************************************************** + * Copyright (c) 2022 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_UTILS_MAYBEUNIQUEPTR_H_ +#define PHASAR_UTILS_MAYBEUNIQUEPTR_H_ + +#include "llvm/ADT/PointerIntPair.h" + +#include +#include +#include + +namespace psr { + +/// A smart-pointer, similar to std::unique_ptr that can be used as both, +/// owning and non-owning pointer. 
+template class MaybeUniquePtr { + struct PointerBoolPairFallback { + T *Pointer = nullptr; + bool Flag = false; + + /// Compatibility with llvm::PointerIntPair: + [[nodiscard]] T *getPointer() const noexcept { return Pointer; } + [[nodiscard]] bool getInt() const noexcept { return Flag; } + void setInt(bool Flag) noexcept { this->Flag = Flag; } + }; + +public: + MaybeUniquePtr() noexcept = default; + + MaybeUniquePtr(T *Pointer, bool Owns = false) noexcept + : Data{Pointer, Owns && Pointer != nullptr} {} + + MaybeUniquePtr(std::unique_ptr &&Owner) noexcept + : MaybeUniquePtr(Owner.release(), true) {} + + template + MaybeUniquePtr(std::unique_ptr &&Owner) noexcept + : MaybeUniquePtr(Owner.release(), true) {} + + MaybeUniquePtr(MaybeUniquePtr &&Other) noexcept + : Data(std::exchange(Other.Data, {})) {} + + void swap(MaybeUniquePtr &Other) noexcept { std::swap(Data, Other.Data); } + + friend void swap(MaybeUniquePtr &LHS, MaybeUniquePtr &RHS) noexcept { + LHS.swap(RHS); + } + + MaybeUniquePtr &operator=(MaybeUniquePtr &&Other) noexcept { + swap(Other); + return *this; + } + + MaybeUniquePtr &operator=(std::unique_ptr &&Owner) noexcept { + if (owns()) { + delete Data.getPointer(); + } + Data = {Owner.release(), true}; + return *this; + } + + template + MaybeUniquePtr &operator=(std::unique_ptr &&Owner) noexcept { + if (owns()) { + delete Data.getPointer(); + } + Data = {Owner.release(), true}; + return *this; + } + + MaybeUniquePtr(const MaybeUniquePtr &) = delete; + MaybeUniquePtr &operator=(const MaybeUniquePtr &) = delete; + + ~MaybeUniquePtr() { + if (owns()) { + delete Data.getPointer(); + Data = {}; + } + } + + [[nodiscard]] T *get() noexcept { return Data.getPointer(); } + [[nodiscard]] const T *get() const noexcept { return Data.getPointer(); } + + [[nodiscard]] T *operator->() noexcept { return get(); } + [[nodiscard]] const T *operator->() const noexcept { return get(); } + + [[nodiscard]] T &operator*() noexcept { return *get(); } + [[nodiscard]] const T
&operator*() const noexcept { return *get(); } + + T *release() noexcept { + Data.setInt(false); + return Data.getPointer(); + } + + void reset() noexcept { + if (owns()) { + delete Data.getPointer(); + } + Data = {}; + } + + [[nodiscard]] bool owns() const noexcept { + return Data.getInt() && Data.getPointer(); + } + + friend bool operator==(const MaybeUniquePtr &LHS, + const MaybeUniquePtr &RHS) noexcept { + return LHS.Data.getPointer() == RHS.Data.getPointer(); + } + friend bool operator!=(const MaybeUniquePtr &LHS, + const MaybeUniquePtr &RHS) noexcept { + return !(LHS == RHS); + } + + friend bool operator==(const MaybeUniquePtr &LHS, const T *RHS) noexcept { + return LHS.Data.getPointer() == RHS; + } + friend bool operator!=(const MaybeUniquePtr &LHS, const T *RHS) noexcept { + return !(LHS == RHS); + } + + friend bool operator==(const T *LHS, const MaybeUniquePtr &RHS) noexcept { + return LHS == RHS.Data.getPointer(); + } + friend bool operator!=(const T *LHS, const MaybeUniquePtr &RHS) noexcept { + return !(LHS == RHS); + } + + explicit operator bool() const noexcept { + return Data.getPointer() != nullptr; + } + +private: + std::conditional_t<(alignof(T) > 1), llvm::PointerIntPair, + PointerBoolPairFallback> + Data{}; +}; +} // namespace psr + +#endif // PHASAR_UTILS_MAYBEUNIQUEPTR_H_ diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 90937758fa..2377188bb4 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -84,6 +84,28 @@ struct has_setIFDSIDESolverConfig< T, decltype(std::declval().setIFDSIDESolverConfig( std::declval()))> : std::true_type {}; +template