Skip to content

Commit

Permalink
[analyzer] Add yaml parser to GenericTaintChecker
Browse files Browse the repository at this point in the history
While we implemented taint propagation rules for several
builtin/standard functions, there's a natural desire for users to add
such rules to custom functions.

A series of patches will implement an option that allows users to
annotate their functions with taint propagation rules through a YAML
file. This one adds parsing of the configuration file, which may be
specified on the command line with the analyzer config:
alpha.security.taint.TaintPropagation:Config. The configuration may
contain propagation rules, filter functions (remove taint) and sink
functions (give a warning if it gets a tainted value).

I also added a new header for future checkers to conveniently read YAML
files as checker options.

Differential Revision: https://reviews.llvm.org/D59555

llvm-svn: 367190
  • Loading branch information
boga95 committed Jul 28, 2019
1 parent d5bc4b0 commit 4bde15f
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 25 deletions.
7 changes: 7 additions & 0 deletions clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
Expand Up @@ -799,6 +799,13 @@ let ParentPackage = Taint in {

// Definition of the "TaintPropagation" checker. It declares a single
// string-typed checker option, "Config", whose help text says it names the
// configuration file.
// NOTE(review): the "" entry is presumably the option's default value (no
// configuration file) -- confirm against the CmdLineOption definition.
def GenericTaintChecker : Checker<"TaintPropagation">,
HelpText<"Generate taint information used by other checkers">,
CheckerOptions<[
CmdLineOption<String,
"Config",
"Specifies the name of the configuration file.",
"",
InAlpha>,
]>,
Documentation<HasAlphaDocumentation>;

} // end "alpha.security.taint"
Expand Down
178 changes: 156 additions & 22 deletions clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Expand Up @@ -15,16 +15,18 @@
//===----------------------------------------------------------------------===//

#include "Taint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <climits>
#include <initializer_list>
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/YAMLTraits.h"
#include <limits>
#include <utility>

using namespace clang;
Expand All @@ -44,14 +46,51 @@ class GenericTaintChecker

void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;

void printState(raw_ostream &Out, ProgramStateRef State,
const char *NL, const char *Sep) const override;
void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
const char *Sep) const override;

private:
static const unsigned InvalidArgIndex = UINT_MAX;
using ArgVector = SmallVector<unsigned, 2>;
using SignedArgVector = SmallVector<int, 2>;

enum class VariadicType { None, Src, Dst };

/// Used to parse the configuration file.
///
/// Holds the three lists a configuration may contain: propagation rules,
/// filter functions and sink functions. The type is move-only: copy
/// construction and copy assignment are deleted, the move operations are
/// defaulted.
struct TaintConfiguration {
/// A function name paired with a list of argument indexes.
using NameArgsPair = std::pair<std::string, ArgVector>;

/// One propagation entry as read from the configuration.
struct Propagation {
/// Name of the function the entry applies to.
std::string Name;
/// Source argument indexes.
ArgVector SrcArgs;
/// Destination argument indexes. Signed, so that the configuration can use
/// -1, which convertToArgVector translates to ReturnValueIndex.
SignedArgVector DstArgs;
/// Variadic kind of the entry (None, Src or Dst). Not initialized here; the
/// YAML mapping passes a default for it.
VariadicType VarType;
/// Index that accompanies VarType. Not initialized here; the YAML mapping
/// passes a default for it.
unsigned VarIndex;
};

/// Propagation entries.
std::vector<Propagation> Propagations;
/// Filter functions and their argument lists.
std::vector<NameArgsPair> Filters;
/// Sink functions and their argument lists.
std::vector<NameArgsPair> Sinks;

TaintConfiguration() = default;
TaintConfiguration(const TaintConfiguration &) = delete;
TaintConfiguration(TaintConfiguration &&) = default;
TaintConfiguration &operator=(const TaintConfiguration &) = delete;
TaintConfiguration &operator=(TaintConfiguration &&) = default;
};

/// Convert SignedArgVector to ArgVector.
ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
SignedArgVector Args);

/// Parse the config.
void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
TaintConfiguration &&Config);

static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
/// Denotes the return value.
static const unsigned ReturnValueIndex = UINT_MAX - 1;
static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
1};

private:
mutable std::unique_ptr<BugType> BT;
void initBugType() const {
if (!BT)
Expand Down Expand Up @@ -97,8 +136,6 @@ class GenericTaintChecker
bool generateReportIfTainted(const Expr *E, const char Msg[],
CheckerContext &C) const;

using ArgVector = SmallVector<unsigned, 2>;

/// A struct used to specify taint propagation rules for a function.
///
/// If any of the possible taint source arguments is tainted, all of the
Expand All @@ -109,8 +146,6 @@ class GenericTaintChecker
/// ReturnValueIndex is added to the dst list, the return value will be
/// tainted.
struct TaintPropagationRule {
enum class VariadicType { None, Src, Dst };

using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
CheckerContext &C);

Expand All @@ -131,8 +166,7 @@ class GenericTaintChecker
: VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
PropagationFunc(nullptr) {}

TaintPropagationRule(std::initializer_list<unsigned> &&Src,
std::initializer_list<unsigned> &&Dst,
TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
VariadicType Var = VariadicType::None,
unsigned VarIndex = InvalidArgIndex,
PropagationFuncType Func = nullptr)
Expand Down Expand Up @@ -176,6 +210,19 @@ class GenericTaintChecker
static bool postSocket(bool IsTainted, const CallExpr *CE,
CheckerContext &C);
};

using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
using NameArgMap = llvm::StringMap<ArgVector>;

/// Defines a map between the propagation function's name and
/// TaintPropagationRule.
NameRuleMap CustomPropagations;

/// Defines a map between the filter function's name and filtering args.
NameArgMap CustomFilters;

/// Defines a map between the sink function's name and sinking args.
NameArgMap CustomSinks;
};

const unsigned GenericTaintChecker::ReturnValueIndex;
Expand All @@ -193,15 +240,94 @@ const char GenericTaintChecker::MsgTaintedBufferSize[] =
"Untrusted data is used to specify the buffer size "
"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
"for character data and the null terminator)";

} // end of anonymous namespace

// Short alias for the checker's configuration type, used by the YAML traits
// below and by registerGenericTaintChecker.
using TaintConfig = GenericTaintChecker::TaintConfiguration;

// Register both element types with LLVM's YAML I/O so that std::vector of
// them can be read as a YAML sequence.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)

// YAML I/O trait specializations describing how the taint configuration types
// are mapped to and from YAML.
namespace llvm {
namespace yaml {
/// Top-level document: three keys, "Propagations", "Filters" and "Sinks",
/// all of them optional.
template <> struct MappingTraits<TaintConfig> {
static void mapping(IO &IO, TaintConfig &Config) {
IO.mapOptional("Propagations", Config.Propagations);
IO.mapOptional("Filters", Config.Filters);
IO.mapOptional("Sinks", Config.Sinks);
}
};

/// One propagation entry. "Name" is required; "SrcArgs", "DstArgs",
/// "VariadicType" and "VariadicIndex" are optional.
template <> struct MappingTraits<TaintConfig::Propagation> {
static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
IO.mapRequired("Name", Propagation.Name);
IO.mapOptional("SrcArgs", Propagation.SrcArgs);
IO.mapOptional("DstArgs", Propagation.DstArgs);
// The third argument is the value used when the key is absent.
IO.mapOptional("VariadicType", Propagation.VarType,
GenericTaintChecker::VariadicType::None);
IO.mapOptional("VariadicIndex", Propagation.VarIndex,
GenericTaintChecker::InvalidArgIndex);
}
};

/// Spellings accepted for VariadicType: "None", "Src" and "Dst".
template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
}
};

/// Filter/sink entry: both "Name" (pair.first) and "Args" (pair.second) are
/// required.
template <> struct MappingTraits<TaintConfig::NameArgsPair> {
static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
IO.mapRequired("Name", NameArg.first);
IO.mapRequired("Args", NameArg.second);
}
};
} // namespace yaml
} // namespace llvm

/// A set which is used to pass information from call pre-visit instruction
/// to the call post-visit. The values are unsigned integers, which are either
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
/// points to data, which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)

/// Translate the signed argument indexes read from the configuration into the
/// checker's unsigned index representation.
///
/// Non-negative values are kept as they are and -1 becomes ReturnValueIndex.
/// Any value below -1 becomes InvalidArgIndex and is additionally reported
/// through \p Mgr as an invalid value of the checker option \p Option.
GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
    CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
  ArgVector Converted;
  for (const int Index : Args) {
    // Ordinary argument position: keep it.
    if (Index >= 0) {
      Converted.push_back(static_cast<unsigned>(Index));
      continue;
    }

    // -1 is the configuration's spelling for "the return value".
    if (Index == -1) {
      Converted.push_back(ReturnValueIndex);
      continue;
    }

    // Everything below -1 is rejected: record a placeholder, then report it.
    Converted.push_back(InvalidArgIndex);
    Mgr.reportInvalidCheckerOptionValue(
        this, Option,
        "an argument number for propagation rules greater or equal to -1");
  }
  return Converted;
}

/// Transfer the entries of a parsed configuration into the checker's maps.
///
/// Propagations go to CustomPropagations (destination indexes are converted
/// with convertToArgVector first), filters to CustomFilters and sinks to
/// CustomSinks. Entries are added with try_emplace, keyed by function name.
///
/// \param Mgr    Forwarded to convertToArgVector for reporting bad indexes.
/// \param Option Name of the checker option, used in those reports.
/// \param Config The parsed configuration; its argument lists are moved from.
void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
                                             const std::string &Option,
                                             TaintConfiguration &&Config) {
  for (TaintConfiguration::Propagation &Rule : Config.Propagations) {
    ArgVector DstArgs = convertToArgVector(Mgr, Option, Rule.DstArgs);
    CustomPropagations.try_emplace(Rule.Name, std::move(Rule.SrcArgs),
                                   std::move(DstArgs), Rule.VarType,
                                   Rule.VarIndex);
  }

  for (TaintConfiguration::NameArgsPair &Filter : Config.Filters)
    CustomFilters.try_emplace(Filter.first, std::move(Filter.second));

  for (TaintConfiguration::NameArgsPair &Sink : Config.Sinks)
    CustomSinks.try_emplace(Sink.first, std::move(Sink.second));
}

GenericTaintChecker::TaintPropagationRule
GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
Expand All @@ -218,7 +344,8 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
.Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getchar_unlocked",
TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
.Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
Expand Down Expand Up @@ -454,7 +581,7 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
// Check for taint in variadic arguments.
if (!IsTainted && VariadicType::Src == VarType) {
// Check if any of the arguments is tainted
for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
break;
}
Expand Down Expand Up @@ -485,7 +612,7 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
// If they are not pointing to const data, mark data as tainted.
// TODO: So far we are just going one level down; ideally we'd need to
// recurse here.
for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
const Expr *Arg = CE->getArg(i);
// Process pointer argument.
const Type *ArgTy = Arg->getType().getTypePtr();
Expand Down Expand Up @@ -550,7 +677,7 @@ bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {

static bool getPrintfFormatArgumentNum(const CallExpr *CE,
const CheckerContext &C,
unsigned int &ArgNum) {
unsigned &ArgNum) {
// Find if the function contains a format string argument.
// Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
// vsnprintf, syslog, custom annotated functions.
Expand Down Expand Up @@ -603,7 +730,7 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE, CheckerContext &C) const {
// Check if the function contains a format string argument.
unsigned int ArgNum = 0;
unsigned ArgNum = 0;
if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
return false;

Expand Down Expand Up @@ -676,8 +803,15 @@ bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
}

void ento::registerGenericTaintChecker(CheckerManager &mgr) {
mgr.registerChecker<GenericTaintChecker>();
/// Register GenericTaintChecker and load its configuration.
///
/// The value of the checker's "Config" option is handed to getConfiguration;
/// when that yields a configuration, it is moved into the checker through
/// parseConfiguration.
void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
  GenericTaintChecker *Checker = Mgr.registerChecker<GenericTaintChecker>();

  const std::string Option{"Config"};
  const StringRef FileName =
      Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);

  llvm::Optional<TaintConfig> Parsed =
      getConfiguration<TaintConfig>(Mgr, Checker, Option, FileName);
  if (!Parsed)
    return;

  Checker->parseConfiguration(Mgr, Option, std::move(Parsed).getValue());
}

bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
Expand Down
59 changes: 59 additions & 0 deletions clang/lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,59 @@
//== Yaml.h ---------------------------------------------------- -*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines convenience functions for handling YAML configuration files
// for checkers/packages.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H
#define LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H

#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "llvm/Support/YAMLTraits.h"

namespace clang {
namespace ento {

/// Load \p ConfigFile from the real file system and parse it as YAML into a
/// value of type \p T, which must provide yaml MappingTraits.
///
/// \returns the parsed configuration, or None when \p ConfigFile is empty
/// after trimming, when the file cannot be read, or when parsing fails. The
/// latter two cases are reported through \p Mgr as an invalid value of the
/// checker option \p Option of \p Chk.
template <class T, class Checker>
llvm::Optional<T> getConfiguration(CheckerManager &Mgr, Checker *Chk,
                                   StringRef Option, StringRef ConfigFile) {
  // No file name given: nothing to load and nothing to report.
  if (ConfigFile.trim().empty())
    return None;

  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileContents =
      llvm::vfs::getRealFileSystem()->getBufferForFile(ConfigFile.str());

  if (FileContents.getError()) {
    const std::string Expected =
        "a valid filename instead of '" + std::string(ConfigFile) + "'";
    Mgr.reportInvalidCheckerOptionValue(Chk, Option, Expected);
    return None;
  }

  llvm::yaml::Input YamlIn(FileContents.get()->getBuffer());
  T Parsed;
  YamlIn >> Parsed;

  const std::error_code ParseError = YamlIn.error();
  if (ParseError) {
    Mgr.reportInvalidCheckerOptionValue(
        Chk, Option, "a valid yaml file: " + ParseError.message());
    return None;
  }

  return Parsed;
}

} // namespace ento
} // namespace clang

#endif // LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H
@@ -0,0 +1,4 @@
Propagations:
- Name: mySource1
DstArgs: [-1]
NotExist: 1
@@ -0,0 +1,3 @@
Propagations:
- Name: mySource1
DstArgs: [-2]

0 comments on commit 4bde15f

Please sign in to comment.