Skip to content

Commit

Permalink
[analyzer] Support partially tainted records.
Browse files Browse the repository at this point in the history
The analyzer's taint analysis can now reason about structures or arrays
originating from taint sources in which only certain sections are tainted.

In particular, it also benefits modeling functions like read(), which may
read tainted data into a section of a structure, but RegionStore is incapable of
expressing the fact that the rest of the structure remains intact, even if we
try to model read() directly.

Patch by Vlad Tsyrklevich!

Differential revision: https://reviews.llvm.org/D28445

llvm-svn: 304162
  • Loading branch information
haoNoQ committed May 29, 2017
1 parent 4c4baf5 commit eed7a31
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ typedef std::unique_ptr<ConstraintManager>(*ConstraintManagerCreator)(
ProgramStateManager &, SubEngine *);
typedef std::unique_ptr<StoreManager>(*StoreManagerCreator)(
ProgramStateManager &);
typedef llvm::ImmutableMap<const SubRegion*, TaintTagType> TaintedSubRegions;
typedef llvm::ImmutableMapRef<const SubRegion*, TaintTagType>
TaintedSubRegionsRef;

//===----------------------------------------------------------------------===//
// ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState.
Expand Down Expand Up @@ -87,6 +90,7 @@ class ProgramState : public llvm::FoldingSetNode {
Store store; // Maps a location to its current value.
GenericDataMap GDM; // Custom data stored by a client of this class.
unsigned refCount;
TaintedSubRegions::Factory TSRFactory;

/// makeWithStore - Return a ProgramState with the same values as the current
/// state with the exception of using the specified Store.
Expand Down Expand Up @@ -343,6 +347,9 @@ class ProgramState : public llvm::FoldingSetNode {
ProgramStateRef addTaint(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric) const;

/// Create a new state in which the value is marked as tainted.
ProgramStateRef addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;

/// Create a new state in which the symbol is marked as tainted.
ProgramStateRef addTaint(SymbolRef S,
TaintTagType Kind = TaintTagGeneric) const;
Expand All @@ -351,6 +358,14 @@ class ProgramState : public llvm::FoldingSetNode {
ProgramStateRef addTaint(const MemRegion *R,
TaintTagType Kind = TaintTagGeneric) const;

/// Create a new state in a which a sub-region of a given symbol is tainted.
/// This might be necessary when referring to regions that can not have an
/// individual symbol, e.g. if they are represented by the default binding of
/// a LazyCompoundVal.
ProgramStateRef addPartialTaint(SymbolRef ParentSym,
const SubRegion *SubRegion,
TaintTagType Kind = TaintTagGeneric) const;

/// Check if the statement is tainted in the current state.
bool isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric) const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ template<> struct ProgramStateTrait<TaintMap>
static void *GDMIndex() { static int index = 0; return &index; }
};

/// The GDM component mapping derived symbols' parent symbols to their
/// underlying regions. This is used to efficiently check whether a symbol is
/// tainted when it represents a sub-region of a tainted symbol.
struct DerivedSymTaint {};
typedef llvm::ImmutableMap<SymbolRef, TaintedSubRegionsRef> DerivedSymTaintImpl;
template<> struct ProgramStateTrait<DerivedSymTaint>
: public ProgramStatePartialTrait<DerivedSymTaintImpl> {
static void *GDMIndex() { static int index; return &index; }
};

class TaintManager {

TaintManager() {}
Expand Down
83 changes: 24 additions & 59 deletions clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,8 @@ class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
/// and thus, is tainted.
static bool isStdin(const Expr *E, CheckerContext &C);

/// This is called from getPointedToSymbol() to resolve symbol references for
/// the region underlying a LazyCompoundVal. This is the default binding
/// for the LCV, which could be a conjured symbol from a function call that
/// initialized the region. It only returns the conjured symbol if the LCV
/// covers the entire region, e.g. we avoid false positives by not returning
/// a default bindingc for an entire struct if the symbol for only a single
/// field or element within it is requested.
// TODO: Return an appropriate symbol for sub-fields/elements of an LCV so
// that they are also appropriately tainted.
static SymbolRef getLCVSymbol(CheckerContext &C,
nonloc::LazyCompoundVal &LCV);

/// \brief Given a pointer argument, get the symbol of the value it contains
/// (points to).
static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
/// \brief Given a pointer argument, return the value it points to.
static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);

/// Functions defining the attack surface.
typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
Expand Down Expand Up @@ -186,9 +173,14 @@ class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
static inline bool isTaintedOrPointsToTainted(const Expr *E,
ProgramStateRef State,
CheckerContext &C) {
return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
(E->getType().getTypePtr()->isPointerType() &&
State->isTainted(getPointedToSymbol(C, E))));
if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
return true;

if (!E->getType().getTypePtr()->isPointerType())
return false;

Optional<SVal> V = getPointedToSVal(C, E);
return (V && State->isTainted(*V));
}

/// \brief Pre-process a function which propagates taint according to the
Expand Down Expand Up @@ -400,9 +392,9 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
if (CE->getNumArgs() < (ArgNum + 1))
return false;
const Expr* Arg = CE->getArg(ArgNum);
SymbolRef Sym = getPointedToSymbol(C, Arg);
if (Sym)
State = State->addTaint(Sym);
Optional<SVal> V = getPointedToSVal(C, Arg);
if (V)
State = State->addTaint(*V);
}

// Clear up the taint info from the state.
Expand Down Expand Up @@ -473,47 +465,20 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
return false;
}

SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
nonloc::LazyCompoundVal &LCV) {
StoreManager &StoreMgr = C.getStoreManager();

// getLCVSymbol() is reached in a PostStmt so we can always expect a default
// binding to exist if one is present.
if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) {
SymbolRef Sym = binding->getAsSymbol();
if (!Sym)
return nullptr;

// If the LCV covers an entire base region return the default conjured symbol.
if (LCV.getRegion() == LCV.getRegion()->getBaseRegion())
return Sym;
}

// Otherwise, return a nullptr as there's not yet a functional way to taint
// sub-regions of LCVs.
return nullptr;
}

SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
const Expr* Arg) {
Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
const Expr* Arg) {
ProgramStateRef State = C.getState();
SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
if (AddrVal.isUnknownOrUndef())
return nullptr;
return None;

Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
if (!AddrLoc)
return nullptr;
return None;

const PointerType *ArgTy =
dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
SVal Val = State->getSVal(*AddrLoc,
ArgTy ? ArgTy->getPointeeType(): QualType());

if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
return getLCVSymbol(C, *LCV);

return Val.getAsSymbol();
return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
}

ProgramStateRef
Expand Down Expand Up @@ -633,9 +598,9 @@ ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
// The arguments are pointer arguments. The data they are pointing at is
// tainted after the call.
const Expr* Arg = CE->getArg(i);
SymbolRef Sym = getPointedToSymbol(C, Arg);
if (Sym)
State = State->addTaint(Sym);
Optional<SVal> V = getPointedToSVal(C, Arg);
if (V)
State = State->addTaint(*V);
}
return State;
}
Expand Down Expand Up @@ -710,10 +675,10 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,

// Check for taint.
ProgramStateRef State = C.getState();
const SymbolRef PointedToSym = getPointedToSymbol(C, E);
Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
SVal TaintedSVal;
if (State->isTainted(PointedToSym))
TaintedSVal = nonloc::SymbolVal(PointedToSym);
if (PointedToSVal && State->isTainted(*PointedToSVal))
TaintedSVal = *PointedToSVal;
else if (State->isTainted(E, C.getLocationContext()))
TaintedSVal = C.getSVal(E);
else
Expand Down
103 changes: 83 additions & 20 deletions clang/lib/StaticAnalyzer/Core/ProgramState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(const Stmt *S,
if (const Expr *E = dyn_cast_or_null<Expr>(S))
S = E->IgnoreParens();

SymbolRef Sym = getSVal(S, LCtx).getAsSymbol();
return addTaint(getSVal(S, LCtx), Kind);
}

ProgramStateRef ProgramState::addTaint(SVal V,
TaintTagType Kind) const {
SymbolRef Sym = V.getAsSymbol();
if (Sym)
return addTaint(Sym, Kind);

const MemRegion *R = getSVal(S, LCtx).getAsRegion();
addTaint(R, Kind);
// If the SVal represents a structure, try to mass-taint all values within the
// structure. For now it only works efficiently on lazy compound values that
// were conjured during a conservative evaluation of a function - either as
// return values of functions that return structures or arrays by value, or as
// values of structures or arrays passed into the function by reference,
// directly or through pointer aliasing. Such lazy compound values are
// characterized by having exactly one binding in their captured store within
// their parent region, which is a conjured symbol default-bound to the base
// region of the parent region.
if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
if (SymbolRef Sym = binding->getAsSymbol())
return addPartialTaint(Sym, LCV->getRegion(), Kind);
}
}

// Cannot add taint, so just return the state.
return this;
const MemRegion *R = V.getAsRegion();
return addTaint(R, Kind);
}

ProgramStateRef ProgramState::addTaint(const MemRegion *R,
Expand All @@ -674,6 +692,28 @@ ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
return NewState;
}

ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
const SubRegion *SubRegion,
TaintTagType Kind) const {
// Ignore partial taint if the entire parent symbol is already tainted.
if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
return this;

// Partial taint applies if only a portion of the symbol is tainted.
if (SubRegion == SubRegion->getBaseRegion())
return addTaint(ParentSym, Kind);

TaintedSubRegionsRef TaintedSubRegions(0, TSRFactory.getTreeFactory());
if (const TaintedSubRegionsRef *SavedTaintedRegions =
get<DerivedSymTaint>(ParentSym))
TaintedSubRegions = *SavedTaintedRegions;

TaintedSubRegions = TaintedSubRegions.add(SubRegion, Kind);
ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, TaintedSubRegions);
assert(NewState);
return NewState;
}

bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind) const {
if (const Expr *E = dyn_cast_or_null<Expr>(S))
Expand Down Expand Up @@ -714,31 +754,54 @@ bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const {
return false;

// Traverse all the symbols this symbol depends on to see if any are tainted.
bool Tainted = false;
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
SI != SE; ++SI) {
if (!isa<SymbolData>(*SI))
continue;

const TaintTagType *Tag = get<TaintMap>(*SI);
Tainted = (Tag && *Tag == Kind);
if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
if (*Tag == Kind)
return true;
}

// If this is a SymbolDerived with a tainted parent, it's also tainted.
if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
// If this is a SymbolDerived with a tainted parent, it's also tainted.
if (isTainted(SD->getParentSymbol(), Kind))
return true;

// If this is a SymbolDerived with the same parent symbol as another
// tainted SymbolDerived and a region that's a sub-region of that tainted
// symbol, it's also tainted.
if (const TaintedSubRegionsRef *SymRegions =
get<DerivedSymTaint>(SD->getParentSymbol())) {
const TypedValueRegion *R = SD->getRegion();
for (TaintedSubRegionsRef::iterator I = SymRegions->begin(),
E = SymRegions->end();
I != E; ++I) {
// FIXME: The logic to identify tainted regions could be more
// complete. For example, this would not currently identify
// overlapping fields in a union as tainted. To identify this we can
// check for overlapping/nested byte offsets.
if (Kind == I->second &&
(R == I->first || R->isSubRegionOf(I->first)))
return true;
}
}
}

// If memory region is tainted, data is also tainted.
if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
Tainted = Tainted || isTainted(SRV->getRegion(), Kind);

// If If this is a SymbolCast from a tainted value, it's also tainted.
if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI))
Tainted = Tainted || isTainted(SC->getOperand(), Kind);
if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
if (isTainted(SRV->getRegion(), Kind))
return true;
}

if (Tainted)
return true;
// If this is a SymbolCast from a tainted value, it's also tainted.
if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
if (isTainted(SC->getOperand(), Kind))
return true;
}
}

return Tainted;
return false;
}

5 changes: 4 additions & 1 deletion clang/lib/StaticAnalyzer/Core/RegionStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,10 @@ class RegionStoreManager : public StoreManager {

Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
return B.getDefaultBinding(R);
// Default bindings are always applied over a base region so look up the
// base region's default binding, otherwise the lookup will fail when R
// is at an offset from R->getBaseRegion().
return B.getDefaultBinding(R->getBaseRegion());
}

SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());
Expand Down
41 changes: 31 additions & 10 deletions clang/test/Analysis/taint-generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,20 +192,41 @@ void testStruct() {

void testStructArray() {
struct {
char buf[16];
struct {
int length;
} st[1];
} tainted;
int length;
} tainted[4];

char buffer[16];
char dstbuf[16], srcbuf[16];
int sock;

sock = socket(AF_INET, SOCK_STREAM, 0);
read(sock, &tainted.buf[0], sizeof(tainted.buf));
read(sock, &tainted.st[0], sizeof(tainted.st));
// FIXME: tainted.st[0].length should be marked tainted
__builtin_memcpy(buffer, tainted.buf, tainted.st[0].length); // no-warning
__builtin_memset(srcbuf, 0, sizeof(srcbuf));

read(sock, &tainted[0], sizeof(tainted));
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}

__builtin_memset(&tainted, 0, sizeof(tainted));
read(sock, &tainted, sizeof(tainted));
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}

__builtin_memset(&tainted, 0, sizeof(tainted));
// If we taint element 1, we should not raise an alert on taint for element 0 or element 2
read(sock, &tainted[1], sizeof(tainted));
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning
__builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning
}

void testUnion() {
union {
int x;
char y[4];
} tainted;

char buffer[4];

int sock = socket(AF_INET, SOCK_STREAM, 0);
read(sock, &tainted.y, sizeof(tainted.y));
// FIXME: overlapping regions aren't detected by isTainted yet
__builtin_memcpy(buffer, tainted.y, tainted.x);
}

int testDivByZero() {
Expand Down

0 comments on commit eed7a31

Please sign in to comment.