Skip to content

Commit

Permalink
Improve static checks for sprintf and __builtin___sprintf_chk
Browse files Browse the repository at this point in the history
Implement a pessimistic evaluator of the minimal required size for a buffer
based on the format string, and couple that with the fortified version to emit a
warning when the buffer size is lower than the lower bound computed from the
format string.

Differential Revision: https://reviews.llvm.org/D71566
  • Loading branch information
serge-sans-paille committed Jan 25, 2020
1 parent d085634 commit 6d485ff
Show file tree
Hide file tree
Showing 3 changed files with 327 additions and 10 deletions.
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,12 @@ def warn_fortify_source_size_mismatch : Warning<
"'%0' size argument is too large; destination buffer has size %1,"
" but size argument is %2">, InGroup<FortifySource>;

def warn_fortify_source_format_overflow : Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but format string expands to at least %2">,
InGroup<FortifySource>;


/// main()
// static main() is not an error in C, just in C++.
def warn_static_main : Warning<"'main' should not be declared static">,
Expand Down
244 changes: 234 additions & 10 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,13 +390,194 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) {
return false;
}

namespace {

class EstimateSizeFormatHandler
: public analyze_format_string::FormatStringHandler {
size_t Size;

public:
EstimateSizeFormatHandler(StringRef Format)
: Size(std::min(Format.find(0), Format.size()) +
1 /* null byte always written by sprintf */) {}

bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
const char *, unsigned SpecifierLen) override {

const size_t FieldWidth = computeFieldWidth(FS);
const size_t Precision = computePrecision(FS);

// The actual format.
switch (FS.getConversionSpecifier().getKind()) {
// Just a char.
case analyze_format_string::ConversionSpecifier::cArg:
case analyze_format_string::ConversionSpecifier::CArg:
Size += std::max(FieldWidth, (size_t)1);
break;
// Just an integer.
case analyze_format_string::ConversionSpecifier::dArg:
case analyze_format_string::ConversionSpecifier::DArg:
case analyze_format_string::ConversionSpecifier::iArg:
case analyze_format_string::ConversionSpecifier::oArg:
case analyze_format_string::ConversionSpecifier::OArg:
case analyze_format_string::ConversionSpecifier::uArg:
case analyze_format_string::ConversionSpecifier::UArg:
case analyze_format_string::ConversionSpecifier::xArg:
case analyze_format_string::ConversionSpecifier::XArg:
Size += std::max(FieldWidth, Precision);
break;

// %g style conversion switches between %f or %e style dynamically.
// %f always takes less space, so default to it.
case analyze_format_string::ConversionSpecifier::gArg:
case analyze_format_string::ConversionSpecifier::GArg:

// Floating point number in the form '[+]ddd.ddd'.
case analyze_format_string::ConversionSpecifier::fArg:
case analyze_format_string::ConversionSpecifier::FArg:
Size += std::max(FieldWidth, 1 /* integer part */ +
(Precision ? 1 + Precision
: 0) /* period + decimal */);
break;

// Floating point number in the form '[-]d.ddde[+-]dd'.
case analyze_format_string::ConversionSpecifier::eArg:
case analyze_format_string::ConversionSpecifier::EArg:
Size +=
std::max(FieldWidth,
1 /* integer part */ +
(Precision ? 1 + Precision : 0) /* period + decimal */ +
1 /* e or E letter */ + 2 /* exponent */);
break;

// Floating point number in the form '[-]0xh.hhhhp±dd'.
case analyze_format_string::ConversionSpecifier::aArg:
case analyze_format_string::ConversionSpecifier::AArg:
Size +=
std::max(FieldWidth,
2 /* 0x */ + 1 /* integer part */ +
(Precision ? 1 + Precision : 0) /* period + decimal */ +
1 /* p or P letter */ + 1 /* + or - */ + 1 /* value */);
break;

// Just a string.
case analyze_format_string::ConversionSpecifier::sArg:
case analyze_format_string::ConversionSpecifier::SArg:
Size += FieldWidth;
break;

// Just a pointer in the form '0xddd'.
case analyze_format_string::ConversionSpecifier::pArg:
Size += std::max(FieldWidth, 2 /* leading 0x */ + Precision);
break;

// A plain percent.
case analyze_format_string::ConversionSpecifier::PercentArg:
Size += 1;
break;

default:
break;
}

Size += FS.hasPlusPrefix() || FS.hasSpacePrefix();

if (FS.hasAlternativeForm()) {
switch (FS.getConversionSpecifier().getKind()) {
default:
break;
// Force a leading '0'.
case analyze_format_string::ConversionSpecifier::oArg:
Size += 1;
break;
// Force a leading '0x'.
case analyze_format_string::ConversionSpecifier::xArg:
case analyze_format_string::ConversionSpecifier::XArg:
Size += 2;
break;
// Force a period '.' before decimal, even if precision is 0.
case analyze_format_string::ConversionSpecifier::aArg:
case analyze_format_string::ConversionSpecifier::AArg:
case analyze_format_string::ConversionSpecifier::eArg:
case analyze_format_string::ConversionSpecifier::EArg:
case analyze_format_string::ConversionSpecifier::fArg:
case analyze_format_string::ConversionSpecifier::FArg:
case analyze_format_string::ConversionSpecifier::gArg:
case analyze_format_string::ConversionSpecifier::GArg:
Size += (Precision ? 0 : 1);
break;
}
}
assert(SpecifierLen <= Size && "no underflow");
Size -= SpecifierLen;
return true;
}

size_t getSizeLowerBound() const { return Size; }

private:
static size_t computeFieldWidth(const analyze_printf::PrintfSpecifier &FS) {
const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth();
size_t FieldWidth = 0;
if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant)
FieldWidth = FW.getConstantAmount();
return FieldWidth;
}

static size_t computePrecision(const analyze_printf::PrintfSpecifier &FS) {
const analyze_format_string::OptionalAmount &FW = FS.getPrecision();
size_t Precision = 0;

// See man 3 printf for default precision value based on the specifier.
switch (FW.getHowSpecified()) {
case analyze_format_string::OptionalAmount::NotSpecified:
switch (FS.getConversionSpecifier().getKind()) {
default:
break;
case analyze_format_string::ConversionSpecifier::dArg: // %d
case analyze_format_string::ConversionSpecifier::DArg: // %D
case analyze_format_string::ConversionSpecifier::iArg: // %i
Precision = 1;
break;
case analyze_format_string::ConversionSpecifier::oArg: // %d
case analyze_format_string::ConversionSpecifier::OArg: // %D
case analyze_format_string::ConversionSpecifier::uArg: // %d
case analyze_format_string::ConversionSpecifier::UArg: // %D
case analyze_format_string::ConversionSpecifier::xArg: // %d
case analyze_format_string::ConversionSpecifier::XArg: // %D
Precision = 1;
break;
case analyze_format_string::ConversionSpecifier::fArg: // %f
case analyze_format_string::ConversionSpecifier::FArg: // %F
case analyze_format_string::ConversionSpecifier::eArg: // %e
case analyze_format_string::ConversionSpecifier::EArg: // %E
case analyze_format_string::ConversionSpecifier::gArg: // %g
case analyze_format_string::ConversionSpecifier::GArg: // %G
Precision = 6;
break;
case analyze_format_string::ConversionSpecifier::pArg: // %d
Precision = 1;
break;
}
break;
case analyze_format_string::OptionalAmount::Constant:
Precision = FW.getConstantAmount();
break;
default:
break;
}
return Precision;
}
};

} // namespace

/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
/// __builtin_*_chk function, then use the object size argument specified in the
/// source. Otherwise, infer the object size using __builtin_object_size.
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
// FIXME: There are some more useful checks we could be doing here:
// - Analyze the format string of sprintf to see how much of buffer is used.
// - Evaluate strlen of strcpy arguments, use as object size.

if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
Expand All @@ -407,12 +588,55 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
if (!BuiltinID)
return;

const TargetInfo &TI = getASTContext().getTargetInfo();
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());

unsigned DiagID = 0;
bool IsChkVariant = false;
Optional<llvm::APSInt> UsedSize;
unsigned SizeIndex, ObjectIndex;
switch (BuiltinID) {
default:
return;
case Builtin::BIsprintf:
case Builtin::BI__builtin___sprintf_chk: {
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();

if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) {

if (!Format->isAscii() && !Format->isUTF8())
return;

StringRef FormatStrRef = Format->getString();
EstimateSizeFormatHandler H(FormatStrRef);
const char *FormatBytes = FormatStrRef.data();
const ConstantArrayType *T =
Context.getAsConstantArrayType(Format->getType());
assert(T && "String literal not of constant array type!");
size_t TypeSize = T->getSize().getZExtValue();

// In case there's a null byte somewhere.
size_t StrLen =
std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
if (!analyze_format_string::ParsePrintfString(
H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
Context.getTargetInfo(), false)) {
DiagID = diag::warn_fortify_source_format_overflow;
UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
.extOrTrunc(SizeTypeWidth);
if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
IsChkVariant = true;
ObjectIndex = 2;
} else {
IsChkVariant = false;
ObjectIndex = 0;
}
break;
}
}
return;
}
case Builtin::BI__builtin___memcpy_chk:
case Builtin::BI__builtin___memmove_chk:
case Builtin::BI__builtin___memset_chk:
Expand Down Expand Up @@ -505,19 +729,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
return;
// Get the object size in the target's size_t width.
const TargetInfo &TI = getASTContext().getTargetInfo();
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
}

// Evaluate the number of bytes of the object that this call will use.
Expr::EvalResult Result;
Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
return;
llvm::APSInt UsedSize = Result.Val.getInt();
if (!UsedSize) {
Expr::EvalResult Result;
Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
return;
UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
}

if (UsedSize.ule(ObjectSize))
if (UsedSize.getValue().ule(ObjectSize))
return;

StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
Expand All @@ -533,7 +757,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
PDiag(DiagID)
<< FunctionName << ObjectSize.toString(/*Radix=*/10)
<< UsedSize.toString(/*Radix=*/10));
<< UsedSize.getValue().toString(/*Radix=*/10));
}

static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
Expand Down
Loading

0 comments on commit 6d485ff

Please sign in to comment.