Skip to content

Commit

Permalink
Revert "Do not warn about format strings that are indexed string lite…
Browse files Browse the repository at this point in the history
…rals."

Summary: This reverts r281527 because I messed up the attribution.

Reviewers: srhines

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D24579

llvm-svn: 281530
  • Loading branch information
stephenhines committed Sep 14, 2016
1 parent c0899b9 commit 6a17e51
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 201 deletions.
184 changes: 18 additions & 166 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3841,95 +3841,7 @@ enum StringLiteralCheckType {
};
} // end anonymous namespace

static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
BinaryOperatorKind BinOpKind,
bool AddendIsRight) {
unsigned BitWidth = Offset.getBitWidth();
unsigned AddendBitWidth = Addend.getBitWidth();
// There might be negative interim results.
if (Addend.isUnsigned()) {
Addend = Addend.zext(++AddendBitWidth);
Addend.setIsSigned(true);
}
// Adjust the bit width of the APSInts.
if (AddendBitWidth > BitWidth) {
Offset = Offset.sext(AddendBitWidth);
BitWidth = AddendBitWidth;
} else if (BitWidth > AddendBitWidth) {
Addend = Addend.sext(BitWidth);
}

bool Ov = false;
llvm::APSInt ResOffset = Offset;
if (BinOpKind == BO_Add)
ResOffset = Offset.sadd_ov(Addend, Ov);
else {
assert(AddendIsRight && BinOpKind == BO_Sub &&
"operator must be add or sub with addend on the right");
ResOffset = Offset.ssub_ov(Addend, Ov);
}

// We add an offset to a pointer here so we should support an offset as big as
// possible.
if (Ov) {
assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
Offset.sext(2 * BitWidth);
sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
return;
}

Offset = ResOffset;
}

namespace {
// This is a wrapper class around StringLiteral to support offsetted string
// literals as format strings. It takes the offset into account when returning
// the string and its length or the source locations to display notes correctly.
class FormatStringLiteral {
const StringLiteral *FExpr;
int64_t Offset;

public:
FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
: FExpr(fexpr), Offset(Offset) {}

StringRef getString() const {
return FExpr->getString().drop_front(Offset);
}

unsigned getByteLength() const {
return FExpr->getByteLength() - getCharByteWidth() * Offset;
}
unsigned getLength() const { return FExpr->getLength() - Offset; }
unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }

StringLiteral::StringKind getKind() const { return FExpr->getKind(); }

QualType getType() const { return FExpr->getType(); }

bool isAscii() const { return FExpr->isAscii(); }
bool isWide() const { return FExpr->isWide(); }
bool isUTF8() const { return FExpr->isUTF8(); }
bool isUTF16() const { return FExpr->isUTF16(); }
bool isUTF32() const { return FExpr->isUTF32(); }
bool isPascal() const { return FExpr->isPascal(); }

SourceLocation getLocationOfByte(
unsigned ByteNo, const SourceManager &SM, const LangOptions &Features,
const TargetInfo &Target, unsigned *StartToken = nullptr,
unsigned *StartTokenByteOffset = nullptr) const {
return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target,
StartToken, StartTokenByteOffset);
}

SourceLocation getLocStart() const LLVM_READONLY {
return FExpr->getLocStart().getLocWithOffset(Offset);
}
SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); }
};
} // end anonymous namespace

static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,
Expand All @@ -3950,11 +3862,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
unsigned firstDataArg, Sema::FormatStringType Type,
Sema::VariadicCallType CallType, bool InFunctionCall,
llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg,
llvm::APSInt Offset) {
UncoveredArgHandler &UncoveredArg) {
tryAgain:
assert(Offset.isSigned() && "invalid offset");

if (E->isTypeDependent() || E->isValueDependent())
return SLCT_NotALiteral;

Expand Down Expand Up @@ -3988,28 +3897,23 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
CheckLeft = false;
}

// We need to maintain the offsets for the right and the left hand side
// separately to check if every possible indexed expression is a valid
// string literal. They might have different offsets for different string
// literals in the end.
StringLiteralCheckType Left;
if (!CheckLeft)
Left = SLCT_UncheckedLiteral;
else {
Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
CheckedVarArgs, UncoveredArg, Offset);
if (Left == SLCT_NotALiteral || !CheckRight) {
CheckedVarArgs, UncoveredArg);
if (Left == SLCT_NotALiteral || !CheckRight)
return Left;
}
}

StringLiteralCheckType Right =
checkFormatStringExpr(S, C->getFalseExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall, CheckedVarArgs,
UncoveredArg, Offset);
UncoveredArg);

return (CheckLeft && Left < Right) ? Left : Right;
}
Expand Down Expand Up @@ -4062,8 +3966,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Init, Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
/*InFunctionCall*/ false, CheckedVarArgs,
UncoveredArg, Offset);
/*InFunctionCall*/false, CheckedVarArgs,
UncoveredArg);
}
}

Expand Down Expand Up @@ -4118,7 +4022,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Arg, Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
CheckedVarArgs, UncoveredArg, Offset);
CheckedVarArgs, UncoveredArg);
} else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
Expand All @@ -4128,7 +4032,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
InFunctionCall, CheckedVarArgs,
UncoveredArg, Offset);
UncoveredArg);
}
}
}
Expand All @@ -4145,64 +4049,14 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
StrE = cast<StringLiteral>(E);

if (StrE) {
if (Offset.isNegative() || Offset > StrE->getLength()) {
// TODO: It would be better to have an explicit warning for out of
// bounds literals.
return SLCT_NotALiteral;
}
FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue());
CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx,
CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
firstDataArg, Type, InFunctionCall, CallType,
CheckedVarArgs, UncoveredArg);
return SLCT_CheckedLiteral;
}

return SLCT_NotALiteral;
}
case Stmt::BinaryOperatorClass: {
llvm::APSInt LResult;
llvm::APSInt RResult;

const BinaryOperator *BinOp = cast<BinaryOperator>(E);

// A string literal + an int offset is still a string literal.
if (BinOp->isAdditiveOp()) {
bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);

if (LIsInt != RIsInt) {
BinaryOperatorKind BinOpKind = BinOp->getOpcode();

if (LIsInt) {
if (BinOpKind == BO_Add) {
sumOffsets(Offset, LResult, BinOpKind, RIsInt);
E = BinOp->getRHS();
goto tryAgain;
}
} else {
sumOffsets(Offset, RResult, BinOpKind, RIsInt);
E = BinOp->getLHS();
goto tryAgain;
}
}

return SLCT_NotALiteral;
}
}
case Stmt::UnaryOperatorClass: {
const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
auto ASE = dyn_cast<ArraySubscriptExpr>(UnaOp->getSubExpr());
if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) {
llvm::APSInt IndexResult;
if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) {
sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true);
E = ASE->getBase();
goto tryAgain;
}
}

return SLCT_NotALiteral;
}

default:
return SLCT_NotALiteral;
Expand Down Expand Up @@ -4269,9 +4123,8 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
StringLiteralCheckType CT =
checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,
format_idx, firstDataArg, Type, CallType,
/*IsFunctionCall*/ true, CheckedVarArgs,
UncoveredArg,
/*no string offset*/ llvm::APSInt(64, false) = 0);
/*IsFunctionCall*/true, CheckedVarArgs,
UncoveredArg);

// Generate a diagnostic where an uncovered argument is detected.
if (UncoveredArg.hasUncoveredArg()) {
Expand Down Expand Up @@ -4327,7 +4180,7 @@ namespace {
class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
protected:
Sema &S;
const FormatStringLiteral *FExpr;
const StringLiteral *FExpr;
const Expr *OrigFormatExpr;
const unsigned FirstDataArg;
const unsigned NumDataArgs;
Expand All @@ -4344,7 +4197,7 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
UncoveredArgHandler &UncoveredArg;

public:
CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr,
CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,
Expand Down Expand Up @@ -4444,8 +4297,7 @@ getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
}

SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(),
S.getLangOpts(), S.Context.getTargetInfo());
return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
}

void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
Expand Down Expand Up @@ -4784,7 +4636,7 @@ class CheckPrintfHandler : public CheckFormatHandler {
bool ObjCContext;

public:
CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr,
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, bool isObjC,
const char *beg, bool hasVAListArg,
Expand Down Expand Up @@ -5571,7 +5423,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
namespace {
class CheckScanfHandler : public CheckFormatHandler {
public:
CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
CheckScanfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,
Expand Down Expand Up @@ -5742,7 +5594,7 @@ bool CheckScanfHandler::HandleScanfSpecifier(
return true;
}

static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,
Expand Down
35 changes: 0 additions & 35 deletions clang/test/Sema/format-strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -652,38 +652,3 @@ void test_format_security_pos(char* string) {
// expected-note@-1{{treat the string as an argument to avoid this}}
}
#pragma GCC diagnostic warning "-Wformat-nonliteral"

void test_char_pointer_arithmetic(int b) {
const char s1[] = "string";
const char s2[] = "%s string";

printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}}
// expected-note@-1{{treat the string as an argument to avoid this}}

printf(s1 + 2); // no-warning
printf(s2 + 2); // no-warning

const char s3[] = "%s string";
printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
printf(2 + s2); // no-warning
printf(6 + s2 - 2); // no-warning
printf(2 + (b ? s1 : s2)); // no-warning

const char s5[] = "string %s";
printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
printf(2 + (b ? s2 : s5), ""); // no-warning
printf(2 + (b ? s1 : s2 - 2), ""); // no-warning

const char s6[] = "%s string";
printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
printf(1 ? s2 + 2 : s2); // no-warning
printf(0 ? s2 : s2 + 2); // no-warning
printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}}

const char s7[] = "%s string %s %s";
printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}}
// expected-note@-2{{format string is defined here}}
}

0 comments on commit 6a17e51

Please sign in to comment.