-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Continuation of fexec-charset #169803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/abhina/fexec_charset
Are you sure you want to change the base?
Continuation of fexec-charset #169803
Conversation
(cherry picked from commit c2647a73957921d3f7a53c6f25a69f1cc2725aa3)
(cherry picked from commit 20a6fdfe3045eebaf1acc4fff7269c66e85e10c3)
|
@llvm/pr-subscribers-llvm-support Author: Abhina Sree (abhina-sree) ChangesThis patch builds upon #138895 and introduces a ParserConversionAction which is able to control which charset to use for various string literals. I also introduce a FormatStrConverter which is used to do format string checking Patch is 58.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169803.diff 22 Files Affected:
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 573cc72db35c6..7d1ac3193812f 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -28,6 +28,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/LiteralConverter.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
@@ -2063,6 +2064,11 @@ class PredefinedExpr final
return getIdentKindName(getIdentKind());
}
+ static std::string
+ ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl,
+ LiteralConverter &LiteralConv,
+ bool ForceElaboratedPrinting = false);
+
static std::string ComputeName(PredefinedIdentKind IK,
const Decl *CurrentDecl,
bool ForceElaboratedPrinting = false);
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index a284f2c44d633..12083a0d00b4b 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -19,6 +19,7 @@
#define LLVM_CLANG_AST_FORMATSTRING_H
#include "clang/AST/CanonicalType.h"
+#include "llvm/Support/TextEncoding.h"
#include <optional>
namespace clang {
@@ -744,9 +745,9 @@ class FormatStringHandler {
// Printf-specific handlers.
virtual bool HandleInvalidPrintfConversionSpecifier(
- const analyze_printf::PrintfSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
+ const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
return true;
}
@@ -763,9 +764,9 @@ class FormatStringHandler {
// Scanf-specific handlers.
virtual bool HandleInvalidScanfConversionSpecifier(
- const analyze_scanf::ScanfSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
+ const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
return true;
}
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 1c16f9f79ae68..b3d507e1170dc 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -38,6 +38,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/TextEncoding.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
@@ -320,6 +321,8 @@ class TargetInfo : public TransferrableTargetInfo,
virtual ~TargetInfo();
+ llvm::TextEncodingConverter *FormatStrConverter;
+
/// Retrieve the target options.
TargetOptions &getTargetOpts() const {
assert(TargetOpts && "Missing target options");
diff --git a/clang/include/clang/Lex/LiteralConverter.h b/clang/include/clang/Lex/LiteralConverter.h
index 6a66d2d0ff707..ba6fb6c87a782 100644
--- a/clang/include/clang/Lex/LiteralConverter.h
+++ b/clang/include/clang/Lex/LiteralConverter.h
@@ -34,7 +34,7 @@ class LiteralConverter {
static std::error_code
setConvertersFromOptions(LiteralConverter &LiteralConv,
const clang::LangOptions &Opts,
- const clang::TargetInfo &TInfo);
+ clang::TargetInfo &TInfo);
};
#endif
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 58eb1c0a7c114..97867183b5a1d 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -5633,6 +5633,7 @@ class Parser : public CodeCompletionHandler {
bool Finished;
};
ObjCImplParsingDataRAII *CurParsedObjCImpl;
+ ConversionAction ParserConversionAction;
/// StashAwayMethodOrFunctionBodyTokens - Consume the tokens and store them
/// for later parsing.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index cbfcc9bc0ea99..65567e367dea4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -54,6 +54,7 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/LiteralConverter.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/Attr.h"
#include "clang/Sema/CleanupInfo.h"
@@ -7272,9 +7273,12 @@ class Sema final : public SemaBase {
/// from multiple tokens. However, the common case is that StringToks points
/// to one string.
ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
- Scope *UDLScope = nullptr);
+ Scope *UDLScope = nullptr,
+ ConversionAction Action = CA_ToExecEncoding);
- ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);
+ ExprResult
+ ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks,
+ ConversionAction Action = CA_ToExecEncoding);
/// ControllingExprOrType is either an opaque pointer coming out of a
/// ParsedType or an Expr *. FIXME: it'd be better to split this interface
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 1d914fa876759..d9765f4a73fcd 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -667,6 +667,21 @@ StringRef PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) {
llvm_unreachable("Unknown ident kind for PredefinedExpr");
}
+std::string PredefinedExpr::ComputeNameAndTranslate(
+ PredefinedIdentKind IK, const Decl *CurrentDecl,
+ LiteralConverter &LiteralConv, bool ForceElaboratedPrinting) {
+ using namespace clang::charinfo;
+ std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting);
+ llvm::TextEncodingConverter *Converter =
+ LiteralConv.getConverter(CA_ToExecEncoding);
+ if (Converter) {
+ SmallString<128> Converted;
+ Converter->convert(Result, Converted);
+ Result = std::string(Converted);
+ }
+ return Result;
+}
+
// FIXME: Maybe this should use DeclPrinter with a special "print predefined
// expr" policy instead.
std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index d4cb89b43ae87..be0f527da92e5 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -33,8 +33,9 @@ FormatStringHandler::~FormatStringHandler() {}
// scanf format strings.
//===----------------------------------------------------------------------===//
-OptionalAmount
-clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
+OptionalAmount clang::analyze_format_string::ParseAmount(
+ const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
const char *I = Beg;
UpdateOnReturn <const char*> UpdateBeg(Beg, I);
@@ -42,7 +43,7 @@ clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
bool hasDigits = false;
for ( ; I != E; ++I) {
- char c = *I;
+ char c = FormatStrConverter.convert(*I);
if (c >= '0' && c <= '9') {
hasDigits = true;
accumulator = (accumulator * 10) + (c - '0');
@@ -59,27 +60,23 @@ clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
return OptionalAmount();
}
-OptionalAmount
-clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
- const char *E,
- unsigned &argIndex) {
- if (*Beg == '*') {
+OptionalAmount clang::analyze_format_string::ParseNonPositionAmount(
+ const char *&Beg, const char *E, unsigned &argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
+ if (FormatStrConverter.convert(*Beg) == '*') {
++Beg;
return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
}
- return ParseAmount(Beg, E);
+ return ParseAmount(Beg, E, FormatStrConverter);
}
-OptionalAmount
-clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
- const char *Start,
- const char *&Beg,
- const char *E,
- PositionContext p) {
- if (*Beg == '*') {
+OptionalAmount clang::analyze_format_string::ParsePositionAmount(
+ FormatStringHandler &H, const char *Start, const char *&Beg, const char *E,
+ PositionContext p, const llvm::TextEncodingConverter &FormatStrConverter) {
+ if (FormatStrConverter.convert(*Beg) == '*') {
const char *I = Beg + 1;
- const OptionalAmount &Amt = ParseAmount(I, E);
+ const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter);
if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
H.HandleInvalidPosition(Beg, I - Beg, p);
@@ -94,7 +91,7 @@ clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
assert(Amt.getHowSpecified() == OptionalAmount::Constant);
- if (*I == '$') {
+ if (FormatStrConverter.convert(*I) == '$') {
// Handle positional arguments
// Special case: '*0$', since this is an easy mistake.
@@ -114,24 +111,22 @@ clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
return OptionalAmount(false);
}
- return ParseAmount(Beg, E);
+ return ParseAmount(Beg, E, FormatStrConverter);
}
-
-bool
-clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
- FormatSpecifier &CS,
- const char *Start,
- const char *&Beg, const char *E,
- unsigned *argIndex) {
+bool clang::analyze_format_string::ParseFieldWidth(
+ FormatStringHandler &H, FormatSpecifier &CS, const char *Start,
+ const char *&Beg, const char *E, unsigned *argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
// FIXME: Support negative field widths.
if (argIndex) {
- CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
+ CS.setFieldWidth(
+ ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter));
}
else {
- const OptionalAmount Amt =
- ParsePositionAmount(H, Start, Beg, E,
- analyze_format_string::FieldWidthPos);
+ const OptionalAmount Amt = ParsePositionAmount(
+ H, Start, Beg, E, analyze_format_string::FieldWidthPos,
+ FormatStrConverter);
if (Amt.isInvalid())
return true;
@@ -140,15 +135,13 @@ clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
return false;
}
-bool
-clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
- FormatSpecifier &FS,
- const char *Start,
- const char *&Beg,
- const char *E) {
+bool clang::analyze_format_string::ParseArgPosition(
+ FormatStringHandler &H, FormatSpecifier &FS, const char *Start,
+ const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
const char *I = Beg;
- const OptionalAmount &Amt = ParseAmount(I, E);
+ const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter);
if (I == E) {
// No more characters left?
@@ -156,7 +149,8 @@ clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
return true;
}
- if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
+ if (Amt.getHowSpecified() == OptionalAmount::Constant &&
+ FormatStrConverter.convert(*(I++)) == '$') {
// Warn that positional arguments are non-standard.
H.HandlePosition(Start, I - Start);
@@ -177,17 +171,15 @@ clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
return false;
}
-bool
-clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
- FormatSpecifier &FS,
- const char *&I,
- const char *E,
- const LangOptions &LO) {
+bool clang::analyze_format_string::ParseVectorModifier(
+ FormatStringHandler &H, FormatSpecifier &FS, const char *&I, const char *E,
+ const LangOptions &LO,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
if (!LO.OpenCL)
return false;
const char *Start = I;
- if (*I == 'v') {
+ if (FormatStrConverter.convert(*I) == 'v') {
++I;
if (I == E) {
@@ -195,7 +187,7 @@ clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
return true;
}
- OptionalAmount NumElts = ParseAmount(I, E);
+ OptionalAmount NumElts = ParseAmount(I, E, FormatStrConverter);
if (NumElts.getHowSpecified() != OptionalAmount::Constant) {
H.HandleIncompleteSpecifier(Start, E - Start);
return true;
@@ -207,86 +199,104 @@ clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
return false;
}
-bool
-clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
- const char *&I,
- const char *E,
- const LangOptions &LO,
- bool IsScanf) {
+bool clang::analyze_format_string::ParseLengthModifier(
+ FormatSpecifier &FS, const char *&I, const char *E, const LangOptions &LO,
+ const llvm::TextEncodingConverter &FormatStrConverter, bool IsScanf) {
LengthModifier::Kind lmKind = LengthModifier::None;
const char *lmPosition = I;
- switch (*I) {
- default:
- return false;
- case 'h':
+ switch (FormatStrConverter.convert(*I)) {
+ default:
+ return false;
+ case 'h':
+ ++I;
+ if (I != E && FormatStrConverter.convert(*I) == 'h') {
++I;
- if (I != E && *I == 'h') {
- ++I;
- lmKind = LengthModifier::AsChar;
- } else if (I != E && *I == 'l' && LO.OpenCL) {
- ++I;
- lmKind = LengthModifier::AsShortLong;
- } else {
- lmKind = LengthModifier::AsShort;
- }
- break;
- case 'l':
+ lmKind = LengthModifier::AsChar;
+ } else if (I != E && FormatStrConverter.convert(*I) == 'l' && LO.OpenCL) {
+ ++I;
+ lmKind = LengthModifier::AsShortLong;
+ } else {
+ lmKind = LengthModifier::AsShort;
+ }
+ break;
+ case 'l':
+ ++I;
+ if (I != E && FormatStrConverter.convert(*I) == 'l') {
+ ++I;
+ lmKind = LengthModifier::AsLongLong;
+ } else {
+ lmKind = LengthModifier::AsLong;
+ }
+ break;
+ case 'j':
+ lmKind = LengthModifier::AsIntMax;
+ ++I;
+ break;
+ case 'z':
+ lmKind = LengthModifier::AsSizeT;
+ ++I;
+ break;
+ case 't':
+ lmKind = LengthModifier::AsPtrDiff;
+ ++I;
+ break;
+ case 'L':
+ lmKind = LengthModifier::AsLongDouble;
+ ++I;
+ break;
+ case 'q':
+ lmKind = LengthModifier::AsQuad;
+ ++I;
+ break;
+ case 'a':
+ if (IsScanf && !LO.C99 && !LO.CPlusPlus11) {
+ // For scanf in C90, look at the next character to see if this should
+ // be parsed as the GNU extension 'a' length modifier. If not, this
+ // will be parsed as a conversion specifier.
++I;
- if (I != E && *I == 'l') {
- ++I;
- lmKind = LengthModifier::AsLongLong;
- } else {
- lmKind = LengthModifier::AsLong;
+ if (I != E && (FormatStrConverter.convert(*I) == 's' ||
+ FormatStrConverter.convert(*I) == 'S' ||
+ FormatStrConverter.convert(*I) == '[')) {
+ lmKind = LengthModifier::AsAllocate;
+ break;
}
+ --I;
+ }
+ return false;
+ case 'm':
+ if (IsScanf) {
+ lmKind = LengthModifier::AsMAllocate;
+ ++I;
break;
- case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
- case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
- case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
- case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
- case 'q': lmKind = LengthModifier::AsQuad; ++I; break;
- case 'a':
- if (IsScanf && !LO.C99 && !LO.CPlusPlus11) {
- // For scanf in C90, look at the next character to see if this should
- // be parsed as the GNU extension 'a' length modifier. If not, this
- // will be parsed as a conversion specifier.
- ++I;
- if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
- lmKind = LengthModifier::AsAllocate;
- break;
- }
- --I;
- }
- return false;
- case 'm':
- if (IsScanf) {
- lmKind = LengthModifier::AsMAllocate;
- ++I;
+ }
+ return false;
+ // printf: AsInt64, AsInt32, AsInt3264
+ // scanf: AsInt64
+ case 'I':
+ if (I + 1 != E && I + 2 != E) {
+ if (FormatStrConverter.convert(I[1]) == '6' &&
+ FormatStrConverter.convert(I[2]) == '4') {
+ I += 3;
+ lmKind = LengthModifier::AsInt64;
break;
}
- return false;
- // printf: AsInt64, AsInt32, AsInt3264
- // scanf: AsInt64
- case 'I':
- if (I + 1 != E && I + 2 != E) {
- if (I[1] == '6' && I[2] == '4') {
- I += 3;
- lmKind = LengthModifier::AsInt64;
- break;
- }
- if (IsScanf)
- return false;
+ if (IsScanf)
+ return false;
- if (I[1] == '3' && I[2] == '2') {
- I += 3;
- lmKind = LengthModifier::AsInt32;
- break;
- }
+ if (FormatStrConverter.convert(I[1]) == '3' &&
+ FormatStrConverter.convert(I[2]) == '2') {
+ I += 3;
+ lmKind = LengthModifier::AsInt32;
+ break;
}
- ++I;
- lmKind = LengthModifier::AsInt3264;
- break;
- case 'w':
- lmKind = LengthModifier::AsWide; ++I; break;
+ }
+ ++I;
+ lmKind = LengthModifier::AsInt3264;
+ break;
+ case 'w':
+ lmKind = LengthModifier::AsWide;
+ ++I;
+ break;
}
LengthModifier lm(lmPosition, lmKind);
FS.setLengthModifier(lm);
diff --git a/clang/lib/AST/FormatStringParsing.h b/clang/lib/AST/FormatStringParsing.h
index 764e5d46394d7..7ad6d4b98d2ac 100644
--- a/clang/lib/AST/FormatStringParsing.h
+++ b/clang/lib/AST/FormatStringParsing.h
@@ -37,31 +37,38 @@ class UpdateOnReturn {
namespace analyze_format_string {
-OptionalAmount ParseAmount(const char *&Beg, const char *E);
-OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
- unsigned &argIndex);
+OptionalAmount
+ParseAmount(const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter);
-OptionalAmount ParsePositionAmount(FormatStringHandler &H,
- const char *Start, const char *&Beg,
- const char *E, PositionContext p);
+OptionalAmount
+ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter);
-bool ParseFieldWidth(FormatStringHandler &H,
- FormatSpecifier &CS,
+OptionalAmount
+ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg,
+ const char *E, PositionContext p,
+ const llvm::TextEncodingConverter &FormatStrConverter);
+
+bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &CS,
const char *Start, const char *&Beg, const char *E,
- unsigned *argIndex);
+ ...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
|
This patch builds upon #138895 and introduces a ParserConversionAction which is able to control which charset to use for various string literals. I also introduce a FormatStrConverter which is used to do format string checking