Skip to content

Commit

Permalink
Implement P0482R2, support for char8_t type.
Browse files Browse the repository at this point in the history
This is not yet part of any C++ working draft, and so is controlled by the flag
-fchar8_t rather than a -std= flag. (The GCC implementation is controlled by a
flag with the same name.)

This implementation is experimental, and will be removed or revised
substantially to match the proposal as it makes its way through the C++
committee.

llvm-svn: 331244
  • Loading branch information
zygoloid committed May 1, 2018
1 parent 33dc01d commit 3a8244d
Show file tree
Hide file tree
Showing 48 changed files with 254 additions and 19 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -999,6 +999,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
CanQualType WCharTy; // [C++ 3.9.1p5].
CanQualType WideCharTy; // Same as WCharTy in C++, integer type in C99.
CanQualType WIntTy; // [C99 7.24.1], integer type unchanged by default promotions.
CanQualType Char8Ty; // [C++20 proposal]
CanQualType Char16Ty; // [C++0x 3.9.1p5], integer type in C99.
CanQualType Char32Ty; // [C++0x 3.9.1p5], integer type in C99.
CanQualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy, Int128Ty;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/AST/BuiltinTypes.def
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ UNSIGNED_TYPE(UChar, UnsignedCharTy)
// 'wchar_t' for targets where it's unsigned
SHARED_SINGLETON_TYPE(UNSIGNED_TYPE(WChar_U, WCharTy))

// 'char8_t' in C++20 (proposed)
UNSIGNED_TYPE(Char8, Char8Ty)

// 'char16_t' in C++
UNSIGNED_TYPE(Char16, Char16Ty)

Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/AST/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,7 @@ class Type : public ExtQualsTypeCommonBase {
bool isBooleanType() const;
bool isCharType() const;
bool isWideCharType() const;
bool isChar8Type() const;
bool isChar16Type() const;
bool isChar32Type() const;
bool isAnyCharacterType() const;
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -2421,6 +2421,9 @@ def err_template_different_associated_constraints : Error<
def warn_cxx98_compat_unicode_type : Warning<
"'%0' type specifier is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx17_compat_unicode_type : Warning<
"'char8_t' type specifier is incompatible with C++ standards before C++20">,
InGroup<CXXPre2aCompat>, DefaultIgnore;

// __make_integer_seq
def err_integer_sequence_negative_length : Error<
Expand Down Expand Up @@ -5822,6 +5825,13 @@ def err_array_init_wide_string_into_char : Error<
"initializing char array with wide string literal">;
def err_array_init_incompat_wide_string_into_wchar : Error<
"initializing wide char array with incompatible wide string literal">;
def err_array_init_plain_string_into_char8_t : Error<
"initializing 'char8_t' array with plain string literal">;
def note_array_init_plain_string_into_char8_t : Note<
"add 'u8' prefix to form a 'char8_t' string literal">;
def err_array_init_utf8_string_into_char : Error<
"initialization of char array with UTF-8 string literal is not permitted "
"by '-fchar8_t'">;
def err_array_init_different_type : Error<
"cannot initialize array %diff{of type $ with array of type $|"
"with different type of array}0,1">;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ LANGOPT(LineComment , 1, 0, "'//' comments")
LANGOPT(Bool , 1, 0, "bool, true, and false keywords")
LANGOPT(Half , 1, 0, "half keyword")
LANGOPT(WChar , 1, CPlusPlus, "wchar_t keyword")
LANGOPT(Char8 , 1, 0, "char8_t keyword")
LANGOPT(DeclSpecKeyword , 1, 0, "__declspec keyword")
BENIGN_LANGOPT(DollarIdents , 1, 1, "'$' in identifiers")
BENIGN_LANGOPT(AsmPreprocessor, 1, 0, "preprocessor in asm mode")
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Specifiers.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ namespace clang {
TST_void,
TST_char,
TST_wchar, // C++ wchar_t
TST_char8, // C++20 char8_t (proposed)
TST_char16, // C++11 char16_t
TST_char32, // C++11 char32_t
TST_int,
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/TokenKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ PUNCTUATOR(caretcaret, "^^")
// BOOLSUPPORT - This is a keyword if 'bool' is a built-in type
// HALFSUPPORT - This is a keyword if 'half' is a built-in type
// WCHARSUPPORT - This is a keyword if 'wchar_t' is a built-in type
// CHAR8SUPPORT - This is a keyword if 'char8_t' is a built-in type
//
KEYWORD(auto , KEYALL)
KEYWORD(break , KEYALL)
Expand Down Expand Up @@ -380,6 +381,9 @@ KEYWORD(co_yield , KEYCOROUTINES)
MODULES_KEYWORD(module)
MODULES_KEYWORD(import)

// C++ char8_t proposal
KEYWORD(char8_t , CHAR8SUPPORT)

// C11 Extension
KEYWORD(_Float16 , KEYALL)

Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,10 @@ def frtti : Flag<["-"], "frtti">, Group<f_Group>;
def : Flag<["-"], "fsched-interblock">, Group<clang_ignored_f_Group>;
def fshort_enums : Flag<["-"], "fshort-enums">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Allocate to an enum type only as many bytes as it needs for the declared range of possible values">;
def fchar8__t : Flag<["-"], "fchar8_t">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Enable C++ builtin type char8_t">;
def fno_char8__t : Flag<["-"], "fno-char8_t">, Group<f_Group>,
HelpText<"Disable C++ builtin type char8_t">;
def fshort_wchar : Flag<["-"], "fshort-wchar">, Group<f_Group>,
HelpText<"Force wchar_t to be a short unsigned int">;
def fno_short_wchar : Flag<["-"], "fno-short-wchar">, Group<f_Group>,
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Sema/DeclSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ class DeclSpec {
static const TST TST_void = clang::TST_void;
static const TST TST_char = clang::TST_char;
static const TST TST_wchar = clang::TST_wchar;
static const TST TST_char8 = clang::TST_char8;
static const TST TST_char16 = clang::TST_char16;
static const TST TST_char32 = clang::TST_char32;
static const TST TST_int = clang::TST_int;
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Sema/Initialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,12 @@ class InitializationSequence {
/// literal.
FK_IncompatWideStringIntoWideChar,

/// \brief Initializing char8_t array with plain string literal.
FK_PlainStringIntoUTF8Char,

/// \brief Initializing char array with UTF-8 string literal.
FK_UTF8StringIntoPlainChar,

/// \brief Array type mismatch.
FK_ArrayTypeMismatch,

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ namespace serialization {
/// \brief The '_Float16' type
PREDEF_TYPE_FLOAT16_ID = 44,

/// \brief The C++ 'char8_t' type.
PREDEF_TYPE_CHAR8_ID = 45,

/// \brief OpenCL image types with auto numeration
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
PREDEF_TYPE_##Id##_ID,
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,

WIntTy = getFromTargetType(Target.getWIntType());

// C++20 (proposed)
InitBuiltinType(Char8Ty, BuiltinType::Char8);

if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char16Ty, BuiltinType::Char16);
else // C99
Expand Down Expand Up @@ -1739,6 +1742,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::SChar:
case BuiltinType::Char8:
Width = Target->getCharWidth();
Align = Target->getCharAlign();
break;
Expand Down Expand Up @@ -5456,6 +5460,7 @@ QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
// FIXME: Is there some better way to compute this?
if (BT->getKind() == BuiltinType::WChar_S ||
BT->getKind() == BuiltinType::WChar_U ||
BT->getKind() == BuiltinType::Char8 ||
BT->getKind() == BuiltinType::Char16 ||
BT->getKind() == BuiltinType::Char32) {
bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S;
Expand Down Expand Up @@ -6202,6 +6207,7 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C,
switch (kind) {
case BuiltinType::Void: return 'v';
case BuiltinType::Bool: return 'B';
case BuiltinType::Char8:
case BuiltinType::Char_U:
case BuiltinType::UChar: return 'C';
case BuiltinType::Char16:
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7326,6 +7326,7 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E,
return pointer_type_class;

case BuiltinType::WChar_U:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::ObjCId:
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/AST/ItaniumMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2525,6 +2525,9 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
case BuiltinType::WChar_U:
Out << 'w';
break;
case BuiltinType::Char8:
Out << "Du";
break;
case BuiltinType::Char16:
Out << "Ds";
break;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/MicrosoftMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1918,6 +1918,7 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
Out << "$$T";
break;

case BuiltinType::Char8:
case BuiltinType::Float16:
mangleArtificalTagType(TTK_Struct, "_Float16", {"__clang"});
break;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/NSAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const {
case BuiltinType::Void:
case BuiltinType::WChar_U:
case BuiltinType::WChar_S:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/AST/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1763,6 +1763,12 @@ bool Type::isWideCharType() const {
return false;
}

bool Type::isChar8Type() const {
if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() == BuiltinType::Char8;
return false;
}

bool Type::isChar16Type() const {
if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() == BuiltinType::Char16;
Expand All @@ -1785,6 +1791,7 @@ bool Type::isAnyCharacterType() const {
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::WChar_U:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Char_S:
Expand Down Expand Up @@ -2419,6 +2426,7 @@ bool Type::isPromotableIntegerType() const {
case BuiltinType::UShort:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
return true;
Expand Down Expand Up @@ -2655,6 +2663,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
case WChar_S:
case WChar_U:
return Policy.MSWChar ? "__wchar_t" : "wchar_t";
case Char8:
return "char8_t";
case Char16:
return "char16_t";
case Char32:
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/AST/TypeLoc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const {
case BuiltinType::Char_U:
case BuiltinType::Char_S:
return TST_char;
case BuiltinType::Char8:
return TST_char8;
case BuiltinType::Char16:
return TST_char16;
case BuiltinType::Char32:
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Analysis/PrintfFormatString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
case BuiltinType::Bool:
case BuiltinType::WChar_U:
case BuiltinType::WChar_S:
case BuiltinType::Char8: // FIXME: Treat like 'char'?
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::UInt128:
Expand Down
16 changes: 9 additions & 7 deletions clang/lib/Basic/IdentifierTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,15 @@ namespace {
KEYNOOPENCL = 0x02000,
WCHARSUPPORT = 0x04000,
HALFSUPPORT = 0x08000,
KEYCONCEPTS = 0x10000,
KEYOBJC2 = 0x20000,
KEYZVECTOR = 0x40000,
KEYCOROUTINES = 0x80000,
KEYMODULES = 0x100000,
KEYCXX2A = 0x200000,
CHAR8SUPPORT = 0x10000,
KEYCONCEPTS = 0x20000,
KEYOBJC2 = 0x40000,
KEYZVECTOR = 0x80000,
KEYCOROUTINES = 0x100000,
KEYMODULES = 0x200000,
KEYCXX2A = 0x400000,
KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX2A,
KEYALL = (0x3fffff & ~KEYNOMS18 &
KEYALL = (0x7fffff & ~KEYNOMS18 &
~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude.
};

Expand Down Expand Up @@ -151,6 +152,7 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
if (LangOpts.Bool && (Flags & BOOLSUPPORT)) return KS_Enabled;
if (LangOpts.Half && (Flags & HALFSUPPORT)) return KS_Enabled;
if (LangOpts.WChar && (Flags & WCHARSUPPORT)) return KS_Enabled;
if (LangOpts.Char8 && (Flags & CHAR8SUPPORT)) return KS_Enabled;
if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) return KS_Enabled;
if (LangOpts.OpenCL && (Flags & KEYOPENCL)) return KS_Enabled;
if (!LangOpts.CPlusPlus && (Flags & KEYNOCXX)) return KS_Enabled;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGDebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::SChar:
Encoding = llvm::dwarf::DW_ATE_signed_char;
break;
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
Encoding = llvm::dwarf::DW_ATE_UTF;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CodeGenTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::ULongLong:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
ResultType = llvm::IntegerType::get(getLLVMContext(),
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CodeGen/ItaniumCXXABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2706,6 +2706,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::LongDouble:
case BuiltinType::Float16:
case BuiltinType::Float128:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
Expand Down Expand Up @@ -3567,7 +3568,8 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) {
getContext().UnsignedInt128Ty, getContext().HalfTy,
getContext().FloatTy, getContext().DoubleTy,
getContext().LongDoubleTy, getContext().Float128Ty,
getContext().Char16Ty, getContext().Char32Ty
getContext().Char8Ty, getContext().Char16Ty,
getContext().Char32Ty
};
for (const QualType &FundamentalType : FundamentalTypes)
EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport);
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2687,6 +2687,9 @@ static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T,
CmdArgs.push_back("-fno-signed-char");
}

if (Args.hasFlag(options::OPT_fchar8__t, options::OPT_fno_char8__t, false))
CmdArgs.push_back("-fchar8_t");

if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar,
options::OPT_fno_short_wchar)) {
if (A->getOption().matches(options::OPT_fshort_wchar)) {
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Format/FormatToken.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ bool FormatToken::isSimpleTypeSpecifier() const {
case tok::kw_bool:
case tok::kw___underlying_type:
case tok::annot_typename:
case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_typeof:
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2362,6 +2362,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.ImplicitModules = !Args.hasArg(OPT_fno_implicit_modules);
Opts.CharIsSigned = Opts.OpenCL || !Args.hasArg(OPT_fno_signed_char);
Opts.WChar = Opts.CPlusPlus && !Args.hasArg(OPT_fno_wchar);
Opts.Char8 = Args.hasArg(OPT_fchar8__t);
if (const Arg *A = Args.getLastArg(OPT_fwchar_type_EQ)) {
Opts.WCharSize = llvm::StringSwitch<unsigned>(A->getValue())
.Case("char", 1)
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
Builder.defineMacro("__cpp_experimental_concepts", "1");
if (LangOpts.CoroutinesTS)
Builder.defineMacro("__cpp_coroutines", "201703L");

// Potential future breaking changes.
if (LangOpts.Char8)
Builder.defineMacro("__cpp_char8_t", "201803");
}

static void InitializePredefinedMacros(const TargetInfo &TI,
Expand Down Expand Up @@ -939,6 +943,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
InlineWidthBits));
DEFINE_LOCK_FREE_MACRO(BOOL, Bool);
DEFINE_LOCK_FREE_MACRO(CHAR, Char);
if (LangOpts.Char8)
DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char.
DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16);
DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32);
DEFINE_LOCK_FREE_MACRO(WCHAR_T, WChar);
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Index/USRGeneration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,8 @@ void USRGenerator::VisitType(QualType T) {
c = 'b'; break;
case BuiltinType::UChar:
c = 'c'; break;
case BuiltinType::Char8:
c = 'u'; break; // FIXME: Check this doesn't collide
case BuiltinType::Char16:
c = 'q'; break;
case BuiltinType::Char32:
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Lex/PPExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
NumBits = TI.getChar16Width();
else if (Literal.isUTF32())
NumBits = TI.getChar32Width();
else
else // char or char8_t
NumBits = TI.getCharWidth();

// Set the width.
Expand Down
Loading

0 comments on commit 3a8244d

Please sign in to comment.