Skip to content

Commit

Permalink
[Clang] Add __datasizeof (#67805)
Browse files Browse the repository at this point in the history
The data size is required for implementing the `memmove` optimization
for `std::copy`, `std::move` etc. correctly as well as replacing
`__compressed_pair` with `[[no_unique_address]]` in libc++. Since the
compiler already knows the data size, we can avoid some complexity by
exposing that information.
  • Loading branch information
philnik777 committed Nov 13, 2023
1 parent 0515ccc commit 4cc791b
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 19 deletions.
12 changes: 12 additions & 0 deletions clang/docs/LanguageExtensions.rst
Expand Up @@ -424,6 +424,18 @@ Builtin Macros
"UTF-16" or "UTF-32" (but may change in the future if the
``-fwide-exec-charset="Encoding-Name"`` option is implemented.)

Implementation-defined keywords
===============================

__datasizeof
------------

``__datasizeof`` behaves like ``sizeof``, except that it returns the size of the
type ignoring tail padding.

..
FIXME: This should list all the keyword extensions
.. _langext-vectors:

Vectors and Extended Vectors
Expand Down
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Expand Up @@ -217,6 +217,8 @@ Non-comprehensive list of changes in this release
(e.g., ``uint16x8_t``), this returns the constant number of elements at compile-time.
For scalable vectors, e.g., SVE or RISC-V V, the number of elements is not known at compile-time and is
determined at runtime.
* The ``__datasizeof`` keyword has been added. It is similar to ``sizeof``
except that it returns the size of a type ignoring tail padding.

New Compiler Flags
------------------
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Features.def
Expand Up @@ -277,6 +277,7 @@ EXTENSION(gnu_asm_goto_with_outputs_full, LangOpts.GNUAsm)
EXTENSION(matrix_types, LangOpts.MatrixTypes)
EXTENSION(matrix_types_scalar_division, true)
EXTENSION(cxx_attributes_on_using_declarations, LangOpts.CPlusPlus11)
EXTENSION(datasizeof, LangOpts.CPlusPlus)

FEATURE(builtin_headers_in_system_modules, LangOpts.BuiltinHeadersInSystemModules)
FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables)
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/TokenKinds.def
Expand Up @@ -310,6 +310,7 @@ KEYWORD(return , KEYALL)
KEYWORD(short , KEYALL)
KEYWORD(signed , KEYALL)
UNARY_EXPR_OR_TYPE_TRAIT(sizeof, SizeOf, KEYALL)
UNARY_EXPR_OR_TYPE_TRAIT(__datasizeof, DataSizeOf, KEYCXX)
KEYWORD(static , KEYALL)
KEYWORD(struct , KEYALL)
KEYWORD(switch , KEYALL)
Expand Down
20 changes: 16 additions & 4 deletions clang/lib/AST/ExprConstant.cpp
Expand Up @@ -3184,9 +3184,14 @@ static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E,
return true;
}

/// Selects which size HandleSizeof computes for a type.
enum class SizeOfType {
/// The full size of the type, as returned by getTypeSizeInChars.
SizeOf,
/// The data size of the type, taken from the Width of
/// getTypeInfoDataSizeInChars.
DataSizeOf,
};

/// Get the size of the given type in char units.
static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
QualType Type, CharUnits &Size) {
static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, QualType Type,
CharUnits &Size, SizeOfType SOT = SizeOfType::SizeOf) {
// sizeof(void), __alignof__(void), sizeof(function) = 1 as a gcc
// extension.
if (Type->isVoidType() || Type->isFunctionType()) {
Expand All @@ -3206,7 +3211,10 @@ static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
return false;
}

Size = Info.Ctx.getTypeSizeInChars(Type);
if (SOT == SizeOfType::SizeOf)
Size = Info.Ctx.getTypeSizeInChars(Type);
else
Size = Info.Ctx.getTypeInfoDataSizeInChars(Type).Width;
return true;
}

Expand Down Expand Up @@ -13689,6 +13697,7 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
return Success(1, E);
}

case UETT_DataSizeOf:
case UETT_SizeOf: {
QualType SrcTy = E->getTypeOfArgument();
// C++ [expr.sizeof]p2: "When applied to a reference or a reference type,
Expand All @@ -13697,8 +13706,11 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
SrcTy = Ref->getPointeeType();

CharUnits Sizeof;
if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof))
if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof,
E->getKind() == UETT_DataSizeOf ? SizeOfType::DataSizeOf
: SizeOfType::SizeOf)) {
return false;
}
return Success(Sizeof, E);
}
case UETT_OpenMPRequiredSimdAlign:
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/AST/ItaniumMangle.cpp
Expand Up @@ -28,6 +28,7 @@
#include "clang/AST/Mangle.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/DiagnosticAST.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
Expand Down Expand Up @@ -5068,6 +5069,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
Out << 'a';
MangleAlignofSizeofArg();
break;
case UETT_DataSizeOf: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID =
Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot yet mangle __datasizeof expression");
Diags.Report(DiagID);
return;
}
case UETT_VecStep: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/CodeGen/CGExprScalar.cpp
Expand Up @@ -3053,9 +3053,10 @@ Value *
ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
const UnaryExprOrTypeTraitExpr *E) {
QualType TypeToSize = E->getTypeOfArgument();
if (E->getKind() == UETT_SizeOf) {
if (auto Kind = E->getKind();
Kind == UETT_SizeOf || Kind == UETT_DataSizeOf) {
if (const VariableArrayType *VAT =
CGF.getContext().getAsVariableArrayType(TypeToSize)) {
CGF.getContext().getAsVariableArrayType(TypeToSize)) {
if (E->isArgumentType()) {
// sizeof(type) - make sure to emit the VLA size.
CGF.EmitVariablyModifiedType(TypeToSize);
Expand Down
42 changes: 31 additions & 11 deletions clang/lib/Parse/ParseExpr.cpp
Expand Up @@ -1460,6 +1460,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
// unary-expression: '__alignof' '(' type-name ')'
case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression
// unary-expression: 'sizeof' '(' type-name ')'
// unary-expression: '__datasizeof' unary-expression
// unary-expression: '__datasizeof' '(' type-name ')'
case tok::kw___datasizeof:
case tok::kw_vec_step: // unary-expression: OpenCL 'vec_step' expression
// unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')'
case tok::kw___builtin_omp_required_simd_align:
Expand Down Expand Up @@ -2307,6 +2310,8 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
/// unary-expression: [C99 6.5.3]
/// 'sizeof' unary-expression
/// 'sizeof' '(' type-name ')'
/// [Clang] '__datasizeof' unary-expression
/// [Clang] '__datasizeof' '(' type-name ')'
/// [GNU] '__alignof' unary-expression
/// [GNU] '__alignof' '(' type-name ')'
/// [C11] '_Alignof' '(' type-name ')'
Expand Down Expand Up @@ -2335,8 +2340,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
SourceRange &CastRange) {

assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_typeof_unqual, tok::kw_sizeof,
tok::kw___alignof, tok::kw_alignof, tok::kw__Alignof,
tok::kw_vec_step,
tok::kw___datasizeof, tok::kw___alignof, tok::kw_alignof,
tok::kw__Alignof, tok::kw_vec_step,
tok::kw___builtin_omp_required_simd_align,
tok::kw___builtin_vectorelements) &&
"Not a typeof/sizeof/alignof/vec_step expression!");
Expand All @@ -2347,8 +2352,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
if (Tok.isNot(tok::l_paren)) {
// If the construct allows a form without parentheses, the user may forget to
// put parentheses around the type name.
if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
tok::kw__Alignof)) {
if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___datasizeof, tok::kw___alignof,
tok::kw_alignof, tok::kw__Alignof)) {
if (isTypeIdUnambiguously()) {
DeclSpec DS(AttrFactory);
ParseSpecifierQualifierList(DS);
Expand Down Expand Up @@ -2451,14 +2456,16 @@ ExprResult Parser::ParseSYCLUniqueStableNameExpression() {
/// 'sizeof' unary-expression
/// 'sizeof' '(' type-name ')'
/// [C++11] 'sizeof' '...' '(' identifier ')'
/// [Clang] '__datasizeof' unary-expression
/// [Clang] '__datasizeof' '(' type-name ')'
/// [GNU] '__alignof' unary-expression
/// [GNU] '__alignof' '(' type-name ')'
/// [C11] '_Alignof' '(' type-name ')'
/// [C++11] 'alignof' '(' type-id ')'
/// \endverbatim
ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
tok::kw__Alignof, tok::kw_vec_step,
assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___datasizeof, tok::kw___alignof,
tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step,
tok::kw___builtin_omp_required_simd_align,
tok::kw___builtin_vectorelements) &&
"Not a sizeof/alignof/vec_step expression!");
Expand Down Expand Up @@ -2531,16 +2538,29 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
CastRange);

UnaryExprOrTypeTrait ExprKind = UETT_SizeOf;
if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
switch (OpTok.getKind()) {
case tok::kw_alignof:
case tok::kw__Alignof:
ExprKind = UETT_AlignOf;
else if (OpTok.is(tok::kw___alignof))
break;
case tok::kw___alignof:
ExprKind = UETT_PreferredAlignOf;
else if (OpTok.is(tok::kw_vec_step))
break;
case tok::kw_vec_step:
ExprKind = UETT_VecStep;
else if (OpTok.is(tok::kw___builtin_omp_required_simd_align))
break;
case tok::kw___builtin_omp_required_simd_align:
ExprKind = UETT_OpenMPRequiredSimdAlign;
else if (OpTok.is(tok::kw___builtin_vectorelements))
break;
case tok::kw___datasizeof:
ExprKind = UETT_DataSizeOf;
break;
case tok::kw___builtin_vectorelements:
ExprKind = UETT_VectorElements;
break;
default:
break;
}

if (isCastExpr)
return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/Sema/SemaExpr.cpp
Expand Up @@ -4444,8 +4444,9 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E,
assert(!ExprTy->isReferenceType());

bool IsUnevaluatedOperand =
(ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf ||
ExprKind == UETT_PreferredAlignOf || ExprKind == UETT_VecStep);
(ExprKind == UETT_SizeOf || ExprKind == UETT_DataSizeOf ||
ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf ||
ExprKind == UETT_VecStep);
if (IsUnevaluatedOperand) {
ExprResult Result = CheckUnevaluatedOperand(E);
if (Result.isInvalid())
Expand Down
19 changes: 19 additions & 0 deletions clang/test/CodeGenCXX/datasizeof.cpp
@@ -0,0 +1,19 @@
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -emit-llvm %s -o - | FileCheck %s

// CHECK-LABEL: define dso_local noundef i32 @_Z4testi(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 4, [[TMP1]]
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: ret i32 [[TMP3]]
//
int test(int i) {
// The operand is a variable-length array type, so its size expression is
// emitted: the IR patterns above require the increment of `i` and the
// multiplication by 4 to appear, and the incremented value to be returned.
(void)__datasizeof(int[i++]);
return i;
}
53 changes: 53 additions & 0 deletions clang/test/SemaCXX/datasizeof.cpp
@@ -0,0 +1,53 @@
// RUN: %clang_cc1 -fsyntax-only -triple x86_64-linux-gnu -verify %s

#if !__has_extension(datasizeof)
# error "Expected datasizeof extension"
#endif

// Fixture: an int followed by a char, with no user-declared constructor.
// The assertions in this file require __datasizeof(HasPadding) to be 8 on the
// x86_64 target used by the RUN line, i.e. the bytes after `c` are counted.
struct HasPadding {
int i;
char c;
};

// Fixture: same members as HasPadding plus a user-provided constructor.
// The assertions in this file require __datasizeof(HasUsablePadding) to be 5,
// i.e. the bytes after `c` are not counted.
// NOTE(review): presumably the constructor is what makes the tail padding
// reusable under the target ABI's layout rules -- confirm against the ABI.
struct HasUsablePadding {
int i;
char c;

HasUsablePadding() {}
};

// Type operands: a fundamental type, the two fixture structs, and two
// incomplete types (void and a forward-declared struct) that must be rejected
// with a diagnostic.
struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}

static_assert(__datasizeof(int) == 4);
static_assert(__datasizeof(HasPadding) == 8);
static_assert(__datasizeof(HasUsablePadding) == 5);
static_assert(__datasizeof(void)); // expected-error {{invalid application of '__datasizeof' to an incomplete type 'void'}}
static_assert(__datasizeof(Incomplete)); // expected-error {{invalid application of '__datasizeof' to an incomplete type 'Incomplete'}}

// Expression operands: exercises both the parenthesized form and the
// unparenthesized unary-expression form (`__datasizeof *p`). The pointers are
// null; Sema treats the operand of __datasizeof as unevaluated, so `*p` only
// supplies a type here.
static_assert([] {
int* p = nullptr;
HasPadding* p2 = nullptr;
HasUsablePadding* p3 = nullptr;
static_assert(__datasizeof(*p) == 4);
static_assert(__datasizeof *p == 4);
static_assert(__datasizeof(*p2) == 8);
static_assert(__datasizeof(*p3) == 5);

return true;
}());

// Applies __datasizeof to a template type parameter. The assertions that
// follow this template compare each instantiation with the result of
// __datasizeof on the same type written directly.
template <typename Ty>
constexpr int data_size_of() {
return __datasizeof(Ty);
}
static_assert(data_size_of<int>() == __datasizeof(int));
static_assert(data_size_of<HasPadding>() == __datasizeof(HasPadding));
static_assert(data_size_of<HasUsablePadding>() == __datasizeof(HasUsablePadding));

// Applies __datasizeof to the enclosing class from a default member
// initializer. The assertion below requires the result to be 9 for this
// int + float + char layout on the target used by the RUN line.
struct S {
int i = __datasizeof(S);
float f;
char c;
};

static_assert(S{}.i == 9);

0 comments on commit 4cc791b

Please sign in to comment.